Source code for pyrltr.agents.NeuralNetworkAgent
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 25 11:56:39 2013
@author: Chris Stahlhut
"""
import numpy as np
from Agent import Agent
from pymlp.mlp.FFNetwork import FFNetwork
from pymlp.mlp.TransferFunctions import TanH

class NeuralNetworkAgent(Agent):
    """Reinforcement-learning agent that approximates the Q-function with
    a feed-forward neural network and learns via SARSA updates."""
    def initController(self):
        """Initialize the controller. In this case, it is a feed-forward
        neural network approximating the state-action values."""
        self.numberOfActions = 4
        self.inputDimension = self.world.getSensors().shape[0]
        # Two hidden layers of 20 tanh units each; the learning rate
        # self.alpha is assumed to be provided by the Agent base class.
        self.controller = FFNetwork(
            (self.inputDimension, 20, 20, self.numberOfActions),
            [TanH()] * 3, self.alpha, 1)
        self.randomState = np.random.RandomState()

    def updateReward(self, state, reward, nextState, action, nextAction,
                     episodeOver):
        """Perform a SARSA update: move the Q-value of the chosen action
        towards reward + gamma * Q(nextState, nextAction)."""
        reward = self.scaleReward(reward)
        self.rewards.append(reward)
        expectedFutureReward = self.getQValues(self.scaleState(
            nextState))[nextAction]
        if episodeOver:
            # Do not bootstrap from beyond a terminal state.
            expectedFutureReward = 0.0
        # The second feedForward (inside getQValues) presumably leaves the
        # network's activations set up for propagateBack on `state`.
        QValues = self.getQValues(self.scaleState(state))
        QDiff = (reward + self.gamma * expectedFutureReward) - QValues[action]
        self.errors.append(QDiff)
        # Back-propagate the TD error through the output unit of the
        # chosen action only; all other output errors stay zero.
        QDiffVec = np.zeros(QValues.shape[0])
        QDiffVec[action] = QDiff
        self.controller.propagateBack(QDiffVec)
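
    # Worked example (illustrative numbers, not from the original code):
    # with gamma = 0.9, a scaled reward of 0.25, Q(s', a') = 0.5 and
    # Q(s, a) = 0.1, the TD error is
    #     (0.25 + 0.9 * 0.5) - 0.1 = 0.6
    # and only the output unit for `action` receives this error.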

    def scaleReward(self, reward):
        """Scale the raw reward by 1/N to keep training targets small;
        self.N is assumed to be set by the Agent base class."""
        reward = reward / float(self.N)
        # reward = (reward + 1) / 2.0
        return reward
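
    # Example (hedged, assuming N = 10): a raw reward of 1.0 becomes 0.1,
    # comfortably inside the tanh output interval (-1, 1).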

    def scaleState(self, state):
        """Scale the state into the network's input range."""
        # np.where(self.world.worldSize < 1, 1, self.world.worldSize)
        borders = self.world.worldSize
        scaledState = (state / (borders * 2) - 1)
        return scaledState
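
    # Note (hedged): for state components in [0, worldSize] this maps to
    # [-1, -0.5] rather than the full tanh input range [-1, 1]; if the full
    # range was intended, state / borders * 2 - 1 would achieve it.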

    def selectAction(self, state):
        """Epsilon-greedy action selection for the given state."""
        Q = self.getQValues(self.scaleState(state))
        # Greedy choice: all actions whose Q-value equals the maximum.
        possibleActions = np.where(Q == Q.max())[0]
        if self.randomState.uniform(0, 1) < self.epsilon:
            # Explore: choose uniformly among all output nodes.
            possibleActions = np.arange(self.numberOfActions)
        return self.randomState.choice(possibleActions)
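
    # Example (hedged, assuming epsilon = 0.1): the agent explores on
    # roughly 10% of steps and otherwise exploits the current Q-values,
    # breaking ties among equally-valued greedy actions at random.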

    def getQValues(self, state):
        """Feed the (scaled) state through the network and return the
        output-layer activations as Q-value estimates."""
        # feedForward presumably returns a batch of outputs; take the
        # single sample's output vector.
        return self.controller.feedForward(state)[0]
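
# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original module). The episode loop
# below only illustrates how the methods above fit together; `world.act`,
# `world.episodeOver` and the Agent base-class attributes (alpha, gamma,
# epsilon, N) are assumptions about the surrounding pyrltr/pymlp code,
# not confirmed APIs.
#
#     agent = NeuralNetworkAgent(...)      # construction handled by Agent
#     agent.initController()
#     state = agent.world.getSensors()
#     action = agent.selectAction(state)
#     while not agent.world.episodeOver():
#         reward, nextState = agent.world.act(action)
#         nextAction = agent.selectAction(nextState)
#         agent.updateReward(state, reward, nextState, action, nextAction,
#                            agent.world.episodeOver())
#         state, action = nextState, nextAction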