Source code for pyrltr.agents.NeuralNetworkAgent

# -*- coding: utf-8 -*-
"""
Created on Tue Jun 25 11:56:39 2013

@author: Chris Stahlhut
"""

import numpy as np

from Agent import Agent
from pymlp.mlp.FFNetwork import FFNetwork
from pymlp.mlp.TransferFunctions import Logistic, TanH


class NeuralNetworkAgent(Agent):

    def initController(self):
        """Initialize the controller; in this case a feed-forward neural
        network that approximates the state-action values."""
        self.numberOfActions = 4
        self.inputDimension = self.world.getSensors().shape[0]
        # Two hidden layers of 20 units; tanh transfer functions on all
        # three weight layers, one output node per action.
        self.controller = FFNetwork(
            (self.inputDimension, 20, 20, self.numberOfActions),
            [TanH()] * 3, self.alpha, 1)
        self.randomState = np.random.RandomState()

    def updateReward(self, state, reward, nextState, action, nextAction,
                     episodeOver):
        """Update the Q-value of the current action from the observed
        reward and the Q-value of the next action."""
        reward = self.scaleReward(reward)
        self.rewards.append(reward)
        expectedFutureReward = self.getQValues(self.scaleState(
            nextState))[nextAction]
        # Feed the current state forward last, so that the network's
        # activations belong to the state whose error is backpropagated.
        QValues = self.getQValues(self.scaleState(state))
        QDiff = (reward + self.gamma * expectedFutureReward) - QValues[action]
        self.errors.append(QDiff)
        # Only the output node of the chosen action receives an error signal.
        QDiffVec = np.zeros(QValues.shape[0])
        QDiffVec[action] = QDiff
        self.controller.propagateBack(QDiffVec)
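
    # For reference: the update above implements the SARSA rule
    #
    #     Q(s, a) <- Q(s, a) + alpha * [r + gamma * Q(s', a') - Q(s, a)]
    #
    # Only the TD error r + gamma * Q(s', a') - Q(s, a) is computed here;
    # applying the step size alpha is assumed to happen inside
    # FFNetwork.propagateBack, since alpha is handed to the FFNetwork
    # constructor in initController.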

    def scaleReward(self, reward):
        """Scale the raw reward down by the constant factor N."""
        reward = reward / float(self.N)
        # reward = (reward + 1) / 2.0
        return reward

    def scaleState(self, state):
        # np.where(self.world.worldSize < 1, 1, self.world.worldSize)
        borders = self.world.worldSize
        scaledState = (state / (borders * 2) - 1)
        return scaledState

    def selectAction(self, state):
        """Epsilon-greedy action selection for the given state."""
        Q = self.getQValues(self.scaleState(state))
        # Greedy choice: all actions whose Q-value ties for the maximum.
        possibleActions = np.where(Q == Q.max())[0]
        if self.randomState.uniform(0, 1) < self.epsilon:
            # Explore: choose among all output nodes of the last layer.
            # (np.arange instead of xrange, since np.random.choice needs
            # an int or a 1-D array-like.)
            possibleActions = np.arange(self.numberOfActions)
        return self.randomState.choice(possibleActions)

    def getQValues(self, state):
        return self.controller.feedForward(state)[0]

    def reset(self):
        pass
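

# ---------------------------------------------------------------------------
# A minimal sketch of how this agent might be driven, for orientation only.
# Everything below is an assumption: the Agent base class is expected to set
# up world, alpha, gamma, epsilon, N, rewards and errors, and the world's
# act() method and its return values are hypothetical, not part of this
# module or of pymlp.
#
#     agent = NeuralNetworkAgent()        # assumes Agent needs no arguments
#     agent.initController()
#     state = agent.world.getSensors()
#     action = agent.selectAction(state)
#     episodeOver = False
#     while not episodeOver:
#         # hypothetical world API returning (reward, nextState, episodeOver)
#         reward, nextState, episodeOver = agent.world.act(action)
#         nextAction = agent.selectAction(nextState)
#         agent.updateReward(state, reward, nextState, action, nextAction,
#                            episodeOver)
#         state, action = nextState, nextAction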