"""Source code for pyrltr.agents.StateActionTableLearner."""

import numpy as np

class StateActionTableLearner():
    """SARSA-style reinforcement learner backed by a tabular Q-function.

    Q-values are stored in a dense ``(numStates, 4)`` numpy array, i.e.
    four discrete actions per state. Hyperparameters are supplied at
    construction; the table itself is allocated by ``initController``.
    """

    def __init__(self, alpha=0.3, gamma=0.1, epsilon=0.5, N=1):
        """Store the learning hyperparameters.

        alpha   -- learning rate for the Q-update
        gamma   -- discount factor for future rewards
        epsilon -- exploration probability for epsilon-greedy selection
        N       -- raw state-space size; divided by 5 in initController()
                   (presumably states are binned in groups of 5 by the
                   environment -- TODO confirm against the caller)
        """
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.N = N

    def initController(self):
        """Initialize the controller. In this case, it is a state-action-table."""
        # BUGFIX: '/' was integer division under Python 2; on Python 3 it
        # yields a float, which np.zeros rejects as a dimension. '//'
        # preserves the original (floor) behavior on both versions.
        self.N = self.N // 5
        self.Q = np.zeros((self.N, 4))
        self.randomState = np.random.RandomState()

    def getDataFolderName(self):
        """Return the folder name used for this learner's result data."""
        return self.__class__.__name__

    def updateReward(self, state, reward, nextState, action, nextAction, episodeOver):
        """Updates the reward for the current action by considering the
        reward and next action (SARSA update rule).

        BUGFIX: the bootstrap term previously read Q[state, nextAction],
        leaving the nextState parameter unused; SARSA bootstraps from the
        *next* state, Q[nextState, nextAction]. When the episode is over
        there is no successor state, so the target is the bare reward.
        """
        if episodeOver:
            target = reward
        else:
            target = reward + self.gamma * self.Q[nextState, nextAction]
        self.Q[state, action] += self.alpha * (target - self.Q[state, action])

    def selectAction(self, state):
        """Epsilon-greedy action selection for the state."""
        maxValue = self.Q[state].max()
        # Greedy candidates: every action tied for the maximal Q-value.
        possibleActions = np.where(self.Q[state] == maxValue)[0]
        if self.randomState.uniform(0, 1) < self.epsilon:
            # Explore: draw uniformly from ALL actions instead.
            # BUGFIX: 'xrange' does not exist on Python 3; np.arange is a
            # valid 1-D candidate array for RandomState.choice.
            possibleActions = np.arange(self.Q.shape[1])
        return self.randomState.choice(possibleActions)

    def reset(self):
        """Per-episode reset hook; this learner keeps no episode state."""
        pass

    def writeResults(self, writer):
        """Result-writing hook; this learner writes nothing."""
        pass