# Source code for pyrltr.agents.Agent

import os
import numpy as np
from pyrltr.analyzer.DataContainer import DataContainer

class Agent():
    """Agent is the base class for all learning agents.

    NOTE(review): this block was reconstructed from a garbled
    (Sphinx-viewcode, HTML-to-text) dump in which every expression
    beginning with ``self.world`` was stripped out.  Every such
    expression below is a best-guess reconstruction marked with
    "TODO confirm"; verify each against the original pyrltr sources.

    Attributes:
        world -- the world in which this agent is supposed to act
        learner -- the learning algorithm
        teacher -- the teacher
        askLikelihood -- the likelihood of asking the teacher to evaluate
                         the last action
        randomState -- the RandomState for the stochastic influence
    """

    def __init__(self, world, learner, teacher, askLikelihood, index=0):
        self.world = world  # original residue read "= world"; target restored
        self.learner = learner
        self.teacher = teacher
        # cap the ask likelihood strictly below 1.0 so the agent never
        # asks the teacher on literally every step
        self.askLikelihood = min(askLikelihood, 0.99)
        self.undos = 0
        self.randomState = np.random.RandomState()
        # setup controller
        self.learner.initController()
        self.folderName = "%s_%s/" % (self.learner.getDataFolderName(),
                                      self.askLikelihood)
        if not os.path.exists(self.folderName):
            try:
                os.makedirs(self.folderName)
            except OSError:
                # documentation says: raised if file already exists
                pass
        # set-up containers to store data for analysis
        self.trainingDataContainer = DataContainer(self.folderName,
                                                   "training")
        self.testingDataContainer = DataContainer(self.folderName,
                                                  "testing")
        self.finalTestingDataContainer = DataContainer(self.folderName,
                                                       "finaltesting")
        # store index for better data storage
        self.index = index

    def doTrainingEpoch(self):
        """Does one training epoch."""
        self.trainingDataContainer.prepareNewEpoch()
        # loop bound was stripped in extraction; presumably the number of
        # episodes per epoch comes from the world -- TODO confirm
        for i in xrange(self.world.episodesPerEpoch):
            self.doTrainingEpisode()

    def doTestingEpoch(self):
        """Does one testing epoch."""
        self.testingDataContainer.prepareNewEpoch()
        # loop bound stripped; presumably world-provided -- TODO confirm
        for i in xrange(self.world.episodesPerEpoch):
            self.doTestingEpisode(self.testingDataContainer)

    def doFinalTestingEpoch(self):
        """Does one final testing epoch."""
        self.finalTestingDataContainer.prepareNewEpoch()
        # loop bound stripped; presumably world-provided -- TODO confirm
        for i in xrange(self.world.episodesPerEpoch):
            self.doTestingEpisode(self.finalTestingDataContainer)

    def doTrainingEpisode(self):
        """Runs one training episode.

        The learner does the action selection and update calculation.
        """
        self.trainingDataContainer.prepareNewEpisode()
        # stripped argument: presumably the episode start position
        # -- TODO confirm
        self.trainingDataContainer.addStartPosition(
            self.world.getStartPosition())
        sumOfRewards = 0
        undos = 0
        # stripped loop condition: run until the world reports the episode
        # finished -- TODO confirm
        while not self.world.episodeFinished():
            state = self.world.getState()  # stripped -- TODO confirm
            action = self.learner.selectAction(state)
            # stripped: applying the action presumably yields the next
            # state -- TODO confirm
            nextState = self.world.performAction(action)
            goodAction = True
            # with probability askLikelihood, let the teacher judge the
            # last transition
            if self.randomState.uniform() < self.askLikelihood:
                goodAction = self.teacher.isBetter(state, nextState)
            if goodAction:
                self.trainingDataContainer.addAction(action)
                reward = self.world.getReward()  # stripped -- TODO confirm
                sumOfRewards = sumOfRewards + reward
                # stripped final argument: presumably an episode-finished
                # flag -- TODO confirm
                self.learner.updateReward(state, reward, nextState, action,
                                          self.world.episodeFinished())
                self.trainingDataContainer.addUndo(undos)
            else:
                # NOTE(review): a stripped "self.world.undo...()" call may
                # have stood here (the undo counter suggests the world can
                # roll back a rejected action) -- confirm upstream
                undos += 1
        self.trainingDataContainer.addReward(sumOfRewards)
        # stripped arguments on the three calls below -- TODO confirm
        self.trainingDataContainer.addLength(self.world.getEpisodeLength())
        self.trainingDataContainer.addGoalPosition(
            self.world.getGoalPosition())
        self.reset()

    def doTestingEpisode(self, dataContainer):
        """Runs one testing episode.

        The learner does the action selection without exploration and no
        updates are performed.
        """
        dataContainer.prepareNewEpisode()
        # stripped argument -- TODO confirm
        dataContainer.addStartPosition(self.world.getStartPosition())
        sumOfRewards = 0
        # stripped loop condition -- TODO confirm
        while not self.world.episodeFinished():
            state = self.world.getState()  # stripped -- TODO confirm
            action = self.learner.getAction(state)
            dataContainer.addAction(action)
            reward = self.world.getReward()  # stripped -- TODO confirm
            sumOfRewards = sumOfRewards + reward
        # stripped arguments on the calls below -- TODO confirm
        dataContainer.addFinalPosition(self.world.getPosition())
        dataContainer.addReward(sumOfRewards)
        dataContainer.addLength(self.world.getEpisodeLength())
        dataContainer.addGoalPosition(self.world.getGoalPosition())
        self.reset()

    def _writeResults(self):
        """Writes the results (number of steps, rewards, positions, etc)
        into the appropriate files.
        """
        self.trainingDataContainer.writeData(self.index)
        self.testingDataContainer.writeData(self.index)
        self.finalTestingDataContainer.writeData(self.index)

    def reset(self):
        """Resets the world after an episode and adds the latest result to
        the collection.
        """
        # stripped condition: presumably whether the goal was reached
        # -- TODO confirm
        if self.world.reachedGoal():
            print("REACHED!!!")
        else:
            print("DEAD!!!")
        # NOTE(review): a stripped "self.world.reset()" statement may be
        # missing here -- the docstring says the world is reset; confirm.
        self.learner.reset()

    def finalize(self):
        """Finalize complete run of this actor."""
        self._writeResults()
        self.learner.finalize(self.folderName, self.index)