# Source code for pyrltr.agents.Agent

import os
import numpy as np
from pyrltr.analyzer.DataContainer import DataContainer


class Agent:
    """
    Agent is the base class for all learning agents.

    It drives a learner through training, testing and final-testing epochs in
    a world, optionally lets a teacher veto single actions during training,
    and records the results in one data container per phase.

    Attributes:
    world -- the world in which this agent is supposed to act
    learner -- the learning algorithm
    teacher -- the teacher
    askLikelihood -- the likelihood of asking the teacher to evaluate the last
                     action (capped at 0.99)
    randomState -- the RandomState for the stochastic influence
    folderName -- the folder into which all results of this agent are written
    index -- the index handed to the containers and the learner when writing
    """

    def __init__(self, world, learner, teacher, askLikelihood, index=0):
        """
        Sets up the agent, its output folder and its data containers.

        Parameters:
        world -- the world in which this agent is supposed to act
        learner -- the learning algorithm
        teacher -- the teacher
        askLikelihood -- the likelihood of asking the teacher; values above
                         0.99 are reduced to 0.99
        index -- index passed along when the results are written (default 0)

        Raises:
        OSError -- if the output folder does not exist and cannot be created
        """
        self.world = world
        self.learner = learner
        self.teacher = teacher
        # NOTE(review): the cap presumably keeps a teacher that rejects every
        # action from blocking an episode forever -- confirm.
        self.askLikelihood = min(askLikelihood, 0.99)
        self.undos = 0
        self.randomState = np.random.RandomState()

        # setup controller
        self.learner.initController()

        self.folderName = "%s_%s/" % (self.learner.getDataFolderName(),
                                      self.askLikelihood)
        # Create first and inspect the failure afterwards: this avoids the
        # race between checking and creating, and only an already existing
        # directory is tolerated. Any other failure is propagated.
        try:
            os.makedirs(self.folderName)
        except OSError:
            if not os.path.isdir(self.folderName):
                raise

        # set-up containers to store data for analysis
        self.trainingDataContainer = DataContainer(self.folderName,
                                                   "training")
        self.testingDataContainer = DataContainer(self.folderName, "testing")
        self.finalTestingDataContainer = DataContainer(self.folderName,
                                                       "finaltesting")

        # store index for better data storage
        self.index = index

    def doTrainingEpoch(self):
        """
        Does one training epoch, i.e. world.numberOfTrainingEpisodes training
        episodes.
        """
        self.world.resetTrainingEpoch()
        self.trainingDataContainer.prepareNewEpoch()
        # range instead of xrange so that this runs on Python 2 and 3
        for _ in range(self.world.numberOfTrainingEpisodes):
            self.doTrainingEpisode()

    def doTestingEpoch(self):
        """
        Does one testing epoch, i.e. world.numberOfTestingEpisodes testing
        episodes recorded in the testing data container.
        """
        self.world.resetTestingEpoch()
        self.testingDataContainer.prepareNewEpoch()
        for _ in range(self.world.numberOfTestingEpisodes):
            self.doTestingEpisode(self.testingDataContainer)

    def doFinalTestingEpoch(self):
        """
        Does one final testing epoch, i.e. world.numberOfFinalTestingEpisodes
        testing episodes recorded in the final testing data container.
        """
        self.world.resetFinalTestingEpoch()
        self.finalTestingDataContainer.prepareNewEpoch()
        for _ in range(self.world.numberOfFinalTestingEpisodes):
            self.doTestingEpisode(self.finalTestingDataContainer)

    def doTrainingEpisode(self):
        """
        Runs one training episode. The learner does the action selection and
        update calculation.

        With a likelihood of askLikelihood the teacher is asked to compare the
        state before and after an action. A rejected action is undone in the
        world and neither rewarded nor learned from.
        """
        self.world.setupTrainingEpisode()
        self.trainingDataContainer.prepareNewEpisode()
        self.trainingDataContainer.addStartPosition(self.world.agentPosition)
        sumOfRewards = 0
        undos = 0

        while not self.world.episodeOver():
            state = self.world.getSensors()
            action = self.learner.selectAction(state)
            self.world.performAction(action)
            nextState = self.world.getSensors()

            # an action counts as good unless the teacher is asked and
            # rejects it
            goodAction = True
            if self.randomState.uniform() < self.askLikelihood:
                goodAction = self.teacher.isBetter(state, nextState)

            if goodAction:
                self.trainingDataContainer.addAction(action)
                reward = self.world.getReward()
                sumOfRewards = sumOfRewards + reward
                self.learner.updateReward(state, reward, nextState, action,
                                          self.world.episodeOver())
                # undos is not reset here, so the running total of the
                # episode is stored with every accepted action
                self.trainingDataContainer.addUndo(undos)
            else:
                self.world.undo()
                undos += 1

        self.trainingDataContainer.addReward(sumOfRewards)
        self.trainingDataContainer.addLength(self.world.steps)
        self.trainingDataContainer.addGoalPosition(self.world.goalPosition)
        self.reset()

    def doTestingEpisode(self, dataContainer):
        """
        Runs one testing episode. The learner does the action selection
        without exploration and no updates are performed.

        Parameters:
        dataContainer -- the container in which the episode is recorded
        """
        self.world.setupTestingEpisode()
        dataContainer.prepareNewEpisode()
        dataContainer.addStartPosition(self.world.agentPosition)
        sumOfRewards = 0

        while not self.world.episodeOver():
            state = self.world.getSensors()
            action = self.learner.getAction(state)
            self.world.performAction(action)
            dataContainer.addAction(action)
            reward = self.world.getReward()
            sumOfRewards = sumOfRewards + reward

        dataContainer.addFinalPosition(self.world.agentPosition)
        dataContainer.addReward(sumOfRewards)
        dataContainer.addLength(self.world.steps)
        dataContainer.addGoalPosition(self.world.goalPosition)
        self.reset()

    def _writeResults(self):
        """
        Writes the results (number of steps, rewards, positions, etc) into the
        appropriate files.
        """
        self.trainingDataContainer.writeData(self.index)
        self.testingDataContainer.writeData(self.index)
        self.finalTestingDataContainer.writeData(self.index)

    def reset(self):
        """
        Prints whether the target was reached in the episode that just ended
        and resets the world and the learner.
        """
        if self.world.targetReached(self.world.distance):
            print("REACHED!!!")
        else:
            print("DEAD!!!")
        self.world.reset()
        self.learner.reset()

    def finalize(self):
        """Finalize complete run of this actor."""
        self._writeResults()
        self.learner.finalize(self.folderName, self.index)