# Source code for pyrltr.agents.Agent
import os
import numpy as np
from pyrltr.analyzer.DataContainer import DataContainer
class Agent():
    """
    Agent is the base class for all learning agents.

    Attributes:
        world -- the world in which this agent is supposed to act
        learner -- the learning algorithm
        teacher -- the teacher
        askLikelihood -- the likelihood of asking the teacher to evaluate the
                         last action
        randomState -- the RandomState for the stochastic influence
    """

    def __init__(self, world, learner, teacher, askLikelihood, index=0):
        """
        Sets up the agent, its data folder and the data containers.

        Arguments:
        world -- environment the agent acts in
        learner -- learning algorithm (must provide initController() and
                   getDataFolderName())
        teacher -- oracle queried about state transitions
        askLikelihood -- probability of querying the teacher per step;
                         capped at 0.99 so the agent never depends on the
                         teacher for every single action
        index -- run index used to separate stored data files (default 0)
        """
        self.world = world
        self.learner = learner
        self.teacher = teacher
        self.askLikelihood = min(askLikelihood, 0.99)
        self.undos = 0
        self.randomState = np.random.RandomState()
        # setup controller
        self.learner.initController()
        self.folderName = "%s_%s/" % (self.learner.getDataFolderName(),
                                      self.askLikelihood)
        if not os.path.exists(self.folderName):
            try:
                os.makedirs(self.folderName)
            except OSError:
                # Raised when the directory already exists (e.g. created
                # concurrently by another run) -- safe to ignore here.
                pass
        # set-up containers to store data for analysis
        self.trainingDataContainer = DataContainer(self.folderName, "training")
        self.testingDataContainer = DataContainer(self.folderName, "testing")
        self.finalTestingDataContainer = DataContainer(self.folderName,
                                                       "finaltesting")
        # store index for better data storage
        self.index = index

    def doTrainingEpoch(self):
        """
        Does one training epoch.
        """
        self.world.resetTrainingEpoch()
        self.trainingDataContainer.prepareNewEpoch()
        # range instead of xrange for Python 3 compatibility; iteration
        # behaviour is identical.
        for _ in range(self.world.numberOfTrainingEpisodes):
            self.doTrainingEpisode()

    def doTestingEpoch(self):
        """
        Does one testing epoch.
        """
        self.world.resetTestingEpoch()
        self.testingDataContainer.prepareNewEpoch()
        for _ in range(self.world.numberOfTestingEpisodes):
            self.doTestingEpisode(self.testingDataContainer)

    def doFinalTestingEpoch(self):
        """
        Does one final testing epoch, stored in its own data container.
        """
        self.world.resetFinalTestingEpoch()
        self.finalTestingDataContainer.prepareNewEpoch()
        for _ in range(self.world.numberOfFinalTestingEpisodes):
            self.doTestingEpisode(self.finalTestingDataContainer)

    def doTrainingEpisode(self):
        """
        Runs one training episode. The learner does the action selection and
        update calculation.

        With probability askLikelihood the teacher judges each transition;
        a transition judged bad is undone in the world and not learned from.
        """
        self.world.setupTrainingEpisode()
        self.trainingDataContainer.prepareNewEpisode()
        self.trainingDataContainer.addStartPosition(self.world.agentPosition)
        sumOfRewards = 0
        undos = 0
        while not self.world.episodeOver():
            state = self.world.getSensors()
            action = self.learner.selectAction(state)
            self.world.performAction(action)
            nextState = self.world.getSensors()
            goodAction = True
            if self.randomState.uniform() < self.askLikelihood:
                goodAction = self.teacher.isBetter(state, nextState)
            if goodAction:
                self.trainingDataContainer.addAction(action)
                reward = self.world.getReward()
                sumOfRewards += reward
                self.learner.updateReward(state, reward, nextState,
                                          action, self.world.episodeOver())
                self.trainingDataContainer.addUndo(undos)
            else:
                # Teacher judged the transition as bad: roll the world back
                # and skip the learner update for this step.
                self.world.undo()
                undos += 1
        self.trainingDataContainer.addReward(sumOfRewards)
        self.trainingDataContainer.addLength(self.world.steps)
        self.trainingDataContainer.addGoalPosition(self.world.goalPosition)
        self.reset()

    def doTestingEpisode(self, dataContainer):
        """
        Runs one testing episode. The learner does the action selection without
        exploration and no updates are performed.

        Arguments:
        dataContainer -- container the episode's results are written into
        """
        self.world.setupTestingEpisode()
        dataContainer.prepareNewEpisode()
        dataContainer.addStartPosition(self.world.agentPosition)
        sumOfRewards = 0
        while not self.world.episodeOver():
            state = self.world.getSensors()
            action = self.learner.getAction(state)
            self.world.performAction(action)
            dataContainer.addAction(action)
            reward = self.world.getReward()
            sumOfRewards += reward
        dataContainer.addFinalPosition(self.world.agentPosition)
        dataContainer.addReward(sumOfRewards)
        dataContainer.addLength(self.world.steps)
        dataContainer.addGoalPosition(self.world.goalPosition)
        self.reset()

    def _writeResults(self):
        """
        Writes the results (number of steps, rewards, positions, etc)
        into the appropriate files.
        """
        self.trainingDataContainer.writeData(self.index)
        self.testingDataContainer.writeData(self.index)
        self.finalTestingDataContainer.writeData(self.index)

    def reset(self):
        """Resets the world after an episode and adds the latest result to
        the collection."""
        if self.world.targetReached(self.world.distance):
            print("REACHED!!!")
        else:
            print("DEAD!!!")
        self.world.reset()
        self.learner.reset()

    def finalize(self):
        """Finalize complete run of this actor."""
        self._writeResults()
        self.learner.finalize(self.folderName, self.index)