Source code for pyrltr.worlds.GridWorld

import numpy as np
from BaseWorld import BaseWorld


class GridWorld(BaseWorld):
    """Implementation of a GridWorld.

    Abstract class, please use a concrete implementation."""

    def __init__(self, size=np.array([5, 5]), agentPosition=np.zeros(2),
                 goalPosition=np.array([2, 2])):
        assert size.shape == agentPosition.shape
        assert size.shape == goalPosition.shape
        self.worldSize = size
        self.initialAgentPosition = agentPosition
        self.initialGoalPosition = goalPosition
        BaseWorld.__init__(self, maxEpisodeLength=np.prod(size))

    def _validatePosition(self, position):
        """Validates the position.

        If the position is outside the border, it is clamped to the
        border, so the agent cannot leave the world."""
        result = np.where(position < self.worldSize, position,
                          self.worldSize - 1)
        result = np.where(result > 0, result, 0)
        return result
    def isWithinWorld(self, point):
        return ((self.worldSize > point).all()
                and (np.zeros(self.worldSize.shape) <= point).all())
    def calculateDistanceToTarget(self, position, goalPosition):
        return np.sqrt(np.sum((position - goalPosition) ** 2))
    def getRandomPosition(self):
        """Returns a random position within the world."""
        # TODO 2013-10-29 make dimension agnostic
        # (see ContinuousGridWorld as an example)
        # numpy's randint excludes the upper bound, so worldSize itself
        # is passed to make every cell (0 .. worldSize - 1) reachable
        return np.array([self.randomState.randint(0, self.worldSize[0]),
                         self.randomState.randint(0, self.worldSize[1])])
    def _getInitialAgentPosition(self):
        """Returns the initial agent position."""
        return self.initialAgentPosition.copy()

    def _getInitialGoalPosition(self):
        """Returns the initial goal position."""
        return self.initialGoalPosition.copy()
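
A minimal sketch of the clamping done by _validatePosition, assuming BaseWorld's constructor succeeds when given only maxEpisodeLength:

    # Positions outside the 5x5 grid are clamped to the nearest border cell.
    world = GridWorld(size=np.array([5, 5]))        # assumption: constructible as-is
    print(world._validatePosition(np.array([7, -3])))  # -> [4 0]
    print(world._validatePosition(np.array([2, 4])))   # -> [2 4], already valid
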
class GridWorldStateActionTable(GridWorld):
    """Implementation to use with a state-action table.

    Returns the position as an index within a list."""

    positionAddition = np.array([[1, 0], [-1, 0], [0, 1], [0, -1], [0, 0]])
    def getSensors(self):
        assert self.worldSize.ndim == 1
        return (int(self.worldSize[1] * self.agentPosition[0])
                + self.agentPosition[1])
    def _perform(self, position, action):
        """Performs the actual action and returns the new position."""
        actionToPerform = self.positionAddition[action]
        return position + actionToPerform
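
For concreteness, a standalone sketch of the row-major index that getSensors computes, as plain numpy arithmetic with hypothetical values:

    worldSize = np.array([3, 4])      # 3 rows, 4 columns
    agentPosition = np.array([2, 1])  # row 2, column 1
    index = int(worldSize[1] * agentPosition[0]) + agentPosition[1]
    print(index)  # -> 9, the entry for cell (2, 1) in a 12-entry table
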
class GridWorldNeuralNetwork(GridWorld):
    """Implementation to use with neural networks.

    Returns the position as an array. Basically uses a continuous
    state space."""
    def getSensors(self):
        sensorValues = self.agentPosition
        scaledValues = self.scale(sensorValues)
        # avoid NaNs if any dimension has size zero
        result = np.nan_to_num(scaledValues)
        return result
    def scale(self, position):
        """Scales the given position down by the world size."""
        return position.astype(np.float64) / self.worldSize
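
A short sketch of what scale yields, namely one value in [0, 1) per dimension (hypothetical sizes and positions):

    worldSize = np.array([5, 10])
    position = np.array([4, 5])
    print(position.astype(np.float64) / worldSize)  # -> [0.8 0.5]
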
class GridWorldNNMovingTarget(GridWorldNeuralNetwork):
    """Implementation where the target position moves between runs and
    is fed back as a sensor value."""
    def getSensors(self):
        # TODO 2013-10-29 change to scale first and then concatenate
        return self.scale(np.concatenate((self.agentPosition,
                                          self.goalPosition), axis=0))
    def scale(self, position):
        return position.astype(float) / np.concatenate(
            (self.worldSize, self.worldSize), axis=0)
    def reset(self):
        GridWorldNeuralNetwork.reset(self)
        self.goalPosition = self.getRandomPosition()
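
A sketch of the doubled sensor vector in the moving-target variant, with hypothetical positions in a 5x5 world (agent at (1, 2), goal at (3, 4)):

    worldSize = np.array([5, 5])
    state = np.concatenate((np.array([1, 2]), np.array([3, 4])))
    print(state.astype(float) / np.concatenate((worldSize, worldSize)))
    # -> [0.2 0.4 0.6 0.8]
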
class ContinuousGridWorld(GridWorldNeuralNetwork):
    """Implementation with continuous state and action spaces."""

    def _perform(self, position, action):
        """In continuous action space the new position is simply
        position + action."""
        assert position.shape == action.shape, \
            "%s == %s" % (str(position.shape), str(action.shape))
        return position + action
    def hitObstacle(self):
        """Returns True if the agentPosition is within a distance of 1
        of the obstacle."""
        distance = np.linalg.norm(self.agentPosition - self.obstaclePosition)
        return distance < 1
    def targetReached(self, distance):
        """Returns True if the distance to the target is < 1."""
        return distance < 1
    def getRandomPosition(self):
        """Returns a continuous random position within the world's
        limits."""
        return self.randomState.uniform(0, self.worldSize,
                                        self.worldSize.shape)
    def getSensors(self):
        agentPosition = GridWorldNeuralNetwork.getSensors(self)
        scaledFoodPosition = self.scale(self.goalPosition) - agentPosition
        # avoid NaNs if any dimension has size zero
        goalPosition = np.nan_to_num(scaledFoodPosition)
        return np.array([np.concatenate((agentPosition, goalPosition))])
    def _getInitialAgentPosition(self):
        return self.getRandomPosition()
    def reset(self):
        self.goalPosition = self.getRandomPosition()
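
Finally, a sketch of the relative goal encoding produced by ContinuousGridWorld.getSensors, again as standalone numpy arithmetic with hypothetical positions:

    worldSize = np.array([10.0, 10.0])
    agent = np.array([2.0, 8.0]) / worldSize         # scaled agent position
    goal = np.array([5.0, 5.0]) / worldSize - agent  # offset towards the goal
    print(np.concatenate((agent, goal)))             # -> [0.2 0.8 0.3 -0.3]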