Modified the RLBrain code

From IPRE Wiki
Jump to: navigation, search
from pyrobot.brain import Brain
from time import sleep
from random import *
from pyrobot.brain.td import *

class SimpleBrain(Brain):

    random_percent = 20
    reset = 0

    # reinforcement value for temporal differencing
    REINFORCEMENT_VALUE = .2

    visit = [0]*15
    for i in range(len(visit)):
        visit[i] = [0]*15
    visit[0][0] = 1
    maxVisit = 1

    ....

            if px==cx:
                if py < cy:
                    return "up"
                else:
                    return "down"
            else:
                if px < cx:
                    return "left"
                else:
                    return "right"
        else:
            '''
            Modifications Ahoy
            '''
            xCoord, yCoord = self.robot.ask('location')
            freq = []

            for i in moves:
                if i == "up":
                    freq.append((visit[xCoord][yCoord-1], "up"))
                elif i == "down":
                    freq.append((visit[xCoord][yCoord+1], "down"))
                elif i == "left":
                    freq.append((visit[xCoord-1][yCoord], "left"))
                else:
                    freq.append((visit[xCoord+1][yCoord], "right"))

            for i in range(len(freq)):
                u, v = freq[i]
                if u > maxVisit:
                    freq[i] = None
                else:
                    freq[i] = v
                
            freq.sort()
            while freq[0] == None:
                freq.remove(None)

            move_type = randrange(0, 100, 1)
            if move_type < self.random_percent:  # chance of random movement
                return moves[ randrange(0,len(freq)) ]
            else:
                return self.util_successor( loc, moves )

    .....

    # run once every iteration
    def step(self):
        visit = [0]*15
        for i in range(len(visit)):
            visit[i] = [0]*15
        visit[0][0] = 1
        maxVisit = 1
        
        if self.reset == 1:
            self.robot.tell("reset")
            self.robot.tell("start")
            self.setup()
            self.reset = 0

        # if the goal has not been reached, continue to find a path
        if self.robot.ask('complete') == 0:
            direction = self.find_path(visit, maxVisit)
            self.robot.move( direction )

            xCoord, yCoord = self.robot.ask('location')
            visit[xCoord][yCoord] = visit[xCoord][yCoord] + 1
            if visit[xCoord][yCoord] > maxVisit:
                maxVisit = visit[xCoord][yCoord]

        ....