Source Code of WorldDescription

/*
 * Copyright 2008 Brian Tanner
 * http://rl-glue-ext.googlecode.com/
 * brian@tannerpages.com
 * http://brian.tannerpages.com
 * 
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.


 *  $Revision: 998 $
 *  $Date: 2009-02-08 22:21:50 -0500 (Sun, 08 Feb 2009) $
 *  $Author: brian@tannerpages.com $
 *  $HeadURL: http://rl-library.googlecode.com/svn/trunk/projects/packages/examples/mines-sarsa-java/SampleMinesEnvironment.java $


 *
 */


import java.util.Random;
import org.rlcommunity.rlglue.codec.EnvironmentInterface;
import org.rlcommunity.rlglue.codec.types.Action;
import org.rlcommunity.rlglue.codec.types.Observation;
import org.rlcommunity.rlglue.codec.types.Reward_observation_terminal;
import org.rlcommunity.rlglue.codec.util.EnvironmentLoader;
import org.rlcommunity.rlglue.codec.taskspec.TaskSpecVRLGLUE3;
import org.rlcommunity.rlglue.codec.taskspec.TaskSpec;
import org.rlcommunity.rlglue.codec.taskspec.ranges.IntRange;
import org.rlcommunity.rlglue.codec.taskspec.ranges.DoubleRange;


/**
 * This code is adapted from the Mines.cpp code written by Adam White
 * for earlier versions of RL-Glue.
 *
 * See the RL-Library page: 
 * http://library.rl-community.org/environments/mines
 *
 *  This is a very simple discrete-state, episodic grid world that has
 *  exploding mines in it.  If the agent steps on a mine, the episode
 *  ends with a large negative reward.
 *
 *  The reward per step is -1, with +10 for exiting the game successfully
 *  and -100 for stepping on a mine.
 *
 * This example follows my (Brian Tanner) favorite pattern of keeping the dynamics
 * of the world fairly separate from the class that implements EnvironmentInterface.
 * In this case, I've put it in the class WorldDescription, which is inside this
 * same Java file as SampleMinesEnvironment.  Usually I would put it in a separate
 * file.  This separation means that SampleMinesEnvironment doesn't need to know
 * much about the dynamics of the world, and WorldDescription doesn't need to know
 * much about RL-Glue.
 * 
 * @author Brian Tanner
 */
public class SampleMinesEnvironment implements EnvironmentInterface {


    static final int WORLD_FREE = 0;
    static final int WORLD_OBSTACLE = 1;
    static final int WORLD_MINE = 2;
    static final int WORLD_GOAL = 3;


    //WorldDescription contains the state of the world and manages the dynamics.
    WorldDescription theWorld;
    //These are used if the environment has been sent a message to use a fixed
    //starting state.
    boolean fixedStartState = false;
    int startRow = 0;
    int startCol = 0;


    public String env_init() {
        //This is hard coded, but there is no reason it couldn't be automatically
        //generated or read from a file.


        int world_map[][] = new int[][]{
            {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
            {1, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1},
            {1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
            {1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1},
            {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1},
            {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
        };




        theWorld = new WorldDescription(world_map);




        //Create a task spec programmatically.  This task spec encodes that state, action, and reward space for the problem.
        //You could forgo the task spec if your agent and environment have been created specifically to work with each other
        //ie, there is no need to share this information at run time.  You could also use your own ad-hoc task specification language,
        //or use the official one but just hard code the string instead of constructing it this way.
        TaskSpecVRLGLUE3 theTaskSpecObject = new TaskSpecVRLGLUE3();
        theTaskSpecObject.setEpisodic();
        theTaskSpecObject.setDiscountFactor(1.0d);


        //Specify that there will be an integer observation [0,107] for the state
        theTaskSpecObject.addDiscreteObservation(new IntRange(0, theWorld.getNumStates() - 1));
        //Specify that there will be an integer action [0,3]
        theTaskSpecObject.addDiscreteAction(new IntRange(0, 3));
        //Specify the reward range [-100,10]
        theTaskSpecObject.setRewardRange(new DoubleRange(-100.0d, 10.0d));


        theTaskSpecObject.setExtra("SampleMinesEnvironment(Java) by Brian Tanner.");


        String taskSpecString = theTaskSpecObject.toTaskSpec();
        TaskSpec.checkTaskSpec(taskSpecString);


        return taskSpecString;
    }


    /**
     * Put the environment in a random state and return the appropriate observation.
     * @return
     */
    public Observation env_start() {
        if (fixedStartState) {
            boolean stateIsValid = theWorld.setAgentState(startRow, startCol);
            if (!stateIsValid) {
                theWorld.setRandomAgentState();
            }
        } else {
            theWorld.setRandomAgentState();
        }
        Observation theObservation = new Observation(1, 0, 0);
        theObservation.setInt(0, theWorld.getState());
        return theObservation;
    }


    /**
     * Make sure the action is in the appropriate range, update the state,
     * generate the new observation, reward, and whether the episode is over.
     * @param thisAction
     * @return
     */
    public Reward_observation_terminal env_step(Action thisAction) {
        /* Make sure the action is valid */
        assert (thisAction.getNumInts() == 1) : "Expecting a 1-dimensional integer action. " + thisAction.getNumInts() + "D was provided";
        assert (thisAction.getInt(0) >= 0) : "Action should be in [0,4], " + thisAction.getInt(0) + " was provided";
        assert (thisAction.getInt(0) < 4) : "Action should be in [0,4], " + thisAction.getInt(0) + " was provided";


        theWorld.updatePosition(thisAction.getInt(0));




        Observation theObservation = new Observation(1, 0, 0);
        theObservation.setInt(0, theWorld.getState());
        Reward_observation_terminal RewardObs = new Reward_observation_terminal();
        RewardObs.setObservation(theObservation);
        RewardObs.setTerminal(theWorld.isTerminal());
        RewardObs.setReward(theWorld.getReward());


        return RewardObs;
    }


    public void env_cleanup() {
    }


    public String env_message(String message) {
        /*  Message Description
         * 'set-random-start-state'
         * Action: Set flag to do random starting states (the default)
         */
        if (message.startsWith("set-random-start-state")) {
            fixedStartState = false;
            return "Message understood.  Using random start state.";
        }


        /*  Message Description
         * 'set-start-state X Y'
         * Action: Set flag to do fixed starting states (row=X, col=Y)
         */
        if (message.startsWith("set-start-state")) {
            String[] theTokens = message.split(" ");
            startRow = Integer.parseInt(theTokens[1]);
            startCol = Integer.parseInt(theTokens[2]);
            fixedStartState = true;
            return "Message understood.  Using fixed start state.";
        }


        /*  Message Description
    *  'print-state'
    *  Action: Print the map and the current agent location
        */
    if (message.startsWith("print-state")){
      theWorld.print_state();
      return "Message understood.  Printed the state.";
        }


        return "SamplesMinesEnvironment(Java) does not understand your message.";
    }


    /**
     * This is a trick we can use to make the agent easily loadable.
     * @param args
     */
    public static void main(String[] args) {
        EnvironmentLoader theLoader = new EnvironmentLoader(new SampleMinesEnvironment());
        theLoader.run();
    }
}


/**
 * This class holds all of the internal state information about the environment,
 * and manages the dynamics, state update, reward calculation, etc.
 * @author btanner
 */
class WorldDescription {


    private final int numRows;
    private final int numCols;
    public int agentRow;
    public int agentCol;
    private final int[][] theMap;
    private Random randGen = new Random();


    public WorldDescription(int[][] worldMap) {
        this.theMap = worldMap;


        this.numRows = theMap.length;
        this.numCols = theMap[0].length;
    }


    public int getNumStates() {
        return numRows * numCols;
    }


    /**
     * Puts the agent into a random state.  Uses a generate and test method, in
     * a loop, only accepts the state if it is valid and not terminal.
     */
    public void setRandomAgentState() {


        int startRow = randGen.nextInt(numRows);
        int startCol = randGen.nextInt(numCols);


        while (isTerminal(startRow, startCol) || !isValid(startRow, startCol)) {
            startRow = randGen.nextInt(numRows);
            startCol = randGen.nextInt(numCols);
        }


        this.agentRow = startRow;
        this.agentCol = startCol;
    }


    /**
     * Convert the row/col state into a single number.
     * @return
     */
    public int getState() {
        return agentCol * numRows + agentRow;
    }


    /**
     * Sets the agent current state to startRow,startCol.
     * @param startRow
     * @param startCol
     * @return true if the state is valid and not terminal, otherwise
     * return false.
     */
    boolean setAgentState(int startRow, int startCol) {
        this.agentRow = startRow;
        this.agentCol = startCol;


        return isValid(startRow, startCol) && !isTerminal();
    }


    public boolean isTerminal() {
        return isTerminal(agentRow, agentCol);
    }


    private boolean isTerminal(int row, int col) {
        if (theMap[row][col] == SampleMinesEnvironment.WORLD_GOAL || theMap[row][col] == SampleMinesEnvironment.WORLD_MINE) {
            return true;
        }
        return false;
    }


    private boolean isValid(int row, int col) {
        boolean valid = false;
        if (row < numRows && row >= 0 && col < numCols && col >= 0) {
            if (theMap[row][col] != SampleMinesEnvironment.WORLD_OBSTACLE) {
                valid = true;
            }
        }
        return valid;
    }


    /**
     * Calculate the reward for the current agent state.
     * @return
     */
    public double getReward() {
        if (theMap[agentRow][agentCol] == SampleMinesEnvironment.WORLD_GOAL) {
            return 10.0f;
        }


        if (theMap[agentRow][agentCol] == SampleMinesEnvironment.WORLD_MINE) {
            return -100.0f;
        }


        return -1.0f;
    }


    public void updatePosition(int theAction) {
        /* When the move would result in hitting an obstacles, the agent simply doesn't move */
        int newRow = agentRow;
        int newCol = agentCol;




        if (theAction == 0) {/*move down*/
            newCol = agentCol - 1;
        }
        if (theAction == 1) { /*move up*/
            newCol = agentCol + 1;
        }
        if (theAction == 2) {/*move left*/
            newRow = agentRow - 1;
        }
        if (theAction == 3) {/*move right*/
            newRow = agentRow + 1;
        }




        /*Check if new position is out of bounds or inside an obstacle */
        if (isValid(newRow, newCol)) {
            agentRow = newRow;
            agentCol = newCol;
        }
    }


    /**
     * Print out the current state to the screen
     */
    void print_state() {
        System.out.printf("Agent is at: %d,%d\n", agentRow, agentCol);
        System.out.printf("Columns:0-10                10-17\n");
        System.out.printf("Col    ");
        for (int col = 0; col < 18; col++) {
            System.out.printf("%d ", col % 10);
        }


        for (int row = 0; row < 6; row++) {
            System.out.printf("\nRow: %d ", row);


            for (int col = 0; col < 18; col++) {
                if (agentRow == row && agentCol == col) {
                    System.out.printf("A ");
                } else {
                    if (theMap[row][col] == SampleMinesEnvironment.WORLD_GOAL) {
                        System.out.printf("G ");
                    }
                    if (theMap[row][col] == SampleMinesEnvironment.WORLD_MINE) {
                        System.out.printf("M ");
                    }
                    if (theMap[row][col] == SampleMinesEnvironment.WORLD_OBSTACLE) {
                        System.out.printf("* ");
                    }
                    if (theMap[row][col] == SampleMinesEnvironment.WORLD_FREE) {
                        System.out.printf("  ");
                    }
                }
            }
        }
        System.out.printf("\n");
    }
}
Source Code of WorldDescription

Related Classes of WorldDescription