
Source Code of SkeletonEnvironment

/*
* Copyright 2008 Brian Tanner
* http://rl-glue-ext.googlecode.com/
* brian@tannerpages.com
* http://brian.tannerpages.com
*
*  Licensed under the Apache License, Version 2.0 (the "License");
*  you may not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*
*  $Revision: 676 $
*  $Date: 2009-02-08 20:15:04 -0500 (Sun, 08 Feb 2009) $
*  $Author: brian@tannerpages.com $
*  $HeadURL: http://rl-glue-ext.googlecode.com/svn/trunk/projects/codecs/Java/examples/skeleton-sample/SkeletonEnvironment.java $
*
*/

import org.rlcommunity.rlglue.codec.EnvironmentInterface;
import org.rlcommunity.rlglue.codec.types.Action;
import org.rlcommunity.rlglue.codec.types.Observation;
import org.rlcommunity.rlglue.codec.types.Reward_observation_terminal;
import org.rlcommunity.rlglue.codec.util.EnvironmentLoader;
import org.rlcommunity.rlglue.codec.taskspec.TaskSpecVRLGLUE3;
import org.rlcommunity.rlglue.codec.taskspec.TaskSpec;
import org.rlcommunity.rlglue.codec.taskspec.ranges.IntRange;
import org.rlcommunity.rlglue.codec.taskspec.ranges.DoubleRange;

/**
 * This is a very simple environment with discrete observations corresponding to
 * states labeled {0,1,...,20}. The starting state is 10.
 *
 * There are two actions, {0,1}: 0 decrements the state, 1 increments the state.
 *
 * The problem is episodic, ending when state 0 or 20 is reached, giving reward
 * -1 or +1, respectively. The reward is 0 on all other steps.
 * @author Brian Tanner
 */
public class SkeletonEnvironment implements EnvironmentInterface {
    private int currentState=10;
   
    public String env_init() {
 
        //Create a task spec programmatically. This task spec encodes the state,
        //action, and reward space for the problem.
        //You could forgo the task spec if your agent and environment have been
        //created specifically to work with each other, i.e., there is no need
        //to share this information at run time. You could also use your own
        //ad-hoc task specification language, or use the official one but
        //hard-code the string instead of constructing it this way.
        TaskSpecVRLGLUE3 theTaskSpecObject = new TaskSpecVRLGLUE3();
        theTaskSpecObject.setEpisodic();
        theTaskSpecObject.setDiscountFactor(1.0d);
        //Specify that there will be an integer observation [0,20] for the state
        theTaskSpecObject.addDiscreteObservation(new IntRange(0, 20));
        //Specify that there will be an integer action [0,1]
        theTaskSpecObject.addDiscreteAction(new IntRange(0, 1));
        //Specify the reward range [-1,1]
        theTaskSpecObject.setRewardRange(new DoubleRange(-1, 1));

        String taskSpecString = theTaskSpecObject.toTaskSpec();
        TaskSpec.checkTaskSpec(taskSpecString);

        //The actual string this makes is:
        //VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1.0 OBSERVATIONS INTS (1 0 20)  ACTIONS INTS (1 0 1)  REWARDS (1 -1.0 1.0)  EXTRA

        //This could be simplified a bit if you wrote it manually as:
        //VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1.0 OBSERVATIONS INTS (0 20)  ACTIONS INTS (0 1)  REWARDS (-1.0 1.0)  EXTRA
        return taskSpecString;
    }
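
    /* On the agent side, this string is parsed back with the codec's TaskSpec
     * class (this is what the companion SkeletonAgent does). A minimal sketch
     * of that parse, assuming one discrete observation dimension and one
     * discrete action dimension as declared above:
     *
     *   TaskSpec spec = new TaskSpec(taskSpecString);
     *   IntRange obsRange = spec.getDiscreteObservationRange(0); // [0,20]
     *   IntRange actRange = spec.getDiscreteActionRange(0);      // [0,1]
     *   double maxReward = spec.getRewardRange().getMax();       // 1.0
     */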

    public Observation env_start() {
        currentState=10;
       
        //Observation with one int, zero doubles, zero chars
        Observation returnObservation=new Observation(1,0,0);
        returnObservation.intArray[0]=currentState;
        return returnObservation;
    }

    public Reward_observation_terminal env_step(Action thisAction) {
        boolean episodeOver=false;
        double theReward=0.0d;
       
        //Action 0 moves left, action 1 moves right; any other value is ignored
        if(thisAction.intArray[0]==0)
            currentState--;
        if(thisAction.intArray[0]==1)
            currentState++;
       
        if(currentState<=0){
            currentState=0;
            theReward=-1.0d;
            episodeOver=true;
        }
       
        if(currentState>=20){
            currentState=20;
            episodeOver=true;
            theReward=1.0d;
        }
        Observation returnObservation=new Observation(1,0,0);
        returnObservation.intArray[0]=currentState;
       
        Reward_observation_terminal returnRewardObs=new Reward_observation_terminal(theReward,returnObservation,episodeOver);
        return returnRewardObs;
    }
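
    /* A quick local sanity check of these dynamics, bypassing the RL-Glue
     * socket layer entirely (a sketch only; real runs go through the rl_glue
     * core as in main below):
     *
     *   SkeletonEnvironment env = new SkeletonEnvironment();
     *   env.env_start();                        // resets the state to 10
     *   Action increment = new Action(1, 0, 0); // one int, no doubles/chars
     *   increment.intArray[0] = 1;
     *   Reward_observation_terminal rot = env.env_step(increment);
     *   // rot carries observation 11, reward 0.0, and a false terminal flag
     */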

    public void env_cleanup() {
        //Nothing to clean up in this simple environment
    }

    public String env_message(String message) {
        if(message.equals("what is your name?"))
            return "my name is skeleton_environment, Java edition!";

  return "I don't know how to respond to your message";
    }
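
    /* An experiment program can query this method remotely through the codec's
     * RLGlue helper class, e.g.:
     *
     *   String reply = RLGlue.RL_env_message("what is your name?");
     *   // reply: "my name is skeleton_environment, Java edition!"
     */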
   
    /**
     * This is a trick we can use to make the environment easily loadable.
     * @param args
     */
    public static void main(String[] args){
        EnvironmentLoader theLoader=new EnvironmentLoader(new SkeletonEnvironment());
        theLoader.run();
    }


}
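
Running this environment takes the other RL-Glue pieces as well: the rl_glue core executable, an agent, and an experiment program, each started as its own process. The sketch below is a minimal experiment using the codec's RLGlue helper class; it is not the full SkeletonExperiment that ships alongside these examples, and the 100-step episode cap is an arbitrary choice for illustration.

import org.rlcommunity.rlglue.codec.RLGlue;

public class SkeletonExperimentSketch {
    public static void main(String[] args) {
        //Connect to the rl_glue core and perform the init handshake;
        //this returns the task spec string built by env_init above.
        String taskSpec = RLGlue.RL_init();
        //Run one episode of at most 100 steps.
        RLGlue.RL_episode(100);
        System.out.println("Episode ended after " + RLGlue.RL_num_steps()
                + " steps with return " + RLGlue.RL_return());
        RLGlue.RL_cleanup();
    }
}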