Package org.rlcommunity.rlglue.codec.taskspec

Examples of org.rlcommunity.rlglue.codec.taskspec.TaskSpec


    Random randGenerator = new Random();
    Action lastAction;
    Observation lastObservation;

    public void agent_init(String taskSpecification) {
    TaskSpec theTaskSpec=new TaskSpec(taskSpecification);
    System.out.println("Skeleton agent parsed the task spec.");
    System.out.println("Observation have "+theTaskSpec.getNumDiscreteObsDims()+" integer dimensions");
    System.out.println("Actions have "+theTaskSpec.getNumDiscreteActionDims()+" integer dimensions");
    IntRange theObsRange=theTaskSpec.getDiscreteObservationRange(0);
    System.out.println("Observation (state) range is: "+theObsRange.getMin()+" to "+theObsRange.getMax());
    IntRange theActRange=theTaskSpec.getDiscreteActionRange(0);
    System.out.println("Action range is: "+theActRange.getMin()+" to "+theActRange.getMax());
    DoubleRange theRewardRange=theTaskSpec.getRewardRange();
    System.out.println("Reward range is: "+theRewardRange.getMin()+" to "+theRewardRange.getMax());
   
    //In more complex agents, you would also check for continuous observations and actions, discount factors, etc.
    //Also, these ranges can have special values like "NEGINF, POSINF, UNSPEC (unspecified)".  There is no guarantee
    //that they are all specified and that they are all nice numbers.
View Full Code Here


  * Called when starting a new MDP.
  * @param taskSpec  the task specification string of the current MDP; it is parsed into a TaskSpec object.
  */

  public void agent_init(String taskSpec) {
    TSO = new TaskSpec(taskSpec);
    firstActionOfEpisode = true;

    action = new Action(TSO.getNumDiscreteActionDims(),TSO.getNumDiscreteActionDims())
    totalRew = 0;
    totalSteps = 0;
View Full Code Here

     * action, and then allocate the valueFunction.
     *
     * @param taskSpecification
     */
    public void agent_init(String taskSpecification) {
        TaskSpec theTaskSpec = new TaskSpec(taskSpecification);

        /* Lots of assertions to make sure that we can handle this problem.  */
        assert (theTaskSpec.getNumDiscreteObsDims() == 1);
        assert (theTaskSpec.getNumContinuousObsDims() == 0);
        assert (!theTaskSpec.getDiscreteObservationRange(0).hasSpecialMinStatus());
        assert (!theTaskSpec.getDiscreteObservationRange(0).hasSpecialMaxStatus());
        numStates = theTaskSpec.getDiscreteObservationRange(0).getMax() + 1;

        assert (theTaskSpec.getNumDiscreteActionDims() == 1);
        assert (theTaskSpec.getNumContinuousActionDims() == 0);
        assert (!theTaskSpec.getDiscreteActionRange(0).hasSpecialMinStatus());
        assert (!theTaskSpec.getDiscreteActionRange(0).hasSpecialMaxStatus());
        numActions = theTaskSpec.getDiscreteActionRange(0).getMax() + 1;

        sarsa_gamma=theTaskSpec.getDiscountFactor();

        valueFunction = new double[numActions][numStates];

    }
View Full Code Here

TOP

Related Classes of org.rlcommunity.rlglue.codec.taskspec.TaskSpec

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.