Examples of Reward_observation_action_terminal

org.rlcommunity.rlglue.codec.types.Reward_observation_action_terminal
Composite type holding a reward, an observation, an action, and a terminal flag. We are moving away from direct access to the underlying members, which is why the getters and setters exist. @author btanner

Examples of org.rlcommunity.rlglue.codec.types.Reward_observation_action_terminal

        if (RO.isTerminal()) {
            RL_agent_end(RO.getReward());
        } else {
           lastAction = RL_agent_step(RO.getReward(), RO.getObservation());
        }
        return new Reward_observation_action_terminal(RO.getReward(), RO.getObservation(), lastAction, RO.terminal);
    }

View Full Code Here

Examples of org.rlcommunity.rlglue.codec.types.Reward_observation_action_terminal

    }


//Btanner: Jan 13 : Changing this to make it more like RL_glue.c
//Btanner: Sept 19 2008 : Re-ported directly from RL_glue.c
    public synchronized int RL_episode(int maxStepsThisEpisode) {
        Reward_observation_action_terminal rlStepResult = new Reward_observation_action_terminal(0, null, null, 0);
        int currentStep = 0;
        RL_start();
        /* RL_start sets current step to 1, so we should start x at 1 */
        for (currentStep = 1; rlStepResult.terminal != 1 && (maxStepsThisEpisode == 0 ? true : currentStep < maxStepsThisEpisode); currentStep++) {
            rlStepResult = RL_step();

View Full Code Here

Examples of org.rlcommunity.rlglue.codec.types.Reward_observation_action_terminal

                lastAction = firstAction;
            }
            updateObservers(firstAction);


        } else {
            Reward_observation_action_terminal ROAT = new Reward_observation_action_terminal();


            Reward_observation_terminal ROT = RLGlue.RL_env_step(lastAction);


            ROAT.o = ROT.getObservation();
            ROAT.r = ROT.getReward();
            boolean isTerminal=ROT.isTerminal();
            ROAT.terminal=0;
            if(isTerminal){
                ROAT.terminal=1;
            }






            synchronized (this) {
                totalSteps++;
                timeStep++;
                lastObservation = ROAT.getObservation();
                lastReward = ROAT.getReward();


                returnThisEpisode += lastReward;
                totalReturn += lastReward;
            }


            updateObservers(ROT);


            if (ROT.isTerminal()) {
                RLGlue.RL_agent_end(ROT.getReward());
            } else {
                ROAT.a = RLGlue.RL_agent_step(ROT.getReward(), ROT.getObservation());
            }


            synchronized (this) {
                if (!ROAT.isTerminal()) {
                    lastAction = ROAT.getAction();
                }
            }
            updateObservers(ROAT);
        }
        return RLGlue.isCurrentEpisodeOver();

View Full Code Here

Examples of org.rlcommunity.rlglue.codec.types.Reward_observation_action_terminal

            episodeNumber++;
            timeStep = 1;
            lastReward = Double.NaN;
        }
        if (theEvent instanceof Reward_observation_action_terminal) {
            Reward_observation_action_terminal ROAT = (Reward_observation_action_terminal) theEvent;
            lastReward = ROAT.r;
            timeStep++;
            totalSteps++;
        }
        if (theChangeListener != null) {

View Full Code Here

Examples of org.rlcommunity.rlglue.codec.types.Reward_observation_action_terminal

        int firstObservation = startResponse.o.intArray[0];
        int firstAction = startResponse.a.intArray[0];
        System.out.println("First observation and action were: " + firstObservation + " and: " + firstAction);


        /*Run one step */
        Reward_observation_action_terminal stepResponse = RLGlue.RL_step();


        /*Run until the episode ends*/
        while (stepResponse.terminal != 1) {
            stepResponse = RLGlue.RL_step();
            if (stepResponse.terminal != 1) {

View Full Code Here

Examples of org.rlcommunity.rlglue.codec.types.Reward_observation_action_terminal

    public static Reward_observation_action_terminal RL_step() {
        checkInstance();
        if (!inited) {
            System.err.println("-- Warning From RLGlue :: RL_step() was called without RL_init().");
        }
        Reward_observation_action_terminal stepResponse = instance.RL_step();
        if (stepResponse == null) {
            System.err.println("-- Warning From RLGlue :: RL_step() response was NULL, that should be impossible.");
            stepResponse = new Reward_observation_action_terminal();
        }


        currentEpisodeOver = (stepResponse.terminal == 1);
        return stepResponse;
    }

View Full Code Here

Examples of org.rlcommunity.rlglue.codec.types.Reward_observation_action_terminal

 */
public class Test_1_Experiment {


    public static int runTest() {
        Glue_Test tester = new Glue_Test("Test_1_Experiment");
        Reward_observation_action_terminal roat;


        String task_spec = RLGlue.RL_init();


        RLGlue.RL_start();


        roat = RLGlue.RL_step();






        tester.check_fail(roat.getObservation().getNumInts() != 1);
        tester.check_fail(roat.getObservation().getNumDoubles() != 0);
        tester.check_fail(roat.getObservation().getNumChars() != 0);
        tester.check_fail(roat.getObservation().intArray[0] != 0);
        tester.check_fail(!"one|1.|one".equals(RLGlue.RL_env_message("one")));
        tester.check_fail(!"one|1.|one".equals(RLGlue.RL_agent_message("one")));


        tester.check_fail(roat.terminal != 0);




        roat = RLGlue.RL_step();


        tester.check_fail(!"two|2.2.|two".equals(RLGlue.RL_env_message("two")));
        tester.check_fail(!"two|2.2.|two".equals(RLGlue.RL_agent_message("two")));
        tester.check_fail(roat.terminal != 0);
        tester.check_fail(roat.getObservation().getNumInts() != 1);
        tester.check_fail(roat.getObservation().getNumDoubles() != 0);
        tester.check_fail(roat.getObservation().getNumChars() != 0);
        tester.check_fail(roat.getObservation().intArray[0] != 1);


        roat = RLGlue.RL_step();


        tester.check_fail(!"three||three".equals(RLGlue.RL_env_message("three")));
        tester.check_fail(!"three||three".equals(RLGlue.RL_agent_message("three")));
        tester.check_fail(roat.terminal != 0);
        tester.check_fail(roat.getObservation().getNumInts() != 1);
        tester.check_fail(roat.getObservation().getNumDoubles() != 0);
        tester.check_fail(roat.getObservation().getNumChars() != 0);
        tester.check_fail(roat.getObservation().intArray[0] != 2);


        roat = RLGlue.RL_step();
        tester.check_fail(!"four|4.|four".equals(RLGlue.RL_env_message("four")));
        tester.check_fail(!"four|4.|four".equals(RLGlue.RL_agent_message("four")));
        tester.check_fail(roat.terminal != 0);
        tester.check_fail(roat.getObservation().getNumInts() != 1);
        tester.check_fail(roat.getObservation().getNumDoubles() != 0);
        tester.check_fail(roat.getObservation().getNumChars() != 0);
        tester.check_fail(roat.getObservation().intArray[0] != 3);




        roat = RLGlue.RL_step();
        tester.check_fail(!"five|5.5.|five".equals(RLGlue.RL_env_message("five")));
        tester.check_fail(!"five|4.|five".equals(RLGlue.RL_agent_message("five")));
        tester.check_fail(roat.terminal == 0);




        /* Gabor has made it so this environment will step past terminal.  This is
        not something we want to do in general at all.


        But, in order to keep the other tests all working, I'll allow it*/


        roat = RLGlue.RL_step();
        tester.check_fail(roat.getObservation().getNumInts() != 5);
        tester.check_fail(roat.getObservation().getNumDoubles() != 5);
        tester.check_fail(roat.getObservation().getNumChars() != 5);
        tester.check_fail(roat.getObservation().getInt(0) != 173);
        tester.check_fail(roat.getObservation().getInt(1) != -173);
        tester.check_fail(roat.getObservation().getInt(2) != 2147483647);
        tester.check_fail(roat.getObservation().getInt(3) != 0);
        tester.check_fail(roat.getObservation().getInt(4) != -2147483648);


        tester.check_fail(roat.getObservation().getDouble(0) != 0.0078125);
        tester.check_fail(roat.getObservation().getDouble(1) != -0.0078125);
        tester.check_fail(roat.getObservation().getDouble(2) != 0);
        tester.check_fail(roat.getObservation().getDouble(3) != 0.0078125e150);
        tester.check_fail(roat.getObservation().getDouble(4) != -0.0078125e150);
        tester.check_fail(roat.getObservation().getChar(0) != 'g');
        tester.check_fail(roat.getObservation().getChar(1) != 'F');
        tester.check_fail(roat.getObservation().getChar(2) != '?');
        tester.check_fail(roat.getObservation().getChar(3) != ' ');
        tester.check_fail(roat.getObservation().getChar(4) != '&');
        System.out.println(tester);


        return tester.getFailCount();
    }

View Full Code Here

Examples of org.rlcommunity.rlglue.codec.types.Reward_observation_action_terminal

      tester.check_fail(startTuple.getObservation().getNumDoubles()!=4);
      tester.check_fail(startTuple.getObservation().getNumChars()!=5);
    }
    
    for(int whichStep=0;whichStep<5;whichStep++){
      Reward_observation_action_terminal stepTuple=RLGlue.RL_step();
      tester.check_fail(stepTuple.terminal!=0);
      tester.check_fail(stepTuple.r!=0);


      if(whichEpisode%2==0){
        tester.check_fail(stepTuple.getAction().getNumInts()!=0);
        tester.check_fail(stepTuple.getAction().getNumDoubles()!=0);
        tester.check_fail(stepTuple.getAction().getNumChars()!=0);


        tester.check_fail(stepTuple.getObservation().getNumInts()!=0);
        tester.check_fail(stepTuple.getObservation().getNumDoubles()!=0);
        tester.check_fail(stepTuple.getObservation().getNumChars()!=0);
      }else{
        tester.check_fail(stepTuple.getAction().getNumInts()!=7);
        tester.check_fail(stepTuple.getAction().getNumDoubles()!=3);
        tester.check_fail(stepTuple.getAction().getNumChars()!=1);


        tester.check_fail(stepTuple.getObservation().getNumInts()!=2);
        tester.check_fail(stepTuple.getObservation().getNumDoubles()!=4);
        tester.check_fail(stepTuple.getObservation().getNumChars()!=5);
      }
      
    }
  }

View Full Code Here

Examples of org.rlcommunity.rlglue.codec.types.Reward_observation_action_terminal

        if (RO.isTerminal()) {
            RL_agent_end(RO.getReward());
        } else {
           lastAction = RL_agent_step(RO.getReward(), RO.getObservation());
        }
        return new Reward_observation_action_terminal(RO.getReward(), RO.getObservation(), lastAction, RO.isTerminal());
    }

View Full Code Here

Examples of org.rlcommunity.rlglue.codec.types.Reward_observation_action_terminal

    }


//Btanner: Jan 13 : Changing this to make it more like RL_glue.c
//Btanner: Sept 19 2008 : Re-ported directly from RL_glue.c
    public synchronized int RL_episode(int maxStepsThisEpisode) {
        Reward_observation_action_terminal rlStepResult = new Reward_observation_action_terminal(0, null, null, 0);
        int currentStep = 0;
        RL_start();
        /* RL_start sets current step to 1, so we should start x at 1 */
        for (currentStep = 1; rlStepResult.terminal != 1 && (maxStepsThisEpisode == 0 ? true : currentStep < maxStepsThisEpisode); currentStep++) {
            rlStepResult = RL_step();

View Full Code Here

0 1

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.