updateObservers(firstAction);
} else {
Reward_observation_action_terminal ROAT = new Reward_observation_action_terminal();
Reward_observation_terminal ROT = RLGlue.RL_env_step(lastAction);
ROAT.o = ROT.getObservation();
ROAT.r = ROT.getReward();
boolean isTerminal=ROT.isTerminal();
ROAT.terminal=0;
if(isTerminal){
ROAT.terminal=1;
}
synchronized (this) {
totalSteps++;
timeStep++;
lastObservation = ROAT.getObservation();
lastReward = ROAT.getReward();
returnThisEpisode += lastReward;
totalReturn += lastReward;
}
updateObservers(ROT);
if (ROT.isTerminal()) {
RLGlue.RL_agent_end(ROT.getReward());
} else {
ROAT.a = RLGlue.RL_agent_step(ROT.getReward(), ROT.getObservation());
}
synchronized (this) {
if (!ROAT.isTerminal()) {
lastAction = ROAT.getAction();