// Policy entries for cells (3,1), (3,2) and (3,3): each is mapped to CellWorld.RIGHT.
policy.setAction(new CellWorldPosition(3, 1), CellWorld.RIGHT);
policy.setAction(new CellWorldPosition(3, 2), CellWorld.RIGHT);
policy.setAction(new CellWorldPosition(3, 3), CellWorld.RIGHT);
// Construct the passive TD agent from fourByThree and the policy.
PassiveTDAgent<CellWorldPosition, String> agent = new PassiveTDAgent<CellWorldPosition, String>(
fourByThree, policy);
// Randomizer instance handed to each trial executed below.
Randomizer r = new JavaRandomizer();
// The banner names the TD agent so the output matches the PassiveTDAgent
// constructed above (an "ADP" label here would misreport which agent ran).
System.out
.println("Deriving Utility Function in the Passive TD Agent From 200 trials in the 4 by 3 world");
// Starts as null; presumably assigned from the agent once trials have run
// (the assignment is outside this view -- confirm).
MDPUtilityFunction<CellWorldPosition> uf = null;
for (int i = 0; i < 200; i++) {
agent.executeTrial(r);