if (state.isTerminal())
{
for (String role : roles)
{
double q = state.getReward(role);
RAction action = from_actions.get(role);
table.addQValue(getState(parent), role, getAction(action), q);
}
}
else
{
// Enumerate every child of the current state (one per joint action).
// Recurse into a child only if it has not already been seen.
List<Map<String, RAction>> all_actions = getAllPossibleActions(state,roles);
tracker.addLayer(all_actions.size());
for (Map<String, RAction> actions : all_actions)
{
GDLState child = (GDLState)state.next(actions.values());
if (!SerializableState.already_seen(child))
{
fillQTable(table, child, roles, state, actions, tracker);
}
tracker.addProgress();
}
// At this point, the Q table is up to date for all the children
// So we can estimate the Q value of the current state
// by using a minimax-like algorithm
RoleQMap min_q = new RoleQMap(roles);
for (Map<String, RAction> actions : all_actions)
{
GDLState child = (GDLState)state.next(actions.values());
for (String role : roles)
{
// Get the max q from child actions
double max_q = getMaxQ(table, child, role);
// Update the min of the maxes
RAction action = actions.get(role);
Double q = min_q.get(role, action);
if (q == null || max_q < q)
{
min_q.put(role, action, max_q);
}