// For every child state that has not been seen yet, make a recursive call on it.
List<Map<String, RAction>> all_actions = getAllPossibleActions(state,roles);
tracker.addLayer(all_actions.size());
for (Map<String, RAction> actions : all_actions)
{
GDLState child = (GDLState)state.next(actions.values());
if (!SerializableState.already_seen(child))
{
fillQTable(table, child, roles, state, actions, tracker);
}
tracker.addProgress();
}
// At this point, the Q table is up to date for all the children,
// so we can estimate the Q value of the current state
// by using a minimax-like algorithm.
RoleQMap min_q = new RoleQMap(roles);
for (Map<String, RAction> actions : all_actions)
{
GDLState child = (GDLState)state.next(actions.values());
for (String role : roles)
{
// Get the max q from child actions
double max_q = getMaxQ(table, child, role);