Examples of fr.irit.halterego.rrl_ggp.base.RAction

fr.irit.halterego.rrl_ggp.base.RAction

        if(legalActions.isEmpty())
        {
            System.out.println("no legal action for me in this state : " + state);
        }
        
        RAction action = algorithm.decideStraight(state, legalActions);
        
        last_state = state;
        last_action = action;
        
        return createRLGlueAction(action);

View Full Code Here

    {
        RState current_state = state;
        while(!current_state.isTerminal())
        {
            List<RAction> chose_actions = new ArrayList<RAction>();
            RAction my_action = null;
            
            for(Agent agent : agents)
            {
                RAction action = decideExplore(current_state, agent.legalActions(current_state));
                chose_actions.add(action);
                
                if(agent.equals(learner))
                {
                    my_action = action;

View Full Code Here

        while(!current_state.isTerminal())
        {
            Map<Agent,RAction> actions = new HashMap<Agent,RAction>();
            for(Agent role : episodes.keySet())
            {
                RAction action = decideExplore(current_state, role.legalActions(current_state));
                actions.put(role,action);
            }
            
            RState start = current_state;
            current_state = current_state.next(actions.values());

View Full Code Here

        // Let l be the lower bound and u the upper bound of the probability
        // interval assigned to each action. If the random number (in [0, 1))
        // falls within this interval, we choose this action.
        double r = Math.random();
        double u = 0;
        RAction choice = null;
        for(RAction action : weights.keySet())
        {
            double l = u;
            u += weights.get(action);
            choice = action;

View Full Code Here

    ////////////////////////////////////////////////////////////////////////////
    
    private RAction decideMax(RState state, Set<RAction> legalActions)
    {
        double best_q = 0;
        RAction best_action = legalActions.iterator().next();
        for(RAction action : legalActions)
        {
            double q = tree.getQ(state, action, false);
            if(q > best_q)
            {

View Full Code Here

        // Let l be the lower bound and u the upper bound of the probability
        // interval assigned to each action. If the random number (in [0, 1))
        // falls within this interval, we choose this action.
        double r = Math.random();
        double u = 0;
        RAction choice = null;
        for(RAction action : weights.keySet())
        {
            double l = u;
            u += weights.get(action);
            choice = action;

View Full Code Here

        return choice;
    }
    
    private Double getBestQ(RState state, Set<RAction> legalActions, boolean straight)
    {
        RAction first = legalActions.iterator().next();
        Double best_q = tree.getQ(state, first,straight);
        for(RAction action : legalActions)
        {
            Double q = tree.getQ(state, action,straight);
            if(best_q == null || q != null && q > best_q)

View Full Code Here

        if (state.isTerminal())
        {
            for (String role : roles)
            {
                double q = state.getReward(role);
                RAction action = from_actions.get(role);
                table.addQValue(getState(parent), role, getAction(action), q);
            }
        }
        else
        {
            // Visit all the children of the current state.
            // If a child has not been seen yet, we make a recursive call on it.
            List<Map<String, RAction>> all_actions = getAllPossibleActions(state,roles);
            tracker.addLayer(all_actions.size());
            for (Map<String, RAction> actions : all_actions)
            {
                GDLState child = (GDLState)state.next(actions.values());
                if (!SerializableState.already_seen(child))
                {
                    fillQTable(table, child, roles, state, actions, tracker);
                }
                tracker.addProgress();
            }
            
            // At this point, the Q table is up to date for all the children
            // So we can estimate the Q value of the current state
            // by using a minimax-like algorithm
            RoleQMap min_q = new RoleQMap(roles);
            for (Map<String, RAction> actions : all_actions)
            {
                GDLState child = (GDLState)state.next(actions.values());
                
                for (String role : roles)
                {
                    // Get the max q from child actions
                    double max_q = getMaxQ(table, child, role);
                    
                    // Update the min of the maxes
                    RAction action = actions.get(role);
                    Double q = min_q.get(role, action);
                    if (q == null || max_q < q)
                    {
                        min_q.put(role, action, max_q);
                    }

View Full Code Here

        
        Action vanaction = new Action();
        State vanstate = new State();
        vanstate.discreteState = id(state);
        vanstate.discrete = true;
        RAction action;
        boolean legal = false;
        do
        {
            algo.getMaxActionFirst(vanstate, vanaction);
            fr.irit.halterego.rrl_ggp.serialization.SerializableAction fact = moves.get(vanaction.discreteAction);

View Full Code Here

TOP

Related Classes of fr.irit.halterego.rrl_ggp.base.RAction

fr.irit.halterego.ggp_agent.AbstractRRLAlgorithm

fr.irit.halterego.ggp_agent.algorithms.TabularLearning

fr.irit.halterego.ggp_agent.algorithms.TGAlgorithm

fr.irit.halterego.ggp_agent.GGPAgent

fr.irit.halterego.rrl_ggp.cmd.BuildQTable

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.