package burlap.behavior.stochasticgame.agents.naiveq.history;

import burlap.behavior.singleagent.QValue;
import burlap.behavior.statehashing.DiscreteStateHashFactory;
import burlap.behavior.statehashing.StateHashFactory;
import burlap.behavior.statehashing.StateHashTuple;
import burlap.behavior.stochasticgame.agents.naiveq.SGNaiveQLAgent;
import burlap.oomdp.core.Attribute;
import burlap.oomdp.core.ObjectClass;
import burlap.oomdp.core.ObjectInstance;
import burlap.oomdp.core.State;
import burlap.oomdp.stochasticgames.Agent;
import burlap.oomdp.stochasticgames.GroundedSingleAction;
import burlap.oomdp.stochasticgames.JointAction;
import burlap.oomdp.stochasticgames.SGDomain;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/* loaded from: input_file:burlap/behavior/stochasticgame/agents/naiveq/history/SGQWActionHistory.class */
/**
 * A naive Q-learning stochastic-game agent whose Q-value lookups are keyed on the
 * world state augmented with the most recent joint actions.
 *
 * <p>The last {@link #historySize} joint actions are kept in {@link #history} (newest first).
 * Before a state is hashed, one object of class {@link #CLASSHISTORY} is added to a copy of it
 * for every (history step, agent) pair; see {@link #getHistoryAugmentedState(State)}. Each such
 * object stores how many steps back it refers to ({@link #ATTHNUM}), the acting agent's player
 * number ({@link #ATTHPN}) and the id of the action taken ({@link #ATTHAID}).
 */
public class SGQWActionHistory extends SGNaiveQLAgent {
    /** Most recent joint actions, newest first; re-created in {@link #gameStarting()}. */
    protected LinkedList<JointAction> history;

    /** Maximum number of joint actions retained in {@link #history}. */
    protected int historySize;

    /** Maps grounded single actions to integer ids; created lazily in {@link #gameStarting()} if not supplied. */
    protected ActionIdMap actionMap;

    /** Object class used for the history objects that are added to augmented states. */
    protected ObjectClass classHistory;

    /** Attribute name: how many steps back the history object refers to (0 is the most recent). */
    public static final String ATTHNUM = "histNum";

    /** Attribute name: player number of the agent that took the recorded action. */
    public static final String ATTHPN = "histPN";

    /** Attribute name: id of the recorded action, or {@code actionMap.maxValue()} when no action is recorded yet. */
    public static final String ATTHAID = "histAID";

    /** Name of the history object class. NOTE(review): shares its value with {@link #ATTHAID}; confirm this is intended. */
    public static String CLASSHISTORY = ATTHAID;

    /**
     * Creates an agent with an explicit action-id map and builds the history object class immediately.
     *
     * @param sGDomain the stochastic game domain, passed to the superclass
     * @param d passed to the superclass constructor (presumably the discount factor; confirm against SGNaiveQLAgent)
     * @param d2 passed to the superclass constructor (presumably the learning rate; confirm against SGNaiveQLAgent)
     * @param stateHashFactory the hashing factory, passed to the superclass
     * @param i the number of past joint actions to remember
     * @param i2 the number of players; the player-number attribute covers {@code 0..i2-1}
     * @param actionIdMap the mapping from actions to integer ids
     */
    public SGQWActionHistory(SGDomain sGDomain, double d, double d2, StateHashFactory stateHashFactory, int i, int i2, ActionIdMap actionIdMap) {
        super(sGDomain, d, d2, stateHashFactory);
        this.historySize = i;
        this.actionMap = actionIdMap;
        initializeHistoryAugmentedDomain(i2);
    }

    /**
     * Creates an agent without an action-id map. The map and the history object class are
     * created on the first call to {@link #gameStarting()}.
     *
     * @param sGDomain the stochastic game domain, passed to the superclass
     * @param d passed to the superclass constructor (presumably the discount factor; confirm against SGNaiveQLAgent)
     * @param d2 passed to the superclass constructor (presumably the learning rate; confirm against SGNaiveQLAgent)
     * @param stateHashFactory the hashing factory, passed to the superclass
     * @param i the number of past joint actions to remember
     */
    public SGQWActionHistory(SGDomain sGDomain, double d, double d2, StateHashFactory stateHashFactory, int i) {
        super(sGDomain, d, d2, stateHashFactory);
        // A null map is the signal for gameStarting() to perform the deferred initialization.
        this.actionMap = null;
        this.historySize = i;
    }

    /**
     * Builds the history object class and its three discrete attributes, and registers those
     * attributes with the hash factory when it is a {@link DiscreteStateHashFactory}.
     * Requires {@link #actionMap} to be set.
     *
     * @param i the number of players; the player-number attribute covers {@code 0..i-1}
     */
    protected void initializeHistoryAugmentedDomain(int i) {
        // The attributes and class are attached to a fresh, private domain rather than this.domain.
        SGDomain historyDomain = new SGDomain();

        Attribute stepsBack = new Attribute(historyDomain, ATTHNUM, Attribute.AttributeType.DISC);
        stepsBack.setDiscValuesForRange(0, this.historySize - 1, 1);

        Attribute playerNumber = new Attribute(historyDomain, ATTHPN, Attribute.AttributeType.DISC);
        playerNumber.setDiscValuesForRange(0, i - 1, 1);

        // The upper bound maxValue() is itself a legal value: it marks "no action recorded yet"
        // (see getHistoryLessObjectInstanceForAgent).
        Attribute actionId = new Attribute(historyDomain, ATTHAID, Attribute.AttributeType.DISC);
        actionId.setDiscValuesForRange(0, this.actionMap.maxValue(), 1);

        this.classHistory = new ObjectClass(historyDomain, CLASSHISTORY);
        this.classHistory.addAttribute(stepsBack);
        this.classHistory.addAttribute(playerNumber);
        this.classHistory.addAttribute(actionId);

        if (this.hashFactory instanceof DiscreteStateHashFactory) {
            List<Attribute> hashedAttributes = new ArrayList<Attribute>();
            hashedAttributes.add(stepsBack);
            hashedAttributes.add(playerNumber);
            hashedAttributes.add(actionId);
            ((DiscreteStateHashFactory) this.hashFactory).setAttributesForClass(CLASSHISTORY, hashedAttributes);
        }
    }

    /**
     * Clears the action history for a new game and performs the deferred initialization
     * if no action map was supplied at construction time.
     */
    @Override // burlap.behavior.stochasticgame.agents.naiveq.SGNaiveQLAgent, burlap.oomdp.stochasticgames.Agent
    public void gameStarting() {
        this.history = new LinkedList<>();
        if (this.actionMap == null) {
            initializeActionMapAndAugmentedDomain();
        }
    }

    /**
     * Creates a {@link ParameterNaiveActionIdMap} for this agent's domain and builds the history
     * object class using the number of agents currently registered in the world.
     */
    protected void initializeActionMapAndAugmentedDomain() {
        this.actionMap = new ParameterNaiveActionIdMap(this.domain);
        initializeHistoryAugmentedDomain(this.world.getRegisteredAgents().size());
    }

    /**
     * Records the joint action in the history and applies a Q-learning update to this agent's
     * Q-value for the action it took.
     *
     * @param state the state in which the joint action was taken
     * @param jointAction the joint action taken by all agents
     * @param map the reward for each agent, keyed by agent name; replaced by the internal reward
     *            function's output when one is set
     * @param state2 the resulting state
     * @param z whether {@code state2} is terminal; if so the future value is taken to be 0
     */
    @Override // burlap.behavior.stochasticgame.agents.naiveq.SGNaiveQLAgent, burlap.oomdp.stochasticgames.Agent
    public void observeOutcome(State state, JointAction jointAction, Map<String, Double> map, State state2, boolean z) {
        GroundedSingleAction myAction = jointAction.action(this.worldAgentName);

        // Both of these must be computed BEFORE the history is shifted, so that they
        // reflect the history as it was when the action was chosen.
        QValue qEntry = getQ(state, myAction);
        State augmentedPrev = getHistoryAugmentedState(state);

        // Shift the history window. With a non-positive history size nothing is recorded:
        // trimming an empty list would throw NoSuchElementException, and a negative size
        // would otherwise let the list grow without bound.
        if (this.historySize > 0) {
            if (this.history.size() >= this.historySize) {
                this.history.removeLast();
            }
            this.history.addFirst(jointAction);
        }

        // Computed AFTER the shift so the successor state carries the action just taken.
        State augmentedNext = getHistoryAugmentedState(state2);

        if (this.internalRewardFunction != null) {
            map = this.internalRewardFunction.reward(augmentedPrev, jointAction, augmentedNext);
        }
        double reward = map.get(this.worldAgentName).doubleValue();

        double maxNextQ = 0.0d;
        if (!z) {
            maxNextQ = getMaxQValue(state2);
        }

        double alpha = this.learningRate.pollLearningRate(this.totalNumberOfSteps, state, myAction);
        qEntry.q += alpha * ((reward + (this.discount * maxNextQ)) - qEntry.q);
        this.totalNumberOfSteps++;
    }

    /**
     * Returns a copy of the given state with history objects added: one per action in each
     * remembered joint action, followed by placeholder objects for every registered agent
     * for each history slot that is not filled yet.
     *
     * @param state the state to augment; it is copied, not modified
     * @return the augmented copy
     */
    protected State getHistoryAugmentedState(State state) {
        State augmented = state.copy();

        int step = 0;
        for (JointAction pastJointAction : this.history) {
            for (GroundedSingleAction pastAction : pastJointAction) {
                augmented.addObject(getHistoryObjectInstanceForAgent(pastAction, step));
            }
            step++;
        }

        // Pad the unfilled slots so the augmented state always describes historySize steps.
        if (step < this.historySize) {
            List<Agent> registeredAgents = this.world.getRegisteredAgents();
            for (; step < this.historySize; step++) {
                for (Agent agent : registeredAgents) {
                    augmented.addObject(getHistoryLessObjectInstanceForAgent(agent.getAgentName(), step));
                }
            }
        }
        return augmented;
    }

    /**
     * Creates the history object describing an action that an agent took {@code i} steps back.
     *
     * @param groundedSingleAction the recorded action
     * @param i how many steps back the action was taken (0 is the most recent)
     * @return a history object named {@code <agentName>-h<i>}
     */
    protected ObjectInstance getHistoryObjectInstanceForAgent(GroundedSingleAction groundedSingleAction, int i) {
        String agentName = groundedSingleAction.actingAgent;
        ObjectInstance historyObject = new ObjectInstance(this.classHistory, agentName + "-h" + i);
        historyObject.setValue(ATTHNUM, i);
        historyObject.setValue(ATTHPN, this.world.getPlayerNumberForAgent(agentName));
        historyObject.setValue(ATTHAID, this.actionMap.getActionId(groundedSingleAction));
        return historyObject;
    }

    /**
     * Creates the placeholder history object for an agent at a history slot that has no
     * recorded action yet; its action id is set to {@code actionMap.maxValue()}.
     *
     * @param str the agent's name
     * @param i the history slot (0 is the most recent)
     * @return a history object named {@code <agentName>-h<i>}
     */
    protected ObjectInstance getHistoryLessObjectInstanceForAgent(String str, int i) {
        ObjectInstance placeholder = new ObjectInstance(this.classHistory, str + "-h" + i);
        placeholder.setValue(ATTHNUM, i);
        placeholder.setValue(ATTHPN, this.world.getPlayerNumberForAgent(str));
        placeholder.setValue(ATTHAID, this.actionMap.maxValue());
        return placeholder;
    }

    /**
     * Hashes the state after applying the stored state abstraction and then augmenting the
     * result with the current action history.
     *
     * @param state the raw world state
     * @return the hash tuple of the abstracted, history-augmented state
     */
    @Override // burlap.behavior.stochasticgame.agents.naiveq.SGNaiveQLAgent
    protected StateHashTuple stateHash(State state) {
        State abstracted = this.storedMapAbstraction.abstraction(state);
        return this.hashFactory.hashState(getHistoryAugmentedState(abstracted));
    }
}
