package burlap.behavior.stochasticgame.mavaluefunction;

import burlap.behavior.singleagent.ValueFunctionInitialization;
import burlap.behavior.statehashing.StateHashFactory;
import burlap.behavior.statehashing.StateHashTuple;
import burlap.behavior.stochasticgame.mavaluefunction.AgentQSourceMap;
import burlap.oomdp.core.State;
import burlap.oomdp.core.TerminalFunction;
import burlap.oomdp.core.TransitionProbability;
import burlap.oomdp.stochasticgames.AgentType;
import burlap.oomdp.stochasticgames.JointAction;
import burlap.oomdp.stochasticgames.JointActionModel;
import burlap.oomdp.stochasticgames.JointReward;
import burlap.oomdp.stochasticgames.SGDomain;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/* loaded from: input_file:burlap/behavior/stochasticgame/mavaluefunction/MAValueFunctionPlanner.class */
/**
 * Abstract base for multi-agent planners that keep one state-value function per agent
 * and expose those value functions as Q-sources.
 *
 * <p>Each agent named in the agent definitions gets its own {@link BackupBasedQSource}.
 * Q-values are not stored; they are derived on demand from the stored state values, the
 * joint action model, the joint reward function and the discount factor. Subclasses
 * supply the actual planning procedure through {@link #planFromState(State)}.
 */
public abstract class MAValueFunctionPlanner implements MultiAgentQSourceProvider {
    /** Domain passed to {@link #initMAVF}; stored but not otherwise used by this class. */
    protected SGDomain domain;
    /** Agent definitions keyed by agent name; the key set determines which Q-sources exist. */
    protected Map<String, AgentType> agentDefinitions;
    /** Model queried for the transition probabilities of a joint action. */
    protected JointActionModel jointActionModel;
    /** Reward function queried for the per-agent reward of each transition. */
    protected JointReward jointReward;
    /** Decides which states are terminal; terminal states always have value and Q-value 0. */
    protected TerminalFunction terminalFunction;
    /** Discount factor applied to the value of successor states. */
    protected double discount;
    /** Factory used to hash states so they can be used as value-function keys. */
    protected StateHashFactory hashingFactory;
    /** Supplies the initial value of non-terminal states that have no stored value yet. */
    protected ValueFunctionInitialization vInit;
    /** Operator that computes the backed-up value of a state for a single agent. */
    protected SGBackupOperator backupOperator;
    /** Per-agent Q-sources, rebuilt whenever new agent definitions are installed. */
    protected AgentQSourceMap.HashMapAgentQSourceMap qSources;
    /**
     * Guards {@link #setAgentDefinitions(Map)}. Nothing in this class sets it to
     * {@code true}; presumably subclasses do so when planning begins (TODO confirm).
     */
    protected boolean planningStarted = false;

    /**
     * Q-source for a single agent that is backed by a tabular state-value function.
     * Q-values are computed from the stored state values by a one-step expectation over
     * the enclosing planner's joint action model.
     */
    public class BackupBasedQSource implements QSourceForSingleAgent {
        /** Name of the agent whose rewards and values this source tracks. */
        protected String agentName;
        /** Stored state values, keyed by hashed state. */
        protected Map<StateHashTuple, Double> valueFunction = new HashMap<StateHashTuple, Double>();

        /**
         * @param str name of the agent this Q-source belongs to
         */
        public BackupBasedQSource(String str) {
            this.agentName = str;
        }

        /**
         * Computes the Q-value of a joint action in a state as the probability-weighted
         * sum, over all transitions, of this agent's reward plus the discounted value of
         * the successor state.
         *
         * @param state the state in which the joint action is evaluated
         * @param jointAction the joint action to evaluate
         * @return the Q-value wrapped with its state and joint action; the Q-value is 0
         *         when {@code state} is terminal
         */
        @Override // burlap.behavior.stochasticgame.mavaluefunction.QSourceForSingleAgent
        public JAQValue getQValueFor(State state, JointAction jointAction) {
            double q = 0.0d;
            if (!MAValueFunctionPlanner.this.terminalFunction.isTerminal(state)) {
                // The transition and reward models are only queried here, where their
                // results are used; terminal states never need them.
                JointActionTransitions transitions = new JointActionTransitions(state, jointAction);
                for (int i = 0; i < transitions.tps.size(); i++) {
                    TransitionProbability tp = transitions.tps.get(i);
                    double reward = transitions.jrs.get(i).get(this.agentName).doubleValue();
                    double nextValue = getValue(MAValueFunctionPlanner.this.hashingFactory.hashState(tp.s));
                    q += tp.p * (reward + (MAValueFunctionPlanner.this.discount * nextValue));
                }
            }
            return new JAQValue(state, jointAction, q);
        }

        /**
         * Returns the stored value of a state. If no value is stored yet, the state is
         * given 0 when it is terminal and the planner's initialization value otherwise;
         * that value is stored before it is returned.
         *
         * @param stateHashTuple the hashed state to look up
         * @return the stored or newly initialized value of the state
         */
        public double getValue(StateHashTuple stateHashTuple) {
            Double stored = this.valueFunction.get(stateHashTuple);
            if (stored != null) {
                return stored.doubleValue();
            }
            double initial = 0.0d;
            if (!MAValueFunctionPlanner.this.terminalFunction.isTerminal(stateHashTuple.s)) {
                initial = MAValueFunctionPlanner.this.vInit.value(stateHashTuple.s);
            }
            this.valueFunction.put(stateHashTuple, Double.valueOf(initial));
            return initial;
        }

        /**
         * Stores a value for a state, replacing any previous value.
         *
         * @param stateHashTuple the hashed state to update
         * @param d the value to store
         */
        public void setValue(StateHashTuple stateHashTuple, double d) {
            this.valueFunction.put(stateHashTuple, Double.valueOf(d));
        }
    }

    /**
     * Holds the transitions of one joint action from one state together with the joint
     * reward of each transition. {@code tps} and {@code jrs} are index-aligned:
     * {@code jrs.get(i)} is the reward map for the transition {@code tps.get(i)}.
     */
    public class JointActionTransitions {
        /** The joint action these transitions belong to. */
        public JointAction ja;
        /** Transition probabilities returned by the joint action model. */
        public List<TransitionProbability> tps;
        /** Joint rewards, one map per transition; looked up by agent name. */
        public List<Map<String, Double>> jrs;

        /**
         * Queries the enclosing planner's joint action model for the transitions and its
         * joint reward function for the reward of each transition.
         *
         * @param state the source state
         * @param jointAction the joint action taken in {@code state}
         */
        public JointActionTransitions(State state, JointAction jointAction) {
            this.ja = jointAction;
            this.tps = MAValueFunctionPlanner.this.jointActionModel.transitionProbsFor(state, jointAction);
            this.jrs = new ArrayList<Map<String, Double>>(this.tps.size());
            for (TransitionProbability tp : this.tps) {
                this.jrs.add(MAValueFunctionPlanner.this.jointReward.reward(state, jointAction, tp.s));
            }
        }
    }

    /**
     * Initializes every planner component and installs the agent definitions.
     *
     * @param sGDomain the domain
     * @param map agent definitions keyed by agent name; see {@link #setAgentDefinitions(Map)}
     * @param jointActionModel the joint action (transition) model
     * @param jointReward the joint reward function
     * @param terminalFunction the terminal state function
     * @param d the discount factor
     * @param stateHashFactory the factory used to hash states
     * @param valueFunctionInitialization the initial values for states without a stored value
     * @param sGBackupOperator the operator used to back up state values
     * @throws RuntimeException if planning has already started
     */
    public void initMAVF(SGDomain sGDomain, Map<String, AgentType> map, JointActionModel jointActionModel, JointReward jointReward, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, ValueFunctionInitialization valueFunctionInitialization, SGBackupOperator sGBackupOperator) {
        this.domain = sGDomain;
        this.jointActionModel = jointActionModel;
        this.jointReward = jointReward;
        this.terminalFunction = terminalFunction;
        this.discount = d;
        this.hashingFactory = stateHashFactory;
        this.vInit = valueFunctionInitialization;
        this.backupOperator = sGBackupOperator;
        setAgentDefinitions(map);
    }

    /**
     * @return the current value of the {@code planningStarted} flag
     */
    public boolean hasStartedPlanning() {
        return this.planningStarted;
    }

    /**
     * Installs new agent definitions and creates a fresh {@link BackupBasedQSource} for
     * every agent name, discarding the previous Q-sources. The call does nothing when
     * {@code map} is {@code null} or is the same instance that is already installed.
     *
     * @param map agent definitions keyed by agent name
     * @throws RuntimeException if planning has already started
     */
    public void setAgentDefinitions(Map<String, AgentType> map) {
        if (this.planningStarted) {
            throw new RuntimeException("Cannot reset the agent definitions after planning has already started.");
        }
        if (map == null || this.agentDefinitions == map) {
            return;
        }
        this.agentDefinitions = map;
        Map<String, QSourceForSingleAgent> sources = new HashMap<String, QSourceForSingleAgent>();
        for (String agentName : this.agentDefinitions.keySet()) {
            sources.put(agentName, new BackupBasedQSource(agentName));
        }
        this.qSources = new AgentQSourceMap.HashMapAgentQSourceMap(sources);
    }

    /**
     * Runs the planner from the given state; the procedure is defined by subclasses.
     *
     * @param state the state to plan from
     */
    public abstract void planFromState(State state);

    /**
     * @return the per-agent Q-sources, or {@code null} if no agent definitions have
     *         been installed yet
     */
    @Override // burlap.behavior.stochasticgame.mavaluefunction.MultiAgentQSourceProvider
    public AgentQSourceMap getQSources() {
        return this.qSources;
    }

    /**
     * Backs up the value of one state for every agent and stores the new values.
     *
     * <p>Agents are processed one after another, and each agent's new value is stored
     * before the next agent's backup is computed.
     *
     * @param state the state whose values are backed up
     * @return the largest absolute change in value over all agents, or
     *         {@link Double#NEGATIVE_INFINITY} when there are no agents
     */
    public double backupAllValueFunctions(State state) {
        StateHashTuple hashedState = this.hashingFactory.hashState(state);
        double maxChange = Double.NEGATIVE_INFINITY;
        for (String agentName : this.agentDefinitions.keySet()) {
            BackupBasedQSource source = (BackupBasedQSource) this.qSources.agentQSource(agentName);
            double oldValue = source.getValue(hashedState);
            double newValue = this.backupOperator.performBackup(state, agentName, this.agentDefinitions, this.qSources);
            maxChange = Math.max(maxChange, Math.abs(newValue - oldValue));
            source.setValue(hashedState, newValue);
        }
        return maxChange;
    }
}
