package burlap.behavior.stochasticgame.mavaluefunction.vfplanners;

import burlap.behavior.singleagent.ValueFunctionInitialization;
import burlap.behavior.statehashing.StateHashFactory;
import burlap.behavior.statehashing.StateHashTuple;
import burlap.behavior.stochasticgame.mavaluefunction.MAValueFunctionPlanner;
import burlap.behavior.stochasticgame.mavaluefunction.SGBackupOperator;
import burlap.debugtools.DPrint;
import burlap.oomdp.core.State;
import burlap.oomdp.core.TerminalFunction;
import burlap.oomdp.core.TransitionProbability;
import burlap.oomdp.stochasticgames.AgentType;
import burlap.oomdp.stochasticgames.JointAction;
import burlap.oomdp.stochasticgames.JointActionModel;
import burlap.oomdp.stochasticgames.JointReward;
import burlap.oomdp.stochasticgames.SGDomain;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:burlap/behavior/stochasticgame/mavaluefunction/vfplanners/MAValueIteration.class */
/**
 * Value-iteration style planner for stochastic games built on top of
 * {@link MAValueFunctionPlanner}.
 *
 * <p>Planning happens in two phases: {@link #performStateReachabilityFrom(State)} collects every
 * state reachable from a seed state through the joint action model, and {@link #runVI()} then
 * repeatedly calls {@code backupAllValueFunctions} on each collected state until the largest value
 * reported by a pass falls below {@link #maxDelta} or {@link #maxIterations} passes have run.
 * {@link #planFromState(State)} chains the two phases.
 */
public class MAValueIteration extends MAValueFunctionPlanner {
    /** Convergence threshold: a pass whose maximum reported change is below this value stops {@link #runVI()}. */
    protected double maxDelta;

    /** Upper bound on the number of passes {@link #runVI()} performs over the known states. */
    protected int maxIterations;

    /** Hashed states discovered so far by {@link #performStateReachabilityFrom(State)}. */
    protected Set<StateHashTuple> states = new HashSet<StateHashTuple>();

    /** Debug code passed to {@link DPrint#cl} for every message this planner prints. */
    protected int debugCode = 88934789;

    /**
     * Creates a planner without agent definitions (a {@code null} map is forwarded) and with a
     * constant value function initialization.
     *
     * @param sGDomain          the stochastic game domain, forwarded to {@code initMAVF}
     * @param jointActionModel  the joint action model, forwarded to {@code initMAVF}
     * @param jointReward       the joint reward function, forwarded to {@code initMAVF}
     * @param terminalFunction  the terminal function, forwarded to {@code initMAVF}
     * @param d                 forwarded unchanged to {@code initMAVF}; presumably the discount
     *                          factor (confirm against {@link MAValueFunctionPlanner})
     * @param stateHashFactory  the state hashing factory, forwarded to {@code initMAVF}
     * @param d2                constant wrapped in a
     *                          {@link ValueFunctionInitialization.ConstantValueFunctionInitialization}
     * @param sGBackupOperator  the backup operator, forwarded to {@code initMAVF}
     * @param d3                stored as {@link #maxDelta}
     * @param i                 stored as {@link #maxIterations}
     */
    public MAValueIteration(SGDomain sGDomain, JointActionModel jointActionModel, JointReward jointReward, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, double d2, SGBackupOperator sGBackupOperator, double d3, int i) {
        // Same as the map-taking overload with no agent definitions supplied.
        this(sGDomain, null, jointActionModel, jointReward, terminalFunction, d, stateHashFactory, d2, sGBackupOperator, d3, i);
    }

    /**
     * Creates a planner without agent definitions (a {@code null} map is forwarded) and with a
     * caller-supplied value function initialization.
     *
     * @param sGDomain                     the stochastic game domain, forwarded to {@code initMAVF}
     * @param jointActionModel             the joint action model, forwarded to {@code initMAVF}
     * @param jointReward                  the joint reward function, forwarded to {@code initMAVF}
     * @param terminalFunction             the terminal function, forwarded to {@code initMAVF}
     * @param d                            forwarded unchanged to {@code initMAVF}; presumably the
     *                                     discount factor (confirm against {@link MAValueFunctionPlanner})
     * @param stateHashFactory             the state hashing factory, forwarded to {@code initMAVF}
     * @param valueFunctionInitialization  the value function initialization, forwarded to {@code initMAVF}
     * @param sGBackupOperator             the backup operator, forwarded to {@code initMAVF}
     * @param d2                           stored as {@link #maxDelta}
     * @param i                            stored as {@link #maxIterations}
     */
    public MAValueIteration(SGDomain sGDomain, JointActionModel jointActionModel, JointReward jointReward, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, ValueFunctionInitialization valueFunctionInitialization, SGBackupOperator sGBackupOperator, double d2, int i) {
        // Same as the map-taking overload with no agent definitions supplied.
        this(sGDomain, null, jointActionModel, jointReward, terminalFunction, d, stateHashFactory, valueFunctionInitialization, sGBackupOperator, d2, i);
    }

    /**
     * Creates a planner with explicit agent definitions and a constant value function
     * initialization.
     *
     * @param sGDomain          the stochastic game domain, forwarded to {@code initMAVF}
     * @param map               agent definitions keyed by name, forwarded to {@code initMAVF};
     *                          the other overloads pass {@code null} here
     * @param jointActionModel  the joint action model, forwarded to {@code initMAVF}
     * @param jointReward       the joint reward function, forwarded to {@code initMAVF}
     * @param terminalFunction  the terminal function, forwarded to {@code initMAVF}
     * @param d                 forwarded unchanged to {@code initMAVF}; presumably the discount
     *                          factor (confirm against {@link MAValueFunctionPlanner})
     * @param stateHashFactory  the state hashing factory, forwarded to {@code initMAVF}
     * @param d2                constant wrapped in a
     *                          {@link ValueFunctionInitialization.ConstantValueFunctionInitialization}
     * @param sGBackupOperator  the backup operator, forwarded to {@code initMAVF}
     * @param d3                stored as {@link #maxDelta}
     * @param i                 stored as {@link #maxIterations}
     */
    public MAValueIteration(SGDomain sGDomain, Map<String, AgentType> map, JointActionModel jointActionModel, JointReward jointReward, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, double d2, SGBackupOperator sGBackupOperator, double d3, int i) {
        initMAVF(sGDomain, map, jointActionModel, jointReward, terminalFunction, d, stateHashFactory, new ValueFunctionInitialization.ConstantValueFunctionInitialization(d2), sGBackupOperator);
        this.maxDelta = d3;
        this.maxIterations = i;
    }

    /**
     * Creates a planner with explicit agent definitions and a caller-supplied value function
     * initialization.
     *
     * @param sGDomain                     the stochastic game domain, forwarded to {@code initMAVF}
     * @param map                          agent definitions keyed by name, forwarded to
     *                                     {@code initMAVF}; the other overloads pass {@code null} here
     * @param jointActionModel             the joint action model, forwarded to {@code initMAVF}
     * @param jointReward                  the joint reward function, forwarded to {@code initMAVF}
     * @param terminalFunction             the terminal function, forwarded to {@code initMAVF}
     * @param d                            forwarded unchanged to {@code initMAVF}; presumably the
     *                                     discount factor (confirm against {@link MAValueFunctionPlanner})
     * @param stateHashFactory             the state hashing factory, forwarded to {@code initMAVF}
     * @param valueFunctionInitialization  the value function initialization, forwarded to {@code initMAVF}
     * @param sGBackupOperator             the backup operator, forwarded to {@code initMAVF}
     * @param d2                           stored as {@link #maxDelta}
     * @param i                            stored as {@link #maxIterations}
     */
    public MAValueIteration(SGDomain sGDomain, Map<String, AgentType> map, JointActionModel jointActionModel, JointReward jointReward, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, ValueFunctionInitialization valueFunctionInitialization, SGBackupOperator sGBackupOperator, double d2, int i) {
        initMAVF(sGDomain, map, jointActionModel, jointReward, terminalFunction, d, stateHashFactory, valueFunctionInitialization, sGBackupOperator);
        this.maxDelta = d2;
        this.maxIterations = i;
    }

    /**
     * Expands the set of known states from {@code state} and, if that seed state had not been
     * seen before, runs value iteration over all known states.
     *
     * @param state the state to plan from
     */
    @Override // burlap.behavior.stochasticgame.mavaluefunction.MAValueFunctionPlanner
    public void planFromState(State state) {
        if (performStateReachabilityFrom(state)) {
            runVI();
        }
    }

    /**
     * Sweeps over every known state, calling {@code backupAllValueFunctions} on each, until a
     * sweep's maximum returned value is below {@link #maxDelta} or {@link #maxIterations} sweeps
     * have been performed. Progress is reported through {@link DPrint} using {@link #debugCode}.
     *
     * @throws RuntimeException if no states have been collected yet
     */
    public void runVI() {
        if (this.states.isEmpty()) {
            throw new RuntimeException("No states to iterate over. Note that state reacability needs to be performed before runVI() can be called. Consider using planFromState(State s) method instead or using the performStateReachabilityFrom(State s) method first.");
        }
        int passes = 0;
        while (passes < this.maxIterations) {
            double maxChange = Double.NEGATIVE_INFINITY;
            for (StateHashTuple hashedState : this.states) {
                maxChange = Math.max(backupAllValueFunctions(hashedState.s), maxChange);
            }
            // The per-pass message keeps its zero-based pass index.
            DPrint.cl(this.debugCode, "Finished pass: " + passes + " with max change: " + maxChange);
            // Count the pass before testing for convergence so the final summary includes the
            // converging pass instead of under-reporting by one.
            passes++;
            if (maxChange < this.maxDelta) {
                break;
            }
        }
        DPrint.cl(this.debugCode, "Performed " + passes + " passes.");
    }

    /**
     * Adds {@code state} and every state reachable from it to {@link #states}. Reachability is
     * explored breadth-first: each dequeued state is expanded with every joint action returned by
     * {@code JointAction.getAllJointActions} and every outcome listed by the joint action model's
     * {@code transitionProbsFor}.
     *
     * @param state the seed state to expand from
     * @return {@code false} if the seed state was already known (nothing is expanded);
     *         {@code true} otherwise
     */
    public boolean performStateReachabilityFrom(State state) {
        StateHashTuple seed = this.hashingFactory.hashState(state);
        // Set.add reports whether the element was new, so no separate contains() lookup is needed.
        if (!this.states.add(seed)) {
            return false;
        }
        LinkedList<StateHashTuple> openQueue = new LinkedList<StateHashTuple>();
        openQueue.add(seed);
        while (!openQueue.isEmpty()) {
            StateHashTuple current = openQueue.poll();
            for (JointAction jointAction : JointAction.getAllJointActions(current.s, this.agentDefinitions)) {
                for (TransitionProbability outcome : this.jointActionModel.transitionProbsFor(current.s, jointAction)) {
                    StateHashTuple successor = this.hashingFactory.hashState(outcome.s);
                    // Only enqueue states seen for the first time, so each state is expanded once.
                    if (this.states.add(successor)) {
                        openQueue.add(successor);
                    }
                }
            }
        }
        DPrint.cl(this.debugCode, "Finished State reachability; " + this.states.size() + " unique states found.");
        return true;
    }
}
