package burlap.behavior.singleagent.planning.stochastic.valueiteration;

import burlap.behavior.singleagent.planning.ActionTransitions;
import burlap.behavior.singleagent.planning.HashedTransitionProbability;
import burlap.behavior.singleagent.planning.ValueFunctionPlanner;
import burlap.behavior.statehashing.StateHashFactory;
import burlap.behavior.statehashing.StateHashTuple;
import burlap.debugtools.DPrint;
import burlap.oomdp.core.Domain;
import burlap.oomdp.core.State;
import burlap.oomdp.core.TerminalFunction;
import burlap.oomdp.singleagent.RewardFunction;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;

/* loaded from: input_file:burlap/behavior/singleagent/planning/stochastic/valueiteration/ValueIteration.class */
/**
 * Value iteration planner built on {@link ValueFunctionPlanner}.
 *
 * <p>Planning happens in two phases: a reachability pass that enumerates every state reachable
 * from a seed state (recording each in {@code mapToStateIndex}), followed by repeated sweeps of
 * Bellman updates over those states. Sweeping stops once the largest value change in a sweep
 * drops below {@code maxDelta}, or once {@code maxIterations} sweeps have been attempted.
 */
public class ValueIteration extends ValueFunctionPlanner {
    /** Convergence threshold: sweeping stops when the largest value change in a sweep is below this. */
    protected double maxDelta;

    /** Upper bound on the number of sweeps performed by a single {@link #runVI()} call. */
    protected int maxIterations;

    /** Whether a reachability pass has completed since the last reset. */
    protected boolean foundReachableStates = false;

    /** When true, the reachability pass does not expand the successors of terminal states. */
    protected boolean stopReachabilityFromTerminalStates = false;

    /** Whether {@link #runVI()} has completed since the last reachability pass or reset. */
    protected boolean hasRunVI = false;

    /**
     * Creates the planner. The first five arguments are forwarded unchanged to {@code VFPInit}.
     *
     * @param domain the domain to plan in
     * @param rewardFunction the reward function
     * @param terminalFunction the terminal state function
     * @param d fourth argument of {@code VFPInit} (presumably the discount factor; confirm
     *     against {@code ValueFunctionPlanner})
     * @param stateHashFactory the factory used to hash states
     * @param d2 the convergence threshold stored in {@link #maxDelta}
     * @param i the sweep limit stored in {@link #maxIterations}
     */
    public ValueIteration(Domain domain, RewardFunction rewardFunction, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, double d2, int i) {
        VFPInit(domain, rewardFunction, terminalFunction, d, stateHashFactory);
        this.maxDelta = d2;
        this.maxIterations = i;
    }

    /**
     * Marks the reachable state set as stale and discards the cached transition dynamics, so
     * that the next {@link #performReachabilityFrom(State)} call performs a fresh search.
     * Note that {@code mapToStateIndex} is not cleared here.
     */
    public void recomputeReachableStates() {
        this.foundReachableStates = false;
        this.transitionDynamics = new HashMap();
    }

    /**
     * Sets whether the reachability pass stops expanding at terminal states.
     * (The misspelling in the method name is kept for compatibility with existing callers.)
     *
     * @param z true to skip successor expansion of terminal states; false to expand every state
     */
    public void toggleReachabiltiyTerminalStatePruning(boolean z) {
        this.stopReachabilityFromTerminalStates = z;
    }

    /**
     * Plans from the given state: runs the reachability pass from it and then runs value
     * iteration if that pass discovered anything new or value iteration has not yet been run.
     *
     * @param state the state to plan from
     */
    @Override // burlap.behavior.singleagent.planning.ValueFunctionPlanner, burlap.behavior.singleagent.planning.OOMDPPlanner
    public void planFromState(State state) {
        initializeOptionsForExpectationComputations();
        if (performReachabilityFrom(state) || !this.hasRunVI) {
            runVI();
        }
    }

    /**
     * Resets the superclass planning results and clears this planner's reachability and
     * value-iteration flags.
     */
    @Override // burlap.behavior.singleagent.planning.ValueFunctionPlanner, burlap.behavior.singleagent.planning.OOMDPPlanner
    public void resetPlannerResults() {
        super.resetPlannerResults();
        this.foundReachableStates = false;
        this.hasRunVI = false;
    }

    /**
     * Sweeps Bellman updates over every state in {@code mapToStateIndex} until the largest
     * absolute value change within a sweep is below {@link #maxDelta}, or until
     * {@link #maxIterations} sweeps have been attempted.
     *
     * @throws RuntimeException if no reachability pass has been completed yet
     */
    public void runVI() {
        if (!this.foundReachableStates) {
            throw new RuntimeException("Cannot run VI until the reachable states have been found. Use the planFromState or performReachabilityFrom method at least once before calling runVI.");
        }
        Set<StateHashTuple> states = this.mapToStateIndex.keySet();
        int passes = 0;
        while (passes < this.maxIterations) {
            double delta = 0.0d;
            for (StateHashTuple sh : states) {
                // Read the old value BEFORE applying the update. Java evaluates operands left to
                // right, so computing "update - value" in one expression would read the value
                // after it was overwritten and always measure a change of zero.
                // NOTE(review): relies on performBellmanUpdateOn storing the value it returns;
                // confirm against ValueFunctionPlanner.
                double previousValue = value(sh);
                double updatedValue = performBellmanUpdateOn(sh);
                delta = Math.max(Math.abs(updatedValue - previousValue), delta);
            }
            if (delta < this.maxDelta) {
                break;
            }
            passes++;
        }
        DPrint.cl(this.debugCode, "Passes: " + passes);
        this.hasRunVI = true;
    }

    /**
     * Breadth-first search from {@code state} that registers every newly encountered state in
     * {@code mapToStateIndex}. Successors are obtained through {@code getActionsTransitions}.
     *
     * <p>If the seed state is already indexed and a previous reachability pass has completed,
     * nothing is done. Otherwise the search runs, and afterwards the planner is marked as
     * needing a new value iteration run.
     *
     * @param state the seed state of the search
     * @return true if a search was performed; false if it was skipped
     */
    public boolean performReachabilityFrom(State state) {
        StateHashTuple seed = stateHash(state);
        if (this.mapToStateIndex.containsKey(seed) && this.foundReachableStates) {
            return false;
        }
        DPrint.cl(this.debugCode, "Starting reachability analysis");
        LinkedList<StateHashTuple> openQueue = new LinkedList<StateHashTuple>();
        Set<StateHashTuple> enqueued = new HashSet<StateHashTuple>();
        openQueue.offer(seed);
        enqueued.add(seed);
        while (!openQueue.isEmpty()) {
            StateHashTuple current = openQueue.poll();
            if (this.mapToStateIndex.containsKey(current)) {
                // Already indexed (e.g. by an earlier pass); do not expand it again.
                continue;
            }
            this.mapToStateIndex.put(current, current);
            if (!this.tf.isTerminal(current.s) || !this.stopReachabilityFromTerminalStates) {
                for (ActionTransitions actionTransitions : getActionsTransitions(current)) {
                    for (HashedTransitionProbability transition : actionTransitions.transitions) {
                        StateHashTuple successor = transition.sh;
                        // Skip successors already queued in this pass or already present in the
                        // cached transition dynamics.
                        if (!enqueued.contains(successor) && !this.transitionDynamics.containsKey(successor)) {
                            enqueued.add(successor);
                            openQueue.offer(successor);
                        }
                    }
                }
            }
        }
        DPrint.cl(this.debugCode, "Finished reachability analysis; # states: " + this.mapToStateIndex.size());
        this.foundReachableStates = true;
        this.hasRunVI = false;
        return true;
    }
}
