package burlap.behavior.singleagent.learning.modellearning.artdp;

import burlap.behavior.singleagent.EpisodeAnalysis;
import burlap.behavior.singleagent.Policy;
import burlap.behavior.singleagent.QValue;
import burlap.behavior.singleagent.ValueFunctionInitialization;
import burlap.behavior.singleagent.learning.LearningAgent;
import burlap.behavior.singleagent.learning.modellearning.Model;
import burlap.behavior.singleagent.learning.modellearning.ModeledDomainGenerator;
import burlap.behavior.singleagent.learning.modellearning.models.TabularModel;
import burlap.behavior.singleagent.planning.OOMDPPlanner;
import burlap.behavior.singleagent.planning.PlannerDerivedPolicy;
import burlap.behavior.singleagent.planning.QComputablePlanner;
import burlap.behavior.singleagent.planning.ValueFunctionPlanner;
import burlap.behavior.singleagent.planning.commonpolicies.BoltzmannQPolicy;
import burlap.behavior.statehashing.StateHashFactory;
import burlap.oomdp.core.AbstractGroundedAction;
import burlap.oomdp.core.Domain;
import burlap.oomdp.core.State;
import burlap.oomdp.core.TerminalFunction;
import burlap.oomdp.singleagent.GroundedAction;
import burlap.oomdp.singleagent.RewardFunction;
import java.util.LinkedList;
import java.util.List;

/* loaded from: input_file:burlap/behavior/singleagent/learning/modellearning/artdp/ARTDP.class */
/**
 * Model-learning agent (ARTDP, presumably "Adaptive Real-Time Dynamic Programming" - confirm
 * against the project documentation).
 *
 * <p>While acting, every observed transition is fed to a learned {@link Model}, and a Bellman
 * update is then performed on the visited state by an internal {@link ValueFunctionPlanner} that
 * plans over the learned model. Actions are chosen by {@link #policy}, which by default is a
 * {@link BoltzmannQPolicy} querying this object's Q-values.
 *
 * <p>This class is a learning agent only; {@link #planFromState(State)} always throws.
 */
public class ARTDP extends OOMDPPlanner implements QComputablePlanner, LearningAgent {
    /** Learned model; updated with every transition observed during a learning episode. */
    protected Model model;

    /** Value-function planner operating on the learned model's domain, reward and terminal functions. */
    protected ValueFunctionPlanner modelPlanner;

    /** Policy used to select actions during learning episodes. */
    protected Policy policy;

    /** Most recent learning episodes, oldest first; bounded by {@link #numEpisodesToStore}. */
    protected LinkedList<EpisodeAnalysis> episodeHistory = new LinkedList<>();

    /** Step limit applied by {@link #runLearningEpisodeFrom(State)}. */
    protected int maxNumSteps = Integer.MAX_VALUE;

    /** Maximum number of episodes retained in {@link #episodeHistory}. */
    protected int numEpisodesToStore = 1;

    /**
     * Value-function planner used internally for Bellman updates on the learned model. It is
     * never asked to plan by itself, so {@link #planFromState(State)} is unsupported.
     */
    protected class ARTDPPlanner extends ValueFunctionPlanner {
        /**
         * Creates a planner whose value function is initialised to the constant {@code d2}.
         *
         * @param artdp unused; retained only so existing callers keep compiling
         * @param domain domain the planner operates on
         * @param rewardFunction reward function used by the planner
         * @param terminalFunction terminal function used by the planner
         * @param d forwarded to {@code VFPInit} (presumably the discount factor - confirm)
         * @param stateHashFactory hashing factory forwarded to {@code VFPInit}
         * @param d2 constant used for value-function initialisation
         */
        public ARTDPPlanner(ARTDP artdp, Domain domain, RewardFunction rewardFunction, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, double d2) {
            this(domain, rewardFunction, terminalFunction, d, stateHashFactory, new ValueFunctionInitialization.ConstantValueFunctionInitialization(d2));
        }

        /**
         * Creates a planner with the given value-function initialisation and with cached
         * transitions disabled.
         *
         * @param domain domain the planner operates on
         * @param rewardFunction reward function used by the planner
         * @param terminalFunction terminal function used by the planner
         * @param d forwarded to {@code VFPInit} (presumably the discount factor - confirm)
         * @param stateHashFactory hashing factory forwarded to {@code VFPInit}
         * @param valueFunctionInitialization value-function initialisation to use
         */
        public ARTDPPlanner(Domain domain, RewardFunction rewardFunction, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, ValueFunctionInitialization valueFunctionInitialization) {
            VFPInit(domain, rewardFunction, terminalFunction, d, stateHashFactory);
            // Presumably disabled because the learned model keeps changing - confirm.
            this.useCachedTransitions = false;
            this.valueInitializer = valueFunctionInitialization;
        }

        /**
         * Always fails: the inner planner is only used for single-state Bellman updates.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public void planFromState(State state) {
            throw new UnsupportedOperationException("This method should not be called for the inner ARTDP planner");
        }
    }

    /**
     * Creates an agent with a {@link TabularModel} and a constant value-function initialisation.
     *
     * @param domain domain in which the agent acts
     * @param rewardFunction reward function used to score observed transitions
     * @param terminalFunction terminal function that ends an episode
     * @param d forwarded to the planner initialisation (presumably the discount factor - confirm)
     * @param stateHashFactory hashing factory for the model and the planner
     * @param d2 constant used for value-function initialisation
     */
    public ARTDP(Domain domain, RewardFunction rewardFunction, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, double d2) {
        this(domain, rewardFunction, terminalFunction, d, stateHashFactory, new ValueFunctionInitialization.ConstantValueFunctionInitialization(d2));
    }

    /**
     * Creates an agent with a {@link TabularModel} and the given value-function initialisation.
     *
     * @param domain domain in which the agent acts
     * @param rewardFunction reward function used to score observed transitions
     * @param terminalFunction terminal function that ends an episode
     * @param d forwarded to the planner initialisation (presumably the discount factor - confirm)
     * @param stateHashFactory hashing factory for the model and the planner
     * @param valueFunctionInitialization value-function initialisation to use
     */
    public ARTDP(Domain domain, RewardFunction rewardFunction, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, ValueFunctionInitialization valueFunctionInitialization) {
        this(domain, rewardFunction, terminalFunction, d, stateHashFactory, new TabularModel(domain, stateHashFactory, 1), valueFunctionInitialization);
    }

    /**
     * Creates an agent that learns with the supplied model.
     *
     * @param domain domain in which the agent acts
     * @param rewardFunction reward function used to score observed transitions
     * @param terminalFunction terminal function that ends an episode
     * @param d forwarded to the planner initialisation (presumably the discount factor - confirm)
     * @param stateHashFactory hashing factory for the planner
     * @param model model to learn; its reward and terminal functions drive the inner planner
     * @param valueFunctionInitialization value-function initialisation to use
     */
    public ARTDP(Domain domain, RewardFunction rewardFunction, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, Model model, ValueFunctionInitialization valueFunctionInitialization) {
        plannerInit(domain, rewardFunction, terminalFunction, d, stateHashFactory);
        this.model = model;
        Domain modeledDomain = new ModeledDomainGenerator(domain, this.model, true).generateDomain();
        this.modelPlanner = new ARTDPPlanner(modeledDomain, this.model.getModelRF(), this.model.getModelTF(), d, stateHashFactory, valueFunctionInitialization);
        // 0.1 is the second BoltzmannQPolicy argument (presumably the temperature - confirm).
        this.policy = new BoltzmannQPolicy(this, 0.1d);
    }

    /**
     * Replaces the learning policy and points it at this object as its planner.
     *
     * @param plannerDerivedPolicy new policy; must also be a {@link Policy}
     * @throws ClassCastException if the argument is not a {@link Policy}
     */
    public void setPolicy(PlannerDerivedPolicy plannerDerivedPolicy) {
        this.policy = (Policy) plannerDerivedPolicy;
        plannerDerivedPolicy.setPlanner(this);
    }

    /**
     * Runs a learning episode limited to {@link #maxNumSteps} steps.
     *
     * @param state initial state of the episode
     * @return the recorded episode
     */
    @Override
    public EpisodeAnalysis runLearningEpisodeFrom(State state) {
        return runLearningEpisodeFrom(state, this.maxNumSteps);
    }

    /**
     * Runs a learning episode until a terminal state is reached or {@code i} steps were taken.
     * After each step the model is updated with the observed transition and a Bellman update is
     * performed on the state that was just left. The finished episode is appended to the episode
     * history, evicting the oldest entries so that at most {@link #numEpisodesToStore} remain.
     *
     * @param state initial state of the episode
     * @param i maximum number of steps to take
     * @return the recorded episode
     */
    @Override
    public EpisodeAnalysis runLearningEpisodeFrom(State state, int i) {
        EpisodeAnalysis episode = new EpisodeAnalysis(state);
        State current = state;
        for (int step = 0; !this.tf.isTerminal(current) && step < i; step++) {
            GroundedAction action = (GroundedAction) this.policy.getAction(current);
            State next = action.executeIn(current);
            double reward = this.rf.reward(current, action, next);
            episode.recordTransitionTo(action, next, reward);
            this.model.updateModel(current, action, next, reward, this.tf.isTerminal(next));
            this.modelPlanner.performBellmanUpdateOn(current);
            current = next;
        }

        // Make room first so the history never exceeds the configured bound; the emptiness
        // check guards against a subclass having set a non-positive bound directly.
        while (!this.episodeHistory.isEmpty() && this.episodeHistory.size() >= this.numEpisodesToStore) {
            this.episodeHistory.poll();
        }
        this.episodeHistory.offer(episode);

        return episode;
    }

    /**
     * Returns the most recently stored learning episode.
     *
     * @return the last stored episode
     * @throws java.util.NoSuchElementException if no episode is currently stored
     */
    @Override
    public EpisodeAnalysis getLastLearningEpisode() {
        return this.episodeHistory.getLast();
    }

    /**
     * Sets how many episodes are retained; non-positive values are treated as 1. Episodes already
     * stored are not trimmed until the next episode finishes.
     *
     * @param i requested number of episodes to store
     */
    @Override
    public void setNumEpisodesToStore(int i) {
        this.numEpisodesToStore = Math.max(i, 1);
    }

    /**
     * Returns the stored learning episodes, oldest first.
     *
     * @return the live internal history list (not a copy)
     */
    @Override
    public List<EpisodeAnalysis> getAllStoredLearningEpisodes() {
        return this.episodeHistory;
    }

    /**
     * Always fails: this class is a learning agent, not a planner.
     *
     * @throws RuntimeException always
     */
    @Override
    public void planFromState(State state) {
        throw new RuntimeException("Model learning algorithms should not be used as planning algorithms.");
    }

    /**
     * Returns the inner planner's Q-values for {@code state}, adjusted as described in
     * {@link #getQ(State, AbstractGroundedAction)}. The returned {@link QValue} objects are the
     * ones produced by the inner planner, modified in place.
     *
     * @param state state to query
     * @return adjusted Q-values, one per entry returned by the inner planner
     */
    @Override
    public List<QValue> getQs(State state) {
        List<QValue> qs = this.modelPlanner.getQs(state);
        for (QValue qValue : qs) {
            adjustModelQValue(state, qValue);
        }
        return qs;
    }

    /**
     * Returns the inner planner's Q-value for the given state-action pair. If the model does not
     * yet model that transition, the value is replaced by the value-function initialisation's
     * Q-value. The action in the result is rebuilt against this object's own domain.
     *
     * @param state state to query
     * @param abstractGroundedAction action to query
     * @return the adjusted Q-value
     */
    @Override
    public QValue getQ(State state, AbstractGroundedAction abstractGroundedAction) {
        return adjustModelQValue(state, this.modelPlanner.getQ(state, abstractGroundedAction));
    }

    /**
     * Resets the learned model and the inner planner's results, and clears the episode history.
     */
    @Override
    public void resetPlannerResults() {
        this.model.resetModel();
        this.modelPlanner.resetPlannerResults();
        this.episodeHistory.clear();
    }

    /**
     * Adjusts, in place, a Q-value obtained from the inner planner: falls back to the initial
     * Q-value for unmodeled transitions, then swaps the action for an equivalent one looked up by
     * name in this object's domain (presumably because the inner planner's actions belong to the
     * generated model domain - confirm).
     */
    private QValue adjustModelQValue(State state, QValue qValue) {
        if (!this.model.transitionIsModeled(state, (GroundedAction) qValue.a)) {
            qValue.q = this.modelPlanner.getValueFunctionInitialization().qValue(state, qValue.a);
        }
        qValue.a = new GroundedAction(this.domain.getAction(qValue.a.actionName()), qValue.a.params);
        return qValue;
    }
}
