package burlap.behavior.singleagent.learning.modellearning.rmax;

import burlap.behavior.singleagent.EpisodeAnalysis;
import burlap.behavior.singleagent.Policy;
import burlap.behavior.singleagent.learning.LearningAgent;
import burlap.behavior.singleagent.learning.modellearning.DomainMappedPolicy;
import burlap.behavior.singleagent.learning.modellearning.Model;
import burlap.behavior.singleagent.learning.modellearning.ModelPlanner;
import burlap.behavior.singleagent.learning.modellearning.ModeledDomainGenerator;
import burlap.behavior.singleagent.learning.modellearning.modelplanners.VIModelPlanner;
import burlap.behavior.singleagent.learning.modellearning.models.TabularModel;
import burlap.behavior.singleagent.planning.OOMDPPlanner;
import burlap.behavior.singleagent.shaping.potential.PotentialFunction;
import burlap.behavior.statehashing.StateHashFactory;
import burlap.oomdp.core.Domain;
import burlap.oomdp.core.State;
import burlap.oomdp.core.TerminalFunction;
import burlap.oomdp.singleagent.GroundedAction;
import burlap.oomdp.singleagent.RewardFunction;
import java.util.LinkedList;
import java.util.List;

/* loaded from: input_file:burlap/behavior/singleagent/learning/modellearning/rmax/PotentialShapedRMax.class */
/**
 * Model-learning agent that acts in the real domain while learning a {@link Model} of it, and
 * chooses actions from a policy planned on a modeled copy of the domain whose reward function is
 * shaped by a {@link PotentialFunction}.
 *
 * <p>Each learning episode executes actions in the real domain, feeds the observed transitions to
 * the model, and asks the {@link ModelPlanner} to replan whenever the model reports a relevant
 * change. This class is a learning agent only; {@link #planFromState(State)} always throws.
 */
public class PotentialShapedRMax extends OOMDPPlanner implements LearningAgent {
    /** The learned model of the real domain. */
    protected Model model;
    /** Domain generated from {@link #model} by {@link ModeledDomainGenerator}; planning happens here. */
    protected Domain modeledDomain;
    /** Potential-shaped reward function handed to the model planner. */
    protected RewardFunction modeledRewardFunction;
    /** Terminal function handed to the model planner. */
    protected TerminalFunction modeledTerminalFunction;
    /** Planner that produces the policy followed during learning. */
    protected ModelPlanner modelPlanner;
    /** Step limit used by {@link #runLearningEpisodeFrom(State)}. */
    protected int maxNumSteps = Integer.MAX_VALUE;
    /** Most recent learning episodes, oldest first. */
    protected LinkedList<EpisodeAnalysis> episodeHistory = new LinkedList<>();
    /** Maximum number of episodes kept in {@link #episodeHistory}. */
    protected int numEpisodesToStore = 1;

    /* loaded from: input_file:burlap/behavior/singleagent/learning/modellearning/rmax/PotentialShapedRMax$PotentialShapedRMaxRF.class */
    /**
     * Reward function that adds a potential-based shaping term to a source reward function:
     * {@code sourceRF + gamma * potential(next) - potential(current)}.
     */
    protected class PotentialShapedRMaxRF implements RewardFunction {
        /** Reward function whose value is shaped. */
        protected RewardFunction sourceRF;
        /** Potential function supplying the shaping term. */
        protected PotentialFunction potential;

        /**
         * @param rewardFunction the reward function to shape
         * @param potentialFunction the potential function used for shaping
         */
        public PotentialShapedRMaxRF(RewardFunction rewardFunction, PotentialFunction potentialFunction) {
            this.sourceRF = rewardFunction;
            this.potential = potentialFunction;
        }

        /**
         * Returns the shaped reward for a transition.
         *
         * @param state the state the action was taken in
         * @param groundedAction the action taken
         * @param state2 the resulting state
         * @return {@code 0} when {@code state2} is the RMax fictitious state; otherwise the source
         *     reward plus {@code gamma * potential(state2) - potential(state)}, where the potential
         *     of {@code state2} is taken as {@code 0} if the model's terminal function marks it
         *     terminal
         */
        @Override // burlap.oomdp.singleagent.RewardFunction
        public double reward(State state, GroundedAction groundedAction, State state2) {
            if (ModeledDomainGenerator.isRmaxFictitiousState(state2)) {
                return 0.0d;
            }
            // Terminal next states contribute no potential.
            double nextPotential = 0.0d;
            if (!PotentialShapedRMax.this.model.getModelTF().isTerminal(state2)) {
                nextPotential = this.potential.potentialValue(state2);
            }
            return (this.sourceRF.reward(state, groundedAction, state2) + (PotentialShapedRMax.this.gamma * nextPotential)) - this.potential.potentialValue(state);
        }
    }

    /* loaded from: input_file:burlap/behavior/singleagent/learning/modellearning/rmax/PotentialShapedRMax$PotentialShapedRMaxTerminal.class */
    /**
     * Terminal function for the modeled domain. In addition to the states the source terminal
     * function marks terminal, it treats the RMax fictitious state and any state whose transitions
     * are not yet modeled as terminal.
     */
    public class PotentialShapedRMaxTerminal implements TerminalFunction {
        /** Terminal function consulted for ordinary, fully modeled states. */
        TerminalFunction sourceModelTF;

        /**
         * @param terminalFunction the terminal function to delegate to for modeled states
         */
        public PotentialShapedRMaxTerminal(TerminalFunction terminalFunction) {
            this.sourceModelTF = terminalFunction;
        }

        /**
         * @param state the state to test
         * @return {@code true} if {@code state} contains an object of the fictitious-state class, if
         *     the model does not report its transitions as modeled, or if the source terminal
         *     function marks it terminal
         */
        @Override // burlap.oomdp.core.TerminalFunction
        public boolean isTerminal(State state) {
            boolean fictitious = state.getObjectsOfClass(ModeledDomainGenerator.RMAXFICTIOUSSTATENAME).size() > 0;
            if (fictitious || !PotentialShapedRMax.this.model.stateTransitionsAreModeled(state)) {
                return true;
            }
            return this.sourceModelTF.isTerminal(state);
        }
    }

    /* loaded from: input_file:burlap/behavior/singleagent/learning/modellearning/rmax/PotentialShapedRMax$RMaxPotential.class */
    /**
     * Potential function that returns a single constant for every state except the RMax
     * fictitious state, for which it returns {@code 0}.
     */
    public static class RMaxPotential implements PotentialFunction {
        /** Constant potential returned for non-fictitious states. */
        double vmax;

        /**
         * Sets the constant potential to {@code d / (1 - d2)}.
         *
         * @param d numerator of the potential (presumably the maximum one-step reward)
         * @param d2 value subtracted from one in the denominator (presumably the discount factor);
         *     a value of exactly {@code 1} makes the potential infinite or NaN
         */
        public RMaxPotential(double d, double d2) {
            this.vmax = d / (1.0d - d2);
        }

        /**
         * @param d the constant potential to return for non-fictitious states
         */
        public RMaxPotential(double d) {
            this.vmax = d;
        }

        /**
         * @param state the state to evaluate
         * @return {@code 0} if {@code state} contains an object of the fictitious-state class,
         *     otherwise the configured constant
         */
        @Override // burlap.behavior.singleagent.shaping.potential.PotentialFunction
        public double potentialValue(State state) {
            boolean fictitious = state.getObjectsOfClass(ModeledDomainGenerator.RMAXFICTIOUSSTATENAME).size() > 0;
            return fictitious ? 0.0d : this.vmax;
        }
    }

    /**
     * Creates an agent with a {@link TabularModel}, a {@link VIModelPlanner}, and the default
     * {@link RMaxPotential} built as {@code new RMaxPotential(d2, d)}.
     *
     * @param domain the real domain to learn in
     * @param rewardFunction the real reward function
     * @param terminalFunction the real terminal function
     * @param d forwarded to planner initialization and the model planner, and used as the second
     *     argument of the default potential (presumably the discount factor)
     * @param stateHashFactory hashing factory shared by the model and the planner
     * @param d2 first argument of the default potential (presumably the maximum reward)
     * @param i third argument of the {@link TabularModel} constructor (presumably the number of
     *     observations required before a transition counts as modeled; confirm against that class)
     * @param d3 forwarded to the {@link VIModelPlanner} constructor (presumably its convergence
     *     threshold; confirm against that class)
     * @param i2 forwarded to the {@link VIModelPlanner} constructor (presumably its iteration
     *     limit; confirm against that class)
     */
    public PotentialShapedRMax(Domain domain, RewardFunction rewardFunction, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, double d2, int i, double d3, int i2) {
        this(domain, rewardFunction, terminalFunction, d, stateHashFactory, new RMaxPotential(d2, d), i, d3, i2);
    }

    /**
     * Creates an agent with a {@link TabularModel}, a {@link VIModelPlanner}, and a caller-supplied
     * potential function.
     *
     * @param domain the real domain to learn in
     * @param rewardFunction the real reward function
     * @param terminalFunction the real terminal function
     * @param d forwarded to planner initialization and the model planner (presumably the discount
     *     factor)
     * @param stateHashFactory hashing factory shared by the model and the planner
     * @param potentialFunction the potential function used to shape the modeled reward
     * @param i third argument of the {@link TabularModel} constructor (presumably the number of
     *     observations required before a transition counts as modeled; confirm against that class)
     * @param d2 forwarded to the {@link VIModelPlanner} constructor (presumably its convergence
     *     threshold; confirm against that class)
     * @param i2 forwarded to the {@link VIModelPlanner} constructor (presumably its iteration
     *     limit; confirm against that class)
     */
    public PotentialShapedRMax(Domain domain, RewardFunction rewardFunction, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, PotentialFunction potentialFunction, int i, double d2, int i2) {
        plannerInit(domain, rewardFunction, terminalFunction, d, stateHashFactory);
        this.model = new TabularModel(domain, stateHashFactory, i);
        this.modeledDomain = new ModeledDomainGenerator(domain, this.model, true).generateDomain();
        this.modeledTerminalFunction = new PotentialShapedRMaxTerminal(this.model.getModelTF());
        this.modeledRewardFunction = new PotentialShapedRMaxRF(this.model.getModelRF(), potentialFunction);
        this.modelPlanner = new VIModelPlanner(this.modeledDomain, this.modeledRewardFunction, this.modeledTerminalFunction, d, stateHashFactory, d2, i2);
    }

    /**
     * Creates an agent with a caller-supplied model, potential function, and planner generator.
     *
     * @param domain the real domain to learn in
     * @param rewardFunction the real reward function
     * @param terminalFunction the real terminal function
     * @param d forwarded to planner initialization and the planner generator (presumably the
     *     discount factor)
     * @param stateHashFactory hashing factory forwarded to planner initialization
     * @param potentialFunction the potential function used to shape the modeled reward
     * @param model the model to learn
     * @param modelPlannerGenerator factory asked for the model planner
     */
    public PotentialShapedRMax(Domain domain, RewardFunction rewardFunction, TerminalFunction terminalFunction, double d, StateHashFactory stateHashFactory, PotentialFunction potentialFunction, Model model, ModelPlanner.ModelPlannerGenerator modelPlannerGenerator) {
        plannerInit(domain, rewardFunction, terminalFunction, d, stateHashFactory);
        this.model = model;
        this.modeledDomain = new ModeledDomainGenerator(domain, this.model, true).generateDomain();
        this.modeledTerminalFunction = new PotentialShapedRMaxTerminal(this.model.getModelTF());
        this.modeledRewardFunction = new PotentialShapedRMaxRF(this.model.getModelRF(), potentialFunction);
        this.modelPlanner = modelPlannerGenerator.getModelPlanner(this.modeledDomain, this.modeledRewardFunction, this.modeledTerminalFunction, d);
    }

    /** @return the model being learned */
    public Model getModel() {
        return this.model;
    }

    /** @return the domain generated from the learned model */
    public Domain getModeledDomain() {
        return this.modeledDomain;
    }

    /** @return the planner that plans on the modeled domain */
    public ModelPlanner getModelPlanner() {
        return this.modelPlanner;
    }

    /** @return the potential-shaped reward function given to the model planner */
    public RewardFunction getModeledRewardFunction() {
        return this.modeledRewardFunction;
    }

    /** @return the terminal function given to the model planner */
    public TerminalFunction getModeledTerminalFunction() {
        return this.modeledTerminalFunction;
    }

    /**
     * Runs a learning episode limited to {@link #maxNumSteps} steps.
     *
     * @param state the initial state of the episode
     * @return the recorded episode
     */
    @Override // burlap.behavior.singleagent.learning.LearningAgent
    public EpisodeAnalysis runLearningEpisodeFrom(State state) {
        return runLearningEpisodeFrom(state, this.maxNumSteps);
    }

    /**
     * Runs one learning episode: follows the currently planned policy in the real domain, updates
     * the model with each observed transition that is not yet fully modeled, and replans when the
     * model changes. The finished episode is appended to the stored history, evicting the oldest
     * episodes so that at most {@link #numEpisodesToStore} are kept.
     *
     * @param state the initial state of the episode
     * @param i the maximum number of steps to take
     * @return the recorded episode
     */
    @Override // burlap.behavior.singleagent.learning.LearningAgent
    public EpisodeAnalysis runLearningEpisodeFrom(State state, int i) {
        this.modelPlanner.initializePlannerIn(state);
        EpisodeAnalysis episode = new EpisodeAnalysis(state);
        Policy policy = createDomainMappedPolicy();
        State current = state;
        for (int step = 0; !this.tf.isTerminal(current) && step < i; step++) {
            GroundedAction action = (GroundedAction) policy.getAction(current);
            State next = action.executeIn(current);
            double reward = this.rf.reward(current, action, next);
            boolean observedTerminal = this.tf.isTerminal(next);
            episode.recordTransitionTo(action, next, reward);
            // Captured before the update so a change in the model's terminal prediction can be detected.
            boolean modeledTerminalBefore = this.model.getModelTF().isTerminal(next);
            if (!this.model.transitionIsModeled(current, action) || !this.model.stateTransitionsAreModeled(next)) {
                this.model.updateModel(current, action, next, reward, observedTerminal);
                // Replan if the transition is now modeled, or if the model's terminal prediction
                // disagreed with the observation and the update changed that prediction.
                boolean terminalPredictionCorrected = observedTerminal != modeledTerminalBefore
                        && modeledTerminalBefore != this.model.getModelTF().isTerminal(next);
                if (this.model.transitionIsModeled(current, action) || terminalPredictionCorrected) {
                    this.modelPlanner.modelChanged(current);
                    policy = createDomainMappedPolicy();
                }
            }
            current = next;
        }
        // Evict in a loop: the limit may have been lowered since earlier episodes were stored, in
        // which case removing a single episode would leave the history permanently over the limit.
        while (!this.episodeHistory.isEmpty() && this.episodeHistory.size() >= this.numEpisodesToStore) {
            this.episodeHistory.poll();
        }
        this.episodeHistory.offer(episode);
        return episode;
    }

    /**
     * Builds the policy followed during learning: the model planner's current policy wrapped in an
     * {@link UnmodeledFavoredPolicy} and mapped back to the real domain.
     *
     * @return a policy over the real domain
     */
    protected Policy createDomainMappedPolicy() {
        return new DomainMappedPolicy(this.domain, new UnmodeledFavoredPolicy(this.modelPlanner.modelPlannedPolicy(), this.model, this.modeledDomain.getActions()));
    }

    /**
     * @return the most recently stored learning episode
     * @throws java.util.NoSuchElementException if no episode has been stored yet
     */
    @Override // burlap.behavior.singleagent.learning.LearningAgent
    public EpisodeAnalysis getLastLearningEpisode() {
        return this.episodeHistory.getLast();
    }

    /**
     * Sets how many learning episodes are retained. Values below one are treated as one. Episodes
     * already stored are not removed by this call.
     *
     * @param i the requested number of episodes to keep
     */
    @Override // burlap.behavior.singleagent.learning.LearningAgent
    public void setNumEpisodesToStore(int i) {
        this.numEpisodesToStore = Math.max(i, 1);
    }

    /**
     * @return the stored learning episodes, oldest first; this is the live internal list, not a
     *     copy, so it reflects later episodes and resets
     */
    @Override // burlap.behavior.singleagent.learning.LearningAgent
    public List<EpisodeAnalysis> getAllStoredLearningEpisodes() {
        return this.episodeHistory;
    }

    /**
     * Not supported: this class is a learning agent, not a planner.
     *
     * @param state ignored
     * @throws UnsupportedOperationException always
     */
    @Override // burlap.behavior.singleagent.planning.OOMDPPlanner
    public void planFromState(State state) {
        throw new UnsupportedOperationException("Model learning algorithms should not be used as planning algorithms.");
    }

    /** Resets the learned model and the model planner, and discards all stored episodes. */
    @Override // burlap.behavior.singleagent.planning.OOMDPPlanner
    public void resetPlannerResults() {
        this.model.resetModel();
        this.modelPlanner.resetPlanner();
        this.episodeHistory.clear();
    }
}
