package burlap.behavior.singleagent.learning.actorcritic;

import burlap.behavior.singleagent.EpisodeAnalysis;
import burlap.behavior.singleagent.Policy;
import burlap.behavior.singleagent.learning.LearningAgent;
import burlap.behavior.singleagent.planning.OOMDPPlanner;
import burlap.oomdp.core.Domain;
import burlap.oomdp.core.State;
import burlap.oomdp.core.TerminalFunction;
import burlap.oomdp.singleagent.Action;
import burlap.oomdp.singleagent.GroundedAction;
import burlap.oomdp.singleagent.RewardFunction;
import java.util.LinkedList;
import java.util.List;

/* loaded from: input_file:burlap/behavior/singleagent/learning/actorcritic/ActorCritic.class */
/**
 * Learning agent that couples an {@link Actor} with a {@link Critic}.
 *
 * <p>During a learning episode the actor chooses each action, the action is executed to obtain
 * the next state, the critic critiques the resulting transition, and the actor is updated from
 * that critique. Completed episodes are kept in a bounded history (most recent last).
 *
 * <p>The class is also usable as a planner: {@link #planFromState(State)} simply runs
 * {@code numEpisodesForPlanning} learning episodes from the given state.
 */
public class ActorCritic extends OOMDPPlanner implements LearningAgent {
    /** Chooses actions, receives the critic's critiques, and is returned by {@link #getPolicy()}. */
    protected Actor actor;

    /** Critiques each observed transition; notified at the start and end of every episode. */
    protected Critic critic;

    /** Step limit used by {@link #runLearningEpisodeFrom(State)}. */
    protected int maxEpisodeSize;

    /** Number of learning episodes run by a single {@link #planFromState(State)} call. */
    protected int numEpisodesForPlanning;

    /** Stored learning episodes, oldest first and most recent last. */
    protected LinkedList<EpisodeAnalysis> episodeHistory;

    /** Maximum number of episodes retained in {@link #episodeHistory}. */
    protected int numEpisodesToStore;

    /**
     * Creates an agent whose episodes have no practical step limit
     * ({@code Integer.MAX_VALUE} steps).
     *
     * @param domain the domain, forwarded to {@code plannerInit}
     * @param rewardFunction the reward function, forwarded to {@code plannerInit}
     * @param terminalFunction the terminal function, forwarded to {@code plannerInit}
     * @param d forwarded as the fourth argument of {@code plannerInit}
     *          (presumably the discount factor -- confirm against {@code OOMDPPlanner})
     * @param actor the actor that selects actions
     * @param critic the critic that critiques transitions
     */
    public ActorCritic(Domain domain, RewardFunction rewardFunction, TerminalFunction terminalFunction, double d, Actor actor, Critic critic) {
        this(domain, rewardFunction, terminalFunction, d, actor, critic, Integer.MAX_VALUE);
    }

    /**
     * Creates an agent with an explicit default step limit per learning episode.
     *
     * @param domain the domain, forwarded to {@code plannerInit}
     * @param rewardFunction the reward function, forwarded to {@code plannerInit}
     * @param terminalFunction the terminal function, forwarded to {@code plannerInit}
     * @param d forwarded as the fourth argument of {@code plannerInit}
     *          (presumably the discount factor -- confirm against {@code OOMDPPlanner})
     * @param actor the actor that selects actions
     * @param critic the critic that critiques transitions
     * @param i the maximum number of steps taken by {@link #runLearningEpisodeFrom(State)}
     */
    public ActorCritic(Domain domain, RewardFunction rewardFunction, TerminalFunction terminalFunction, double d, Actor actor, Critic critic, int i) {
        this.actor = actor;
        this.critic = critic;
        this.maxEpisodeSize = i;
        this.numEpisodesForPlanning = 1;
        this.episodeHistory = new LinkedList<>();
        this.numEpisodesToStore = 1;
        plannerInit(domain, rewardFunction, terminalFunction, d, null);
    }

    /**
     * Registers the action with the superclass and additionally with both the actor and the
     * critic, so all three know about it.
     *
     * @param action the action to register
     */
    @Override // burlap.behavior.singleagent.planning.OOMDPPlanner
    public void addNonDomainReferencedAction(Action action) {
        super.addNonDomainReferencedAction(action);
        this.actor.addNonDomainReferencedAction(action);
        this.critic.addNonDomainReferencedAction(action);
    }

    /**
     * Runs one learning episode from {@code state}, limited to {@code maxEpisodeSize} steps.
     *
     * @param state the state in which the episode starts
     * @return the recorded episode
     */
    @Override // burlap.behavior.singleagent.learning.LearningAgent
    public EpisodeAnalysis runLearningEpisodeFrom(State state) {
        return runLearningEpisodeFrom(state, this.maxEpisodeSize);
    }

    /**
     * Runs one learning episode from {@code state}.
     *
     * <p>The episode stops as soon as the terminal function reports the current state as
     * terminal or {@code i} steps have been taken, whichever comes first. The finished episode
     * is appended to the stored history, evicting the oldest entries as needed.
     *
     * @param state the state in which the episode starts
     * @param i the maximum number of steps to take
     * @return the recorded episode
     */
    @Override // burlap.behavior.singleagent.learning.LearningAgent
    public EpisodeAnalysis runLearningEpisodeFrom(State state, int i) {
        EpisodeAnalysis episode = new EpisodeAnalysis(state);
        State current = state;
        this.critic.initializeEpisode(current);
        for (int step = 0; !this.tf.isTerminal(current) && step < i; step++) {
            GroundedAction chosen = (GroundedAction) this.actor.getAction(current);
            State next = chosen.executeIn(current);
            episode.recordTransitionTo(chosen, next, this.rf.reward(current, chosen, next));
            // The critic critiques the transition first; the actor then learns from that critique.
            // ("updateFromCritqique" is the spelling declared by Actor.)
            this.actor.updateFromCritqique(this.critic.critiqueAndUpdate(current, chosen, next));
            current = next;
        }
        this.critic.endEpisode();
        storeEpisode(episode);
        return episode;
    }

    /**
     * Appends {@code episode} to the history after evicting the oldest entries so that the
     * history never exceeds the configured limit.
     *
     * <p>Eviction loops rather than removing a single entry, so a limit that was lowered via
     * {@link #setNumEpisodesToStore(int)} is actually enforced. The limit is treated as at least
     * one, so the episode that just finished is always retained.
     */
    private void storeEpisode(EpisodeAnalysis episode) {
        int capacity = Math.max(this.numEpisodesToStore, 1);
        // capacity >= 1, so the list is non-empty whenever the condition holds and poll() always
        // removes an element; the loop therefore terminates.
        while (this.episodeHistory.size() >= capacity) {
            this.episodeHistory.poll();
        }
        this.episodeHistory.offer(episode);
    }

    /**
     * Returns the most recently stored learning episode.
     *
     * @return the last episode in the history
     * @throws java.util.NoSuchElementException if the history is empty (for example before any
     *         episode has been run, or right after {@link #resetPlannerResults()})
     */
    @Override // burlap.behavior.singleagent.learning.LearningAgent
    public EpisodeAnalysis getLastLearningEpisode() {
        return this.episodeHistory.getLast();
    }

    /**
     * Sets how many episodes are retained. The history is trimmed to the new limit when the next
     * learning episode is stored, not immediately.
     *
     * @param i the maximum number of episodes to keep; values below one behave like one
     */
    @Override // burlap.behavior.singleagent.learning.LearningAgent
    public void setNumEpisodesToStore(int i) {
        this.numEpisodesToStore = i;
    }

    /**
     * Returns the stored learning episodes, oldest first.
     *
     * @return the live internal history list (not a copy); changes made by later episodes or by
     *         {@link #resetPlannerResults()} are visible through it
     */
    @Override // burlap.behavior.singleagent.learning.LearningAgent
    public List<EpisodeAnalysis> getAllStoredLearningEpisodes() {
        return this.episodeHistory;
    }

    /**
     * Plans by running {@code numEpisodesForPlanning} learning episodes from {@code state}, each
     * with the default step limit.
     *
     * @param state the state every planning episode starts from
     */
    @Override // burlap.behavior.singleagent.planning.OOMDPPlanner
    public void planFromState(State state) {
        for (int episode = 0; episode < this.numEpisodesForPlanning; episode++) {
            runLearningEpisodeFrom(state);
        }
    }

    /**
     * Discards everything learned so far: clears the episode history and the inherited
     * {@code mapToStateIndex}, and resets the data held by the actor and the critic.
     */
    @Override // burlap.behavior.singleagent.planning.OOMDPPlanner
    public void resetPlannerResults() {
        this.episodeHistory.clear();
        this.mapToStateIndex.clear();
        this.actor.resetData();
        this.critic.resetData();
    }

    /**
     * Returns the policy being learned, which is the actor itself.
     *
     * @return the actor
     */
    public Policy getPolicy() {
        return this.actor;
    }
}
