package burlap.behavior.stochasticgame.mavaluefunction.policies;

import burlap.behavior.singleagent.Policy;
import burlap.behavior.stochasticgame.JointPolicy;
import burlap.behavior.stochasticgame.agents.maql.MultiAgentQLearning;
import burlap.behavior.stochasticgame.mavaluefunction.AgentQSourceMap;
import burlap.behavior.stochasticgame.mavaluefunction.MAQSourcePolicy;
import burlap.behavior.stochasticgame.mavaluefunction.MultiAgentQSourceProvider;
import burlap.datastructures.HashedAggregator;
import burlap.debugtools.RandomFactory;
import burlap.oomdp.core.AbstractGroundedAction;
import burlap.oomdp.core.State;
import burlap.oomdp.stochasticgames.JointAction;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Random;

/* loaded from: input_file:burlap/behavior/stochasticgame/mavaluefunction/policies/EGreedyMaxWellfare.class */
/**
 * An epsilon-greedy joint policy over joint actions. With probability {@code epsilon} a joint
 * action is drawn uniformly from all joint actions available in the state; otherwise a joint
 * action that maximizes the sum of every participating agent's Q-value (the "welfare" of the
 * joint action) is returned.
 *
 * <p>Q-values are read from the {@link MultiAgentQSourceProvider} supplied either at construction
 * or through {@link #setQSourceProvider(MultiAgentQSourceProvider)}; a provider must be set before
 * the greedy branch is evaluated.
 */
public class EGreedyMaxWellfare extends MAQSourcePolicy {

    /** Source of the per-agent Q-values used to score joint actions; may be null until set. */
    protected MultiAgentQSourceProvider qSourceProvider;

    /** Probability of selecting a uniformly random joint action instead of a greedy one. */
    protected double epsilon;

    /** Random generator used for exploration and tie-breaking. */
    protected Random rand;

    /**
     * When true, every joint action tied for the maximum welfare is eligible and one is drawn at
     * random; when false, the first maximizer in iteration order is used.
     */
    protected boolean breakTiesRandomly;

    /**
     * Creates a policy with random tie-breaking enabled and no Q-source provider set.
     *
     * @param d the exploration probability (epsilon)
     */
    public EGreedyMaxWellfare(double d) {
        this(d, true);
    }

    /**
     * Creates a policy with no Q-source provider set.
     *
     * @param d the exploration probability (epsilon)
     * @param z whether ties for the maximum welfare are broken randomly
     */
    public EGreedyMaxWellfare(double d, boolean z) {
        this.rand = RandomFactory.getMapped(0);
        this.epsilon = d;
        this.breakTiesRandomly = z;
    }

    /**
     * Creates a policy with random tie-breaking enabled.
     *
     * @param multiAgentQLearning the learner used as the Q-source provider
     * @param d the exploration probability (epsilon)
     */
    public EGreedyMaxWellfare(MultiAgentQLearning multiAgentQLearning, double d) {
        this(multiAgentQLearning, d, true);
    }

    /**
     * Creates a fully specified policy.
     *
     * @param multiAgentQLearning the learner used as the Q-source provider
     * @param d the exploration probability (epsilon)
     * @param z whether ties for the maximum welfare are broken randomly
     */
    public EGreedyMaxWellfare(MultiAgentQLearning multiAgentQLearning, double d, boolean z) {
        this(d, z);
        this.qSourceProvider = multiAgentQLearning;
    }

    /**
     * Sets whether ties for the maximum welfare are broken randomly.
     *
     * @param z true to choose randomly among tied maximizers; false to use the first one found
     */
    public void setBreakTiesRandomly(boolean z) {
        this.breakTiesRandomly = z;
    }

    /**
     * Sets the provider from which per-agent Q-values are read.
     *
     * @param multiAgentQSourceProvider the Q-source provider to use
     */
    @Override // burlap.behavior.stochasticgame.mavaluefunction.MAQSourcePolicy
    public void setQSourceProvider(MultiAgentQSourceProvider multiAgentQSourceProvider) {
        this.qSourceProvider = multiAgentQSourceProvider;
    }

    /**
     * Selects a joint action for the given state: a uniformly random one with probability
     * {@code epsilon}, otherwise one with maximum summed Q-value.
     *
     * @param state the state in which to act
     * @return the selected {@link JointAction}
     * @throws IllegalStateException if the greedy branch is taken and no Q-source provider is set
     */
    @Override // burlap.behavior.singleagent.Policy
    public AbstractGroundedAction getAction(State state) {
        List<JointAction> candidates = getAllJointActions(state);

        // Exploration: uniform draw over every joint action.
        if (this.rand.nextDouble() < this.epsilon) {
            return candidates.get(this.rand.nextInt(candidates.size()));
        }

        List<JointAction> best = maxWelfareJointActions(state, candidates);
        // Only consume a random number when there is an actual tie to break.
        if (best.size() == 1) {
            return best.get(0);
        }
        return best.get(this.rand.nextInt(best.size()));
    }

    /**
     * Returns the probability of each joint action under this policy for the given state. Every
     * joint action receives {@code epsilon / n}; the remaining {@code 1 - epsilon} is split evenly
     * among the eligible maximum-welfare joint actions. Joint actions whose total probability is
     * not positive are omitted.
     *
     * @param state the state for which to compute the distribution
     * @return the joint actions with positive probability, paired with their probabilities
     * @throws IllegalStateException if no Q-source provider is set
     */
    @Override // burlap.behavior.singleagent.Policy
    public List<Policy.ActionProb> getActionDistributionForState(State state) {
        List<JointAction> candidates = getAllJointActions(state);
        HashedAggregator<JointAction> probabilities = new HashedAggregator<JointAction>();

        double exploreShare = this.epsilon / candidates.size();
        for (JointAction candidate : candidates) {
            probabilities.add(candidate, exploreShare);
        }

        List<JointAction> best = maxWelfareJointActions(state, candidates);
        double greedyShare = (1.0d - this.epsilon) / best.size();
        for (JointAction maximizer : best) {
            probabilities.add(maximizer, greedyShare);
        }

        List<Policy.ActionProb> distribution = new ArrayList<Policy.ActionProb>(candidates.size());
        for (JointAction candidate : candidates) {
            double p = probabilities.v(candidate);
            if (p > 0.0d) {
                distribution.add(new Policy.ActionProb(candidate, p));
            }
        }
        return distribution;
    }

    /**
     * Reports whether action selection involves randomness.
     *
     * @return true when {@code epsilon} is positive or random tie-breaking is enabled
     */
    @Override // burlap.behavior.singleagent.Policy
    public boolean isStochastic() {
        return this.epsilon > 0.0d || this.breakTiesRandomly;
    }

    /**
     * Reports whether this policy is defined for the given state.
     *
     * @param state the state to query (unused)
     * @return always true
     */
    @Override // burlap.behavior.singleagent.Policy
    public boolean isDefinedFor(State state) {
        return true;
    }

    /**
     * Intentionally does nothing: nothing in this class reads a target agent.
     *
     * @param str the target agent name (ignored)
     */
    @Override // burlap.behavior.stochasticgame.JointPolicy
    public void setTargetAgent(String str) {
    }

    /**
     * Creates a new policy with the same epsilon and tie-breaking setting. The copy is given this
     * policy's {@code agentsInJointPolicy} and the same Q-source provider reference.
     *
     * @return the new policy
     */
    @Override // burlap.behavior.stochasticgame.JointPolicy
    public JointPolicy copy() {
        EGreedyMaxWellfare duplicate = new EGreedyMaxWellfare(this.epsilon, this.breakTiesRandomly);
        duplicate.setAgentsInJointPolicy(this.agentsInJointPolicy);
        duplicate.setQSourceProvider(this.qSourceProvider);
        return duplicate;
    }

    /**
     * Finds the joint actions eligible for greedy selection: those whose summed per-agent Q-value
     * is maximal. With random tie-breaking disabled only the first maximizer is kept.
     *
     * <p>If no candidate could be ranked (for example every sum is NaN, or every sum is negative
     * infinity while tie-breaking is disabled) the candidates are treated as tied: all of them are
     * returned when tie-breaking is enabled, otherwise only the first. The result is empty only
     * when {@code candidates} is empty.
     *
     * @param state the state in which the joint actions are evaluated
     * @param candidates the joint actions to rank
     * @return the eligible maximum-welfare joint actions, in candidate order
     * @throws IllegalStateException if no Q-source provider is set
     */
    private List<JointAction> maxWelfareJointActions(State state, List<JointAction> candidates) {
        if (this.qSourceProvider == null) {
            throw new IllegalStateException(
                    "No MultiAgentQSourceProvider set; call setQSourceProvider before querying the policy.");
        }
        AgentQSourceMap qSources = this.qSourceProvider.getQSources();

        List<JointAction> best = new ArrayList<JointAction>(candidates.size());
        double bestWelfare = Double.NEGATIVE_INFINITY;
        for (JointAction candidate : candidates) {
            double welfare = 0.0d;
            for (String agentName : candidate.getAgentNames()) {
                welfare += qSources.agentQSource(agentName).getQValueFor(state, candidate).q;
            }
            if (welfare > bestWelfare) {
                best.clear();
                best.add(candidate);
                bestWelfare = welfare;
            } else if (welfare == bestWelfare && this.breakTiesRandomly) {
                best.add(candidate);
            }
        }

        // NaN never compares equal or greater, and -Infinity never beats the initial bound, so the
        // loop can finish without picking anything. Fall back instead of failing in nextInt(0) or
        // dividing by zero in the callers.
        if (best.isEmpty() && !candidates.isEmpty()) {
            if (this.breakTiesRandomly) {
                best.addAll(candidates);
            } else {
                best.add(candidates.get(0));
            }
        }
        return best;
    }
}
