package aima.learning.reinforcement;

import aima.probability.decision.MDP;
import aima.probability.decision.MDPPerception;
import aima.probability.decision.MDPPolicy;
import aima.probability.decision.MDPTransition;
import aima.probability.decision.MDPUtilityFunction;
import aima.util.Pair;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;

/* loaded from: input_file:aima/learning/reinforcement/PassiveADPAgent.class */
/**
 * An {@link MDPAgent} that follows a fixed, externally supplied policy while
 * building up an empirical model of the environment it is acting in.
 *
 * <p>On every decision step the agent
 * <ol>
 *   <li>records the reward of a state the first time it is perceived,</li>
 *   <li>increments the visit counts for the previous (state, action) pair and
 *       for the (state, action, next state) transition just taken,</li>
 *   <li>re-derives every known transition probability as
 *       {@code count(s, a, s') / count(s, a)} and stores it in the agent's MDP,</li>
 *   <li>re-computes the utility of the previous state from that model.</li>
 * </ol>
 *
 * <p>NOTE(review): the class name suggests this is the passive
 * adaptive-dynamic-programming learner from AIMA; confirm against the
 * project documentation.
 *
 * @param <STATE_TYPE>  type used to represent environment states
 * @param <ACTION_TYPE> type used to represent actions
 */
public class PassiveADPAgent<STATE_TYPE, ACTION_TYPE> extends MDPAgent<STATE_TYPE, ACTION_TYPE> {
    /** Weight applied to the expected successor utility when backing up a state's utility. */
    private static final double DISCOUNT_FACTOR = 1.0d;

    /** Fixed policy that dictates which action is taken in each state. */
    private final MDPPolicy<STATE_TYPE, ACTION_TYPE> policy;

    /** Current utility estimates; replaced by an updated copy after each observed transition. */
    private MDPUtilityFunction<STATE_TYPE> utilityFunction;

    /** Number of times each (state, action) pair has been executed. */
    private final Hashtable<Pair<STATE_TYPE, ACTION_TYPE>, Double> nsa;

    /** Number of times each (state, action, resulting state) transition has been observed. */
    private final Hashtable<MDPTransition<STATE_TYPE, ACTION_TYPE>, Double> nsasdash;

    /**
     * Creates an agent that starts from {@code mdp.emptyMdp()} and acts
     * according to the given policy.
     *
     * @param mdp        MDP whose {@code emptyMdp()} result is handed to the superclass
     * @param mDPPolicy  policy consulted for every action choice
     */
    public PassiveADPAgent(MDP<STATE_TYPE, ACTION_TYPE> mdp, MDPPolicy<STATE_TYPE, ACTION_TYPE> mDPPolicy) {
        super(mdp.emptyMdp());
        this.policy = mDPPolicy;
        this.utilityFunction = new MDPUtilityFunction<>();
        this.nsa = new Hashtable<>();
        this.nsasdash = new Hashtable<>();
    }

    /**
     * Updates the learned model with the latest perception and returns the
     * action prescribed by the policy for the current state.
     *
     * <p>This method reads the inherited {@code currentState} but never
     * assigns it; presumably the superclass updates it from the perception
     * before delegating here - TODO confirm in {@code MDPAgent}.
     *
     * @param mDPPerception the state/reward pair just perceived
     * @return the policy's action for the current state, or {@code null} when
     *         the current state is terminal
     */
    @Override // aima.learning.reinforcement.MDPAgent
    public ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> mDPPerception) {
        STATE_TYPE perceivedState = mDPPerception.getState();
        if (!this.utilityFunction.hasUtilityFor(perceivedState)) {
            // First visit: seed both the utility estimate and the reward model.
            double reward = mDPPerception.getReward();
            this.utilityFunction.setUtility(perceivedState, reward);
            this.mdp.setReward(perceivedState, reward);
        }

        if (this.previousState != null) {
            incrementCount(this.nsa, new Pair<>(this.previousState, this.previousAction));
            incrementCount(this.nsasdash,
                    new MDPTransition<>(this.previousState, this.previousAction, this.currentState));
            refreshTransitionProbabilities();
            this.utilityFunction = valueDetermination(
                    this.mdp.getTransitionsWith(this.previousState, this.policy.getAction(this.previousState)),
                    DISCOUNT_FACTOR);
        }

        if (this.mdp.isTerminalState(this.currentState)) {
            // Clearing the history means the next call records no transition.
            this.previousState = null;
            this.previousAction = null;
        } else {
            this.previousState = this.currentState;
            this.previousAction = this.policy.getAction(this.currentState);
        }
        return this.previousAction;
    }

    /** Adds one to the count stored under {@code key}, starting from 1.0 when the key is absent. */
    private static <K> void incrementCount(Hashtable<K, Double> counts, K key) {
        Double seen = counts.get(key);
        counts.put(key, seen == null ? 1.0d : seen + 1.0d);
    }

    /** Stores {@code count(s, a, s') / count(s, a)} in the MDP for every transition counted so far. */
    private void refreshTransitionProbabilities() {
        for (Map.Entry<MDPTransition<STATE_TYPE, ACTION_TYPE>, Double> entry : this.nsasdash.entrySet()) {
            MDPTransition<STATE_TYPE, ACTION_TYPE> transition = entry.getKey();
            double transitionCount = entry.getValue();
            if (transitionCount != 0.0d) {
                Pair<STATE_TYPE, ACTION_TYPE> stateAction =
                        new Pair<>(transition.getInitialState(), transition.getAction());
                double stateActionCount = this.nsa.get(stateAction);
                this.mdp.setTransitionProbability(transition, transitionCount / stateActionCount);
            }
        }
    }

    /**
     * Returns a copy of the current utility function in which the utility of
     * the transitions' common initial state is replaced by
     * {@code reward + discount * sum(P(t) * U(destination(t)))}.
     *
     * <p>Only the initial state of the first transition is updated; an empty
     * list yields an unmodified copy.
     *
     * @param transitions transitions whose initial state is to be re-evaluated
     * @param discount    weight applied to the expected successor utility
     * @return the updated copy; the current utility function is not modified
     */
    private MDPUtilityFunction<STATE_TYPE> valueDetermination(
            List<MDPTransition<STATE_TYPE, ACTION_TYPE>> transitions, double discount) {
        MDPUtilityFunction<STATE_TYPE> updated = this.utilityFunction.copy();
        if (!transitions.isEmpty()) {
            STATE_TYPE source = transitions.get(0).getInitialState();
            double reward = this.mdp.getRewardFor(source);
            double expectedUtility = 0.0d;
            for (MDPTransition<STATE_TYPE, ACTION_TYPE> transition : transitions) {
                // Successor utilities are read from the current estimates, not from the copy.
                expectedUtility += this.mdp.getTransitionProbability(transition)
                        * this.utilityFunction.getUtility(transition.getDestinationState()).doubleValue();
            }
            updated.setUtility(source, reward + (discount * expectedUtility));
        }
        return updated;
    }

    /**
     * Returns the agent's current utility estimates.
     *
     * @return the utility function object currently held by the agent (not a copy)
     */
    public MDPUtilityFunction<STATE_TYPE> getUtilityFunction() {
        return this.utilityFunction;
    }
}
