package aima.learning.reinforcement;

import aima.probability.decision.MDP;
import aima.probability.decision.MDPPerception;
import aima.probability.decision.MDPPolicy;
import aima.probability.decision.MDPUtilityFunction;
import aima.util.FrequencyCounter;

/* loaded from: input_file:aima/learning/reinforcement/PassiveTDAgent.class */
/**
 * Agent that follows a fixed policy and maintains a per-state utility
 * estimate, revising the estimate of the previously visited state on every
 * call to {@link #decideAction(MDPPerception)} using
 * {@code U(s) <- U(s) + alpha * (r + gamma * U(s') - U(s))}.
 *
 * <p>{@code currentState}, {@code currentReward}, {@code previousState},
 * {@code previousAction} and {@code mdp} are inherited from {@link MDPAgent};
 * this class never assigns {@code currentState} or {@code currentReward}.
 * NOTE(review): they are presumably refreshed by the superclass before each
 * {@code decideAction} call - confirm against {@code MDPAgent}.
 *
 * @param <STATE_TYPE>  type used to identify states
 * @param <ACTION_TYPE> type used to identify actions
 */
public class PassiveTDAgent<STATE_TYPE, ACTION_TYPE> extends MDPAgent<STATE_TYPE, ACTION_TYPE> {
    /** Fixed policy consulted for the action to take in each non-terminal state. */
    private final MDPPolicy<STATE_TYPE, ACTION_TYPE> policy;

    /** Current utility estimates; replaced by an updated copy after each transition. */
    private MDPUtilityFunction<STATE_TYPE> utilityFunction;

    /** Counter incremented once for the previous state on every observed transition. */
    private final FrequencyCounter<STATE_TYPE> stateCount;

    /** Reward recorded together with {@code previousState}; {@code null} when there is none. */
    private Double previousReward;

    /**
     * Creates an agent that starts with no utility estimates and no visit counts.
     *
     * @param mdp       source of the model handed to the superclass, via {@code mdp.emptyMdp()}
     * @param mDPPolicy policy the agent follows
     */
    public PassiveTDAgent(MDP<STATE_TYPE, ACTION_TYPE> mdp, MDPPolicy<STATE_TYPE, ACTION_TYPE> mDPPolicy) {
        super(mdp.emptyMdp());
        this.policy = mDPPolicy;
        this.utilityFunction = new MDPUtilityFunction<>();
        this.stateCount = new FrequencyCounter<>();
    }

    /**
     * Processes one perception: seeds the estimate of a state seen for the
     * first time, updates the estimate of the previous state if there is one,
     * and then selects the next action from the policy.
     *
     * @param mDPPerception the state/reward pair just observed
     * @return the policy's action for {@code currentState}, or {@code null}
     *         when {@code currentState} is terminal
     */
    @Override // aima.learning.reinforcement.MDPAgent
    public ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> mDPPerception) {
        final STATE_TYPE perceivedState = mDPPerception.getState();

        // A state without an estimate starts at its observed reward, which is
        // also recorded in the agent's own model.
        if (!utilityFunction.hasUtilityFor(perceivedState)) {
            utilityFunction.setUtility(perceivedState, mDPPerception.getReward());
            mdp.setReward(perceivedState, mDPPerception.getReward());
        }

        // A transition was observed: count the visit, then apply the update
        // with a discount factor of 1.
        if (previousState != null) {
            stateCount.incrementFor(previousState);
            utilityFunction = updateUtilityFunction(1.0d);
        }

        final boolean atTerminalState = mdp.isTerminalState(currentState);
        if (!atTerminalState) {
            // Remember this step so the next call can update its estimate.
            previousState = currentState;
            previousAction = policy.getAction(currentState);
            previousReward = currentReward;
        } else {
            // Clear the remembered step so the next call performs no update.
            previousState = null;
            previousAction = null;
            previousReward = null;
        }
        return previousAction;
    }

    /**
     * Builds a copy of the current utility function in which the estimate of
     * {@code previousState} has been moved toward
     * {@code previousReward + gamma * U(currentState)}.
     *
     * <p>The step size is {@code stateCount.probabilityOf(previousState)}.
     * NOTE(review): presumably the relative visit frequency of that state -
     * confirm against {@code FrequencyCounter}.
     *
     * @param gamma discount factor applied to the estimate of {@code currentState}
     * @return the updated copy; the function currently held by the agent is not modified here
     */
    private MDPUtilityFunction<STATE_TYPE> updateUtilityFunction(double gamma) {
        final MDPUtilityFunction<STATE_TYPE> revised = utilityFunction.copy();

        final double oldEstimate = utilityFunction.getUtility(previousState).doubleValue();
        final double stepSize = stateCount.probabilityOf(previousState).doubleValue();
        final double reward = previousReward.doubleValue();
        final double successorEstimate = utilityFunction.getUtility(currentState).doubleValue();

        final double correction = reward + ((gamma * successorEstimate) - oldEstimate);
        revised.setUtility(previousState, oldEstimate + (stepSize * correction));
        return revised;
    }

    /**
     * Returns the utility estimates learned so far.
     *
     * @return the agent's current utility function (the live instance, not a copy)
     */
    public MDPUtilityFunction<STATE_TYPE> getUtilityFunction() {
        return utilityFunction;
    }
}
