package aima.probability.decision;

import aima.probability.Randomizer;
import aima.util.Pair;
import java.util.List;

/* loaded from: input_file:aima/probability/decision/MDP.class */
/**
 * A Markov decision process assembled from an {@link MDPSource}.
 *
 * <p>The instance keeps the source's initial state, transition model, reward
 * function and the lists of non-final and terminal states, and offers several
 * value-iteration and policy-iteration routines on top of them.
 *
 * @param <STATE_TYPE>  type used to represent states
 * @param <ACTION_TYPE> type used to represent actions
 */
public class MDP<STATE_TYPE, ACTION_TYPE> {
    /** Number of policy-evaluation sweeps used by {@link #policyIteration(double)}. */
    private static final int DEFAULT_POLICY_EVALUATION_SWEEPS = 3;

    private final STATE_TYPE initialState;
    // Not final: emptyMdp() replaces these two on the freshly built instance.
    private MDPTransitionModel<STATE_TYPE, ACTION_TYPE> transitionModel;
    private MDPRewardFunction<STATE_TYPE> rewardFunction;
    private final List<STATE_TYPE> nonFinalstates;
    private final List<STATE_TYPE> terminalStates;
    private final MDPSource<STATE_TYPE, ACTION_TYPE> source;

    /**
     * Builds an MDP by reading every component from the given source.
     *
     * @param mDPSource provider of the initial state, transition model, reward
     *                  function and state lists; also kept for later delegation
     */
    public MDP(MDPSource<STATE_TYPE, ACTION_TYPE> mDPSource) {
        this.initialState = mDPSource.getInitialState();
        this.transitionModel = mDPSource.getTransitionModel();
        this.rewardFunction = mDPSource.getRewardFunction();
        this.nonFinalstates = mDPSource.getNonFinalStates();
        this.terminalStates = mDPSource.getFinalStates();
        this.source = mDPSource;
    }

    /**
     * Creates a new MDP over the same source whose reward function holds only the
     * reward of the initial state and whose transition model is freshly
     * constructed from this MDP's terminal states.
     *
     * @return the new, mostly blank MDP
     */
    public MDP<STATE_TYPE, ACTION_TYPE> emptyMdp() {
        MDP<STATE_TYPE, ACTION_TYPE> blank = new MDP<>(this.source);
        blank.rewardFunction = new MDPRewardFunction<>();
        blank.rewardFunction.setReward(this.initialState, Double.valueOf(this.rewardFunction.getRewardFor(this.initialState)));
        blank.transitionModel = new MDPTransitionModel<>(this.terminalStates);
        return blank;
    }

    /**
     * Runs value iteration until the largest per-state utility change of a sweep
     * no longer exceeds {@code d2 * (1 - d) / d}.
     *
     * <p>Each sweep snapshots the current utilities, then sets every non-final
     * state to {@code reward + d * bestExpectedUtility}, where the best expected
     * utility is evaluated against the snapshot.
     *
     * @param d  discount factor multiplied with the best expected successor utility
     * @param d2 error bound used to derive the termination threshold
     * @param d3 unused; retained so existing callers keep compiling
     * @return the snapshot taken at the start of the final sweep
     */
    public MDPUtilityFunction<STATE_TYPE> valueIteration(double d, double d2, double d3) {
        // Termination bound epsilon * (1 - gamma) / gamma. For d == 0 this is
        // +infinity (or NaN when d2 is 0 too), which ends the loop after one sweep.
        final double threshold = (d2 * (1.0d - d)) / d;
        MDPUtilityFunction<STATE_TYPE> current = initialUtilityFunction();
        MDPUtilityFunction<STATE_TYPE> previous;
        double maxChange;
        do {
            previous = current.copy();
            maxChange = 0.0d;
            for (STATE_TYPE state : this.nonFinalstates) {
                double bestExpected = this.transitionModel.getTransitionWithMaximumExpectedUtility(state, previous).getSecond().doubleValue();
                double updated = this.rewardFunction.getRewardFor(state) + (d * bestExpected);
                current.setUtility(state, updated);
                double change = Math.abs(updated - previous.getUtility(state).doubleValue());
                if (change > maxChange) {
                    maxChange = change;
                }
            }
            // Strict '>' so a zero threshold (d == 1 or d2 == 0) still terminates
            // once a sweep changes nothing.
        } while (maxChange > threshold);
        return previous;
    }

    /**
     * Applies {@link #valueIterateOnce(double, MDPUtilityFunction)} a fixed number
     * of times, starting from {@link #initialUtilityFunction()}.
     *
     * @param i number of sweeps to perform; no sweep is run when {@code i <= 0}
     * @param d discount factor passed to every sweep
     * @return the utilities after the last sweep
     */
    public MDPUtilityFunction<STATE_TYPE> valueIterationForFixedIterations(int i, double d) {
        MDPUtilityFunction<STATE_TYPE> utilities = initialUtilityFunction();
        for (int sweep = 0; sweep < i; sweep++) {
            utilities = valueIterateOnce(d, utilities).getFirst();
        }
        return utilities;
    }

    /**
     * Repeats {@link #valueIterateOnce(double, MDPUtilityFunction)} until the
     * largest utility change reported by a sweep is no greater than {@code d2}.
     * At least one sweep is always performed.
     *
     * @param d  discount factor passed to every sweep
     * @param d2 largest change still treated as "not yet converged" when exceeded
     * @return the utilities produced by the final sweep
     */
    public MDPUtilityFunction<STATE_TYPE> valueIterationTillMAximumUtilityGrowthFallsBelowErrorMargin(double d, double d2) {
        MDPUtilityFunction<STATE_TYPE> utilities = initialUtilityFunction();
        Pair<MDPUtilityFunction<STATE_TYPE>, Double> sweep;
        do {
            sweep = valueIterateOnce(d, utilities);
            utilities = sweep.getFirst();
        } while (sweep.getSecond().doubleValue() > d2);
        return utilities;
    }

    /**
     * Performs one value-iteration sweep without modifying the given utilities.
     *
     * <p>Every non-final state receives {@code reward + d * bestExpectedUtility}
     * computed against {@code mDPUtilityFunction}; terminal states keep the
     * utility they have in {@code mDPUtilityFunction}.
     *
     * @param d                  discount factor
     * @param mDPUtilityFunction utilities the sweep reads from
     * @return a pair of the newly built utility function and the largest absolute
     *         change observed over the non-final states
     */
    public Pair<MDPUtilityFunction<STATE_TYPE>, Double> valueIterateOnce(double d, MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction) {
        double maxChange = 0.0d;
        MDPUtilityFunction<STATE_TYPE> next = new MDPUtilityFunction<>();
        for (STATE_TYPE state : this.nonFinalstates) {
            double updated = valueIterateOnceForGivenState(d, mDPUtilityFunction, state);
            double change = Math.abs(updated - mDPUtilityFunction.getUtility(state).doubleValue());
            if (change > maxChange) {
                maxChange = change;
            }
            next.setUtility(state, updated);
        }
        // Copied once, after the non-final states, so terminal values win for any
        // state that appears in both lists.
        for (STATE_TYPE terminal : this.terminalStates) {
            next.setUtility(terminal, mDPUtilityFunction.getUtility(terminal).doubleValue());
        }
        return new Pair<>(next, Double.valueOf(maxChange));
    }

    /**
     * Computes the updated utility of a single state:
     * {@code reward(state) + d * bestExpectedUtility(state)}.
     */
    private double valueIterateOnceForGivenState(double d, MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction, STATE_TYPE state_type) {
        double bestExpected = this.transitionModel.getTransitionWithMaximumExpectedUtility(state_type, mDPUtilityFunction).getSecond().doubleValue();
        return this.rewardFunction.getRewardFor(state_type) + (d * bestExpected);
    }

    /**
     * Runs policy iteration using the default number of evaluation sweeps per round.
     *
     * @param d discount factor
     * @return the policy reached when an improvement pass changes no action
     */
    public MDPPolicy<STATE_TYPE, ACTION_TYPE> policyIteration(double d) {
        return policyIteration(d, DEFAULT_POLICY_EVALUATION_SWEEPS);
    }

    /**
     * Runs policy iteration starting from {@link #randomPolicy()}.
     *
     * <p>Each round evaluates the current policy for {@code evaluationSweeps}
     * sweeps, then switches a state's action whenever the best available expected
     * utility is strictly greater than the one obtained under the policy. The loop
     * ends after a round in which no action was switched.
     *
     * @param d                discount factor
     * @param evaluationSweeps number of sweeps passed to
     *                         {@link #policyEvaluation(MDPPolicy, MDPUtilityFunction, double, int)}
     * @return the final policy
     */
    public MDPPolicy<STATE_TYPE, ACTION_TYPE> policyIteration(double d, int evaluationSweeps) {
        MDPUtilityFunction<STATE_TYPE> utilities = initialUtilityFunction();
        MDPPolicy<STATE_TYPE, ACTION_TYPE> policy = randomPolicy();
        boolean unchanged;
        do {
            unchanged = true;
            utilities = policyEvaluation(policy, utilities, d, evaluationSweeps);
            for (STATE_TYPE state : this.nonFinalstates) {
                Pair<ACTION_TYPE, Double> best = this.transitionModel.getTransitionWithMaximumExpectedUtility(state, utilities);
                double underPolicy = this.transitionModel.getTransitionWithMaximumExpectedUtilityUsingPolicy(policy, state, utilities).getSecond().doubleValue();
                if (best.getSecond().doubleValue() > underPolicy) {
                    policy.setAction(state, best.getFirst());
                    unchanged = false;
                }
            }
        } while (!unchanged);
        return policy;
    }

    /**
     * Evaluates a policy by applying {@code i} policy-restricted sweeps to a copy
     * of the given utilities; the argument itself is not modified here.
     *
     * @param mDPPolicy          policy to evaluate
     * @param mDPUtilityFunction starting utilities
     * @param d                  discount factor
     * @param i                  number of sweeps; with {@code i <= 0} a plain copy is returned
     * @return the utilities after the last sweep
     */
    public MDPUtilityFunction<STATE_TYPE> policyEvaluation(MDPPolicy<STATE_TYPE, ACTION_TYPE> mDPPolicy, MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction, double d, int i) {
        MDPUtilityFunction<STATE_TYPE> utilities = mDPUtilityFunction.copy();
        for (int sweep = 0; sweep < i; sweep++) {
            utilities = valueIterateOnceWith(d, mDPPolicy, utilities);
        }
        return utilities;
    }

    /**
     * One sweep restricted to the given policy: starts from a copy of the
     * utilities and overwrites each non-final state with
     * {@code reward + d * expectedUtilityUnderPolicy}, read from the uncopied input.
     */
    private MDPUtilityFunction<STATE_TYPE> valueIterateOnceWith(double d, MDPPolicy<STATE_TYPE, ACTION_TYPE> mDPPolicy, MDPUtilityFunction<STATE_TYPE> mDPUtilityFunction) {
        MDPUtilityFunction<STATE_TYPE> next = mDPUtilityFunction.copy();
        for (STATE_TYPE state : this.nonFinalstates) {
            double underPolicy = this.transitionModel.getTransitionWithMaximumExpectedUtilityUsingPolicy(mDPPolicy, state, mDPUtilityFunction).getSecond().doubleValue();
            next.setUtility(state, this.rewardFunction.getRewardFor(state) + (d * underPolicy));
        }
        return next;
    }

    /**
     * Builds a policy that assigns every non-final state the action returned by
     * the transition model's {@code randomActionFor}.
     *
     * @return the newly created policy
     */
    public MDPPolicy<STATE_TYPE, ACTION_TYPE> randomPolicy() {
        MDPPolicy<STATE_TYPE, ACTION_TYPE> policy = new MDPPolicy<>();
        for (STATE_TYPE state : this.nonFinalstates) {
            policy.setAction(state, this.transitionModel.randomActionFor(state));
        }
        return policy;
    }

    /**
     * @return the reward function viewed as a utility function, as produced by
     *         {@code asUtilityFunction()}
     */
    public MDPUtilityFunction<STATE_TYPE> initialUtilityFunction() {
        return this.rewardFunction.asUtilityFunction();
    }

    /** @return the initial state obtained from the source at construction time */
    public STATE_TYPE getInitialState() {
        return this.initialState;
    }

    /**
     * @param state_type state to look up
     * @return the reward the reward function reports for that state
     */
    public double getRewardFor(STATE_TYPE state_type) {
        return this.rewardFunction.getRewardFor(state_type);
    }

    /**
     * Stores a reward for a state in the reward function.
     *
     * @param state_type state to update
     * @param d          reward value
     */
    public void setReward(STATE_TYPE state_type, double d) {
        this.rewardFunction.setReward(state_type, Double.valueOf(d));
    }

    /**
     * Stores the probability of a transition in the transition model.
     *
     * @param mDPTransition transition supplying the initial state, action and destination state
     * @param d             probability to record
     */
    public void setTransitionProbability(MDPTransition<STATE_TYPE, ACTION_TYPE> mDPTransition, double d) {
        this.transitionModel.setTransitionProbability(mDPTransition.getInitialState(), mDPTransition.getAction(), mDPTransition.getDestinationState(), d);
    }

    /**
     * @param mDPTransition transition supplying the initial state, action and destination state
     * @return the probability the transition model reports for that triple
     */
    public double getTransitionProbability(MDPTransition<STATE_TYPE, ACTION_TYPE> mDPTransition) {
        return this.transitionModel.getTransitionProbability(mDPTransition.getInitialState(), mDPTransition.getAction(), mDPTransition.getDestinationState());
    }

    /**
     * Delegates execution of an action in a state to the underlying source.
     *
     * @param state_type  state in which the action is taken
     * @param action_type action to execute
     * @param randomizer  randomizer handed through to the source
     * @return the perception returned by the source
     */
    public MDPPerception<STATE_TYPE> execute(STATE_TYPE state_type, ACTION_TYPE action_type, Randomizer randomizer) {
        return this.source.execute(state_type, action_type, randomizer);
    }

    /**
     * @param state_type state to test
     * @return {@code true} when the terminal-state list contains the state
     */
    public boolean isTerminalState(STATE_TYPE state_type) {
        return this.terminalStates.contains(state_type);
    }

    /**
     * @param state_type  starting state
     * @param action_type action taken
     * @return the transitions the transition model holds for that state and action
     */
    public List<MDPTransition<STATE_TYPE, ACTION_TYPE>> getTransitionsWith(STATE_TYPE state_type, ACTION_TYPE action_type) {
        return this.transitionModel.getTransitionsWithStartingStateAndAction(state_type, action_type);
    }

    /** @return all actions as reported by the underlying source */
    public List<ACTION_TYPE> getAllActions() {
        return this.source.getAllActions();
    }

    /**
     * Describes the initial state, reward function, transition model and
     * non-final states. Null components are rendered as {@code "null"}.
     */
    @Override
    public String toString() {
        // Plain concatenation instead of explicit toString() calls keeps this null-safe.
        return "initial State = " + this.initialState + "\n rewardFunction = " + this.rewardFunction + "\n transitionModel = " + this.transitionModel + "\n states = " + this.nonFinalstates;
    }
}
