package aima.learning.reinforcement;

import aima.probability.decision.MDPPolicy;
import aima.util.Pair;
import aima.util.Util;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:aima/learning/reinforcement/QTable.class */
/**
 * A table of Q-values keyed by (state, action) pairs.
 *
 * <p>Pairs that have never been written with {@link #setQValue} read back as
 * {@code 0.0}. The table can be updated in place ({@link #upDateQ}), rescaled
 * ({@link #normalize}) and turned into a policy that maps every recorded state
 * to its highest-valued recorded action ({@link #getPolicy}).
 *
 * @param <STATE_TYPE>  type of the states; used as part of a hash key, so it
 *                      needs consistent {@code equals}/{@code hashCode}
 * @param <ACTION_TYPE> type of the actions; same requirement as for states
 */
public class QTable<STATE_TYPE, ACTION_TYPE> {
    /** Recorded Q-values. {@link Hashtable} never stores {@code null} keys or values. */
    Hashtable<Pair<STATE_TYPE, ACTION_TYPE>, Double> table = new Hashtable<>();

    /** The candidate actions examined by {@link #maxDiff}. */
    private final List<ACTION_TYPE> allPossibleActions;

    /**
     * Creates an empty table.
     *
     * @param list the actions that {@link #maxDiff} chooses among; the list is
     *             stored by reference, not copied
     */
    public QTable(List<ACTION_TYPE> list) {
        this.allPossibleActions = list;
    }

    /**
     * Looks up the Q-value of a (state, action) pair.
     *
     * @param state_type  the state
     * @param action_type the action
     * @return the recorded value, or {@code 0.0} if the pair has no entry
     */
    public Double getQValue(STATE_TYPE state_type, ACTION_TYPE action_type) {
        Pair<STATE_TYPE, ACTION_TYPE> key = new Pair<>(state_type, action_type);
        // A single lookup is enough: the table cannot hold null values, so a
        // null result always means "no entry".
        Double recorded = this.table.get(key);
        return recorded != null ? recorded : Double.valueOf(0.0d);
    }

    /**
     * Finds the action {@code a'} that maximizes
     * {@code Q(state_type2, a') - Q(state_type, action_type)}.
     *
     * <p>The search starts from a randomly selected action and only replaces
     * it when a strictly larger difference is found, so ties are resolved in
     * favor of the random starting action.
     *
     * @param state_type  the state whose Q-value is subtracted
     * @param action_type the action whose Q-value is subtracted
     * @param state_type2 the state whose actions are compared
     * @return the chosen action paired with the maximal difference
     */
    public Pair<ACTION_TYPE, Double> maxDiff(STATE_TYPE state_type, ACTION_TYPE action_type, STATE_TYPE state_type2) {
        // Loop-invariant: the subtracted term does not depend on the candidate.
        double baseline = getQValue(state_type, action_type).doubleValue();

        // The cast keeps this compiling whether the helper is declared
        // generically or returns a plain Object; the element necessarily
        // comes from a List<ACTION_TYPE>.
        @SuppressWarnings("unchecked")
        ACTION_TYPE bestAction = (ACTION_TYPE) Util.selectRandomlyFromList(this.allPossibleActions);
        double bestDiff = getQValue(state_type2, bestAction).doubleValue() - baseline;

        for (ACTION_TYPE candidate : this.allPossibleActions) {
            double diff = getQValue(state_type2, candidate).doubleValue() - baseline;
            if (diff > bestDiff) {
                bestAction = candidate;
                bestDiff = diff;
            }
        }
        return new Pair<>(bestAction, Double.valueOf(bestDiff));
    }

    /**
     * Stores (or overwrites) the Q-value of a (state, action) pair.
     *
     * @param state_type  the state
     * @param action_type the action
     * @param d           the value to record; must not be {@code null}
     *                    because {@link Hashtable} rejects null values
     */
    public void setQValue(STATE_TYPE state_type, ACTION_TYPE action_type, Double d) {
        Pair<STATE_TYPE, ACTION_TYPE> key = new Pair<>(state_type, action_type);
        this.table.put(key, d);
    }

    /**
     * Updates {@code Q(state_type, action_type)} after a transition to
     * {@code state_type2} and returns the action selected by {@link #maxDiff}.
     *
     * <p>The new value is
     * {@code old + d * (d2 + d3 * maxDiff)}, where {@code maxDiff} is
     * {@code max over a' of (Q(state_type2, a') - old)}.
     *
     * <p>NOTE(review): because {@code d3} multiplies the whole difference, the
     * subtracted {@code old} term is scaled by {@code d3} as well. This is not
     * the textbook form {@code d2 + d3 * max Q(s', a') - old}; it is kept
     * as-is because existing callers may depend on the current numbers.
     *
     * @param state_type  the state the action was taken in
     * @param action_type the action that was taken
     * @param state_type2 the resulting state
     * @param d           factor applied to the whole correction (presumably the learning rate)
     * @param d2          term added inside the correction (presumably the reward)
     * @param d3          factor applied to the maximal difference (presumably the discount)
     * @return the action for which the difference was maximal
     */
    public ACTION_TYPE upDateQ(STATE_TYPE state_type, ACTION_TYPE action_type, STATE_TYPE state_type2, double d, double d2, double d3) {
        double oldValue = getQValue(state_type, action_type).doubleValue();
        Pair<ACTION_TYPE, Double> best = maxDiff(state_type, action_type, state_type2);
        double correction = d * (d2 + (d3 * best.getSecond().doubleValue()));
        setQValue(state_type, action_type, Double.valueOf(oldValue + correction));
        return best.getFirst();
    }

    /**
     * Divides every recorded value by the largest recorded value. Does
     * nothing when the table is empty or the largest value is {@code 0.0}.
     */
    public void normalize() {
        double max = findMaximumValue();
        if (max == 0.0d) {
            return;
        }
        // Entry.setValue replaces the value in place, so the table is never
        // structurally modified while it is being iterated.
        for (Map.Entry<Pair<STATE_TYPE, ACTION_TYPE>, Double> entry : this.table.entrySet()) {
            entry.setValue(Double.valueOf(entry.getValue().doubleValue() / max));
        }
    }

    /** Returns the largest recorded value, or {@code 0.0} for an empty table. */
    private double findMaximumValue() {
        boolean seenAny = false;
        double max = 0.0d;
        for (Double value : this.table.values()) {
            // The first value seeds the maximum unconditionally; later values
            // replace it only when strictly greater.
            if (!seenAny || value.doubleValue() > max) {
                max = value.doubleValue();
                seenAny = true;
            }
        }
        return max;
    }

    /**
     * Builds a policy that assigns, to every state with at least one recorded
     * entry, the recorded action with the highest Q-value for that state.
     *
     * @return a newly created policy
     */
    public MDPPolicy<STATE_TYPE, ACTION_TYPE> getPolicy() {
        MDPPolicy<STATE_TYPE, ACTION_TYPE> policy = new MDPPolicy<>();
        for (STATE_TYPE state : getAllStartingStates()) {
            policy.setAction(state, getRecordedActionWithMaximumQValue(state));
        }
        return policy;
    }

    /**
     * Returns the recorded action with the strictly largest value for the
     * given state, or {@code null} when no entry for that state has a value
     * greater than negative infinity.
     */
    private ACTION_TYPE getRecordedActionWithMaximumQValue(STATE_TYPE state_type) {
        double bestValue = Double.NEGATIVE_INFINITY;
        ACTION_TYPE bestAction = null;
        for (Map.Entry<Pair<STATE_TYPE, ACTION_TYPE>, Double> entry : this.table.entrySet()) {
            Pair<STATE_TYPE, ACTION_TYPE> key = entry.getKey();
            if (!key.getFirst().equals(state_type)) {
                continue;
            }
            double value = entry.getValue().doubleValue();
            if (value > bestValue) {
                bestValue = value;
                bestAction = key.getSecond();
            }
        }
        return bestAction;
    }

    /**
     * Returns the distinct states that appear as the first component of a
     * recorded key, in the order the table's key iteration first yields them.
     */
    private List<STATE_TYPE> getAllStartingStates() {
        // LinkedHashSet de-duplicates in constant time per key while keeping
        // first-encounter order.
        LinkedHashSet<STATE_TYPE> distinctStates = new LinkedHashSet<>();
        for (Pair<STATE_TYPE, ACTION_TYPE> key : this.table.keySet()) {
            distinctStates.add(key.getFirst());
        }
        return new ArrayList<>(distinctStates);
    }

    /** Returns the string form of the underlying table. */
    @Override
    public String toString() {
        return this.table.toString();
    }
}
