package org.tweetyproject.machinelearning.rl.mdp.algorithms;

import java.util.HashMap;
import java.util.Iterator;
import org.tweetyproject.logics.translators.adfpossibilistic.PossibilityDistribution;
import org.tweetyproject.machinelearning.rl.mdp.Action;
import org.tweetyproject.machinelearning.rl.mdp.MarkovDecisionProcess;
import org.tweetyproject.machinelearning.rl.mdp.Policy;
import org.tweetyproject.machinelearning.rl.mdp.State;

/* loaded from: input_file:org.tweetyproject.machinelearning-1.24.jar:org/tweetyproject/machinelearning/rl/mdp/algorithms/ValueIteration.class */
/**
 * Offline planning algorithm that runs a fixed number of value-iteration sweeps over a
 * Markov decision process and then derives a policy from the resulting state utilities.
 *
 * @param <S> the type of states
 * @param <A> the type of actions
 */
public class ValueIteration<S extends State, A extends Action> extends OfflineAlgorithm<S, A> {
    /** Number of utility-update sweeps performed by {@link #getPolicy(MarkovDecisionProcess, double)}. */
    private final long num_iterations;

    /**
     * Creates a new value-iteration algorithm.
     *
     * @param j the number of sweeps to perform; a value of zero or less results in no sweeps
     */
    public ValueIteration(long j) {
        this.num_iterations = j;
    }

    /**
     * Computes state utilities by repeatedly applying the update
     * {@code U'(s) = max_a sum_s' P(s,a,s') * (R(s,a,s') + d * U(s'))} and hands the final
     * utilities to the inherited three-argument {@code getPolicy} to obtain the policy.
     *
     * @param markovDecisionProcess the process whose states, actions, transition
     *        probabilities and rewards are queried
     * @param d the factor applied to the utility of the successor state in each update
     * @return the policy derived from the utilities after the configured number of sweeps
     */
    @Override // org.tweetyproject.machinelearning.rl.mdp.algorithms.OfflineAlgorithm
    public Policy<S, A> getPolicy(MarkovDecisionProcess<S, A> markovDecisionProcess, double d) {
        // NOTE(review): LOWER_BOUND comes from an unrelated package and is presumably 0.0
        // (looks like a decompiler constant substitution) - confirm before replacing it.
        HashMap<S, Double> utilities = new HashMap<>();
        for (S state : markovDecisionProcess.getStates()) {
            utilities.put(state, PossibilityDistribution.LOWER_BOUND);
        }
        // The counter must be a long: num_iterations is a long, and an int counter would wrap
        // around to a negative value (and loop forever) for counts above Integer.MAX_VALUE.
        for (long iteration = 0; iteration < this.num_iterations; iteration++) {
            // Each sweep reads only the previous sweep's utilities and writes into a fresh map.
            HashMap<S, Double> updated = new HashMap<>();
            for (S state : markovDecisionProcess.getStates()) {
                if (markovDecisionProcess.isTerminal(state)) {
                    updated.put(state, PossibilityDistribution.LOWER_BOUND);
                } else {
                    // Stays at negative infinity if the process offers no actions.
                    double best = Double.NEGATIVE_INFINITY;
                    for (A action : markovDecisionProcess.getActions()) {
                        double expected = 0.0d;
                        for (S successor : markovDecisionProcess.getStates()) {
                            expected += markovDecisionProcess.getProb(state, action, successor)
                                    * (markovDecisionProcess.getReward(state, action, successor)
                                            + (d * utilities.get(successor)));
                        }
                        if (expected > best) {
                            best = expected;
                        }
                    }
                    updated.put(state, best);
                }
            }
            utilities = updated;
        }
        return getPolicy(utilities, markovDecisionProcess, d);
    }
}
