package org.tweetyproject.machinelearning.examples;

import java.util.HashSet;
import java.util.Set;
import org.tweetyproject.logics.pl.examples.StreamInconsistencyEvaluationExample2;
import org.tweetyproject.machinelearning.rl.mdp.FixedPolicy;
import org.tweetyproject.machinelearning.rl.mdp.MarkovDecisionProcess;
import org.tweetyproject.machinelearning.rl.mdp.NamedAction;
import org.tweetyproject.machinelearning.rl.mdp.NamedState;
import org.tweetyproject.machinelearning.rl.mdp.Policy;
import org.tweetyproject.machinelearning.rl.mdp.algorithms.IterativePolicyEvaluation;
import org.tweetyproject.machinelearning.rl.mdp.algorithms.PolicyIteration;
import org.tweetyproject.machinelearning.rl.mdp.algorithms.ValueIteration;

/* loaded from: input_file:org.tweetyproject.machinelearning-1.24.jar:org/tweetyproject/machinelearning/examples/MdpExample.class */
/**
 * Example program that builds a small Markov decision process from nine named
 * states and the three named actions "clean", "move" and "charge", registers
 * transition probabilities and rewards, defines two hand-written policies, and
 * prints the policies found by value iteration and policy iteration together
 * with the expected utilities of the hand-written and computed policies.
 *
 * <p>The project types {@code MarkovDecisionProcess}, {@code FixedPolicy} and
 * {@code Policy} are used without type arguments because their generic
 * signatures are not visible from this file.
 */
public class MdpExample {
    /** Probability that "move" leads to the paired state. */
    private static final double MOVE_SUCCESS = 0.9d;
    /** Probability that "move" leaves the state unchanged. */
    private static final double MOVE_FAILURE = 1.0d - MOVE_SUCCESS;
    /** Probability that "clean" leads to the follow-up state. */
    private static final double CLEAN_SUCCESS = 0.8d;
    /** Probability that "clean" leaves the state unchanged. */
    private static final double CLEAN_FAILURE = 1.0d - CLEAN_SUCCESS;
    /** Last argument of getPolicy/expectedUtility; presumably the discount factor -- confirm. */
    private static final double GAMMA = 0.9d;

    /**
     * Builds the example process, then prints, in this order: the value-iteration
     * policy, the expected utilities of the two fixed policies and of the
     * value-iteration policy, an empty line, and the policy-iteration policy.
     *
     * @param strArr command-line arguments; not used
     */
    public static void main(String[] strArr) {
        // --- states ---------------------------------------------------------
        Set<NamedState> states = new HashSet<>();
        NamedState s111 = new NamedState("s111");
        states.add(s111);
        NamedState s211 = new NamedState("s211");
        states.add(s211);
        NamedState s101 = new NamedState("s101");
        states.add(s101);
        NamedState s201 = new NamedState("s201");
        states.add(s201);
        NamedState s110 = new NamedState("s110");
        states.add(s110);
        NamedState s210 = new NamedState("s210");
        states.add(s210);
        NamedState s100 = new NamedState("s100");
        states.add(s100);
        NamedState s200 = new NamedState("s200");
        states.add(s200);
        NamedState st = new NamedState("st");
        states.add(st);

        // --- actions --------------------------------------------------------
        Set<NamedAction> actions = new HashSet<>();
        NamedAction clean = new NamedAction("clean");
        actions.add(clean);
        NamedAction move = new NamedAction("move");
        actions.add(move);
        NamedAction charge = new NamedAction("charge");
        actions.add(charge);

        // Third constructor argument; presumably the set of terminal states,
        // with s111 (second argument) as the initial state -- confirm against
        // MarkovDecisionProcess.
        Set<NamedState> terminalStates = new HashSet<>();
        terminalStates.add(st);
        MarkovDecisionProcess mdp = new MarkovDecisionProcess(states, s111, terminalStates, actions);

        // --- transition probabilities: move ---------------------------------
        // Each s1xy/s2xy pair is connected in both directions.
        mdp.putProb(s111, move, s211, MOVE_SUCCESS);
        mdp.putProb(s111, move, s111, MOVE_FAILURE);
        mdp.putProb(s211, move, s111, MOVE_SUCCESS);
        mdp.putProb(s211, move, s211, MOVE_FAILURE);
        mdp.putProb(s101, move, s201, MOVE_SUCCESS);
        mdp.putProb(s101, move, s101, MOVE_FAILURE);
        mdp.putProb(s201, move, s101, MOVE_SUCCESS);
        mdp.putProb(s201, move, s201, MOVE_FAILURE);
        mdp.putProb(s110, move, s210, MOVE_SUCCESS);
        mdp.putProb(s110, move, s110, MOVE_FAILURE);
        mdp.putProb(s210, move, s110, MOVE_SUCCESS);
        mdp.putProb(s210, move, s210, MOVE_FAILURE);
        mdp.putProb(s100, move, s200, MOVE_SUCCESS);
        mdp.putProb(s100, move, s100, MOVE_FAILURE);
        mdp.putProb(s200, move, s100, MOVE_SUCCESS);
        mdp.putProb(s200, move, s200, MOVE_FAILURE);

        // --- transition probabilities: clean --------------------------------
        mdp.putProb(s111, clean, s101, CLEAN_SUCCESS);
        mdp.putProb(s111, clean, s111, CLEAN_FAILURE);
        mdp.putProb(s110, clean, s100, CLEAN_SUCCESS);
        mdp.putProb(s110, clean, s110, CLEAN_FAILURE);
        mdp.putProb(s101, clean, s101, 1.0d);
        mdp.putProb(s100, clean, s100, 1.0d);
        mdp.putProb(s211, clean, s210, CLEAN_SUCCESS);
        mdp.putProb(s211, clean, s211, CLEAN_FAILURE);
        mdp.putProb(s201, clean, s200, CLEAN_SUCCESS);
        mdp.putProb(s201, clean, s201, CLEAN_FAILURE);
        mdp.putProb(s210, clean, s210, 1.0d);
        mdp.putProb(s200, clean, s200, 1.0d);

        // --- transition probabilities: charge -------------------------------
        // From the s1xy states "charge" always leads to st; in the s2xy states
        // it is a self-loop.
        mdp.putProb(s111, charge, st, 1.0d);
        mdp.putProb(s101, charge, st, 1.0d);
        mdp.putProb(s110, charge, st, 1.0d);
        mdp.putProb(s100, charge, st, 1.0d);
        mdp.putProb(s211, charge, s211, 1.0d);
        mdp.putProb(s201, charge, s201, 1.0d);
        mdp.putProb(s210, charge, s210, 1.0d);
        mdp.putProb(s200, charge, s200, 1.0d);

        // --- rewards: move (-1 for every outcome) ---------------------------
        mdp.putReward(s111, move, s111, -1.0d);
        mdp.putReward(s211, move, s211, -1.0d);
        mdp.putReward(s101, move, s101, -1.0d);
        mdp.putReward(s201, move, s201, -1.0d);
        mdp.putReward(s110, move, s110, -1.0d);
        mdp.putReward(s210, move, s210, -1.0d);
        mdp.putReward(s100, move, s100, -1.0d);
        mdp.putReward(s200, move, s200, -1.0d);
        mdp.putReward(s111, move, s211, -1.0d);
        mdp.putReward(s211, move, s111, -1.0d);
        mdp.putReward(s101, move, s201, -1.0d);
        mdp.putReward(s201, move, s101, -1.0d);
        mdp.putReward(s110, move, s210, -1.0d);
        mdp.putReward(s210, move, s110, -1.0d);
        mdp.putReward(s100, move, s200, -1.0d);
        mdp.putReward(s200, move, s100, -1.0d);

        // --- rewards: clean (+10 on a state change, -2 on the sure self-loops)
        mdp.putReward(s111, clean, s101, 10.0d);
        mdp.putReward(s110, clean, s100, 10.0d);
        mdp.putReward(s101, clean, s101, -2.0d);
        mdp.putReward(s100, clean, s100, -2.0d);
        mdp.putReward(s211, clean, s210, 10.0d);
        mdp.putReward(s201, clean, s200, 10.0d);
        mdp.putReward(s210, clean, s210, -2.0d);
        mdp.putReward(s200, clean, s200, -2.0d);

        // --- rewards: charge (-7 when reaching st, -5 on the self-loops) ----
        // NOTE(review): not every transition that has a probability also gets
        // an explicit reward here (e.g. s100/charge/st and the clean
        // self-loops with probability CLEAN_FAILURE). How
        // MarkovDecisionProcess treats such transitions, and whether the
        // omission is intentional, cannot be told from this file.
        mdp.putReward(s111, charge, st, -7.0d);
        mdp.putReward(s101, charge, st, -7.0d);
        mdp.putReward(s110, charge, st, -7.0d);
        mdp.putReward(s211, charge, s211, -5.0d);
        mdp.putReward(s201, charge, s201, -5.0d);
        mdp.putReward(s210, charge, s210, -5.0d);
        mdp.putReward(s200, charge, s200, -5.0d);

        // --- first hand-written policy --------------------------------------
        FixedPolicy firstPolicy = new FixedPolicy();
        firstPolicy.set(s111, clean);
        firstPolicy.set(s211, clean);
        firstPolicy.set(s101, move);
        firstPolicy.set(s201, clean);
        firstPolicy.set(s110, clean);
        firstPolicy.set(s210, move);
        firstPolicy.set(s100, charge);
        firstPolicy.set(s200, move);

        // --- second hand-written policy -------------------------------------
        FixedPolicy secondPolicy = new FixedPolicy();
        secondPolicy.set(s111, clean);
        secondPolicy.set(s211, move);
        secondPolicy.set(s101, charge);
        secondPolicy.set(s201, move);
        secondPolicy.set(s110, clean);
        secondPolicy.set(s210, move);
        secondPolicy.set(s100, charge);
        secondPolicy.set(s200, move);

        // --- value iteration and utility comparison -------------------------
        Policy valueIterationPolicy = new ValueIteration(100L).getPolicy(mdp, GAMMA);
        System.out.println(valueIterationPolicy);
        // NOTE(review): the second argument is a constant borrowed from an
        // unrelated example class; this looks like a decompiler attributing an
        // inlined literal to the wrong owner. Its value is not visible here,
        // so it is left unchanged -- confirm and replace with a local constant.
        System.out.println(mdp.expectedUtility(firstPolicy, StreamInconsistencyEvaluationExample2.STANDARD_EVENTS, GAMMA));
        System.out.println(mdp.expectedUtility(secondPolicy, StreamInconsistencyEvaluationExample2.STANDARD_EVENTS, GAMMA));
        System.out.println(mdp.expectedUtility(valueIterationPolicy, StreamInconsistencyEvaluationExample2.STANDARD_EVENTS, GAMMA));
        System.out.println();

        // --- policy iteration -----------------------------------------------
        System.out.println(new PolicyIteration(new IterativePolicyEvaluation(10000L)).getPolicy(mdp, GAMMA));
    }
}
