package aima.core.learning.reinforcement.agent;

import aima.core.agent.Action;
import aima.core.learning.reinforcement.PerceptStateReward;
import aima.core.util.FrequencyCounter;
import java.util.HashMap;
import java.util.Map;

/* loaded from: input_file:lib/aima-core-3.0.0.jar:aima/core/learning/reinforcement/agent/PassiveTDAgent.class */
/**
 * A passive reinforcement-learning agent that follows a fixed policy and estimates
 * the utility of each visited state with temporal-difference updates.
 *
 * <p>On every percept the utility estimate of the previously visited state {@code s}
 * is adjusted by
 * {@code U[s] <- U[s] + alpha(Ns, s) * (r + gamma * U[s'] - U[s])},
 * where {@code s'} is the state in the current percept and {@code r} is the reward
 * that was observed in {@code s}.
 *
 * @param <S> the state type
 * @param <A> the action type
 */
public class PassiveTDAgent<S, A extends Action> extends ReinforcementAgent<S, A> {
    /** The fixed policy: the action to take in each state (copied from the constructor argument). */
    private final Map<S, A> pi = new HashMap<>();
    /** Utility estimates keyed by state; a state is added the first time it is perceived. */
    private final Map<S, Double> U = new HashMap<>();
    /** Visit counts per state, incremented each time a state's utility is updated. */
    private final FrequencyCounter<S> Ns = new FrequencyCounter<>();
    /** Previous state, or {@code null} at the start of a run or after a terminal state. */
    private S s = null;
    /** Action selected for the previous state, or {@code null} when there is none. */
    private A a = null;
    /** Reward observed in the previous state, or {@code null} when there is none. */
    private Double r = null;
    /** Step size returned by the default {@link #alpha(FrequencyCounter, Object)}. */
    private final double alpha;
    /** Factor applied to the successor state's utility in the update. */
    private final double gamma;

    /**
     * Creates an agent that follows the given policy.
     *
     * @param fixedPolicy the policy to follow; its entries are copied, so later changes
     *                    to the supplied map do not affect this agent
     * @param alpha       the step size used by the default
     *                    {@link #alpha(FrequencyCounter, Object)} implementation
     * @param gamma       the factor applied to the successor state's utility
     * @throws NullPointerException if {@code fixedPolicy} is {@code null}
     */
    public PassiveTDAgent(Map<S, A> fixedPolicy, double alpha, double gamma) {
        this.pi.putAll(fixedPolicy);
        this.alpha = alpha;
        this.gamma = gamma;
    }

    /**
     * Processes one percept: seeds the utility of a newly seen state, updates the
     * utility of the previous state, and selects the next action from the policy.
     *
     * @param perceptStateReward the current state together with the reward observed in it
     * @return the policy's action for the current state, or {@code null} when the
     *         current state is treated as terminal
     */
    @Override
    public A execute(PerceptStateReward<S> perceptStateReward) {
        S sPrime = perceptStateReward.state();
        double rPrime = perceptStateReward.reward();

        // A state perceived for the first time starts with its observed reward as utility.
        if (!U.containsKey(sPrime)) {
            U.put(sPrime, rPrime);
        }

        // Update the previous state's estimate, if there is a previous state.
        if (s != null) {
            Ns.incrementFor(s);
            double previousUtility = U.get(s);
            double stepSize = alpha(Ns, s);
            double tdError = (r + (gamma * U.get(sPrime))) - previousUtility;
            U.put(s, previousUtility + (stepSize * tdError));
        }

        // After a terminal state nothing is carried over to the next percept.
        if (isTerminal(sPrime)) {
            s = null;
            a = null;
            r = null;
        } else {
            s = sPrime;
            a = pi.get(sPrime);
            r = rPrime;
        }
        return a;
    }

    /**
     * Returns a snapshot of the current utility estimates.
     *
     * @return a new map holding a copy of the state-to-utility table; modifying it
     *         does not affect this agent
     */
    @Override
    public Map<S, Double> getUtility() {
        return new HashMap<>(U);
    }

    /**
     * Discards all learned utilities and visit counts and forgets the previous
     * state, action and reward. The policy is kept.
     */
    @Override
    public void reset() {
        // Clearing in place is safe: getUtility() only ever hands out copies.
        U.clear();
        Ns.clear();
        s = null;
        a = null;
        r = null;
    }

    /**
     * Returns the step size to use when updating the utility of {@code s}.
     *
     * <p>This implementation ignores both arguments and returns the constant supplied
     * to the constructor; subclasses may override it.
     *
     * @param frequencyCounter the per-state visit counts
     * @param s                the state whose utility is about to be updated
     * @return the step size for the update
     */
    protected double alpha(FrequencyCounter<S> frequencyCounter, S s) {
        return this.alpha;
    }

    /**
     * Reports whether a state is treated as terminal, i.e. the policy has no action
     * for it or maps it to a no-op action.
     */
    private boolean isTerminal(S s) {
        A action = pi.get(s);
        return action == null || action.isNoOp();
    }
}
