/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.df.split;

import java.util.Arrays;
import org.apache.mahout.df.data.Data;
import org.apache.mahout.df.data.conditions.Condition;
import org.apache.mahout.df.split.IgSplit;
import org.apache.mahout.df.split.Split;

/**
 * Information-gain split evaluator that scores an attribute by directly
 * recomputing label entropies on the relevant subsets of the data.
 *
 * <p>Instances keep a mutable scratch array ({@link #counts}) that is written
 * by every {@link #entropy(Data)} call without any synchronization in this
 * class, so a single instance should not be used concurrently.
 */
public class DefaultIgSplit
extends IgSplit {
    /** Per-label counters reused across {@link #entropy(Data)} calls. */
    private int[] counts;

    /**
     * Computes the split for the given attribute.
     *
     * <p>For a numerical attribute every value returned by
     * {@code data.values(attr)} is tried as a threshold and the one with the
     * strictly highest information gain is kept (the first one wins on ties).
     * If no candidate yields a gain above the initial {@code -1.0}, the
     * returned split carries a gain of {@code -1.0} and a threshold of
     * {@code 0.0}. For any other attribute the categorical information gain
     * is used and no threshold is set.
     *
     * @param data the data to split
     * @param attr index of the attribute to evaluate
     * @return the split description for {@code attr}
     */
    @Override
    public Split computeSplit(Data data, int attr) {
        if (!data.getDataset().isNumerical(attr)) {
            return new Split(attr, this.categoricalIg(data, attr));
        }

        double bestIg = -1.0;
        double bestSplit = 0.0;
        for (double candidate : data.values(attr)) {
            double ig = this.numericalIg(data, attr, candidate);
            // Strict comparison: ties keep the earlier candidate, NaN is never selected.
            if (ig > bestIg) {
                bestIg = ig;
                bestSplit = candidate;
            }
        }
        return new Split(attr, bestIg, bestSplit);
    }

    /**
     * Information gain obtained by partitioning {@code data} on each value of
     * {@code attr} returned by {@code data.values(attr)}.
     *
     * @param data the data to evaluate
     * @param attr index of the attribute
     * @return entropy of {@code data} minus the size-weighted sum of the
     *         entropies of the per-value subsets
     */
    protected double categoricalIg(Data data, int attr) {
        double[] values = data.values(attr);
        double hy = this.entropy(data);
        double invDataSize = 1.0 / (double)data.size();

        double hyx = 0.0;
        for (double value : values) {
            Data subset = data.subset(Condition.equals(attr, value));
            double weight = (double)subset.size() * invDataSize;
            hyx += weight * this.entropy(subset);
        }
        return hy - hyx;
    }

    /**
     * Information gain obtained by cutting {@code data} in two around
     * {@code split}: rows selected by {@code Condition.lesser(attr, split)}
     * and rows selected by {@code Condition.greaterOrEquals(attr, split)}.
     *
     * @param data  the data to evaluate
     * @param attr  index of the attribute
     * @param split threshold value
     * @return entropy of {@code data} minus the size-weighted entropies of the
     *         two subsets
     */
    protected double numericalIg(Data data, int attr, double split) {
        double hy = this.entropy(data);
        double invDataSize = 1.0 / (double)data.size();

        Data lower = data.subset(Condition.lesser(attr, split));
        double ig = hy - (double)lower.size() * invDataSize * this.entropy(lower);

        Data upper = data.subset(Condition.greaterOrEquals(attr, split));
        return ig - (double)upper.size() * invDataSize * this.entropy(upper);
    }

    /**
     * Entropy of the label distribution of {@code data}, computed as the sum
     * over labels with a non-zero count of {@code -p * ln(p) / LOG2}, where
     * {@code p} is the label's count divided by {@code data.size()}.
     * {@code LOG2} is not declared in this class (presumably inherited from
     * {@code IgSplit}).
     *
     * @param data the data whose labels are counted
     * @return the entropy; {@code 0.0} when every label count is zero
     */
    protected double entropy(Data data) {
        double invDataSize = 1.0 / (double)data.size();
        int nbLabels = data.getDataset().nblabels();

        // Regrow the scratch buffer if this dataset has more labels than the one
        // it was first sized for; a stale, shorter buffer would overflow below.
        if (this.counts == null || this.counts.length < nbLabels) {
            this.counts = new int[nbLabels];
        }
        Arrays.fill(this.counts, 0);
        data.countLabels(this.counts);

        double entropy = 0.0;
        for (int label = 0; label < nbLabels; ++label) {
            int count = this.counts[label];
            // Skip empty labels: p == 0 would make Math.log(p) -Infinity.
            if (count == 0) {
                continue;
            }
            double p = (double)count * invDataSize;
            entropy += -p * Math.log(p) / LOG2;
        }
        return entropy;
    }
}

