package org.wikibrain.sr.evaluation;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.wikibrain.core.lang.Language;
import org.wikibrain.sr.dataset.Dataset;
import org.wikibrain.sr.utils.KnownSim;

/* loaded from: input_file:org/wikibrain/sr/evaluation/MostSimilarDataset.class */
/**
 * Groups the {@link KnownSim} entries of one or more {@link Dataset}s by phrase, so that each
 * phrase maps to a {@link KnownMostSim} built from every pair the phrase takes part in.
 *
 * <p>Each pair is indexed in both directions (once as given and once via
 * {@link KnownSim#getReversed()}), so a phrase is a key regardless of which side of the pair it
 * appeared on.
 */
public class MostSimilarDataset {
    /** Sentinel threshold value meaning "no threshold was requested". */
    private static final double DEFAULT_THRESHOLD = Double.NEGATIVE_INFINITY;

    private final String name;
    private final Language language;
    private final Map<String, KnownMostSim> data;

    /**
     * Creates an empty dataset; used internally by {@link #pruneSmallLists(int)} and
     * {@link #split(int)}, which populate {@code data} themselves.
     */
    private MostSimilarDataset(Language language, String str) {
        this.language = language;
        this.name = str;
        this.data = new HashMap<String, KnownMostSim>();
    }

    /**
     * Builds a most-similar dataset from a single dataset, without a threshold.
     *
     * @param dataset the source dataset
     */
    public MostSimilarDataset(Dataset dataset) {
        this(Collections.singletonList(dataset));
    }

    /**
     * Builds a most-similar dataset from several datasets, without a threshold.
     *
     * @param list the source datasets; must be non-empty and share one language
     */
    public MostSimilarDataset(List<Dataset> list) {
        this(list, DEFAULT_THRESHOLD);
    }

    /**
     * Builds a most-similar dataset from several datasets.
     *
     * <p>Side effect: {@code normalize()} is invoked on every dataset in {@code list}.
     *
     * <p>The resulting name is the comma-joined dataset names, followed by
     * {@code "+threshold=<d>"} unless {@code d} is {@link Double#NEGATIVE_INFINITY}.
     *
     * @param list the source datasets; must be non-empty and share one language
     * @param d    threshold handed to each {@link KnownMostSim}; its exact meaning is defined by
     *             that class (NOTE(review): presumably a minimum similarity -- confirm)
     * @throws IllegalArgumentException if {@code list} is empty or the datasets' languages differ
     */
    public MostSimilarDataset(List<Dataset> list, double d) {
        if (list.isEmpty()) {
            throw new IllegalArgumentException("At least one dataset is required");
        }
        this.language = list.get(0).getLanguage();
        Map<String, List<KnownSim>> simsByPhrase = new HashMap<String, List<KnownSim>>();
        List<String> datasetNames = new ArrayList<String>();
        for (Dataset dataset : list) {
            dataset.normalize();
            // Compare by value rather than by reference so that equal-but-distinct Language
            // instances are not rejected; identical references are still accepted.
            if (!sameLanguage(this.language, dataset.getLanguage())) {
                throw new IllegalArgumentException("All datasets must be the same language");
            }
            for (KnownSim knownSim : dataset.getData()) {
                // Index the pair under both of its phrases.
                addToMap(simsByPhrase, knownSim);
                addToMap(simsByPhrase, knownSim.getReversed());
            }
            datasetNames.add(dataset.getName());
        }
        this.name = StringUtils.join(datasetNames, ",") + (d == DEFAULT_THRESHOLD ? "" : "+threshold=" + d);
        this.data = new HashMap<String, KnownMostSim>();
        for (Map.Entry<String, List<KnownSim>> entry : simsByPhrase.entrySet()) {
            KnownMostSim knownMostSim = new KnownMostSim(entry.getValue(), d);
            // Phrases whose most-similar list ends up empty are dropped.
            if (!knownMostSim.getMostSimilar().isEmpty()) {
                this.data.put(entry.getKey(), knownMostSim);
            }
        }
    }

    /**
     * Returns the phrases that have an entry in this dataset.
     *
     * @return the key set of the backing map (a live view, not a copy)
     */
    public Set<String> getPhrases() {
        return this.data.keySet();
    }

    /**
     * Looks up the similarities recorded for a phrase.
     *
     * @param str the phrase
     * @return the entry for {@code str}, or {@code null} if the phrase is not present
     */
    public KnownMostSim getSimilarities(String str) {
        return this.data.get(str);
    }

    /**
     * Returns a new dataset that keeps only phrases whose most-similar list has at least
     * {@code i} entries. The new dataset is named {@code <name>+pruned=<i>} and shares the
     * {@link KnownMostSim} instances with this one.
     *
     * @param i minimum size of a phrase's most-similar list for the phrase to be kept
     * @return the pruned dataset; this dataset is left unchanged
     */
    public MostSimilarDataset pruneSmallLists(int i) {
        MostSimilarDataset pruned = new MostSimilarDataset(this.language, this.name + "+pruned=" + i);
        for (Map.Entry<String, KnownMostSim> entry : this.data.entrySet()) {
            if (entry.getValue().getMostSimilar().size() >= i) {
                pruned.data.put(entry.getKey(), entry.getValue());
            }
        }
        return pruned;
    }

    /** Appends {@code knownSim} to the list stored under its {@code phrase1}, creating the list if needed. */
    private static void addToMap(Map<String, List<KnownSim>> map, KnownSim knownSim) {
        List<KnownSim> sims = map.get(knownSim.phrase1);
        if (sims == null) {
            sims = new ArrayList<KnownSim>();
            map.put(knownSim.phrase1, sims);
        }
        sims.add(knownSim);
    }

    /** Null-safe equality check for two languages; identical references always match. */
    private static boolean sameLanguage(Language first, Language second) {
        return first == second || (first != null && first.equals(second));
    }

    /** @return the name of this dataset */
    public String getName() {
        return this.name;
    }

    /** @return the language of this dataset */
    public Language getLanguage() {
        return this.language;
    }

    /**
     * Flattens this dataset into a plain {@link Dataset} that contains the most-similar entries
     * of every phrase, keeping this dataset's name and language.
     *
     * @return a new dataset holding all most-similar entries
     */
    public Dataset toDataset() {
        List<KnownSim> allSims = new ArrayList<KnownSim>();
        for (KnownMostSim knownMostSim : this.data.values()) {
            allSims.addAll(knownMostSim.getMostSimilar());
        }
        return new Dataset(this.name, this.language, allSims);
    }

    /**
     * Randomly partitions the phrases into {@code i} datasets named {@code <name>+split-<k>}.
     * Phrases are shuffled with {@link Collections#shuffle(List)} and dealt out round-robin, so
     * the assignment differs between calls.
     *
     * @param i number of partitions; must be positive
     * @return the {@code i} partitions, some of which may be empty
     * @throws IllegalArgumentException if {@code i} is not positive
     */
    public List<MostSimilarDataset> split(int i) {
        if (i <= 0) {
            throw new IllegalArgumentException("Number of splits must be positive: " + i);
        }
        List<String> phrases = new ArrayList<String>(this.data.keySet());
        Collections.shuffle(phrases);
        List<MostSimilarDataset> splits = new ArrayList<MostSimilarDataset>(i);
        for (int index = 0; index < i; index++) {
            splits.add(new MostSimilarDataset(this.language, this.name + "+split-" + index));
        }
        for (int index = 0; index < phrases.size(); index++) {
            String phrase = phrases.get(index);
            splits.get(index % i).data.put(phrase, this.data.get(phrase));
        }
        return splits;
    }

    /**
     * Same as {@link #split(int)}, with every partition converted via {@link #toDataset()}.
     *
     * @param i number of partitions; must be positive
     * @return the partitions as plain datasets
     * @throws IllegalArgumentException if {@code i} is not positive
     */
    public List<Dataset> splitIntoDatasets(int i) {
        List<Dataset> datasets = new ArrayList<Dataset>();
        for (MostSimilarDataset part : split(i)) {
            datasets.add(part.toDataset());
        }
        return datasets;
    }
}
