package org.carrot2.clustering.lingo;

import com.carrotsearch.hppc.BitSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.IControllerContext;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.ProcessingComponentBase;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.CommonAttributes;
import org.carrot2.core.attribute.Internal;
import org.carrot2.core.attribute.Processing;
import org.carrot2.matrix.NNIInterface;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.clustering.IMonolingualClusteringAlgorithm;
import org.carrot2.text.clustering.MultilingualClustering;
import org.carrot2.text.preprocessing.LabelFormatter;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.pipeline.CompletePreprocessingPipeline;
import org.carrot2.text.vsm.ReducedVectorSpaceModelContext;
import org.carrot2.text.vsm.TermDocumentMatrixBuilder;
import org.carrot2.text.vsm.TermDocumentMatrixReducer;
import org.carrot2.text.vsm.VectorSpaceModelContext;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.DoubleRange;
import org.carrot2.util.attribute.constraint.IntRange;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Clustering component that groups {@link #documents} into labelled {@link #clusters}.
 *
 * <p>For each language partition handed over by {@link #multilingualClustering} the component
 * preprocesses the documents, builds and reduces the term-document matrix, builds cluster labels,
 * assigns documents to them, merges clusters and finally appends an "other topics" group via
 * {@link Cluster#appendOtherTopics}.
 *
 * <p>The component mutates its own {@link #documents} and {@link #clusters} fields while
 * processing, so a single instance must not be used by several threads at the same time.
 */
@Bindable(prefix = "LingoClusteringAlgorithm", inherit = {CommonAttributes.class})
public class LingoClusteringAlgorithm extends ProcessingComponentBase implements IClusteringAlgorithm {
    private static final Logger log = LoggerFactory.getLogger(LingoClusteringAlgorithm.class);

    /** Set once the native BLAS availability has been checked in {@link #init}; guarded by the class lock. */
    private static boolean nativeLibrariesReported;

    /** Documents to cluster. Temporarily replaced by per-language subsets during {@link #process()}. */
    @Processing
    @Required
    @Input
    @Internal
    @Attribute(key = "documents", inherit = true)
    public List<Document> documents;

    /** Output flag: the value of {@link NNIInterface#isNativeBlasAvailable()} at processing time. */
    @Output
    @Attribute
    @Processing
    public boolean nativeMatrixUsed;

    /** Query passed to the preprocessing pipeline together with the documents; may be {@code null}. */
    @Processing
    @Input
    @Internal
    @Attribute(key = "query", inherit = true)
    public String query = null;

    /** Clusters produced by the most recent {@link #process()} call. */
    @Processing
    @Output
    @Internal
    @Attribute(key = "clusters", inherit = true)
    public List<Cluster> clusters = null;

    /** Weight handed to {@link Cluster#byReversedWeightedScoreAndSizeComparator(double)} when ordering clusters. */
    @Processing
    @Input
    @Attribute
    @DoubleRange(min = 0.0d, max = 1.0d)
    public double scoreWeight = 0.0d;

    /**
     * Base value from which the number of clusters requested from the matrix reducer is derived,
     * see {@link #computeClusterCount(int, int)}.
     */
    // NOTE(review): the lower bound reuses a tokenizer constant, which looks like a decompiler
    // constant substitution rather than an intentional dependency; confirm the intended literal.
    @Processing
    @Input
    @Attribute
    @IntRange(min = ITokenizer.TT_NUMERIC, max = 100)
    public int desiredClusterCountBase = 30;

    /** Preprocessing pipeline applied to each language partition. */
    public final CompletePreprocessingPipeline preprocessingPipeline = new CompletePreprocessingPipeline();

    /** Builds the term-document and term-phrase matrices. */
    public final TermDocumentMatrixBuilder matrixBuilder = new TermDocumentMatrixBuilder();

    /** Reduces the term-document matrix to the requested number of dimensions. */
    public final TermDocumentMatrixReducer matrixReducer = new TermDocumentMatrixReducer();

    /** Builds labels, assigns documents and merges clusters. */
    public final ClusterBuilder clusterBuilder = new ClusterBuilder();

    /** Formats the label feature selected for each cluster. */
    public final LabelFormatter labelFormatter = new LabelFormatter();

    /** Splits the input by language and aggregates per-language results. */
    public final MultilingualClustering multilingualClustering = new MultilingualClustering();

    /**
     * Checks native BLAS availability once per class and logs a message when it is available.
     *
     * @param context controller context; not used by this implementation
     */
    // NOTE(review): this override does not delegate to super.init(context); confirm the base
    // class has nothing to initialize.
    @Override
    public void init(IControllerContext context) {
        synchronized (LingoClusteringAlgorithm.class) {
            if (!nativeLibrariesReported) {
                if (NNIInterface.isNativeBlasAvailable()) {
                    log.info("Native BLAS routines available");
                }
                nativeLibrariesReported = true;
            }
        }
    }

    /**
     * Clusters {@link #documents} and stores the result in {@link #clusters}.
     *
     * <p>The original document list is remembered up front because the per-language callback
     * overwrites {@link #documents} with each language partition. The list is restored in a
     * {@code finally} block so that a failure during clustering does not leave the component
     * pointing at a partial, per-language document list.
     *
     * @throws ProcessingException declared by the component contract
     */
    @Override
    public void process() throws ProcessingException {
        this.nativeMatrixUsed = NNIInterface.isNativeBlasAvailable();

        final List<Document> originalDocuments = this.documents;
        try {
            this.clusters = this.multilingualClustering.process(this.documents, new IMonolingualClusteringAlgorithm() {
                @Override
                public List<Cluster> process(List<Document> languageDocuments, LanguageCode languageCode) {
                    LingoClusteringAlgorithm.this.documents = languageDocuments;
                    LingoClusteringAlgorithm.this.cluster(languageCode);
                    return LingoClusteringAlgorithm.this.clusters;
                }
            });
        } finally {
            // Always undo the per-language substitution, even when clustering throws.
            this.documents = originalDocuments;
        }

        if (this.multilingualClustering.languageAggregationStrategy == MultilingualClustering.LanguageAggregationStrategy.FLATTEN_ALL) {
            // Flattened results come from several languages, so they are re-sorted as a whole:
            // first by the "other topics at the end" comparator, then by weighted score and size.
            Collections.sort(
                    this.clusters,
                    Ordering.compound(Lists.newArrayList(new Comparator[]{
                            Cluster.OTHER_TOPICS_AT_THE_END,
                            Cluster.byReversedWeightedScoreAndSizeComparator(this.scoreWeight)})));
        }
    }

    /**
     * Clusters the current {@link #documents} for a single language and stores the result in
     * {@link #clusters}.
     *
     * <p>Kept {@code public} because the decompiled class exposes it that way (it is invoked from
     * the anonymous callback created in {@link #process()}).
     *
     * @param languageCode language passed to the preprocessing pipeline
     */
    public void cluster(LanguageCode languageCode) {
        final PreprocessingContext preprocessingContext =
                this.preprocessingPipeline.preprocess(this.documents, this.query, languageCode);
        this.clusters = Lists.newArrayList();

        if (preprocessingContext.hasLabels()) {
            final VectorSpaceModelContext vsmContext = new VectorSpaceModelContext(preprocessingContext);
            final ReducedVectorSpaceModelContext reducedVsmContext = new ReducedVectorSpaceModelContext(vsmContext);
            final LingoProcessingContext lingoContext = new LingoProcessingContext(reducedVsmContext);

            this.matrixBuilder.buildTermDocumentMatrix(vsmContext);
            this.matrixBuilder.buildTermPhraseMatrix(vsmContext);
            this.matrixReducer.reduce(
                    reducedVsmContext,
                    computeClusterCount(this.desiredClusterCountBase, this.documents.size()));

            this.clusterBuilder.buildLabels(lingoContext, this.matrixBuilder.termWeighting);
            this.clusterBuilder.assignDocuments(lingoContext);
            this.clusterBuilder.merge(lingoContext);

            final int[] labelFeatureIndex = lingoContext.clusterLabelFeatureIndex;
            final BitSet[] clusterDocuments = lingoContext.clusterDocuments;
            final double[] labelScores = lingoContext.clusterLabelScore;

            for (int c = 0; c < labelFeatureIndex.length; c++) {
                // NOTE(review): allocated before the guard, as in the original; it could move
                // below the check if Cluster's constructor has no side effects.
                final Cluster cluster = new Cluster();

                final int labelFeature = labelFeatureIndex[c];
                if (labelFeature < 0) {
                    // Entries with a negative label feature index do not yield a cluster.
                    continue;
                }

                cluster.addPhrases(this.labelFormatter.format(preprocessingContext, labelFeature));
                cluster.setAttribute("score", Double.valueOf(labelScores[c]));

                // Each set bit is an index into the current documents list.
                final BitSet members = clusterDocuments[c];
                for (int doc = members.nextSetBit(0); doc >= 0; doc = members.nextSetBit(doc + 1)) {
                    cluster.addDocuments(this.documents.get(doc));
                }

                this.clusters.add(cluster);
            }

            Collections.sort(this.clusters, Cluster.byReversedWeightedScoreAndSizeComparator(this.scoreWeight));
        }

        Cluster.appendOtherTopics(this.documents, this.clusters);
    }

    /**
     * Computes the number of clusters to request from the matrix reducer.
     *
     * @param desiredClusterCountBase base value, scaled by one tenth
     * @param documentCount number of documents being clustered
     * @return {@code (desiredClusterCountBase / 10.0) * sqrt(documentCount)} truncated to an
     *     {@code int}, capped at {@code documentCount}
     */
    static int computeClusterCount(int desiredClusterCountBase, int documentCount) {
        final int scaled = (int) ((desiredClusterCountBase / 10.0d) * Math.sqrt(documentCount));
        return Math.min(scaled, documentCount);
    }
}
