/*
 * Decompiled with CFR 0.152.
 */
package eu.openminted.uc.socialsciences.variabledetection.features;

import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger;
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordLemmatizer;
import de.tudarmstadt.ukp.dkpro.core.stopwordremover.StopWordRemover;
import de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter;
import eu.openminted.uc.socialsciences.variabledetection.pipelines.VariableDisambiguationConstants;
import eu.openminted.uc.socialsciences.variabledetection.uima.io.SemEvalCorpusReader;
import java.io.File;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.dkpro.similarity.algorithms.lexical.string.LongestCommonSubsequenceComparator;
import org.dkpro.similarity.algorithms.lexical.string.LongestCommonSubsequenceNormComparator;
import org.dkpro.similarity.algorithms.lexical.string.LongestCommonSubstringComparator;
import org.dkpro.similarity.algorithms.lexical.uima.ngrams.CharacterNGramResource;
import org.dkpro.similarity.algorithms.lexical.uima.ngrams.WordNGramContainmentResource;
import org.dkpro.similarity.algorithms.lexical.uima.ngrams.WordNGramJaccardResource;
import org.dkpro.similarity.algorithms.lexical.uima.string.GreedyStringTilingMeasureResource;
import org.dkpro.similarity.ml.FeatureConfig;
import org.dkpro.similarity.ml.filters.LogFilter;
import org.dkpro.similarity.ml.io.SimilarityScoreWriter;
import org.dkpro.similarity.uima.annotator.SimilarityScorer;
import org.dkpro.similarity.uima.api.type.ExperimentalTextSimilarityScore;
import org.dkpro.similarity.uima.api.type.TextSimilarityScore;
import org.dkpro.similarity.uima.io.CombinationReader;
import org.dkpro.similarity.uima.resource.SimpleTextSimilarityResource;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.Filter;

/**
 * Computes text-similarity features for pairs of texts.
 *
 * <p>Two usage styles are offered:
 * <ul>
 *   <li>An instance, created once per model base, that scores a single text pair on demand
 *       ({@link #generateFeatures(String, String, boolean)} and
 *       {@link #generateFeaturesAsFiles(String, String)}). The instance keeps one scoring engine
 *       per {@link FeatureConfig} plus two reusable CASes: one preprocessed without and one
 *       preprocessed with stopword removal.</li>
 *   <li>A static batch run over dataset files
 *       ({@link #generateFeatures(String, VariableDisambiguationConstants.Dataset, List, VariableDisambiguationConstants.Mode)}).</li>
 * </ul>
 *
 * <p>The per-pair methods are {@code synchronized} on the instance and only touch state owned by
 * that instance.
 */
public class FeatureGeneration {
    public static final String MODE_TEMP = "temp";
    public static final int[] CHAR_NGRAMS_N = new int[]{2, 3, 4};

    private static final String INITIAL_VIEW = "_InitialView";
    private static final String VIEW_1 = "View1";
    private static final String VIEW_2 = "View2";
    private static final String FEATURE_ROOT = "target/features/";

    private final AnalysisEngine preprocessingStopwordFiltering;
    private final AnalysisEngine preprocessing;
    private final Map<FeatureConfig, AnalysisEngine> engineMap = new HashMap<FeatureConfig, AnalysisEngine>();
    private final List<FeatureConfig> featureConfigList;
    // The CASes are per-instance so that the instance lock taken by the synchronized methods
    // really guards them; sharing them statically would let two instances race on one CAS.
    private final JCas featureJCasStopwordFiltering;
    private final JCas featureJCas;

    /**
     * Builds the preprocessing engines, one scoring engine per feature configuration, and the
     * two reusable CASes. The name of every configured measure is printed to standard output.
     *
     * @param aModelBase base location used to build the paths of the character n-gram IDF files
     * @throws Exception if a feature configuration, an engine or a CAS cannot be created
     */
    public FeatureGeneration(String aModelBase) throws Exception {
        this.featureConfigList = getFeatureConfigs(aModelBase, VariableDisambiguationConstants.Dataset.ALL, VariableDisambiguationConstants.Mode.TRAIN);
        this.preprocessingStopwordFiltering = AnalysisEngineFactory.createEngine(preprocessors(true));
        this.preprocessing = AnalysisEngineFactory.createEngine(preprocessors(false));
        for (FeatureConfig config : this.featureConfigList) {
            System.out.println(config.getMeasureName());
            // The scorer is wrapped in a single-element aggregate, as in the original code.
            AnalysisEngineDescription aggregate = AnalysisEngineFactory.createEngineDescription(new AnalysisEngineDescription[]{scorerDescription(config)});
            this.engineMap.put(config, AnalysisEngineFactory.createEngine(aggregate));
        }
        this.featureJCasStopwordFiltering = JCasFactory.createJCas();
        this.featureJCas = JCasFactory.createJCas();
    }

    /**
     * Describes a {@link SimilarityScorer} that compares {@code View1} with {@code View2} using
     * the segment feature path and resource of the given configuration.
     */
    private static AnalysisEngineDescription scorerDescription(FeatureConfig config) throws ResourceInitializationException {
        return AnalysisEngineFactory.createEngineDescription(SimilarityScorer.class,
                "NameView1", VIEW_1,
                "NameView2", VIEW_2,
                "SegmentFeaturePath", config.getSegmentFeaturePath(),
                "TextRelatednessResource", config.getResource());
    }

    /** Lower-cases the mode name independently of the default locale, for use in paths. */
    private static String lowerCase(VariableDisambiguationConstants.Mode mode) {
        return mode.toString().toLowerCase(Locale.ROOT);
    }

    /**
     * Creates the two views in the given CAS and fills them with the texts. Both views are
     * marked as English and receive document ids "1" and "2" respectively.
     */
    private static void setupCas(JCas aJCas, String text1, String text2) throws CASException {
        JCas view1 = aJCas.createView(VIEW_1);
        view1.setDocumentText(text1);
        view1.setDocumentLanguage("en");
        DocumentMetaData.create(view1).setDocumentId("1");

        JCas view2 = aJCas.createView(VIEW_2);
        view2.setDocumentText(text2);
        view2.setDocumentLanguage("en");
        DocumentMetaData.create(view2).setDocumentId("2");
    }

    /** Resets both CASes, loads the text pair into each and runs the matching preprocessing. */
    private void prepareCases(String text1, String text2) throws Exception {
        this.featureJCasStopwordFiltering.reset();
        this.featureJCas.reset();
        setupCas(this.featureJCasStopwordFiltering, text1, text2);
        setupCas(this.featureJCas, text1, text2);
        this.preprocessing.process(this.featureJCas);
        this.preprocessingStopwordFiltering.process(this.featureJCasStopwordFiltering);
    }

    /**
     * Runs the scoring engine of the given configuration and returns the raw score.
     *
     * <p>The CAS is chosen according to {@link FeatureConfig#filterStopwords()}, mirroring the
     * batch method, which preprocesses with or without stopword removal per configuration. The
     * score annotation is removed again so the next engine finds exactly one score in the CAS.
     */
    private double computeScore(FeatureConfig config) throws Exception {
        JCas cas = config.filterStopwords() ? this.featureJCasStopwordFiltering : this.featureJCas;
        this.engineMap.get(config).process(cas);
        TextSimilarityScore score = JCasUtil.selectSingle(cas, ExperimentalTextSimilarityScore.class);
        double value = score.getScore();
        score.removeFromIndexes();
        return value;
    }

    /** Limits a score to the closed interval [0, 5]; other values (including NaN) pass through. */
    private static double clamp(double s) {
        if (s < 0.0) {
            return 0.0;
        }
        if (s > 5.0) {
            return 5.0;
        }
        return s;
    }

    /**
     * Scores the text pair with every configured measure and writes each raw score to
     * {@code target/features/temp/<Dataset.TEMP>/<targetPath>/<measureName>.txt}.
     *
     * @param text1 first text of the pair
     * @param text2 second text of the pair
     * @throws Exception if preprocessing, scoring or writing a file fails
     */
    public synchronized void generateFeaturesAsFiles(String text1, String text2) throws Exception {
        prepareCases(text1, text2);
        for (FeatureConfig config : this.featureConfigList) {
            File featureDirectory = new File(FEATURE_ROOT + lowerCase(VariableDisambiguationConstants.Mode.TEMP) + "/" + VariableDisambiguationConstants.Dataset.TEMP + "/" + config.getTargetPath() + "/");
            featureDirectory.mkdirs();
            double score = computeScore(config);
            File outputFile = new File(featureDirectory, config.getMeasureName() + ".txt");
            try (FileWriter writer = new FileWriter(outputFile)) {
                writer.write(Double.toString(score));
            }
        }
    }

    /**
     * Scores the text pair with every configured measure and returns the scores as one Weka
     * instance.
     *
     * <p>The instance has one numeric attribute per configuration, named
     * {@code <targetPath>/<measureName>}, followed by a placeholder attribute {@code gold} that
     * is always set to 0. Each score is clamped to [0, 5] before it is stored.
     *
     * @param text1 first text of the pair
     * @param text2 second text of the pair
     * @param useLogFilter whether to pass the instance through a {@link LogFilter} before
     *        returning it
     * @return the (optionally filtered) feature instance
     * @throws Exception if preprocessing, scoring or filtering fails
     */
    public synchronized Instance generateFeatures(String text1, String text2, boolean useLogFilter) throws Exception {
        prepareCases(text1, text2);

        // attributes.get(i) belongs to featureConfigList.get(i); the gold placeholder comes last.
        ArrayList<Attribute> attributes = new ArrayList<>(this.featureConfigList.size() + 1);
        for (FeatureConfig config : this.featureConfigList) {
            attributes.add(new Attribute(config.getTargetPath() + "/" + config.getMeasureName()));
        }
        Attribute dummyGold = new Attribute("gold");
        attributes.add(dummyGold);

        Instances instances = new Instances("temp-relation", attributes, 10);
        DenseInstance instance = new DenseInstance(attributes.size());
        for (int i = 0; i < this.featureConfigList.size(); i++) {
            double s = clamp(computeScore(this.featureConfigList.get(i)));
            instance.setValue(attributes.get(i), s);
        }
        instance.setValue(dummyGold, 0.0);
        instances.add(instance);

        if (useLogFilter) {
            LogFilter logFilter = new LogFilter();
            logFilter.setInputFormat(instances);
            instances = Filter.useFilter(instances, logFilter);
        }
        return instances.get(0);
    }

    /**
     * Describes the preprocessing aggregate: segmentation, POS tagging and lemmatization, each
     * applied to {@code View1} and {@code View2}, optionally followed by stopword removal.
     *
     * @param aFilterStopwords whether to append the stopword remover for both views
     */
    private static AnalysisEngineDescription preprocessors(boolean aFilterStopwords) throws ResourceInitializationException {
        AggregateBuilder builder = new AggregateBuilder();

        AnalysisEngineDescription seg = AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class);
        builder.add(seg, INITIAL_VIEW, VIEW_1);
        builder.add(seg, INITIAL_VIEW, VIEW_2);

        AnalysisEngineDescription pos = AnalysisEngineFactory.createEngineDescription(OpenNlpPosTagger.class, "language", "en");
        builder.add(pos, INITIAL_VIEW, VIEW_1);
        builder.add(pos, INITIAL_VIEW, VIEW_2);

        AnalysisEngineDescription lem = AnalysisEngineFactory.createEngineDescription(StanfordLemmatizer.class);
        builder.add(lem, INITIAL_VIEW, VIEW_1);
        builder.add(lem, INITIAL_VIEW, VIEW_2);

        if (aFilterStopwords) {
            AnalysisEngineDescription stopw = AnalysisEngineFactory.createEngineDescription(StopWordRemover.class, "modelLocation", "classpath:/stopwords/stopwords_english_punctuation.txt");
            builder.add(stopw, INITIAL_VIEW, VIEW_1);
            builder.add(stopw, INITIAL_VIEW, VIEW_2);
        }
        return builder.createAggregateDescription();
    }

    /**
     * Batch mode: for every feature configuration, runs a reader/preprocessing/scorer/writer
     * pipeline over the given datasets and writes the scores to
     * {@code target/features/<mode>/<target>/<targetPath>/<measureName>.txt}. A configuration
     * whose output file already exists is skipped. Progress is printed to standard output.
     *
     * @param aModelBase base location used to build the paths of the character n-gram IDF files
     * @param target dataset name used in the output path and in the IDF file name
     * @param datasets datasets whose input files are read from
     *        {@code classpath:/datasets/semeval-2012/<mode>/STS.input.<dataset>.txt}
     * @param mode mode whose lower-cased name is used in input and output paths
     * @throws Exception if a configuration cannot be created or a pipeline run fails
     */
    public static void generateFeatures(String aModelBase, VariableDisambiguationConstants.Dataset target, List<VariableDisambiguationConstants.Dataset> datasets, VariableDisambiguationConstants.Mode mode) throws Exception {
        String modeName = lowerCase(mode);
        for (FeatureConfig config : getFeatureConfigs(aModelBase, target, mode)) {
            System.out.println(config.getMeasureName());
            File outputFile = new File(FEATURE_ROOT + modeName + "/" + target + "/" + config.getTargetPath() + "/" + config.getMeasureName() + ".txt");
            if (outputFile.exists()) {
                System.out.println(" - skipped, feature already generated");
                continue;
            }

            List<String> datasetLocations = new ArrayList<>();
            for (VariableDisambiguationConstants.Dataset dataset : datasets) {
                datasetLocations.add("classpath:/datasets/semeval-2012/" + modeName + "/STS.input." + dataset.toString() + ".txt");
            }

            CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(SemEvalCorpusReader.class,
                    "inputFiles", datasetLocations,
                    "CombinationStrategy", CombinationReader.CombinationStrategy.SAME_ROW_ONLY.toString(),
                    "Language", "en");
            AnalysisEngineDescription preprocessing = preprocessors(config.filterStopwords());
            AnalysisEngineDescription scorer = scorerDescription(config);
            AnalysisEngineDescription writer = AnalysisEngineFactory.createEngineDescription(SimilarityScoreWriter.class,
                    "OutputFile", outputFile.getAbsolutePath(),
                    "OutputScoresOnly", true);
            SimplePipeline.runPipeline(reader, preprocessing, scorer, writer);
            System.out.println(" - done");
        }
        System.out.println("Successful.");
    }

    /**
     * Builds a "string" feature configuration around a {@link SimpleTextSimilarityResource} in
     * text mode; the measure name is the simple class name of the comparator.
     */
    private static FeatureConfig stringMeasure(Class<?> comparatorClass) throws Exception {
        return new FeatureConfig(
                ExternalResourceFactory.createExternalResourceDescription(SimpleTextSimilarityResource.class, new Object[]{"Mode", "text", "TextSimilarityMeasure", comparatorClass.getName()}),
                null, false, "string", comparatorClass.getSimpleName());
    }

    /**
     * Assembles the list of feature configurations: string measures, character n-grams for
     * every n in {@link #CHAR_NGRAMS_N}, and word n-gram containment/Jaccard measures. The list
     * is sorted by target path and then measure name, so its order is stable.
     *
     * @param aModelBase base location of the character n-gram IDF files
     * @param dataset dataset whose name selects the IDF file
     * @param mode mode whose lower-cased name selects the IDF directory
     */
    private static List<FeatureConfig> getFeatureConfigs(String aModelBase, VariableDisambiguationConstants.Dataset dataset, VariableDisambiguationConstants.Mode mode) throws Exception {
        // Explicit Object[] arguments are kept for the resource factory so that overload
        // selection stays exactly as in the original code.
        List<FeatureConfig> configs = new ArrayList<>();

        configs.add(new FeatureConfig(
                ExternalResourceFactory.createExternalResourceDescription(GreedyStringTilingMeasureResource.class, new Object[]{"MinMatchLength", "3"}),
                null, false, "string", "GreedyStringTiling_3"));
        configs.add(stringMeasure(LongestCommonSubsequenceComparator.class));
        configs.add(stringMeasure(LongestCommonSubsequenceNormComparator.class));
        configs.add(stringMeasure(LongestCommonSubstringComparator.class));

        for (int n : CHAR_NGRAMS_N) {
            String idfFile = aModelBase + "/character-ngrams-idf/" + lowerCase(mode) + "/" + n + "/" + dataset.toString() + ".txt";
            configs.add(new FeatureConfig(
                    ExternalResourceFactory.createExternalResourceDescription(CharacterNGramResource.class, new Object[]{"N", Integer.toString(n), "IdfValuesFile", idfFile}),
                    null, false, "n-grams", "CharacterNGramMeasure_" + n));
        }
        for (int n : new int[]{1, 2}) {
            configs.add(new FeatureConfig(
                    ExternalResourceFactory.createExternalResourceDescription(WordNGramContainmentResource.class, new Object[]{"N", Integer.toString(n)}),
                    Token.class.getName(), true, "n-grams", "WordNGramContainmentMeasure_" + n + "_stopword-filtered"));
        }
        for (int n : new int[]{1, 3, 4}) {
            configs.add(new FeatureConfig(
                    ExternalResourceFactory.createExternalResourceDescription(WordNGramJaccardResource.class, new Object[]{"N", Integer.toString(n)}),
                    Token.class.getName(), false, "n-grams", "WordNGramJaccardMeasure_" + n));
        }
        for (int n : new int[]{2, 4}) {
            configs.add(new FeatureConfig(
                    ExternalResourceFactory.createExternalResourceDescription(WordNGramJaccardResource.class, new Object[]{"N", Integer.toString(n)}),
                    Token.class.getName(), true, "n-grams", "WordNGramJaccardMeasure_" + n + "_stopword-filtered"));
        }

        configs.sort(Comparator.comparing(FeatureConfig::getTargetPath).thenComparing(FeatureConfig::getMeasureName));
        return configs;
    }
}

