/*
 * Decompiled with CFR 0.152.
 */
package eu.openminted.uc.socialsciences.variabledetection.pipelines;

import de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger;
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordLemmatizer;
import de.tudarmstadt.ukp.dkpro.core.stopwordremover.StopWordRemover;
import de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter;
import eu.openminted.uc.socialsciences.variabledetection.features.LuceneLemmaNGram;
import eu.openminted.uc.socialsciences.variabledetection.pipelines.AbstractPipeline;
import eu.openminted.uc.socialsciences.variabledetection.uima.io.XmlCorpusAllDocsReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.resource.ResourceInitializationException;
import org.dkpro.lab.Lab;
import org.dkpro.lab.task.Dimension;
import org.dkpro.lab.task.ParameterSpace;
import org.dkpro.lab.task.Task;
import org.dkpro.tc.api.features.TcFeature;
import org.dkpro.tc.api.features.TcFeatureFactory;
import org.dkpro.tc.api.features.TcFeatureSet;
import org.dkpro.tc.core.Constants;
import org.dkpro.tc.features.ngram.LuceneNGram;
import org.dkpro.tc.ml.ExperimentTrainTest;
import org.dkpro.tc.ml.report.BatchTrainTestReport;
import org.dkpro.tc.ml.weka.WekaClassificationAdapter;
import weka.attributeSelection.InfoGainAttributeEval;
import weka.attributeSelection.Ranker;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.functions.SMO;
import weka.classifiers.functions.supportVector.PolyKernel;
import weka.classifiers.meta.Bagging;
import weka.classifiers.trees.J48;

/**
 * Train/test experiment that builds a DKPro TC {@link ExperimentTrainTest} named
 * {@code "AllbusVariableDetection"} on top of the Weka classification adapter and runs it
 * through DKPro Lab.
 *
 * <p>The parameter space combines a pair of {@link XmlCorpusAllDocsReader} descriptions
 * (train and test corpus), single-label learning mode, document feature mode, one n-gram
 * feature set and three Weka classifier configurations.
 */
public class DetectionOnlyTrainAndTestPipeline
extends AbstractPipeline
implements Constants {
    /** Location of the training corpus, relative to the working directory. */
    private static final String CORPUS_FILEPATH_TRAIN = "../data/datasets/Full_ALLDOCS_English-train.xml";
    /** Location of the test corpus, relative to the working directory. */
    private static final String CORPUS_FILEPATH_TEST = "../data/datasets/Full_ALLDOCS_English-test.xml";
    /** Language code handed to the corpus readers and to the segmenter. */
    private static final String LANGUAGE_CODE = "en";
    /** Name under which the experiment is created. */
    private static final String EXPERIMENT_NAME = "AllbusVariableDetection";

    /**
     * Entry point: calls the inherited {@code assertDkproHomeVariableIsSet()} check, builds the
     * parameter space and runs the train/test experiment.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the check, the parameter space setup or the experiment run fails
     */
    public static void main(String[] args) throws Exception {
        assertDkproHomeVariableIsSet();
        ParameterSpace pSpace = getParameterSpace();
        new DetectionOnlyTrainAndTestPipeline().runTrainTest(pSpace);
    }

    /**
     * Builds the parameter space of the experiment: readers, learning mode
     * ({@code "singleLabel"}), feature mode ({@code "document"}), feature set and classifiers,
     * in that order.
     *
     * <p>The feature selection dimension is not part of this parameter space.
     *
     * @return the parameter space to hand to {@link #runTrainTest(ParameterSpace)}
     * @throws ResourceInitializationException if a reader description cannot be created
     */
    public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
        Dimension<Map<String, Object>> readers = createReadersDimension();
        Dimension<String> learningMode = Dimension.create("learningMode", "singleLabel");
        Dimension<String> featureMode = Dimension.create("featureMode", "document");
        Dimension<TcFeatureSet> featureSet = createFeatureExtractorsDimension();
        Dimension<List<String>> classifiers = createClassifiersDimension();
        return new ParameterSpace(readers, learningMode, featureMode, featureSet, classifiers);
    }

    /**
     * Builds a {@code "featureSelection"} bundle dimension: {@link Ranker} with the arguments
     * {@code -N 100} as feature searcher, {@link InfoGainAttributeEval} as attribute evaluator,
     * and {@code applySelection} set to {@code true}.
     *
     * <p>NOTE(review): nothing in this class calls this method, so feature selection is not
     * applied by {@link #getParameterSpace()}. Confirm whether that is intentional before
     * wiring it in.
     *
     * @return the feature selection bundle dimension
     */
    private static Dimension<Map<String, Object>> createFeatureSelectionDimension() {
        Map<String, Object> dimFeatureSelection = new HashMap<String, Object>();
        dimFeatureSelection.put("featureSearcher", Arrays.asList(Ranker.class.getName(), "-N", "100"));
        dimFeatureSelection.put("attributeEvaluator", Arrays.asList(InfoGainAttributeEval.class.getName()));
        dimFeatureSelection.put("applySelection", true);
        return Dimension.createBundle("featureSelection", dimFeatureSelection);
    }

    /**
     * Builds the {@code "featureSet"} dimension holding a single feature set with two
     * extractors: {@link LuceneNGram} configured with n from 1 to 3, and
     * {@link LuceneLemmaNGram} configured with n fixed to 3.
     *
     * @return the feature set dimension
     */
    private static Dimension<TcFeatureSet> createFeatureExtractorsDimension() {
        TcFeature tokenNGrams = TcFeatureFactory.create(LuceneNGram.class, "ngramMinN", 1, "ngramMaxN", 3);
        TcFeature lemmaNGrams = TcFeatureFactory.create(LuceneLemmaNGram.class, "ngramMinN", 3, "ngramMaxN", 3);
        // Passing the element directly (instead of an Object[]-cast array) lets the compiler
        // infer Dimension<TcFeatureSet> for the result.
        return Dimension.create("featureSet", new TcFeatureSet(tokenNGrams, lemmaNGrams));
    }

    /**
     * Builds the {@code "classificationArguments"} dimension with three Weka configurations,
     * each given as the classifier class name followed by its option strings: {@link SMO} with
     * a {@link PolyKernel}, {@link NaiveBayes} with {@code -K}, and {@link Bagging} over
     * {@link J48}.
     *
     * @return the classifier dimension; one experiment run is configured per list entry
     */
    private static Dimension<List<String>> createClassifiersDimension() {
        List<String> smo = Arrays.asList(SMO.class.getName(), "-C", "1.0", "-K", PolyKernel.class.getName() + " -C -1 -E 2");
        List<String> naiveBayes = Arrays.asList(NaiveBayes.class.getName(), "-K");
        // Everything after "--" is passed on to the base classifier named by -W (J48).
        List<String> baggedJ48 = Arrays.asList(Bagging.class.getName(), "-I", "2", "-W", J48.class.getName(), "--", "-C", "0.5", "-M", "2");
        return Dimension.create("classificationArguments", smo, naiveBayes, baggedJ48);
    }

    /**
     * Builds the {@code "readers"} bundle dimension with one {@link XmlCorpusAllDocsReader}
     * description for the training corpus ({@code "readerTrain"}) and one for the test corpus
     * ({@code "readerTest"}). Both readers get the same language code and have
     * {@code includeTargetAndOutcome} enabled.
     *
     * @return the readers bundle dimension
     * @throws ResourceInitializationException if a reader description cannot be created
     */
    private static Dimension<Map<String, Object>> createReadersDimension() throws ResourceInitializationException {
        // Declared as Map<String, Object> so it can be passed to createBundle without going
        // through a raw Map[] array.
        Map<String, Object> dimReaders = new HashMap<String, Object>();
        CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(
                XmlCorpusAllDocsReader.class,
                "includeTargetAndOutcome", true,
                "sourceLocation", CORPUS_FILEPATH_TRAIN,
                "language", LANGUAGE_CODE);
        dimReaders.put("readerTrain", readerTrain);
        CollectionReaderDescription readerTest = CollectionReaderFactory.createReaderDescription(
                XmlCorpusAllDocsReader.class,
                "includeTargetAndOutcome", true,
                "sourceLocation", CORPUS_FILEPATH_TEST,
                "language", LANGUAGE_CODE);
        dimReaders.put("readerTest", readerTest);
        return Dimension.createBundle("readers", dimReaders);
    }

    /**
     * Creates the train/test experiment on the Weka classification adapter, attaches the
     * preprocessing pipeline, the given parameter space and a {@link BatchTrainTestReport},
     * and runs it through {@link Lab}.
     *
     * @param pSpace parameter space describing the runs to execute
     * @throws Exception if the preprocessing description cannot be built or the run fails
     */
    protected void runTrainTest(ParameterSpace pSpace) throws Exception {
        ExperimentTrainTest batch = new ExperimentTrainTest(EXPERIMENT_NAME, WekaClassificationAdapter.class);
        batch.setPreprocessing(getPreprocessing());
        batch.setParameterSpace(pSpace);
        batch.addReport(BatchTrainTestReport.class);
        Lab.getInstance().run(batch);
    }

    /**
     * Builds the aggregate preprocessing engine, in this order: {@link BreakIteratorSegmenter}
     * (with the language code), {@link OpenNlpPosTagger}, {@link StanfordLemmatizer} and
     * {@link StopWordRemover} (with the English stop word list from the classpath).
     *
     * @return the aggregate analysis engine description
     * @throws ResourceInitializationException if one of the engine descriptions cannot be created
     */
    protected AnalysisEngineDescription getPreprocessing() throws ResourceInitializationException {
        AnalysisEngineDescription segmenter = AnalysisEngineFactory.createEngineDescription(
                BreakIteratorSegmenter.class, "language", LANGUAGE_CODE);
        AnalysisEngineDescription posTagger = AnalysisEngineFactory.createEngineDescription(OpenNlpPosTagger.class);
        AnalysisEngineDescription lemmatizer = AnalysisEngineFactory.createEngineDescription(StanfordLemmatizer.class);
        AnalysisEngineDescription stopWordRemover = AnalysisEngineFactory.createEngineDescription(
                StopWordRemover.class, "modelLocation", "classpath:/stopwords/english.txt");
        return AnalysisEngineFactory.createEngineDescription(segmenter, posTagger, lemmatizer, stopWordRemover);
    }
}

