/*
 * Decompiled with CFR 0.152.
 */
package eu.openminted.uc.socialsciences.variabledetection.pipelines;

import de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger;
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordLemmatizer;
import de.tudarmstadt.ukp.dkpro.core.stopwordremover.StopWordRemover;
import de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter;
import eu.openminted.uc.socialsciences.variabledetection.features.LuceneLemmaNGram;
import eu.openminted.uc.socialsciences.variabledetection.pipelines.AbstractPipeline;
import eu.openminted.uc.socialsciences.variabledetection.uima.io.XmlCorpusAllDocsReader;
import java.io.File;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.resource.ResourceInitializationException;
import org.dkpro.lab.Lab;
import org.dkpro.lab.task.BatchTask;
import org.dkpro.lab.task.Dimension;
import org.dkpro.lab.task.ParameterSpace;
import org.dkpro.lab.task.Task;
import org.dkpro.tc.api.features.TcFeature;
import org.dkpro.tc.api.features.TcFeatureFactory;
import org.dkpro.tc.api.features.TcFeatureSet;
import org.dkpro.tc.core.Constants;
import org.dkpro.tc.features.entityrecognition.NEFeatureExtractor;
import org.dkpro.tc.features.ngram.LuceneNGram;
import org.dkpro.tc.features.ngram.LuceneSkipNGram;
import org.dkpro.tc.ml.ExperimentSaveModel;
import org.dkpro.tc.ml.weka.WekaClassificationAdapter;
import weka.classifiers.bayes.NaiveBayes;

/**
 * Trains a single-label, document-mode Weka classifier on the variable-detection
 * training corpus and stores the resulting model under {@link #modelPath}.
 *
 * <p>The training corpus location defaults to {@code CORPUS_FILEPATH_TRAIN} but can be
 * overridden, either by passing a path as the first command-line argument to
 * {@link #main(String[])} or by calling {@link #getParameterSpace(String)} directly.
 */
public class DetectionOnlyTrainingPipeline
        extends AbstractPipeline
        implements Constants {
    /**
     * Default training corpus location, used when no path is supplied by the caller.
     * NOTE(review): this is a machine-specific absolute path; callers on other machines
     * must supply their own location.
     */
    private static final String CORPUS_FILEPATH_TRAIN = "/home/local/UKP/kiaeeha/workspace/Datasets/openminted/uc-ss/variable-detection/detection/Full_ALLDOCS-train.xml";
    /** Language code handed to the corpus reader and to the segmenter. */
    private static final String LANGUAGE_CODE = "en";
    /** Base name of the experiment; the batch task name is derived from it. */
    private static final String EXPERIMENT_NAME = "AllbusVariableDetection";
    /** Directory handed to {@link ExperimentSaveModel} as the model output location. */
    public static final File modelPath = new File("target/model");

    /**
     * Command-line entry point: checks the DKPro home setting, builds the parameter
     * space and runs the train-and-save experiment.
     *
     * @param args optional; if a first element is present and non-empty it is used as the
     *             training corpus location instead of the built-in default
     * @throws Exception if building the parameter space or running the experiment fails
     */
    public static void main(String[] args) throws Exception {
        assertDkproHomeVariableIsSet();

        // Fall back to the built-in default so argument-less invocations behave as before.
        String corpusFilePath = CORPUS_FILEPATH_TRAIN;
        if (args != null && args.length > 0 && args[0] != null && !args[0].isEmpty()) {
            corpusFilePath = args[0];
        }

        ParameterSpace pSpace = getParameterSpace(corpusFilePath);
        DetectionOnlyTrainingPipeline experiment = new DetectionOnlyTrainingPipeline();
        experiment.runTrainSaveModel(pSpace);
    }

    /**
     * Builds the parameter space for the default training corpus location.
     *
     * @return the parameter space combining readers, learning mode, feature mode,
     *         feature set and classification arguments
     * @throws ResourceInitializationException if the reader description cannot be created
     */
    public static ParameterSpace getParameterSpace() throws ResourceInitializationException {
        return getParameterSpace(CORPUS_FILEPATH_TRAIN);
    }

    /**
     * Builds the parameter space for a caller-supplied training corpus location.
     *
     * @param corpusFilePath location of the training corpus, passed to the reader as its
     *                       {@code sourceLocation}; must not be {@code null} or empty
     * @return the parameter space combining readers, learning mode, feature mode,
     *         feature set and classification arguments
     * @throws IllegalArgumentException if {@code corpusFilePath} is {@code null} or empty
     * @throws ResourceInitializationException if the reader description cannot be created
     */
    public static ParameterSpace getParameterSpace(String corpusFilePath) throws ResourceInitializationException {
        Dimension<Map<String, Object>> dimReaders = createReadersDimension(corpusFilePath);
        Dimension<List<String>> dimClassificationArgs = createClassifiersDimension();
        Dimension<TcFeatureSet> dimFeatureSets = createFeatureExtractorsDimension();
        return new ParameterSpace(
                dimReaders,
                Dimension.create("learningMode", "singleLabel"),
                Dimension.create("featureMode", "document"),
                dimFeatureSets,
                dimClassificationArgs);
    }

    /**
     * Creates the feature-set dimension: token n-grams (1-3), lemma n-grams (2-3) and
     * skip n-grams (2-3), each configured with {@code ngramUseTopK = 50}, plus the
     * named-entity feature extractor without parameters.
     */
    private static Dimension<TcFeatureSet> createFeatureExtractorsDimension() {
        TcFeature tokenNGrams = TcFeatureFactory.create(LuceneNGram.class,
                "ngramUseTopK", 50, "ngramMinN", 1, "ngramMaxN", 3);
        TcFeature lemmaNGrams = TcFeatureFactory.create(LuceneLemmaNGram.class,
                "ngramUseTopK", 50, "ngramMinN", 2, "ngramMaxN", 3);
        TcFeature skipNGrams = TcFeatureFactory.create(LuceneSkipNGram.class,
                "ngramUseTopK", 50, "ngramMinN", 2, "ngramMaxN", 3);
        TcFeature namedEntities = TcFeatureFactory.create(NEFeatureExtractor.class);

        return Dimension.create("featureSet",
                new TcFeatureSet(tokenNGrams, lemmaNGrams, skipNGrams, namedEntities));
    }

    /**
     * Creates the classification-arguments dimension with a single configuration:
     * Weka's {@link NaiveBayes} followed by the option {@code -K}.
     * NOTE(review): {@code -K} presumably selects the kernel density estimator; confirm
     * against the Weka version in use.
     */
    private static Dimension<List<String>> createClassifiersDimension() {
        List<String> naiveBayesArgs = Arrays.asList(NaiveBayes.class.getName(), "-K");
        return Dimension.create("classificationArguments", naiveBayesArgs);
    }

    /**
     * Creates the readers dimension containing only a training reader.
     *
     * @param corpusFilePath location of the training corpus; must not be {@code null} or empty
     * @return a bundle dimension named {@code readers} with the entry {@code readerTrain}
     * @throws IllegalArgumentException if {@code corpusFilePath} is {@code null} or empty
     * @throws ResourceInitializationException if the reader description cannot be created
     */
    private static Dimension<Map<String, Object>> createReadersDimension(String corpusFilePath) throws ResourceInitializationException {
        if (corpusFilePath == null || corpusFilePath.isEmpty()) {
            throw new IllegalArgumentException("corpusFilePath must not be null or empty");
        }

        CollectionReaderDescription readerTrain = CollectionReaderFactory.createReaderDescription(
                XmlCorpusAllDocsReader.class,
                "includeTargetAndOutcome", true,
                "sourceLocation", corpusFilePath,
                "language", LANGUAGE_CODE);

        // Value type is Object so the bundle matches the declared Dimension<Map<String, Object>>.
        Map<String, Object> dimReaders = new HashMap<String, Object>();
        dimReaders.put("readerTrain", readerTrain);
        return Dimension.createBundle("readers", dimReaders);
    }

    /**
     * Configures an {@link ExperimentSaveModel} batch task with this pipeline's
     * preprocessing and the given parameter space, then runs it through the lab.
     * The execution policy is set to {@code RUN_AGAIN}.
     *
     * @param pSpace parameter space to train with
     * @throws Exception if preprocessing cannot be created or the lab run fails
     */
    protected void runTrainSaveModel(ParameterSpace pSpace) throws Exception {
        ExperimentSaveModel batch = new ExperimentSaveModel(EXPERIMENT_NAME + "-TrainSave", WekaClassificationAdapter.class, modelPath);
        batch.setPreprocessing(this.getPreprocessing());
        batch.setParameterSpace(pSpace);
        batch.setExecutionPolicy(BatchTask.ExecutionPolicy.RUN_AGAIN);
        Lab.getInstance().run(batch);
    }

    /**
     * Builds the aggregate preprocessing engine. The components are added in this order:
     * segmenter, POS tagger, lemmatizer, stop-word remover.
     *
     * @return aggregate analysis engine description for preprocessing
     * @throws ResourceInitializationException if any component description cannot be created
     */
    protected AnalysisEngineDescription getPreprocessing() throws ResourceInitializationException {
        AnalysisEngineDescription segmenter = AnalysisEngineFactory.createEngineDescription(
                BreakIteratorSegmenter.class, "language", LANGUAGE_CODE);
        AnalysisEngineDescription posTagger = AnalysisEngineFactory.createEngineDescription(
                OpenNlpPosTagger.class);
        AnalysisEngineDescription lemmatizer = AnalysisEngineFactory.createEngineDescription(
                StanfordLemmatizer.class);
        AnalysisEngineDescription stopWordRemover = AnalysisEngineFactory.createEngineDescription(
                StopWordRemover.class, "modelLocation", "classpath:/stopwords/english.txt");

        return AnalysisEngineFactory.createEngineDescription(
                segmenter, posTagger, lemmatizer, stopWordRemover);
    }
}

