/*
 * Decompiled with CFR 0.152.
 */
package eu.openminted.uc.socialsciences.variabledetection.pipelines;

import eu.openminted.uc.socialsciences.variabledetection.features.CharacterNGramIdfValuesGenerator;
import eu.openminted.uc.socialsciences.variabledetection.features.FeatureGeneration;
import eu.openminted.uc.socialsciences.variabledetection.features.WordIdfValuesGenerator;
import eu.openminted.uc.socialsciences.variabledetection.pipelines.VariableDisambiguationConstants;
import eu.openminted.uc.socialsciences.variabledetection.similarity.LinearRegressionSimilarityMeasure;
import eu.openminted.uc.socialsciences.variabledetection.util.Features2Arff;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.Arrays;
import java.util.List;

/**
 * Command-line pipeline that generates training features, trains a
 * {@link LinearRegressionSimilarityMeasure} on the resulting ARFF file and
 * serializes the trained instance to a caller-supplied file.
 *
 * <p>Intermediate artifacts are read from and written to paths below
 * {@code target/models}, relative to the current working directory.
 */
public class DisambiguationOnlyTrainingPipeline {
    /** Dataset location constant; not referenced from within this class. */
    public static final String DATASET_DIR = "classpath:/datasets/semeval-2012";
    /** Gold-standard location constant; not referenced from within this class. */
    public static final String GOLDSTANDARD_DIR = "classpath:/goldstandards/semeval-2012";

    /** Directory passed to feature generation; the training ARFF file is read from below it. */
    private static final String MODELS_DIR = "target/models";

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} is the path the trained model is written to
     * @throws Exception if feature generation, training or saving fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Provide path to save model as the first argument!");
            System.exit(1);
        }
        new DisambiguationOnlyTrainingPipeline().run(args[0]);
    }

    /**
     * Runs the complete pipeline: feature generation, training, and saving.
     *
     * <p>The output location is validated and its parent directory created
     * <em>before</em> any feature generation or training starts, so that an
     * unusable path is reported immediately instead of after all the work has
     * been done.
     *
     * @param modelFileName path of the file the trained classifier is serialized to
     * @throws IllegalArgumentException if {@code modelFileName} is {@code null} or blank
     * @throws IOException if the parent directory of the model file cannot be created
     *         or the model file cannot be written
     * @throws Exception if feature generation or training fails
     */
    public void run(String modelFileName) throws Exception {
        if (modelFileName == null || modelFileName.trim().isEmpty()) {
            throw new IllegalArgumentException("modelFileName must not be null or empty");
        }
        DisambiguationOnlyTrainingPipeline.ensureParentDirectoryExists(modelFileName);

        this.generateFeaturesForTrainingData();
        LinearRegressionSimilarityMeasure classifier = DisambiguationOnlyTrainingPipeline.trainClassifier();
        this.saveClassifier(classifier, modelFileName);
    }

    /**
     * Creates the parent directory of the given file if it does not exist yet.
     *
     * @param aFilename path of the file that is going to be written
     * @throws IOException if the parent directory is missing and cannot be created
     */
    private static void ensureParentDirectoryExists(String aFilename) throws IOException {
        // Resolve against the working directory so a bare file name still has a parent.
        File parent = new File(aFilename).getAbsoluteFile().getParentFile();
        if (parent == null || parent.isDirectory()) {
            return;
        }
        // Re-check after mkdirs(): it also returns false when another process
        // created the directory in the meantime.
        if (!parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Could not create directory for model file: " + parent);
        }
    }

    /**
     * Serializes the classifier to the given file using Java object serialization.
     *
     * @param classifier the trained classifier to write
     * @param aFilename path of the target file; an existing file is overwritten
     * @throws IOException if the file cannot be opened or written
     */
    private void saveClassifier(LinearRegressionSimilarityMeasure classifier, String aFilename) throws IOException {
        try (ObjectOutputStream output = new ObjectOutputStream(new FileOutputStream(aFilename))) {
            output.writeObject(classifier);
        }
    }

    /**
     * Produces the training features for the combined {@code ALL} dataset built
     * from {@code MSRpar}, {@code MSRvid} and {@code SMTeuroparl}.
     *
     * <p>Steps, in order: character n-gram IDF scores for every {@code n} in
     * {@link FeatureGeneration#CHAR_NGRAMS_N}, word IDF scores, feature
     * generation into {@code target/models}, and conversion of the features
     * to an ARFF file.
     *
     * @throws Exception if any of the delegated steps fails
     */
    private void generateFeaturesForTrainingData() throws Exception {
        VariableDisambiguationConstants.Mode mode = VariableDisambiguationConstants.Mode.TRAIN;
        List<VariableDisambiguationConstants.Dataset> datasets = Arrays.asList(
                VariableDisambiguationConstants.Dataset.MSRpar,
                VariableDisambiguationConstants.Dataset.MSRvid,
                VariableDisambiguationConstants.Dataset.SMTeuroparl);

        for (int n : FeatureGeneration.CHAR_NGRAMS_N) {
            CharacterNGramIdfValuesGenerator.computeIdfScores(VariableDisambiguationConstants.Dataset.ALL, mode, datasets, n);
        }
        WordIdfValuesGenerator.computeIdfScores(VariableDisambiguationConstants.Dataset.ALL, mode, datasets);
        FeatureGeneration.generateFeatures(MODELS_DIR, VariableDisambiguationConstants.Dataset.ALL, datasets, mode);
        Features2Arff.toArffFile(VariableDisambiguationConstants.Mode.TRAIN, VariableDisambiguationConstants.Dataset.ALL);
    }

    /**
     * Builds a classifier from the training ARFF file
     * {@code target/models/train/<ALL>.arff}, where {@code <ALL>} is the string
     * form of {@code VariableDisambiguationConstants.Dataset.ALL}.
     *
     * @return the newly constructed classifier
     * @throws Exception if constructing the classifier fails
     */
    public static LinearRegressionSimilarityMeasure trainClassifier() throws Exception {
        File trainingFile = new File(MODELS_DIR + "/train/" + VariableDisambiguationConstants.Dataset.ALL.toString() + ".arff");
        // NOTE(review): the meaning of the boolean flag is defined by
        // LinearRegressionSimilarityMeasure and is not visible here — confirm.
        return new LinearRegressionSimilarityMeasure(trainingFile, true);
    }
}

