package org.dkpro.tc.features.style;

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceSpecifier;
import org.dkpro.tc.api.exception.TextClassificationException;
import org.dkpro.tc.api.features.DocumentFeatureExtractor;
import org.dkpro.tc.api.features.Feature;
import org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase;

/* loaded from: input_file:org/dkpro/tc/features/style/TopicWordsFeatureExtractor.class */
public class TopicWordsFeatureExtractor extends FeatureExtractorResource_ImplBase implements DocumentFeatureExtractor {
    public static final String PARAM_TOPIC_FILE_PATH = "topicFilePath";

    @ConfigurationParameter(name = PARAM_TOPIC_FILE_PATH, mandatory = true)
    private String topicFilePath;
    private String prefix;

    public Set<Feature> extract(JCas jCas) throws TextClassificationException {
        if (this.topicFilePath == null || this.topicFilePath.isEmpty()) {
            System.out.println("Path to word list must be set!");
        }
        HashSet hashSet = new HashSet();
        List<String> text = JCasUtil.toText(JCasUtil.select(jCas, Token.class));
        try {
            Iterator it = FileUtils.readLines(new File(this.topicFilePath)).iterator();
            while (it.hasNext()) {
                hashSet.addAll(countWordHits((String) it.next(), text));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return hashSet;
    }

    private List<Feature> countWordHits(String str, List<String> list) throws TextClassificationException {
        try {
            List readLines = FileUtils.readLines(new File(TopicWordsFeatureExtractor.class.getClassLoader().getResource("./" + str).getPath()), "utf-8");
            int i = 0;
            Iterator<String> it = list.iterator();
            while (it.hasNext()) {
                if (readLines.contains(it.next())) {
                    i++;
                }
            }
            double size = list.size();
            Feature[] featureArr = new Feature[1];
            featureArr[0] = new Feature(this.prefix + str, Double.valueOf(size > 0.0d ? i / size : 0.0d));
            return Arrays.asList(featureArr);
        } catch (IOException e) {
            throw new TextClassificationException(e);
        }
    }

    public boolean initialize(ResourceSpecifier resourceSpecifier, Map map) throws ResourceInitializationException {
        if (!super.initialize(resourceSpecifier, map)) {
            return false;
        }
        this.prefix = "TopicWords_";
        return true;
    }
}
