/*
 * Decompiled with CFR 0.152.
 */
package eu.openminted.uc.socialsciences.variabledetection.uima.io;

import de.tudarmstadt.ukp.dkpro.core.api.io.JCasResourceCollectionReader_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.io.ResourceCollectionReaderBase;
import eu.openminted.uc.socialsciences.variabledetection.type.GoldVariableMention;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.jcas.JCas;
import org.dkpro.tc.api.type.TextClassificationOutcome;
import org.dkpro.tc.api.type.TextClassificationTarget;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

@ResourceMetaData(name="eu.openminted.uc.socialsciences.variabledetection.uima.io.XmlCorpusAllDocsReader", description="Collection reader for Variable mention XML file containing sentences from several documents", version="1.0.1", vendor="OpenMinTeD Project", copyright="Copyright 2016-2018\n                \n                Ubiquitous Knowledge Processing (UKP) Lab\n                Technische Universit\u00e4t Darmstadt\n                \n                GESIS \u2013 Leibniz-Institute for the Social Sciences\n                \n                OpenMinTeD - Open Mining of Text and Data Project http://openminted.eu/")
@EnhancedClassFile
public class XmlCorpusAllDocsReader
extends JCasResourceCollectionReader_ImplBase {
    public static final String PARAM_INCLUDE_TARGET_AND_OUTCOME = "includeTargetAndOutcome";
    @ConfigurationParameter(name="includeTargetAndOutcome", defaultValue={"false"})
    private boolean includeTargetAndOutcome;
    public static final String PARAM_INCLUDE_GOLD = "includeGold";
    @ConfigurationParameter(name="includeGold", defaultValue={"false"})
    private boolean includeGold;
    private Deque<DataRecord> dataQueue = new LinkedList<DataRecord>();
    private ResourceCollectionReaderBase.Resource res;
    private int count = 0;

    public void getNext(JCas aJCas) throws IOException, CollectionException {
        if (this.dataQueue.isEmpty()) {
            this.res = this.nextFile();
            InputStream is = this.res.getInputStream();
            Object object = null;
            try {
                this.fillDataQueue(is);
            }
            catch (Throwable throwable) {
                object = throwable;
                throw throwable;
            }
            finally {
                if (is != null) {
                    if (object != null) {
                        try {
                            is.close();
                        }
                        catch (Throwable throwable) {
                            ((Throwable)object).addSuppressed(throwable);
                        }
                    } else {
                        is.close();
                    }
                }
            }
            this.count = 0;
        }
        this.initCas(aJCas, this.res, Integer.toString(this.count));
        try {
            DataRecord data = this.dataQueue.pop();
            aJCas.setDocumentText(data.text);
            if (this.includeTargetAndOutcome) {
                TextClassificationOutcome outcome = new TextClassificationOutcome(aJCas);
                outcome.setOutcome(data.variablePresent ? "Yes" : "No");
                outcome.setWeight(1.0);
                outcome.addToIndexes();
                new TextClassificationTarget(aJCas, 0, aJCas.getDocumentText().length()).addToIndexes();
            }
            if (this.includeGold && data.variablePresent) {
                for (String varId : this.getMatchingVariableIds(data.originalLabel)) {
                    GoldVariableMention gold = new GoldVariableMention(aJCas, 0, aJCas.getDocumentText().length());
                    gold.setVariableId(varId);
                    gold.addToIndexes();
                }
            }
            ++this.count;
        }
        catch (CASRuntimeException e) {
            throw new CollectionException((Throwable)e);
        }
    }

    private List<String> getMatchingVariableIds(String aOriginalLabel) {
        String[] individualLabels;
        ArrayList<String> variableIDs = new ArrayList<String>();
        String labels = StringUtils.substring((String)aOriginalLabel, (int)1, (int)-1);
        for (String l : individualLabels = labels.split(",")) {
            String[] pair = l.split("-");
            if (!"Yes".equals(pair[1])) continue;
            variableIDs.add(pair[0]);
        }
        return variableIDs;
    }

    private void fillDataQueue(InputStream aInputStream) throws IOException {
        Document document;
        try {
            DocumentBuilderFactory xmlDocumentBuilderFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder xmlDocumentBuilder = xmlDocumentBuilderFactory.newDocumentBuilder();
            document = xmlDocumentBuilder.parse(new InputSource(aInputStream));
        }
        catch (ParserConfigurationException | SAXException e) {
            throw new IOException(e);
        }
        try {
            XPath xpath = XPathFactory.newInstance().newXPath();
            NodeList docNodes = (NodeList)xpath.compile("/docs//doc").evaluate(document, XPathConstants.NODESET);
            for (Node docNode = docNodes.item(0); docNode != null; docNode = docNode.getNextSibling()) {
                NamedNodeMap docAttributes = docNode.getAttributes();
                if (docAttributes != null && docAttributes.getNamedItem("lang") != null && !Objects.equals(this.getLanguage(), docAttributes.getNamedItem("lang").getTextContent())) {
                    this.getLogger().warn((Object)("Component language [" + this.getLanguage() + "] does not match sentence language [" + docAttributes.getNamedItem("lang").getTextContent() + "]."));
                }
                NodeList sentenceNodes = (NodeList)xpath.compile(".//s").evaluate(docNode, XPathConstants.NODESET);
                for (Node sentenceNode = sentenceNodes.item(0); sentenceNode != null; sentenceNode = sentenceNode.getNextSibling()) {
                    NamedNodeMap sentenceAttributes;
                    String correct;
                    if (sentenceNode.getTextContent().trim().isEmpty() || (correct = (sentenceAttributes = sentenceNode.getAttributes()).getNamedItem("correct").getTextContent().trim()).equals("NoSkip")) continue;
                    boolean variablePresent = !correct.equals("No");
                    DataRecord data = new DataRecord();
                    data.text = this.normalizeWhitespaces(sentenceNode.getTextContent());
                    data.variablePresent = variablePresent;
                    data.originalLabel = correct;
                    this.dataQueue.addLast(data);
                }
            }
        }
        catch (XPathExpressionException e) {
            throw new IOException("Problem with parsing the expression: " + e.getLocalizedMessage(), e);
        }
    }

    private String normalizeWhitespaces(String input) {
        return input.replaceAll("\\s+", " ").trim();
    }

    public boolean hasNext() throws IOException, CollectionException {
        this.getLogger().info((Object)("Processed: " + this.count + " / " + (this.dataQueue.size() + this.count)));
        return super.hasNext() || !this.dataQueue.isEmpty();
    }

    private static class DataRecord {
        String text;
        boolean variablePresent;
        String originalLabel;

        private DataRecord() {
        }
    }
}

