/*
 * Decompiled with CFR 0.152.
 */
package eu.openminted.uc.socialsciences.variabledetection.uima.io;

import de.tudarmstadt.ukp.dkpro.core.api.io.ResourceCollectionReaderBase;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import eu.openminted.uc.socialsciences.variabledetection.type.VariableMention;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.io.IOUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.internal.EnhancedClassFile;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Collection reader that turns one variable-mention XML corpus file into one CAS.
 *
 * <p>The reader looks for a {@code <sample>} element under {@code <testset>} (directly or
 * inside a {@code <topic>}), and reads its {@code <doc>} child. Every non-blank {@code <s>}
 * element below {@code <doc>} is appended to the document text (whitespace-normalized and
 * separated by a single space) and indexed as a {@link Sentence}. Every non-blank
 * {@code <variable>} element below the sample is indexed as a {@link VariableMention} whose
 * span is the last sentence carrying {@code correct="Yes"}, or {@code [0, 0)} when the
 * document has no such sentence.
 */
@ResourceMetaData(name="eu.openminted.uc.socialsciences.variabledetection.uima.io.XmlCorpusReader", description="Collection reader for Variable mention XML corpus", version="1.0.1", vendor="OpenMinTeD Project", copyright="Copyright 2016-2018\n                \n                Ubiquitous Knowledge Processing (UKP) Lab\n                Technische Universit\u00e4t Darmstadt\n                \n                GESIS \u2013 Leibniz-Institute for the Social Sciences\n                \n                OpenMinTeD - Open Mining of Text and Data Project http://openminted.eu/")
@EnhancedClassFile
public class XmlCorpusReader
extends ResourceCollectionReaderBase {
    /**
     * Parser features switched off before parsing so that corpus files cannot pull in
     * external entities or external DTDs (XXE hardening).
     */
    private static final String[] DISABLED_XML_FEATURES = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };

    /**
     * Delegates initialization to the superclass.
     *
     * @param aContext the UIMA context handed to the superclass
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
    }

    /**
     * Reads the next resource into the given CAS.
     *
     * <p>If the {@code language} configuration parameter is set, it overrides the language
     * taken from the XML document.
     *
     * @param aCAS the CAS to fill
     * @throws IOException if the resource cannot be read or is not a usable corpus document
     * @throws CollectionException declared for callers; not thrown directly by this method
     */
    public void getNext(CAS aCAS) throws IOException, CollectionException {
        ResourceCollectionReaderBase.Resource res = this.nextFile();
        this.initCas(aCAS, res);
        InputStream is = null;
        try {
            is = res.getInputStream();
            this.process(is, aCAS);
            Object configuredLanguage = this.getConfigParameterValue("language");
            if (configuredLanguage != null) {
                aCAS.setDocumentLanguage((String)configuredLanguage);
            }
        } finally {
            // Best-effort close: a failure while closing must not mask a parse error.
            IOUtils.closeQuietly((InputStream)is);
        }
    }

    /**
     * Parses the XML stream and populates the CAS with text, sentences, variable mentions,
     * document title and language.
     *
     * @param aInputStream the XML corpus document
     * @param aCAS the CAS to fill
     * @throws IOException if the XML is malformed, lacks the {@code <sample>}/{@code <doc>}
     *         structure, or the JCas cannot be obtained
     */
    private void process(InputStream aInputStream, CAS aCAS) throws IOException {
        Document document = this.parseXml(aInputStream);
        if (document == null) {
            return;
        }
        try {
            XPath xpath = XPathFactory.newInstance().newXPath();
            Node sampleNode = (Node)xpath.evaluate("//testset/sample", document, XPathConstants.NODE);
            if (sampleNode == null) {
                sampleNode = (Node)xpath.evaluate("//testset/topic/sample", document, XPathConstants.NODE);
            }
            if (sampleNode == null) {
                throw new IOException("No <sample> element found under <testset> or <testset>/<topic>");
            }
            Node docNode = (Node)xpath.evaluate("./doc", sampleNode, XPathConstants.NODE);
            if (docNode == null) {
                throw new IOException("The <sample> element has no <doc> child");
            }

            JCas jcas = aCAS.getJCas();

            Node titleNode = (Node)xpath.evaluate("./doc_title", docNode, XPathConstants.NODE);
            if (titleNode != null) {
                DocumentMetaData metadata = DocumentMetaData.get((JCas)jcas);
                metadata.setDocumentTitle(titleNode.getTextContent());
            }

            // Span of the sentence marked correct="Yes". Kept local so that offsets from a
            // previously read document can never leak into this one.
            int correctBegin = 0;
            int correctEnd = 0;
            StringBuilder text = new StringBuilder();

            // Iterate the XPath result itself (not the siblings of its first hit), so that
            // exactly the matched <s> elements are visited, wherever they are nested.
            NodeList sentenceNodes = (NodeList)xpath.evaluate(".//s", docNode, XPathConstants.NODESET);
            for (int i = 0; i < sentenceNodes.getLength(); ++i) {
                Node sentenceNode = sentenceNodes.item(i);
                String sentenceText = sentenceNode.getTextContent().trim();
                if (sentenceText.isEmpty()) {
                    continue;
                }
                int sentenceStart = text.length();
                text.append(this.normalizeWhitespaces(sentenceText));
                Sentence sentence = new Sentence(jcas, sentenceStart, text.length());
                String sentenceId = XmlCorpusReader.attributeValue(sentenceNode, "id");
                if (sentenceId != null) {
                    sentence.setId(sentenceId);
                }
                sentence.addToIndexes();
                if ("Yes".equals(XmlCorpusReader.attributeValue(sentenceNode, "correct"))) {
                    correctBegin = sentenceStart;
                    correctEnd = text.length();
                }
                // Separator between sentences; the next sentence starts after it.
                text.append(" ");
            }

            NodeList variableNodes = (NodeList)xpath.evaluate(".//variable", sampleNode, XPathConstants.NODESET);
            for (int i = 0; i < variableNodes.getLength(); ++i) {
                Node variableNode = variableNodes.item(i);
                if (variableNode.getTextContent().trim().isEmpty()) {
                    continue;
                }
                String label = XmlCorpusReader.childText(xpath, variableNode, "./v_label");
                String question = this.normalizeWhitespaces(XmlCorpusReader.childText(xpath, variableNode, "./v_question"));
                String subQuestion = this.normalizeWhitespaces(XmlCorpusReader.childText(xpath, variableNode, "./v_subquestion"));

                VariableMention variableMention = new VariableMention(jcas, correctBegin, correctEnd);
                variableMention.setVariableId(XmlCorpusReader.attributeValue(variableNode, "v_id"));
                variableMention.setCorrect(XmlCorpusReader.attributeValue(variableNode, "correct"));
                variableMention.setLabel(label);
                variableMention.setQuestion(question);
                variableMention.setSubQuestion(subQuestion);
                variableMention.setAnswers("");
                variableMention.addToIndexes();
            }

            jcas.setDocumentText(text.toString());
            String language = XmlCorpusReader.attributeValue(docNode, "lang");
            if (language != null) {
                aCAS.setDocumentLanguage(language);
            }
        } catch (XPathExpressionException e) {
            throw new IOException("Problem with parsing the expression: " + e.getLocalizedMessage(), e);
        } catch (CASException e) {
            throw new IOException(e);
        }
    }

    /**
     * Parses the stream into a DOM document with external entity and external DTD loading
     * disabled.
     *
     * @param aInputStream the XML input
     * @return the parsed document
     * @throws IOException if reading fails, or wrapping the parser configuration or SAX error
     */
    private Document parseXml(InputStream aInputStream) throws IOException {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            for (String feature : DISABLED_XML_FEATURES) {
                factory.setFeature(feature, false);
            }
            DocumentBuilder builder = factory.newDocumentBuilder();
            return builder.parse(new InputSource(aInputStream));
        } catch (ParserConfigurationException | SAXException e) {
            throw new IOException(e);
        }
    }

    /**
     * Returns the text of the named attribute, or {@code null} when the node has no
     * attributes or no attribute of that name.
     */
    private static String attributeValue(Node node, String name) {
        NamedNodeMap attributes = node.getAttributes();
        if (attributes == null) {
            return null;
        }
        Node attribute = attributes.getNamedItem(name);
        return attribute == null ? null : attribute.getTextContent();
    }

    /**
     * Returns the trimmed text of the first node matching {@code expression} relative to
     * {@code parent}, or the empty string when nothing matches.
     */
    private static String childText(XPath xpath, Node parent, String expression) throws XPathExpressionException {
        Node child = (Node)xpath.evaluate(expression, parent, XPathConstants.NODE);
        return child == null ? "" : child.getTextContent().trim();
    }

    /** Collapses every run of whitespace in {@code input} into a single space. */
    private String normalizeWhitespaces(String input) {
        return input.replaceAll("\\s+", " ");
    }
}

