package edu.stanford.nlp.ie.machinereading.domains.ace;

import edu.stanford.nlp.classify.LinearClassifier;
import edu.stanford.nlp.ie.machinereading.GenericDataSetReader;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceCharSeq;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceDocument;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceEntityMention;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceEventMention;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceRelationMention;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceRelationMentionArgument;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceToken;
import edu.stanford.nlp.ie.machinereading.structure.AnnotationUtils;
import edu.stanford.nlp.ie.machinereading.structure.EntityMention;
import edu.stanford.nlp.ie.machinereading.structure.EventMention;
import edu.stanford.nlp.ie.machinereading.structure.ExtractionObject;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ie.machinereading.structure.RelationMention;
import edu.stanford.nlp.ie.machinereading.structure.Span;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.StringUtils;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import org.archive.format.warc.WARCConstants;
import org.archive.url.UsableURIFactory;
import org.xml.sax.SAXException;

/* loaded from: input_file:edu/stanford/nlp/ie/machinereading/domains/ace/AceReader.class */
/**
 * Dataset reader that loads ACE annotation files through {@link AceDocument} and converts
 * them into sentence-level {@link Annotation}s carrying entity, relation and event mentions.
 *
 * <p>While reading, the instance accumulates a number of per-type counters (entity, relation
 * and event mentions, adjacent same-type mentions, relations between two {@code NAM} mentions,
 * and mention types). They are only printed when {@link #VERBOSE} is enabled.
 */
public class AceReader extends GenericDataSetReader {
    /** Compile-time switch: when true, the collected counters are logged at the end of {@link #read(String)}. */
    private static final boolean VERBOSE = false;

    /** Version string that selects the three-argument {@code AceDocument.parseDocument} overload. */
    private static final String ACE2004_VERSION = "ACE2004";

    /** Version used by the constructors that do not take an explicit version. */
    private static final String DEFAULT_ACE_VERSION = "ACE2005";

    /** Mention type that both arguments must have for a relation to be counted as a "name relation". */
    private static final String NAME_MENTION_TYPE = "NAM";

    private final Counter<String> entityCounts = new ClassicCounter<>();
    private final Counter<String> adjacentEntityMentions = new ClassicCounter<>();
    private final Counter<String> relationCounts = new ClassicCounter<>();
    private final Counter<String> nameRelationCounts = new ClassicCounter<>();
    private final Counter<String> eventCounts = new ClassicCounter<>();
    private final Counter<String> mentionTypeCounts = new ClassicCounter<>();
    private final String aceVersion;

    /** Creates a reader without a pipeline, with the boolean constructor flag set to {@code true}. */
    public AceReader() {
        this(null, true);
    }

    /**
     * Creates a reader for the default ACE version ({@code "ACE2005"}).
     *
     * @param processor  pipeline handed to the {@link GenericDataSetReader} constructor
     * @param preprocess boolean flag handed to the {@link GenericDataSetReader} constructor
     */
    public AceReader(StanfordCoreNLP processor, boolean preprocess) {
        this(processor, preprocess, DEFAULT_ACE_VERSION);
    }

    /**
     * Creates a reader for a specific ACE version.
     *
     * @param processor  pipeline handed to the {@link GenericDataSetReader} constructor
     * @param preprocess boolean flag handed to the {@link GenericDataSetReader} constructor
     * @param version    ACE version label; {@code "ACE2004"} selects the version-aware parser,
     *                   any other value (including {@code null}) uses the default parser
     */
    public AceReader(StanfordCoreNLP processor, boolean preprocess, String version) {
        super(processor, preprocess, false, true);
        this.logger = Logger.getLogger(AceReader.class.getName());
        // Only report problems by default; main() raises the level to INFO.
        this.logger.setLevel(Level.SEVERE);
        this.aceVersion = version;
    }

    /**
     * Reads a single ACE file or, if {@code path} is a directory, every file below it that
     * {@code IOUtils.iterFilesRecursive} returns for {@code AceDocument.XML_EXT}, skipping
     * files whose name ends with {@code .UPC1.apf.xml}.
     *
     * @param path file or directory to read
     * @return a corpus-level annotation to which all converted sentences have been added
     * @throws IOException                  propagated from document parsing
     * @throws SAXException                 propagated from document parsing
     * @throws ParserConfigurationException propagated from document parsing
     */
    @Override // edu.stanford.nlp.ie.machinereading.GenericDataSetReader
    public Annotation read(String path) throws IOException, SAXException, ParserConfigurationException {
        List<CoreMap> allSentences = new ArrayList<>();
        File basePath = new File(path);
        assert basePath.exists();

        Annotation corpus = new Annotation("");
        if (basePath.isDirectory()) {
            for (File aceFile : IOUtils.iterFilesRecursive(basePath, AceDocument.XML_EXT)) {
                if (aceFile.getName().endsWith(".UPC1.apf.xml")) {
                    continue;
                }
                allSentences.addAll(readDocument(aceFile, corpus));
            }
        } else {
            allSentences.addAll(readDocument(basePath, corpus));
        }
        AnnotationUtils.addSentences(corpus, allSentences);

        for (CoreMap sentence : allSentences) {
            countAdjacentMentions(sentence);
            countNameRelations(sentence);
            countMentionTypes(sentence);
        }

        if (VERBOSE) {
            printCounter(this.entityCounts, "entity mention");
            printCounter(this.relationCounts, "relation mention");
            printCounter(this.adjacentEntityMentions, "adjacent entity mention");
            printCounter(this.nameRelationCounts, "name relation mention");
            printCounter(this.eventCounts, "event mention");
            printCounter(this.mentionTypeCounts, "mention type counts");
        }
        return corpus;
    }

    /** Increments {@link #mentionTypeCounts} once per entity mention in the sentence. */
    private void countMentionTypes(CoreMap sentence) {
        List<EntityMention> mentions = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
        if (mentions == null) {
            return;
        }
        for (EntityMention mention : mentions) {
            this.mentionTypeCounts.incrementCount(mention.getMentionType());
        }
    }

    /**
     * Counts, keyed by {@code type.subtype}, the relations in the sentence that have exactly two
     * entity arguments which are both of mention type {@code NAM}.
     */
    private void countNameRelations(CoreMap sentence) {
        List<RelationMention> relations = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
        if (relations == null) {
            return;
        }
        for (RelationMention relation : relations) {
            List<EntityMention> args = relation.getEntityMentionArgs();
            boolean bothNames = args.size() == 2
                    && NAME_MENTION_TYPE.equals(args.get(0).getMentionType())
                    && NAME_MENTION_TYPE.equals(args.get(1).getMentionType());
            if (bothNames) {
                this.nameRelationCounts.incrementCount(relation.getType() + "." + relation.getSubType());
            }
        }
    }

    /**
     * Counts ordered pairs of distinct mentions of the same type where the head of the first
     * ends exactly where the head of the second starts.
     */
    private void countAdjacentMentions(CoreMap sentence) {
        List<EntityMention> mentions = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
        if (mentions == null) {
            return;
        }
        for (EntityMention left : mentions) {
            for (EntityMention right : mentions) {
                boolean adjacent = left != right
                        && left.getHeadTokenEnd() == right.getHeadTokenStart()
                        && left.getType().equals(right.getType());
                if (adjacent) {
                    this.adjacentEntityMentions.incrementCount(left.getType());
                }
            }
        }
    }

    /** Logs every key/count pair of {@code counter} at INFO level under the given heading. */
    private void printCounter(Counter<String> counter, String message) {
        StringBuilder out = new StringBuilder();
        out.append(message).append(" counts:\n");
        for (String key : counter.keySet()) {
            out.append("\t").append(key).append(": ").append(counter.getCount(key)).append("\n");
        }
        this.logger.info(out.toString());
    }

    /** Reads one document, deriving the parser prefix by removing {@code AceDocument.XML_EXT} from the path. */
    private List<CoreMap> readDocument(File file, Annotation corpus) throws IOException, SAXException, ParserConfigurationException {
        String prefix = file.getAbsolutePath().replace(AceDocument.XML_EXT, "");
        return readDocument(prefix, corpus);
    }

    /**
     * Parses the document identified by {@code prefix} and converts each of its sentences,
     * together with the entity, relation and event mentions attached to it.
     *
     * @param prefix path prefix handed to {@code AceDocument.parseDocument}
     * @param corpus corpus annotation (not used by this method)
     * @return the converted sentences, in document order, without markup-only sentences
     */
    private List<CoreMap> readDocument(String prefix, Annotation corpus) throws IOException, SAXException, ParserConfigurationException {
        this.logger.info("Reading document: " + prefix);
        List<CoreMap> results = new ArrayList<>();

        AceDocument aceDocument = ACE2004_VERSION.equals(this.aceVersion)
                ? AceDocument.parseDocument(prefix, false, this.aceVersion)
                : AceDocument.parseDocument(prefix, false);
        String docId = aceDocument.getId();

        // Shared across all sentences of the document so relations and events can resolve
        // their arguments by ACE mention id.
        Map<String, EntityMention> entityMentionMap = Generics.newHashMap();

        // Index of the current sentence's first token within the document; ACE token indices
        // are shifted by this amount to obtain sentence-relative indices.
        int tokenOffset = 0;

        for (int sentenceIndex = 0; sentenceIndex < aceDocument.getSentenceCount(); sentenceIndex++) {
            List<AceToken> sentence = aceDocument.getSentence(sentenceIndex);

            List<CoreLabel> tokens = new ArrayList<>();
            StringBuilder text = new StringBuilder();
            for (int j = 0; j < sentence.size(); j++) {
                AceToken aceToken = sentence.get(j);
                CoreLabel token = new CoreLabel();
                token.setWord(aceToken.getLiteral());
                token.set(CoreAnnotations.ValueAnnotation.class, token.word());
                token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, aceToken.getByteStart());
                token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, aceToken.getByteEnd());
                tokens.add(token);
                if (j > 0) {
                    text.append(" ");
                }
                text.append(aceToken.getLiteral());
            }

            // A sentence made of a single <...> token is markup, not text: skip it, but still
            // account for its tokens so later sentence-relative indices stay correct.
            if (tokens.size() == 1) {
                String word = tokens.get(0).word();
                if (word.startsWith("<") && word.endsWith(">")) {
                    tokenOffset += sentence.size();
                    continue;
                }
            }

            String sentenceText = text.toString();
            Annotation sentenceAnnotation = new Annotation(sentenceText);
            sentenceAnnotation.set(CoreAnnotations.DocIDAnnotation.class, docId);
            sentenceAnnotation.set(CoreAnnotations.TokensAnnotation.class, tokens);
            this.logger.info("Reading sentence: \"" + sentenceText + "\"");

            List<AceEntityMention> entityMentions = aceDocument.getEntityMentions(sentenceIndex);
            List<AceRelationMention> relationMentions = aceDocument.getRelationMentions(sentenceIndex);
            List<AceEventMention> eventMentions = aceDocument.getEventMentions(sentenceIndex);

            for (AceEntityMention aceEntityMention : entityMentions) {
                String corefId = findCorefId(aceDocument, aceEntityMention);
                EntityMention converted = convertAceEntityMention(aceEntityMention, docId, sentenceAnnotation, tokenOffset, corefId);
                this.entityCounts.incrementCount(converted.getType());
                this.logger.info("CONVERTED MENTION HEAD SPAN: " + converted.getHead());
                this.logger.info("CONVERTED ENTITY MENTION: " + converted);
                AnnotationUtils.addEntityMention(sentenceAnnotation, converted);
                entityMentionMap.put(aceEntityMention.getId(), converted);
            }

            for (AceRelationMention aceRelationMention : relationMentions) {
                RelationMention converted = convertAceRelationMention(aceRelationMention, docId, sentenceAnnotation, entityMentionMap);
                if (converted != null) {
                    this.relationCounts.incrementCount(converted.getType());
                    this.logger.info("CONVERTED RELATION MENTION: " + converted);
                    AnnotationUtils.addRelationMention(sentenceAnnotation, converted);
                }
            }

            for (AceEventMention aceEventMention : eventMentions) {
                EventMention converted = convertAceEventMention(aceEventMention, docId, sentenceAnnotation, entityMentionMap, tokenOffset);
                if (converted != null) {
                    this.eventCounts.incrementCount(converted.getType());
                    this.logger.info("CONVERTED EVENT MENTION: " + converted);
                    AnnotationUtils.addEventMention(sentenceAnnotation, converted);
                }
            }

            results.add(sentenceAnnotation);
            tokenOffset += sentence.size();
        }
        return results;
    }

    /**
     * Returns the id of the first entity in the document whose mention list contains
     * {@code mention}, or the empty string if no entity contains it.
     */
    private String findCorefId(AceDocument aceDocument, AceEntityMention mention) {
        for (String entityId : aceDocument.getKeySetEntities()) {
            if (aceDocument.getEntity(entityId).getMentions().contains(mention)) {
                return entityId;
            }
        }
        return "";
    }

    /**
     * Converts an ACE event mention. The extent covers all of its arguments; the anchor is
     * converted to a sentence-relative span using {@code tokenOffset}.
     *
     * @return the converted mention, or {@code null} if one of its arguments is not present
     *         in {@code entityMap}
     */
    private EventMention convertAceEventMention(AceEventMention aceEventMention, String docId, CoreMap sentence, Map<String, EntityMention> entityMap, int tokenOffset) {
        List<String> roles = new ArrayList<>(aceEventMention.getRoles());
        List<ExtractionObject> convertedArgs = new ArrayList<>();

        int left = Integer.MAX_VALUE;
        int right = Integer.MIN_VALUE;
        for (String role : roles) {
            AceEntityMention arg = aceEventMention.getArg(role);
            EntityMention convertedArg = entityMap.get(arg.getId());
            if (convertedArg == null) {
                this.logger.severe("READER ERROR: Failed to find event argument with id " + arg.getId());
                this.logger.severe("This happens because a few event mentions illegally span multiple sentences. Will ignore this mention.");
                return null;
            }
            convertedArgs.add(convertedArg);
            left = Math.min(left, convertedArg.getExtentTokenStart());
            right = Math.max(right, convertedArg.getExtentTokenEnd());
        }

        AceCharSeq anchor = aceEventMention.getAnchor();
        // ACE end indices are inclusive; Span ends are exclusive, hence the +1.
        Span anchorSpan = new Span(anchor.getTokenStart() - tokenOffset, (anchor.getTokenEnd() + 1) - tokenOffset);
        ExtractionObject anchorObject = new ExtractionObject(aceEventMention.getId() + "-anchor", sentence, anchorSpan, "ANCHOR", null);

        return new EventMention(
                aceEventMention.getId(),
                sentence,
                new Span(left, right),
                aceEventMention.getParent().getType(),
                aceEventMention.getParent().getSubtype(),
                anchorObject,
                convertedArgs,
                roles);
    }

    /**
     * Converts an ACE relation mention whose arguments are exactly {@code arg-1} followed by
     * {@code arg-2} (case-insensitive). The extent covers both arguments.
     *
     * @return the converted mention, or {@code null} if an argument is not present in
     *         {@code entityMap} or the argument roles are not the expected pair
     */
    private RelationMention convertAceRelationMention(AceRelationMention aceRelationMention, String docId, CoreMap sentence, Map<String, EntityMention> entityMap) {
        List<ExtractionObject> convertedArgs = new ArrayList<>();
        List<String> argNames = new ArrayList<>();

        int left = Integer.MAX_VALUE;
        int right = Integer.MIN_VALUE;
        for (AceRelationMentionArgument arg : aceRelationMention.getArgs()) {
            String argId = arg.getContent().getId();
            EntityMention convertedArg = entityMap.get(argId);
            if (convertedArg == null) {
                this.logger.severe("READER ERROR: Failed to find relation argument with id " + argId);
                this.logger.severe("This happens because a few relation mentions illegally span multiple sentences. Will ignore this mention.");
                return null;
            }
            convertedArgs.add(convertedArg);
            argNames.add(arg.getRole());
            left = Math.min(left, convertedArg.getExtentTokenStart());
            right = Math.max(right, convertedArg.getExtentTokenEnd());
        }

        boolean wellFormed = argNames.size() == 2
                && argNames.get(0).equalsIgnoreCase("arg-1")
                && argNames.get(1).equalsIgnoreCase("arg-2");
        if (!wellFormed) {
            this.logger.severe("READER ERROR: Invalid succession of arguments in relation mention: " + argNames);
            this.logger.severe("ACE relations must have two arguments. Will ignore this mention.");
            return null;
        }

        return new RelationMention(
                aceRelationMention.getId(),
                sentence,
                new Span(left, right),
                aceRelationMention.getParent().getType(),
                aceRelationMention.getParent().getSubtype(),
                convertedArgs,
                null);
    }

    /**
     * Converts an ACE entity mention into sentence-relative extent and head spans. Spans that
     * fall outside the sentence are clamped to it (and reported), and the head span is clamped
     * to the extent.
     *
     * @param tokenOffset document index of the sentence's first token
     */
    private EntityMention convertAceEntityMention(AceEntityMention entityMention, String docId, CoreMap sentence, int tokenOffset) {
        AceCharSeq extent = entityMention.getExtent();
        AceCharSeq head = entityMention.getHead();
        int sentenceLength = sentence.get(CoreAnnotations.TokensAnnotation.class).size();

        // ACE end indices are inclusive; the spans built below use exclusive ends.
        int extentStart = extent.getTokenStart() - tokenOffset;
        int extentEnd = (extent.getTokenEnd() - tokenOffset) + 1;
        if (extentStart < 0) {
            this.logger.severe("READER ERROR: Invalid extent start " + extentStart + " for entity mention " + entityMention.getId() + " in document " + docId + " in sentence " + sentence);
            this.logger.severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
            extentStart = 0;
        }
        if (extentEnd > sentenceLength) {
            this.logger.severe("READER ERROR: Invalid extent end " + extentEnd + " for entity mention " + entityMention.getId() + " in document " + docId + " in sentence " + sentence);
            this.logger.severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
            extentEnd = sentenceLength;
        }

        int headStart = head.getTokenStart() - tokenOffset;
        int headEnd = (head.getTokenEnd() - tokenOffset) + 1;
        if (headStart < 0) {
            this.logger.severe("READER ERROR: Invalid head start " + headStart + " for entity mention " + entityMention.getId() + " in document " + docId + " in sentence " + sentence);
            this.logger.severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
            headStart = 0;
        }
        if (headEnd > sentenceLength) {
            this.logger.severe("READER ERROR: Invalid head end " + headEnd + " for entity mention " + entityMention.getId() + " in document " + docId + " in sentence " + sentence);
            this.logger.severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
            headEnd = sentenceLength;
        }

        // The head must lie inside the extent.
        headStart = Math.max(headStart, extentStart);
        headEnd = Math.min(headEnd, extentEnd);
        assert headStart < headEnd;

        return new EntityMention(
                entityMention.getId(),
                sentence,
                new Span(extentStart, extentEnd),
                new Span(headStart, headEnd),
                entityMention.getParent().getType(),
                entityMention.getParent().getSubtype(),
                entityMention.getLdctype());
    }

    /** Same as the four-argument overload, additionally setting the coreference id on the result. */
    private EntityMention convertAceEntityMention(AceEntityMention entityMention, String docId, CoreMap sentence, int tokenOffset, String corefID) {
        EntityMention converted = convertAceEntityMention(entityMention, docId, sentence, tokenOffset);
        converted.setCorefID(corefID);
        return converted;
    }

    /**
     * Command-line entry point: builds a pipeline from the arguments, enables INFO logging and
     * parses the corpus at the hard-coded path {@code /scr/nlp/data/ACE2005/}.
     */
    public static void main(String[] strArr) throws IOException {
        StanfordCoreNLP pipeline = new StanfordCoreNLP(StringUtils.argsToProperties(strArr), false);
        AceReader reader = new AceReader(pipeline, false);
        reader.setLoggerLevel(Level.INFO);
        reader.parse("/scr/nlp/data/ACE2005/");
        System.err.println("done");
    }
}
