package edu.jhu.hlt.concrete.ingesters.conll;

import edu.jhu.hlt.concrete.Communication;
import edu.jhu.hlt.concrete.ingesters.base.IngestException;
import edu.jhu.hlt.concrete.ingesters.base.stream.StreamBasedStreamIngester;
import edu.jhu.hlt.concrete.util.Timing;
import edu.jhu.hlt.tutils.PennTreeReader;
import edu.jhu.hlt.utilt.ex.LoggedUncaughtExceptionHandler;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:edu/jhu/hlt/concrete/ingesters/conll/Ontonotes5.class */
/**
 * Ingester that joins the sentences provided by a {@link Conll2011} instance with
 * bracketed parses read from {@code .parse} files, replacing the word of every token
 * with the word of the corresponding parse leaf before the documents are converted to
 * {@link Communication}s.
 *
 * <p>Parses are keyed by {@code <docId>/<sentenceIndex>}, where {@code docId} is the
 * part of the parse file's path captured by {@link #ANNOTATIONS_PARSE_PATTERN} and
 * {@code sentenceIndex} is the zero-based position of the tree within that file.
 * Sentences are looked up with {@code getDocId() + "/" + getIndex()}, so both sides
 * must agree on that scheme.
 */
public class Ontonotes5 implements StreamBasedStreamIngester {
    private static final Logger LOGGER = LoggerFactory.getLogger(Ontonotes5.class);

    /** Group 1 is the text between {@code annotations/} and the trailing parse suffix. */
    private static final Pattern ANNOTATIONS_PARSE_PATTERN = Pattern.compile(".*annotations/(\\S+).parse");

    /** Source of the skeleton documents whose words are filled in from the parses. */
    private final Conll2011 skels;

    /** Parse index per sentence id ({@code <docId>/<sentenceIndex>}). */
    private final Map<String, PennTreeReader.Indexer> sentId2Parse;

    /** Debug flag supplied at construction time; not read inside this class. */
    public boolean debug;

    /** Not read inside this class; kept for callers that set or inspect it. */
    public boolean showAllFileReads;

    /**
     * Creates an ingester with {@link #debug} disabled.
     *
     * @param conll2011 source of the skeleton documents
     * @param path directory that directly contains the {@code .parse} files
     * @throws IOException if the directory or one of the parse files cannot be read
     * @throws IllegalArgumentException if {@code path} is not a directory
     */
    public Ontonotes5(Conll2011 conll2011, Path path) throws IOException {
        this(conll2011, path, false);
    }

    /**
     * Reads every {@code .parse} file that sits directly in {@code path} (the listing is
     * not recursive) and indexes the trees it contains by sentence id.
     *
     * @param conll2011 source of the skeleton documents
     * @param path directory that directly contains the {@code .parse} files
     * @param z initial value of {@link #debug}
     * @throws IOException if the directory or one of the parse files cannot be read
     * @throws IllegalArgumentException if {@code path} is not a directory
     * @throws IllegalStateException if two trees map to the same sentence id
     */
    public Ontonotes5(Conll2011 conll2011, Path path, boolean z) throws IOException {
        if (!Files.isDirectory(path)) {
            throw new IllegalArgumentException("not a directory: " + path);
        }
        this.debug = z;
        this.showAllFileReads = false;
        this.skels = conll2011;
        this.sentId2Parse = new HashMap<>();
        LOGGER.info("reading parse information from {}", path);

        final List<Path> parseFiles;
        // Files.list holds an open directory handle until the stream is closed.
        try (Stream<Path> listing = Files.list(path)) {
            parseFiles = listing
                // Path.endsWith(String) compares whole name elements, so the suffix
                // check has to be done on the string form of the path.
                .filter(candidate -> candidate.toString().endsWith(".parse"))
                .collect(Collectors.toList());
        }

        int documentCount = 0;
        for (Path parseFile : parseFiles) {
            Matcher matcher = ANNOTATIONS_PARSE_PATTERN.matcher(parseFile.toString());
            if (!matcher.matches()) {
                LOGGER.debug("skipping {}: path does not match {}", parseFile, ANNOTATIONS_PARSE_PATTERN);
                continue;
            }
            documentCount++;
            LOGGER.debug("pf={}", parseFile);
            loadParses(matcher.group(1), parseFile);
        }
        LOGGER.info("done, read in {} parses in {} documents", this.sentId2Parse.size(), documentCount);
    }

    /**
     * Parses every tree in {@code parseFile} and registers it under
     * {@code docId + "/" + index}, with {@code index} counting trees from zero.
     */
    private void loadParses(String docId, Path parseFile) throws IOException {
        List<String> trees = splitIntoTrees(Files.readAllLines(parseFile));
        for (int index = 0; index < trees.size(); index++) {
            PennTreeReader.Node root = PennTreeReader.parse(trees.get(index));
            String sentenceId = docId + "/" + index;
            LOGGER.debug("id={}", sentenceId);
            if (this.sentId2Parse.put(sentenceId, new PennTreeReader.Indexer(root)) != null) {
                throw new IllegalStateException("duplicate parse id=" + sentenceId);
            }
        }
    }

    /**
     * Groups the lines of a parse file into one string per tree. An empty line ends the
     * current tree; the trimmed non-empty lines in between are concatenated. A final
     * tree that is not followed by an empty line is kept as well.
     */
    private static List<String> splitIntoTrees(List<String> lines) {
        List<String> trees = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        for (String line : lines) {
            if (line.isEmpty()) {
                // Every empty line closes a tree, even if nothing was accumulated, so the
                // sentence numbering stays aligned with the file's layout.
                trees.add(current.toString());
                current.setLength(0);
            } else {
                current.append(line.trim());
            }
        }
        if (current.length() > 0) {
            // The file ended without a terminating blank line; do not drop the last tree.
            trees.add(current.toString());
        }
        return trees;
    }

    /**
     * Overwrites the word of every token in the sentence with the word of the parse leaf
     * at the same position.
     *
     * <p>A parse without leaves is logged and skipped. A parse whose leaf count differs
     * from the sentence length is treated as a fatal inconsistency.
     *
     * @throws IllegalStateException if no parse is registered for the sentence
     * @throws RuntimeException if the number of leaves differs from the sentence length
     */
    private void setWords(Conll2011Sentence conll2011Sentence) {
        String sentenceId = conll2011Sentence.getDocId() + "/" + conll2011Sentence.getIndex();
        LOGGER.debug("id={}", sentenceId);
        PennTreeReader.Indexer indexer = this.sentId2Parse.get(sentenceId);
        if (indexer == null) {
            throw new IllegalStateException("no parse found for sentence id=" + sentenceId);
        }

        // NOTE(review): the meaning of the boolean passed to getLeaves is not visible
        // here (presumably it controls whether trace leaves are included) - confirm.
        List<PennTreeReader.Node> leaves = indexer.getLeaves(false);
        if (leaves.isEmpty()) {
            LOGGER.warn("bogus parse, skipping: {}", describeMismatch(conll2011Sentence, indexer, leaves.size()));
            return;
        }

        if (leaves.size() != conll2011Sentence.size()) {
            // Dump what we know about both sides before failing, to ease debugging.
            for (Conll2011Row row : conll2011Sentence.getWords()) {
                LOGGER.info(row.pos);
            }
            LOGGER.warn("root={}", indexer.getRoot().getTreeString());
            LOGGER.warn("leaves={}", leaves);
            throw new RuntimeException(describeMismatch(conll2011Sentence, indexer, leaves.size()));
        }

        for (int position = 0; position < leaves.size(); position++) {
            conll2011Sentence.getWord(position).setWord(leaves.get(position).getWord());
        }
    }

    /** Builds the diagnostic text shared by the "bogus parse" warning and the size-mismatch error. */
    private static String describeMismatch(Conll2011Sentence sentence, PennTreeReader.Indexer indexer, int leafCount) {
        return "doc=" + sentence.getDocId()
            + " part=" + sentence.getPart()
            + " sent=" + sentence.getIndex()
            + " leaves.size=" + leafCount
            + " s.size=" + sentence.size()
            + " notraces.size=" + indexer.getLeaves(false).size();
    }

    /**
     * Command line entry point: ingests everything and logs each resulting
     * {@link Communication} at info level.
     *
     * @param strArr {@code strArr[0]} is the directory with the {@code .parse} files,
     *     {@code strArr[1]} the path handed to {@link Conll2011} (only files ending in
     *     {@code .gold_skel} are accepted by the filter passed along with it)
     * @throws IllegalArgumentException if fewer than two arguments are given
     */
    public static void main(String[] strArr) {
        Thread.setDefaultUncaughtExceptionHandler(new LoggedUncaughtExceptionHandler());
        if (strArr.length < 2) {
            throw new IllegalArgumentException(
                "expected 2 arguments: <parse-directory> <gold-skel-path>, got " + strArr.length);
        }
        String parseDirectory = strArr[0];
        String skeletonPath = strArr[1];
        try {
            Conll2011 skeletons = new Conll2011(Paths.get(skeletonPath), path -> {
                return path.toString().endsWith(".gold_skel");
            });
            Ontonotes5 ingester = new Ontonotes5(skeletons, Paths.get(parseDirectory));
            // Materialize everything first so that a failure half way through does not
            // leave a partially logged result.
            List<Communication> communications = ingester.stream().collect(Collectors.toList());
            for (Communication communication : communications) {
                LOGGER.info(communication.toString());
            }
        } catch (IngestException e) {
            throw new RuntimeException(e);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Returns the fixed kind label {@code "document"}. */
    public String getKind() {
        return "document";
    }

    /** Returns the current local time as reported by {@link Timing#currentLocalTime()}. */
    public long getTimestamp() {
        return Timing.currentLocalTime();
    }

    /** Returns the fully qualified name of this class. */
    public String getTool() {
        return Ontonotes5.class.getName();
    }

    /** Returns the hard-coded tool version string. */
    public String getToolVersion() {
        return "4.8.6";
    }

    /** Returns a new, empty, mutable list on every call. */
    public List<String> getToolNotes() {
        return new ArrayList<>();
    }

    /**
     * Produces one {@link Communication} per group of documents delivered by
     * {@link Conll2011#preIngest()}: every sentence gets its words from the parses, each
     * document is converted, and the conversions of a group are merged with
     * {@link Conll2011#mergeCommunicationsAsSections}.
     *
     * <p>The returned stream is lazy, so failures while aligning words with parses
     * surface when the stream is consumed rather than when this method returns.
     *
     * @throws IngestException if {@code preIngest()} fails with an {@link IOException}
     */
    public Stream<Communication> stream() throws IngestException {
        try {
            return this.skels.preIngest()
                .map(documents -> documents
                    // A stream can be traversed only once, so the words are set in the
                    // same pass that converts each document instead of in a separate
                    // forEach over the stream.
                    .map(document -> {
                        document.getSentences().forEach(this::setWords);
                        return document.convertToConcrete();
                    })
                    .collect(Collectors.toList()))
                .map(Conll2011::mergeCommunicationsAsSections);
        } catch (IOException e) {
            throw new IngestException(e);
        }
    }
}
