package org.opencb.cellbase.app.transform;

import java.io.BufferedReader;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import javax.xml.bind.JAXBException;
import org.opencb.biodata.formats.protein.uniprot.UniProtParser;
import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.Entry;
import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.FeatureType;
import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.LocationType;
import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.OrganismNameType;
import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.PositionType;
import org.opencb.biodata.formats.protein.uniprot.v201504jaxb.Uniprot;
import org.opencb.cellbase.core.serializer.CellBaseSerializer;
import org.opencb.commons.utils.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/opencb/cellbase/app/transform/ProteinParser.class */
/**
 * Parser that loads UniProt XML entries for one species, optionally merges
 * InterPro annotations into their feature lists, and hands every resulting
 * entry to the configured serializer.
 */
public class ProteinParser extends CellBaseParser {
    /** JAXB context package passed to {@code UniProtParser.loadXMLInfo}. */
    private static final String UNIPROT_JAXB_CONTEXT = "org.opencb.biodata.formats.protein.uniprot.v201504jaxb";
    /** Initial capacity used for the protein map and the set of annotated accessions. */
    private static final int INITIAL_PROTEIN_CAPACITY = 30000;
    /** Columns 0..5 of an InterPro line are read, so at least six are required. */
    private static final int INTERPRO_MIN_COLUMNS = 6;
    /** A progress message is logged every time this many InterPro lines have been read. */
    private static final int INTERPRO_LOG_INTERVAL = 10000000;

    private final Path uniprotFilesDir;
    private final Path interproFilePath;
    private final String species;
    private Map<String, Entry> proteinMap;
    protected Logger logger;

    /**
     * Creates a parser that only reads UniProt files (no InterPro annotation).
     *
     * @param path directory containing the UniProt {@code .xml} / {@code .xml.gz} files
     * @param str organism name an entry must carry to be kept
     * @param cellBaseSerializer serializer that receives every kept entry
     */
    public ProteinParser(Path path, String str, CellBaseSerializer cellBaseSerializer) {
        this(path, null, str, cellBaseSerializer);
    }

    /**
     * Creates a parser that reads UniProt files and, when available, an InterPro file.
     *
     * @param path directory containing the UniProt {@code .xml} / {@code .xml.gz} files
     * @param path2 InterPro tab-separated file; may be {@code null} or missing, in which
     *              case the InterPro step is skipped
     * @param str organism name an entry must carry to be kept
     * @param cellBaseSerializer serializer that receives every kept entry
     */
    public ProteinParser(Path path, Path path2, String str, CellBaseSerializer cellBaseSerializer) {
        super(cellBaseSerializer);
        this.logger = LoggerFactory.getLogger(getClass());
        this.uniprotFilesDir = path;
        this.interproFilePath = path2;
        this.species = str;
    }

    /**
     * Loads the UniProt entries of the configured species, merges the InterPro
     * annotations when an InterPro file exists, and serializes every entry.
     *
     * <p>A {@link JAXBException} is logged and not rethrown; in that case nothing
     * that had not already been serialized is written.
     *
     * @throws IOException if the UniProt directory is {@code null}, does not exist or
     *                     cannot be listed, or if reading an input file fails
     */
    @Override // org.opencb.cellbase.app.transform.CellBaseParser
    public void parse() throws IOException {
        if (this.uniprotFilesDir == null || !Files.exists(this.uniprotFilesDir)) {
            throw new IOException("File '" + this.uniprotFilesDir + "' not valid");
        }
        this.proteinMap = new HashMap<>(INITIAL_PROTEIN_CAPACITY);
        try {
            loadUniprotEntries();
            this.logger.debug("Number of proteins stored in map: '{}'", this.proteinMap.size());
            if (this.interproFilePath != null && Files.exists(this.interproFilePath)) {
                addInterproFeatures();
            }
            for (Entry entry : this.proteinMap.values()) {
                this.serializer.serialize(entry);
            }
        } catch (JAXBException e) {
            // Same outcome as before (no rethrow), but reported through the logger with its cause.
            this.logger.error("Error parsing UniProt XML files in '{}'", this.uniprotFilesDir, e);
        }
    }

    /** Fills the protein map, keyed by first accession, with the entries of the configured species. */
    private void loadUniprotEntries() throws IOException, JAXBException {
        File[] uniprotFiles = this.uniprotFilesDir.toFile().listFiles(new FilenameFilter() {
            @Override // java.io.FilenameFilter
            public boolean accept(File dir, String name) {
                return name.endsWith(".xml") || name.endsWith(".xml.gz");
            }
        });
        if (uniprotFiles == null) {
            // listFiles returns null when the path is not a directory or an I/O error occurs.
            throw new IOException("Unable to list UniProt files in '" + this.uniprotFilesDir + "'");
        }
        for (File uniprotFile : uniprotFiles) {
            Uniprot uniprot = (Uniprot) UniProtParser.loadXMLInfo(uniprotFile.toString(), UNIPROT_JAXB_CONTEXT);
            for (Entry entry : uniprot.getEntry()) {
                if (belongsToSpecies(entry)) {
                    this.proteinMap.put(entry.getAccession().get(0), entry);
                }
            }
        }
    }

    /** Returns whether any organism name of the entry equals the configured species. */
    private boolean belongsToSpecies(Entry entry) {
        for (OrganismNameType organismName : entry.getOrganism().getName()) {
            if (organismName.getValue() != null && organismName.getValue().equals(this.species)) {
                return true;
            }
        }
        return false;
    }

    /** Reads the InterPro file line by line and annotates every protein present in the map. */
    private void addInterproFeatures() throws IOException {
        HashSet<String> annotatedProteins = new HashSet<>(INITIAL_PROTEIN_CAPACITY);
        int numLines = 0;
        // try-with-resources closes the reader even when a line fails to parse.
        try (BufferedReader reader = FileUtils.newBufferedReader(this.interproFilePath)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");
                Entry protein = this.proteinMap.get(fields[0]);
                if (protein != null) {
                    if (fields.length < INTERPRO_MIN_COLUMNS) {
                        this.logger.warn("Skipping InterPro line with fewer than {} columns: '{}'",
                                INTERPRO_MIN_COLUMNS, line);
                    } else {
                        annotateProtein(protein, fields);
                        annotatedProteins.add(fields[0]);
                    }
                }
                numLines++;
                if (numLines % INTERPRO_LOG_INTERVAL == 0) {
                    this.logger.debug("{} InterPro lines processed. {} unique proteins processed",
                            numLines, annotatedProteins.size());
                }
            }
        }
    }

    /**
     * Applies one InterPro line to a protein. Columns used: 1 = id, 2 = description,
     * 3 = ref, 4 = begin position, 5 = end position. The first existing feature with
     * the same begin/end gets its id and ref overwritten; if there is none, a new
     * feature is appended.
     */
    private static void annotateProtein(Entry protein, String[] fields) {
        BigInteger begin = BigInteger.valueOf(Integer.parseInt(fields[4]));
        BigInteger end = BigInteger.valueOf(Integer.parseInt(fields[5]));
        for (FeatureType feature : protein.getFeature()) {
            if (spans(feature, begin, end)) {
                feature.setId(fields[1]);
                feature.setRef(fields[3]);
                return;
            }
        }

        PositionType beginPosition = new PositionType();
        beginPosition.setPosition(begin);
        PositionType endPosition = new PositionType();
        endPosition.setPosition(end);
        LocationType location = new LocationType();
        location.setBegin(beginPosition);
        location.setEnd(endPosition);

        FeatureType interproFeature = new FeatureType();
        interproFeature.setId(fields[1]);
        interproFeature.setDescription(fields[2]);
        interproFeature.setRef(fields[3]);
        interproFeature.setLocation(location);
        protein.getFeature().add(interproFeature);
    }

    /** Returns whether the feature has both a begin and an end position equal to the given ones. */
    private static boolean spans(FeatureType feature, BigInteger begin, BigInteger end) {
        LocationType location = feature.getLocation();
        if (location == null || location.getBegin() == null || location.getEnd() == null) {
            return false;
        }
        // BigInteger.equals(null) is false, so missing positions never match.
        return begin.equals(location.getBegin().getPosition())
                && end.equals(location.getEnd().getPosition());
    }
}
