package org.intermine.bio.dataconversion;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.tools.ant.BuildException;
import org.biojava.nbio.core.exceptions.ParserException;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AmbiguityDNACompoundSet;
import org.biojava.nbio.core.sequence.io.DNASequenceCreator;
import org.biojava.nbio.core.sequence.io.FastaReader;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
import org.biojava.nbio.core.sequence.io.PlainFastaHeaderParser;
import org.biojava.nbio.core.sequence.template.Sequence;
import org.intermine.bio.util.OrganismData;
import org.intermine.bio.util.OrganismRepository;
import org.intermine.metadata.Util;
import org.intermine.model.InterMineObject;
import org.intermine.model.bio.BioEntity;
import org.intermine.model.bio.DataSet;
import org.intermine.model.bio.DataSource;
import org.intermine.model.bio.Organism;
import org.intermine.objectstore.ObjectStoreException;
import org.intermine.objectstore.query.PendingClob;
import org.intermine.task.FileDirectDataLoaderTask;

/* loaded from: input_file:org/intermine/bio/dataconversion/FastaLoaderTask.class */
/**
 * Data-loading task that reads FASTA files and, for every record, stores an
 * {@code org.intermine.model.bio.Sequence} together with a feature object of the
 * configured class ({@link #setClassName(String)}) through the direct data loader.
 *
 * <p>Subclasses can customise organism lookup ({@code getOrganism}), identifier
 * extraction ({@link #getIdentifier(Sequence)}) and per-record handling
 * ({@code extraProcessing}).
 */
public class FastaLoaderTask extends FileDirectDataLoaderTask {
    private static final Logger LOG = Logger.getLogger(FastaLoaderTask.class);

    /** Organism created lazily on first use and then shared by every sequence. */
    private Organism cachedOrganism;
    private String className;
    private String dataSetTitle;
    private String sequenceType = "dna";
    private String classAttribute = "primaryIdentifier";
    private int storeCount = 0;
    private String dataSourceName = null;
    private DataSource dataSource = null;
    private String fastaTaxonId = null;
    /** Maps "genus species" to taxon id; filled by {@link #setTaxonId(String)}. */
    private final Map<String, String> taxonIds = new HashMap<String, String>();
    private String idSuffix = "";
    private File[] files = null;
    private String licence = null;
    /** DataSet objects already created and stored, keyed by title. */
    private final Map<String, DataSet> dataSets = new HashMap<String, DataSet>();

    /**
     * Sets the taxon id string (space-separated when several ids are given) and looks
     * each id up in the {@link OrganismRepository} so that {@link #getTaxonId(String)}
     * can later resolve a "genus species" name.
     *
     * @param str the taxon id(s)
     * @throws IllegalArgumentException if the repository has no data for one of the ids
     */
    public void setTaxonId(String str) {
        this.fastaTaxonId = str;
        parseTaxonIds();
    }

    /**
     * Sets the sequence type. {@code processFile} reads the file as DNA when the type
     * equals "dna" (ignoring case) and as protein otherwise.
     *
     * @param str the sequence type; the literal {@code ${fasta.sequenceType}}
     *            (presumably an unexpanded build property) selects the default "dna"
     */
    public void setSequenceType(String str) {
        if ("${fasta.sequenceType}".equals(str)) {
            this.sequenceType = "dna";
        } else {
            this.sequenceType = str;
        }
    }

    /**
     * @param str text appended to the raw FASTA identifier in
     *            {@link #getIdentifier(Sequence)}
     */
    public void setIdSuffix(String str) {
        this.idSuffix = str;
    }

    /**
     * @param str fully qualified name of the model class to create for each record
     */
    public void setClassName(String str) {
        this.className = str;
    }

    /**
     * @return the fully qualified name of the model class created for each record
     */
    public String getClassName() {
        return this.className;
    }

    /**
     * @param str name of the field that receives the identifier
     *            (defaults to "primaryIdentifier")
     */
    public void setClassAttribute(String str) {
        this.classAttribute = str;
    }

    /**
     * @param str name of the DataSource attached to the DataSet; when left unset the
     *            DataSet is stored without a DataSource
     */
    public void setDataSourceName(String str) {
        this.dataSourceName = str;
    }

    /**
     * @param str title of the DataSet added to every stored feature; must be non-empty
     *            by the time a sequence is processed
     */
    public void setDataSetTitle(String str) {
        this.dataSetTitle = str;
    }

    /**
     * @param str licence recorded on the DataSet; ignored when null
     */
    public void setLicence(String str) {
        this.licence = str;
    }

    /**
     * Supplies the files to read directly; when set, {@link #execute()} processes
     * exactly these files instead of delegating to the superclass.
     *
     * @param fileArr the files to process
     */
    protected void setFileArray(File[] fileArr) {
        this.files = fileArr;
    }

    /**
     * Runs the superclass processing, commits the current transaction, opens a new one,
     * closes the direct data loader and logs throughput figures.
     *
     * @throws BuildException if the object store reports a failure
     */
    @Override
    public void process() {
        long start = System.currentTimeMillis();
        try {
            this.storeCount++;
            super.process();
            getIntegrationWriter().commitTransaction();
            getIntegrationWriter().beginTransaction();
            getDirectDataLoader().close();
            long elapsed = System.currentTimeMillis() - start;
            // Multiply as long so large counts cannot overflow int, and never divide by
            // zero when the whole run finishes within a single millisecond.
            long perMinute = (60000L * this.storeCount) / Math.max(1L, elapsed);
            LOG.info("Finished dataloading " + this.storeCount + " objects at " + perMinute
                    + " objects per minute (" + elapsed + " ms total) for source "
                    + this.sourceName);
        } catch (ObjectStoreException e) {
            throw new BuildException("failed to store object", e);
        }
    }

    /**
     * Closes the direct data loader.
     *
     * @throws ObjectStoreException if closing fails
     */
    public void close() throws ObjectStoreException {
        getDirectDataLoader().close();
    }

    /**
     * Validates the mandatory settings, then either delegates to the superclass (no
     * explicit file array) or processes each file passed to
     * {@link #setFileArray(File[])}.
     *
     * @throws RuntimeException if the taxon id or the class name has not been set
     */
    @Override
    public void execute() {
        if (getProject() != null) {
            configureDynamicAttributes(this);
        }
        if (this.fastaTaxonId == null) {
            throw new RuntimeException("fastaTaxonId needs to be set");
        }
        if (this.className == null) {
            throw new RuntimeException("className needs to be set");
        }
        if (this.files == null) {
            super.execute();
            return;
        }
        for (File file : this.files) {
            processFile(file);
        }
    }

    /**
     * Reads every record of one FASTA file and stores it via
     * {@code processSequence}.
     *
     * @param file the FASTA file to read
     * @throws BuildException if the file cannot be read or parsed, or a store fails
     */
    public void processFile(File file) {
        try {
            System.err.println("reading " + this.sequenceType + " sequence from: " + file);
            LOG.debug("FastaLoaderTask loading file " + file.getName());
            if ("dna".equalsIgnoreCase(this.sequenceType)) {
                // The reader is built with raw types because the element types it
                // needs are not imported here; values are cast back to Sequence below.
                @SuppressWarnings({"rawtypes", "unchecked"})
                Map<?, ?> dnaRecords = new FastaReader(file, new PlainFastaHeaderParser(),
                        new DNASequenceCreator(AmbiguityDNACompoundSet.getDNACompoundSet()))
                        .process();
                for (Object value : dnaRecords.values()) {
                    Sequence dna = (Sequence) value;
                    processSequence(getOrganism(dna), dna);
                }
            } else {
                for (ProteinSequence protein
                        : FastaReaderHelper.readFastaProteinSequence(file).values()) {
                    // Statically typed as ProteinSequence so the protein-specific
                    // getOrganism overload is selected.
                    processSequence(getOrganism(protein), protein);
                }
            }
        } catch (FileNotFoundException e) {
            throw new BuildException("problem reading file - file not found: " + file, e);
        } catch (IOException e) {
            throw new BuildException("error while closing FileReader for: " + file, e);
        } catch (NoSuchElementException e) {
            throw new BuildException("no fasta sequences in: " + file, e);
        } catch (ObjectStoreException e) {
            throw new BuildException("ObjectStore problem while processing: " + file, e);
        } catch (ParserException e) {
            throw new BuildException("sequence not in fasta format or wrong alphabet for: "
                    + file, e);
        }
    }

    /**
     * Returns the organism for a sequence. This implementation ignores the argument and
     * returns one shared organism carrying the configured taxon id.
     *
     * @param sequence the parsed sequence (unused here)
     * @return the organism, or null to make the caller skip the sequence
     * @throws ObjectStoreException if the organism cannot be created or stored
     */
    protected Organism getOrganism(Sequence sequence) throws ObjectStoreException {
        return sharedOrganism();
    }

    /**
     * Creates and stores the model objects for one parsed sequence: the sequence object
     * itself and a feature of the configured class that references it.
     *
     * @param organism organism to attach; a null value skips the sequence entirely
     * @param sequence the parsed FASTA record
     * @throws ObjectStoreException if creating objects or resolving the DataSet fails
     * @throws IllegalArgumentException if a required field cannot be set on the feature
     * @throws RuntimeException if the class name is unusable or no dataset title is set
     * @throws BuildException if storing the objects fails
     */
    private void processSequence(Organism organism, Sequence sequence)
            throws ObjectStoreException {
        if (organism == null) {
            return;
        }
        org.intermine.model.bio.Sequence storedSequence = (org.intermine.model.bio.Sequence)
                getDirectDataLoader().createObject(org.intermine.model.bio.Sequence.class);
        String residues = sequence.getSequenceAsString();
        String md5checksum = Util.getMd5checksum(residues);
        storedSequence.setResidues(new PendingClob(residues));
        storedSequence.setLength(sequence.getLength());
        storedSequence.setMd5checksum(md5checksum);

        BioEntity feature = createFeature();
        String identifier = getIdentifier(sequence);

        // Each step has its own handler so that a failure is reported against the
        // field that actually caused it, with the original cause attached.
        setRequiredField(feature, this.classAttribute, identifier, identifier);
        setRequiredField(feature, "sequence", storedSequence, identifier);
        feature.setOrganism(organism);
        Integer length = Integer.valueOf(storedSequence.getLength());
        setRequiredField(feature, "length", length, length);
        try {
            feature.setFieldValue("md5checksum", md5checksum);
        } catch (Exception ignored) {
            // Best effort: a failure to set md5checksum on the feature is tolerated.
        }

        extraProcessing(sequence, storedSequence, feature, organism, getDataSet());
        if (StringUtils.isEmpty(this.dataSetTitle)) {
            throw new RuntimeException("DataSet title (fasta.dataSetTitle) not set");
        }
        feature.addDataSets(getDataSet());
        try {
            getDirectDataLoader().store(storedSequence);
            getDirectDataLoader().store(feature);
            this.storeCount += 2;
        } catch (ObjectStoreException e) {
            throw new BuildException("store failed", e);
        }
    }

    /** Instantiates the configured feature class through the direct data loader. */
    private BioEntity createFeature() throws ObjectStoreException {
        Class<?> featureClass;
        try {
            featureClass = Class.forName(this.className);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("unknown class: " + this.className
                    + " while creating new Sequence object", e);
        }
        if (!InterMineObject.class.isAssignableFrom(featureClass)) {
            throw new RuntimeException("Feature className must be a valid class in the model that inherits from InterMineObject, but was: " + this.className);
        }
        return (BioEntity) getDirectDataLoader()
                .createObject(featureClass.asSubclass(InterMineObject.class));
    }

    /** Sets one mandatory field, converting any failure into a descriptive error. */
    private void setRequiredField(BioEntity feature, String fieldName, Object value,
            Object reportedValue) {
        try {
            feature.setFieldValue(fieldName, value);
        } catch (Exception e) {
            throw new IllegalArgumentException("Error setting: " + this.className + "."
                    + fieldName + " to: " + reportedValue + ". Does the attribute exist?", e);
        }
    }

    /**
     * Returns the DataSet for the configured title, creating and storing it on first
     * use. The licence and DataSource are only attached when they have been configured.
     *
     * @return the DataSet shared by all features loaded under the current title
     * @throws ObjectStoreException if the DataSet or its DataSource cannot be stored
     */
    public DataSet getDataSet() throws ObjectStoreException {
        DataSet known = this.dataSets.get(this.dataSetTitle);
        if (known != null || this.dataSets.containsKey(this.dataSetTitle)) {
            return known;
        }
        DataSet created = (DataSet) getDirectDataLoader().createObject(DataSet.class);
        created.setName(this.dataSetTitle);
        if (this.licence != null) {
            created.setLicence(this.licence);
        }
        if (this.dataSourceName != null) {
            created.setDataSource(getDataSource());
        }
        getDirectDataLoader().store(created);
        this.dataSets.put(this.dataSetTitle, created);
        return created;
    }

    /**
     * Hook for subclasses, invoked for every sequence after the standard fields have
     * been set and before the objects are stored. Does nothing by default.
     *
     * @param sequence the parsed FASTA record
     * @param sequence2 the model sequence object about to be stored
     * @param bioEntity the feature object about to be stored
     * @param organism the organism attached to the feature
     * @param dataSet the DataSet for the current title
     * @throws ObjectStoreException if a subclass fails to store additional objects
     */
    protected void extraProcessing(Sequence sequence, org.intermine.model.bio.Sequence sequence2, BioEntity bioEntity, Organism organism, DataSet dataSet) throws ObjectStoreException {
    }

    /**
     * Derives the identifier for a record from its accession id plus the configured
     * suffix: the text is cut at the first space and, if what remains contains a pipe,
     * the second pipe-delimited field is used.
     *
     * @param sequence the parsed FASTA record
     * @return the identifier, or null when a pipe is present but no second field exists
     */
    protected String getIdentifier(Sequence sequence) {
        // NOTE(review): the suffix is appended before truncation, so it is dropped
        // whenever the raw id contains a space or further pipe-delimited fields.
        // Kept as is because changing it would alter stored identifiers - confirm intent.
        String name = sequence.getAccession().getID() + this.idSuffix;
        int firstSpace = name.indexOf(' ');
        if (firstSpace >= 0) {
            // substring rather than split(" ")[0]: split yields an empty array (and so
            // an index error) when the text consists of spaces only.
            name = name.substring(0, firstSpace);
        }
        if (name.contains("|")) {
            String[] bits = name.split("\\|");
            if (bits.length < 2) {
                return null;
            }
            name = bits[1];
        }
        return name;
    }

    /** Returns the DataSource for the configured name, creating it on first use. */
    private DataSource getDataSource() throws ObjectStoreException {
        if (StringUtils.isEmpty(this.dataSourceName)) {
            throw new RuntimeException("dataSourceName not set");
        }
        if (this.dataSource == null) {
            this.dataSource = (DataSource) getDirectDataLoader().createObject(DataSource.class);
            this.dataSource.setName(this.dataSourceName);
            getDirectDataLoader().store(this.dataSource);
            this.storeCount++;
        }
        return this.dataSource;
    }

    /**
     * Looks up the taxon id registered for an organism name.
     *
     * @param str organism name in the form "genus species"
     * @return the matching taxon id, or null when the name is unknown
     */
    public String getTaxonId(String str) {
        return this.taxonIds.get(str);
    }

    /** Fills {@code taxonIds} from the space-separated taxon id string. */
    private void parseTaxonIds() {
        if (this.fastaTaxonId == null) {
            // Nothing to parse; execute() reports the missing setting.
            return;
        }
        OrganismRepository repository = OrganismRepository.getOrganismRepository();
        for (String taxonId : this.fastaTaxonId.split(" ")) {
            OrganismData organismData = repository.getOrganismDataByTaxonInternal(taxonId);
            if (organismData == null) {
                // Guard against a failed lookup so the error names the offending id.
                throw new IllegalArgumentException("No organism data found for taxon id: '"
                        + taxonId + "'");
            }
            this.taxonIds.put(organismData.getGenus() + " " + organismData.getSpecies(),
                    taxonId);
        }
    }

    /**
     * Protein-specific variant of {@link #getOrganism(Sequence)}; this implementation
     * behaves identically and returns the shared organism.
     *
     * @param proteinSequence the parsed protein sequence (unused here)
     * @return the organism, or null to make the caller skip the sequence
     * @throws ObjectStoreException if the organism cannot be created or stored
     */
    protected Organism getOrganism(ProteinSequence proteinSequence) throws ObjectStoreException {
        return sharedOrganism();
    }

    /** Creates and stores the shared organism on first use, then returns it. */
    private Organism sharedOrganism() throws ObjectStoreException {
        if (this.cachedOrganism == null) {
            this.cachedOrganism = (Organism) getDirectDataLoader().createObject(Organism.class);
            this.cachedOrganism.setTaxonId(this.fastaTaxonId);
            getDirectDataLoader().store(this.cachedOrganism);
        }
        return this.cachedOrganism;
    }
}
