/*******************************************************************************
 * Copyright 2016-2017 the original author or authors.
 *  
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package pro.parseq.GenomeExplorer;

import java.io.File;
import java.io.FilenameFilter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.reference.FastaSequenceIndex;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import pro.parseq.GenomeExplorer.exceptions.ContigNotFoundException;
import pro.parseq.GenomeExplorer.exceptions.IllegalReferencesFolderPathException;
import pro.parseq.GenomeExplorer.exceptions.NoSuchReferenceException;

/**
 * Reference genome sequences explorer.
 * <p>
 * Wraps htsjdk (samtools) functionality to serve sequences from your references.
 * Each reference must be located in its own folder inside the "parent" references folder,
 * and each reference's folder must include 3 files:
 *     - reference fasta file
 *     - reference index file (.fai)
 *     - reference dictionary file (.dict)
 * 
 * @author Alexander Afanasyev <a href="mailto:aafanasyev@parseq.pro">aafanasyev@parseq.pro</a>
 */
public class Explorer {

	private static final Logger logger = LoggerFactory.getLogger(Explorer.class);

	/** Compression suffix that is ignored when detecting the fasta extension. */
	private static final String GZIP_SUFFIX = ".gz";

	// This is "parent" references folder
	private final File referencesFolder;
	// References dictionary: reference folder name -> opened reference
	private final Map<String, ReferenceSequenceFile> references;

	/**
	 * Filters reference fasta files.
	 * A trailing ".gz" is stripped first, then the remaining extension is
	 * looked up in {@link ReferenceSequenceFileFactory#FASTA_EXTENSIONS}.
	 */
	private static final FilenameFilter fastaFilter = new FilenameFilter() {

		@Override
		public boolean accept(File dir, String name) {

			// Locale.ROOT keeps the comparison independent of the default locale
			// (e.g. Turkish lower-casing of 'I' would break extension matching)
			String filename = name.toLowerCase(Locale.ROOT);
			// Do not treat .gz extension as a reference extension if exists
			if (filename.endsWith(GZIP_SUFFIX)) {
				filename = filename.substring(0,
						filename.length() - GZIP_SUFFIX.length());
			}

			int extensionStart = filename.lastIndexOf('.');
			if (extensionStart < 0) {
				// No extension at all (e.g. "README"): substring(-1) would throw
				return false;
			}

			return ReferenceSequenceFileFactory.FASTA_EXTENSIONS
					.contains(filename.substring(extensionStart));
		}
	};

	/** Filters reference index files  */
	private static final FilenameFilter indexFilter = suffixFilter(".fai");

	/** Filters reference dictionary files */
	private static final FilenameFilter dictionaryFilter = suffixFilter(".dict");

	/**
	 * Builds a filter accepting file names that end with the suffix specified,
	 * ignoring case.
	 * 
	 * @param suffix Lower-case suffix (including the leading dot) to match
	 * @return Filter matching names by <code>suffix</code>
	 */
	private static FilenameFilter suffixFilter(final String suffix) {

		return new FilenameFilter() {

			@Override
			public boolean accept(File dir, String name) {
				return name.toLowerCase(Locale.ROOT).endsWith(suffix);
			}
		};
	}

	/**
	 * Lists folder's files matching the filter specified.
	 * 
	 * @param folder Folder to list
	 * @param filter Filter to apply
	 * @return Matching files; an empty array if the folder can not be listed
	 */
	private static File[] listFiles(File folder, FilenameFilter filter) {

		// File.listFiles returns null if an I/O error occurs
		File[] files = folder.listFiles(filter);
		return (files == null) ? new File[0] : files;
	}

	/**
	 * Opens the reference located in the folder specified.
	 * 
	 * @param referenceFolder Single reference's folder
	 * @return Opened reference, or null if any of the required files is missing
	 */
	private static ReferenceSequenceFile openReference(File referenceFolder) {

		File[] referenceFile = listFiles(referenceFolder, fastaFilter);
		File[] indexFile = listFiles(referenceFolder, indexFilter);
		File[] dictionaryFile = listFiles(referenceFolder, dictionaryFilter);

		if (referenceFile.length < 1) {
			logger.error("No reference file found for {}! Skip it.",
					referenceFolder.getName());
			return null;
		}
		if (indexFile.length < 1) {
			logger.error("No index file found for {}! Skip it.",
					referenceFolder.getName());
			return null;
		}
		if (dictionaryFile.length < 1) {
			logger.error("No dictionary file found for {}! Skip it.",
					referenceFolder.getName());
			return null;
		}

		return new IndexedFastaSequenceFile(referenceFile[0],
				new FastaSequenceIndex(indexFile[0]));
	}

	/**
	 * Looks a contig up by name in the reference's sequence dictionary.
	 * 
	 * @param referenceFile Reference to look in
	 * @param contig Contig name to look for
	 * @return Matching sequence record, or null if there is no such contig
	 */
	private static SAMSequenceRecord findContig(
			ReferenceSequenceFile referenceFile, String contig) {

		for (SAMSequenceRecord record: referenceFile
				.getSequenceDictionary().getSequences()) {
			if (record.getSequenceName().equals(contig)) {
				return record;
			}
		}

		return null;
	}

	/**
	 * Instantiates Explorer class
	 * Finds all references inside the "parent" folder specified.
	 * Sub-folders lacking a fasta, index or dictionary file are logged and skipped.
	 * 
	 * @param referencesFolderPath "Parent" folder to search references in
	 * @throws IllegalReferencesFolderPathException If specified "parent" folder is not a directory
	 */
	public Explorer(String referencesFolderPath)
			throws IllegalReferencesFolderPathException {

		referencesFolder = new File(referencesFolderPath);
		if (!referencesFolder.isDirectory()) {
			throw new IllegalReferencesFolderPathException(referencesFolderPath);
		}
		references = new HashMap<String, ReferenceSequenceFile>();

		// listFiles may return null on I/O error even for a directory
		File[] candidates = referencesFolder.listFiles();
		if (candidates == null) {
			logger.error("Unable to list contents of {}!", referencesFolder);
			candidates = new File[0];
		}

		for (File referenceFolder: candidates) {
			if (!referenceFolder.isDirectory()) {
				continue;
			}

			logger.debug("Found {} reference.", referenceFolder.getName());

			ReferenceSequenceFile referenceSequenceFile = openReference(referenceFolder);
			if (referenceSequenceFile != null) {
				references.put(referenceFolder.getName(), referenceSequenceFile);
			}
		}

		logger.debug("Explorer found {} references.", references.size());
	}

	/**
	 * Get available references
	 * 
	 * @return Read-only set of available references inside <code>referencesFolder</code>
	 */
	public Set<String> getReferenceGenomesList() {
		// Read-only view: callers must not be able to drop entries from the dictionary
		return Collections.unmodifiableSet(references.keySet());
	}

	/**
	 * Returns <code>true</code> if references dictionary
	 * contains mapping for specified reference name
	 * 
	 * @param reference Reference name to look for
	 * @return Whether <code>reference</code> is in the references dictionary
	 */
	public boolean hasReference(String reference) {
		return references.containsKey(reference);
	}

	/**
	 * Returns <code>true</code> if reference's contig list
	 * contains contig of specified name
	 * 
	 * @param reference Reference name to look in
	 * @param contig Contig name to look for
	 * @return Whether <code>contig</code> is in the <code>reference</code>'s contigs list
	 * @throws NoSuchReferenceException If there is no such reference name in the references dictionary
	 */
	public boolean hasContig(String reference, String contig)
			throws NoSuchReferenceException {

		ReferenceSequenceFile referenceFile = references.get(reference);
		if (referenceFile == null) {
			throw new NoSuchReferenceException(reference);
		}

		return findContig(referenceFile, contig) != null;
	}

	/**
	 * Get reference's contig length
	 * 
	 * @param reference Reference genome name
	 * @param contig Contig id
	 * @return Reference's contig length 
	 * @throws NoSuchReferenceException If there is no such reference name in the references dictionary 
	 * @throws ContigNotFoundException If there is no reference's contig of name specified
	 */
	public long getContigLength(String reference, String contig)
			throws NoSuchReferenceException, ContigNotFoundException {

		ReferenceSequenceFile referenceFile = references.get(reference);
		if (referenceFile == null) {
			throw new NoSuchReferenceException(reference);
		}

		SAMSequenceRecord record = findContig(referenceFile, contig);
		if (record == null) {
			throw new ContigNotFoundException(reference, contig);
		}

		return record.getSequenceLength();
	}

	/**
	 * Get references sequences for reference specified
	 * 
	 * @param reference To get sequences of
	 * @return Sequences list, or null if no such reference
	 */
	public List<SAMSequenceRecord> getReferenceSequencesList(String reference) {

		ReferenceSequenceFile referenceFile = references.get(reference);
		if (referenceFile == null) {
			logger.error("No such reference {}!", reference);
			return null;
		}

		return referenceFile.getSequenceDictionary().getSequences();
	}

	/**
	 * Get contigs list for reference specified 
	 * 
	 * @param reference To get contigs of
	 * @return Contigs list, or null if no such reference
	 */
	public List<String> getReferenceContigsList(String reference) {

		ReferenceSequenceFile referenceFile = references.get(reference);
		if (referenceFile == null) {
			logger.error("No such reference {}!", reference);
			return null;
		}

		List<String> contigs = new ArrayList<String>();
		for (SAMSequenceRecord record: referenceFile
				.getSequenceDictionary().getSequences()) {
			contigs.add(record.getSequenceName());
		}

		return contigs;
	}

	/**
	 * Get reference genome sequence for parameters specified
	 * 
	 * @param reference To get sequence of
	 * @param contig To get sequence of
	 * @param start To get sequence from (1-based inclusive)
	 * @param stop To get sequence to (1-based inclusive)
	 * @param sequenceCase Preferred sequence case; null is treated as ORIGINAL
	 * @return Reference genome sequence bases for parameters specified, or null if no such reference
	 */
	public String getReferenceSequence(String reference, String contig,
			long start, long stop, ReferenceSequenceCase sequenceCase) {

		ReferenceSequenceFile referenceFile = references.get(reference);
		if (referenceFile == null) {
			logger.error("No such reference {}!", reference);
			return null;
		}

		// Decode with a fixed single-byte charset: exactly one char per base,
		// independent of the platform default charset
		String sequence = new String(
				referenceFile.getSubsequenceAt(contig, start, stop).getBases(),
				StandardCharsets.ISO_8859_1);

		if (sequenceCase == null) {
			// Switching on a null enum would throw NullPointerException
			return sequence;
		}

		switch (sequenceCase) {
		case LOWER:
			return sequence.toLowerCase(Locale.ROOT);
		case UPPER:
			return sequence.toUpperCase(Locale.ROOT);
		case ORIGINAL:
		default:
			return sequence;
		}
	}
}
