/*
* Copyright (c) 2007-2012 The Broad Institute, Inc.
* SOFTWARE COPYRIGHT NOTICE
* This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
* Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*/
package org.broad.igv.feature;
//~--- non-JDK imports --------------------------------------------------------
import com.jidesoft.utils.SortedList;
import org.apache.log4j.Logger;
import org.broad.igv.Globals;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.feature.genome.GenomeManager;
import org.broad.igv.util.collections.MultiMap;
import htsjdk.tribble.Feature;
import java.util.*;
/**
* This is a placeholder class for a true "feature database" wrapper. Its purpose
* is to return a feature given a name. Used to support the "search" box.
*
* @author jrobinso
*/
public class FeatureDB {
private static final Logger log = Logger.getLogger(FeatureDB.class);
/**
 * Index from upper-cased lookup key to the features registered under that key.
 * Keys are feature names, identifiers and attribute values (see the callers of
 * {@code put}).
 * <p/>
 * The backing map is sorted so that prefix searches can be answered with a
 * {@code subMap} range view, and wrapped with
 * {@link Collections#synchronizedSortedMap(SortedMap)}. Code that iterates over the map,
 * one of its views, or one of the per-key lists synchronizes on this object, so the
 * reference is {@code final} to guarantee the lock object can never be replaced.
 */
private static final Map<String, List<NamedFeature>> featureMap =
        Collections.synchronizedSortedMap(new TreeMap<String, List<NamedFeature>>());
/**
 * Threshold for the number of features stored under one key: once a key's list
 * holds more than this many entries, further additions under that key are refused.
 */
private static final int MAX_DUPLICATE_COUNT = 20;
/**
 * Index a feature under its name and, when it is an {@link IGVFeature}, also under
 * its identifier and its attribute values (and the attribute values of its exons).
 * <p/>
 * Names that are null, empty or equal to "." are not indexed; null or empty
 * identifiers are skipped as well.
 *
 * @param feature the feature to index
 * @param genome  genome passed through to {@code put} for the chromosome check
 */
public static void addFeature(NamedFeature feature, Genome genome) {
    final String featureName = feature.getName();
    final boolean indexableName =
            featureName != null && featureName.length() > 0 && !featureName.equals(".");
    if (indexableName) {
        put(featureName, feature, genome);
    }

    // Only IGVFeatures carry identifiers, attributes and exons.
    if (!(feature instanceof IGVFeature)) {
        return;
    }
    final IGVFeature igv = (IGVFeature) feature;

    final String identifier = igv.getIdentifier();
    if (identifier != null && identifier.length() > 0) {
        put(identifier, feature, genome);
    }

    addByAttributes(igv, genome);

    final List<Exon> exonList = igv.getExons();
    if (exonList == null) {
        return;
    }
    for (Exon exon : exonList) {
        addByAttributes(exon, genome);
    }
}
/**
 * Index a feature under each of its attribute values that is shorter than 20 characters.
 * <p/>
 * Null and empty values are skipped: a null value cannot be measured or used as a key,
 * and an empty key would be meaningless for lookups (empty names and identifiers are
 * likewise never indexed by {@code addFeature}).
 *
 * @param igvFeature feature whose attributes are inspected; features without an
 *                   attribute map are ignored
 * @param genome     genome passed through to {@code put} for the chromosome check
 */
private static void addByAttributes(IGVFeature igvFeature, Genome genome) {
    final MultiMap<String, String> attributes = igvFeature.getAttributes();
    if (attributes == null) {
        return;
    }
    // Longer values are not indexed -- presumably they are free text rather than names.
    final int maxIndexedLength = 20;
    for (String value : attributes.values()) {
        if (value == null || value.length() == 0) {
            continue;
        }
        if (value.length() < maxIndexedLength) {
            put(value, igvFeature, genome);
        }
    }
}
/**
 * Register {@code feature} under the upper-cased form of {@code name}.
 * <p/>
 * When not running headless, the feature is rejected if a genome is available
 * (the supplied one, otherwise the current genome reported by {@link GenomeManager})
 * and that genome has no chromosome matching the feature's chromosome. No other
 * validation of the feature is performed.
 * <p/>
 * Features sharing a key are stored in a {@code SortedList} built with
 * {@code FeatureComparator.get(true)}. Once a key's list holds more than
 * {@code MAX_DUPLICATE_COUNT} features, further additions under that key are refused.
 *
 * @param name    lookup name; upper-cased to form the key
 * @param feature feature to store
 * @param genome  The genome which these features belong to. Used for checking chromosomes
 * @return true if successfully added, false if not
 */
static boolean put(String name, NamedFeature feature, Genome genome) {
    final String upperName = name.toUpperCase();

    if (!Globals.isHeadless()) {
        Genome referenceGenome = genome;
        if (referenceGenome == null) {
            referenceGenome = GenomeManager.getInstance().getCurrentGenome();
        }
        final boolean unknownChromosome =
                referenceGenome != null && referenceGenome.getChromosome(feature.getChr()) == null;
        if (unknownChromosome) {
            return false;
        }
    }

    synchronized (featureMap) {
        final List<NamedFeature> existing = featureMap.get(upperName);
        if (existing != null) {
            // Don't let list grow without bounds
            if (existing.size() > MAX_DUPLICATE_COUNT) {
                return false;
            }
            return existing.add(feature);
        }

        // First feature for this key: only publish the list if the add succeeded.
        final List<NamedFeature> fresh = new SortedList<NamedFeature>(
                new ArrayList<NamedFeature>(), FeatureComparator.get(true));
        final boolean inserted = fresh.add(feature);
        if (inserted) {
            featureMap.put(upperName, fresh);
        }
        return inserted;
    }
}
/*
String key = name.toUpperCase();
Genome currentGenome = GenomeManager.getInstance().getCurrentGenome();
if (currentGenome == null || currentGenome.getChromosome(feature.getChr()) != null) {
NamedFeature currentFeature = featureMap.get(key);
if (currentFeature == null) {
featureMap.put(key, feature);
} else {
// If there are multiple features, prefer the one that is NOT on a "random" chromosome.
// This is a hack, but an important one for the human assemblies
String featureChr = feature.getChr().toLowerCase();
String currentFeatureChr = currentFeature.getChr();
if (featureChr.contains("random") || featureChr.contains("chrun") || featureChr.contains("hap")) {
return;
} else if (currentFeatureChr.contains("random") || currentFeatureChr.contains("chrun") ||
currentFeatureChr.contains("hap")) {
featureMap.put(key, feature);
return;
}
// If there are multiple features, use or keep the longest one
int w1 = currentFeature.getEnd() - currentFeature.getStart();
int w2 = feature.getEnd() - feature.getStart();
if (w2 > w1) {
featureMap.put(key, feature);
}
}
}
*/
/**
 * Index a feature under an explicitly supplied name instead of the feature's own name.
 * The name is upper-cased here before being handed to {@code put}; the result of
 * {@code put} is not reported to the caller.
 *
 * @param name    name to index the feature under
 * @param feature feature to store
 * @param genome  genome passed through to {@code put} for the chromosome check
 */
public static void addFeature(String name, NamedFeature feature, Genome genome) {
    final String upperName = name.toUpperCase();
    put(upperName, feature, genome);
}
/**
 * Not instantiable: all members of this class are static.
 */
private FeatureDB() {
    // intentionally empty
}
/**
 * Index every {@link IGVFeature} found in the given list; entries of any other
 * type are skipped.
 *
 * @param features features to index
 * @param genome   genome passed through to {@code addFeature}
 */
public static void addFeatures(List<htsjdk.tribble.Feature> features, Genome genome) {
    for (htsjdk.tribble.Feature candidate : features) {
        if (!(candidate instanceof IGVFeature)) {
            continue;
        }
        addFeature((IGVFeature) candidate, genome);
    }
}
/**
 * Remove every entry from the feature index.
 */
public static void clearFeatures() {
    // Take the map's lock explicitly, as put() does, before emptying it.
    synchronized (featureMap) {
        featureMap.clear();
    }
}
/**
 * @return the number of distinct keys currently in the index (not the number of features)
 */
static int size() {
    final int keyCount = featureMap.size();
    return keyCount;
}
/**
 * Return a feature with the given name.
 * <p/>
 * The name is trimmed and upper-cased before lookup. If several features are
 * registered under the name, the first entry of that name's list is returned.
 * The list is read while holding the index lock, the same lock {@code put}
 * holds while modifying it.
 *
 * @param name name to look up; may be null
 * @return the first feature stored under the name, or null if the name is null
 *         or nothing is registered under it
 */
public static NamedFeature getFeature(String name) {
    if (name == null) {
        return null;
    }
    final String key = name.trim().toUpperCase();
    synchronized (featureMap) {
        final List<NamedFeature> features = featureMap.get(key);
        if (features == null || features.isEmpty()) {
            return null;
        }
        return features.get(0);
    }
}
/**
 * Get all entries whose key starts with the given name. Matches are not
 * necessarily exact: every key beginning with the (trimmed, upper-cased)
 * search string is included, exact matches among them.
 * <p/>
 * NOTE: the result is a range view of the synchronized index. "It is imperative
 * that the user manually synchronize on [this sorted map] when iterating over any
 * of its collection views, or the collections views of any of its subMap, headMap
 * or tailMap views". See
 * <a href="http://docs.oracle.com/javase/6/docs/api/java/util/Collections.html#synchronizedSortedMap%28java.util.SortedMap%29">here</a>.
 *
 * @param name : Search string. Features which begin with this
 *             string will be found.
 * @return view of the index restricted to keys starting with the search string
 */
static Map<String, List<NamedFeature>> getFeaturesMap(String name) {
    final String prefix = name.trim().toUpperCase();
    // subMap is inclusive of its first argument and exclusive of its second, so
    // the upper bound is the prefix followed by the largest char value.
    final String upperBound = prefix + Character.MAX_VALUE;
    final SortedMap<String, List<NamedFeature>> sortedView =
            (SortedMap<String, List<NamedFeature>>) featureMap;
    return sortedView.subMap(prefix, upperBound);
}
/**
 * Convenience overload: same as {@code getFeaturesList(name, limit, true)},
 * i.e. only one feature is taken for each matching name.
 *
 * @param name  search prefix
 * @param limit maximum number of matching names to consume
 * @return features found for names starting with {@code name}
 * @see #getFeaturesList(String, int, boolean)
 */
public static List<NamedFeature> getFeaturesList(String name, int limit) {
    final boolean longestOnly = true;
    return getFeaturesList(name, limit, longestOnly);
}
/**
 * Get a list of features whose key starts with the provided name.
 * Note that matches can be inexact.
 * <p/>
 * Matching names are visited in sorted key order. {@code limit} bounds the number
 * of names consumed, not the number of features returned, so with
 * {@code longestOnly == false} the result may contain more than {@code limit} features.
 * A zero or negative limit yields an empty list.
 *
 * @param name        search prefix (trimmed and upper-cased before matching)
 * @param limit       maximum number of matching names to consume
 * @param longestOnly Whether to take only the first feature stored for each name
 *                    (lists are built with {@code FeatureComparator.get(true)}, which
 *                    places longer features first among equal chromosome-name lengths)
 *                    rather than all of them
 * @return a new list holding the collected features; never null
 */
public static List<NamedFeature> getFeaturesList(String name, int limit, boolean longestOnly) {
    //Note: We are iterating over a view of the sub-map, this needs
    //to be synchronized over the main map.
    synchronized (featureMap) {
        // Iterate the values directly: this avoids a second lookup per key and avoids
        // asking the range view for its size, which may require walking the whole range.
        // The list is deliberately not presized from `limit`, which may be negative or huge.
        final Iterator<List<NamedFeature>> matches = getFeaturesMap(name).values().iterator();
        final List<NamedFeature> features = new ArrayList<NamedFeature>();
        int namesTaken = 0;
        while (namesTaken < limit && matches.hasNext()) {
            final List<NamedFeature> sameName = matches.next();
            if (longestOnly) {
                features.add(sameName.get(0));
            } else {
                features.addAll(sameName);
            }
            namesTaken++;
        }
        return features;
    }
}
/**
 * Search for features with the given name that have the specified amino acid
 * at the specified (1-indexed) protein position, and for which at least one SNP
 * mapping from that codon to the mutated amino acid is reported by
 * {@link AminoAcidManager}.
 *
 * @param name            feature name; must match exactly (case-insensitively), but
 *                        several features may share the name
 * @param proteinPosition 1-indexed position along the feature in which the amino acid is found, in protein coordinates
 * @param refAA           String symbolizing the desired amino acid
 * @param mutAA           String symbolizing the mutated amino acid
 * @param currentGenome   genome to use; when null and not running headless, the current
 *                        genome from {@link GenomeManager} is used instead
 * @return Map from genome position (first position of the matching codon) to the feature found
 */
public static Map<Integer, BasicFeature> getMutationAA(String name, int proteinPosition, String refAA,
                                                       String mutAA, Genome currentGenome) {
    final String key = name.toUpperCase();

    Genome genome = currentGenome;
    if (!Globals.isHeadless() && genome == null) {
        genome = GenomeManager.getInstance().getCurrentGenome();
    }

    final Map<Integer, BasicFeature> matches = new HashMap<Integer, BasicFeature>();
    final List<NamedFeature> candidates = featureMap.get(key);
    if (candidates == null) {
        return matches;
    }

    synchronized (featureMap) {
        for (NamedFeature candidate : candidates) {
            if (candidate instanceof BasicFeature) {
                final BasicFeature basic = (BasicFeature) candidate;
                final Codon codon = basic.getCodon(genome, proteinPosition);
                if (codon != null && codon.getAminoAcid().equalsByName(refAA)) {
                    final Set<String> snps = AminoAcidManager.getInstance().getMappingSNPs(codon.getSequence(),
                            AminoAcidManager.getAminoAcidByName(mutAA));
                    if (!snps.isEmpty()) {
                        matches.put(codon.getGenomePositions()[0], basic);
                    }
                }
            }
        }
    }
    return matches;
}
/**
 * Find features with a given name, which have refNT as the base pair at the specified position within the feature.
 * refNT considered based on the read strand, so a negative strand feature with A at position 1 on the positive strand
 * would be found only if refNT = T.
 * <p/>
 * If no genome is available (none supplied, and either running headless or no current
 * genome is loaded) the reference sequence cannot be read and an empty map is returned.
 *
 * @param name          Feature name
 * @param startPosition 1-based location within the feature
 * @param refNT         Nucleotide (A, G, C, T) of feature.
 * @param currentGenome The genome in which to search; when null and not running headless,
 *                      the current genome from {@link GenomeManager} is used instead
 * @return map from genome position to the matching feature; never null
 */
public static Map<Integer, BasicFeature> getMutationNT(String name, int startPosition, String refNT, Genome currentGenome) {
    final String nm = name.toUpperCase();
    if (!Globals.isHeadless() && currentGenome == null) {
        currentGenome = GenomeManager.getInstance().getCurrentGenome();
    }

    final Map<Integer, BasicFeature> results = new HashMap<Integer, BasicFeature>();
    final List<NamedFeature> possibles = featureMap.get(nm);
    final String brefNT = refNT.toUpperCase();

    // Without a genome there is no sequence to compare against; previously this
    // path ended in a NullPointerException on currentGenome.getSequence(...).
    if (possibles == null || currentGenome == null) {
        return results;
    }

    synchronized (featureMap) {
        for (NamedFeature f : possibles) {
            if (!(f instanceof BasicFeature)) {
                continue;
            }
            final BasicFeature bf = (BasicFeature) f;

            // Convert the 1-based feature position to a 0-based offset before mapping it.
            final int genomePosition = bf.featureToGenomePosition(new int[]{startPosition - 1})[0];
            if (genomePosition < 0) {
                continue;
            }

            final byte[] nuclSequence = currentGenome.getSequence(bf.getChr(), genomePosition, genomePosition + 1);
            if (nuclSequence == null) {
                continue;
            }

            String tempNT = new String(nuclSequence);
            if (bf.getStrand() == Strand.NEGATIVE) {
                // Compare on the feature's own strand.
                tempNT = AminoAcidManager.getNucleotideComplement(tempNT);
            }

            if (tempNT.toUpperCase().equals(brefNT)) {
                results.put(genomePosition, bf);
            }
        }
    }
    return results;
}
/**
 * Doubleton class. Can sort ascending or descending by feature length, at most 2 instances.
 * <p/>
 * Ordering: features on chromosomes with shorter names always come first, regardless
 * of direction; among equal chromosome-name lengths, features are ordered by
 * {@code end - start}, ascending or descending as requested.
 * <p/>
 * Both instances are created eagerly as constants, so {@link #get(boolean)} needs no
 * lazy initialisation, and each constant's name matches the direction it sorts in.
 */
private static class FeatureComparator implements Comparator<Feature> {
    private static final FeatureComparator ASCENDING_INSTANCE = new FeatureComparator(false);
    private static final FeatureComparator DESCENDING_INSTANCE = new FeatureComparator(true);

    private final boolean descending;

    /**
     * @param descending true for longest-first ordering, false for shortest-first
     * @return the shared comparator for the requested direction
     */
    public static FeatureComparator get(boolean descending) {
        return descending ? DESCENDING_INSTANCE : ASCENDING_INSTANCE;
    }

    private FeatureComparator(boolean reverse) {
        this.descending = reverse;
    }

    @Override
    public int compare(Feature feat1, Feature feat2) {
        // Prefer the shortest chromosome name. Longer names are most likely "weird"
        // e.g. chr1_gl000191_random
        final int byChrNameLength = compareInts(feat1.getChr().length(), feat2.getChr().length());
        if (byChrNameLength != 0) {
            return byChrNameLength;
        }

        final int len1 = feat1.getEnd() - feat1.getStart();
        final int len2 = feat2.getEnd() - feat2.getStart();
        return this.descending ? compareInts(len2, len1) : compareInts(len1, len2);
    }

    /**
     * Three-way int comparison that cannot overflow, unlike returning {@code a - b}.
     */
    private static int compareInts(int a, int b) {
        return (a < b) ? -1 : ((a == b) ? 0 : 1);
    }
}
}