/*
* Copyright (c) 2007-2012 The Broad Institute, Inc.
* SOFTWARE COPYRIGHT NOTICE
* This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
* Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*/
package org.broad.igv.track;
import org.broad.igv.Globals;
import org.broad.igv.data.AbstractDataSource;
import org.broad.igv.data.DataSource;
import org.broad.igv.data.DataTile;
import org.broad.igv.feature.BasicFeature;
import org.broad.igv.feature.*;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.feature.tribble.*;
import org.broad.igv.tdf.TDFDataSource;
import org.broad.igv.tdf.TDFReader;
import org.broad.igv.ui.IGV;
import org.broad.igv.ui.panel.ReferenceFrame;
import org.broad.igv.ui.util.IndexCreatorDialog;
import org.broad.igv.util.FileUtils;
import org.broad.igv.util.ParsingUtils;
import org.broad.igv.util.ResourceLocator;
import org.broad.igv.util.RuntimeUtils;
import org.broad.igv.util.collections.CollUtils;
import org.broad.igv.variant.VariantTrack;
import htsjdk.tribble.*;
import htsjdk.tribble.index.Index;
import java.io.File;
import java.io.IOException;
import java.util.*;
/**
* @author jrobinso
* @date Jun 27, 2010
*/
abstract public class TribbleFeatureSource implements org.broad.igv.track.FeatureSource {
IGVFeatureReader reader;
DataSource coverageSource;
boolean isVCF;
Genome genome;
/**
* Map of IGV chromosome name -> source name
*/
Map<String, String> chrNameMap = new HashMap<String, String>();
private int featureWindowSize;
Object header;
Class featureClass;
public static TribbleFeatureSource getFeatureSource(ResourceLocator locator, Genome genome) throws IOException, TribbleIndexNotFoundException {
return getFeatureSource(locator, genome, true);
}
public static TribbleFeatureSource getFeatureSource(ResourceLocator locator, Genome genome, boolean useCache) throws IOException, TribbleIndexNotFoundException {
FeatureCodec codec = CodecFactory.getCodec(locator, genome);
String idxPath = ResourceLocator.indexFile(locator);
boolean indexExists = FileUtils.resourceExists(idxPath);
// Optionally let the user create an index.
final int tenMB = 10000000;
final int oneGB = 1000000000;
long size = FileUtils.getLength(locator.getPath());
final boolean indexRequired =
(VariantTrack.isVCF(locator.getTypeString()) && size > tenMB) || size > oneGB;
if (!Globals.isHeadless() && locator.isLocal() && !locator.getPath().endsWith(".gz") && !indexExists) {
if (size > tenMB) {
createIndex(locator, indexRequired); // Note, might return null.
}
}
//We make sure to require and index if one exists, so it gets loaded
//TODO Temporary, shouldn't be necessary pending a tribble update
AbstractFeatureReader basicReader = AbstractFeatureReader.getFeatureReader(locator.getPath(), idxPath, codec, indexRequired || indexExists);
if (basicReader.hasIndex()) {
return new IndexedFeatureSource(basicReader, codec, locator, genome, useCache);
} else {
return new NonIndexedFeatureSource(basicReader, codec, locator, genome);
}
}
/**
* Present a dialog for the user to create an index. This method can return null if the user cancels, or there
* is an error while creating the index.
*
* @param locator
* @param indexRequired
* @return
*/
private static Index createIndex(ResourceLocator locator, boolean indexRequired) {
File baseFile = new File(locator.getPath());
File newIdxFile = new File(locator.getPath() + ".idx");
String messageText = "An index file for " + baseFile.getAbsolutePath() + " could not " +
"be located. An index is " + (indexRequired ? "required" : "recommended") +
" to view files of this size. Click \"Go\" to create one now.";
IndexCreatorDialog dialog = IndexCreatorDialog.createShowDialog(IGV.getMainFrame(), baseFile, newIdxFile, messageText);
return (Index) dialog.getIndex();
}
private TribbleFeatureSource(ResourceLocator locator, AbstractFeatureReader reader, FeatureCodec codec, Genome genome, boolean useCache) throws IOException {
this.genome = genome;
this.isVCF = codec.getClass() == VCFWrapperCodec.class;
this.featureClass = codec.getFeatureType();
this.header = reader.getHeader();
this.featureWindowSize = estimateFeatureWindowSize(reader);
this.reader = useCache ?
new CachingFeatureReader(reader, 5, featureWindowSize) :
new TribbleReaderWrapper(reader);
initCoverageSource(locator.getPath() + ".tdf");
}
protected abstract int estimateFeatureWindowSize(FeatureReader reader);
protected abstract Collection<String> getSequenceNames();
public abstract boolean isIndexed();
public Class getFeatureClass() {
return featureClass;
}
public int getFeatureWindowSize() {
return featureWindowSize;
}
public void setFeatureWindowSize(int size) {
this.featureWindowSize = size;
if (reader instanceof CachingFeatureReader) {
((CachingFeatureReader) reader).setBinSize(size);
}
}
public Object getHeader() {
return header;
}
private void initCoverageSource(String covPath) {
if (ParsingUtils.pathExists(covPath)) {
TDFReader reader = TDFReader.getReader(covPath);
coverageSource = new TDFDataSource(reader, 0, "", genome);
}
}
static class IndexedFeatureSource extends TribbleFeatureSource {
private IndexedFeatureSource(AbstractFeatureReader basicReader, FeatureCodec codec, ResourceLocator locator,
Genome genome, boolean useCache) throws IOException {
super(locator, basicReader, codec, genome, useCache);
if (genome != null) {
Collection<String> seqNames = reader.getSequenceNames();
if (seqNames != null) {
for (String seqName : seqNames) {
String igvChr = genome.getChromosomeAlias(seqName);
if (igvChr != null && !igvChr.equals(seqName)) {
chrNameMap.put(igvChr, seqName);
}
}
}
}
}
@Override
public boolean isIndexed() {
return true;
}
@Override
public Iterator<Feature> getFeatures(String chr, int start, int end) throws IOException {
String seqName = chrNameMap.get(chr);
if (seqName == null) seqName = chr;
return reader.query(seqName, start, end);
}
/**
* Return coverage values overlapping the query interval. At this time Tribble sources do not provide
* coverage values
*
* @param chr
* @param start
* @param end
* @param zoom
* @return
*/
@Override
public List<LocusScore> getCoverageScores(String chr, int start, int end, int zoom) {
return coverageSource == null ? null :
coverageSource.getSummaryScoresForRange(chr, start, end, zoom);
}
@Override
protected Collection<String> getSequenceNames() {
return reader.getSequenceNames();
}
/**
* Estimate an appropriate feature window size.
*
* @param reader
*/
@Override
protected int estimateFeatureWindowSize(FeatureReader reader) {
// Simple formula for VCF. Appropriate for human 1KG/dbSNp, probably overly conservative otherwise
if(isVCF) {
return 10000;
}
else {
}
CloseableTribbleIterator<htsjdk.tribble.Feature> iter = null;
try {
double mem = RuntimeUtils.getAvailableMemory();
iter = reader.iterator();
if (iter.hasNext()) {
int nSamples = 1000;
htsjdk.tribble.Feature firstFeature = iter.next();
htsjdk.tribble.Feature lastFeature = firstFeature;
String chr = firstFeature.getChr();
int n = 1;
long len = 0;
while (iter.hasNext() && n < nSamples) {
htsjdk.tribble.Feature f = iter.next();
if (f != null) {
n++;
if (f.getChr().equals(chr)) {
lastFeature = f;
} else {
len += lastFeature.getEnd() - firstFeature.getStart() + 1;
firstFeature = f;
lastFeature = f;
chr = f.getChr();
}
}
}
double dMem = mem - RuntimeUtils.getAvailableMemory();
double bytesPerFeature = Math.max(100, dMem / n);
len += lastFeature.getEnd() - firstFeature.getStart() + 1;
double featuresPerBase = ((double) n) / len;
double targetBinMemory = 20000000; // 20 mega bytes
int maxBinSize = Integer.MAX_VALUE;
int bs = Math.min(maxBinSize, (int) (targetBinMemory / (bytesPerFeature * featuresPerBase)));
return Math.max(1000000, bs);
} else {
return Integer.MAX_VALUE;
}
} catch (IOException e) {
return 1000000;
} finally {
if (iter != null) iter.close();
}
}
}
static class NonIndexedFeatureSource extends TribbleFeatureSource {
/**
* Map containing all features. Used only when there is no index.
*/
Map<String, List<Feature>> featureMap;
CoverageDataSource coverageData;
private NonIndexedFeatureSource(AbstractFeatureReader basicReader, FeatureCodec codec, ResourceLocator locator, Genome genome) throws IOException {
super(locator, basicReader, codec, genome, false);
featureMap = new HashMap<String, List<Feature>>(25);
Iterator<Feature> iter = reader.iterator();
while (iter.hasNext()) {
Feature f = iter.next();
if (f == null) continue;
String seqName = f.getChr();
String igvChr = genome == null ? seqName : genome.getChromosomeAlias(seqName);
List<Feature> featureList = featureMap.get(igvChr);
if (featureList == null) {
featureList = new ArrayList();
featureMap.put(igvChr, featureList);
}
featureList.add(f);
if (f instanceof NamedFeature) FeatureDB.addFeature((NamedFeature) f, genome);
}
for (List<Feature> featureList : featureMap.values()) {
FeatureUtils.sortFeatureList(featureList);
}
if (genome != null) {
coverageData = new CoverageDataSource(genome);
coverageData.computeGenomeCoverage();
sampleGenomeFeatures();
}
}
@Override
public boolean isIndexed() {
return false;
}
@Override
public List<LocusScore> getCoverageScores(String chr, int start, int end, int zoom) {
return coverageData == null ? Collections.<LocusScore>emptyList() :
coverageData.getSummaryScoresForRange(chr, start, end, zoom);
}
@Override
public Iterator getFeatures(String chr, int start, int end) throws IOException {
List<Feature> features = featureMap.get(chr);
if (features == null) {
return Collections.<Feature>emptyList().iterator();
}
List<Feature> filteredFeatures = CollUtils.filter(features, FeatureUtils.getOverlapPredicate(chr, start, end));
return filteredFeatures.iterator();
}
@Override
protected Collection<String> getSequenceNames() {
return featureMap.keySet();
}
@Override
protected int estimateFeatureWindowSize(FeatureReader reader) {
return 0;
}
protected void sampleGenomeFeatures() {
List<Feature> chrAllFeatures = new ArrayList(1000);
int sampleLength = (int) ((double) genome.getNominalLength() / (1000 * 700));
int lastFeaturePosition = -1;
for (String chr : genome.getLongChromosomeNames()) {
List<Feature> features = featureMap.get(chr);
if (features != null) {
long offset = genome.getCumulativeOffset(chr);
for (Feature feature : features) {
if (feature instanceof IGVFeature) {
IGVFeature f = (IGVFeature) feature;
int genStart = (int) ((offset + f.getStart()) / 1000);
int genEnd = (int) ((offset + f.getEnd()) / 1000);
if (genEnd > lastFeaturePosition + sampleLength) {
BasicFeature f2 = new BasicFeature(Globals.CHR_ALL, genStart, genEnd);
if (f instanceof BasicFeature) {
BasicFeature bf = (BasicFeature) f;
f2.setThickEnd((int) ((offset + bf.getThickEnd()) / 1000));
f2.setThickStart((int) ((offset + bf.getThickStart()) / 1000));
f2.setName(f.getName());
}
chrAllFeatures.add(f2);
lastFeaturePosition = genEnd;
}
}
}
}
}
featureMap.put(Globals.CHR_ALL, chrAllFeatures);
}
class CoverageDataSource extends AbstractDataSource {
int windowSize = 1000;
double dataMin = 0;
double dataMax = 0;
Map<String, DataTile> dataCache = new HashMap();
CoverageDataSource(Genome genome) {
super(genome);
}
protected DataTile getRawData(String chr, int startLocation, int endLocation) {
DataTile coverageData = dataCache.get(chr);
if (coverageData == null) {
coverageData = computeCoverage(chr, startLocation, endLocation);
dataCache.put(chr, coverageData);
}
return coverageData;
}
@Override
protected List<LocusScore> getPrecomputedSummaryScores(String chr, int startLocation, int endLocation, int zoom) {
return null; //To change body of implemented methods use File | Settings | File Templates.
}
@Override
public int getLongestFeature(String chr) {
return windowSize;
}
public double getDataMax() {
return dataMax;
}
public double getDataMin() {
return dataMin;
}
public TrackType getTrackType() {
return TrackType.OTHER; //To change body of implemented methods use File | Settings | File Templates.
}
// This won't work for large track!
private DataTile computeCoverage(String chr, int start, int end) {
int nBins = (end - start) / windowSize + 1;
int[] starts = new int[nBins];
int[] ends = new int[nBins];
for (int i = 0; i < nBins; i++) {
starts[i] = start + i * windowSize;
ends[i] = starts[i] + windowSize;
}
float[] values = new float[nBins];
List<Feature> features = featureMap.get(chr);
if (features != null) {
for (Feature f : features) {
int startBin = f.getStart() / windowSize;
int endBin = f.getEnd() / windowSize;
for (int i = startBin; i < endBin; i++) {
values[i] = values[i] + 1;
dataMax = Math.max(dataMax, values[i]);
}
}
}
return new DataTile(starts, ends, values, null);
}
protected void computeGenomeCoverage() {
int nBins = 1000;
int[] starts = new int[nBins];
int[] ends = new int[nBins];
float[] values = new float[nBins];
Arrays.fill(values, 0);
double step = ((double) genome.getNominalLength() / 1000) / nBins;
for (int i = 0; i < nBins; i++) {
starts[i] = (int) (i * step);
ends[i] = (int) ((i + 1) * step);
}
for (String chr : genome.getLongChromosomeNames()) {
List<Feature> features = featureMap.get(chr);
if (features != null) {
long offset = genome.getCumulativeOffset(chr);
for (Feature f : features) {
int genStart = (int) ((offset + f.getStart()) / 1000);
int genEnd = (int) ((offset + f.getEnd()) / 1000);
int binStart = Math.min(values.length - 1, (int) (genStart / step));
int binEnd = Math.min(values.length - 1, (int) (genEnd / step));
for (int i = binStart; i <= binEnd; i++) {
values[i] = values[i] + 1;
dataMax = Math.max(dataMax, values[i]);
}
}
}
}
dataCache.put(Globals.CHR_ALL, new DataTile(starts, ends, values, null));
}
public String getValueString(String chr, double position, ReferenceFrame frame) {
int zoom = Math.max(0, frame.getZoom());
List<LocusScore> scores = getSummaryScoresForRange(chr, (int) position - 10, (int) position + 10, zoom);
// give a 2 pixel window, otherwise very narrow features will be missed.
double bpPerPixel = frame.getScale();
int minWidth = (int) (2 * bpPerPixel); /* * */
if (scores == null) {
return "";
} else {
LocusScore score = (LocusScore) FeatureUtils.getFeatureAt(position, minWidth, scores);
return score == null ? "" : "Mean count: " + score.getScore();
}
}
}
}
}