Source Code of org.broad.igv.tdf.TDFDataSource

/*
 * Copyright (c) 2007-2012 The Broad Institute, Inc.
 * SOFTWARE COPYRIGHT NOTICE
 * This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
 *
 * This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
 *
 * This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
 * Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
 */


/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package org.broad.igv.tdf;


import org.apache.log4j.Logger;
import org.broad.igv.Globals;
import org.broad.igv.PreferenceManager;
import org.broad.igv.data.BasicScore;
import org.broad.igv.data.CompositeScore;
import org.broad.igv.data.CoverageDataSource;
import org.broad.igv.data.NamedScore;
import org.broad.igv.feature.Chromosome;
import org.broad.igv.feature.LocusScore;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.track.TrackType;
import org.broad.igv.track.WindowFunction;
import org.broad.igv.ui.panel.FrameManager;
import org.broad.igv.util.collections.LRUCache;


import java.util.*;


/**
 * @author jrobinso
 */
public class TDFDataSource implements CoverageDataSource {


    private static Logger log = Logger.getLogger(TDFDataSource.class);


    TDFReader reader;
    int maxPrecomputedZoom = 6;
    private int trackNumber = 0;
    String trackName;
    LRUCache<String, List<LocusScore>> summaryScoreCache = new LRUCache(20);
    Genome genome;
    WindowFunction windowFunction = WindowFunction.mean;
    List<WindowFunction> availableFunctions;


    boolean normalizeCounts = false;
    int totalCount = 0;
    float normalizationFactor = 1.0f;
    private Map<String, String> chrNameMap = new HashMap();




    public TDFDataSource(TDFReader reader, int trackNumber, String trackName, Genome genome) {


        this.genome = genome;
        this.trackNumber = trackNumber;
        this.trackName = trackName;
        this.reader = reader;
        this.availableFunctions = reader.getWindowFunctions();


        TDFGroup rootGroup = reader.getGroup("/");
        try {
            maxPrecomputedZoom = Integer.parseInt(rootGroup.getAttribute("maxZoom"));
        } catch (Exception e) {
            log.error("Error reading attribute 'maxZoom'", e);
        }
        try {
            String dataGenome = rootGroup.getAttribute("genome");
            // TODO -- throw exception if data genome != current genome 
        } catch (Exception e) {
            log.error("Unknown genome " + rootGroup.getAttribute("genome"));
            throw new RuntimeException("Unknown genome " + rootGroup.getAttribute("genome"));
        }


        try {
            String totalCountString = rootGroup.getAttribute("totalCount");
            if (totalCountString != null) {
                totalCount = Integer.parseInt(totalCountString);
            }
        } catch (Exception e) {
            log.error("Error reading attribute 'totalCount'", e);
        }


        intChrMap();


        boolean normalizeCounts = PreferenceManager.getInstance().getAsBoolean(PreferenceManager.NORMALIZE_COVERAGE);
        setNormalize(normalizeCounts);




    }


    public void updateGenome(Genome genome) {
        this.genome = genome;
        chrNameMap.clear();
        intChrMap();
    }


    private void intChrMap() {
        // If we have a genome, build a reverse-lookup table for queries
        if (genome != null) {
            Set<String> chrNames = reader.getChromosomeNames();
            for (String chr : chrNames) {
                String igvChr = genome.getChromosomeAlias(chr);
                if (igvChr != null && !igvChr.equals(chr)) {
                    chrNameMap.put(igvChr, chr);
                }
            }
        }
    }


    public void setNormalize(boolean normalizeCounts) {
        setNormalizeCounts(normalizeCounts, 1.0e6f);
    }


    public boolean getNormalize() {
        return this.normalizeCounts;
    }


    public void setNormalizeCounts(boolean normalizeCounts, float scalingFactor) {
        this.normalizeCounts = normalizeCounts;
        if (normalizeCounts && totalCount > 0) {
            normalizationFactor = scalingFactor / totalCount;
        } else {
            normalizationFactor = 1;
        }


    }


    public String getPath() {
        return reader == null ? null : reader.getPath();
    }


    public String getTrackName() {
        return trackName;
    }


    public double getDataMax() {
        return reader.getUpperLimit() * normalizationFactor;
    }


    public double getDataMin() {
        return reader.getLowerLimit() * normalizationFactor;
    }


    private List<LocusScore> getCachedSummaryScores(String querySeq, int zoom, int tileNumber, double tileWidth) {


        String key = querySeq + "_" + zoom + "_" + tileNumber + "_" + windowFunction;


        List<LocusScore> scores = summaryScoreCache.get(key);
        if (scores == null) {


            int startLocation = (int) (tileNumber * tileWidth);
            int endLocation = (int) ((tileNumber + 1) * tileWidth);


            scores = getSummaryScores(querySeq, startLocation, endLocation, zoom);


            summaryScoreCache.put(key, scores);
        }


        return scores;


    }


    protected List<LocusScore> getSummaryScores(String querySeq, int startLocation, int endLocation, int zoom) {


        List<LocusScore> scores;


        if (zoom <= this.maxPrecomputedZoom) {
            // Window function == none => no windowing, so its not clear what to do.  For now use mean
            WindowFunction wf = (windowFunction == WindowFunction.none ? WindowFunction.mean : windowFunction);


            List<TDFTile> tiles = null;
            if (querySeq.equals(Globals.CHR_ALL) && !isChrOrderValid()) {
                TDFTile wgTile = reader.getWholeGenomeTile(genome, wf);
                tiles = Arrays.asList(wgTile);
            } else {
                TDFDataset ds = reader.getDataset(querySeq, zoom, wf);
                if (ds != null) {
                    tiles = ds.getTiles(startLocation, endLocation);
                }
            }


            scores = new ArrayList(1000);
            if (tiles != null && tiles.size() > 0) {
                for (TDFTile tile : tiles) {


                    if (tile.getSize() > 0) {
                        for (int i = 0; i < tile.getSize(); i++) {
                            float v = tile.getValue(trackNumber, i);
                            if (!Float.isNaN(v)) {
                                v *= normalizationFactor;
                                scores.add(new BasicScore(tile.getStartPosition(i), tile.getEndPosition(i), v));
                            }
                        }
                    }
                }
            }


        } else {


            int chrLength = getChrLength(querySeq);
            if (chrLength == 0) {
                return Collections.emptyList();
            }
            endLocation = Math.min(endLocation, chrLength);
            // By definition there are 2^z tiles per chromosome, and 700 bins per tile, where z is the zoom level.
            // By definition there are 2^z tiles per chromosome, and 700 bins per tile, where z is the zoom level.
            //int maxZoom = (int) (Math.log(chrLength / 700) / Globals.log2) + 1;
            //int z = Math.min(zReq, maxZoom);
            int nTiles = (int) Math.pow(2, zoom);
            double binSize = Math.max(1, (((double) chrLength) / nTiles) / 700);


            scores = computeSummaryScores(querySeq, startLocation, endLocation, binSize);
        }
        return scores;
    }




    public int getChrLength(String chr) {
        if (chr.equals(Globals.CHR_ALL)) {
            return (int) (genome.getNominalLength() / 1000);
        } else {
            Chromosome c = genome.getChromosome(chr);
            return c == null ? 0 : c.getLength();
        }
    }


    private List<LocusScore> computeSummaryScores(String chr, int startLocation, int endLocation, double scale) {


        List<LocusScore> scores = new ArrayList(1000);


        String dsName = "/" + chr + "/raw";


        TDFDataset rawDataset = reader.getDataset(dsName);
        if (rawDataset != null) {
            List<TDFTile> rawTiles = rawDataset.getTiles(startLocation, endLocation);
            if (rawTiles.size() > 0) {


                if (windowFunction == WindowFunction.none) {
                    for (TDFTile rawTile : rawTiles) {
                        // Tile of raw data
                        if (rawTile != null && rawTile.getSize() > 0) {


                            for (int i = 0; i < rawTile.getSize(); i++) {
                                int s = rawTile.getStartPosition(i);
                                int e = Math.max(s, rawTile.getEndPosition(i) - 1);


                                if (e < startLocation) {
                                    continue;
                                } else if (s > endLocation) {
                                    break;
                                }
                                float v = rawTile.getValue(trackNumber, i);
                                if (!Float.isNaN(v)) {
                                    v *= normalizationFactor;
                                }
                                scores.add(new BasicScore(s, e, v));
                            }
                        }
                    }




                } else {




                    Accumulator accumulator = new Accumulator(windowFunction, 5);
                    int accumulatedStart = -1;
                    int accumulatedEnd = -1;
                    int lastEndBin = 0;
                    for (TDFTile rawTile : rawTiles) {
                        // Tile of raw data
                        int size = rawTile.getSize();
                        if (rawTile != null && size > 0) {


                            int[] starts = rawTile.getStart();
                            int[] ends = rawTile.getEnd();
                            String[] features = rawTile.getNames();
                            float[] values = rawTile.getData(trackNumber);


                            // Loop through and bin scores for this interval.
                            for (int i = 0; i < size; i++) {


                                if (starts[i] >= endLocation) {
                                    break;  // We're beyond the end of the requested interval
                                }




                                int s = Math.max(startLocation, starts[i]);
                                int e = ends == null ? s + 1 : Math.min(endLocation, ends[i]);
                                float v = values[i] * normalizationFactor;


                                if (e < startLocation || Float.isNaN(v)) {
                                    continue;
                                }


                                String probeName = features == null ? null : features[i];


                                // Compute bin numbers, relative to start of this tile
                                int endBin = (int) ((e - startLocation) / scale);
                                int startBin = (int) ((s - startLocation) / scale);


                                // If this feature spans multiple bins, or extends beyond last end bin, record
                                if (endBin > lastEndBin || endBin > startBin) {
                                    if (accumulator.hasData()) {
                                        scores.add(getCompositeScore(accumulator, accumulatedStart, accumulatedEnd));
                                        accumulator = new Accumulator(windowFunction, 5);
                                    }
                                }


                                if (endBin > startBin) {
                                    scores.add(new NamedScore(s, e, v, probeName));
                                } else {
                                    if (!accumulator.hasData()) {
                                        accumulatedStart = s;
                                        accumulatedEnd = e;
                                        accumulator.add(e - s, v, probeName);
                                    }


                                }


                                lastEndBin = endBin;
                            }


                            // End of loop cleanup
                            if (accumulator.hasData()) {
                                scores.add(getCompositeScore(accumulator, accumulatedStart, accumulatedEnd));
                            }
                        }
                    }
                }
            }
        }
        return scores;
    }




    private LocusScore getCompositeScore(Accumulator accumulator, int accumulatedStart, int accumulatedEnd) {
        LocusScore ls;
        if (accumulator.getNpts() == 1) {
            ls = new NamedScore(accumulatedStart, accumulatedEnd, accumulator.getRepData()[0], accumulator.getRepProbes()[0]);
        } else {
            float value = accumulator.getValue();
            ls = new CompositeScore(accumulatedStart, accumulatedEnd, value, accumulator.getRepData(),
                    accumulator.getRepProbes(), windowFunction);
        }
        return ls;


    }


    public List<LocusScore> getSummaryScoresForRange(String chr, int startLocation, int endLocation, int zoom) {


        Chromosome chromosome = genome.getChromosome(chr);


        String tmp = chrNameMap.get(chr);
        String querySeq = tmp == null ? chr : tmp;


        // If we are in gene list view bypass caching.
        if (Globals.isHeadless() || FrameManager.isGeneListMode() || FrameManager.isExomeMode()) {
            return getSummaryScores(querySeq, startLocation, endLocation, zoom);
        } else {


            ArrayList scores = new ArrayList();


            // TODO -- this whole section could be computed once and stored,  it is only a function of the genome, chr, and zoom level.
            double tileWidth = 0;
            if (chr.equals(Globals.CHR_ALL)) {
                tileWidth = (genome.getNominalLength() / 1000.0);
            } else {
                if (chromosome != null) {
                    tileWidth = chromosome.getLength() / ((int) Math.pow(2.0, zoom));
                }
            }
            if (tileWidth == 0) {
                return null;
            }




            int startTile = (int) (startLocation / tileWidth);
            int endTile = (int) (endLocation / tileWidth);
            for (int t = startTile; t <= endTile; t++) {
                List<LocusScore> cachedScores = getCachedSummaryScores(querySeq, zoom, t, tileWidth);
                if (cachedScores != null) {
                    for (LocusScore s : cachedScores) {
                        if (s.getEnd() >= startLocation) {
                            scores.add(s);
                        } else if (s.getStart() > endLocation) {
                            break;
                        }
                    }
                }


            }


            return scores;
        }
    }




    public TrackType getTrackType() {
        return reader.getTrackType();
    }


    public void setWindowFunction(WindowFunction wf) {
        this.windowFunction = wf;
    }


    public boolean isLogNormalized() {
        //return false;
        return getDataMin() < 0;
    }


    public void refreshData(long timestamp) {
        // ignored
    }


    public WindowFunction getWindowFunction() {
        return windowFunction;
    }


    public Collection<WindowFunction> getAvailableWindowFunctions() {
        return availableFunctions;
    }


    @Override
    public void dispose() {


    }


    /**
     * We changed how chromosomes were sorted in v2.2
     * This had the unintended side effect of introducing
     * backwards incompatibility in CHR_ALL. Version 4+ should include
     * the chromosome names in Genome order (rather than file order); which we check against.
     *
     * @return Whether we believe the data stored for whole genome view is valid or not
     */
    boolean isChrOrderValid() {


        if (reader.getVersion() >= 4) {


            String chromosomeNames;
            List<String> fileChromos = null;
            //Extract the chromosome names. Not sure when that attribute was put in
            //If it's not in the file, give up
            try {
                TDFGroup rootGroup = reader.getGroup(TDFWriter.ROOT_GROUP);
                chromosomeNames = rootGroup.getAttribute(TDFWriter.CHROMOSOMES);
                fileChromos = new ArrayList<String>(Arrays.asList(chromosomeNames.split(",")));
            } catch (Exception e) {
                return false;
            }
            return checkChromoNameOrder(fileChromos, genome.getLongChromosomeNames());
        } else if (genome != null) {
            // The WELL_KNOWN_GENOMES have had stable chromosome order since initial release
            String genomeId = genome.getId();
            return WELL_KNOWN_GENOMES.contains(genomeId);
        } else {
            // Can't be sure
            return false;
        }


    }




    /**
     * Check the sort.  It is acceptable to have different #s of chromosomes in the two lists, but they must
     * start with the same value and be in the same order to the extent they overlap.   Less genome chrs than file
     * chrs => extra data at the end.  Less file chrs than genome chrs => we'll have white space on the end.  In
     * both cases the data that is shown will be valid.
     *
     * @param fileChromos
     * @param genomeChromos
     * @return
     * @see #isChrOrderValid()
     */
    static boolean checkChromoNameOrder(List<String> fileChromos, List<String> genomeChromos) {


        int chrCount = Math.min(fileChromos.size(), genomeChromos.size());
        for (int i = 0; i < chrCount; i++) {
            if (!fileChromos.get(i).equals(genomeChromos.get(i))) return false;
        }
        //If we get this far, the chromo order is good as far as we know
        return true;
    }




    private static HashSet<String> WELL_KNOWN_GENOMES = new HashSet<String>(Arrays.asList("hg18", "hg19", "mm8", "mm9"));
}
Source Code of org.broad.igv.tdf.TDFDataSource

Related Classes of org.broad.igv.tdf.TDFDataSource