Source Code of org.broad.igv.sam.AlignmentTileLoader$AlignmentTile$IndexableMap

/*
 * Copyright (c) 2007-2012 The Broad Institute, Inc.
 * SOFTWARE COPYRIGHT NOTICE
 * This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
 *
 * This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
 *
 * This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
 * Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
 */


package org.broad.igv.sam;


import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.util.CloseableIterator;
import org.apache.log4j.Logger;
import org.broad.igv.Globals;
import org.broad.igv.PreferenceManager;
import org.broad.igv.feature.SpliceJunctionFeature;
import org.broad.igv.sam.reader.AlignmentReader;
import org.broad.igv.sam.reader.ReadGroupFilter;
import org.broad.igv.ui.IGV;
import org.broad.igv.ui.util.MessageUtils;
import org.broad.igv.ui.util.ProgressMonitor;
import org.broad.igv.util.ObjectCache;
import org.broad.igv.util.RuntimeUtils;


import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.*;


/**
 * A wrapper for an AlignmentQueryReader that caches query results
 *
 * @author jrobinso
 */
public class AlignmentTileLoader {


    private static Logger log = Logger.getLogger(AlignmentTileLoader.class);


    private static Set<WeakReference<AlignmentTileLoader>> activeLoaders = Collections.synchronizedSet(new HashSet());


    /**
     * Flag to mark a corrupt index.  Without this attempted reads will continue in an infinite loop
     */
    private boolean corruptIndex = false;


    private AlignmentReader reader;
    private boolean cancel = false;
    private boolean pairedEnd = false;


    static void cancelReaders() {
        for (WeakReference<AlignmentTileLoader> readerRef : activeLoaders) {
            AlignmentTileLoader reader = readerRef.get();
            if (reader != null) {
                reader.cancel = true;
            }
        }
        log.debug("Readers canceled");
        activeLoaders.clear();
    }




    public AlignmentTileLoader(AlignmentReader reader) {
        this.reader = reader;
        activeLoaders.add(new WeakReference<AlignmentTileLoader>(this));
    }


    public void close() throws IOException {
        reader.close();
    }


    public SAMFileHeader getFileHeader() {
        return this.reader.getFileHeader();
    }


    public List<String> getSequenceNames() {
        return reader.getSequenceNames();
    }


    public CloseableIterator<Alignment> iterator() {
        return reader.iterator();
    }


    public boolean hasIndex() {
        return reader.hasIndex();
    }




    AlignmentTile loadTile(String chr, int start, int end,
                           SpliceJunctionHelper spliceJunctionHelper,
                           AlignmentDataManager.DownsampleOptions downsampleOptions,
                           Map<String, PEStats> peStats,
                           AlignmentTrack.BisulfiteContext bisulfiteContext,
                           ProgressMonitor monitor) {


        AlignmentTile t = new AlignmentTile(start, end, spliceJunctionHelper, downsampleOptions, bisulfiteContext);




        //assert (tiles.size() > 0);
        if (corruptIndex) {
            return t;
        }


        final PreferenceManager prefMgr = PreferenceManager.getInstance();
        boolean filterFailedReads = prefMgr.getAsBoolean(PreferenceManager.SAM_FILTER_FAILED_READS);
        boolean filterSecondaryAlignments = prefMgr.getAsBoolean(PreferenceManager.SAM_FILTER_SECONDARY_ALIGNMENTS);
        boolean filterSupplementaryAlignments = prefMgr.getAsBoolean(PreferenceManager.SAM_FILTER_SUPPLEMENTARY_ALIGNMENTS);
        ReadGroupFilter filter = ReadGroupFilter.getFilter();
        boolean showDuplicates = prefMgr.getAsBoolean(PreferenceManager.SAM_SHOW_DUPLICATES);
        int qualityThreshold = prefMgr.getAsInt(PreferenceManager.SAM_QUALITY_THRESHOLD);


        CloseableIterator<Alignment> iter = null;


        //log.debug("Loading : " + start + " - " + end);
        int alignmentCount = 0;
        WeakReference<AlignmentTileLoader> ref = new WeakReference(this);
        try {
            ObjectCache<String, Alignment> mappedMates = new ObjectCache<String, Alignment>(1000);
            ObjectCache<String, Alignment> unmappedMates = new ObjectCache<String, Alignment>(1000);




            activeLoaders.add(ref);
            iter = reader.query(chr, start, end, false);


            while (iter != null && iter.hasNext()) {


                if (cancel) {
                    return t;
                }


                Alignment record = iter.next();


                // Set mate sequence of unmapped mates
                // Put a limit on the total size of this collection.
                String readName = record.getReadName();
                if (record.isPaired()) {
                    pairedEnd = true;
                    if (record.isMapped()) {
                        if (!record.getMate().isMapped()) {
                            // record is mapped, mate is not
                            Alignment mate = unmappedMates.get(readName);
                            if (mate == null) {
                                mappedMates.put(readName, record);
                            } else {
                                record.setMateSequence(mate.getReadSequence());
                                unmappedMates.remove(readName);
                                mappedMates.remove(readName);
                            }


                        }
                    } else if (record.getMate().isMapped()) {
                        // record not mapped, mate is
                        Alignment mappedMate = mappedMates.get(readName);
                        if (mappedMate == null) {
                            unmappedMates.put(readName, record);
                        } else {
                            mappedMate.setMateSequence(record.getReadSequence());
                            unmappedMates.remove(readName);
                            mappedMates.remove(readName);
                        }
                    }
                }




                if (!record.isMapped() || (!showDuplicates && record.isDuplicate()) ||
                        (filterFailedReads && record.isVendorFailedRead()) ||
                        (filterSecondaryAlignments && !record.isPrimary()) ||
                        (filterSupplementaryAlignments && record.isSupplementary()) ||
                        record.getMappingQuality() < qualityThreshold ||
                        (filter != null && filter.filterAlignment(record))) {
                    continue;
                }


                t.addRecord(record);


                alignmentCount++;
                int interval = Globals.isTesting() ? 100000 : 1000;
                if (alignmentCount % interval == 0) {
                    if (cancel) return null;
                    String msg = "Reads loaded: " + alignmentCount;
                    MessageUtils.setStatusBarMessage(msg);
                    if (monitor != null) {
                        monitor.updateStatus(msg);
                    }
                    if (memoryTooLow()) {
                        if (monitor != null) monitor.fireProgressChange(100);
                        cancelReaders();
                        t.finish();
                        return t;        // <=  TODO need to cancel all readers
                    }
                }


                // Update pe stats
                if (peStats != null && record.isPaired() && record.isProperPair()) {
                    String lb = record.getLibrary();
                    if (lb == null) lb = "null";
                    PEStats stats = peStats.get(lb);
                    if (stats == null) {
                        stats = new PEStats(lb);
                        peStats.put(lb, stats);
                    }
                    stats.update(record);


                }
            }
            // End iteration over alignments


            // Compute peStats
            if (peStats != null) {
                // TODO -- something smarter re the percentiles.  For small samples these will revert to min and max
                double minPercentile = prefMgr.getAsFloat(PreferenceManager.SAM_MIN_INSERT_SIZE_PERCENTILE);
                double maxPercentile = prefMgr.getAsFloat(PreferenceManager.SAM_MAX_INSERT_SIZE_PERCENTILE);
                for (PEStats stats : peStats.values()) {
                    stats.compute(minPercentile, maxPercentile);
                }
            }


            // Clean up any remaining unmapped mate sequences
            for (String mappedMateName : mappedMates.getKeys()) {
                Alignment mappedMate = mappedMates.get(mappedMateName);
                if (mappedMate != null) {
                    Alignment mate = unmappedMates.get(mappedMate.getReadName());
                    if (mate != null) {
                        mappedMate.setMateSequence(mate.getReadSequence());
                    }
                }
            }
            t.finish();




        } catch (java.nio.BufferUnderflowException e) {
            // This almost always indicates a corrupt BAM index, or less frequently a corrupt bam file
            corruptIndex = true;
            MessageUtils.showMessage("<html>Error encountered querying alignments: " + e.toString() +
                    "<br>This is often caused by a corrupt index file.");


        } catch (Exception e) {
            log.error("Error loading alignment data", e);
            MessageUtils.showMessage("<html>Error encountered querying alignments: " + e.toString());
        } finally {
            // reset cancel flag.  It doesn't matter how we got here,  the read is complete and this flag is reset
            // for the next time
            cancel = false;
            activeLoaders.remove(ref);


            if (monitor != null) {
                monitor.fireProgressChange(100);
            }


            if (iter != null) {
                iter.close();
            }
            if (!Globals.isHeadless()) {
                IGV.getInstance().resetStatusMessage();
            }
        }


        return t;


    }




    private static synchronized boolean memoryTooLow() {
        if (RuntimeUtils.getAvailableMemoryFraction() < 0.2) {
            System.gc();
            if (RuntimeUtils.getAvailableMemoryFraction() < 0.2) {
                String msg = "Memory is low, reading terminating.";
                MessageUtils.showMessage(msg);
                return true;
            }


        }
        return false;
    }




    /**
     * Does this file contain paired end data?  Assume not until proven otherwise.
     */
    public boolean isPairedEnd() {
        return pairedEnd;
    }


    public Set<String> getPlatforms() {
        return reader.getPlatforms();
    }


    /**
     * Caches alignments, coverage, splice junctions, and downsampled intervals
     */


    public static class AlignmentTile {


        private boolean loaded = false;
        private int end;
        private int start;
        private AlignmentCounts counts;
        private List<Alignment> alignments;
        private List<DownsampledInterval> downsampledIntervals;
        private SpliceJunctionHelper spliceJunctionHelper;


        private static final Random RAND = new Random();


        private boolean downsample;
        private int samplingWindowSize;
        private int samplingDepth;


        private int currentSamplingWindowStart = -1;
        private int curEffSamplingWindowDepth = 0;
        //Although not strictly necessary, we keep track of the currentDownsampledInterval
        //for easy incrementing
        private DownsampledInterval currentDownsampledInterval;


        /**
         * We keep a data structure of alignments which can efficiently
         * look up by string (read name) or index (for random replacement)
         */
        IndexableMap<String, Alignment> imAlignments;


        private int downsampledCount = 0;
        private int offset = 0;


        AlignmentTile(int start, int end,
                      SpliceJunctionHelper spliceJunctionHelper,
                      AlignmentDataManager.DownsampleOptions downsampleOptions,
                      AlignmentTrack.BisulfiteContext bisulfiteContext) {
            this.start = start;
            this.end = end;
            this.downsampledIntervals = new ArrayList<DownsampledInterval>();
            long seed = System.currentTimeMillis();
            //System.out.println("seed: " + seed);
            RAND.setSeed(seed);


            // Use a sparse array for large regions  (> 10 mb)
            if ((end - start) > 10000000) {
                this.counts = new SparseAlignmentCounts(start, end, bisulfiteContext);
            } else {
                this.counts = new DenseAlignmentCounts(start, end, bisulfiteContext);
            }


            // Set the max depth, and the max depth of the sampling bucket.
            if (downsampleOptions == null) {
                // Use default settings (from preferences)
                downsampleOptions = new AlignmentDataManager.DownsampleOptions();
            }
            this.downsample = downsampleOptions.isDownsample();
            this.samplingWindowSize = downsampleOptions.getSampleWindowSize();
            this.samplingDepth = Math.max(1, downsampleOptions.getMaxReadCount());


            this.spliceJunctionHelper = spliceJunctionHelper;


            if (this.downsample) {
                imAlignments = new IndexableMap<String, Alignment>(8000);
            } else {
                alignments = new ArrayList<Alignment>(16000);
            }
        }


        public int getStart() {
            return start;
        }


        public void setStart(int start) {
            this.start = start;
        }


        int ignoredCount = 0;    // <= just for debugging


        /**
         * Add an alignment record to this tile.  This record is not necessarily retained after down-sampling.
         *
         * @param alignment
         */
        public void addRecord(Alignment alignment) {


            counts.incCounts(alignment);


            if (spliceJunctionHelper != null) {
                spliceJunctionHelper.addAlignment(alignment);
            }


            if (downsample) {
                final int alignmentStart = alignment.getAlignmentStart();
                int currentSamplingBucketEnd = currentSamplingWindowStart + samplingWindowSize;
                if (currentSamplingWindowStart < 0 || alignmentStart >= currentSamplingBucketEnd) {
                    setCurrentSamplingBucket(alignmentStart);
                }


                attemptAddRecordDownsampled(alignment);


            } else {
                alignments.add(alignment);
            }


            alignment.finish();
        }


        /**
         * Attempt to add this alignment. The alignment is definitely added if there is another
         * read with the same name. Typically this other read is a mate pair, but it could also be a secondary alignment
         * or chimeric read.
         * If we haven't seen another read like that, the record is added with some probability according to
         * reservoir sampling
         *
         * @param alignment
         */
        private void attemptAddRecordDownsampled(Alignment alignment) {
            String readName = alignment.getReadName();
            //A simple way to turn off the same-readName-checking is to replace the read name with a random string
            //so that there are no repeats
            //readName = String.format("%s%d", readName, RAND.nextInt());


            //There are 3 possibilities: other-kept, other-rejected, other-unknown (haven't seen)
            //If we kept or rejected the another read with the same name, we do the same for this one
            boolean hasRead = imAlignments.containsKey(readName);
            if (hasRead) {
                List<Alignment> mateAlignments = imAlignments.get(readName);
                boolean haveOther = mateAlignments != null;
                if (haveOther) {
                    //We keep the alignment if others have been kept
                    imAlignments.append(readName, alignment);
                } else {
                    currentDownsampledInterval.incCount();
                }
            } else {
                if (curEffSamplingWindowDepth < samplingDepth) {
                    imAlignments.append(readName, alignment);
                    curEffSamplingWindowDepth++;
                } else {
                    double samplingProb = ((double) samplingDepth) / (samplingDepth + downsampledCount + 1);
                    if (RAND.nextDouble() < samplingProb) {
                        int rndInt = (int) (RAND.nextDouble() * (samplingDepth - 1));
                        int idx = offset + rndInt;
                        // Replace random record with this one
                        List<Alignment> removedValues = imAlignments.replace(idx, readName, alignment);
                        incrementDownsampledIntervals(removedValues);
                    } else {
                        //Mark that record was not kept
                        imAlignments.markNull(readName);
                        currentDownsampledInterval.incCount();
                    }
                    downsampledCount++;
                }
            }
        }


        private void setCurrentSamplingBucket(int alignmentStart) {
            curEffSamplingWindowDepth = 0;
            downsampledCount = 0;
            currentSamplingWindowStart = alignmentStart;
            offset = imAlignments.size();


            int currentSamplingBucketEnd = currentSamplingWindowStart + samplingWindowSize;
            currentDownsampledInterval = new DownsampledInterval(alignmentStart, currentSamplingBucketEnd, 0);
            downsampledIntervals.add(currentDownsampledInterval);
        }


        private void incrementDownsampledIntervals(List<Alignment> removedValues) {
            if (removedValues == null) return;
            for (Alignment al : removedValues) {
                DownsampledInterval interval = findDownsampledInterval(al);
                if (interval != null) interval.incCount();
            }
        }


        private DownsampledInterval findDownsampledInterval(Alignment al) {
            return findDownsampledInterval(al, 0, downsampledIntervals.size());
        }


        /**
         * Attempt to find appropriate DownsampledInterval by recursive binary search
         *
         * @param al
         * @param startInd Start search index, 0-based inclusive
         * @param endInd   End search index, 0-based exclusive
         * @return
         */
        private DownsampledInterval findDownsampledInterval(Alignment al, int startInd, int endInd) {
            //Length-0 search space
            if (startInd == endInd) {
                return null;
            }
            final int midInd = (startInd + endInd) / 2;
            final DownsampledInterval curInterval = downsampledIntervals.get(midInd);
            if (al.getStart() >= curInterval.getStart() && al.getStart() < curInterval.getEnd()) {
                //Found
                return curInterval;
            }


            if (al.getStart() >= curInterval.getEnd()) {
                startInd = midInd + 1;
            } else {
                endInd = midInd;
            }
            return findDownsampledInterval(al, startInd, endInd);
        }


        /**
         * Sort the alignments by start position, and filter {@code downsampledIntervals}.
         * This will have the same results as if no downsampling occurred, although will incur
         * extra computational cost
         */
        private void sortFilterDownsampled() {
            if ((this.alignments == null || this.alignments.size() == 0) && this.downsample) {
                this.alignments = imAlignments.getAllValues();
                imAlignments.clear();
            }


            Comparator<Alignment> alignmentSorter = new Comparator<Alignment>() {
                public int compare(Alignment alignment, Alignment alignment1) {
                    return alignment.getStart() - alignment1.getStart();
                }
            };
            Collections.sort(this.alignments, alignmentSorter);


            //Only keep the intervals for which count > 0
            List<DownsampledInterval> tmp = new ArrayList<DownsampledInterval>(this.downsampledIntervals.size());
            for (DownsampledInterval interval : this.downsampledIntervals) {
                if (interval.getCount() > 0) {
                    tmp.add(interval);
                }
            }
            this.downsampledIntervals = tmp;
        }


        public List<Alignment> getAlignments() {


            if(alignments == null) {
                finish();   // TODO -- I'm not sure this should ever happen
            }
            return alignments;
        }


        public List<DownsampledInterval> getDownsampledIntervals() {
            return downsampledIntervals;
        }


        public void finish() {
            //If we downsampled,  we need to sort
            if (downsample) {
                sortFilterDownsampled();
            }
            finalizeSpliceJunctions();
            counts.finish();
        }


        public AlignmentCounts getCounts() {
            return counts;
        }




        private void finalizeSpliceJunctions() {
            if (spliceJunctionHelper != null) {
                spliceJunctionHelper.finish();
            }
        }


        public List<SpliceJunctionFeature> getSpliceJunctionFeatures() {
            if (spliceJunctionHelper == null) return null;
            return spliceJunctionHelper.getFilteredJunctions();
        }


        public SpliceJunctionHelper getSpliceJunctionHelper() {
            return spliceJunctionHelper;
        }


        /**
         * Map-like structure designed to be accessible both by key, and by numeric index
         * Multiple values are stored for each key, and a list is returned
         * If the value for a key is set as null, nothing can be added
         * <p/>
         * Intended to support downsampling, where if a read name is added and then removed
         * we don't want to add the read pair
         *
         * @param <K>
         * @param <V>
         */
        private class IndexableMap<K, V> {
            private HashMap<K, List<V>> map;
            private List<K> list;


            IndexableMap(int size) {
                this.map = new HashMap<K, List<V>>(size);
                this.list = new ArrayList<K>(size);
            }


            public List<V> get(K key) {
                return map.get(key);
            }


            /**
             * Append a value for the specified key, unless
             * the current value is null. If the current value is
             * null, it's a no-op.
             *
             * @param key
             * @param value
             * @return Whether the element was added
             */
            public boolean append(K key, V value) {
                if (!map.containsKey(key)) {
                    addNewValueToMap(key, value);
                    return list.add(key);
                } else {
                    List<V> curList = map.get(key);
                    if (curList == null) return false;
                    return curList.add(value);
                }
            }


            public List<V> markNull(K key) {
                return map.put(key, null);
            }


            private void addNewValueToMap(K key, V value) {
                List<V> curList = new ArrayList<V>(2);
                curList.add(value);
                map.put(key, curList);
            }


            /**
             * Place the specified {@code key} and {@code value} in the map,
             * at index {@code index}.
             * <p/>
             * In the unlikely event that {@code key} is already
             * at {@code index}, {@code value} will be appended
             *
             * @param index
             * @param key
             * @param value
             * @return Whether the replacement actually happened
             */
            public List<V> replace(int index, K key, V value) {
                checkSize(index);
                K oldKey = list.get(index);
                if (!oldKey.equals(key)) {
                    //Remove the old key from map, and make sure nothing else gets put there
                    List<V> oldValue = markNull(oldKey);
                    addNewValueToMap(key, value);
                    list.set(index, key);
                    return oldValue;
                } else {
                    append(key, value);
                    return null;
                }
            }


            public int size() {
                return list.size();
            }


            private void checkSize(int index) {
                if (index >= size()) {
                    throw new IllegalArgumentException("index " + index + " greater than current size" + size());
                }
            }


            public List<V> getAllValues() {
                List<V> allValues = new ArrayList<V>(2 * size());
                for (K k : list) {
                    allValues.addAll(map.get(k));
                }
                return allValues;
            }


            public boolean containsKey(K key) {
                return map.containsKey(key);
            }


            public void clear() {
                map.clear();
                list.clear();
            }
        }


    }


}
Source Code of org.broad.igv.sam.AlignmentTileLoader$AlignmentTile$IndexableMap

Related Classes of org.broad.igv.sam.AlignmentTileLoader$AlignmentTile$IndexableMap