Source Code of org.broad.igv.sam.AlignmentPacker$DenseBucketCollection

/*
 * Copyright (c) 2007-2012 The Broad Institute, Inc.
 * SOFTWARE COPYRIGHT NOTICE
 * This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
 *
 * This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
 *
 * This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
 * Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
 */


/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package org.broad.igv.sam;


import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;
import org.apache.log4j.Logger;
import org.broad.igv.feature.Range;
import org.broad.igv.feature.Strand;


import java.util.*;


/**
 * Packs alignments such that there is no overlap
 *
 * @author jrobinso
 */
public class AlignmentPacker {


    private static final Logger log = Logger.getLogger(AlignmentPacker.class);


    /**
     * Minimum gap between the end of one alignment and start of another.
     */
    public static final int MIN_ALIGNMENT_SPACING = 5;
    private static final Comparator<Alignment> lengthComparator = new Comparator<Alignment>() {
        public int compare(Alignment row1, Alignment row2) {
            return (row2.getEnd() - row2.getStart()) -
                    (row1.getEnd() - row2.getStart());


        }
    };


    private static final String NULL_GROUP_VALUE = "Because google-guava tables don't support a null key, we use a special value" +
            " for null keys. It doesn't matter much what it is, but we want to avoid collisions. I find it unlikely that " +
            " this sentence will ever be used as a group value";
    public static final int tenMB = 10000000;


    /**
     * Allocates each alignment to row such that there is no overlap.
     */
    public PackedAlignments packAlignments(
            AlignmentInterval interval,
            AlignmentTrack.RenderOptions renderOptions) {


        if (renderOptions == null) renderOptions = new AlignmentTrack.RenderOptions();


        LinkedHashMap<String, List<Row>> packedAlignments = new LinkedHashMap<String, List<Row>>();


        boolean isPairedAlignments = renderOptions.isViewPairs() || renderOptions.isPairedArcView();




        if (renderOptions.groupByOption == null) {
            List<Row> alignmentRows = new ArrayList<Row>(10000);
            pack(interval.getAlignments(), isPairedAlignments, alignmentRows);
            packedAlignments.put("", alignmentRows);
        } else {


            // Separate alignments into groups.
            Map<String, List<Alignment>> groupedAlignments = new HashMap<String, List<Alignment>>();
            Iterator<Alignment> iter = interval.getAlignmentIterator();
            while (iter.hasNext()) {
                Alignment alignment = iter.next();
                String groupKey = getGroupValue(alignment, renderOptions);
                if (groupKey == null) {
                    groupKey = NULL_GROUP_VALUE;
                }
                List<Alignment> groupList = groupedAlignments.get(groupKey);
                if (groupList == null) {
                    groupList = new ArrayList<Alignment>(1000);
                    groupedAlignments.put(groupKey, groupList);
                }
                groupList.add(alignment);
            }




            // Now alphabetize (sort) and packGroup the groups
            List<String> keys = new ArrayList<String>(groupedAlignments.keySet());
            Comparator<String> groupComparator = getGroupComparator(renderOptions.groupByOption);
            Collections.sort(keys, groupComparator);


            for (String key : keys) {
                if(key.equals(NULL_GROUP_VALUE)) continue;
                List<Row> alignmentRows = new ArrayList<Row>(10000);
                List<Alignment> group = groupedAlignments.get(key);
                pack(group, isPairedAlignments, alignmentRows);
                packedAlignments.put(key, alignmentRows);
            }


            //Put null valued group at end
            List<Row> alignmentRows = new ArrayList<Row>(10000);
            List<Alignment> group = groupedAlignments.get(NULL_GROUP_VALUE);
            pack(group, isPairedAlignments, alignmentRows);
            packedAlignments.put("", alignmentRows);
        }


        List<AlignmentInterval> tmp = new ArrayList<AlignmentInterval>();
        tmp.add(interval);
        return new PackedAlignments(tmp, packedAlignments);
    }




    private void pack(List<Alignment> alList, boolean pairAlignments, List<Row> alignmentRows) {


        Map<String, PairedAlignment> pairs = null;
        if (pairAlignments) {
            pairs = new HashMap<String, PairedAlignment>(1000);
        }


        // Allocate alignemnts to buckets for each range.
        // We use priority queues to keep the buckets sorted by alignment length.  However this  is probably a needless
        // complication,  any collection type would do.


        int totalCount = 0;




        if (alList == null || alList.size() == 0) return;


        Range curRange = getAlignmentListRange(alList);


        BucketCollection bucketCollection;


        // Use dense buckets for < 10,000,000 bp windows sparse otherwise
        int bpLength = curRange.getLength();


        if (bpLength < tenMB) {
            bucketCollection = new DenseBucketCollection(bpLength, curRange);
        } else {
            bucketCollection = new SparseBucketCollection(curRange);
        }




        int curRangeStart = curRange.getStart();
        for (Alignment al : alList) {


            if (al.isMapped()) {
                Alignment alignment = al;
                if (pairAlignments && al.isPaired() && al.getMate().isMapped() && al.getMate().getChr().equals(al.getChr())) {
                    String readName = al.getReadName();
                    PairedAlignment pair = pairs.get(readName);
                    if (pair == null) {
                        pair = new PairedAlignment(al);
                        pairs.put(readName, pair);
                        alignment = pair;
                    } else {
                        // Add second alignment to pair.
                        pair.setSecondAlignment(al);
                        pairs.remove(readName);
                        continue;


                    }
                }


                // Negative "bucketNumbers" can arise with soft clips at the left edge of the chromosome. Allocate
                // these alignments to the first bucket.
                int bucketNumber = Math.max(0, al.getStart() - curRangeStart);
                if (bucketNumber < bucketCollection.getBucketCount()) {
                    PriorityQueue<Alignment> bucket = bucketCollection.get(bucketNumber);
                    if (bucket == null) {
                        bucket = new PriorityQueue<Alignment>(5, lengthComparator);
                        bucketCollection.set(bucketNumber, bucket);
                    }
                    bucket.add(alignment);
                    totalCount++;
                } else {
                    log.debug("Alignment out of bounds. name: " + alignment.getReadName() + " startPos:" + alignment.getStart());
                }
            }
        }
        bucketCollection.finishedAdding();




        // Now allocate alignments to rows.
        long t0 = System.currentTimeMillis();
        int allocatedCount = 0;
        Row currentRow = new Row();


        while (allocatedCount < totalCount) {




            curRange = bucketCollection.getRange();


            curRangeStart = curRange.getStart();
            int nextStart = curRangeStart;
            List<Integer> emptyBuckets = new ArrayList<Integer>(100);


            while (true) {
                int bucketNumber = nextStart - curRangeStart;
                PriorityQueue<Alignment> bucket = bucketCollection.getNextBucket(bucketNumber, emptyBuckets);


                // Pull the next alignment out of the bucket and add to the current row
                if (bucket != null) {
                    Alignment alignment = bucket.remove();
                    currentRow.addAlignment(alignment);
                    allocatedCount++;


                    nextStart = alignment.getEnd() + MIN_ALIGNMENT_SPACING;


                }


                //Reached the end of this range, move to the next
                if (bucket == null || nextStart > curRange.getEnd()) {
                    //Remove empty buckets.  This has no affect on the dense implementation,
                    //they are removed on the fly, but is needed for the sparse implementation
                    bucketCollection.removeBuckets(emptyBuckets);
                    emptyBuckets.clear();
                    break;
                }
            }




            // We've reached the end of the interval,  start a new row
            if (currentRow.alignments.size() > 0) {
                alignmentRows.add(currentRow);
            }
            currentRow = new Row();
        }
        if (log.isDebugEnabled()) {
            long dt = System.currentTimeMillis() - t0;
            log.debug("Packed alignments in " + dt);
        }


        // Add the last row
        if (currentRow != null && currentRow.alignments.size() > 0) {
            alignmentRows.add(currentRow);
        }




    }




    /**
     * Gets the range over which alignmentsList spans. Asssumes all on same chr, and sorted
     *
     * @param alignmentsList
     * @return
     */
    private Range getAlignmentListRange(List<Alignment> alignmentsList) {
        if (alignmentsList == null || alignmentsList.size() == 0) return null;
        Alignment firstAlignment = alignmentsList.get(0);


        int minStart = firstAlignment.getStart();
        int maxEnd = firstAlignment.getEnd();
        for (Alignment alignment : alignmentsList) {
            maxEnd = Math.max(maxEnd, alignment.getEnd());
        }
        return new Range(firstAlignment.getChr(), minStart,
                maxEnd);
    }


    private Comparator<String> getGroupComparator(AlignmentTrack.GroupOption groupByOption) {
        switch (groupByOption) {
            case PAIR_ORIENTATION:
                return new PairOrientationComparator();
            default:
                //Sort null values towards the end
                return new Comparator<String>() {
                    @Override
                    public int compare(String o1, String o2) {
                        if (o1 != null) {
                            return o1.compareToIgnoreCase(o2);
                        } else if (o2 != null) {
                            return o2.compareToIgnoreCase(o1);
                        } else {
                            //Both null;
                            return 0;
                        }


                    }
                };
        }
    }


    private String getGroupValue(Alignment al, AlignmentTrack.RenderOptions renderOptions) {


        AlignmentTrack.GroupOption groupBy = renderOptions.groupByOption;
        String tag = renderOptions.getGroupByTag();


        switch (groupBy) {
            case STRAND:
                return String.valueOf(al.isNegativeStrand());
            case SAMPLE:
                return al.getSample();
            case READ_GROUP:
                return al.getReadGroup();
            case TAG:
                Object tagValue = al.getAttribute(tag);
                return tagValue == null ? null : tagValue.toString();
            case FIRST_OF_PAIR_STRAND:
                Strand strand = al.getFirstOfPairStrand();
                String strandString = strand == Strand.NONE ? null : strand.toString();
                System.out.println(strandString);
                return strandString;
            case PAIR_ORIENTATION:
                PEStats peStats = AlignmentRenderer.getPEStats(al, renderOptions);
                AlignmentTrack.OrientationType type = AlignmentRenderer.getOrientationType(al, peStats);
                if (type == null) {
                    return AlignmentTrack.OrientationType.UNKNOWN.name();
                }
                return type.name();
            case MATE_CHROMOSOME:
                ReadMate mate = al.getMate();
                if (mate == null) return null;
                return mate.getChr();
            case SUPPLEMENTARY:
                return String.valueOf(!al.isSupplementary());
        }
        return null;
    }


    static interface BucketCollection {


        Range getRange();


        void set(int idx, PriorityQueue<Alignment> bucket);


        PriorityQueue<Alignment> get(int idx);


        PriorityQueue<Alignment> getNextBucket(int bucketNumber, Collection<Integer> emptyBuckets);


        void removeBuckets(Collection<Integer> emptyBuckets);


        void finishedAdding();


        int getBucketCount();
    }


    /**
     * Dense array implementation of BucketCollection.  Assumption is all or nearly all the genome region is covered
     * with reads.
     */
    static class DenseBucketCollection implements BucketCollection {


        Range range;
        int lastBucketNumber = -1;
        final PriorityQueue[] bucketArray;


        DenseBucketCollection(int bucketCount, Range range) {
            this.bucketArray = new PriorityQueue[bucketCount];
            this.range = range;
        }


        public void set(int idx, PriorityQueue<Alignment> bucket) {
            bucketArray[idx] = bucket;
        }


        public PriorityQueue<Alignment> get(int idx) {
            return bucketArray[idx];
        }


        public int getBucketCount() {
            return this.bucketArray.length;
        }


        public Range getRange() {
            return range;
        }


        /**
         * Return the next occupied bucket after bucketNumber
         *
         * @param bucketNumber
         * @param emptyBuckets ignored
         * @return
         */
        public PriorityQueue<Alignment> getNextBucket(int bucketNumber, Collection<Integer> emptyBuckets) {


            if (bucketNumber == lastBucketNumber) {
                // TODO -- detect inf loop here
            }


            PriorityQueue<Alignment> bucket = null;
            while (bucketNumber < bucketArray.length) {


                if (bucketNumber < 0) {
                    System.out.println();
                }


                bucket = bucketArray[bucketNumber];
                if (bucket != null) {
                    if (bucket.isEmpty()) {
                        bucketArray[bucketNumber] = null;
                        bucket = null;
                    } else {
                        return bucket;
                    }
                }
                bucketNumber++;
            }
            return null;
        }


        public void removeBuckets(Collection<Integer> emptyBuckets) {
            // Nothing to do, empty buckets are removed "on the fly"
        }


        public void finishedAdding() {
            // nothing to do
        }
    }




    /**
     * "Sparse" implementation of an alignment BucketCollection.  Assumption is there are small clusters of alignments
     * along the genome, with mostly "white space".
     */
    static class SparseBucketCollection implements BucketCollection {


        Range range;
        boolean finished = false;
        List<Integer> keys;
        final HashMap<Integer, PriorityQueue<Alignment>> buckets;


        SparseBucketCollection(Range range) {
            this.range = range;
            this.buckets = new HashMap(1000);
        }


        public void set(int idx, PriorityQueue<Alignment> bucket) {
            if (finished) {
                log.error("Error: bucket added after finishAdding() called");
            }
            buckets.put(idx, bucket);
        }


        public PriorityQueue<Alignment> get(int idx) {
            return buckets.get(idx);
        }


        public Range getRange() {
            return range;
        }


        /**
         * Return the next occupied bucket at or after after bucketNumber.
         *
         * @param bucketNumber -- the hash bucket index for the alignments, essential the position relative to the start
         *                     of this packing interval
         * @return the next occupied bucket at or after bucketNumber, or null if there are none.
         */
        public PriorityQueue<Alignment> getNextBucket(int bucketNumber, Collection<Integer> emptyBuckets) {


            PriorityQueue<Alignment> bucket = null;
            int min = 0;
            int max = keys.size() - 1;


            // Get close to the right index, rather than scan from the beginning
            while ((max - min) > 5) {
                int mid = (max + min) / 2;
                Integer key = keys.get(mid);
                if (key > bucketNumber) {
                    max = mid;
                } else {
                    min = mid;
                }
            }


            // Now march from min to max until we cross bucketNumber
            for (int i = min; i < keys.size(); i++) {
                Integer key = keys.get(i);
                if (key >= bucketNumber) {
                    bucket = buckets.get(key);
                    if (bucket.isEmpty()) {
                        emptyBuckets.add(key);
                        bucket = null;
                    } else {
                        return bucket;
                    }
                }
            }
            return null;     // No bucket found
        }


        public void removeBuckets(Collection<Integer> emptyBuckets) {


            if (emptyBuckets.isEmpty()) {
                return;
            }


            for (Integer i : emptyBuckets) {
                buckets.remove(i);
            }
            keys = new ArrayList<Integer>(buckets.keySet());
            Collections.sort(keys);
        }


        public void finishedAdding() {
            finished = true;
            keys = new ArrayList<Integer>(buckets.keySet());
            Collections.sort(keys);
        }


        public int getBucketCount() {
            return Integer.MAX_VALUE;
        }
    }


    private class PairOrientationComparator implements Comparator<String> {
        private final List<AlignmentTrack.OrientationType> orientationTypes;
        //private final Set<String> orientationNames = new HashSet<String>(AlignmentTrack.OrientationType.values().length);


        public PairOrientationComparator() {
            orientationTypes = Arrays.asList(AlignmentTrack.OrientationType.values());
//            for(AlignmentTrack.OrientationType type: orientationTypes){
//                orientationNames.add(type.name());
//            }
        }


        @Override
        public int compare(String s0, String s1) {
            if (s0 != null && s1 != null) {
                AlignmentTrack.OrientationType t0 = AlignmentTrack.OrientationType.valueOf(s0);
                AlignmentTrack.OrientationType t1 = AlignmentTrack.OrientationType.valueOf(s1);
                return orientationTypes.indexOf(t0) - orientationTypes.indexOf(t1);
            } else if (s0 == null ^ s1 == null) {
                //exactly one is null
                return s0 == null ? 1 : -1;
            } else {
                //both null
                return 0;
            }


        }
    }


}
Source Code of org.broad.igv.sam.AlignmentPacker$DenseBucketCollection

Related Classes of org.broad.igv.sam.AlignmentPacker$DenseBucketCollection