Package picard.analysis

Source Code of picard.analysis.AlignmentSummaryMetricsCollector$GroupAlignmentSummaryMetricsPerUnitMetricCollector$IndividualAlignmentSummaryMetricsCollector

package picard.analysis;

import htsjdk.samtools.AlignmentBlock;
import htsjdk.samtools.BAMRecord;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.ReservedTagConstants;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.CoordMath;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import picard.metrics.PerUnitMetricCollector;
import picard.metrics.SAMRecordAndReference;
import picard.metrics.SAMRecordAndReferenceMultiLevelCollector;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class AlignmentSummaryMetricsCollector extends SAMRecordAndReferenceMultiLevelCollector<AlignmentSummaryMetrics, Comparable<?>> {
    // If we have a reference sequence, collect metrics on how well we aligned to it
    private final boolean doRefMetrics;

    //the adapter sequences converted to byte arrays
    private final byte[][] adapterKmers;

    //A list of Strings representing the sequence of bases in an adapter
    private final List<String> adapterSequence;

    //Paired end reads above this insert size will be considered chimeric along with inter-chromosomal pairs.
    private final int maxInsertSize;


    //Whether the SAM or BAM file consists of bisulfite sequenced reads.
    private final boolean isBisulfiteSequenced;

    //The minimum mapping quality a base has to meet in order to be considered high quality
    private final int MAPPING_QUALITY_THRESOLD = 20;

    //The minimum quality a base has to meet in order to be consider hq_20
    private final static int BASE_QUALITY_THRESHOLD = 20;

    //The number of bases to check in order to map a read to an adapter
    private static final int ADAPTER_MATCH_LENGTH = 16;

    // The maximum number of mismatches a read can have and still be considered as matching an adapter
    private static final int MAX_ADAPTER_ERRORS = 1;

    public AlignmentSummaryMetricsCollector(final Set<MetricAccumulationLevel> accumulationLevels, final List<SAMReadGroupRecord> samRgRecords,
                                            final boolean doRefMetrics, final List<String> adapterSequence, final int maxInsertSize, boolean isBisulfiteSequenced) {
        this.doRefMetrics       = doRefMetrics;
        this.adapterSequence    = adapterSequence;
        this.adapterKmers = prepareAdapterSequences();
        this.maxInsertSize = maxInsertSize;
        this.isBisulfiteSequenced = isBisulfiteSequenced;
        setup(accumulationLevels, samRgRecords);
    }

    @Override
    protected PerUnitMetricCollector<AlignmentSummaryMetrics, Comparable<?>, SAMRecordAndReference> makeChildCollector(String sample, String library, String readGroup) {
        return new GroupAlignmentSummaryMetricsPerUnitMetricCollector(sample, library, readGroup);
    }

    @Override
    public void acceptRecord(final SAMRecord rec, final ReferenceSequence ref) {
        if (!rec.isSecondaryOrSupplementary()) {
            super.acceptRecord(rec, ref);
        }
    }

    /** Converts the supplied adapter sequences to byte arrays in both fwd and rc. */
    private byte [][] prepareAdapterSequences() {
        final Set<String> kmers = new HashSet<String>();

        // Make a set of all kmers of adapterMatchLength
        for (final String seq : adapterSequence) {
            for (int i=0; i<=seq.length() - ADAPTER_MATCH_LENGTH; ++i) {
                final String kmer = seq.substring(i, i+ADAPTER_MATCH_LENGTH).toUpperCase();

                int ns = 0;
                for (final char ch : kmer.toCharArray()) if (ch == 'N') ++ns;
                if (ns <= MAX_ADAPTER_ERRORS) {
                    kmers.add(kmer);
                    kmers.add(SequenceUtil.reverseComplement(kmer));
                }
            }
        }

        // Make an array of byte[] for the kmers
        final byte [][] adapterKmers = new byte[kmers.size()][];
        int i=0;
        for (final String kmer : kmers) {
            adapterKmers[i++] = StringUtil.stringToBytes(kmer);
        }
        return adapterKmers;
    }

    /**
     * Checks the first ADAPTER_MATCH_LENGTH bases of the read against known adapter sequences and returns
     * true if the read matches an adapter sequence with MAX_ADAPTER_ERRORS mismsatches or fewer.
     *
     * @param read the basecalls for the read in the order and orientation the machine read them
     * @return true if the read matches an adapter and false otherwise
     */
    private boolean isAdapterSequence(final byte[] read) {
        if (read.length < ADAPTER_MATCH_LENGTH) return false;

        for (final byte[] adapter : adapterKmers) {
            int errors = 0;

            for (int i=0; i<adapter.length; ++i) {
                if (read[i] != adapter[i]) {
                    if (++errors > MAX_ADAPTER_ERRORS) break;
                }
            }

            if (errors <= MAX_ADAPTER_ERRORS) return true;
        }

        return false;
    }

    private class GroupAlignmentSummaryMetricsPerUnitMetricCollector implements PerUnitMetricCollector<AlignmentSummaryMetrics, Comparable<?>, SAMRecordAndReference> {
        final IndividualAlignmentSummaryMetricsCollector unpairedCollector;
        final IndividualAlignmentSummaryMetricsCollector firstOfPairCollector;
        final IndividualAlignmentSummaryMetricsCollector secondOfPairCollector;
        final IndividualAlignmentSummaryMetricsCollector pairCollector;
        final String sample;
        final String library;
        final String readGroup;

        public GroupAlignmentSummaryMetricsPerUnitMetricCollector(final String sample, final String library, final String readGroup) {
            this.sample = sample;
            this.library = library;
            this.readGroup = readGroup;
            unpairedCollector     = new IndividualAlignmentSummaryMetricsCollector(AlignmentSummaryMetrics.Category.UNPAIRED, sample, library, readGroup);
            firstOfPairCollector  = new IndividualAlignmentSummaryMetricsCollector(AlignmentSummaryMetrics.Category.FIRST_OF_PAIR, sample, library, readGroup);
            secondOfPairCollector = new IndividualAlignmentSummaryMetricsCollector(AlignmentSummaryMetrics.Category.SECOND_OF_PAIR, sample, library, readGroup);
            pairCollector         = new IndividualAlignmentSummaryMetricsCollector(AlignmentSummaryMetrics.Category.PAIR, sample, library, readGroup);
        }

        public void acceptRecord(final SAMRecordAndReference args) {
            final SAMRecord rec         = args.getSamRecord();
            final ReferenceSequence ref = args.getReferenceSequence();

            if (rec.getReadPairedFlag()) {
                if (rec.getFirstOfPairFlag()) {
                    firstOfPairCollector.addRecord(rec, ref);
                }
                else {
                    secondOfPairCollector.addRecord(rec, ref);
                }

                pairCollector.addRecord(rec, ref);
            }
            else {
                unpairedCollector.addRecord(rec, ref);
            }
        }

        @Override
        public void finish() {
            // Let the collectors do any summary computations etc.
            unpairedCollector.onComplete();
            firstOfPairCollector.onComplete();
            secondOfPairCollector.onComplete();
            pairCollector.onComplete();
        }

        @Override
        public void addMetricsToFile(final MetricsFile<AlignmentSummaryMetrics, Comparable<?>> file) {
            if (firstOfPairCollector.getMetrics().TOTAL_READS > 0) {
                // override how bad cycle is determined for paired reads, it should be
                // the sum of first and second reads
                pairCollector.getMetrics().BAD_CYCLES = firstOfPairCollector.getMetrics().BAD_CYCLES +
                        secondOfPairCollector.getMetrics().BAD_CYCLES;

                file.addMetric(firstOfPairCollector.getMetrics());
                file.addMetric(secondOfPairCollector.getMetrics());
                file.addMetric(pairCollector.getMetrics());
            }

            //if there are no reads in any category then we will returned an unpaired alignment summary metric with all zero values
            if (unpairedCollector.getMetrics().TOTAL_READS > 0 || firstOfPairCollector.getMetrics().TOTAL_READS == 0) {
                file.addMetric(unpairedCollector.getMetrics());
            }
        }

        /**
         * Class that counts reads that match various conditions
         */
        private class IndividualAlignmentSummaryMetricsCollector {
            private long numPositiveStrand = 0;
            private final Histogram<Integer> readLengthHistogram = new Histogram<Integer>();
            private AlignmentSummaryMetrics metrics;
            private long chimeras;
            private long chimerasDenominator;
            private long adapterReads;
            private long indels;

            private long nonBisulfiteAlignedBases = 0;
            private long hqNonBisulfiteAlignedBases = 0;
            private final Histogram<Long> mismatchHistogram = new Histogram<Long>();
            private final Histogram<Long> hqMismatchHistogram = new Histogram<Long>();
            private final Histogram<Integer> badCycleHistogram = new Histogram<Integer>();

            public IndividualAlignmentSummaryMetricsCollector(final AlignmentSummaryMetrics.Category pairingCategory,
                                                              final String sample,
                                                              final String library,
                                                              final String readGroup) {
                metrics = new AlignmentSummaryMetrics();
                metrics.CATEGORY = pairingCategory;
                metrics.SAMPLE = sample;
                metrics.LIBRARY = library;
                metrics.READ_GROUP = readGroup;
            }

            public void addRecord(final SAMRecord record, final ReferenceSequence ref) {
                if (record.isSecondaryOrSupplementary()) {
                    // only want 1 count per read so skip non primary alignments
                    return;
                }

                collectReadData(record, ref);
                collectQualityData(record, ref);
            }

            public void onComplete() {
                //summarize read data
                if (metrics.TOTAL_READS > 0)
                {
                    metrics.PCT_PF_READS = (double) metrics.PF_READS / (double) metrics.TOTAL_READS;
                    metrics.PCT_ADAPTER = this.adapterReads / (double) metrics.PF_READS;
                    metrics.MEAN_READ_LENGTH = readLengthHistogram.getMean();

                    //Calculate BAD_CYCLES
                    metrics.BAD_CYCLES = 0;
                    for (final Histogram<Integer>.Bin cycleBin : badCycleHistogram.values()) {
                        final double badCyclePercentage = cycleBin.getValue() / metrics.TOTAL_READS;
                        if (badCyclePercentage >= .8) {
                            metrics.BAD_CYCLES++;
                        }
                    }

                    if(doRefMetrics) {
                        if (metrics.PF_READS > 0)         metrics.PCT_PF_READS_ALIGNED = (double) metrics.PF_READS_ALIGNED / (double) metrics.PF_READS;
                        if (metrics.PF_READS_ALIGNED > 0) metrics.PCT_READS_ALIGNED_IN_PAIRS = (double) metrics.READS_ALIGNED_IN_PAIRS/ (double) metrics.PF_READS_ALIGNED;
                        if (metrics.PF_READS_ALIGNED > 0) metrics.STRAND_BALANCE = numPositiveStrand / (double) metrics.PF_READS_ALIGNED;
                        if (this.chimerasDenominator > 0) metrics.PCT_CHIMERAS = this.chimeras / (double) this.chimerasDenominator;

                        if (nonBisulfiteAlignedBases > 0) metrics.PF_MISMATCH_RATE = mismatchHistogram.getSum() / (double) nonBisulfiteAlignedBases;
                        metrics.PF_HQ_MEDIAN_MISMATCHES = hqMismatchHistogram.getMedian();
                        if (hqNonBisulfiteAlignedBases > 0) metrics.PF_HQ_ERROR_RATE = hqMismatchHistogram.getSum() / (double) hqNonBisulfiteAlignedBases;
                        if (metrics.PF_ALIGNED_BASES > 0) metrics.PF_INDEL_RATE = this.indels / (double) metrics.PF_ALIGNED_BASES;
                    }
                }
            }

            private void collectReadData(final SAMRecord record, final ReferenceSequence ref) {
                metrics.TOTAL_READS++;
                readLengthHistogram.increment(record.getReadBases().length);

                if (!record.getReadFailsVendorQualityCheckFlag()) {
                    metrics.PF_READS++;
                    if (isNoiseRead(record)) metrics.PF_NOISE_READS++;

                    if (record.getReadUnmappedFlag()) {
                        // If the read is unmapped see if it's adapter sequence
                        final byte[] readBases = record.getReadBases();
                        if (!(record instanceof BAMRecord)) StringUtil.toUpperCase(readBases);

                        if (isAdapterSequence(readBases)) {
                            this.adapterReads++;
                        }
                    }
                    else if(doRefMetrics) {
                        metrics.PF_READS_ALIGNED++;
                        if (!record.getReadNegativeStrandFlag()) numPositiveStrand++;

                        if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
                            metrics.READS_ALIGNED_IN_PAIRS++;

                            // Check that both ends have mapq > minimum
                            final Integer mateMq = record.getIntegerAttribute("MQ");
                            if (mateMq == null || mateMq >= MAPPING_QUALITY_THRESOLD && record.getMappingQuality() >= MAPPING_QUALITY_THRESOLD) {
                                ++this.chimerasDenominator;

                                // With both reads mapped we can see if this pair is chimeric
                                if (Math.abs(record.getInferredInsertSize()) > maxInsertSize ||
                                        !record.getReferenceIndex().equals(record.getMateReferenceIndex())) {
                                    ++this.chimeras;
                                }
                            }
                        }
                    }
                }
            }

            private void collectQualityData(final SAMRecord record, final ReferenceSequence reference) {
                // If the read isnt an aligned PF read then look at the read for no-calls
                if (record.getReadUnmappedFlag() || record.getReadFailsVendorQualityCheckFlag() || !doRefMetrics) {
                    final byte[] readBases = record.getReadBases();
                    for (int i = 0; i < readBases.length; i++) {
                        if (SequenceUtil.isNoCall(readBases[i])) {
                            badCycleHistogram.increment(CoordMath.getCycle(record.getReadNegativeStrandFlag(), readBases.length, i));
                        }
                    }
                }
                else if (!record.getReadFailsVendorQualityCheckFlag()) {
                    final boolean highQualityMapping = isHighQualityMapping(record);
                    if (highQualityMapping) metrics.PF_HQ_ALIGNED_READS++;

                    final byte[] readBases = record.getReadBases();
                    final byte[] refBases = reference.getBases();
                    final byte[] qualities  = record.getBaseQualities();
                    final int refLength = refBases.length;
                    long mismatchCount   = 0;
                    long hqMismatchCount = 0;

                    for (final AlignmentBlock alignmentBlock : record.getAlignmentBlocks()) {
                        final int readIndex = alignmentBlock.getReadStart() - 1;
                        final int refIndex  = alignmentBlock.getReferenceStart() - 1;
                        final int length    = alignmentBlock.getLength();

                        for (int i=0; i<length && refIndex+i<refLength; ++i) {
                            final int readBaseIndex = readIndex + i;
                            boolean mismatch = !SequenceUtil.basesEqual(readBases[readBaseIndex], refBases[refIndex+i]);
                            boolean bisulfiteBase = false;
                            if (mismatch && isBisulfiteSequenced) {
                                if ( (record.getReadNegativeStrandFlag() &&
                                        (refBases[refIndex+i] == 'G' || refBases[refIndex+i] =='g') &&
                                        (readBases[readBaseIndex] == 'A' || readBases[readBaseIndex] == 'a'))
                                        || ((!record.getReadNegativeStrandFlag()) &&
                                        (refBases[refIndex+i] == 'C' || refBases[refIndex+i] == 'c') &&
                                        (readBases[readBaseIndex] == 'T') || readBases[readBaseIndex] == 't') ) {

                                    bisulfiteBase = true;
                                    mismatch = false;
                                }
                            }

                            if(mismatch) mismatchCount++;

                            metrics.PF_ALIGNED_BASES++;
                            if(!bisulfiteBase) nonBisulfiteAlignedBases++;

                            if (highQualityMapping) {
                                metrics.PF_HQ_ALIGNED_BASES++;
                                if (!bisulfiteBase) hqNonBisulfiteAlignedBases++;
                                if (qualities[readBaseIndex] >= BASE_QUALITY_THRESHOLD) metrics.PF_HQ_ALIGNED_Q20_BASES++;
                                if (mismatch) hqMismatchCount++;
                            }

                            if (mismatch || SequenceUtil.isNoCall(readBases[readBaseIndex])) {
                                badCycleHistogram.increment(CoordMath.getCycle(record.getReadNegativeStrandFlag(), readBases.length, i));
                            }
                        }
                    }

                    mismatchHistogram.increment(mismatchCount);
                    hqMismatchHistogram.increment(hqMismatchCount);

                    // Add any insertions and/or deletions to the global count
                    for (final CigarElement elem : record.getCigar().getCigarElements()) {
                        final CigarOperator op = elem.getOperator();
                        if (op == CigarOperator.INSERTION || op == CigarOperator.DELETION) ++ this.indels;
                    }
                }
            }

            private boolean isNoiseRead(final SAMRecord record) {
                final Object noiseAttribute = record.getAttribute(ReservedTagConstants.XN);
                return (noiseAttribute != null && noiseAttribute.equals(1));
            }

            private boolean isHighQualityMapping(final SAMRecord record) {
                return !record.getReadFailsVendorQualityCheckFlag() &&
                        record.getMappingQuality() >= MAPPING_QUALITY_THRESOLD;
            }

            public AlignmentSummaryMetrics getMetrics() {
                return this.metrics;
            }
        }
    }
}
TOP

Related Classes of picard.analysis.AlignmentSummaryMetricsCollector$GroupAlignmentSummaryMetricsPerUnitMetricCollector$IndividualAlignmentSummaryMetricsCollector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.