Package picard.illumina

Source Code of picard.illumina.CollectIlluminaSummaryMetrics

/*
* The MIT License
*
* Copyright (c) 2014 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package picard.illumina;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import picard.PicardException;
import picard.analysis.SinglePassSamProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.Illumina;

import java.io.File;

/**
* Program to collect Coverage Summary Metrics of SAM files for data sequenced by Illumina products.
* It uses our best approximation of the filters that Illumina uses which means:
* <p/>
* 1. Examine PF reads only
* 2. Examine reads that are not marked as duplicate only
* 3. Examine mapped reads only (it is unclear if Illumina does this or not, but without this we cannot do the next point)
* 4. Only count bases that are present in two of mated reads, once. (For this we need TLEN from the sam record, which required that reads are mapped)
*
* Program assumes that all reads in the SAM file are of equal length.
*
* @author farjoun@broadinstitute.org
*/
@CommandLineProgramProperties(
        usage = "Program to collect Coverage Summary Metrics of SAM files for data sequenced by Illumina products. " +
                "It uses our best approximation of the filters that Illumina uses which means:\n\n" +
                "1. Examine PF reads only" +
                "2. Examine reads that are not marked as duplicate only" +
                "3. Examine mapped reads only (it is unclear if Illumina does this or not, but without this we cannot do the next point)" +
                "4. Only count bases that are present in two of mated reads, once. (For this we need TLEN from the sam record, which required that reads are mapped)" +
                "\nProgram assumes that all reads in the SAM file are of equal length, and that if a read is marked as \"mated\", its mate will really exist.\n",
        usageShort = "Collects summary metrics according to Illumina specifications.",
        programGroup = Illumina.class
)
public class CollectIlluminaSummaryMetrics extends SinglePassSamProgram {

    @Option(doc = "IntervalList describing the target interval of the sequencing experiment (for calculating average coverage). " +
            "Use null to use reference from BAM as interval (whole genome)", optional = true)
    public File TARGET_REGION = null;

    private final IlluminaSummaryMetrics metrics = new IlluminaSummaryMetrics();

    private static final Log log = Log.getInstance(CollectIlluminaSummaryMetrics.class);

    private Integer readLength = 0;

    /** Stock main method for a command line program. */
    public static void main(final String[] argv) {
        new CollectIlluminaSummaryMetrics().instanceMainWithExit(argv);
    }

    @Override
    protected void setup(final SAMFileHeader header, final File samFile) {

        if (TARGET_REGION == null) {
            metrics.TARGET_TERRITORY = header.getSequenceDictionary().getReferenceLength();
        } else {
            IOUtil.assertFileIsReadable(TARGET_REGION);
            IOUtil.assertFileIsWritable(OUTPUT);
            final IntervalList targetInterval = IntervalList.fromFile(TARGET_REGION);
            metrics.TARGET_TERRITORY = targetInterval.getUniqueBaseCount();
        }
    }

    @Override
    protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {

        if (readLength == 0) {
            readLength = rec.getReadLength();
        } else {
            if (rec.getReadLength() != readLength) {
                final String message=String.format("This program only works with uniform read lengths. First record had length %d. Current record %s, has length %d", readLength, rec.getReadName(), rec.getReadLength());
                log.error(message);
                throw new PicardException(message);
            }
        }

        // Not interested in counting unmapped, secondary or supplemental reads.
        if (rec.getReadUnmappedFlag() || rec.isSecondaryOrSupplementary()) {
            return;
        }

        metrics.TOTAL_READS++;

        final int length = rec.getReadLength();
        metrics.TOTAL_BASES += length;

        final boolean isPfRead = !rec.getReadFailsVendorQualityCheckFlag();

        if (isPfRead) {
            metrics.PF_READS++;
            metrics.PF_BASES += length;
        } else {
            return;
        }

        // From here on read must have passed PF
        if (rec.getDuplicateReadFlag()) {
            metrics.DUPLICATE_READS++;
            return;
        }

        // Reads are not duplicated from here on.
        final int tLen = Math.abs(rec.getInferredInsertSize());
        if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag() && !rec.getReadUnmappedFlag() && tLen != 0) {
            // If mated,both reads are mapped, and insert-size can be calculated, only examine first in pair, assume equal read lengths, and account for overlaps
            if (rec.getFirstOfPairFlag()) {
                // Since read is mated with both mate mapped, and insert-size available, we will only consider pair on first mate.
                if (rec.getReadNegativeStrandFlag() ^ rec.getMateNegativeStrandFlag()) {
                    // If mated reads are in opposite directions:
                    metrics.TOTAL_ILLUMINA_BASES += Math.min(2 * length, tLen);
                } else {
                    // If mated reads are in same direction:
                    metrics.TOTAL_ILLUMINA_BASES += length + Math.min(length, tLen);
                }
            }
        } else {
            // Otherwise add each read on its own.
            metrics.TOTAL_ILLUMINA_BASES += rec.getReadLength();
        }
    }

    // Do not need unmapped reads at end of file.
    @Override
    protected boolean usesNoRefReads() { return false; }

    @Override
    protected void finish() {
        // Calculate some derived metrics
        metrics.READ_LENGTH = readLength;
        metrics.AVERAGE_ILMN_DEPTH = metrics.TOTAL_ILLUMINA_BASES / (double) metrics.TARGET_TERRITORY;

        // Output the file
        final MetricsFile<IlluminaSummaryMetrics, Integer> metricsFile = getMetricsFile();
        metricsFile.addMetric(metrics);
        metricsFile.write(OUTPUT);
    }

    /** A set of metrics used to describe the general quality of a BAM file */
    public static class IlluminaSummaryMetrics extends MetricBase {

        /** The total number of reads in the input file */
        public int TOTAL_READS = 0;

        /** The number of reads that are PF - pass filter */
        public int PF_READS = 0;

        /** The number of duplicate, PF reads */
        public int DUPLICATE_READS = 0;

        /** The total number of bases in all reads */
        public long TOTAL_BASES;

        /** The total number of bases in all PF reads */
        public long PF_BASES = 0;

        /** The number of bases from PF, unique (non-duplicate) reads, only counting for overlapping bases once */
        public long TOTAL_ILLUMINA_BASES = 0;

        /** The average depth considering only bases from PF, unique (non-duplicate) reads, only counting for overlapping bases once */
        public double AVERAGE_ILMN_DEPTH = 0;

        /** The read length */
        public long READ_LENGTH = 0;

        /** The size of the target region */
        public long TARGET_TERRITORY = 0;

        public IlluminaSummaryMetrics() {}

        public IlluminaSummaryMetrics(final int TOTAL_READS, final int PF_READS, final int DUPLICATE_READS, final long TOTAL_BASES, final long PF_BASES, final long TOTAL_ILLUMINA_BASES, final double AVERAGE_ILMN_DEPTH, final long READ_LENGTH, final long TARGET_TERRITORY) {
            this.TOTAL_READS = TOTAL_READS;
            this.PF_READS = PF_READS;
            this.DUPLICATE_READS = DUPLICATE_READS;
            this.TOTAL_BASES = TOTAL_BASES;
            this.PF_BASES = PF_BASES;
            this.TOTAL_ILLUMINA_BASES = TOTAL_ILLUMINA_BASES;
            this.AVERAGE_ILMN_DEPTH = AVERAGE_ILMN_DEPTH;
            this.READ_LENGTH = READ_LENGTH;
            this.TARGET_TERRITORY = TARGET_TERRITORY;
        }
    }
}
TOP

Related Classes of picard.illumina.CollectIlluminaSummaryMetrics

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.