Package picard.analysis

Source Code of picard.analysis.CollectJumpingLibraryMetrics

/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package picard.analysis;

import java.io.File;
import java.lang.IllegalStateException;import java.lang.Integer;import java.lang.Math;import java.lang.String;import java.lang.System;import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.programgroups.Metrics;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.metrics.MetricsFile;
import picard.sam.DuplicationMetrics;
import htsjdk.samtools.SamPairUtil;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamPairUtil.PairOrientation;
import htsjdk.samtools.SAMTag;

/**
* Command-line program to compute metrics about outward-facing pairs, inward-facing
* pairs, and chimeras in a jumping library.
*
* @author ktibbett@broadinstitute.org
*/
@CommandLineProgramProperties(
        usage = "Computes jumping library metrics.  Gets all data for computation from the first" +
                "read in each pair and assumes that the MQ tag is set with the mate's mapping quality.  If the " +
                "MQ tag is not set, then the program assumes that the mate's mapping quality is >= MINIMUM_MAPPING_QUALITY",
        usageShort = "Produces jumping library metrics for the provided SAM/BAMs",
        programGroup = Metrics.class
)
public class CollectJumpingLibraryMetrics extends CommandLineProgram {
    // Usage and parameters

    @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc="BAM file(s) of reads with duplicates marked")
    public List<File> INPUT = new ArrayList<File>();
    @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="File to which metrics should be written")
    public File OUTPUT;
    @Option(shortName = StandardOptionDefinitions.MINIMUM_MAPPING_QUALITY_SHORT_NAME, doc="Mapping quality minimum cutoff")
    public Integer MINIMUM_MAPPING_QUALITY = 0;
    @Option(shortName="T", doc="When calculating mean and stdev stop when the bins in the tail of the distribution " +
                               "contain fewer than mode/TAIL_LIMIT items")
    public int TAIL_LIMIT = 10000;
    @Option(doc="Jumps greater than or equal to the greater of this value or 2 times the mode of the " +
            "outward-facing pairs are considered chimeras")
    public int CHIMERA_KB_MIN = 100000;

    private static final int SAMPLE_FOR_MODE = 50000; // How many outward-facing pairs to sample to determine the mode

    /** Stock main method. */
    public static void main(String[] args) {
        System.exit(new CollectJumpingLibraryMetrics().instanceMain(args));
    }

    /**
     * Calculates the detailed statistics about the jumping library and then generates the results.
     */
    protected int doWork() {

        for (File f : INPUT) {
            IOUtil.assertFileIsReadable(f);
        }
        IOUtil.assertFileIsWritable(OUTPUT);

        Histogram<Integer> innieHistogram = new Histogram<Integer>();
        Histogram<Integer> outieHistogram = new Histogram<Integer>();

        int fragments = 0;
        int innies = 0;
        int outies = 0;
        int innieDupes = 0;
        int outieDupes = 0;
        int crossChromPairs = 0;
        int superSized = 0;
        int tandemPairs = 0;
        double chimeraSizeMinimum  = Math.max(getOutieMode(), (double)CHIMERA_KB_MIN);

        for (File f : INPUT) {
            SAMFileReader reader = new SAMFileReader(f);

            if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
                throw new PicardException("SAM file must " + f.getName() + " must be sorted in coordintate order");
            }

            for (SAMRecord sam : reader) {

                // We're getting all our info from the first of each pair.
                if (!sam.getFirstOfPairFlag()) {
                    continue;
                }

                // Ignore unmapped read pairs
                if (sam.getReadUnmappedFlag()) {
                    if (!sam.getMateUnmappedFlag()) {
                        fragments++;
                        continue;
                    }

                    // If both ends are unmapped and we've hit unaligned reads we're done
                    if (sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)  {
                        break;
                    }
                    continue;
                }

                if (sam.getMateUnmappedFlag()) {
                    fragments++;
                    continue;
                }

                // Ignore low-quality reads.  If we don't have the mate mapping quality, assume it's OK
                if ((sam.getAttribute(SAMTag.MQ.name()) != null &&
                     sam.getIntegerAttribute(SAMTag.MQ.name()) < MINIMUM_MAPPING_QUALITY) ||
                    sam.getMappingQuality() < MINIMUM_MAPPING_QUALITY) {
                    continue;
                }

                final int absInsertSize = Math.abs(sam.getInferredInsertSize());
                if (absInsertSize > chimeraSizeMinimum) {
                    superSized++;
                }
                else if (sam.getMateNegativeStrandFlag() == sam.getReadNegativeStrandFlag()) {
                    tandemPairs++;
                }
                else if (!sam.getMateReferenceIndex().equals(sam.getReferenceIndex())) {
                    crossChromPairs++;
                }
                else {
                    final PairOrientation pairOrientation = SamPairUtil.getPairOrientation(sam);
                    if (pairOrientation == PairOrientation.RF) {
                        outieHistogram.increment(absInsertSize);
                        outies++;
                        if (sam.getDuplicateReadFlag()) {
                            outieDupes++;
                        }
                    }
                    else if(pairOrientation == PairOrientation.FR) {
                        innieHistogram.increment(absInsertSize);
                        innies++;
                        if (sam.getDuplicateReadFlag()) {
                            innieDupes++;
                        }
                    }
                    else {
                        throw new IllegalStateException("This should never happen");
                    }
                }
            }
            reader.close();
        }

        MetricsFile<JumpingLibraryMetrics,Integer> metricsFile = getMetricsFile();
        JumpingLibraryMetrics metrics = new JumpingLibraryMetrics();
        metrics.JUMP_PAIRS = outies;
        metrics.JUMP_DUPLICATE_PAIRS = outieDupes;
        metrics.JUMP_DUPLICATE_PCT = outies != 0 ? outieDupes/(double)outies : 0;
        metrics.JUMP_LIBRARY_SIZE = (outies > 0 && outieDupes > 0) ? DuplicationMetrics.estimateLibrarySize(outies, outies-outieDupes) : 0;
        outieHistogram.trimByTailLimit(TAIL_LIMIT);
        metrics.JUMP_MEAN_INSERT_SIZE = outieHistogram.getMean();
        metrics.JUMP_STDEV_INSERT_SIZE = outieHistogram.getStandardDeviation();
        metrics.NONJUMP_PAIRS = innies;
        metrics.NONJUMP_DUPLICATE_PAIRS = innieDupes;
        metrics.NONJUMP_DUPLICATE_PCT = innies != 0 ? innieDupes/(double)innies : 0;
        metrics.NONJUMP_LIBRARY_SIZE  = (innies > 0 && innieDupes > 0) ? DuplicationMetrics.estimateLibrarySize(innies, innies-innieDupes) : 0;
        innieHistogram.trimByTailLimit(TAIL_LIMIT);
        metrics.NONJUMP_MEAN_INSERT_SIZE = innieHistogram.getMean();
        metrics.NONJUMP_STDEV_INSERT_SIZE = innieHistogram.getStandardDeviation();
        metrics.CHIMERIC_PAIRS = crossChromPairs + superSized + tandemPairs;
        metrics.FRAGMENTS = fragments;
        double totalPairs = outies + innies + metrics.CHIMERIC_PAIRS;
        metrics.PCT_JUMPS = totalPairs != 0 ? outies/totalPairs : 0;
        metrics.PCT_NONJUMPS = totalPairs != 0 ? innies/totalPairs : 0;
        metrics.PCT_CHIMERAS = totalPairs != 0 ? metrics.CHIMERIC_PAIRS/totalPairs : 0;
        metricsFile.addMetric(metrics);
        metricsFile.write(OUTPUT);

        return 0;
    }

    /**
     * Calculates the mode for outward-facing pairs, using the first SAMPLE_FOR_MODE
     * outward-facing pairs found in INPUT
     */
    private double getOutieMode() {

        int samplePerFile = SAMPLE_FOR_MODE/INPUT.size();

        Histogram<Integer> histo = new Histogram<Integer>();

        for (File f : INPUT) {
            SAMFileReader reader = new SAMFileReader(f);
            int sampled = 0;
            for (Iterator<SAMRecord> it = reader.iterator(); it.hasNext() && sampled < samplePerFile;) {
                SAMRecord sam = it.next();
                if (!sam.getFirstOfPairFlag()) {
                    continue;
                }
                // If we get here we've hit the end of the aligned reads
                if (sam.getReadUnmappedFlag() && sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
                    break;
                }
                else if (sam.getReadUnmappedFlag() || sam.getMateUnmappedFlag()) {
                    continue;
                }
                else
                {
                    if ( (sam.getAttribute(SAMTag.MQ.name()) == null ||
                           sam.getIntegerAttribute(SAMTag.MQ.name()) >= MINIMUM_MAPPING_QUALITY) &&
                        sam.getMappingQuality() >= MINIMUM_MAPPING_QUALITY &&
                        sam.getMateNegativeStrandFlag() != sam.getReadNegativeStrandFlag() &&
                        sam.getMateReferenceIndex().equals(sam.getReferenceIndex())) {
                        if (SamPairUtil.getPairOrientation(sam) == PairOrientation.RF) {
                            histo.increment(Math.abs(sam.getInferredInsertSize()));
                            sampled++;
                        }
                    }
                }

            }
            reader.close();
        }

        return histo.size() > 0 ?  histo.getMode() : 0;
    }
}
TOP

Related Classes of picard.analysis.CollectJumpingLibraryMetrics

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.