Package picard.analysis

Source Code of picard.analysis.MeanQualityByCycle$HistogramGenerator

/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package picard.analysis;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.StringUtil;
import picard.PicardException;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.Metrics;
import picard.util.RExecutor;

import java.io.File;
import java.util.Arrays;
import java.util.List;


/**
* Program to generate a data table and chart of mean quality by cycle from a
* BAM file.  Works best on a single lane/run of data, but can be applied to
* merged BAMs - the output may just be a little confusing.
*
* @author Tim Fennell
*/
@CommandLineProgramProperties(
        usage = "Program to generate a data table and pdf chart of " +
                "mean base quality by cycle from a SAM or BAM file.  Works best on a single lane/run of data, but can be applied to" +
                "merged BAMs. Uses R to generate chart output.",
        usageShort = "Writes mean quality by cycle for a SAM or BAM file",
        programGroup = Metrics.class
)
public class MeanQualityByCycle extends SinglePassSamProgram {

    @Option(shortName="CHART", doc="A file (with .pdf extension) to write the chart to.")
    public File CHART_OUTPUT;

    @Option(doc="If set to true, calculate mean quality over aligned reads only.")
    public boolean ALIGNED_READS_ONLY = false;

    @Option(doc="If set to true calculate mean quality over PF reads only.")
    public boolean PF_READS_ONLY = false;

    private final HistogramGenerator q  = new HistogramGenerator(false);
    private final HistogramGenerator oq = new HistogramGenerator(true);

    /**
     * A subtitle for the plot, usually corresponding to a library.
     */
    private String plotSubtitle = "";

    private final Log log = Log.getInstance(MeanQualityByCycle.class);

    /** Required main method. */
    public static void main(String[] args) {
        System.exit(new MeanQualityByCycle().instanceMain(args));
    }

    private static class HistogramGenerator {
        final boolean useOriginalQualities;
        int maxLengthSoFar = 0;
        double[] firstReadTotalsByCycle  = new double[maxLengthSoFar];
        long[]   firstReadCountsByCycle  = new long[maxLengthSoFar];
        double[] secondReadTotalsByCycle = new double[maxLengthSoFar];
        long[]   secondReadCountsByCycle = new long[maxLengthSoFar];

        private HistogramGenerator(final boolean useOriginalQualities) {
            this.useOriginalQualities = useOriginalQualities;
        }

        void addRecord(final SAMRecord rec) {
            final byte[] quals = (useOriginalQualities ? rec.getOriginalBaseQualities() : rec.getBaseQualities());
            if (quals == null) return;

            final int length = quals.length;
            final boolean rc = rec.getReadNegativeStrandFlag();
            ensureArraysBigEnough(length+1);

            for (int i=0; i<length; ++i) {
                final int cycle = rc ? length-i : i+1;

                if (rec.getReadPairedFlag() && rec.getSecondOfPairFlag()) {
                    secondReadTotalsByCycle[cycle] += quals[i];
                    secondReadCountsByCycle[cycle] += 1;
                }
                else {
                    firstReadTotalsByCycle[cycle] += quals[i];
                    firstReadCountsByCycle[cycle] += 1;
                }
            }
        }

        private void ensureArraysBigEnough(final int length) {
            if (length > maxLengthSoFar) {
                firstReadTotalsByCycle  = Arrays.copyOf(firstReadTotalsByCycle, length);
                firstReadCountsByCycle  = Arrays.copyOf(firstReadCountsByCycle, length);
                secondReadTotalsByCycle = Arrays.copyOf(secondReadTotalsByCycle , length);
                secondReadCountsByCycle = Arrays.copyOf(secondReadCountsByCycle, length);
                maxLengthSoFar = length;
            }
        }

        Histogram<Integer> getMeanQualityHistogram() {
            final String label = useOriginalQualities ? "MEAN_ORIGINAL_QUALITY" : "MEAN_QUALITY";
            final Histogram<Integer> meanQualities = new Histogram<Integer>("CYCLE", label);

            int firstReadLength = 0;

            for (int cycle=0; cycle < firstReadTotalsByCycle.length; ++cycle) {
                if (firstReadTotalsByCycle[cycle] > 0) {
                    meanQualities.increment(cycle, firstReadTotalsByCycle[cycle] / firstReadCountsByCycle[cycle]);
                    firstReadLength = cycle;
                }
            }

            for (int i=0; i< secondReadTotalsByCycle.length; ++i) {
                if (secondReadCountsByCycle[i] > 0) {
                    final int cycle = firstReadLength + i;
                    meanQualities.increment(cycle, secondReadTotalsByCycle[i] / secondReadCountsByCycle[i]);
                }
            }

            return meanQualities;
        }

        boolean isEmpty() {
            return maxLengthSoFar == 0;
        }
    }

    @Override
    protected void setup(final SAMFileHeader header, final File samFile) {
        IOUtil.assertFileIsWritable(CHART_OUTPUT);
        // If we're working with a single library, assign that library's name
        // as a suffix to the plot title
        final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
        if (readGroups.size() == 1) {
            plotSubtitle = StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary());
        }
    }

    @Override
    protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
        // Skip unwanted records
        if (PF_READS_ONLY && rec.getReadFailsVendorQualityCheckFlag()) return;
        if (ALIGNED_READS_ONLY && rec.getReadUnmappedFlag()) return;
        if (rec.isSecondaryOrSupplementary()) return;

        q.addRecord(rec);
        oq.addRecord(rec);
    }

    @Override
    protected void finish() {
        // Generate a "Histogram" of mean quality and write it to the file
        final MetricsFile<?,Integer> metrics = getMetricsFile();
        metrics.addHistogram(q.getMeanQualityHistogram());
        if (!oq.isEmpty()) metrics.addHistogram(oq.getMeanQualityHistogram());
        metrics.write(OUTPUT);

        if (q.isEmpty() && oq.isEmpty()) {
            log.warn("No valid bases found in input file. No plot will be produced.");
        }
        else {
            // Now run R to generate a chart
            final int rResult = RExecutor.executeFromClasspath(
                    "picard/analysis/meanQualityByCycle.R",
                    OUTPUT.getAbsolutePath(),
                    CHART_OUTPUT.getAbsolutePath(),
                    INPUT.getName(),
                    plotSubtitle);

            if (rResult != 0) {
                throw new PicardException("R script meanQualityByCycle.R failed with return code " + rResult);
            }
        }
    }
}
TOP

Related Classes of picard.analysis.MeanQualityByCycle$HistogramGenerator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.