/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.analysis;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.SequenceUtil;
import picard.PicardException;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.Metrics;
import picard.util.RExecutor;
import java.io.File;
import java.util.List;
/**
 * Tallies how often each base quality score occurs in a SAM or BAM file,
 * writes the tallies out as a metrics histogram and renders them as a chart
 * via an R script.
 *
 * @author Tim Fennell
 */
@CommandLineProgramProperties(
        usage = "Program to chart " +
                "quality score distributions in a SAM or BAM file.",
        usageShort = "Charts quality score distributions for a SAM or BAM file",
        programGroup = Metrics.class
)
public class QualityScoreDistribution extends SinglePassSamProgram {

    @Option(shortName="CHART", doc="A file (with .pdf extension) to write the chart to.")
    public File CHART_OUTPUT;

    @Option(doc="If set to true calculate mean quality over aligned reads only.")
    public boolean ALIGNED_READS_ONLY = false;

    @Option(shortName="PF", doc="If set to true calculate mean quality over PF reads only.")
    public boolean PF_READS_ONLY = false;

    @Option(doc="If set to true, include quality for no-call bases in the distribution.")
    public boolean INCLUDE_NO_CALLS = false;

    // Tallies indexed directly by quality value. 128 slots cover every
    // non-negative value that a byte can hold.
    private final long[] qCounts = new long[128];
    private final long[] oqCounts = new long[128];

    /**
     * Subtitle passed to the plotting script; set to the library name when the
     * input header carries exactly one read group, otherwise left empty.
     */
    private String plotSubtitle = "";

    private final Log log = Log.getInstance(QualityScoreDistribution.class);

    /** Command-line entry point; exits the JVM with the program's return code. */
    public static void main(final String[] args) {
        System.exit(new QualityScoreDistribution().instanceMain(args));
    }

    /**
     * Verifies that both output files can be written and derives the plot
     * subtitle from the header.
     *
     * @param header  header of the input file, consulted for its read groups
     * @param samFile the input file (not used here)
     */
    @Override
    protected void setup(final SAMFileHeader header, final File samFile) {
        IOUtil.assertFileIsWritable(OUTPUT);
        IOUtil.assertFileIsWritable(CHART_OUTPUT);

        // A subtitle is only assigned when there is exactly one read group;
        // with zero or several, the default empty subtitle stays in place.
        final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
        if (readGroups.size() != 1) {
            return;
        }

        final String library = readGroups.get(0).getLibrary();
        this.plotSubtitle = (library == null) ? "" : library;
    }

    /**
     * Adds the base qualities (and original base qualities, when present) of
     * one record to the running tallies.
     *
     * @param rec the record whose qualities are counted
     * @param ref reference sequence for the record (not used here)
     */
    @Override
    protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
        // Records excluded by the PF / aligned-only options, and every
        // secondary or supplementary record, are not counted.
        if ((PF_READS_ONLY && rec.getReadFailsVendorQualityCheckFlag())
                || (ALIGNED_READS_ONLY && rec.getReadUnmappedFlag())
                || rec.isSecondaryOrSupplementary()) {
            return;
        }

        final byte[] readBases = rec.getReadBases();
        final byte[] qualities = rec.getBaseQualities();
        final byte[] originalQualities = rec.getOriginalBaseQualities();

        // The quality array drives the iteration; bases and original
        // qualities are read at the same index.
        for (int pos = 0; pos < qualities.length; pos++) {
            // No-call bases are left out unless the user asked for them.
            if (!INCLUDE_NO_CALLS && SequenceUtil.isNoCall(readBases[pos])) {
                continue;
            }

            qCounts[qualities[pos]]++;
            if (originalQualities != null) {
                oqCounts[originalQualities[pos]]++;
            }
        }
    }

    /**
     * Turns the tallies into histograms, writes them to the metrics output
     * and, unless nothing was counted, runs the R script that draws the chart.
     *
     * @throws PicardException if the R script returns a non-zero code
     */
    @Override
    protected void finish() {
        // Build the histograms out of the long[] tallies
        final Histogram<Byte> qualityHistogram = buildHistogram("QUALITY", "COUNT_OF_Q", qCounts);
        final Histogram<Byte> originalQualityHistogram = buildHistogram("QUALITY", "COUNT_OF_OQ", oqCounts);

        // The original-quality histogram is only written when it has content.
        final MetricsFile<?,Byte> metrics = getMetricsFile();
        metrics.addHistogram(qualityHistogram);
        if (!originalQualityHistogram.isEmpty()) {
            metrics.addHistogram(originalQualityHistogram);
        }
        metrics.write(OUTPUT);

        // With nothing counted there is nothing to plot; the metrics file
        // has already been written at this point.
        if (qualityHistogram.isEmpty() && originalQualityHistogram.isEmpty()) {
            log.warn("No valid bases found in input file. No plot will be produced.");
            return;
        }

        // Hand the metrics file, chart path, input name and subtitle to R.
        final int rResult = RExecutor.executeFromClasspath(
                "picard/analysis/qualityScoreDistribution.R",
                OUTPUT.getAbsolutePath(),
                CHART_OUTPUT.getAbsolutePath(),
                INPUT.getName(),
                this.plotSubtitle);

        if (rResult != 0) {
            throw new PicardException("R script qualityScoreDistribution.R failed with return code " + rResult);
        }
    }

    /**
     * Creates a histogram holding one bin per index of {@code counts} whose
     * tally is greater than zero; indices with a zero tally get no bin.
     */
    private static Histogram<Byte> buildHistogram(final String binLabel,
                                                  final String valueLabel,
                                                  final long[] counts) {
        final Histogram<Byte> histogram = new Histogram<Byte>(binLabel, valueLabel);
        for (int quality = 0; quality < counts.length; quality++) {
            final long tally = counts[quality];
            if (tally > 0) {
                histogram.increment((byte) quality, (double) tally);
            }
        }
        return histogram;
    }
}