/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.analysis;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.programgroups.Metrics;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMRecord;
import java.io.File;import java.lang.Integer;import java.lang.String;
/**
* Command line program to calibrate quality yield metrics
*
* @author Martha Borkan
*/
@CommandLineProgramProperties(
usage = "Collects quality yield metrics, a set of metrics that quantify the quality and yield of sequence data from a " +
"SAM/BAM input file.",
usageShort = "Collects a set of metrics that quantify the quality and yield of sequence data from the provided SAM/BAM",
programGroup = Metrics.class
)
public class CollectQualityYieldMetrics extends CommandLineProgram {
@Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME,
doc="A SAM or BAM file to process.")
public File INPUT;
@Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME,
doc="The metrics file to write with quality yield metrics.")
public File OUTPUT;
@Option(shortName=StandardOptionDefinitions.USE_ORIGINAL_QUALITIES_SHORT_NAME,
doc="If available in the OQ tag, use the original quality scores " +
"as inputs instead of the quality scores in the QUAL field.")
public boolean USE_ORIGINAL_QUALITIES = true;
/** Stock main method for a command line program. */
public static void main(final String[] argv) {
new CollectQualityYieldMetrics().instanceMainWithExit(argv);
}
/**
* Main method for the program. Checks that all input files are present and
* readable and that the output file can be written to. Then iterates through
* all the records accumulating metrics. Finally writes metrics file
*/
protected int doWork() {
final Log log = Log.getInstance(getClass());
final ProgressLogger progress = new ProgressLogger(log);
// Some quick parameter checking
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
log.info("Reading input file and calculating metrics.");
final SAMFileReader sam = new SAMFileReader(IOUtil.openFileForReading(INPUT));
final MetricsFile<QualityYieldMetrics,Integer> metricsFile = getMetricsFile();
final QualityYieldMetrics metrics = new QualityYieldMetrics();
for (final SAMRecord rec : sam) {
metrics.TOTAL_READS ++;
final int length = rec.getReadLength();
final boolean isPfRead = !rec.getReadFailsVendorQualityCheckFlag();
if (isPfRead){
metrics.PF_READS ++;
metrics.PF_BASES += length;
}
metrics.TOTAL_BASES += length;
final byte[] quals;
if (USE_ORIGINAL_QUALITIES) {
byte[] tmp = rec.getOriginalBaseQualities();
if (tmp == null) tmp = rec.getBaseQualities();
quals = tmp;
}
else {
quals = rec.getBaseQualities();
}
// add up quals, and quals >= 20
for (int i=0; i<quals.length; ++i) {
metrics.Q20_EQUIVALENT_YIELD += quals[i];
if (quals[i] >= 20) metrics.Q20_BASES++;
if (quals[i] >= 30) metrics.Q30_BASES++;
if (isPfRead){
metrics.PF_Q20_EQUIVALENT_YIELD += quals[i];
if (quals[i] >= 20) metrics.PF_Q20_BASES++;
if (quals[i] >= 30) metrics.PF_Q30_BASES++;
}
}
progress.record(rec);
}
metrics.READ_LENGTH = metrics.TOTAL_READS == 0 ? 0 : (int) (metrics.TOTAL_BASES / metrics.TOTAL_READS);
metrics.Q20_EQUIVALENT_YIELD = metrics.Q20_EQUIVALENT_YIELD / 20 ;
metrics.PF_Q20_EQUIVALENT_YIELD = metrics.PF_Q20_EQUIVALENT_YIELD / 20 ;
metricsFile.addMetric(metrics);
metricsFile.write(OUTPUT);
return 0;
}
/** A set of metrics used to describe the general quality of a BAM file */
public static class QualityYieldMetrics extends MetricBase {
/** The total number of reads in the input file */
public int TOTAL_READS = 0;
/** The number of reads that are PF - pass filter */
public int PF_READS = 0;
/** The average read length of all the reads (will be fixed for a lane) */
public int READ_LENGTH = 0;
/** The total number of bases in all reads */
public long TOTAL_BASES;
/** The total number of bases in all PF reads */
public long PF_BASES = 0;
/** The number of bases in all reads that achieve quality score 20 or higher */
public long Q20_BASES = 0;
/** The number of bases in PF reads that achieve quality score 20 or higher */
public long PF_Q20_BASES = 0;
/** The number of bases in all reads that achieve quality score 20 or higher */
public long Q30_BASES = 0;
/** The number of bases in PF reads that achieve quality score 20 or higher */
public long PF_Q30_BASES = 0;
/** The sum of quality scores of all bases divided by 20 */
public long Q20_EQUIVALENT_YIELD = 0;
/** The sum of quality scores of all bases divided by 20 */
public long PF_Q20_EQUIVALENT_YIELD = 0;
}
}