Package picard.analysis

Source Code of picard.analysis.CollectRrbsMetrics

/*
* The MIT License
*
* Copyright (c) 2013 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

package picard.analysis;

import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.programgroups.Metrics;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import picard.util.RExecutor;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.CollectionUtil;

import java.io.File;
import java.lang.Comparable;import java.lang.Override;import java.lang.String;import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
*  Calculates and reports QC metrics for RRBS data based on the methylation status at individual C/G bases as well
*  as CpG sites across all reads in the input BAM/SAM file.
*
@author jgentry@broadinstitute.org
*/
@CommandLineProgramProperties(
        usage = CollectRrbsMetrics.USAGE,
        usageShort = CollectRrbsMetrics.USAGE,
        programGroup = Metrics.class
)
public class CollectRrbsMetrics extends CommandLineProgram {
    final static String USAGE = "Collects metrics about bisulfite conversion for RRBS data";
   
  // Path to R file for plotting purposes
  private static final String R_SCRIPT = "picard/analysis/rrbsQc.R";

  @Option(doc="The BAM or SAM file containing aligned reads. Must be coordinate sorted", shortName= StandardOptionDefinitions.INPUT_SHORT_NAME)
  public File INPUT;
  @Option(doc="Base name for output files", shortName=StandardOptionDefinitions.METRICS_FILE_SHORT_NAME)
  public String METRICS_FILE_PREFIX;
  @Option(doc="The reference sequence fasta file", shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME)
  public File REFERENCE;
  @Option(doc="Minimum read length")
  public int MINIMUM_READ_LENGTH = 5;
  @Option(doc="Threshold for base quality of a C base before it is considered")
  public int C_QUALITY_THRESHOLD = 20;
  @Option(doc="Threshold for quality of a base next to a C before the C base is considered")
  public int NEXT_BASE_QUALITY_THRESHOLD = 10;
  @Option(doc="Maximum percentage of mismatches in a read for it to be considered, with a range of 0-1")
  public double MAX_MISMATCH_RATE = 0.1;
  @Option(doc="Set of sequence names to consider, if not specified all sequences will be used", optional = true)
  public Set<String> SEQUENCE_NAMES = new HashSet<String>();
  @Option(shortName=StandardOptionDefinitions.ASSUME_SORTED_SHORT_NAME,
      doc="If true, assume that the input file is coordinate sorted even if the header says otherwise.")
  public boolean ASSUME_SORTED = false;
  @Option(shortName="LEVEL", doc="The level(s) at which to accumulate metrics.  ")
  public Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS);

  public static final String DETAIL_FILE_EXTENSION = "rrbs_detail_metrics";
  public static final String SUMMARY_FILE_EXTENSION = "rrbs_summary_metrics";
  public static final String PDF_FILE_EXTENSION = "rrbs_qc.pdf";

  private static final Log log = Log.getInstance(CollectRrbsMetrics.class);

  public static void main(final String[] args) {
    new CollectRrbsMetrics().instanceMainWithExit(args);
  }

  @Override
  protected int doWork() {
    if (!METRICS_FILE_PREFIX.endsWith(".")) {
      METRICS_FILE_PREFIX = METRICS_FILE_PREFIX + ".";
    }
    final File SUMMARY_OUT = new File(METRICS_FILE_PREFIX + SUMMARY_FILE_EXTENSION);
    final File DETAILS_OUT = new File(METRICS_FILE_PREFIX + DETAIL_FILE_EXTENSION);
    final File PLOTS_OUT = new File(METRICS_FILE_PREFIX + PDF_FILE_EXTENSION);
    assertIoFiles(SUMMARY_OUT, DETAILS_OUT, PLOTS_OUT);

    final SAMFileReader samReader = new SAMFileReader(INPUT);
    if (!ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
      throw new PicardException("The input file " + INPUT.getAbsolutePath() + " does not appear to be coordinate sorted");
    }

    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE);
    final ProgressLogger progressLogger = new ProgressLogger(log);

    final RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(),
        C_QUALITY_THRESHOLD, NEXT_BASE_QUALITY_THRESHOLD, MINIMUM_READ_LENGTH, MAX_MISMATCH_RATE);

    for (final SAMRecord samRecord : samReader) {
      progressLogger.record(samRecord);
      if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) {
        final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex());
        metricsCollector.acceptRecord(samRecord, referenceSequence);
      }
    }
    metricsCollector.finish();
    final MetricsFile<RrbsMetrics, Comparable<?>> rrbsMetrics = getMetricsFile();
    metricsCollector.addAllLevelsToFile(rrbsMetrics);

    // Using RrbsMetrics as a way to get both of the metrics objects through the MultiLevelCollector. Once
    // we get it out split it apart to the two separate MetricsFiles and write them to file
    final MetricsFile<RrbsSummaryMetrics, ?> summaryFile = getMetricsFile();
    final MetricsFile<RrbsCpgDetailMetrics, ?> detailsFile = getMetricsFile();
    for (final RrbsMetrics rrbsMetric : rrbsMetrics.getMetrics()) {
      summaryFile.addMetric(rrbsMetric.getSummaryMetrics());
      for (final RrbsCpgDetailMetrics detailMetric : rrbsMetric.getDetailMetrics()) {
        detailsFile.addMetric(detailMetric);
      }
    }
    summaryFile.write(SUMMARY_OUT);
    detailsFile.write(DETAILS_OUT);
    RExecutor.executeFromClasspath(R_SCRIPT, DETAILS_OUT.getAbsolutePath(), SUMMARY_OUT.getAbsolutePath(), PLOTS_OUT.getAbsolutePath());
    return 0;
  }

  private boolean isSequenceFiltered(final String sequenceName) {
    return (SEQUENCE_NAMES != null) && (SEQUENCE_NAMES.size() > 0) && (!SEQUENCE_NAMES.contains(sequenceName));
  }

  private void assertIoFiles(final File summaryFile, final File detailsFile, final File plotsFile) {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(REFERENCE);
    IOUtil.assertFileIsWritable(summaryFile);
    IOUtil.assertFileIsWritable(detailsFile);
    IOUtil.assertFileIsWritable(plotsFile);
  }

  @Override
  protected String[] customCommandLineValidation() {
    final List<String> errorMsgs = new ArrayList<String>();
    if (MAX_MISMATCH_RATE < 0 || MAX_MISMATCH_RATE > 1) {
      errorMsgs.add("MAX_MISMATCH_RATE must be in the range of 0-1");
    }

    if (C_QUALITY_THRESHOLD < 0) {
      errorMsgs.add("C_QUALITY_THRESHOLD must be >= 0");
    }

    if (NEXT_BASE_QUALITY_THRESHOLD < 0) {
      errorMsgs.add("NEXT_BASE_QUALITY_THRESHOLD must be >= 0");
    }

    if (MINIMUM_READ_LENGTH <= 0) {
      errorMsgs.add("MINIMUM_READ_LENGTH must be > 0");
    }

    return errorMsgs.size() == 0 ? null : errorMsgs.toArray(new String[errorMsgs.size()]);
  }
}
TOP

Related Classes of picard.analysis.CollectRrbsMetrics

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.