package picard.analysis.directed;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import picard.analysis.MetricAccumulationLevel;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.metrics.MultilevelMetrics;
import java.io.File;
import java.util.List;
import java.util.Set;
/**
* Both CollectTargetedPCRMetrics and CalculateHybridSelection metrics share virtually identical program structures except
* for the name of their targeting mechanisms (e.g. bait set or amplicon set). The shared behavior of these programs
* is encapsulated in CollectTargetedMetrics which is then subclassed by CalculateHsMetrics and CollectTargetedPcrMetrics.
* <p/>
* This program verifies the input parameters to TargetMetricsCollector and converts all files to
* the format desired by TargetMetricsCollector. Then it instantiates a TargetMetricsCollector and
* collects metric information for all reads in the INPUT sam file.
*/
public abstract class CollectTargetedMetrics<METRIC extends MultilevelMetrics, COLLECTOR extends TargetMetricsCollector<METRIC>> extends CommandLineProgram {
private static final Log log = Log.getInstance(CalculateHsMetrics.class);
protected abstract IntervalList getProbeIntervals();
protected abstract String getProbeSetName();
/**
* A factory method for the TargetMetricsCollector to use this time. Examples of TargetMetricsCollector:
* (TargetedPcrMetricsCollector, HsMetricsCalculator)
*
* @return A TargetMetricsCollector to which we will pass SAMRecords
*/
protected abstract COLLECTOR makeCollector(final Set<MetricAccumulationLevel> accumulationLevels,
final List<SAMReadGroupRecord> samRgRecords,
final ReferenceSequenceFile refFile,
final File perTargetCoverage,
final IntervalList targetIntervals,
final IntervalList probeIntervals,
final String probeSetName);
@Option(shortName = "TI", doc = "An interval list file that contains the locations of the targets.", minElements=1)
public List<File> TARGET_INTERVALS;
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "An aligned SAM or BAM file.")
public File INPUT;
@Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output file to write the metrics to.")
public File OUTPUT;
@Option(shortName = "LEVEL", doc = "The level(s) at which to accumulate metrics.")
public Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS);
@Option(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, optional = true, doc = "The reference sequence aligned to.")
public File REFERENCE_SEQUENCE;
@Option(optional = true, doc = "An optional file to output per target coverage information to.")
public File PER_TARGET_COVERAGE;
/**
* Asserts that files are readable and writable and then fires off an
* HsMetricsCalculator instance to do the real work.
*/
protected int doWork() {
for (final File targetInterval : TARGET_INTERVALS) IOUtil.assertFileIsReadable(targetInterval);
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
if (PER_TARGET_COVERAGE != null) IOUtil.assertFileIsWritable(PER_TARGET_COVERAGE);
final SamReader reader = SamReaderFactory.makeDefault().open(INPUT);
final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);
// Validate that the targets and baits have the same references as the reads file
SequenceUtil.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(),
targetIntervals.getHeader().getSequenceDictionary());
SequenceUtil.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(),
getProbeIntervals().getHeader().getSequenceDictionary()
);
ReferenceSequenceFile ref = null;
if (REFERENCE_SEQUENCE != null) {
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SequenceUtil.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(), ref.getSequenceDictionary(),
INPUT, REFERENCE_SEQUENCE
);
}
final COLLECTOR collector = makeCollector(
METRIC_ACCUMULATION_LEVEL,
reader.getFileHeader().getReadGroups(),
ref,
PER_TARGET_COVERAGE,
targetIntervals,
getProbeIntervals(),
getProbeSetName()
);
final ProgressLogger progress = new ProgressLogger(log);
for (final SAMRecord record : reader) {
collector.acceptRecord(record, null);
progress.record(record);
}
// Write the output file
final MetricsFile<METRIC, Integer> metrics = getMetricsFile();
collector.finish();
collector.addAllLevelsToFile(metrics);
metrics.write(OUTPUT);
return 0;
}
/** Renders a probe name from the provided file, returning {@link java.io.File#getName()} with all extensions stripped. */
static String renderProbeNameFromFile(final File probeIntervalFile) {
final String name = probeIntervalFile.getName();
final int firstPeriodIndex = name.indexOf('.');
if (firstPeriodIndex == -1) {
return name;
} else {
return name.substring(0, firstPeriodIndex);
}
}
protected String[] customCommandLineValidation() {
if (PER_TARGET_COVERAGE != null && (METRIC_ACCUMULATION_LEVEL.size() != 1 ||
METRIC_ACCUMULATION_LEVEL.iterator().next() != MetricAccumulationLevel.ALL_READS)) {
return new String[]{"PER_TARGET_COVERAGE can be specified only when METRIC_ACCUMULATION_LEVEL is set " +
"to ALL_READS."};
}
if (PER_TARGET_COVERAGE != null && REFERENCE_SEQUENCE == null) {
return new String[]{"Must supply REFERENCE_SEQUENCE when supplying PER_TARGET_COVERAGE"};
}
return super.customCommandLineValidation();
}
}