package picard.analysis.directed;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamPairUtil;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.Histogram;
import picard.analysis.InsertSizeMetrics;
import picard.analysis.MetricAccumulationLevel;
import picard.metrics.MultiLevelCollector;
import picard.metrics.PerUnitMetricCollector;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Collects InserSizeMetrics on the specified accumulationLevels using
*/
public class InsertSizeMetricsCollector extends MultiLevelCollector<InsertSizeMetrics, Integer, InsertSizeCollectorArgs> {
// When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this
// percentage of overall reads. (Range: 0 to 1)
private final double minimumPct;
// Generate mean, sd and plots by trimming the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION.
// This is done because insert size data typically includes enough anomolous values from chimeras and other
// artifacts to make the mean and sd grossly misleading regarding the real distribution.
private final double deviations;
//Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail.
//Also, when calculating mean and stdev, only bins <= Histogram_WIDTH will be included.
private Integer HistogramWidth;
public InsertSizeMetricsCollector(final Set<MetricAccumulationLevel> accumulationLevels, final List<SAMReadGroupRecord> samRgRecords,
final double minimumPct, final Integer HistogramWidth, final double deviations) {
this.minimumPct = minimumPct;
this.HistogramWidth = HistogramWidth;
this.deviations = deviations;
setup(accumulationLevels, samRgRecords);
}
// We will pass insertSize and PairOrientation with the DefaultPerRecordCollectorArgs passed to the record collectors
// This method is called once Per samRecord
@Override
protected InsertSizeCollectorArgs makeArg(SAMRecord samRecord, ReferenceSequence refSeq) {
final int insertSize = Math.abs(samRecord.getInferredInsertSize());
final SamPairUtil.PairOrientation orientation = SamPairUtil.getPairOrientation(samRecord);
return new InsertSizeCollectorArgs(insertSize, orientation);
}
/** Make an InsertSizeCollector with the given arguments */
@Override
protected PerUnitMetricCollector<InsertSizeMetrics, Integer, InsertSizeCollectorArgs> makeChildCollector(final String sample, final String library, final String readGroup) {
return new PerUnitInsertSizeMetricsCollector(sample, library, readGroup);
}
@Override
public void acceptRecord(final SAMRecord record, final ReferenceSequence refSeq) {
if (!record.getReadPairedFlag() ||
record.getReadUnmappedFlag() ||
record.getMateUnmappedFlag() ||
record.getFirstOfPairFlag() ||
record.isSecondaryOrSupplementary() ||
record.getDuplicateReadFlag() ||
record.getInferredInsertSize() == 0) {
return;
}
super.acceptRecord(record, refSeq);
}
/** A Collector for individual InsertSizeMetrics for a given SAMPLE or SAMPLE/LIBRARY or SAMPLE/LIBRARY/READ_GROUP (depending on aggregation levels) */
public class PerUnitInsertSizeMetricsCollector implements PerUnitMetricCollector<InsertSizeMetrics, Integer, InsertSizeCollectorArgs> {
final EnumMap<SamPairUtil.PairOrientation, Histogram<Integer>> Histograms = new EnumMap<SamPairUtil.PairOrientation, Histogram<Integer>>(SamPairUtil.PairOrientation.class);
final String sample;
final String library;
final String readGroup;
private double totalInserts = 0;
public PerUnitInsertSizeMetricsCollector(final String sample, final String library, final String readGroup) {
this.sample = sample;
this.library = library;
this.readGroup = readGroup;
String prefix = null;
if (this.readGroup != null) {
prefix = this.readGroup + ".";
}
else if (this.library != null) {
prefix = this.library + ".";
}
else if (this.sample != null) {
prefix = this.sample + ".";
}
else {
prefix = "All_Reads.";
}
Histograms.put(SamPairUtil.PairOrientation.FR, new Histogram<Integer>("insert_size", prefix + "fr_count"));
Histograms.put(SamPairUtil.PairOrientation.TANDEM, new Histogram<Integer>("insert_size", prefix + "tandem_count"));
Histograms.put(SamPairUtil.PairOrientation.RF, new Histogram<Integer>("insert_size", prefix + "rf_count"));
}
public void acceptRecord(final InsertSizeCollectorArgs args) {
Histograms.get(args.getPairOrientation()).increment(args.getInsertSize());
}
public void finish() { }
public double getTotalInserts() {
return totalInserts;
}
public void addMetricsToFile(final MetricsFile<InsertSizeMetrics,Integer> file) {
for (final Histogram<Integer> h : this.Histograms.values()) totalInserts += h.getCount();
for(final Map.Entry<SamPairUtil.PairOrientation, Histogram<Integer>> entry : Histograms.entrySet()) {
final SamPairUtil.PairOrientation pairOrientation = entry.getKey();
final Histogram<Integer> Histogram = entry.getValue();
final double total = Histogram.getCount();
// Only include a category if it has a sufficient percentage of the data in it
if( total > totalInserts * minimumPct ) {
final InsertSizeMetrics metrics = new InsertSizeMetrics();
metrics.SAMPLE = this.sample;
metrics.LIBRARY = this.library;
metrics.READ_GROUP = this.readGroup;
metrics.PAIR_ORIENTATION = pairOrientation;
metrics.READ_PAIRS = (long) total;
metrics.MAX_INSERT_SIZE = (int) Histogram.getMax();
metrics.MIN_INSERT_SIZE = (int) Histogram.getMin();
metrics.MEDIAN_INSERT_SIZE = Histogram.getMedian();
metrics.MEDIAN_ABSOLUTE_DEVIATION = Histogram.getMedianAbsoluteDeviation();
final double median = Histogram.getMedian();
double covered = 0;
double low = median;
double high = median;
while (low >= Histogram.getMin() || high <= Histogram.getMax()) {
final Histogram<Integer>.Bin lowBin = Histogram.get((int) low);
if (lowBin != null) covered += lowBin.getValue();
if (low != high) {
final Histogram<Integer>.Bin highBin = Histogram.get((int) high);
if (highBin != null) covered += highBin.getValue();
}
final double percentCovered = covered / total;
final int distance = (int) (high - low) + 1;
if (percentCovered >= 0.1 && metrics.WIDTH_OF_10_PERCENT == 0) metrics.WIDTH_OF_10_PERCENT = distance;
if (percentCovered >= 0.2 && metrics.WIDTH_OF_20_PERCENT == 0) metrics.WIDTH_OF_20_PERCENT = distance;
if (percentCovered >= 0.3 && metrics.WIDTH_OF_30_PERCENT == 0) metrics.WIDTH_OF_30_PERCENT = distance;
if (percentCovered >= 0.4 && metrics.WIDTH_OF_40_PERCENT == 0) metrics.WIDTH_OF_40_PERCENT = distance;
if (percentCovered >= 0.5 && metrics.WIDTH_OF_50_PERCENT == 0) metrics.WIDTH_OF_50_PERCENT = distance;
if (percentCovered >= 0.6 && metrics.WIDTH_OF_60_PERCENT == 0) metrics.WIDTH_OF_60_PERCENT = distance;
if (percentCovered >= 0.7 && metrics.WIDTH_OF_70_PERCENT == 0) metrics.WIDTH_OF_70_PERCENT = distance;
if (percentCovered >= 0.8 && metrics.WIDTH_OF_80_PERCENT == 0) metrics.WIDTH_OF_80_PERCENT = distance;
if (percentCovered >= 0.9 && metrics.WIDTH_OF_90_PERCENT == 0) metrics.WIDTH_OF_90_PERCENT = distance;
if (percentCovered >= 0.99 && metrics.WIDTH_OF_99_PERCENT == 0) metrics.WIDTH_OF_99_PERCENT = distance;
--low;
++high;
}
// Trim the Histogram down to get rid of outliers that would make the chart useless.
final Histogram<Integer> trimmedHisto = Histogram; //alias it
if (HistogramWidth == null) {
HistogramWidth = (int) (metrics.MEDIAN_INSERT_SIZE + (deviations * metrics.MEDIAN_ABSOLUTE_DEVIATION));
}
trimmedHisto.trimByWidth(HistogramWidth);
metrics.MEAN_INSERT_SIZE = trimmedHisto.getMean();
metrics.STANDARD_DEVIATION = trimmedHisto.getStandardDeviation();
file.addHistogram(trimmedHisto);
file.addMetric(metrics);
}
}
}
}
}
// Arguments that need to be calculated once per SAMRecord that are then passed to each PerUnitMetricCollector
// for the given record
class InsertSizeCollectorArgs {
private final int insertSize;
private final SamPairUtil.PairOrientation po;
public int getInsertSize() {
return insertSize;
}
public SamPairUtil.PairOrientation getPairOrientation() {
return po;
}
public InsertSizeCollectorArgs(final int insertSize, final SamPairUtil.PairOrientation po) {
this.insertSize = insertSize;
this.po = po;
}
}