final int[] windowsByGc = new int[101];
final int[] readsByGc = new int[101];
final long[] basesByGc = new long[101];
final long[] errorsByGc = new long[101];
final SAMFileReader sam = new SAMFileReader(INPUT);
if (!ASSUME_SORTED && sam.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
throw new PicardException("Header of input file " + INPUT.getAbsolutePath() + " indicates that it is not coordinate sorted. " +
"If you believe the records are in coordinate order, pass option ASSUME_SORTED=true. If not, sort the file with SortSam.");
}
final PeekableIterator<SAMRecord> iterator = new PeekableIterator<SAMRecord>(sam.iterator());
final ReferenceSequenceFile referenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
{
// Check that the sequence dictionaries match if present
final SAMSequenceDictionary referenceDictionary= referenceFile.getSequenceDictionary();
final SAMSequenceDictionary samFileDictionary = sam.getFileHeader().getSequenceDictionary();
if (referenceDictionary != null && samFileDictionary != null) {
SequenceUtil.assertSequenceDictionariesEqual(referenceDictionary, samFileDictionary);
}
}
////////////////////////////////////////////////////////////////////////////
// Loop over the reference and the reads and calculate the basic metrics
////////////////////////////////////////////////////////////////////////////
ReferenceSequence ref = null;
final ProgressLogger
progress = new ProgressLogger(log);
while ((ref = referenceFile.nextSequence()) != null) {
final byte[] refBases = ref.getBases();
StringUtil.toUpperCase(refBases);
final int refLength = refBases.length;
final int lastWindowStart = refLength - WINDOW_SIZE;
final byte[] gc = calculateAllGcs(refBases, windowsByGc, lastWindowStart);
while (iterator.hasNext() && iterator.peek().getReferenceIndex() == ref.getContigIndex()) {
final SAMRecord rec = iterator.next();
if (!rec.getReadPairedFlag() || rec.getFirstOfPairFlag()) ++this.totalClusters;
if (!rec.getReadUnmappedFlag()) {
final int pos = rec.getReadNegativeStrandFlag() ? rec.getAlignmentEnd() - WINDOW_SIZE : rec.getAlignmentStart();
++this.totalAlignedReads;
if (pos > 0) {
final int windowGc = gc[pos];
if (windowGc >= 0) {
++readsByGc[windowGc];
basesByGc[windowGc] += rec.getReadLength();
errorsByGc[windowGc] +=
SequenceUtil.countMismatches(rec, refBases, IS_BISULFITE_SEQUENCED) +
SequenceUtil.countInsertedBases(rec) + SequenceUtil.countDeletedBases(rec);
}
}
}
progress.record(rec);
}
log.info("Processed reference sequence: " + ref.getName());
}
// Finish up the reads, presumably all unaligned
while (iterator.hasNext()) {
final SAMRecord rec = iterator.next();
if (!rec.getReadPairedFlag() || rec.getFirstOfPairFlag()) ++this.totalClusters;
}
/////////////////////////////////////////////////////////////////////////////
// Synthesize the normalized coverage metrics and write it all out to a file
/////////////////////////////////////////////////////////////////////////////
final MetricsFile<GcBiasDetailMetrics,?> metricsFile = getMetricsFile();
final double totalWindows = sum(windowsByGc);
final double totalReads = sum(readsByGc);
final double meanReadsPerWindow = totalReads / totalWindows;
final double minimumWindowsToCountInSummary = totalWindows * this.MINIMUM_GENOME_FRACTION;
for (int i=0; i<windowsByGc.length; ++i) {
if (windowsByGc[i] == 0) continue;
GcBiasDetailMetrics m = new GcBiasDetailMetrics();
m.GC = i;
m.WINDOWS = windowsByGc[i];
m.READ_STARTS = readsByGc[i];
if (errorsByGc[i] > 0) m.MEAN_BASE_QUALITY = QualityUtil.getPhredScoreFromObsAndErrors(basesByGc[i], errorsByGc[i]);
m.NORMALIZED_COVERAGE = (m.READ_STARTS / (double) m.WINDOWS) / meanReadsPerWindow;
m.ERROR_BAR_WIDTH = (Math.sqrt(m.READ_STARTS) / (double) m.WINDOWS) / meanReadsPerWindow;
metricsFile.addMetric(m);
}
metricsFile.write(OUTPUT);
// Synthesize the high level metrics
if (SUMMARY_OUTPUT != null) {
final MetricsFile<GcBiasSummaryMetrics,?> summaryMetricsFile = getMetricsFile();
final GcBiasSummaryMetrics summary = new GcBiasSummaryMetrics();
summary.WINDOW_SIZE = this.WINDOW_SIZE;
summary.TOTAL_CLUSTERS = this.totalClusters;
summary.ALIGNED_READS = this.totalAlignedReads;
calculateDropoutMetrics(metricsFile.getMetrics(), summary);
summaryMetricsFile.addMetric(summary);
summaryMetricsFile.write(SUMMARY_OUTPUT);
}
// Plot the results
final NumberFormat fmt = NumberFormat.getIntegerInstance();
fmt.setGroupingUsed(true);
final String subtitle = "Total clusters: " + fmt.format(this.totalClusters) +
", Aligned reads: " + fmt.format(this.totalAlignedReads);
String title = INPUT.getName().replace(".duplicates_marked", "").replace(".aligned.bam", "");
// Qualify the title with the library name iff it's for a single sample
final List<SAMReadGroupRecord> readGroups = sam.getFileHeader().getReadGroups();
if (readGroups.size() == 1) {
title += "." + readGroups.get(0).getLibrary();
}
RExecutor.executeFromClasspath(R_SCRIPT,
OUTPUT.getAbsolutePath(),