/**
 * Returns the sections currently held by the input-data stats keeper.
 *
 * @return whatever {@code inputDataKeeper.getSections()} yields; the list is
 *         handed back as-is, without copying or wrapping
 */
public List<StatsKeeper.Section> getInputDataSections() {
final List<StatsKeeper.Section> inputSections = inputDataKeeper.getSections();
return inputSections;
}
private void prepareSummaryStatsKeeper() {
StringUtilsSwing sdf = new StringUtilsSwing();
String postfix = getNamePostfix();
if (warnings != null && !warnings.isEmpty()) {
StatsKeeper.Section warningsSection = new StatsKeeper.Section("Warnings");
for (Map.Entry<String,String> entry : warnings.entrySet()) {
warningsSection.addRow(entry.getKey(), entry.getValue());
}
summaryStatsKeeper.addSection(warningsSection);
}
StatsKeeper.Section globals = new StatsKeeper.Section("Globals");
globals.addRow("Reference size", sdf.formatLong(referenceSize));
globals.addRow("Number of reads", sdf.formatLong(numReads));
globals.addRow("Mapped reads", sdf.formatLong(numMappedReads)
+ " / " + sdf.formatPercentage(getPercentMappedReads()));
globals.addRow("Unmapped reads",
sdf.formatLong(numReads - numMappedReads) + " / "
+ sdf.formatPercentage(100.0 - getPercentMappedReads()));
globals.addRow("Paired reads",
sdf.formatLong(numPairedReads) + " / "
+ sdf.formatPercentage(percantagePairedReads) );
if (numPairedReads > 0) {
globals.addRow("Mapped reads, only first in pair",
sdf.formatLong(numberOfMappedFirstOfPair) + " / " +
sdf.formatPercentage(percentageOfMappedFirstOfPair));
globals.addRow("Mapped reads, only second in pair",
sdf.formatLong(numberOfMappedSecondOfPair) + " / " +
sdf.formatPercentage(percentageOfMappedSecondOfPair));
globals.addRow("Mapped reads, both in pair",
sdf.formatLong(numPairedReads - numSingletons) + " / "
+ sdf.formatPercentage((getPercentageBothMatesPaired())));
globals.addRow("Mapped reads, singletons",
sdf.formatLong(numSingletons) + " / "
+ sdf.formatPercentage(getPercentSingletons()));
}
globals.addRow("Read min/max/mean length",
sdf.formatLong(readMinSize) + " / "
+ sdf.formatLong(readMaxSize) + " / "
+ sdf.formatDecimal(readMeanSize));
if (numSelectedRegions == 0) {
globals.addRow("Clipped reads",
sdf.formatInteger(numClippedReads) + " / " +
sdf.formatPercentage(getPercentageClippedReads()));
globals.addRow("Duplication rate", sdf.formatPercentage(duplicationRate));
}
summaryStatsKeeper.addSection(globals);
if (numSelectedRegions > 0) {
StatsKeeper.Section globalsInRegions = new StatsKeeper.Section("Globals" + postfix);
globalsInRegions.addRow("Regions size/percentage of reference",
sdf.formatLong((numBasesInsideRegions))
+ " / " + sdf.formatPercentage(getSelectedRegionsPercentage()));
globalsInRegions.addRow("Mapped reads",
sdf.formatLong(numMappedReadsInRegions)
+ " / " + sdf.formatPercentage(percentageMappedReadsInRegions));
if (numPairedReads > 0) {
globalsInRegions.addRow("Mapped reads, only first in pair",
sdf.formatLong(numMappedFirstOfPairInRegions) + " / " +
sdf.formatPercentage(percentageOfMappedFirstOfPairInRegions));
globalsInRegions.addRow("Mapped reads, only second in pair",
sdf.formatLong(numMappedSecondOfPairInRegions) + " / " +
sdf.formatPercentage(percentageOfMappedSecondOfPairInRegions));
globalsInRegions.addRow("Mapped reads, both in pair",
sdf.formatLong(numPairedReadsInRegions - numSingletonsInRegions) + " / "
+ sdf.formatPercentage((getPercentageBothMatesPairedInRegions())));
globalsInRegions.addRow("Mapped reads, singletons",
sdf.formatLong(numSingletonsInRegions) + " / "
+ sdf.formatPercentage(percentageSingletonsInRegions));
globalsInRegions.addRow("Correct strand reads",
sdf.formatLong(numCorrectStrandReads) + " / " +
sdf.formatPercentage(percentageCorrectStrandReads) );
globalsInRegions.addRow("Clipped reads",
sdf.formatInteger(numClippedReads) + " / " +
sdf.formatPercentage(getPercentageClippedReads()));
globalsInRegions.addRow("Duplication rate", sdf.formatPercentage(duplicationRate));
}
summaryStatsKeeper.addSection(globalsInRegions);
}
StatsKeeper.Section acgtContent = new StatsKeeper.Section("ACGT Content" + postfix);
acgtContent.addRow("Number/percentage of A's", sdf.formatLong(getaNumber()) +
" / " + sdf.formatPercentage(getaPercent()));
acgtContent.addRow("Number/percentage of C's",sdf.formatLong(getcNumber()) +
" / " + sdf.formatPercentage(getcPercent()));
acgtContent.addRow("Number/percentage of T's", sdf.formatLong(gettNumber()) +
" / " + sdf.formatPercentage(gettPercent()));
acgtContent.addRow("Number/percentage of G's",sdf.formatLong(getgNumber()) +
" / " + sdf.formatPercentage(getgPercent()));
acgtContent.addRow("Number/percentage of N's",sdf.formatLong(getnNumber()) +
" / " + sdf.formatPercentage(getnPercent()));
acgtContent.addRow("GC Percentage", sdf.formatPercentage(getGcPercent()));
summaryStatsKeeper.addSection(acgtContent);
StatsKeeper.Section coverageSection = new StatsKeeper.Section("Coverage" + postfix);
coverageSection.addRow("Mean", sdf.formatDecimal(meanCoverage));
coverageSection.addRow("Standard Deviation",sdf.formatDecimal(stdCoverage) );
summaryStatsKeeper.addSection(coverageSection);
StatsKeeper.Section mappingQualitySection = new StatsKeeper.Section("Mapping Quality" + postfix);
mappingQualitySection.addRow("Mean Mapping Quality", sdf.formatDecimal(meanMappingQuality));
summaryStatsKeeper.addSection(mappingQualitySection);
if (meanInsertSize != 0)
{
StatsKeeper.Section insertSizeSection = new StatsKeeper.Section("Insert size" + postfix);
insertSizeSection.addRow("Mean", sdf.formatDecimal(meanInsertSize));
insertSizeSection.addRow("Standard Deviation", sdf.formatDecimal(stdInsertSize));
insertSizeSection.addRow("P25/Median/P75", sdf.formatDecimal(p25InsertSize) + " / " +
sdf.formatDecimal(medianInsertSize) + " / " + sdf.formatDecimal(p75InsertSize));
summaryStatsKeeper.addSection(insertSizeSection);
}
int numIndels = numInsertions + numDeletions;
if ( numIndels > 0 || numMismatches > 0 || alignmentErrorRate > 0) {
StatsKeeper.Section indelsSection = new StatsKeeper.Section("Mismatches and indels" + postfix);
if (alignmentErrorRate > 0) {
indelsSection.addRow("General error rate", sdf.formatPercentage(alignmentErrorRate * 100.0));
}
if (numMismatches > 0) {
indelsSection.addRow("Mismatches",sdf.formatDecimal(numMismatches));
}
//indelsSection.addRow("Total reads with indels", sdf.formatInteger(numIndels));
if (numIndels > 0) {
indelsSection.addRow("Insertions",sdf.formatDecimal(numInsertions) );
indelsSection.addRow("Deletions",sdf.formatDecimal(numDeletions) );
indelsSection.addRow("Homopolymer indels",sdf.formatPercentage(homopolymerIndelFraction * 100.0) );
}
summaryStatsKeeper.addSection(indelsSection);
}