public static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction) {
// special case removal of all or no reads
if ( downsamplingFraction <= 0.0 )
return pileup;
if ( downsamplingFraction >= 1.0 )
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());
final PileupElementList[] alleleStratifiedElements = new PileupElementList[4];
for ( int i = 0; i < 4; i++ )
alleleStratifiedElements[i] = new PileupElementList();
// start by stratifying the reads by the alleles they represent at this position
for ( final PileupElement pe : pileup ) {
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
if ( baseIndex != -1 )
alleleStratifiedElements[baseIndex].add(pe);
}
// make a listing of allele counts and calculate the total count
final int[] alleleCounts = calculateAlleleCounts(alleleStratifiedElements);
final int totalAlleleCount = (int)MathUtils.sum(alleleCounts);
// do smart down-sampling
final int numReadsToRemove = (int)(totalAlleleCount * downsamplingFraction); // floor
final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove);
final HashSet<PileupElement> readsToRemove = new HashSet<PileupElement>(numReadsToRemove);
for ( int i = 0; i < 4; i++ ) {
final PileupElementList alleleList = alleleStratifiedElements[i];
// if we don't need to remove any reads, then don't
if ( alleleCounts[i] > targetAlleleCounts[i] )
readsToRemove.addAll(downsampleElements(alleleList, alleleCounts[i], alleleCounts[i] - targetAlleleCounts[i]));
}
// we need to keep the reads sorted because the FragmentUtils code will expect them in coordinate order and will fail otherwise
final List<PileupElement> readsToKeep = new ArrayList<PileupElement>(totalAlleleCount - numReadsToRemove);
for ( final PileupElement pe : pileup ) {
if ( !readsToRemove.contains(pe) ) {
readsToKeep.add(pe);
}
}
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>(readsToKeep));
}