IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
if (INTERVALS != null) IOUtil.assertFileIsReadable(INTERVALS);
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
final SAMFileReader in = new SAMFileReader(INPUT);
final Set<String> samples = new HashSet<String>();
final Set<String> libraries = new HashSet<String>();
for (final SAMReadGroupRecord rec : in.getFileHeader().getReadGroups()) {
samples.add(nvl(rec.getSample(), UNKNOWN_SAMPLE));
libraries.add(nvl(rec.getLibrary(), UNKNOWN_LIBRARY));
}
// Setup the calculators
final Set<String> contexts = CONTEXTS.isEmpty() ? makeContextStrings(CONTEXT_SIZE) : CONTEXTS;
final ListMap<String, Calculator> calculators = new ListMap<String, Calculator>();
for (final String context : contexts) {
for (final String library : libraries) {
calculators.add(context, new Calculator(library, context));
}
}
// Load up dbSNP if available
log.info("Loading dbSNP File: " + DB_SNP);
final DbSnpBitSetUtil dbSnp;
if (DB_SNP != null) dbSnp = new DbSnpBitSetUtil(DB_SNP, in.getFileHeader().getSequenceDictionary());
else dbSnp = null;
// Make an iterator that will filter out funny looking things
final SamLocusIterator iterator;
if (INTERVALS != null) {
final IntervalList intervals = IntervalList.fromFile(INTERVALS);
intervals.unique();
iterator = new SamLocusIterator(in, intervals, false);
}
else {
iterator = new SamLocusIterator(in);
}
iterator.setEmitUncoveredLoci(false);
iterator.setMappingQualityScoreCutoff(MINIMUM_MAPPING_QUALITY);
iterator.setSamFilters(Arrays.asList(
new NotPrimaryAlignmentFilter(),
new DuplicateReadFilter(),
new InsertSizeFilter(MINIMUM_INSERT_SIZE, MAXIMUM_INSERT_SIZE)
));
log.info("Starting iteration.");
long nextLogTime = 0;
int sites = 0;
for (final SamLocusIterator.LocusInfo info : iterator) {
// Skip dbSNP sites
final String chrom = info.getSequenceName();
final int pos = info.getPosition();
final int index = pos-1;
if (dbSnp != null && dbSnp.isDbSnpSite(chrom, pos)) continue;
// Skip sites at the end of chromosomes
final byte[] bases = refWalker.get(info.getSequenceIndex()).getBases();
if (pos < 3 || pos > bases.length-3) continue;
// Skip non C-G bases
final byte base = StringUtil.toUpperCase(bases[index]);
if (base != 'C' && base != 'G') continue;