while ( getNReadsInQueue() > 1 ) {
// emit to disk
writeRead(remove(waitingReads));
}
SAMRecord lastRead = remove(waitingReads);
lastLocFlushed = (lastRead.getReferenceIndex() == -1) ? null : genomeLocParser.createGenomeLoc(lastRead);
writeRead(lastRead);
if ( !tooManyReads )
forMateMatching.clear();
else
purgeUnmodifiedMates();
}
// fix mates, as needed
// Since setMateInfo can move reads, we potentially need to remove the mate, and requeue
// it to ensure proper sorting
if ( newRead.getReadPairedFlag() && !newRead.getNotPrimaryAlignmentFlag() ) {
SAMRecordHashObject mate = forMateMatching.get(newRead.getReadName());
if ( mate != null ) {
// 1. Frustratingly, Picard's setMateInfo() method unaligns (by setting the reference contig
// to '*') read pairs when both of their flags have the unmapped bit set. This is problematic
// when trying to emit reads in coordinate order because all of a sudden we have reads in the
// middle of the bam file that now belong at the end - and any mapped reads that get emitted
// after them trigger an exception in the writer. For our purposes, because we shouldn't be
// moving read pairs when they are both unmapped anyway, we'll just not run fix mates on them.
// 2. Furthermore, when reads get mapped to the junction of two chromosomes (e.g. MT since it
// is actually circular DNA), their unmapped bit is set, but they are given legitimate coordinates.
// The Picard code will come in and move the read all the way back to its mate (which can be
// arbitrarily far away). However, we do still want to move legitimately unmapped reads whose
// mates are mapped, so the compromise will be that if the mate is still in the queue then we'll
// move the read and otherwise we won't.
boolean doNotFixMates = newRead.getReadUnmappedFlag() && (mate.record.getReadUnmappedFlag() || !waitingReads.contains(mate.record));
if ( !doNotFixMates ) {
boolean reQueueMate = mate.record.getReadUnmappedFlag() && ! newRead.getReadUnmappedFlag();
if ( reQueueMate ) {
// the mate is unmapped but newRead is mapped, so setMateInfo may move the mate
// to be next to newRead; the mate therefore needs to be removed from the waitingReads
// queue here and reinserted afterwards so that the queue stays properly sorted.
// note -- the removal must happen before the setMateInfo call below
if ( ! waitingReads.remove(mate.record) )
// we must have hit a region with too much depth and flushed the queue
reQueueMate = false;
}
// we've already seen our mate -- set the mate info and remove it from the map
// Via Nils Homer:
// There will be two SamPairUtil.setMateInfo functions. The default will not update the mate
// cigar tag; in fact, it will remove it if it is present. An alternative SamPairUtil.setMateInfo
// function takes a boolean as an argument ("addMateCigar") and will add/update the mate cigar if
// set to true. This is the one you want to use.
SamPairUtil.setMateInfo(mate.record, newRead, null, true);
if ( reQueueMate ) waitingReads.add(mate.record);
}
forMateMatching.remove(newRead.getReadName());
} else if ( pairedReadIsMovable(newRead) ) {
forMateMatching.put(newRead.getReadName(), new SAMRecordHashObject(newRead, readWasModified));
}
}
waitingReads.add(newRead);
if ( ++counter % EMIT_FREQUENCY == 0 ) {
while ( ! waitingReads.isEmpty() ) { // there's something in the queue
SAMRecord read = waitingReads.peek();
if ( noReadCanMoveBefore(read.getAlignmentStart(), newRead) &&
(!pairedReadIsMovable(read) // we won't try to move such a read
|| noReadCanMoveBefore(read.getMateAlignmentStart(), newRead ) ) ) { // we're already past where the mate started
// remove reads from the map that we have emitted -- useful for case where the mate never showed up
if ( !read.getNotPrimaryAlignmentFlag() )
forMateMatching.remove(read.getReadName());
if ( DEBUG )
logger.warn(String.format("EMIT! At %d: read %s at %d with isize %d, mate start %d, op = %s",
newRead.getAlignmentStart(), read.getReadName(), read.getAlignmentStart(),
read.getInferredInsertSize(), read.getMateAlignmentStart(), read.getAttribute("OP")));
// emit to disk
writeRead(remove(waitingReads));
} else {
if ( DEBUG )
logger.warn(String.format("At %d: read %s at %d with isize %d couldn't be emited, mate start %d",
newRead.getAlignmentStart(), read.getReadName(), read.getAlignmentStart(), read.getInferredInsertSize(), read.getMateAlignmentStart()));
break;
}
}
if ( DEBUG ) logger.warn(String.format("At %d: Done with emit cycle", newRead.getAlignmentStart()));