for (int i = 0; i < bytes.length; ++i) {
final boolean currentBaseIsN = (bytes[i] == 'N');
// close the current block and emit its interval whenever the base type switches, i.e. "nBlockIsOpen" disagrees with "currentBaseIsN"
if (nBlockIsOpen != currentBaseIsN) {
preliminaryIntervals.add(new Interval(rec.getSequenceName(), start + 1, i, false, nBlockIsOpen ? Nmer : ACGTmer));
start = i;
nBlockIsOpen = !nBlockIsOpen;
}
}
// Catch the last block of the chromosome (it is still open when the loop ends)
preliminaryIntervals.add(new Interval(rec.getSequenceName(), start + 1, bytes.length, false, nBlockIsOpen ? Nmer : ACGTmer));
}
// now that we have the whole list, we need to remove the short Nmers.
// process the list, replacing trios with short Nmers in the middle with longer intervals:
while (!preliminaryIntervals.isEmpty()) {
// if the top trio matches the bill, replace it with a merged interval
// and push that back onto the top of the list (we expect alternating Nmers and ACGTmers,
// but the logic does not assume it)
// (comparing names with == is intentional: I want this to be fast, and the names are all taken directly from the static prototypes Nmer and ACGTmer)
//noinspection StringEquality
if (preliminaryIntervals.size() >= 3 && // three or more intervals
preliminaryIntervals.get(0).getName() == ACGTmer && //an N-mer
preliminaryIntervals.get(1).getName() == Nmer && //between two
preliminaryIntervals.get(2).getName() == ACGTmer && //ACGT-mers
preliminaryIntervals.get(0).abuts(preliminaryIntervals.get(1)) && // all abutting
preliminaryIntervals.get(1).abuts(preliminaryIntervals.get(2)) && // each other (there are many contigs...)
preliminaryIntervals.get(1).length() <= maxNmerToMerge) //and the N-mer is of length N or less
{
// create the new ACGTmer interval
final Interval temp = new Interval(
preliminaryIntervals.get(0).getSequence(),
preliminaryIntervals.get(0).getStart(),
preliminaryIntervals.get(2).getEnd(), false, ACGTmer);
//remove the first 3 elements of the list