/**Parses a sequence string and replaces the contents of the supplied list with the nucleotides and polymorphisms (SNPs, InDels) found in it*/
@Override
public String parse(String seq, byte acidType, List<SequenceElement> elements) throws SequenceFormatException {
elements.clear();
Stack<SequenceElement> elementStack = new Stack<SequenceElement>();
SequenceElementContainer root = new VariationSequenceElementContainer("",acidType); //$NON-NLS-1$
elementStack.push(root);
boolean inSNP = false;
boolean foundPos = false;
if (targetPoly == 0)
variantIndex = -1;
int SNPno = 0;
int InDelno = 0;
Stack<Nucleotide> SNPNucleotides = new Stack<Nucleotide>();
//Go through each character of the string
char current = ' ';
char previous;
for (int pos = 0; pos < seq.length(); pos++) {
//Remember the previous char
previous = current;
//Get the current char
current = seq.charAt(pos);
if (current == positionSymbol) { //Position-only target
if (!foundPos) {
foundPos = true;
targetPoly = 0;
variantIndex = pos;
}
else if (foundPos) {
throw new SequenceFormatException("Too many position symbols"); //$NON-NLS-1$
}
}
else if (current == variantSeparator) { //SNP
//Check the next char, if outofbounds we have an error
Nucleotide nextnt;
try {
nextnt = NucleotideSequenceHandler.getNucleotide(seq.charAt(pos + 1), acidType);
}
catch (StringIndexOutOfBoundsException sx) {
throw new SequenceFormatException(
"Expected nucleotide at position: " + (pos + 2)); //$NON-NLS-1$
}
//Check previous position, this must be a nucleotide
Nucleotide prevnt = NucleotideSequenceHandler.getNucleotide(previous, acidType);
if (prevnt == null) {
throw new SequenceFormatException(
"Expected nucleotide at position: " + (pos)); //$NON-NLS-1$
}
if (nextnt == null) {
throw new SequenceFormatException(
"Expected nucleotide at position: " + (pos + 2)); //$NON-NLS-1$
}
if (inSNP) { //We are already in the SNP, add the next nucleotide
SNPNucleotides.push(nextnt);
}
else { //Add both the previous and the next
SNPNucleotides.push(prevnt);
SNPNucleotides.push(nextnt);
inSNP = true;
//and remove the last nucleotide from elements or the top InDel
SequenceElementContainer e = (SequenceElementContainer) elementStack.peek();
e.removeFromEnd();
}
//Step up one step and check if this was the last separator
pos++;
char next = ' ';
try {
next = seq.charAt(pos + 1);
}
catch (StringIndexOutOfBoundsException sx2) {
//Don't care
}
if (next != variantSeparator) {
//This was the last separator so make the SNP
Nucleotide[] nucs = new Nucleotide[SNPNucleotides.size()];
for (int i = 0; i < nucs.length; i++) {
nucs[i] = SNPNucleotides.pop();
}
SNP snp = new SNP(nucs, "SNP " + (++SNPno)); //$NON-NLS-1$
//and add it to the element on the stack
SequenceElementContainer e = (SequenceElementContainer) elementStack.peek();
e.addToEnd(snp);
inSNP = false;
}
}
else if (current == inDelBeginSymbol) {
elementStack.push(new InDel("InDel " + (++InDelno), "", "InDel " + (++InDelno), acidType)); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
}
else if (current == inDelEndSymbol) {
if (elementStack.size() < 2) {
throw new SequenceFormatException("Too many InDel end symbols"); //$NON-NLS-1$
}
InDel indel = (InDel) elementStack.pop();
SequenceElementContainer e = (SequenceElementContainer) elementStack.peek();
e.addToEnd(indel);
}
else {
//No special symbol, so is either nucleotide, degenerate nucleotide or invalid symbol
Nucleotide n = NucleotideSequenceHandler.getNucleotide(current, acidType);
if (n == null) { //invalid
throw new SequenceFormatException(
"Invalid symbol encountered during sequence parsing: " + current); //$NON-NLS-1$
}
if (NucleotideSequenceHandler.isPoly(n)) { //degenerate nucleotide (SNP)
SNP snp = new SNP( ( (DegenerateNucleotide) n).getPossible(),
"SNP" + (++SNPno)); //$NON-NLS-1$
SequenceElementContainer e = (SequenceElementContainer) elementStack.peek();
e.addToEnd(snp);
}
else { //normal nucleotide
SequenceElementContainer e = (SequenceElementContainer) elementStack.peek();
e.addToEnd(n);
}
}
}
elements.addAll(root.getElements());
String seqString = sequenceOf(elements);