nextToken = Token.ASSIGN;
inputOffset++;
return;
}
}
throw new XPathException("Unexpected colon at start of token");
case '@':
nextToken = Token.AT;
return;
case '?':
nextToken = Token.QMARK;
return;
case '[':
nextToken = Token.LSQB;
return;
case ']':
nextToken = Token.RSQB;
return;
case '{':
nextToken = Token.LCURLY;
return;
case '}':
nextToken = Token.RCURLY;
return;
case ';':
nextToken = Token.SEMICOLON;
state = DEFAULT_STATE;
return;
case '(':
if (inputOffset < inputLength && input.charAt(inputOffset) == '#') {
inputOffset++;
int pragmaStart = inputOffset;
int nestingDepth = 1;
while (nestingDepth > 0 && inputOffset < (inputLength-1)) {
if (input.charAt(inputOffset) == '\n') {
incrementLineNumber();
} else if (input.charAt(inputOffset) == '#' &&
input.charAt(inputOffset+1) == ')') {
nestingDepth--;
inputOffset++;
} else if (input.charAt(inputOffset) == '(' &&
input.charAt(inputOffset+1) == '#') {
nestingDepth++;
inputOffset++;
}
inputOffset++;
}
if (nestingDepth > 0) {
throw new XPathException("Unclosed XQuery pragma");
}
nextToken = Token.PRAGMA;
nextTokenValue = input.substring(pragmaStart, inputOffset-2 );
return;
}
if (inputOffset < inputLength && input.charAt(inputOffset) == ':') {
// XPath comment syntax is (: .... :)
// Comments may be nested, and may now be empty
inputOffset++;
int nestingDepth = 1;
while (nestingDepth > 0 && inputOffset < (inputLength-1)) {
if (input.charAt(inputOffset) == '\n') {
incrementLineNumber();
} else if (input.charAt(inputOffset) == ':' &&
input.charAt(inputOffset+1) == ')') {
nestingDepth--;
inputOffset++;
} else if (input.charAt(inputOffset) == '(' &&
input.charAt(inputOffset+1) == ':') {
nestingDepth++;
inputOffset++;
}
inputOffset++;
}
if (nestingDepth > 0) {
throw new XPathException("Unclosed XPath comment");
}
lookAhead();
} else {
nextToken = Token.LPAR;
}
return;
case ')':
nextToken = Token.RPAR;
return;
case '+':
nextToken = Token.PLUS;
return;
case '-':
nextToken = Token.MINUS; // not detected if part of a name
return;
case '=':
nextToken = Token.EQUALS;
return;
case '!':
if (inputOffset < inputLength
&& input.charAt(inputOffset) == '=') {
inputOffset++;
nextToken = Token.NE;
return;
}
throw new XPathException("'!' without '='");
case '*':
// disambiguation of MULT and STAR is now done later
if (inputOffset < inputLength
&& input.charAt(inputOffset) == ':') {
inputOffset++;
nextToken = Token.SUFFIX;
// we leave the parser to get the following name as a separate
// token, but first check there's no intervening white space or comments
if (inputOffset < inputLength) {
char ahead = input.charAt(inputOffset);
if (" \r\t\n(".indexOf(ahead) >= 0) {
throw new XPathException("Whitespace and comments are not allowed after '*:'");
}
}
return;
}
nextToken = Token.STAR;
return;
case ',':
nextToken = Token.COMMA;
return;
case '$':
nextToken = Token.DOLLAR;
return;
case '|':
nextToken = Token.UNION;
return;
case '#':
nextToken = Token.HASH;
return;
case '<':
if (inputOffset < inputLength
&& input.charAt(inputOffset) == '=') {
inputOffset++;
nextToken = Token.LE;
return;
}
if (inputOffset < inputLength
&& input.charAt(inputOffset) == '<') {
inputOffset++;
nextToken = Token.PRECEDES;
return;
}
nextToken = Token.LT;
return;
case '>':
if (inputOffset < inputLength
&& input.charAt(inputOffset) == '=') {
inputOffset++;
nextToken = Token.GE;
return;
}
if (inputOffset < inputLength
&& input.charAt(inputOffset) == '>') {
inputOffset++;
nextToken = Token.FOLLOWS;
return;
}
nextToken = Token.GT;
return;
case '.':
if (inputOffset < inputLength
&& input.charAt(inputOffset) == '.') {
inputOffset++;
nextToken = Token.DOTDOT;
return;
}
if (inputOffset == inputLength
|| input.charAt(inputOffset) < '0'
|| input.charAt(inputOffset) > '9') {
nextToken = Token.DOT;
return;
}
// otherwise drop through: we have a number starting with a decimal point
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
// The logic here can return some tokens that are not legitimate numbers,
// for example "23e" or "1.0e+". However, this will only happen if the XPath
// expression as a whole is syntactically incorrect.
// These errors will be caught by the numeric constructor.
boolean allowE = true;
boolean allowSign = false;
boolean allowDot = true;
boolean endOfNum = false;
numloop:
while (!endOfNum) {
switch (c) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
allowSign = false;
break;
case '.':
if (allowDot) {
allowDot = false;
allowSign = false;
} else {
inputOffset--;
break numloop;
}
break;
case 'E': case 'e':
if (allowE) {
allowSign = true;
allowE = false;
} else {
inputOffset--;
break numloop;
}
break;
case '+': case '-':
if (allowSign) {
allowSign = false;
} else {
inputOffset--;
break numloop;
}
break;
default:
if (('a' <= c && c <= 'z') || c>127) {
// this prevents the famous "10div 3"
throw new XPathException("Separator needed after numeric literal");
}
inputOffset--;
break numloop;
}
if (inputOffset >= inputLength) break;
c = input.charAt(inputOffset++);
}
nextTokenValue = input.substring(nextTokenStartOffset, inputOffset);
nextToken = Token.NUMBER;
return;
case '"':
case '\'':
nextTokenValue = "";
while (true) {
inputOffset = input.indexOf(c, inputOffset);
if (inputOffset < 0) {
inputOffset = nextTokenStartOffset + 1;
throw new XPathException("Unmatched quote in expression");
}
nextTokenValue += input.substring(nextTokenStartOffset + 1, inputOffset++);
// look for doubled delimiters
if (inputOffset < inputLength && input.charAt(inputOffset) == c) {
nextTokenValue += c;
nextTokenStartOffset = inputOffset;
inputOffset++;
} else {
break;
}
}
// maintain line number if there are newlines in the string
if (nextTokenValue.indexOf('\n') >= 0) {
for (int i = 0; i<nextTokenValue.length(); i++) {
if (nextTokenValue.charAt(i) == '\n') {
lineNumber++;
if (newlineOffsets==null) {
newlineOffsets = new ArrayList(20);
}
newlineOffsets.add(new Integer(nextTokenStartOffset+i));
}
}
}
nextTokenValue = nextTokenValue.intern();
nextToken = Token.STRING_LITERAL;
return;
case '\n':
incrementLineNumber();
// drop through
case ' ':
case '\t':
case '\r':
nextTokenStartOffset = inputOffset;
break;
default:
if (c < 0x80 && !Character.isLetter(c)) {
throw new XPathException("Invalid character '" + c + "' in expression");
}
/* fall through */
case '_':
loop:
for (;inputOffset < inputLength; inputOffset++) {