nextToken = Token.ASSIGN;
inputOffset++;
return;
}
}
throw new StaticError("Unexpected colon at start of token");
case '@':
nextToken = Token.AT;
return;
case '?':
nextToken = Token.QMARK;
return;
case '[':
nextToken = Token.LSQB;
return;
case ']':
nextToken = Token.RSQB;
return;
case '{':
nextToken = Token.LCURLY;
return;
case '}':
nextToken = Token.RCURLY;
return;
case ';':
nextToken = Token.SEMICOLON;
state = DEFAULT_STATE;
return;
case '(':
if (inputOffset < inputLength && input.charAt(inputOffset) == ':') {
// XPath comment syntax is (: .... :)
// Comments may be nested
// Pragmas are recognized as anything starting with "(::", in which case the terminator
// must be "::)"
inputOffset++;
int pragmaStart = -1;
if (recognizePragmas && inputOffset < inputLength && input.charAt(inputOffset) == ':') {
inputOffset++;
pragmaStart = inputOffset;
}
int nestingDepth = 1;
while (nestingDepth > 0 && inputOffset < (inputLength-1)) {
if (input.charAt(inputOffset) == '\n') {
incrementLineNumber();
} else if (input.charAt(inputOffset) == ':' &&
input.charAt(inputOffset+1) == ')') {
if (pragmaStart >=0 && nestingDepth==1) {
if (input.charAt(inputOffset-1) == ':') {
lastPragma = input.substring(pragmaStart, inputOffset-1).trim();
if (lastPragma.startsWith("extension")) {
inputOffset+=2;
throw new StaticError("Unrecognized must-understand extension");
} else if (lastPragma.startsWith("pragma")) {
lastPragma = lastPragma.substring(6).trim();
} else {
inputOffset+=2;
throw new StaticError("'(::' must be followed by 'pragma' or 'extension'");
}
nestingDepth--;
inputOffset++;
}
} else {
nestingDepth--;
inputOffset++;
}
} else if (input.charAt(inputOffset) == '(' &&
input.charAt(inputOffset+1) == ':') {
nestingDepth++;
inputOffset++;
}
inputOffset++;
}
if (nestingDepth > 0) {
if (pragmaStart >= 0) {
throw new StaticError("Unclosed XQuery pragma");
} else {
throw new StaticError("Unclosed XPath comment");
}
}
lookAhead();
} else {
nextToken = Token.LPAR;
}
return;
case ')':
nextToken = Token.RPAR;
return;
case '+':
nextToken = Token.PLUS;
return;
case '-':
nextToken = Token.MINUS; // not detected if part of a name
return;
case '=':
nextToken = Token.EQUALS;
return;
case '!':
if (inputOffset < inputLength
&& input.charAt(inputOffset) == '=') {
inputOffset++;
nextToken = Token.NE;
return;
}
throw new StaticError("'!' without '='");
case '*':
// disambiguation of MULT and STAR is now done later
//if (followsOperator()) {
if (inputOffset < inputLength
&& input.charAt(inputOffset) == ':') {
inputOffset++;
nextToken = Token.SUFFIX;
// we leave the parser to get the following name as a separate
// token, but first check there's no intervening white space
if (inputOffset < inputLength) {
char ahead = input.charAt(inputOffset);
if (" \r\t\n".indexOf(ahead) >= 0) {
throw new StaticError("Whitespace is not allowed after '*:'");
}
}
return;
}
nextToken = Token.STAR;
//} else {
// nextToken = MULT;
//}
return;
case ',':
nextToken = Token.COMMA;
return;
case '$':
nextToken = Token.DOLLAR;
return;
case '|':
nextToken = Token.UNION;
return;
case '<':
if (inputOffset < inputLength
&& input.charAt(inputOffset) == '=') {
inputOffset++;
nextToken = Token.LE;
return;
}
if (inputOffset < inputLength
&& input.charAt(inputOffset) == '<') {
inputOffset++;
nextToken = Token.PRECEDES;
return;
}
nextToken = Token.LT;
return;
case '>':
if (inputOffset < inputLength
&& input.charAt(inputOffset) == '=') {
inputOffset++;
nextToken = Token.GE;
return;
}
if (inputOffset < inputLength
&& input.charAt(inputOffset) == '>') {
inputOffset++;
nextToken = Token.FOLLOWS;
return;
}
nextToken = Token.GT;
return;
case '.':
if (inputOffset < inputLength
&& input.charAt(inputOffset) == '.') {
inputOffset++;
nextToken = Token.DOTDOT;
return;
}
if (inputOffset == inputLength
|| input.charAt(inputOffset) < '0'
|| input.charAt(inputOffset) > '9') {
nextToken = Token.DOT;
return;
}
// otherwise drop through: we have a number starting with a decimal point
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
// The logic here can return some tokens that are not legitimate numbers,
// for example "23e" or "1.0e+". However, this will only happen if the XPath
// expression as a whole is syntactically incorrect.
// These errors will be caught by the numeric constructor.
boolean allowE = true;
boolean allowSign = false;
boolean allowDot = true;
boolean endOfNum = false;
numloop:
while (!endOfNum) {
switch (c) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
allowSign = false;
break;
case '.':
if (allowDot) {
allowDot = false;
allowSign = false;
} else {
inputOffset--;
break numloop;
}
break;
case 'E': case 'e':
if (allowE) {
allowSign = true;
allowE = false;
} else {
inputOffset--;
break numloop;
}
break;
case '+': case '-':
if (allowSign) {
allowSign = false;
} else {
inputOffset--;
break numloop;
}
break;
default:
inputOffset--;
break numloop;
}
if (inputOffset >= inputLength) break;
c = input.charAt(inputOffset++);
}
nextTokenValue = input.substring(nextTokenStartOffset, inputOffset);
nextToken = Token.NUMBER;
return;
case '"':
case '\'':
nextTokenValue = "";
while (true) {
inputOffset = input.indexOf(c, inputOffset);
if (inputOffset < 0) {
inputOffset = nextTokenStartOffset + 1;
throw new StaticError("Unmatched quote in expression");
}
nextTokenValue += input.substring(nextTokenStartOffset + 1, inputOffset++);
// look for doubled delimiters
if (inputOffset < inputLength && input.charAt(inputOffset) == c) {
nextTokenValue += c;
nextTokenStartOffset = inputOffset;
inputOffset++;
} else {
break;
}
}
// maintain line number if there are newlines in the string
if (nextTokenValue.indexOf('\n') >= 0) {
for (int i = 0; i<nextTokenValue.length(); i++) {
if (nextTokenValue.charAt(i) == '\n') {
lineNumber++;
if (newlineOffsets==null) {
newlineOffsets = new ArrayList();
}
newlineOffsets.add(new Integer(nextTokenStartOffset+i));
}
}
}
nextTokenValue = nextTokenValue.intern();
nextToken = Token.STRING_LITERAL;
return;
case '\n':
incrementLineNumber();
// drop through
case ' ':
case '\t':
case '\r':
nextTokenStartOffset = inputOffset;
break;
default:
if (c < 0x80 && !Character.isLetter(c)) {
throw new StaticError("Invalid character '" + c + "' in expression");
}
/* fall through */
case '_':
loop:
for (;inputOffset < inputLength; inputOffset++) {