/*
* Copyright 2003-2011 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package groovy.json;
import static groovy.json.JsonTokenType.*;
import groovy.io.LineColumnReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * The lexer reads JSON tokens in a streaming fashion from the underlying reader.
 * <p>
 * Tokens can either be pulled explicitly with {@link #nextToken()}, or consumed
 * through the {@link Iterator} methods {@link #hasNext()} and {@link #next()}.
 *
 * @author Guillaume Laforge
 * @since 1.8.0
 */
public class JsonLexer implements Iterator<JsonToken> {

    private static final char BACKSLASH = '\\';
    private static final char DOUBLE_QUOTE = '"';
    private static final char DOT = '.';
    private static final char MINUS = '-';
    private static final char PLUS = '+';
    private static final char LOWER_E = 'e';
    private static final char UPPER_E = 'E';
    private static final char ZERO = '0';
    private static final char NINE = '9';

    private LineColumnReader reader;

    /** Token looked ahead by {@link #hasNext()} and not yet handed out by {@link #next()}. */
    private JsonToken currentToken = null;

    /**
     * Instantiates a lexer with a reader from which to read JSON tokens.
     * Under the hood, the reader is wrapped in a <code>LineColumnReader</code>,
     * for line and column information, unless it's already an instance of that class.
     *
     * @param reader underlying reader
     */
    public JsonLexer(Reader reader) {
        this.reader = reader instanceof LineColumnReader ? (LineColumnReader) reader : new LineColumnReader(reader);
    }

    /**
     * Underlying reader from which to read the JSON tokens.
     * This reader is an instance of <code>LineColumnReader</code>,
     * to keep track of line and column positions.
     *
     * @return the reader the tokens are read from
     */
    public LineColumnReader getReader() {
        return reader;
    }

    /**
     * Reads the next token from the stream, skipping any leading whitespace.
     *
     * @return the next token, or <code>null</code> when the end of the stream is reached
     *         (including when the stream ends in the middle of a string)
     * @throws JsonException if the upcoming characters cannot form a valid token,
     *                       or if the underlying reader fails
     */
    public JsonToken nextToken() {
        try {
            int firstIntRead = skipWhitespace();
            if (firstIntRead == -1) return null;

            char firstChar = (char) firstIntRead;
            JsonTokenType possibleTokenType = startingWith(firstChar);

            if (possibleTokenType == null) {
                throw new JsonException(
                        "Lexing failed on line: " + reader.getLine() + ", column: " + reader.getColumn() +
                        ", while reading '" + firstChar + "', " +
                        "no possible valid JSON value or punctuation could be recognized."
                );
            }

            // rewind to the mark set by skipWhitespace(), i.e. right before the first character of the token
            reader.reset();

            long startLine = reader.getLine();
            long startColumn = reader.getColumn();

            JsonToken token = new JsonToken();
            token.setStartLine(startLine);
            token.setStartColumn(startColumn);
            token.setEndLine(startLine);
            token.setEndColumn(startColumn + 1);
            token.setType(possibleTokenType);
            token.setText(String.valueOf(firstChar));

            // punctuation and literal constants are the token types declared between OPEN_CURLY and FALSE
            if (possibleTokenType.ordinal() >= OPEN_CURLY.ordinal() && possibleTokenType.ordinal() <= FALSE.ordinal()) {
                return readingConstant(possibleTokenType, token);
            } else if (possibleTokenType == STRING) {
                return readingString(possibleTokenType, token);
            } else if (possibleTokenType == NUMBER) {
                return readingNumber(possibleTokenType, token);
            }
            return null;
        } catch (IOException ioe) {
            throw new JsonException("An IO exception occurred while reading the JSON payload", ioe);
        }
    }

    /**
     * Reads a string token, starting at its opening double quote.
     * The token text keeps the surrounding double quotes, with the escape sequences replaced.
     *
     * @param type  the token type, used to validate the characters read
     * @param token the token to complete
     * @return the completed token, or <code>null</code> if the stream ends before the string is closed
     */
    private JsonToken readingString(JsonTokenType type, JsonToken token) throws IOException {
        StringBuilder currentContent = new StringBuilder("\"");
        // consume the first double quote starting the string
        reader.read();

        // true when the previous character was a backslash that is not itself escaped;
        // looking only at the previous character would mistake the closing quote of "a\\" for an escaped one
        boolean escaped = false;
        for (;;) {
            int read = reader.read();
            if (read == -1) return null;

            char charRead = (char) read;
            currentContent.append(charRead);

            if (charRead == DOUBLE_QUOTE && !escaped && type.matching(currentContent.toString())) {
                token.setEndLine(reader.getLine());
                token.setEndColumn(reader.getColumn());
                token.setText(unescape(currentContent.toString()));
                return token;
            }
            escaped = !escaped && charRead == BACKSLASH;
        }
    }

    /**
     * Reads a number token: consumes every character that may appear in a number,
     * then validates the accumulated text against the token type.
     *
     * @param type  the token type, used to validate the characters read
     * @param token the token to complete
     * @return the completed token
     * @throws JsonException if the characters read do not form a valid number
     */
    private JsonToken readingNumber(JsonTokenType type, JsonToken token) throws IOException {
        StringBuilder currentContent = new StringBuilder();
        for (;;) {
            reader.mark(1);
            int read = reader.read();
            // a number may legitimately be the last thing in the stream: validate what was read so far
            if (read == -1) break;

            char lastCharRead = (char) read;
            if (isNumberChar(lastCharRead)) {
                currentContent.append(lastCharRead);
            } else {
                // the character belongs to the next token: push it back
                reader.reset();
                break;
            }
        }

        String content = currentContent.toString();
        if (!type.matching(content)) {
            throw lexingFailure(content, type);
        }
        token.setEndLine(reader.getLine());
        token.setEndColumn(reader.getColumn());
        token.setText(content);
        return token;
    }

    /** Tells whether the character is a digit, sign, decimal point or exponent marker. */
    private static boolean isNumberChar(char c) {
        return c >= ZERO && c <= NINE ||
                c == DOT || c == MINUS || c == PLUS ||
                c == LOWER_E || c == UPPER_E;
    }

    /** Builds the exception reporting that the given content could not be matched as the given token type. */
    private JsonException lexingFailure(String content, JsonTokenType type) {
        return new JsonException(
                "Lexing failed on line: " +
                reader.getLine() + ", column: " + reader.getColumn() +
                ", while reading '" + content + "', " +
                "was trying to match " + type.getLabel()
        );
    }

    /**
     * Replace unicode escape and other control characters with real characters.
     * <p>
     * The input is processed in a single left-to-right pass, so that an escaped backslash
     * is never combined with the character following it. Backslash sequences which are not
     * recognized escapes (including an incomplete unicode escape or a trailing backslash)
     * are left untouched.
     *
     * @param input text
     * @return input text without the escaping
     */
    public static String unescape(String input) {
        // fast path: no backslash means nothing to unescape
        if (input.indexOf(BACKSLASH) < 0) {
            return input;
        }

        int length = input.length();
        StringBuilder result = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            char current = input.charAt(i);
            if (current != BACKSLASH || i + 1 >= length) {
                result.append(current);
                i++;
                continue;
            }

            // number of input characters consumed by the escape sequence
            int consumed = 2;
            switch (input.charAt(i + 1)) {
                case 'b':
                    result.append('\b');
                    break;
                case 'f':
                    result.append('\f');
                    break;
                case 'n':
                    result.append('\n');
                    break;
                case 'r':
                    result.append('\r');
                    break;
                case 't':
                    result.append('\t');
                    break;
                case '\\':
                    result.append('\\');
                    break;
                case '/':
                    result.append('/');
                    break;
                case '"':
                    result.append('"');
                    break;
                case 'u':
                    if (isHexQuad(input, i + 2)) {
                        result.append((char) Integer.parseInt(input.substring(i + 2, i + 6), 16));
                        consumed = 6;
                    } else {
                        // not a complete unicode escape: keep the backslash as is
                        result.append(current);
                        consumed = 1;
                    }
                    break;
                default:
                    // unknown escape: keep the backslash, the following character is handled on the next iteration
                    result.append(current);
                    consumed = 1;
                    break;
            }
            i += consumed;
        }
        return result.toString();
    }

    /** Tells whether the four characters starting at the given offset exist and are all hexadecimal digits. */
    private static boolean isHexQuad(String text, int offset) {
        if (offset + 4 > text.length()) {
            return false;
        }
        for (int i = offset; i < offset + 4; i++) {
            char c = text.charAt(i);
            boolean hex = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
            if (!hex) {
                return false;
            }
        }
        return true;
    }

    /**
     * When a constant token type is expected, check that the expected constant is read,
     * and update the content of the token accordingly.
     *
     * @param type the token type
     * @param token the token
     * @return the token updated with end column and text updated
     * @throws JsonException if the characters read differ from the expected constant,
     *                       or if the stream ends before the constant is complete
     */
    private JsonToken readingConstant(JsonTokenType type, JsonToken token) {
        try {
            String expected = (String) type.getValidator();
            int numCharsToRead = expected.length();

            // read character by character so that a short read or the end of the stream
            // never leaves padding characters in the text being compared
            StringBuilder charsRead = new StringBuilder(numCharsToRead);
            while (charsRead.length() < numCharsToRead) {
                int read = reader.read();
                if (read == -1) break;
                charsRead.append((char) read);
            }

            String stringRead = charsRead.toString();
            if (!stringRead.equals(expected)) {
                throw lexingFailure(stringRead, type);
            }
            token.setEndColumn(token.getStartColumn() + numCharsToRead);
            token.setText(stringRead);
            return token;
        } catch (IOException ioe) {
            throw new JsonException("An IO exception occurred while reading the JSON payload", ioe);
        }
    }

    /**
     * Skips all the whitespace characters and moves the cursor to the next non-space character.
     * That character is not consumed: the reader is rewound right before it.
     *
     * @return the first non-whitespace character found, or -1 if the end of the stream was reached
     */
    public int skipWhitespace() {
        try {
            int readChar;
            do {
                // remember the position so the first non-whitespace character can be pushed back
                reader.mark(1);
                readChar = reader.read();
            } while (readChar != -1 && Character.isWhitespace((char) readChar));
            reader.reset();
            return readChar;
        } catch (IOException ioe) {
            throw new JsonException("An IO exception occurred while reading the JSON payload", ioe);
        }
    }

    /**
     * Iterator method to know if another token follows,
     * or if we've reached the end of the stream.
     * <p>
     * Calling this method several times in a row does not consume additional tokens:
     * the token looked ahead is kept until {@link #next()} hands it out.
     *
     * @return true if there are more tokens
     */
    public boolean hasNext() {
        if (currentToken == null) {
            currentToken = nextToken();
        }
        return currentToken != null;
    }

    /**
     * Iterator method to get the next token of the stream.
     *
     * @return the next token
     * @throws java.util.NoSuchElementException if there are no more tokens
     */
    public JsonToken next() {
        if (!hasNext()) {
            throw new java.util.NoSuchElementException("No more JSON tokens available from this lexer.");
        }
        JsonToken token = currentToken;
        currentToken = null;
        return token;
    }

    /**
     * Method not implemented.
     *
     * @throws UnsupportedOperationException
     */
    public void remove() {
        throw new UnsupportedOperationException("The method remove() is not supported on this lexer.");
    }
}