* Copyright (C) Codehaus.org *
* ------------------------------------------------------------------------- *
* Licensed under the Apache License, Version 2.0 (the "License"); *
* you may not use this file except in compliance with the License. *
* You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an "AS IS" BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
package org.codehaus.jparsec;
import static org.codehaus.jparsec.util.Checks.checkArgument;
import java.io.IOException;
import java.nio.CharBuffer;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;
import org.codehaus.jparsec.annotations.Private;
import org.codehaus.jparsec.error.ParserException;
import org.codehaus.jparsec.functors.Map;
import org.codehaus.jparsec.functors.Map2;
import org.codehaus.jparsec.functors.Maps;
import org.codehaus.jparsec.util.Checks;
* Defines grammar and encapsulates parsing logic.
* A {@link Parser} takes as input a {@link CharSequence} source and parses it when the
* {@link #parse(CharSequence)} method is called. A value of type {@code T} will be returned if
* parsing succeeds, or a {@link ParserException} is thrown to indicate parsing error. For example:
* <pre>
* Parser&lt;String&gt; scanner = Scanners.IDENTIFIER;
* assertEquals("foo", scanner.parse("foo"));
* </pre>
* <p> {@code Parser}s are immutable and inherently covariant on the type parameter {@code T}.
* Because Java generics have no native support for covariant type parameters, a workaround is to use
* the {@link Parser#cast()} method to explicitly force covariance whenever needed.
* <p> {@code Parser}s run either on character level to scan the source, or on token level to parse
* a list of {@link Token} objects returned from another parser. This other parser that returns the
* list of tokens for token level parsing is hooked up via the {@link #from(Parser)} or
* {@link #from(Parser, Parser)} method.
* <p> The following are important naming conventions used throughout the library:
* <ul>
* <li> A character level parser object that recognizes a single lexical word is called a scanner.
* <li> A scanner that translates the recognized lexical word into a token is called a tokenizer.
* <li> A character level parser object that does lexical analysis and returns a list of
* {@link Token} is called a lexer.
* <li> All {@code index} parameters are 0-based indexes in the original source.
* </ul>
* @author Ben Yu
public abstract class Parser<T> {
// No access modifier: the constructor is package-private, so only classes declared in this
// package can extend Parser directly.
Parser() {}
* An atomic mutable reference to {@link Parser}. Is useful to work around circular dependency
* between parser objects.
* <p> Example usage: <pre>
* Parser.Reference<Foo> ref = Parser.newReference();
* ...
* Parser<Bar> barParser = barParser(ref.lazy());
* Parser<Foo> fooParser = fooParser(barParser);
* ref.set(fooParser);
* </pre>
public static final class Reference<T> extends AtomicReference<Parser<T>> {
private static final long serialVersionUID = -8778697271614979497L;
private final Parser<T> lazy = new LazyParser<T>(this);
* A {@link Parser} that delegates to the parser object referenced by {@code this}
* during parsing time.
public Parser<T> lazy() {
return lazy;
/** Creates a new instance of {@link Reference}. */
public static <T> Reference<T> newReference() {
return new Reference<T>();
/** A {@link Parser} that executes {@code this}, and returns {@code value} if succeeds. */
public final <R> Parser<R> retn(R value) {
return next(Parsers.constant(value));
* A {@link Parser} that sequentially executes {@code this} and then {@code parser}.
* The return value of {@code parser} is preserved.
public final <R> Parser<R> next(Parser<R> parser) {
return Parsers.sequence(this, parser);
* A {@link Parser} that executes {@code this}, maps the result using {@code map}
* to another {@code Parser} object to be executed as the next step.
public final <To> Parser<To> next(Map<? super T, ? extends Parser<? extends To>> map) {
return new BindNextParser<T, To>(this, map);
* A {@link Parser} that sequentially executes {@code this} and then {@code parser},
* whose return value is ignored.
public final Parser<T> followedBy(Parser<?> parser) {
return Parsers.sequence(this, parser, InternalFunctors.<T, Object>firstOfTwo());
* A {@link Parser} that succeeds if {@code this} succeeds and the pattern recognized by
* {@code parser} isn't following.
public final Parser<T> notFollowedBy(Parser<?> parser) {
return followedBy(parser.not());
* {@code p.many()} is equivalent to {@code p*} in EBNF. The return values are collected and
* returned in a {@link List}.
public final Parser<List<T>> many() {
return atLeast(0);
/** {@code p.skipMany()} is equivalent to {@code p*} in EBNF. The return values are discarded. */
public final Parser<Void> skipMany() {
return skipAtLeast(0);
* {@code p.many1()} is equivalent to {@code p+} in EBNF. The return values are collected and
* returned in a {@link List}.
public final Parser<List<T>> many1() {
return atLeast(1);
* {@code p.skipMany1()} is equivalent to {@code p+} in EBNF. The return values are discarded.
public final Parser<Void> skipMany1() {
return skipAtLeast(1);
* A {@link Parser} that runs {@code this} parser greedily for at least {@code min} times.
* The return values are collected and returned in a {@link List}.
public final Parser<List<T>> atLeast(int min) {
return new RepeatAtLeastParser<T>(this, Checks.checkMin(min));
* A {@link Parser} that runs {@code this} parser greedily for at least {@code min} times
* and ignores the return values.
public final Parser<Void> skipAtLeast(int min) {
return new SkipAtLeastParser(this, Checks.checkMin(min));
* A {@link Parser} that sequentially runs {@code this} for {@code n} times and ignores the
* return values.
public final Parser<Void> skipTimes(int n) {
return skipTimes(n, n);
* A {@link Parser} that runs {@code this} for {@code n} times and collects the return values in a
* {@link List}.
public final Parser<List<T>> times(int n) {
return times(n, n);
* A {@link Parser} that runs {@code this} parser for at least {@code min} times and up to
* {@code max} times. The return values are collected and returned in {@link List}.
public final Parser<List<T>> times(int min, int max) {
Checks.checkMinMax(min, max);
return new RepeatTimesParser<T>(this, min, max);
* A {@link Parser} that runs {@code this} parser for at least {@code min} times and up to
* {@code max} times, with all the return values ignored.
public final Parser<Void> skipTimes(int min, int max) {
Checks.checkMinMax(min, max);
return new SkipTimesParser(this, min, max);
* A {@link Parser} that runs {@code this} parser and transforms the return value using
* {@code map}.
public final <R> Parser<R> map(Map<? super T, ? extends R> map) {
return new MapParser<T, R>(this, map);
* {@code p1.or(p2)} is equivalent to {@code p1 | p2} in EBNF.
* @param alternative the alternative parser to run if this fails.
public final Parser<T> or(Parser<? extends T> alternative) {
return Parsers.or(this, alternative);
* {@code p.optional()} is equivalent to {@code p?} in EBNF. {@code null} is the result when
* {@code this} fails with no partial match.
public final Parser<T> optional() {
return Parsers.plus(this, Parsers.<T>always());
* A {@link Parser} that returns {@code defaultValue} if {@code this} fails with no partial match.
public final Parser<T> optional(T defaultValue) {
return Parsers.plus(this, Parsers.constant(defaultValue));
/** A {@link Parser} that fails if {@code this} succeeds. Any input consumption is undone. */
public final Parser<?> not() {
return not(toString());
* A {@link Parser} that fails if {@code this} succeeds. Any input consumption is undone.
* @param unexpected the name of what we don't expect.
public final Parser<?> not(String unexpected) {
return peek().ifelse(Parsers.unexpected(unexpected), Parsers.always());
/** A {@link Parser} that runs {@code this} and undoes any input consumption if succeeds. */
public final Parser<T> peek() {
return new PeekParser<T>(this);
/** A {@link Parser} that undoes any partial match if {@code this} fails. */
public final Parser<T> atomic() {
return new AtomicParser<T>(this);
* A {@link Parser} that runs {@code this} parser and sets the number of logical steps explicitly
* to {@code n}.
final Parser<T> step(int n) {
checkArgument(n >= 0, "step < 0");
return new StepParser<T>(this, n);
* A {@link Parser} that returns {@code true} if {@code this} succeeds, {@code false} otherwise.
public final Parser<Boolean> succeeds() {
return ifelse(Parsers.TRUE, Parsers.FALSE);
/** A {@link Parser} that returns {@code true} if {@code this} fails, {@code false} otherwise. */
public final Parser<Boolean> fails() {
return ifelse(Parsers.FALSE, Parsers.TRUE);
* A {@link Parser} that runs {@code consequence} if {@code this} succeeds, or
* {@code alternative} otherwise.
public final <R> Parser<R> ifelse(
Parser<? extends R> consequence, Parser<? extends R> alternative) {
return ifelse(Maps.constant(consequence), alternative);
* A {@link Parser} that runs {@code consequence} if {@code this} succeeds, or
* {@code alternative} otherwise.
public final <R> Parser<R> ifelse(
Map<? super T, ? extends Parser<? extends R>> consequence, Parser<? extends R> alternative) {
return new IfElseParser<R, T>(this, consequence, alternative);
* A {@link Parser} that reports reports an error about {@code name} expected, if {@code this}
* fails with no partial match.
public final Parser<T> label(String name) {
return Parsers.plus(this, Parsers.<T>expect(name));
* Casts {@code this} to a {@link Parser} of type {@code R}.
* Use it only if you know the parser actually returns value of type {@code R}.
public final <R> Parser<R> cast() {
return (Parser<R>) this;
* A {@link Parser} that runs {@code this} between {@code before} and {@code after}.
* The return value of {@code this} is preserved.
* <p> Equivalent to {@link Parsers#between(Parser, Parser, Parser)}, which preserves the
* natural order of the parsers in the argument list, but is a bit more verbose.
public final Parser<T> between(Parser<?> before, Parser<?> after) {
return before.next(followedBy(after));
* A {@link Parser} that runs {@code this} 1 or more times separated by {@code delim}.
* <p> The return values are collected in a {@link List}.
public final Parser<List<T>> sepBy1(Parser<?> delim) {
final Parser<T> afterFirst = delim.step(0).next(this);
Map<T, Parser<List<T>>> binder = new Map<T, Parser<List<T>>>() {
public Parser<List<T>> map(T firstValue) {
return new RepeatAtLeastParser<T>(
afterFirst, 0, ListFactories.arrayListFactoryWithFirstElement(firstValue));
return next(binder);
* A {@link Parser} that runs {@code this} 0 or more times separated by {@code delim}.
* <p> The return values are collected in a {@link List}.
public final Parser<List<T>> sepBy(Parser<?> delim) {
return Parsers.plus(sepBy1(delim), EmptyListParser.<T>instance());
* A {@link Parser} that runs {@code this} for 0 or more times delimited and terminated by
* {@code delim}.
* <p> The return values are collected in a {@link List}.
public final Parser<List<T>> endBy(Parser<?> delim) {
return followedBy(delim).many();
* A {@link Parser} that runs {@code this} for 1 or more times delimited and terminated by
* {@code delim}.
* <p> The return values are collected in a {@link List}.
public final Parser<List<T>> endBy1(Parser<?> delim) {
return followedBy(delim).many1();
* A {@link Parser} that runs {@code this} for 1 ore more times separated and optionally
* terminated by {@code delim}. For example: {@code "foo;foo;foo"} and {@code "foo;foo;"} both
* matches {@code foo.sepEndBy1(semicolon)}.
* <p> The return values are collected in a {@link List}.
public final Parser<List<T>> sepEndBy1(final Parser<?> delim) {
return next(new Map<T, Parser<List<T>>>() {
public Parser<List<T>> map(T first) {
return new DelimitedListParser<T>(
Parser.this, delim, ListFactories.arrayListFactoryWithFirstElement(first));
* A {@link Parser} that runs {@code this} for 0 ore more times separated and optionally
* terminated by {@code delim}. For example: {@code "foo;foo;foo"} and {@code "foo;foo;"} both
* matches {@code foo.sepEndBy(semicolon)}.
* <p> The return values are collected in a {@link List}.
public final Parser<List<T>> sepEndBy(Parser<?> delim) {
return Parsers.plus(sepEndBy1(delim), EmptyListParser.<T>instance());
* A {@link Parser} that runs {@code op} for 0 or more times greedily, then runs {@code this}.
* The {@link Map} objects returned from {@code op} are applied from right to left to the return
* value of {@code p}.
* <p> {@code p.prefix(op)} is equivalent to {@code op* p} in EBNF.
public final Parser<T> prefix(Parser<? extends Map<? super T, ? extends T>> op) {
return Parsers.sequence(op.many(), this, Parsers.PREFIX_OPERATOR_MAP2);
* A {@link Parser} that runs {@code this} and then runs {@code op} for 0 or more times greedily.
* The {@link Map} objects returned from {@code op} are applied from left to right to the return
* value of p.
* <p> {@code p.postfix(op)} is equivalent to {@code p op*} in EBNF.
public final Parser<T> postfix(Parser<? extends Map<? super T, ? extends T>> op) {
return Parsers.sequence(this, op.many(), Parsers.POSTFIX_OPERATOR_MAP2);
* A {@link Parser} that parses non-associative infix operator. Runs {@code this} for the left
* operand, and then runs {@code op} and {@code this} for the operator and the right operand
* optionally. The {@link Map2} objects returned from {@code op} are applied to the return values
* of the two operands, if any.
* <p> {@code p.infixn(op)} is equivalent to {@code p (op p)?} in EBNF.
public final Parser<T> infixn(Parser<? extends Map2<? super T, ? super T, ? extends T>> op) {
return Parsers.infixn(this, op);
* A {@link Parser} for left-associative infix operator. Runs {@code this} for the left operand,
* and then runs {@code op} and {@code this} for the operator and the right operand for 0 or more
* times greedily. The {@link Map2} objects returned from {@code op} are applied from left to right
* to the return values of {@code this}, if any.
* For example: {@code a + b + c + d} is evaluated as {@code (((a + b)+c)+d)}.
* <p> {@code p.infixl(op)} is equivalent to {@code p (op p)*} in EBNF.
public final Parser<T> infixl(
Parser<? extends Map2<? super T, ? super T, ? extends T>> op) {
// somehow generics doesn't work if we inline the code here.
return Parsers.infixl(this, op);
* A {@link Parser} for right-associative infix operator. Runs {@code this} for the left operand,
* and then runs {@code op} and {@code this} for the operator and the right operand for 0 or more
* times greedily. The {@link Map2} objects returned from {@code op} are applied from right to
* left to the return values of {@code this}, if any.
* For example: {@code a + b + c + d} is evaluated as {@code a + (b + (c + d))}.
* <p> {@code p.infixr(op)} is equivalent to {@code p (op p)*} in EBNF.
public final Parser<T> infixr(Parser<? extends Map2<? super T, ? super T, ? extends T>> op) {
return Parsers.infixr(this, op);
* A {@link Parser} that runs {@code this} and wraps the return value in a {@link Token}.
* <p> It is normally not necessary to call this method explicitly. {@link #lexer(Parser)} and
* {@link #from(Parser, Parser)} both do the conversion automatically.
public final Parser<Token> token() {
return new ToTokenParser(this);
/** A {@link Parser} that returns the matched string in the original source. */
public final Parser<String> source() {
return new ReturnSourceParser(this);
* A {@link Parser} that takes as input the {@link Token} collection returned by {@code lexer},
* and runs {@code this} to parse the tokens.
* <p> {@code this} must be a token level parser.
public final Parser<T> from(Parser<? extends Collection<Token>> lexer) {
return Parsers.nested(Parsers.tokens(lexer), followedBy(Parsers.EOF));
* A {@link Parser} that takes as input the tokens returned by {@code tokenizer}
* delimited by {@code delim}, and runs {@code this} to parse the tokens.
* <p> {@code this} must be a token level parser.
public final Parser<T> from(Parser<?> tokenizer, Parser<Void> delim) {
return from(tokenizer.lexer(delim));
* A {@link Parser} that greedily runs {@code this} repeatedly,
* and ignores the pattern recognized by {@code delim} before and after each occurrence.
* The result tokens are wrapped in {@link Token} and are collected and returned in a
* {@link List}.
* <p> It is normally not necessary to call this method explicitly.
* {@link #from(Parser, Parser)} is more convenient for simple uses that just need to connect a
* token level parser with a lexer that produces the tokens.
* When more flexible control over the token list is needed, for example,
* to parse indentation sensitive language, a pre-processor of the token list may be needed.
* <p> {@code this} must be a tokenizer that returns a token value.
public Parser<List<Token>> lexer(Parser<?> delim) {
return delim.optional().next(token().sepEndBy(delim));
* Parses a source string.
* @param source the source string
* @param moduleName the name of the module, this name appears in error message
* @param sourceLocator maps an index of char into line and column numbers
* @return the result
final T parse(CharSequence source, String moduleName, SourceLocator sourceLocator) {
return Parsers.parse(source, followedBy(Parsers.EOF), sourceLocator, moduleName);
* Parses {@code source}.
* @param source the source string
* @param moduleName the name of the module, this name appears in error message
* @return the result
public final T parse(CharSequence source, String moduleName) {
return parse(source, moduleName, new DefaultSourceLocator(source));
/** Parses {@code source}. */
public final T parse(CharSequence source) {
return parse(source, null);
/** Parses source read from {@code readable}. */
public final T parse(Readable readable) throws IOException {
return parse(readable, null);
* Parses source read from {@code readable}.
* @param readable where the source is read from
* @param moduleName the name of the module, this name appears in error message
* @return the result
public final T parse(Readable readable, String moduleName) throws IOException {
StringBuilder builder = new StringBuilder();
copy(readable, builder);
return parse(builder, moduleName);
/** Copies all content from {@code from} to {@code to}. */
@Private static void copy(Readable from, Appendable to) throws IOException {
CharBuffer buf = CharBuffer.allocate(2048);
for (;;) {
int r = from.read(buf);
if (r == -1) break;
to.append(buf, 0, r);
final T getReturn(ParseContext ctxt) {
return (T) ctxt.result;
private ParserException asParserException(Throwable e, ParseContext ctxt) {
if (e instanceof ParserException) return (ParserException) e;
return new ParserException(
e, null, ctxt.module, ctxt.locator.locate(ctxt.getIndex()));
final boolean run(ParseContext ctxt) {
try {
return apply(ctxt);
catch(RuntimeException e) {
throw asParserException(e, ctxt);
/**
 * The parsing logic supplied by each concrete parser. It is invoked by
 * {@code run(ParseContext)}, which rethrows any {@link RuntimeException} raised here as a
 * {@link ParserException}.
 * NOTE(review): presumably returns {@code true} on success and {@code false} on failure;
 * confirm against the implementations.
 */
abstract boolean apply(ParseContext ctxt);