/*
* xtc - The eXTensible Compiler
* Copyright (C) 2004-2007 Robert Grimm
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
* USA.
*/
package xtc.parser;
import java.io.IOException;
import java.io.Reader;
import xtc.tree.Locatable;
import xtc.tree.Location;
import xtc.util.Action;
import xtc.util.Pair;
/**
* The base class for packrat parsers.
*
* @author Robert Grimm
* @version $Revision: 1.17 $
*/
public abstract class ParserBase {
/**
 * The platform's line separator, as reported by the
 * <code>line.separator</code> system property.  It is used to
 * terminate the lines of formatted parse errors.
 */
public static final String NEWLINE = System.getProperty("line.separator");
/**
 * The start index for lines.  Note that this constant mirrors
 * {@link xtc.Constants#FIRST_LINE} to avoid parsers depending on
 * that class.  The very first column of a new parser is assigned
 * this line number.
 */
public static final int FIRST_LINE = 1;
/**
 * The start index for columns.  Note that this constant mirrors
 * {@link xtc.Constants#FIRST_COLUMN} to avoid parsers depending on
 * that class.  The column number is reset to this value after each
 * line terminator.
 */
public static final int FIRST_COLUMN = 1;
/**
 * The default size for the arrays storing the memoization table's
 * columns.  The two-argument constructor passes <code>INIT_SIZE -
 * 1</code> as the stream length, so that the arrays initially have
 * exactly this many entries.
 */
public static final int INIT_SIZE = 4096;
/**
 * The increment for the arrays storing the memoization table's
 * columns.  The arrays grow by this many entries whenever they are
 * full and another character or column is needed; the only
 * exception is the slot for the end-of-file, which grows the arrays
 * by a single entry.
 */
public static final int INCR_SIZE = 4096;
// -------------------------------------------------------------------------
/** The reader for the character stream to be parsed. */
protected Reader yyReader;
/**
 * The number of table entries filled so far.  {@link
 * #character(int)} increments this count for every character read
 * and once more when it reads the end-of-file; hence, after the
 * end-of-file has been reached, the count includes one slot for the
 * end-of-file itself.  {@link #resetTo(int)} decreases the count by
 * the number of discarded characters.
 */
protected int yyCount;
/**
 * The flag for whether the end-of-file has been reached.  It is set
 * when the reader returns -1.
 */
protected boolean yyEOF;
/**
 * The characters consumed so far.  Only the first {@link #yyCount}
 * entries are meaningful; the array is grown as needed.
 */
protected char[] yyData;
/**
 * The memoization table columns.  Columns are created lazily by
 * {@link #column(int)}, so that entries may be <code>null</code>;
 * the entry at index 0 is always allocated.  This array always
 * grows together with {@link #yyData}.
 */
protected Column[] yyColumns;
// -------------------------------------------------------------------------
/**
 * Create a new parser base with the default table size.  This
 * constructor delegates to {@link #ParserBase(Reader,String,int)}
 * with a stream length of <code>INIT_SIZE - 1</code>.
 *
 * @param reader The reader for the character stream to be parsed.
 * @param file The name of the file backing the character stream.
 * @throws NullPointerException Signals a null file name.
 */
public ParserBase(final Reader reader, final String file) {
  this(reader, file, INIT_SIZE - 1);
}
/**
 * Create a new parser base.  The character and column arrays are
 * allocated with <code>size + 1</code> entries, and the first column
 * is initialized to the start of the specified file, i.e., line
 * {@link #FIRST_LINE} and column {@link #FIRST_COLUMN}.
 *
 * <p />Note that this constructor invokes {@link #newColumn()} to
 * allocate the first column.
 *
 * @param reader The reader for the character stream to be parsed.
 * @param file The name of the file backing the character stream.
 * @param size The length of the character stream.
 * @throws NullPointerException Signals a null file name.
 * @throws IllegalArgumentException Signals a negative file size.
 */
public ParserBase(final Reader reader, final String file, final int size) {
  // Validate the arguments, file name first.
  if (null == file) {
    throw new NullPointerException("Null file");
  }
  if (size < 0) {
    throw new IllegalArgumentException("Negative size: " + size);
  }

  // Set up the (empty) input buffer and memoization table.
  final int capacity = size + 1;

  yyReader  = reader;
  yyCount   = 0;
  yyEOF     = false;
  yyData    = new char[capacity];
  yyColumns = new Column[capacity];

  // The first column always exists and marks the start of the file.
  final Column first = newColumn();
  first.file   = file;
  first.seenCR = false;
  first.line   = FIRST_LINE;
  first.column = FIRST_COLUMN;

  yyColumns[0] = first;
}
// -------------------------------------------------------------------------
/**
 * Reset this parser to the specified index.  This method discards
 * the input and all memoized intermediate results up to and
 * excluding the specified index.  The index should be determined by
 * accessing {@link SemanticValue#index} from a previous,
 * <i>successful</i> parse (i.e., the result must be a {@link
 * SemanticValue semantic value}).
 *
 * <p />After this method returns, the character formerly at the
 * specified index is at index 0, and the first column carries the
 * location information (file, line, column, and carriage return
 * flag) of the specified index.  All other columns are discarded,
 * including a column that may have been created just past the last
 * character read.  Resetting to index 0 has no effect.
 *
 * @param index The index.
 * @throws IndexOutOfBoundsException Signals an invalid index.
 */
public final void resetTo(final int index) {
  // Check the specified index.
  if (0 > index) {
    throw new IndexOutOfBoundsException("Parser index: " + index);
  } else if (0 == index) {
    // There's nothing to see here.  Move on.
    return;
  } else if (index >= yyCount) {
    throw new IndexOutOfBoundsException("Parser index: " + index);
  }

  // Get the column at the specified index (to make sure we have the
  // corresponding location information) and construct its
  // replacement.
  Column c1 = column(index);
  Column c2 = newColumn();

  c2.file   = c1.file;
  c2.seenCR = c1.seenCR;
  c2.line   = c1.line;
  c2.column = c1.column;

  yyColumns[0] = c2;

  // Next, shift any read-in characters.
  final int length = yyCount - index;

  System.arraycopy(yyData, index, yyData, 0, length);

  // Next, clear the rest of the memoization table.
  for (int i=length; i<yyCount; i++) {
    yyData[i] = 0;
  }

  // Note that column() may have allocated the column just past the
  // last character read (i.e., at index yyCount).  That column must
  // be discarded as well; otherwise, its stale location and memoized
  // results would be reused for a different position in the input.
  final int limit = Math.min(yyCount + 1, yyColumns.length);
  for (int i=1; i<limit; i++) {
    yyColumns[i] = null;
  }

  // Finally, fix the count.
  yyCount = length;

  // Done.
}
// -------------------------------------------------------------------------
/**
 * Grow the memoization table by the specified increment.  Both the
 * character array and the column array are replaced with larger
 * copies, which preserve all existing entries.
 *
 * @param incr The increment.
 */
private void growBy(int incr) {
  // Enlarge the character buffer.
  final char[] data = new char[yyData.length + incr];
  System.arraycopy(yyData, 0, data, 0, yyData.length);
  yyData = data;

  // Enlarge the column table by the same amount.
  final Column[] columns = new Column[yyColumns.length + incr];
  System.arraycopy(yyColumns, 0, columns, 0, yyColumns.length);
  yyColumns = columns;
}
// -------------------------------------------------------------------------
/**
 * Create a new column.  A concrete implementation of this method
 * should simply return a new memoization table column.  The caller
 * fills in the column's file, line, column, and carriage return
 * flag.
 *
 * <p />Note that this method is invoked from this class'
 * constructor, i.e., before a subclass' own constructor body has
 * run.
 *
 * @return A new memoization table column.
 */
protected abstract Column newColumn();
/**
 * Get the column at the specified index.  If the column at the
 * specified index has not been created yet, it is created as a
 * side-effect of calling this method.
 *
 * <p />The location of a newly created column is determined by
 * starting with the closest preceding column and carrying its
 * location forward over the intervening characters.  A carriage
 * return, a line feed, and a carriage return immediately followed by
 * a line feed each count as a single line terminator.  A tab
 * advances to the next tab stop; tab stops are eight columns apart,
 * with the first tab stop at {@link #FIRST_COLUMN}.
 *
 * @param index The index.
 * @return The corresponding column.
 * @throws IndexOutOfBoundsException Signals an invalid index.
 */
protected final Column column(final int index) {
  // A memoized production may try to access the entry just past the
  // current end of the table before the corresponding character has
  // been read.  Hence, we may need to grow the table.
  if (yyColumns.length == index) growBy(INCR_SIZE);

  // Note that the array access below will generate an index out of
  // bounds exception for invalid indices.
  Column c = yyColumns[index];
  if (null != c) return c;

  // Find the last non-null column.  The column at index 0 always
  // exists, so the search is guaranteed to succeed.
  Column last = null;
  int    start;
  for (start=index; start>=0; start--) {
    last = yyColumns[start];
    if (null != last) break;
  }

  // Now, carry the location information forward.
  int     line   = last.line;
  int     column = last.column;
  boolean seenCR = last.seenCR;

  for (int i=start; i<index; i++) {
    switch (yyData[i]) {
    case '\t':
      // Advance to the next tab stop.  Column numbers start at
      // FIRST_COLUMN, so the distance to the tab stop must be
      // computed on the zero-based offset; rounding the column
      // number itself yields wrong results for a non-zero
      // FIRST_COLUMN.
      column = ((((column - FIRST_COLUMN) >> 3) + 1) << 3) + FIRST_COLUMN;
      seenCR = false;
      break;
    case '\n':
      // A line feed directly after a carriage return belongs to the
      // same line terminator, which has already been counted.
      if (! seenCR) {
        line++;
        column = FIRST_COLUMN;
      }
      seenCR = false;
      break;
    case '\r':
      line++;
      column = FIRST_COLUMN;
      seenCR = true;
      break;
    default:
      column++;
      seenCR = false;
    }
  }

  // Create the new column.
  c        = newColumn();
  c.file   = last.file;
  c.seenCR = seenCR;
  c.line   = line;
  c.column = column;
  yyColumns[index] = c;

  return c;
}
// -------------------------------------------------------------------------
/**
 * Parse a character at the specified index.  Characters that have
 * already been read are returned from the internal buffer.  The
 * only position that may be read from the underlying reader is the
 * one immediately following the characters read so far.
 *
 * @param index The index.
 * @return The character or -1 if the end-of-file has been reached.
 * @throws IOException
 *   Signals an exceptional condition while accessing the character
 *   stream.
 * @throws IndexOutOfBoundsException Signals an invalid index.
 */
protected final int character(final int index) throws IOException {
  // Is the position already covered by the buffer?
  if (index < yyCount) {
    // Once the end-of-file has been seen, the last slot stands for
    // the end-of-file itself.
    if (yyEOF && (index == yyCount - 1)) {
      return -1;
    }
    return yyData[index];
  }

  // We can only read the very next position, and only if the
  // end-of-file has not been reached yet.
  if (yyEOF || (index != yyCount)) {
    throw new IndexOutOfBoundsException("Parser index: " + index);
  }

  // Read another character.
  final int     ch    = yyReader.read();
  final boolean atEnd = (-1 == ch);

  // Make room if necessary.  The end-of-file needs only one slot.
  if (yyData.length <= yyCount) {
    growBy(atEnd ? 1 : INCR_SIZE);
  }

  // Remember either the end-of-file or the character.
  if (atEnd) {
    yyEOF = true;
  } else {
    yyData[index] = (char)ch;
  }
  yyCount++;

  return ch;
}
/**
 * Get the difference between the specified indices, i.e., the
 * buffered characters from the start index (inclusive) up to the end
 * index (exclusive).
 *
 * @param start The start index.
 * @param end The end index.
 * @return The difference as a string.
 */
protected final String difference(final int start, final int end) {
  if (start == end) {
    return "";
  }
  return new String(yyData, start, end - start);
}
/**
 * Determine whether the specified index represents the end-of-file.
 * This is the case only if the end-of-file has already been read and
 * the index denotes the last slot of the input.
 *
 * @param index The index.
 * @return <code>true</code> if the specified index represents EOF.
 */
public final boolean isEOF(final int index) {
  if (! yyEOF) {
    return false;
  }
  return (yyCount - 1) == index;
}
/**
 * Get the line at the specified index.  If the index denotes the
 * line feed of a carriage return / line feed pair, the line
 * terminated by that pair is returned.
 *
 * @param index The index.
 * @return The corresponding line, without any line terminating
 *   characters.
 * @throws IndexOutOfBoundsException Signals an invalid index.
 * @throws IOException Signals an I/O error.
 */
public final String lineAt(int index) throws IOException {
  if (0 > index) {
    throw new IndexOutOfBoundsException("Parser index: " + index);
  }

  // Step back from the line feed of a CR/LF pair onto the carriage
  // return, so that both scans below see the same line.
  if ((0 < index) &&
      ('\n' == character(index)) &&
      ('\r' == character(index - 1))) {
    index--;
  }

  // Scan forward to the line terminator or the end-of-file.
  int end = index;
  while (true) {
    final int next = character(end);
    if ((-1 == next) || ('\r' == next) || ('\n' == next)) {
      break;
    }
    end++;
  }

  // Scan backward to the previous line terminator or the start of
  // the input.
  int start = index;
  while (0 < start) {
    final int prev = character(start - 1);
    if (('\r' == prev) || ('\n' == prev)) {
      break;
    }
    start--;
  }

  return difference(start, end);
}
// -------------------------------------------------------------------------
/**
 * Get the location for the specified index.  The location is
 * created from the file, line, and column of the corresponding
 * memoization table column, together with the index itself and the
 * index of the last slot read so far.
 *
 * @param index The index.
 * @return The corresponding location.
 */
public final Location location(final int index) {
  final Column col  = column(index);
  final int    last = yyCount - 1;

  return new Location(col.file, col.line, col.column, index, last);
}
/**
 * Set the location for the specified index.  This method updates
 * the internal location based on, for example, a line marker
 * recognized by the parser.
 *
 * <p />This method must be called before any nodes are created for
 * positions at or beyond the specified index &mdash; unless the
 * specified file, line, and column are the same as the internal
 * location for the index.  The line number may be one less than the
 * start index for lines ({@link #FIRST_LINE}), to account for a
 * line marker being present in the input.  The column number is
 * generally expected to be the start index for columns ({@link
 * #FIRST_COLUMN}), again accounting for a line marker being present
 * in the input.
 *
 * @param index The index.
 * @param file The new file name.
 * @param line The new line number.
 * @param column The new column number.
 * @throws NullPointerException Signals a null file name.
 * @throws IllegalArgumentException Signals an invalid line or
 *   column number.
 * @throws IndexOutOfBoundsException Signals an invalid index.
 * @throws IllegalStateException Signals that the index comes at or
 *   before any memoized results.
 */
protected final void setLocation(final int index, final String file,
                                 final int line, final int column) {
  // Validate the new location first, then the index.
  if (null == file) {
    throw new NullPointerException("Null file");
  }
  if (line < FIRST_LINE - 1) {
    throw new IllegalArgumentException("Invalid line number: " + line);
  }
  if (column < FIRST_COLUMN) {
    throw new IllegalArgumentException("Invalid column number: " + column);
  }
  if ((index < 0) || (index >= yyCount)) {
    throw new IndexOutOfBoundsException("Parser index: " + index);
  }

  // If the column already exists, the call is only legal when it
  // repeats the existing location or when it targets the first
  // column, which is always allocated.
  final Column existing = yyColumns[index];
  if (null != existing) {
    final boolean unchanged = file.equals(existing.file) &&
      (line == existing.line) && (column == existing.column);

    if (unchanged) {
      return;
    }
    if (0 != index) {
      throw new IllegalStateException("Location at index " + index +
                                      " is already committed");
    }
  }

  // No column after the index may have been allocated yet.
  for (int i = index + 1; i < yyCount; i++) {
    if (null != yyColumns[i]) {
      throw new IllegalStateException("Location at index " + index +
                                      " is already committed");
    }
  }

  // Let column() allocate the column, so that the seenCR flag is
  // carried forward correctly, and then overwrite the location.
  final Column target = column(index);
  target.file   = file;
  target.line   = line;
  target.column = column;
}
/**
 * Set the location for the specified locatable object.  This method
 * is equivalent to:<pre>
 *   if ((null != locatable) &amp;&amp; (! locatable.hasLocation())) {
 *     locatable.setLocation(location(index));
 *   }
 * </pre>
 * In particular, a null object and an object that already has a
 * location are left untouched.
 *
 * @param locatable The locatable object.
 * @param index The index.
 */
public final void setLocation(final Locatable locatable, final int index) {
  // Nothing to annotate.
  if (null == locatable) {
    return;
  }
  // Never overwrite an existing location.
  if (locatable.hasLocation()) {
    return;
  }
  locatable.setLocation(location(index));
}
// -------------------------------------------------------------------------
/**
 * Apply the specified actions on the specified seed.  The {@link
 * xtc.util.Action actions} are run in list order: the first action
 * receives the seed, and every following action receives the result
 * of its predecessor.  For an empty list, the seed itself is
 * returned.
 *
 * @param actions The actions to apply.
 * @param seed The initial argument.
 * @return The result of applying the actions.
 */
protected final <T> T apply(Pair<Action<T>> actions, T seed) {
  T result = seed;
  for (Pair<Action<T>> rest = actions; ! rest.isEmpty(); rest = rest.tail()) {
    result = rest.head().run(result);
  }
  return result;
}
/**
 * Apply the specified actions on the specified seed while also
 * setting the results' locations.  The {@link xtc.util.Action
 * actions} are run in list order: the first action receives the
 * seed, and every following action receives the result of its
 * predecessor.  The result of each action is assigned the location
 * for the specified index.  For an empty list, the seed itself is
 * returned and no location is determined.
 *
 * @param actions The actions to apply.
 * @param seed The initial argument.
 * @param index The index representing the current parser location.
 * @return The result of applying the actions.
 */
protected final <T extends Locatable> T apply(Pair<Action<T>> actions,
                                              T seed, final int index) {
  if (actions.isEmpty()) {
    return seed;
  }

  // All results share the same location.
  final Location  loc    = location(index);
  T               result = seed;
  Pair<Action<T>> rest   = actions;

  while (! rest.isEmpty()) {
    result = rest.head().run(result);
    result.setLocation(loc);
    rest   = rest.tail();
  }

  return result;
}
// -------------------------------------------------------------------------
/**
 * Format the specified parse error.  The specified error must have
 * been created by this parser.
 *
 * <p />If the error has an index other than -1, the message starts
 * with the file, line, and column of that index and is followed by
 * the offending input line as well as a line with a caret
 * (<code>^</code>) marking the error's column.  Otherwise, the
 * message only consists of the error text.
 *
 * @param error The error.
 * @return The corresponding error message.
 * @throws IOException Signals an I/O error while creating the error
 *   message.
 */
public final String format(ParseError error) throws IOException {
  final StringBuilder buf     = new StringBuilder();
  final boolean       located = (-1 != error.index);

  // The location prefix, if the error has a position.
  Column col = null;
  if (located) {
    col = column(error.index);
    buf.append(col.file).append(':');
    buf.append(col.line).append(':');
    buf.append(col.column).append(": ");
  }

  // The message proper.
  buf.append("error: ").append(error.msg);

  // The offending line, followed by the position marker.
  if (located) {
    final String line = lineAt(error.index);

    buf.append(NEWLINE).append(line).append(NEWLINE);

    int padding = col.column - FIRST_COLUMN;
    while (0 < padding) {
      buf.append(' ');
      padding--;
    }
    buf.append('^').append(NEWLINE);
  }

  return buf.toString();
}
/**
 * Signal the specified parse error as a parse exception.  The
 * exception's detail message is the error as formatted by {@link
 * #format(ParseError)}.  The specified error must have been created
 * by this parser.  This method never returns normally.
 *
 * @param error The parse error.
 * @throws ParseException Signals the error.
 * @throws IOException Signals an I/O error while creating the
 *   exception's detail message.
 */
public final void signal(ParseError error) throws ParseException,IOException {
  final String message = format(error);
  throw new ParseException(message);
}
/**
 * Extract the specified result's value.  If the result is a {@link
 * SemanticValue}, this method returns the actual value; if it is a
 * {@link ParseError}, it signals a parse exception with the
 * corresponding, formatted message.  The specified result must have
 * been created by this parser.
 *
 * @param r The result.
 * @return The corresponding value.
 * @throws ParseException Signals that the result represents a parse
 *   error.
 * @throws IOException Signals an I/O error while creating the parse
 *   error's detail message.
 */
public final Object value(Result r) throws ParseException, IOException {
  if (r.hasValue()) {
    return r.semanticValue();
  }
  // The result is an error; report it just like signal() does.
  throw new ParseException(format(r.parseError()));
}
// -------------------------------------------------------------------------
/**
 * Get the next few characters from the specified index.  This
 * method only considers characters that have already been read and
 * returns at most 20 of them; it never reads from the underlying
 * character stream.
 *
 * @param index The index.
 * @return The next few characters, which may be the empty string.
 */
protected final String peek(final int index) {
  // The slot for the end-of-file does not hold a character.
  final int available = yyEOF ? yyCount - 1 : yyCount;
  if (index >= available) {
    return "";
  }

  final int end = Math.min(index + 20, available);
  return new String(yyData, index, end - index);
}
// -------------------------------------------------------------------------
/**
 * Cast the specified object to the type expected by the caller.
 * This method is used to avoid spurious compiler warnings for
 * parsers utilizing generic types.  Note that the cast is unchecked.
 *
 * @param o The object.
 * @return The cast object.
 */
@SuppressWarnings("unchecked")
protected static final <T> T cast(Object o) {
  final T typed = (T)o;
  return typed;
}
/**
 * Cast the list starting at the specified pair to a list with the
 * element type expected by the caller.  This method is used to avoid
 * spurious compiler warnings for parsers utilizing generic types.
 * Note that the cast is unchecked.
 *
 * @param p The list.
 * @return The cast list.
 */
@SuppressWarnings("unchecked")
protected static final <T> Pair<T> cast(Pair<?> p) {
  final Pair<T> typed = (Pair<T>)p;
  return typed;
}
}