/*
 * 10/16/2004
 *
 * RSyntaxDocument.java - A document capable of syntax highlighting, used by
 * RSyntaxTextArea.
 *
 * This library is distributed under a modified BSD license.  See the included
 * RSyntaxTextArea.License.txt file for details.
 */
package org.fife.ui.rsyntaxtextarea;
import java.awt.event.ActionEvent;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.Iterator;
import javax.swing.Action;
import javax.swing.event.*;
import javax.swing.text.*;
import org.fife.ui.rsyntaxtextarea.modes.AbstractMarkupTokenMaker;
import org.fife.ui.rtextarea.RDocument;
import org.fife.util.DynamicIntArray;
/**
 * The document used by {@link org.fife.ui.rsyntaxtextarea.RSyntaxTextArea}.
 * This document is like <code>javax.swing.text.PlainDocument</code> except that
 * it also keeps track of syntax highlighting in the document.  It has a "style"
 * attribute associated with it that determines how syntax highlighting is done
 * (i.e., what language is being highlighted).<p>
 *
 * Instances of <code>RSyntaxTextArea</code> will only accept instances of
 * <code>RSyntaxDocument</code>, since it is this document that keeps
 * track of syntax highlighting.  All others will cause an exception to be
 * thrown.<p>
 *
 * To change the language being syntax highlighted at any time, you merely have
 * to call {@link #setSyntaxStyle}.  Other than that, this document can be
 * treated like any other, with one caveat: all <code>DocumentEvent</code>s of
 * type <code>CHANGE</code> use their offset and length values to represent the
 * first and last lines, respectively, that have had their syntax coloring
 * change.  This is really a hack to increase the speed of the painting code
 * and should really be corrected, but oh well.
 *
 * @author Robert Futrell
 * @version 1.0
 */
public class RSyntaxDocument extends RDocument implements Iterable<Token>,
SyntaxConstants {
* Creates a {@link TokenMaker} appropriate for a given programming
* language.
private transient TokenMakerFactory tokenMakerFactory;
* Splits text into tokens for the current programming language.
private transient TokenMaker tokenMaker;
* The current syntax style. Only cached to keep this class serializable.
private String syntaxStyle;
* Array of values representing the "last token type" on each line. This
* is used in cases such as multi-line comments: if the previous line
* ended with an (unclosed) multi-line comment, we can use this knowledge
* and start the current line's syntax highlighting in multi-line comment
* state.
protected transient DynamicIntArray lastTokensOnLines;
private transient int lastLine = -1;
private transient Token cachedTokenList;
private transient int useCacheCount = 0;
private transient int tokenRetrievalCount = 0;
private transient Segment s;
* If this is set to <code>true</code>, debug information about how much
* token caching is helping is printed to stdout.
private static final boolean DEBUG_TOKEN_CACHING = false;
/**
 * Constructs a plain text document that uses the default token maker
 * factory.  A default root element is created, and the tab size set to 5.
 *
 * @param syntaxStyle The syntax highlighting scheme to use.
 */
public RSyntaxDocument(String syntaxStyle) {
    // A null factory asks the two-argument constructor for the default one.
    this((TokenMakerFactory)null, syntaxStyle);
}
/**
 * Constructs a plain text document.  A default root element is created,
 * and the tab size set to 5.
 *
 * @param tmf The <code>TokenMakerFactory</code> for this document.  If
 *        this is <code>null</code>, a default factory is used.
 * @param syntaxStyle The syntax highlighting scheme to use.
 */
public RSyntaxDocument(TokenMakerFactory tmf, String syntaxStyle) {
    putProperty(tabSizeAttribute, Integer.valueOf(5));
    lastTokensOnLines = new DynamicIntArray(400);
    lastTokensOnLines.add(Token.NULL); // Initial (empty) line.
    s = new Segment();
    // The per-line state and scratch segment above must exist before a
    // style is installed, and the factory must be in place before a style
    // key can be resolved through it.
    setTokenMakerFactory(tmf);
    setSyntaxStyle(syntaxStyle);
}
* Alerts all listeners to this document of an insertion. This is
* overridden so we can update our syntax highlighting stuff.<p>
* The syntax highlighting stuff has to be here instead of in
* <code>insertUpdate</code> because <code>insertUpdate</code> is not
* called by the undo/redo actions, but this method is.
* @param e The change.
protected void fireInsertUpdate(DocumentEvent e) {
cachedTokenList = null;
* Now that the text is actually inserted into the content and
* element structure, we can update our token elements and "last
* tokens on lines" structure.
Element lineMap = getDefaultRootElement();
DocumentEvent.ElementChange change = e.getChange(lineMap);
Element[] added = change==null ? null : change.getChildrenAdded();
int numLines = lineMap.getElementCount();
int line = lineMap.getElementIndex(e.getOffset());
int previousLine = line - 1;
int previousTokenType = (previousLine>-1 ?
lastTokensOnLines.get(previousLine) : Token.NULL);
// If entire lines were added...
if (added!=null && added.length>0) {
Element[] removed = change.getChildrenRemoved();
int numRemoved = removed!=null ? removed.length : 0;
int endBefore = line + added.length - numRemoved;
//System.err.println("... adding lines: " + line + " - " + (endBefore-1));
//System.err.println("... ... added: " + added.length + ", removed:" + numRemoved);
for (int i=line; i<endBefore; i++) {
setSharedSegment(i); // Loads line i's text into s.
int tokenType = tokenMaker.getLastTokenTypeOnLine(s, previousTokenType);
lastTokensOnLines.add(i, tokenType);
//System.err.println("--------- lastTokensOnLines.size() == " + lastTokensOnLines.getSize());
previousTokenType = tokenType;
} // End of for (int i=line; i<endBefore; i++).
// Update last tokens for lines below until they stop changing.
updateLastTokensBelow(endBefore, numLines, previousTokenType);
} // End of if (added!=null && added.length>0).
// Otherwise, text was inserted on a single line...
else {
// Update last tokens for lines below until they stop changing.
updateLastTokensBelow(line, numLines, previousTokenType);
} // End of else.
// Let all listeners know about the insertion.
* This method is called AFTER the content has been inserted into the
* document and the element structure has been updated.<p>
* The syntax-highlighting updates need to be done here (as opposed to
* an override of <code>postRemoveUpdate</code>) as this method is called
* in response to undo/redo events, whereas <code>postRemoveUpdate</code>
* is not.<p>
* Now that the text is actually inserted into the content and element
* structure, we can update our token elements and "last tokens on
* lines" structure.
* @param chng The change that occurred.
* @see #removeUpdate
protected void fireRemoveUpdate(DocumentEvent chng) {
cachedTokenList = null;
Element lineMap = getDefaultRootElement();
int numLines = lineMap.getElementCount();
DocumentEvent.ElementChange change = chng.getChange(lineMap);
Element[] removed = change==null ? null : change.getChildrenRemoved();
// If entire lines were removed...
if (removed!=null && removed.length>0) {
int line = change.getIndex(); // First line entirely removed.
int previousLine = line - 1; // Line before that.
int previousTokenType = (previousLine>-1 ?
lastTokensOnLines.get(previousLine) : Token.NULL);
Element[] added = change.getChildrenAdded();
int numAdded = added==null ? 0 : added.length;
// Remove the cached last-token values for the removed lines.
int endBefore = line + removed.length - numAdded;
//System.err.println("... removing lines: " + line + " - " + (endBefore-1));
//System.err.println("... added: " + numAdded + ", removed: " + removed.length);
lastTokensOnLines.removeRange(line, endBefore); // Removing values for lines [line-(endBefore-1)].
//System.err.println("--------- lastTokensOnLines.size() == " + lastTokensOnLines.getSize());
// Update last tokens for lines below until they've stopped changing.
updateLastTokensBelow(line, numLines, previousTokenType);
} // End of if (removed!=null && removed.size()>0).
// Otherwise, text was removed from just one line...
else {
int line = lineMap.getElementIndex(chng.getOffset());
if (line>=lastTokensOnLines.getSize())
return; // If we're editing the last line in a document...
int previousLine = line - 1;
int previousTokenType = (previousLine>-1 ?
lastTokensOnLines.get(previousLine) : Token.NULL);
//System.err.println("previousTokenType for line : " + previousLine + " is " + previousTokenType);
// Update last tokens for lines below until they've stopped changing.
updateLastTokensBelow(line, numLines, previousTokenType);
// Let all of our listeners know about the removal.
* Returns the closest {@link TokenTypes "standard" token type} for a given
* "internal" token type (e.g. one whose value is <code>< 0</code>).
* @param type The token type.
* @return The closest "standard" token type. If a mapping is not defined
* for this language, then <code>type</code> is returned.
public int getClosestStandardTokenTypeForInternalType(int type) {
return tokenMaker.getClosestStandardTokenTypeForInternalType(type);
* Returns whether closing markup tags should be automatically completed.
* This method only returns <code>true</code> if
* {@link #getLanguageIsMarkup()} also returns <code>true</code>.
* @return Whether markup closing tags should be automatically completed.
* @see #getLanguageIsMarkup()
public boolean getCompleteMarkupCloseTags() {
// TODO: Remove terrible dependency on AbstractMarkupTokenMaker
return getLanguageIsMarkup() &&
* Returns whether the current programming language uses curly braces
* ('<code>{</code>' and '<code>}</code>') to denote code blocks.
* @param languageIndex The language index at the offset in question.
* Since some <code>TokenMaker</code>s effectively have nested
* languages (such as JavaScript in HTML), this parameter tells the
* <code>TokenMaker</code> what sub-language to look at.
* @return Whether curly braces denote code blocks.
public boolean getCurlyBracesDenoteCodeBlocks(int languageIndex) {
return tokenMaker.getCurlyBracesDenoteCodeBlocks(languageIndex);
* Returns whether the current language is a markup language, such as
* @return Whether the current language is a markup language.
public boolean getLanguageIsMarkup() {
return tokenMaker.isMarkupLanguage();
* Returns the token type of the last token on the given line.
* @param line The line to inspect.
* @return The token type of the last token on the specified line. If
* the line is invalid, an exception is thrown.
public int getLastTokenTypeOnLine(int line) {
return lastTokensOnLines.get(line);
* Returns the text to place at the beginning and end of a
* line to "comment" it in this programming language.
* @return The start and end strings to add to a line to "comment"
* it out. A <code>null</code> value for either means there
* is no string to add for that part. A value of
* <code>null</code> for the array means this language
* does not support commenting/uncommenting lines.
public String[] getLineCommentStartAndEnd(int languageIndex) {
return tokenMaker.getLineCommentStartAndEnd(languageIndex);
* Returns whether tokens of the specified type should have "mark
* occurrences" enabled for the current programming language.
* @param type The token type.
* @return Whether tokens of this type should have "mark occurrences"
* enabled.
boolean getMarkOccurrencesOfTokenType(int type) {
return tokenMaker.getMarkOccurrencesOfTokenType(type);
* Returns the occurrence marker for the current language.
* @return The occurrence marker.
OccurrenceMarker getOccurrenceMarker() {
return tokenMaker.getOccurrenceMarker();
* This method returns whether auto indentation should be done if Enter
* is pressed at the end of the specified line.
* @param line The line to check.
* @return Whether an extra indentation should be done.
public boolean getShouldIndentNextLine(int line) {
Token t = getTokenListForLine(line);
t = t.getLastNonCommentNonWhitespaceToken();
return tokenMaker.getShouldIndentNextLineAfter(t);
* Returns a token list for the specified segment of text representing
* the specified line number. This method is basically a wrapper for
* <code>tokenMaker.getTokenList</code> that takes into account the last
* token on the previous line to assure token accuracy.
* @param line The line number of <code>text</code> in the document, >= 0.
* @return A token list representing the specified line.
public final Token getTokenListForLine(int line) {
if (line==lastLine && cachedTokenList!=null) {
System.err.println("--- Using cached line; ratio now: " +
useCacheCount + "/" + tokenRetrievalCount);
return cachedTokenList;
lastLine = line;
Element map = getDefaultRootElement();
Element elem = map.getElement(line);
int startOffset = elem.getStartOffset();
//int endOffset = (line==map.getElementCount()-1 ? elem.getEndOffset() - 1:
// elem.getEndOffset() - 1);
int endOffset = elem.getEndOffset() - 1; // Why always "-1"?
try {
getText(startOffset,endOffset-startOffset, s);
} catch (BadLocationException ble) {
return null;
int initialTokenType = line==0 ? Token.NULL :
//return tokenMaker.getTokenList(s, initialTokenType, startOffset);
cachedTokenList = tokenMaker.getTokenList(s, initialTokenType, startOffset);
return cachedTokenList;
boolean insertBreakSpecialHandling(ActionEvent e) {
Action a = tokenMaker.getInsertBreakAction();
if (a!=null) {
return true;
return false;
* Returns whether a character could be part of an "identifier" token
* in a specific language. This is used to identify such things as the
* bounds of the "word" to select on double-clicking.
* @param languageIndex The language index the character was found in.
* @param ch The character.
* @return Whether the character could be part of an "identifier" token.
public boolean isIdentifierChar(int languageIndex, char ch) {
return tokenMaker.isIdentifierChar(languageIndex, ch);
* Returns an iterator over the paintable tokens in this document. Results
* are undefined if this document is modified while the iterator is being
* iterated through, so this should only be used on the EDT.<p>
* The <code>remove()</code> method of the returned iterator will throw
* an <code>UnsupportedOperationException</code>.
* @return An iterator.
public Iterator<Token> iterator() {
return new TokenIterator(this);
* Deserializes a document.
* @param in The stream to read from.
* @throws ClassNotFoundException
* @throws IOException
private void readObject(ObjectInputStream in)
throws ClassNotFoundException, IOException {
// Install default TokenMakerFactory. To support custom TokenMakers,
// both JVM's should install default TokenMakerFactories that support
// the language they want to use beforehand.
// Handle other transient stuff
this.s = new Segment();
int lineCount = getDefaultRootElement().getElementCount();
lastTokensOnLines = new DynamicIntArray(lineCount);
setSyntaxStyle(syntaxStyle); // Actually install (transient) TokenMaker
* Makes our private <code>Segment s</code> point to the text in our
* document referenced by the specified element. Note that
* <code>line</code> MUST be a valid line number in the document.
* @param line The line number you want to get.
private final void setSharedSegment(int line) {
Element map = getDefaultRootElement();
//int numLines = map.getElementCount();
Element element = map.getElement(line);
if (element==null)
throw new InternalError("Invalid line number: " + line);
int startOffset = element.getStartOffset();
//int endOffset = (line==numLines-1 ?
// element.getEndOffset()-1 : element.getEndOffset() - 1);
int endOffset = element.getEndOffset()-1; // Why always "-1"?
try {
getText(startOffset, endOffset-startOffset, s);
} catch (BadLocationException ble) {
throw new InternalError("Text range not in document: " +
startOffset + "-" + endOffset);
* Sets the syntax style being used for syntax highlighting in this
* document. What styles are supported by a document is determined by its
* {@link TokenMakerFactory}. By default, all <code>RSyntaxDocument</code>s
* support all languages built into <code>RSyntaxTextArea</code>.
* @param styleKey The new style to use, such as
* {@link SyntaxConstants#SYNTAX_STYLE_JAVA}. If this style is not
* known or supported by this document, then
* {@link SyntaxConstants#SYNTAX_STYLE_NONE} is used.
* @see #setSyntaxStyle(TokenMaker)
public void setSyntaxStyle(String styleKey) {
tokenMaker = tokenMakerFactory.getTokenMaker(styleKey);
this.syntaxStyle = styleKey;
* Sets the syntax style being used for syntax highlighting in this
* document. You should call this method if you've created a custom token
* maker for a language not normally supported by
* <code>RSyntaxTextArea</code>.
* @param tokenMaker The new token maker to use.
* @see #setSyntaxStyle(String)
public void setSyntaxStyle(TokenMaker tokenMaker) {
this.tokenMaker = tokenMaker;
* Sets the token maker factory used by this document.
* @param tmf The <code>TokenMakerFactory</code> for this document. If
* this is <code>null</code>, a default factory is used.
public void setTokenMakerFactory(TokenMakerFactory tmf) {
tokenMakerFactory = tmf!=null ? tmf :
* Loops through the last-tokens-on-lines array from a specified point
* onward, updating last-token values until they stop changing. This
* should be called when lines are updated/inserted/removed, as doing
* so may cause lines below to change color.
* @param line The first line to check for a change in last-token value.
* @param numLines The number of lines in the document.
* @param previousTokenType The last-token value of the line just before
* <code>line</code>.
* @return The last line that needs repainting.
private int updateLastTokensBelow(int line, int numLines, int previousTokenType) {
int firstLine = line;
// Loop through all lines past our starting point. Update even the last
// line's info, even though there aren't any lines after it that depend
// on it changing for them to be changed, as its state may be used
// elsewhere in the library.
int end = numLines;
//System.err.println("--- end==" + end + " (numLines==" + numLines + ")");
while (line<end) {
setSharedSegment(line); // Sets s's text to that of line 'line' in the document.
int oldTokenType = lastTokensOnLines.get(line);
int newTokenType = tokenMaker.getLastTokenTypeOnLine(s, previousTokenType);
//System.err.println("---------------- line " + line + "; oldTokenType==" + oldTokenType + ", newTokenType==" + newTokenType + ", s=='" + s + "'");
// If this line's end-token value didn't change, stop here. Note
// that we're saying this line needs repainting; this is because
// the beginning of this line did indeed change color, but the
// end didn't.
if (oldTokenType==newTokenType) {
//System.err.println("... ... ... repainting lines " + firstLine + "-" + line);
fireChangedUpdate(new DefaultDocumentEvent(firstLine, line, DocumentEvent.EventType.CHANGE));
return line;
// If the line's end-token value did change, update it and
// keep going.
// NOTE: "setUnsafe" is okay here as the bounds checking was
// already done in lastTokensOnLines.get(line) above.
lastTokensOnLines.setUnsafe(line, newTokenType);
previousTokenType = newTokenType;
} // End of while (line<numLines).
// If any lines had their token types changed, fire a changed update
// for them. The view will repaint the area covered by the lines.
// FIXME: We currently cheat and send the line range that needs to be
// repainted as the "offset and length" of the change, since this is
// what the view needs. We really should send the actual offset and
// length.
if (line>firstLine) {
//System.err.println("... ... ... repainting lines " + firstLine + "-" + line);
fireChangedUpdate(new DefaultDocumentEvent(firstLine, line,
return line;
* Updates internal state information; e.g. the "last tokens on lines"
* data. After this, a changed update is fired to let listeners know that
* the document's structure has changed.<p>
* This is called internally whenever the syntax style changes.
private void updateSyntaxHighlightingInformation() {
// Reinitialize the "last token on each line" array. Note that since
// the actual text in the document isn't changing, the number of lines
// is the same.
Element map = getDefaultRootElement();
int numLines = map.getElementCount();
int lastTokenType = Token.NULL;
for (int i=0; i<numLines; i++) {
lastTokenType = tokenMaker.getLastTokenTypeOnLine(s, lastTokenType);
lastTokensOnLines.set(i, lastTokenType);
// Clear our token cache to force re-painting
lastLine = -1;
cachedTokenList = null;
// Let everybody know that syntax styles have (probably) changed.
fireChangedUpdate(new DefaultDocumentEvent(
0, numLines-1, DocumentEvent.EventType.CHANGE));