package net.java.textilej.parser;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.SortedMap;
import java.util.Stack;
import java.util.TreeMap;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.java.textilej.parser.DocumentBuilder.BlockType;
import net.java.textilej.parser.DocumentBuilder.SpanType;
import net.java.textilej.parser.builder.HtmlDocumentBuilder;
import net.java.textilej.parser.outline.OutlineItem;
import net.java.textilej.parser.outline.OutlineParser;
import net.java.textilej.parser.tag.BlockTagProcessor;
import net.java.textilej.parser.tag.Dialect;
import net.java.textilej.parser.tag.PhraseModifierProcessor;
import net.java.textilej.parser.tag.ReplacementTokenProcessor;
import net.java.textilej.parser.tag.TextileParserServices;
/**
* A parser that parses basic <a href="http://en.wikipedia.org/wiki/Textile_(markup_language)">Textile markup</a> and converts it to HTML.
*
* Based on the spec available at <a href="http://textile.thresholdstate.com/">http://textile.thresholdstate.com/</a>,
* supports basic phrase modifiers, block modifiers, attributes, footnotes, and some punctuation.
*
* A notable exception to the supported Textile syntax is raw HTML.
*
* Additionally supported are <code>{toc}</code> and <code>{glossary}</code>.
*
* The supported syntax of the parser may be augmented by {@link #setDialect(Dialect) setting a dialect}.
* Without adding a dialect the base syntax of the parser supports true Textile markup, with the addition of the following:
* <ol>
* <li>Support for {toc} and {glossary}</li>
* <li>lists that start with '-'</li>
* <li>Confluence-style table headers</li>
* </ol>
*
* @author dgreen
*/
public class TextileParser {
/** Element name that the <code>!</code> phrase delimiter maps to; matches with this name are emitted as images rather than spans. */
private static final String IMAGE = "img";
/**
 * Mutable state of a single parse run. A new instance is created when a parse starts
 * and the reference is dropped when the parse ends.
 */
private static class ParserState {
    /** Generates ids for headings that do not declare one explicitly. */
    private final IdGenerator idGenerator = new IdGenerator();
    /** Stack of the currently open block elements, innermost on top. */
    private final Stack<ElementState> elements = new Stack<ElementState>();
    /** Maps a footnote number as written in the markup to the id used for its target element. */
    private final Map<String, String> footnoteIdToHtmlId = new HashMap<String, String>();
    /** Acronyms encountered so far, keyed by acronym, with their definitions as values. */
    private final Map<String, String> glossaryItems = new HashMap<String, String>();
    /** The complete markup being parsed. */
    private String textile;

    private ParserState() {
        // instantiated by the enclosing parser only
    }
}
/** Receiver of all parse events; parsing fails with an IllegalStateException while this is null. */
private DocumentBuilder builder;
/** Optional dialect consulted for additional block, phrase and replacement-token syntax; may be null. */
private Dialect dialect;
/** State of the parse currently in progress; null whenever no parse is running. */
private ParserState state;
/** Callback object handed to dialect processors; only created when a non-null dialect is set. */
private TextileParserServices services;
/**
 * Creates a parser that has neither a builder nor a dialect configured.
 */
public TextileParser() {
}
/**
 * Returns the builder that receives the parse events.
 *
 * @return the current builder, or null if none has been set
 */
public DocumentBuilder getBuilder() {
return builder;
}
/**
 * Sets the builder that receives the parse events.
 *
 * @param builder the builder to use for subsequent parses
 */
public void setBuilder(DocumentBuilder builder) {
this.builder = builder;
}
/**
 * Returns the dialect that augments the base syntax.
 *
 * @return the current dialect, or null if none has been set
 */
public Dialect getDialect() {
return dialect;
}
/**
 * Sets the dialect that augments the base syntax of this parser.
 *
 * When the dialect is non-null a new services object is created whose callbacks
 * delegate to this parser's text and line emitters. Passing null only clears the
 * dialect; a previously created services object is left in place.
 *
 * @param dialect the dialect to use, or null for the base syntax only
 */
public void setDialect(Dialect dialect) {
    this.dialect = dialect;
    if (dialect == null) {
        return;
    }
    final TextileParser parser = this;
    services = new TextileParserServices() {
        public void emitText(String textileMarkup) {
            parser.emitText(textileMarkup);
        }
        public void emitTextileLine(String textileLine) {
            parser.emitTextileLine(textileLine);
        }
    };
}
/**
 * Parse the given Textile markup and return the result as an HTML document.
 *
 * A temporary HTML builder is installed for the duration of the call and removed
 * again afterwards, so the method may be called repeatedly on the same parser.
 *
 * @param textile the textile to parse
 *
 * @return the HTML document text.
 *
 * @throws IllegalStateException if a builder has already been set on this parser
 */
public String parseToHtml(String textile) {
    if (builder != null) {
        throw new IllegalStateException("Builder must not be set");
    }
    StringWriter out = new StringWriter();
    setBuilder(new HtmlDocumentBuilder(out));
    try {
        parse(textile);
    } finally {
        // release the temporary builder; leaving it in place would make every
        // subsequent call fail the "Builder must not be set" check above
        setBuilder(null);
    }
    return out.toString();
}
/**
 * Parse the given Textile markup string and emit the results to the
 * {@link #getBuilder() builder} as a complete document. Equivalent to calling
 * {@link #parse(String, boolean)} with <code>asDocument</code> set to true.
 *
 * @param textile the Textile markup
 *
 * @throws IllegalStateException if no builder has been set
 */
public void parse(String textile) {
parse(textile,true);
}
/**
 * Parse the given Textile markup string and emit the results to the
 * {@link #getBuilder() builder}.
 *
 * If <code>asDocument</code> is true the content is wrapped in calls to
 * {@link DocumentBuilder#beginDocument()} and {@link DocumentBuilder#endDocument()};
 * otherwise only the content events are emitted.
 *
 * @param textile the Textile markup
 * @param asDocument if true, the {@link #getBuilder() builder} is treated as a document
 *
 * @throws IllegalStateException if no builder has been set
 */
public void parse(String textile,boolean asDocument) {
    if (builder == null) {
        throw new IllegalStateException("Must set builder");
    }
    if (!asDocument) {
        // content only, no document envelope
        parseToContent(textile);
        return;
    }
    builder.beginDocument();
    parseToContent(textile);
    builder.endDocument();
}
// Regex fragments for the optional attribute section that may follow a block or phrase modifier.
// Each fragment contributes exactly one capturing group.
/** A CSS class in parentheses, e.g. <code>(name)</code>; the content may not contain '#' or ')'. */
static final String REGEX_TEXTILE_CLASS = "(?:\\(([^#\\)]+)\\))";
/** An element id in parentheses introduced by '#', e.g. <code>(#name)</code>. */
static final String REGEX_TEXTILE_ID = "(?:\\(#([^\\)]+)\\))";
/** CSS styles in curly braces, e.g. <code>{color:red}</code>. */
static final String REGEX_TEXTILE_STYLE = "(?:\\{([^\\}]+)\\})";
/** A language in square brackets, e.g. <code>[fr]</code>. */
static final String REGEX_LANGUAGE = "(?:\\[([^\\]]+)\\])";
/**
 * Zero to four attribute fragments in any order. Adds four capturing groups to the enclosing
 * pattern, in the order class, id, style, language; this is the layout that
 * createAttributes reads relative to its offset argument.
 */
static final String REGEX_ATTRIBUTES = "(?:"+REGEX_TEXTILE_CLASS+"|"+REGEX_TEXTILE_ID+"|"+REGEX_TEXTILE_STYLE+"|"+REGEX_LANGUAGE+"){0,4}";
/**
 * Regex fragments for the block modifier names recognized at the start of a line.
 * Headings are limited to levels 1 through 6; a level of 0 does not denote a heading.
 */
private static final String[] blockModifiers1 = new String[] {
    "h[1-6]", // heading
    "bq", // Blockquote
    "fn\\d\\d?",// Footnote
    "p", // paragraph
    "bc", // Block code
    "pre", // Pre-formatted
    "table", // table
};
/**
 * Matches a whole line that starts with a block modifier. Groups: 1 is the modifier name,
 * 2 to 5 are the attributes (class, id, style, language), 6 is the second '.' of an
 * extended block, and 7 is the remaining text of the line (null when there is none).
 */
private static final Pattern BLOCK_MODIFIERS;
static {
    StringBuilder regex = new StringBuilder();
    regex.append("^(");
    for (int index = 0; index < blockModifiers1.length; ++index) {
        if (index > 0) {
            regex.append("|");
        }
        // each name is wrapped in a non-capturing group so that only the
        // enclosing group 1 captures the modifier
        regex.append("(?:").append(blockModifiers1[index]).append(")");
    }
    regex.append(")");
    regex.append(REGEX_ATTRIBUTES);
    regex.append("\\.(\\.)?(?:\\s(.*))?$");
    BLOCK_MODIFIERS = Pattern.compile(regex.toString());
}
/** Matches a line that begins with '|' and ends with '|' followed only by optional whitespace. */
private static final Pattern BLOCK_START_TABLE_ROW = Pattern.compile("^\\|(.*)?(\\|\\s*$)");
/**
 * Matches a list item line. Group 1 is the run of '*', '#' and '-' markers (its length is used
 * as the nesting level), groups 2 to 5 are the attributes, and group 6 is the item text.
 */
private static final Pattern LIST_BLOCK_MODIFIERS = Pattern.compile("^((?:(?:\\*)|(?:#)|(?:-))+)"+REGEX_ATTRIBUTES+"\\s(.*+)$");
/** Matches a line consisting solely of <code>{toc}</code> or <code>{glossary}</code>, optionally with ':options'. */
private static final Pattern GENERATIVE_EMITTER_PATTERN = Pattern.compile("^\\s*\\{(toc|glossary)(:[^\\}]*)?\\}\\s*$");
/** Lookup from an element name (as produced by the phrase delimiter table) to the span type to emit. */
private static final Map<String,SpanType> elementNameToSpanType = new HashMap<String, SpanType>();
static {
    final Map<String,SpanType> spans = elementNameToSpanType;
    // emphasis variants
    spans.put("strong",SpanType.STRONG);
    spans.put("b",SpanType.BOLD);
    spans.put("em",SpanType.EMPHASIS);
    spans.put("i",SpanType.ITALIC);
    // editing marks
    spans.put("ins",SpanType.INSERTED);
    spans.put("del",SpanType.DELETED);
    // vertical position
    spans.put("sub",SpanType.SUBSCRIPT);
    spans.put("sup",SpanType.SUPERSCRIPT);
    // everything else
    spans.put("cite",SpanType.CITATION);
    spans.put("code",SpanType.CODE);
    spans.put("span",SpanType.SPAN);
}
/**
 * Maps a phrase delimiter to the name of the element it produces. The iteration order of this
 * map becomes the order of the alternatives in {@link #PHRASE_MODIFIERS}, which is why an
 * insertion-ordered map is required.
 */
private static final Map<String,String> textileCharToElementName = new LinkedHashMap<String, String>(); // MUST use LinkedHashMap
/** Matches, anywhere in a line, an HTML end tag, an HTML start tag, an <code>==escaped==</code> run or a delimited phrase. */
private static final Pattern PHRASE_MODIFIERS;
/** Group holding the <code>==</code> escape delimiter; the escaped content is in the following group. */
private static final int PHRASE_MODIFIER_PATTERN_ESCAPE;
/** Group holding a matched HTML end tag. */
private static final int PHRASE_MODIFIER_PATTERN_HTML_ENDTAG;
/** Group holding a matched HTML start tag. */
private static final int PHRASE_MODIFIER_PATTERN_HTML_STARTTAG;
/** Group holding the phrase delimiter; the four attribute groups follow it and the phrase text is at this index + 5. */
private static final int PHRASE_MODIFIER_PATTERN_BYELEM;
static {
// ORDER DEPENDENCY
// two-character delimiters are registered before the single characters they start with,
// so that they come first in the regex alternation built below
textileCharToElementName.put("**","b");
textileCharToElementName.put("??","cite");
textileCharToElementName.put("__","i");
textileCharToElementName.put("_","em");
textileCharToElementName.put("*","strong");
textileCharToElementName.put("-","del");
textileCharToElementName.put("+","ins");
textileCharToElementName.put("^","sup");
textileCharToElementName.put("~","sub");
textileCharToElementName.put("%","span");
textileCharToElementName.put("@","code");
textileCharToElementName.put("!",IMAGE);
// running count of capturing groups appended to the pattern so far
int group = 0;
StringBuilder buf = new StringBuilder();
PHRASE_MODIFIER_PATTERN_HTML_ENDTAG = ++group;
buf.append("(</[a-zA-Z][a-zA-Z0-9_:-]*\\s*>)");
PHRASE_MODIFIER_PATTERN_HTML_STARTTAG = ++group;
buf.append("|(<[a-zA-Z][a-zA-Z0-9_:-]*(?:\\s*[a-zA-Z][a-zA-Z0-9_:-]*=\"[^\"]*\")*\\s*/?>)");
// a phrase modifier always starts at the start of a line or after a non-word character
buf.append("|(?<=^|\\W)");
PHRASE_MODIFIER_PATTERN_ESCAPE = ++group;
++group; // occupies two spaces
// the escape alternative closes with a back-reference to its own opening delimiter group
buf.append("(?:(?:(==)(.*)?\\"+PHRASE_MODIFIER_PATTERN_ESCAPE+")");
PHRASE_MODIFIER_PATTERN_BYELEM = ++group;
buf.append("|(");
int itemNumber = 0;
for (Map.Entry<String,String> ent: textileCharToElementName.entrySet()) {
if (itemNumber++ > 0) {
buf.append("|");
}
String delimiters = ent.getKey();
// delimiters contain regex metacharacters and must be matched literally
buf.append(Pattern.quote(delimiters));
}
buf.append(")");
buf.append(REGEX_ATTRIBUTES);
// phrase text: starts and ends with a non-whitespace character, matched reluctantly
buf.append("(\\S(?:.*?\\S)?)");
// the closing delimiter must equal the opening one
buf.append("\\"+PHRASE_MODIFIER_PATTERN_BYELEM);
// and must be followed by a non-word character or the end of the input
buf.append("(?=[^a-zA-Z_0-9]|$))");
PHRASE_MODIFIERS = Pattern.compile(buf.toString());
}
/** Matches a ':' followed by a run of non-whitespace characters (captured as group 1) and then anything; used to detect a link target directly after an image phrase. */
private static final Pattern HYPERLINK_SUFFIX = Pattern.compile(":([^\\s]+).*");
/** Matches every replacement token handled by emitText; which capturing group is set identifies the kind of token. */
private static final Pattern REPLACEMENT_PATTERN;
/** Group holding the opening '"' or '!' of a hyperlink; the link text and the href are in the two following groups. */
private static final int REPLACEMENT_PATTERN_HYPERLINK;
/** Group holding the number of a footnote reference such as <code>[1]</code>. */
private static final int REPLACEMENT_PATTERN_FOOTNOTE_GROUP;
/** Group holding the content of a single-quoted run. */
private static final int REPLACEMENT_PATTERN_QUOTE_GROUP;
/** Group holding the content of a double-quoted run. */
private static final int REPLACEMENT_PATTERN_DBLQUOTE_GROUP;
/** Group holding an apostrophe between two word characters. */
private static final int REPLACEMENT_PATTERN_APOS_GROUP;
/** Group holding '--' surrounded by whitespace and word characters. */
private static final int REPLACEMENT_PATTERN_EMDASH_GROUP;
/** Group holding '-' surrounded by whitespace and word characters. */
private static final int REPLACEMENT_PATTERN_ENDASH_GROUP;
/** Group holding 'x' between two digits separated from it by whitespace. */
private static final int REPLACEMENT_PATTERN_MUL_GROUP;
/** Group holding an upper-case acronym; its parenthesized definition is in the following group. */
private static final int REPLACEMENT_PATTERN_ACRONYM_GROUP;
/** Group holding the options of a <code>{toc}</code> token, or null when none were given. */
private static final int REPLACEMENT_PATTERN_TOC_OPTIONS_GROUP;
/** Group holding the options of a <code>{glossary}</code> token, or null when none were given. */
private static final int REPLACEMENT_PATTERN_GLOSS_OPTIONS_GROUP;
/** Literal tokens that are replaced by a fixed emitter. */
private static final Map<String,Emitter> replacements = new HashMap<String, Emitter>();
static {
replacements.put("(tm)", new EntityReference("#8482"));
replacements.put("(TM)", new EntityReference("#8482"));
replacements.put("(c)", new EntityReference("#169"));
replacements.put("(C)", new EntityReference("#169"));
replacements.put("(r)", new EntityReference("#174"));
replacements.put("(R)", new EntityReference("#174"));
StringBuilder buf = new StringBuilder();
// group 1 wraps all literal tokens plus the {toc} and {glossary} tokens
buf.append("(");
int index = 0;
for (String token: replacements.keySet()) {
if (index++ != 0) {
buf.append("|");
}
buf.append("(?:");
buf.append(Pattern.quote(token));
buf.append(")");
}
// group 1 has been opened above; the counter below tracks the index of the
// most recently appended capturing group
int group = 1;
buf.append("|(?:\\{toc:?(?:([^\\}]+))?\\})");
REPLACEMENT_PATTERN_TOC_OPTIONS_GROUP = ++group;
buf.append("|(?:\\{glossary:?(?:([^\\}]+))?\\})");
REPLACEMENT_PATTERN_GLOSS_OPTIONS_GROUP = ++group;
buf.append(")");
REPLACEMENT_PATTERN_HYPERLINK = ++group;
group += 2; // occupies three groups
// the closing delimiter is a back-reference to the opening '"' or '!'
buf.append("|(?:(\"|\\!)([^\"]+)\\"+REPLACEMENT_PATTERN_HYPERLINK+":([^\\s]+))");
REPLACEMENT_PATTERN_FOOTNOTE_GROUP = ++group;
buf.append("|(?:\\[(\\d+)\\])");
REPLACEMENT_PATTERN_DBLQUOTE_GROUP = ++group;
buf.append("|(?:(?<=\\W|^)\"([^\"]+)\"(?=\\W))");
REPLACEMENT_PATTERN_QUOTE_GROUP = ++group;
// NOTE(review): the content class below excludes '"' rather than '\''; as written a
// single-quoted run may span intermediate single quotes. Confirm this is intended.
buf.append("|(?:(?<=\\W|^)'([^\"]+)'(?=\\W))");
REPLACEMENT_PATTERN_APOS_GROUP = ++group;
buf.append("|(?:(?<=\\w)(')(?=\\w))");
REPLACEMENT_PATTERN_EMDASH_GROUP = ++group;
buf.append("|(?:(?<=\\w\\s)(--)(?=\\s\\w))");
REPLACEMENT_PATTERN_ENDASH_GROUP = ++group;
buf.append("|(?:(?<=\\w\\s)(-)(?=\\s\\w))");
REPLACEMENT_PATTERN_MUL_GROUP = ++group;
buf.append("|(?:(?<=\\d\\s)(x)(?=\\s\\d))");
REPLACEMENT_PATTERN_ACRONYM_GROUP = ++group;
++group; // this one occupies two groups
buf.append("|(?:(?<=\\W|^)([A-Z]+)\\(([^\\)]+)\\))");
REPLACEMENT_PATTERN = Pattern.compile(buf.toString());
}
/**
 * Matches one table cell, starting at its leading '|'. Group 1 is the optional alignment marker
 * (one of &lt;&gt;, &lt;, &gt; or ^), groups 2 to 5 are the attributes, group 6 is the optional
 * header indicator ('_' or '|'), group 7 is the cell text and group 8 is the optional row terminator.
 */
static final Pattern TABLE_ROW_PATTERN = Pattern.compile("\\|((?:\\<\\>)|\\<|\\>|\\^)?"+REGEX_ATTRIBUTES+"(_|\\|)?\\.?\\s?([^\\|]*)(\\|\\|?\\s*$)?");
/**
 * Parse the given Textile markup line by line, opening and closing blocks on the builder and
 * handing the phrase-level content of each line to {@link #emitTextileLine(String)}.
 *
 * Each line is classified in this order: blank line, table row, list item, block modifier,
 * dialect block (only when a dialect is set), generative token line ({toc} or {glossary}),
 * and finally plain content of the current block. All blocks still open at the end of the
 * input are closed.
 *
 * Null or empty input is ignored. The parser state exists only for the duration of this call.
 *
 * @param textile the markup to parse
 *
 * @throws IllegalStateException if a parse is already in progress on this instance, or if an
 *         unexpected block modifier is encountered
 */
private void parseToContent(String textile) {
    if (textile == null || textile.length() == 0) {
        return;
    }
    if (state != null) {
        throw new IllegalStateException("The Textile parser is not reentrant and is not thread safe");
    }
    try {
        state = new ParserState();
        state.textile = textile;
        BufferedReader reader = new BufferedReader(new StringReader(textile));
        String textileLine;
        int lineNumber = 0;
        // block type to re-open after a blank line while an extended ("..") block is active
        BlockType extendedBlockType = null;
        // blank lines seen inside an extended preformatted block that have not been emitted yet
        int extendedBlockNewlines = 0;
        // set once a blank line has closed the inner block of an extended, non-preformatted block
        boolean extendedTagRequired = false;
        while ((textileLine = reader.readLine()) != null) {
            ++lineNumber;
            builder.textileLine(lineNumber,textileLine);
            if (textileLine.length() == 0) {
                if (extendedBlockType == null) {
                    // a blank line terminates every open block
                    endBlock(state.elements);
                } else if (extendedBlockType.isPreformatted()) {
                    // remember the blank line; it is emitted only if more content follows
                    ++extendedBlockNewlines;
                } else {
                    if (!extendedTagRequired) {
                        extendedTagRequired = true;
                        pop(state.elements,1);
                    }
                }
                continue;
            }
            boolean startedNewBlock = false;
            boolean table = inTable(state.elements);
            if (!table && BLOCK_START_TABLE_ROW.matcher(textileLine).matches()) {
                // a row outside of a table implicitly starts one
                endBlock(state.elements);
                state.elements.push(new BlockState(builder,BlockType.TABLE,new Attributes()));
                table = true;
            }
            if (table) {
                emitTableRow(textileLine);
            } else {
                Matcher listBlockMatcher = LIST_BLOCK_MODIFIERS.matcher(textileLine);
                boolean lineContentProcessed = false;
                if (listBlockMatcher.matches()) {
                    extendedBlockType = null;
                    extendedTagRequired = false;
                    String listType = listBlockMatcher.group(1);
                    textileLine = listBlockMatcher.group(6);
                    int level = listType.length();
                    // the innermost marker decides the type of a newly opened list
                    boolean numeric = listType.charAt(level-1) == '#';
                    adjustList(state.elements,numeric,level,listBlockMatcher,2);
                    startedNewBlock = true;
                } else {
                    Matcher blockModifierMatcher = BLOCK_MODIFIERS.matcher(textileLine);
                    if (blockModifierMatcher.matches()) {
                        // end any previous block
                        extendedBlockType = null;
                        extendedTagRequired = false;
                        endBlock(state.elements);
                        startedNewBlock = true;
                        String modifierType = blockModifierMatcher.group(1);
                        String extended = blockModifierMatcher.group(6);
                        // null when the modifier is not followed by any text
                        textileLine = blockModifierMatcher.group(7);
                        if (modifierType.startsWith("h")) {
                            int level = Integer.parseInt(modifierType.substring(1));
                            state.elements.push(new HeadingState(builder,level,createAttributes(blockModifierMatcher,2),textileLine));
                        } else if (modifierType.equals("p")) {
                            state.elements.push(new BlockState(builder,BlockType.PARAGRAPH,createAttributes(blockModifierMatcher,2)));
                        } else if (modifierType.equals("pre")) {
                            state.elements.push(new BlockState(builder,BlockType.PREFORMATTED,createAttributes(blockModifierMatcher,2)));
                        } else if (modifierType.equals("bq")) {
                            state.elements.push(new BlockState(builder,BlockType.QUOTE,createAttributes(blockModifierMatcher,2)));
                            state.elements.push(new BlockState(builder,BlockType.PARAGRAPH,new Attributes()));
                            if (extended != null) {
                                extendedBlockType = BlockType.PARAGRAPH;
                            }
                        } else if (modifierType.equals("bc")) {
                            state.elements.push(new BlockState(builder,BlockType.PREFORMATTED,createAttributes(blockModifierMatcher,2)));
                            state.elements.push(new BlockState(builder,BlockType.CODE,new Attributes()));
                            if (extended != null) {
                                extendedBlockType = BlockType.CODE;
                            }
                        } else if (modifierType.startsWith("fn")) {
                            String fnId = modifierType.substring(2);
                            String elementId = getFootnoteId(state.footnoteIdToHtmlId, fnId);
                            state.elements.push(new BlockState(builder,BlockType.PARAGRAPH,createAttributes(blockModifierMatcher,2,elementId,"footnote",null)));
                            builder.beginSpan(SpanType.SUPERSCRIPT, new Attributes());
                            emitText(fnId);
                            builder.endSpan();
                        } else if (modifierType.equals("table")) {
                            state.elements.push(new BlockState(builder,BlockType.TABLE,createAttributes(blockModifierMatcher,2)));
                        } else {
                            throw new IllegalStateException(String.format("Unexpected '%s' at line %s",modifierType,lineNumber));
                        }
                    } else {
                        BlockTagProcessor blockProcessor;
                        if (dialect != null && ((blockProcessor = dialect.startBlock(textileLine,0)) != null)) {
                            blockProcessor.setParser(this);
                            blockProcessor.setParserServices(services);
                            blockProcessor.setBuilder(builder);
                            blockProcessor.setTextile(textile);
                            // end any previous block
                            extendedBlockType = null;
                            extendedTagRequired = false;
                            endBlock(state.elements);
                            startedNewBlock = true;
                            int lineOffset = blockProcessor.getLineOffset();
                            if (lineOffset < textileLine.length()) {
                                String textileLineEnd = textileLine.substring(lineOffset);
                                if (textileLineEnd.trim().length() > 0) {
                                    blockProcessor.process(textileLine,lineOffset);
                                }
                            }
                            // the processor consumes whole lines until it reports the block as closed
                            while (!blockProcessor.isBlockClosed()) {
                                if ((textileLine = reader.readLine()) != null) {
                                    ++lineNumber;
                                    builder.textileLine(lineNumber,textileLine);
                                    blockProcessor.process(textileLine, 0);
                                } else {
                                    break;
                                }
                            }
                            blockProcessor.closeBlock();
                            lineContentProcessed = true;
                            if (textileLine == null) {
                                // input ended inside the dialect block
                                break;
                            }
                            // Text remaining on the line after the block is emitted as a paragraph.
                            // The offset is read again here because the value obtained before
                            // processing refers to the first line of the block and may lie beyond
                            // the end of the current line.
                            // NOTE(review): assumes getLineOffset() reports where the block ended on
                            // the last processed line - confirm against BlockTagProcessor. A negative
                            // value is treated as "nothing remaining".
                            int endOffset = blockProcessor.getLineOffset();
                            if (endOffset >= 0 && endOffset < textileLine.length()) {
                                String textileLineEnd = textileLine.substring(endOffset);
                                if (textileLineEnd.trim().length() > 0) {
                                    textileLine = textileLineEnd;
                                    lineContentProcessed = false;
                                    // no block modifier matched on this path, so there are no
                                    // attributes to read from a matcher
                                    state.elements.push(new BlockState(builder,BlockType.PARAGRAPH,new Attributes()));
                                    startedNewBlock = true;
                                }
                            }
                        } else {
                            Matcher generativeEmitterMatcher = GENERATIVE_EMITTER_PATTERN.matcher(textileLine);
                            if (generativeEmitterMatcher.matches()) {
                                // end any previous block
                                extendedBlockType = null;
                                extendedTagRequired = false;
                                endBlock(state.elements);
                                startedNewBlock = true;
                                lineContentProcessed = true;
                                emitText(textileLine.trim());
                            }
                        }
                    }
                }
                if (startedNewBlock) {
                    extendedBlockNewlines = 0;
                }
                if (textileLine != null && !lineContentProcessed) {
                    if (!startedNewBlock) {
                        if (extendedBlockType != null && extendedBlockType.isPreformatted()) {
                            // flush the blank lines that were held back inside the extended block
                            for (int x = 0;x<extendedBlockNewlines;++x) {
                                builder.characters("\n");
                            }
                        }
                        extendedBlockNewlines = 0;
                    }
                    boolean inPreformattedText = inPreformattedBlock(state.elements);
                    if (textileLine.length() > 0) {
                        if (state.elements.isEmpty()) {
                            // content outside of any block implicitly starts a paragraph
                            state.elements.push(new BlockState(builder,BlockType.PARAGRAPH,new Attributes()));
                        } else {
                            if (!startedNewBlock) {
                                if (extendedTagRequired && extendedBlockType != null) {
                                    if (inPreformattedText) {
                                        builder.characters("\n");
                                    }
                                    extendedTagRequired = false;
                                    state.elements.push(new BlockState(builder,extendedBlockType,new Attributes()));
                                } else {
                                    if (!inPreformattedText) {
                                        // a further line of the same block
                                        builder.lineBreak();
                                    }
                                }
                            }
                        }
                        if (inPreformattedText) {
                            builder.characters(textileLine);
                            builder.characters("\n");
                        } else {
                            emitTextileLine(textileLine);
                        }
                    } else if (inPreformattedText) {
                        builder.characters("\n");
                    }
                }
            }
        }
        endBlock(state.elements);
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    } finally {
        state = null;
    }
}
/**
 * Emit one table row: opens a row block, emits a cell for every match of the cell pattern
 * in the line and closes the row again.
 *
 * @param textileLine the markup of the row
 */
private void emitTableRow(String textileLine) {
    state.elements.push(new BlockState(builder,BlockType.TABLE_ROW,new Attributes()));
    Matcher rowMatcher = TABLE_ROW_PATTERN.matcher(textileLine);
    while (rowMatcher.find()) {
        if (rowMatcher.start() == textileLine.length()-1) {
            // the final '|' only terminates the row
            break;
        }
        String alignment = rowMatcher.group(1);
        String headerIndicator = rowMatcher.group(6);
        String text = rowMatcher.group(7);
        boolean header = "_".equals(headerIndicator) || "|".equals(headerIndicator);
        BlockType cellType = header?BlockType.TABLE_CELL_HEADER:BlockType.TABLE_CELL_NORMAL;
        state.elements.push(new BlockState(builder,cellType,createAttributes(rowMatcher, 2,null,null,alignmentToCssStyle(alignment))));
        emitTextileLine(text);
        pop(state.elements,1);
    }
    pop(state.elements,1);
}
/**
 * Translate a table cell alignment marker to the CSS declaration that implements it.
 *
 * @param alignment the marker, may be null
 *
 * @return the CSS declaration, or null if the marker is null or not recognized
 */
private static String alignmentToCssStyle(String alignment) {
    if (alignment == null) {
        return null;
    }
    if (alignment.equals("<>")) {
        return "text-align: center;";
    }
    if (alignment.equals(">")) {
        return "text-align: right;";
    }
    if (alignment.equals("<")) {
        return "text-align: left;";
    }
    if (alignment.equals("^")) {
        // top alignment is vertical; 'top' is not a value of text-align
        return "vertical-align: top;";
    }
    return null;
}
/**
 * Close and remove up to <code>count</code> elements from the top of the stack,
 * stopping early if the stack runs empty.
 *
 * @param elements the stack of open elements
 * @param count the maximum number of elements to close
 */
private void pop(Stack<ElementState> elements, int count) {
    for (int remaining = count; remaining > 0 && !elements.isEmpty(); --remaining) {
        elements.pop().close();
    }
}
/**
 * Close every open element, innermost first, leaving the stack empty.
 *
 * @param elements the stack of open elements
 */
private void endBlock(Stack<ElementState> elements) {
    while (elements.size() > 0) {
        elements.pop().close();
    }
}
/**
 * Tells whether any of the open elements is a table.
 *
 * @param elements the stack of open elements
 *
 * @return true if a table is open
 */
private boolean inTable(Stack<ElementState> elements) {
    for (ElementState element: elements) {
        if (element.isTable()) {
            return true;
        }
    }
    return false;
}
/**
 * Emit a textile line that contains no block modifiers, but may contain phrase modifiers and replacement tokens.
 *
 * The line is scanned left to right. At each step the earliest of the next built-in phrase
 * match and the next dialect phrase modifier (if a dialect is set) is handled; a dialect
 * modifier wins only when it starts strictly before the built-in match. Text between
 * matches is passed to {@link #emitText(String)}.
 *
 * @param textileLine the textile markup to process.
 *
 * @throws IllegalStateException if no parse is in progress
 */
private void emitTextileLine(String textileLine) {
    if (state == null) {
        throw new IllegalStateException();
    }
    Matcher modifierMatcher = PHRASE_MODIFIERS.matcher(textileLine);
    int consumed = 0;
    PhraseModifierProcessor dialectProcessor = null;
    final int lineLength = textileLine.length();
    while (true) {
        if (dialect != null) {
            // look for the next dialect modifier unless the one already found is still ahead
            if (dialectProcessor == null || dialectProcessor.getLineStartOffset() < consumed) {
                dialectProcessor = dialect.findPhraseModifier(textileLine, consumed);
            }
            if (consumed > 0) {
                modifierMatcher.region(consumed, lineLength);
            }
        }
        int matchStart = modifierMatcher.find()?modifierMatcher.start():-1;
        if (dialectProcessor != null && (matchStart == -1 || dialectProcessor.getLineStartOffset() < matchStart)) {
            matchStart = dialectProcessor.getLineStartOffset();
            if (consumed < matchStart) {
                emitText(textileLine.substring(consumed,matchStart));
            }
            dialectProcessor.setParser(this);
            dialectProcessor.setParserServices(services);
            dialectProcessor.setBuilder(builder);
            dialectProcessor.setTextile(state.textile);
            dialectProcessor.emitPhrase();
            consumed = dialectProcessor.getLineEndOffset();
            continue;
        }
        if (matchStart == -1) {
            break;
        }
        if (consumed < matchStart) {
            emitText(textileLine.substring(consumed,matchStart));
        }
        // escaped run: content is passed through untouched
        if (modifierMatcher.group(PHRASE_MODIFIER_PATTERN_ESCAPE) != null) {
            builder.charactersUnescaped(modifierMatcher.group(PHRASE_MODIFIER_PATTERN_ESCAPE+1));
            consumed = modifierMatcher.end();
            continue;
        }
        // HTML tags are passed through untouched
        String startTag = modifierMatcher.group(PHRASE_MODIFIER_PATTERN_HTML_STARTTAG);
        if (startTag != null) {
            builder.charactersUnescaped(startTag);
            consumed = modifierMatcher.end();
            continue;
        }
        String endTag = modifierMatcher.group(PHRASE_MODIFIER_PATTERN_HTML_ENDTAG);
        if (endTag != null) {
            builder.charactersUnescaped(endTag);
            consumed = modifierMatcher.end();
            continue;
        }
        // delimited phrase
        String delimiter = modifierMatcher.group(PHRASE_MODIFIER_PATTERN_BYELEM);
        String content = modifierMatcher.group(PHRASE_MODIFIER_PATTERN_BYELEM+5);
        String elementName = textileCharToElementName.get(delimiter);
        final int attributesOffset = PHRASE_MODIFIER_PATTERN_BYELEM+1;
        if (!elementName.equals(IMAGE)) {
            SpanType spanType = elementNameToSpanType.get(elementName);
            builder.beginSpan(spanType, createAttributes(modifierMatcher,attributesOffset));
            emitText(content);
            builder.endSpan();
            consumed = modifierMatcher.end();
            continue;
        }
        // an image directly followed by ':target' is an image link
        Matcher linkSuffixMatcher = HYPERLINK_SUFFIX.matcher(textileLine).region(modifierMatcher.end(), lineLength);
        if (linkSuffixMatcher.matches()) {
            builder.imageLink(linkSuffixMatcher.group(1),content);
            consumed = linkSuffixMatcher.end(1);
            modifierMatcher.region(consumed,lineLength);
        } else {
            builder.image(createAttributes(modifierMatcher,attributesOffset),content);
            consumed = modifierMatcher.end();
        }
    }
    if (consumed < lineLength) {
        emitText(textileLine.substring(consumed));
    }
}
/**
 * Tells whether any of the open elements holds preformatted text.
 *
 * @param elements the stack of open elements
 *
 * @return true if a preformatted element is open
 */
private boolean inPreformattedBlock(Stack<ElementState> elements) {
    for (ElementState element: elements) {
        if (element.isPreformattedText()) {
            return true;
        }
    }
    return false;
}
/**
 * Emit text to the builder, where the text contains no phrase or block modifiers.
 *
 * The text is scanned left to right for replacement tokens. At each step the earliest of the
 * next built-in token and the next dialect token (if a dialect is set) is handled; a dialect
 * token wins only when it starts strictly before the built-in one. Everything between tokens
 * is emitted as plain characters.
 *
 * @param textileMarkup the textile markup to process that contains no phrase or block modifiers
 *
 * @throws IllegalStateException if no parse is in progress
 */
private void emitText(String textileMarkup) {
    if (state == null) {
        throw new IllegalStateException();
    }
    Matcher tokenMatcher = REPLACEMENT_PATTERN.matcher(textileMarkup);
    int consumed = 0;
    ReplacementTokenProcessor dialectProcessor = null;
    final int markupLength = textileMarkup.length();
    while (true) {
        if (dialect != null) {
            // look for the next dialect token unless the one already found is still ahead
            if (dialectProcessor == null || dialectProcessor.getLineStartOffset() < consumed) {
                dialectProcessor = dialect.findReplacementToken(textileMarkup, consumed);
            }
            if (consumed > 0) {
                tokenMatcher.region(consumed, markupLength);
            }
        }
        int matchStart = tokenMatcher.find()?tokenMatcher.start():-1;
        if (dialectProcessor != null && (matchStart == -1 || dialectProcessor.getLineStartOffset() < matchStart)) {
            matchStart = dialectProcessor.getLineStartOffset();
            if (consumed < matchStart) {
                builder.characters(textileMarkup.substring(consumed,matchStart));
            }
            dialectProcessor.setParser(this);
            dialectProcessor.setParserServices(services);
            dialectProcessor.setBuilder(builder);
            dialectProcessor.setTextile(state.textile);
            dialectProcessor.emitTokenContent();
            consumed = dialectProcessor.getLineEndOffset();
            continue;
        }
        if (matchStart == -1) {
            break;
        }
        if (consumed < matchStart) {
            builder.characters(textileMarkup.substring(consumed,matchStart));
        }
        emitReplacementMatch(tokenMatcher);
        consumed = tokenMatcher.end();
    }
    if (consumed < markupLength) {
        builder.characters(textileMarkup.substring(consumed));
    }
}
/**
 * Emit the output for the current match of the replacement pattern. The kind of token is
 * determined by the first capturing group that is set, checked in a fixed order.
 *
 * @param replacementMatcher a matcher positioned on a successful match
 *
 * @throws IllegalStateException if the match corresponds to none of the known tokens
 */
private void emitReplacementMatch(Matcher replacementMatcher) {
    String token = replacementMatcher.group(1);
    if (token != null && token.length() != 0) {
        // literal token, {toc} or {glossary}
        Emitter emitter = replacements.get(token);
        if (emitter == null) {
            emitter = createEmitter(token,replacementMatcher,state.textile);
            if (emitter == null) {
                throw new IllegalStateException();
            }
        }
        emitter.emit(builder, replacementMatcher);
        return;
    }
    String hyperlinkBoundaryText = replacementMatcher.group(REPLACEMENT_PATTERN_HYPERLINK);
    if (hyperlinkBoundaryText != null) {
        String hyperlinkSrc = replacementMatcher.group(REPLACEMENT_PATTERN_HYPERLINK+1);
        String href = replacementMatcher.group(REPLACEMENT_PATTERN_HYPERLINK+2);
        if (hyperlinkBoundaryText.equals("\"")) {
            builder.link(href, hyperlinkSrc);
        } else {
            builder.imageLink(href, hyperlinkSrc);
        }
        return;
    }
    String footnote = replacementMatcher.group(REPLACEMENT_PATTERN_FOOTNOTE_GROUP);
    if (footnote != null) {
        String htmlId = getFootnoteId(state.footnoteIdToHtmlId, footnote);
        builder.beginSpan(SpanType.SUPERSCRIPT, new Attributes(null,"footnote",null,null));
        builder.link("#"+htmlId, footnote);
        builder.endSpan();
        return;
    }
    if (replacementMatcher.group(REPLACEMENT_PATTERN_APOS_GROUP) != null) {
        builder.entityReference("#8217");
        return;
    }
    String quoted = replacementMatcher.group(REPLACEMENT_PATTERN_QUOTE_GROUP);
    if (quoted != null) {
        builder.entityReference("#8216");
        builder.characters(quoted);
        builder.entityReference("#8217");
        return;
    }
    String dblQuoted = replacementMatcher.group(REPLACEMENT_PATTERN_DBLQUOTE_GROUP);
    if (dblQuoted != null) {
        builder.entityReference("#8220");
        builder.characters(dblQuoted);
        builder.entityReference("#8221");
        return;
    }
    if (replacementMatcher.group(REPLACEMENT_PATTERN_EMDASH_GROUP) != null) {
        builder.entityReference("#8212");
        return;
    }
    if (replacementMatcher.group(REPLACEMENT_PATTERN_ENDASH_GROUP) != null) {
        builder.entityReference("#8211");
        return;
    }
    if (replacementMatcher.group(REPLACEMENT_PATTERN_MUL_GROUP) != null) {
        builder.entityReference("#215");
        return;
    }
    String acronym = replacementMatcher.group(REPLACEMENT_PATTERN_ACRONYM_GROUP);
    if (acronym == null) {
        throw new IllegalStateException();
    }
    String acronymDef = replacementMatcher.group(REPLACEMENT_PATTERN_ACRONYM_GROUP+1);
    String previousDef = state.glossaryItems.put(acronym, acronymDef);
    if (previousDef != null && previousDef.length() > acronymDef.length()) {
        // the glossary keeps the longer of two definitions of the same acronym
        state.glossaryItems.put(acronym, previousDef);
    }
    builder.acronym(acronym, acronymDef);
}
/**
 * Create the emitter for a generative token.
 *
 * @param token the matched token text
 * @param replacementMatcher the matcher positioned on the token, used to read its options
 * @param textile the complete markup being parsed
 *
 * @return an emitter for a <code>{toc</code> or <code>{glossary</code> token, otherwise null
 */
private Emitter createEmitter(String token, Matcher replacementMatcher, String textile) {
    Emitter emitter = null;
    if (token.startsWith("{toc")) {
        emitter = new TableOfContentsEmitter(textile,replacementMatcher.group(REPLACEMENT_PATTERN_TOC_OPTIONS_GROUP));
    } else if (token.startsWith("{glossary")) {
        emitter = new GlossaryEmitter(textile,replacementMatcher.group(REPLACEMENT_PATTERN_GLOSS_OPTIONS_GROUP));
    }
    return emitter;
}
/**
 * Look up the element id assigned to a footnote, assigning a new random id on first use.
 *
 * @param footnoteIdToHtmlId the ids assigned so far; updated when a new id is assigned
 * @param footnote the footnote number as written in the markup
 *
 * @return the element id for the footnote
 */
private String getFootnoteId(Map<String, String> footnoteIdToHtmlId, String footnote) {
    String known = footnoteIdToHtmlId.get(footnote);
    if (known != null) {
        return known;
    }
    String generated = "fn"+UUID.randomUUID().toString().replace("-", "");
    footnoteIdToHtmlId.put(footnote,generated);
    return generated;
}
/**
 * Bring the stack of open elements into the state required for a new list item at the given
 * nesting level: elements nested deeper than the target are closed, missing list levels are
 * opened, and a new list item is opened on top.
 *
 * @param elements the stack of open elements
 * @param numeric whether lists opened by this call are numeric rather than bulleted
 * @param level the nesting level of the new item, starting at 1
 * @param matcher the matcher positioned on the list line, used for the attributes of new lists
 * @param matcherAttributeOffset index of the first attribute group in the matcher
 */
private void adjustList(Stack<ElementState> elements, boolean numeric, int level, Matcher matcher, int matcherAttributeOffset) {
    int listDepth = 0;
    int targetListIndex = -1;
    int lastItemIndex = -1;
    // walk outwards-in, counting lists until the target level has been inspected
    for (int index = 0;index<elements.size();++index) {
        ElementState element = elements.get(index);
        if (element.isList()) {
            if (++listDepth == level) {
                targetListIndex = index;
            }
            continue;
        }
        if (element.isListItem()) {
            lastItemIndex = index;
        }
        if (listDepth == level) {
            break;
        }
    }
    if (lastItemIndex > 0) {
        // close everything nested inside the last list item that was seen
        pop(elements, elements.size()-(lastItemIndex+1));
        if (level == listDepth) {
            // the item itself is a sibling of the new item and ends here
            elements.pop().close();
        }
    } else if (targetListIndex >= 0) {
        pop(elements, elements.size()-(targetListIndex+1));
    } else {
        // close non-list elements down to the innermost open list, if any
        while (!elements.isEmpty() && !elements.peek().isList()) {
            elements.pop().close();
        }
    }
    if (level == listDepth) {
        elements.push(new BlockState(builder,BlockType.LIST_ITEM,new Attributes()));
    }
    final BlockType listType = numeric?BlockType.NUMERIC_LIST:BlockType.BULLETED_LIST;
    for (;listDepth < level;++listDepth) {
        elements.push(new BlockState(builder,listType,createAttributes(matcher,matcherAttributeOffset)));
        elements.push(new BlockState(builder,BlockType.LIST_ITEM,new Attributes()));
    }
}
/**
 * Create attributes from the attribute groups of the given match, without any preset values.
 *
 * @param matcher a matcher positioned on a successful match
 * @param offset index of the first attribute group (the class group)
 *
 * @return the attributes, never null
 */
private Attributes createAttributes(Matcher matcher,int offset) {
    return createAttributes(matcher, offset, null,null,null);
}
/**
 * Create attributes from the attribute groups of the given match, combined with preset values.
 * The groups are read in the order class, id, style, language starting at <code>offset</code>.
 *
 * @param matcher a matcher positioned on a successful match
 * @param offset index of the first attribute group (the class group)
 * @param id the id to use; when null the id from the match is used
 * @param cssClass a class to put in front of the matched class, may be null
 * @param cssStyles styles to put in front of the matched styles, may be null
 *
 * @return the attributes, never null
 */
private Attributes createAttributes(Matcher matcher, int offset, String id, String cssClass,String cssStyles) {
    if (id == null) {
        id = matcher.group(offset+1);
    }
    Attributes attributes = new Attributes();
    String combinedClass = joinWithSpace(cssClass, matcher.group(offset));
    String combinedStyle = joinWithSpace(cssStyles, matcher.group(offset+2));
    String language = matcher.group(offset+3);
    if (combinedClass != null) {
        attributes.setCssClass(combinedClass);
    }
    if (combinedStyle != null) {
        attributes.setCssStyle(combinedStyle);
    }
    attributes.setId(id);
    attributes.setLanguage(language);
    return attributes;
}
/**
 * Join two optional values with a single space.
 *
 * @return the non-null value if only one is given, both joined by a space if both are given,
 *         null if neither is given
 */
private static String joinWithSpace(String first, String second) {
    if (first == null) {
        return second;
    }
    if (second == null) {
        return first;
    }
    return first+' '+second;
}
/**
 * Produces the output for a matched replacement token.
 */
private interface Emitter {
    /**
     * Emit the replacement content.
     *
     * @param builder the builder to emit to
     * @param matcher the matcher positioned on the token being replaced
     */
    void emit(DocumentBuilder builder,Matcher matcher);
}
/**
 * Emitter that replaces a token with a fixed entity reference.
 */
private static class EntityReference implements Emitter {
    /** The entity to emit, as passed to the builder. */
    private final String entity;

    private EntityReference(String entity) {
        this.entity = entity;
    }

    public void emit(DocumentBuilder builder, Matcher matcher) {
        builder.entityReference(this.entity);
    }
}
/**
 * Emitter for the <code>{toc}</code> token: emits a nested numeric list with one link per
 * outline item of the document.
 *
 * Options are given as <code>key=value</code> pairs separated by '|'. Recognized keys are
 * <code>style</code> (the list-style, default "none") and <code>maxLevel</code> (the deepest
 * outline level to include, unlimited by default). Anything else is ignored.
 */
private class TableOfContentsEmitter implements Emitter {
    private final String textile;
    private String style = "none";
    private int maxLevel = Integer.MAX_VALUE;

    public TableOfContentsEmitter(String textile, String options) {
        this.textile = textile;
        if (options == null) {
            return;
        }
        for (String optionPair: options.split("\\s*\\|\\s*")) {
            String[] keyValue = optionPair.split("\\s*=\\s*");
            if (keyValue.length != 2) {
                continue;
            }
            String key = keyValue[0].trim();
            String value = keyValue[1].trim();
            if ("style".equals(key)) {
                style = value;
            } else if ("maxLevel".equals(key)) {
                try {
                    maxLevel = Integer.parseInt(value);
                } catch (NumberFormatException ignored) {
                    // a value that is not a number leaves the level unlimited
                }
            }
        }
    }

    public void emit(DocumentBuilder builder, Matcher matcher) {
        OutlineItem rootItem = new OutlineParser().parse(textile);
        emitToc(rootItem);
    }

    /**
     * Emit the children of the given item as a list, recursing into each child.
     * Nothing is emitted for an item without children or below the maximum level.
     */
    private void emitToc(OutlineItem item) {
        if (item.getChildren().isEmpty() || (item.getLevel()+1) > maxLevel) {
            return;
        }
        Attributes nullAttributes = new Attributes();
        builder.beginBlock(BlockType.NUMERIC_LIST, new Attributes(null,null,"list-style: "+style+";",null));
        for (OutlineItem child: item.getChildren()) {
            builder.beginBlock(BlockType.LIST_ITEM, nullAttributes);
            builder.link('#'+child.getId(), child.getLabel());
            // nested entries go inside the list item of their parent
            emitToc(child);
            builder.endBlock();
        }
        builder.endBlock();
    }
}
/**
 * Emitter for the <code>{glossary}</code> token: emits a bulleted list of the acronyms
 * collected so far in the current parse, sorted by acronym.
 *
 * Options are given as <code>key=value</code> pairs separated by '|'. The only recognized
 * key is <code>style</code> (the list-style, default "none").
 */
private class GlossaryEmitter implements Emitter {
    private String style = "none";

    public GlossaryEmitter(String textile, String options) {
        if (options == null) {
            return;
        }
        for (String optionPair: options.split("\\s*\\|\\s*")) {
            String[] keyValue = optionPair.split("\\s*=\\s*");
            if (keyValue.length != 2) {
                continue;
            }
            String key = keyValue[0].trim();
            String value = keyValue[1].trim();
            if ("style".equals(key)) {
                style = value;
            }
        }
    }

    public void emit(DocumentBuilder builder, Matcher matcher) {
        if (state.glossaryItems.isEmpty()) {
            // no acronyms, no list
            return;
        }
        SortedMap<String,String> sortedItems = new TreeMap<String, String>(state.glossaryItems);
        builder.beginBlock(BlockType.BULLETED_LIST, new Attributes(null,null,"list-style: "+style,null));
        Attributes nullAttributes = new Attributes();
        for (Map.Entry<String, String> item: sortedItems.entrySet()) {
            builder.beginBlock(BlockType.LIST_ITEM, nullAttributes);
            builder.beginSpan(SpanType.STRONG, nullAttributes);
            builder.characters(item.getKey());
            builder.endSpan();
            builder.characters(" = ");
            builder.characters(item.getValue());
            builder.endBlock();
        }
        builder.endBlock();
    }
}
/**
 * An element that has been opened on the builder and not yet closed. The classification
 * methods all answer false unless a subclass overrides them.
 */
private static abstract class ElementState {
    /** The builder the element was opened on. */
    protected final DocumentBuilder builder;

    public ElementState(DocumentBuilder builder) {
        this.builder = builder;
    }

    /** Close the element on the builder. */
    public abstract void close();

    /** @return true if this element is a list */
    public boolean isList() {
        return false;
    }

    /** @return true if this element is a list item */
    public boolean isListItem() {
        return false;
    }

    /** @return true if this element is a table */
    public boolean isTable() {
        return false;
    }

    /** @return true if the content of this element is preformatted */
    public boolean isPreformattedText() {
        return false;
    }
}
/**
 * An open block. The block is begun on the builder when the state is constructed and
 * ended when the state is closed.
 */
private static class BlockState extends ElementState {
    private final BlockType type;

    public BlockState(DocumentBuilder builder,DocumentBuilder.BlockType type,Attributes attributes) {
        super(builder);
        this.type = type;
        builder.beginBlock(type,attributes);
    }

    @Override
    public void close() {
        builder.endBlock();
    }

    @Override
    public boolean isList() {
        return type.isList();
    }

    @Override
    public boolean isListItem() {
        return BlockType.LIST_ITEM == type;
    }

    @Override
    public boolean isTable() {
        return BlockType.TABLE == type;
    }

    @Override
    public boolean isPreformattedText() {
        return type.isPreformatted();
    }
}
/**
 * An open heading. The heading is begun on the builder when the state is constructed and
 * ended when the state is closed. A heading without an explicit id receives a generated one.
 */
private class HeadingState extends ElementState {
    public HeadingState(DocumentBuilder builder,int level,Attributes attributes, String headerText) {
        super(builder);
        final boolean idMissing = attributes.getId() == null;
        if (idMissing) {
            String generatedId = state.idGenerator.newId("h"+level,headerText);
            attributes.setId(generatedId);
        }
        builder.beginHeading(level, attributes);
    }

    @Override
    public void close() {
        builder.endHeading();
    }
}
}