/**
* Copyright (c) 2013 Puppet Labs, Inc. and other contributors, as listed below.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Puppet Labs
*/
package com.puppetlabs.geppetto.ruby;
import java.util.Collections;
import java.util.List;
import com.puppetlabs.geppetto.common.CharSequences;
import com.puppetlabs.geppetto.ruby.RubyDocProcessor.RubyDocLexer.HeadingToken;
import com.puppetlabs.geppetto.ruby.RubyDocProcessor.RubyDocLexer.Token;
import org.eclipse.xtext.util.PolymorphicDispatcher;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
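/**
* Converts Ruby documentation text into HTML. The input lines are first tokenized by
* {@link RubyDocLexer} (headings, paragraphs, lists, verbatim sections and text spans),
* and every token is then rendered by the matching <code>_html</code> method.
*
*/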
public class RubyDocProcessor {
public static class RubyDocLexer {
/**
 * Token for a heading line, i.e. a line that starts with one or more '=' characters.
 * The number of leading '=' characters is the heading level (clamped to 1..5) and the
 * remainder of the line is the heading text.
 */
public class HeadingToken extends Token {
    int level;

    /**
     * @param lineIdx
     *            index into the lexer's lines of the heading line
     */
    HeadingToken(int lineIdx) {
        CharSequence s = lines[lineIdx];
        // count the leading '=' markers; this also terminates correctly for a line that
        // consists solely of '=' (which previously left the level at 0, i.e. "<h0>")
        int markers = 0;
        while(markers < s.length() && s.charAt(markers) == '=')
            markers++;
        // everything after the markers is the heading text (empty when there is none)
        text = s.subSequence(markers, s.length());
        // never below level 1, and never deeper than level 5
        level = Math.max(1, Math.min(5, markers));
    }

    /**
     * @return the heading level, a value between 1 and 5
     */
    public int getLevel() {
        return level;
    }
}
/**
 * Marker token added by the lexer after the last item of a list; it carries no text.
 */
public class ListEndToken extends Token {
}
/**
 * Marker token added by the lexer when a list item is complete; it carries no text.
 */
public class ListItemEndToken extends Token {
}
/**
 * Marker token added by the lexer before the content of a list item; it carries no text.
 */
public class ListItemStartToken extends Token {
}
/**
 * Token that opens a list. Its text is the single character that introduced the list.
 */
public class ListStartToken extends Token {
    /**
     * @param startChar
     *            the list marker character found at the start of the first item
     */
    ListStartToken(char startChar) {
        final String marker = String.valueOf(startChar);
        text = marker;
    }
}
/**
 * Marker token added by the lexer when a paragraph is complete; it carries no text.
 */
public class ParagraphEndToken extends Token {
}
/**
 * Marker token added by the lexer before the content of a paragraph; it carries no text.
 */
public class ParagraphStartToken extends Token {
}
/**
 * Token for a run of consecutive text lines. The lines are stripped of the current natural
 * margin, joined with a single space, and the inline markup is translated to HTML tags.
 */
public class SpanToken extends Token {
    StringBuilder builder = new StringBuilder();

    /**
     * @param startLine
     *            index of the first line of the span (inclusive)
     * @param lastLine
     *            index after the last line of the span (exclusive)
     * @throws IllegalArgumentException
     *             if the range does not contain at least one line
     */
    SpanToken(int startLine, int lastLine) {
        if(startLine >= lastLine)
            throw new IllegalArgumentException("empty span");

        // collect the lines with the natural margin cut off
        List<String> parts = Lists.newArrayListWithExpectedSize(lastLine - startLine);
        int lineIdx = startLine;
        while(lineIdx < lastLine) {
            parts.add(lines[lineIdx].toString().substring(naturalMargin));
            lineIdx++;
        }

        // { regular expression, replacement } pairs, applied in this order
        // (the double-star rule must run before the single-star rule)
        final String[][] inlineMarkup = {
            { "\\*\\*([^\\*]+)\\*\\*", "<strong>$1</strong>" },
            { "\\*([^\\*]+)\\*", "<b>$1</b>" },
            { "_([^_]+)_", "<i>$1</i>" },
            { "`([^`]+)`", "<tt>$1</tt>" },
            { "\\+([^\\+]+)\\+", "<tt>$1</tt>" } };

        String html = Joiner.on(" ").join(parts);
        for(String[] rule : inlineMarkup)
            html = html.replaceAll(rule[0], rule[1]);
        text = html;
    }
}
/**
 * Base class of all tokens produced by the lexer. A token optionally carries text;
 * tokens without text keep the empty default.
 */
public abstract class Token {
    protected CharSequence text = "";

    /**
     * @return the text of this token, empty for pure marker tokens
     */
    CharSequence getText() {
        return text;
    }

    /**
     * Debugging type output: the simple class name in parentheses, followed by the
     * quoted text when the token has any.
     */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder("(").append(getClass().getSimpleName());
        if(getText().length() > 0) {
            out.append("'");
            out.append(getText());
            out.append("'");
        }
        return out.append(")").toString();
    }
}
/**
 * Token for a verbatim section. Each line is kept on its own line (terminated by a newline)
 * after being passed through <code>CharSequences.trim</code> with the current natural margin.
 */
public class VerbatimToken extends Token {
    /**
     * @param startLine
     *            index of the first verbatim line (inclusive)
     * @param endLine
     *            index after the last verbatim line (exclusive)
     */
    VerbatimToken(int startLine, int endLine) {
        // The natural margin is removed from every line; otherwise the text would be
        // indented a second time when rendered inside a possibly nested/indented <pre>.
        StringBuilder collected = new StringBuilder();
        int lineIdx = startLine;
        while(lineIdx < endLine) {
            CharSequence line = lines[lineIdx];
            collected.append(CharSequences.trim(line, naturalMargin, line.length()));
            collected.append("\n");
            lineIdx++;
        }
        text = collected.toString();
    }
}
// The documentation lines being tokenized.
private CharSequence[] lines;
// Stack of saved margins; pushMargin() inserts at index 0 and popMargin() removes index 0.
List<Integer> marginStack = Lists.newLinkedList();
// The tokens produced so far, in the order they were emitted.
List<Token> tokens = Lists.newArrayList();
// Column where the text of the construct currently being lexed starts.
int naturalMargin = 0;
/**
 * Creates the lexer and immediately tokenizes the given lines; the result is available
 * from {@link #getTokens()}.
 *
 * @param lines
 *            the documentation lines to tokenize
 */
RubyDocLexer(CharSequence[] lines) {
    this.lines = lines;
    // seed the margin stack with the initial margin (0)
    pushMargin();
    tokenize();
}
/**
 * Skips a comment section.
 *
 * @param start
 *            index of the line where the search for the comment end begins
 * @return the index of the first line at or after <code>start</code> that ends the comment,
 *         or the index of the last line if the comment is never ended
 */
private int eatComment(int start) {
    int idx = start;
    while(idx < lines.length) {
        if(isCommentEnd(idx))
            return idx;
        idx++;
    }
    return lines.length - 1;
}
/**
 * Emits a heading token for the given line.
 *
 * @param start
 *            index of the heading line
 * @return the index of the last consumed line, which is <code>start</code> itself
 */
private int emitHeading(int start) {
    final HeadingToken heading = new HeadingToken(start);
    tokens.add(heading);
    return start;
}
/**
 * Emits a complete list: a list start token, all list items, and a list end token.
 * While the list is processed the natural margin is moved to the column where the text of
 * the first item starts; the previous margin is restored before returning.
 *
 * @param start
 *            index of the line holding the first list item (a line for which
 *            isListStart is true)
 * @return the index of the last line consumed by the list
 */
private int emitList(int start) {
    int nonWsPos = CharSequences.indexOfNonWhitespace(lines[start], naturalMargin);
    pushMargin();
    naturalMargin = CharSequences.indexOfNonWhitespace(lines[start], nonWsPos + 1);
    if(naturalMargin < 0) {
        // The bullet has no text after it (e.g. "* "). A negative margin would later be used
        // as a substring index when the item's span is created, so fall back to the column
        // right after the bullet and its mandatory space.
        naturalMargin = nonWsPos + 2;
    }
    tokens.add(new ListStartToken(lines[start].charAt(nonWsPos)));
    int i = start;
    for(; i < lines.length; i++) {
        if(isCommentStart(i))
            i = eatComment(i);
        else if(isListContinue(i)) {
            // true for first list item, as well as subsequent items
            i = emitListItem(i);
        }
        else {
            break;
        }
    }
    tokens.add(new ListEndToken());
    popMargin();
    // i is the first line that does not belong to the list
    return i - 1;
}
/**
 * Emits one list item: a start token, the paragraphs that make up the item, and an end token.
 * The item ends at the first line after <code>start</code> that is left of the natural margin.
 *
 * @param start
 *            index of the line holding the list item marker
 * @return the index of the last line consumed by the item
 */
private int emitListItem(int start) {
    tokens.add(new ListItemStartToken());
    int i = start;
    while(i < lines.length) {
        if(!isCommentStart(i)) {
            // the marker line itself is left of the margin, hence the i > start guard
            if(i > start && isLeftOfMargin(i))
                break;
            i = emitPara(i);
        }
        else {
            i = eatComment(i);
        }
        i++;
    }
    tokens.add(new ListItemEndToken());
    return i - 1;
}
/**
 * Emits a paragraph: a start token, the text spans and nested verbatim sections/lists found
 * in it, and an end token. The paragraph ends at a blank line, at a heading, or at a line
 * (other than the first) that is left of the natural margin; that line is not consumed.
 *
 * @param start
 *            index of the first line of the paragraph
 * @return the index of the last line consumed by the paragraph
 */
private int emitPara(final int start) {
    tokens.add(new ParagraphStartToken());
    int spanStart = start;
    int i = start;
    while(i < lines.length) {
        if(isBlankLine(i)) {
            emitSpan(spanStart, i);
            spanStart = i + 1;
            break;
        }
        if(isCommentStart(i)) {
            emitSpan(spanStart, i);
            i = eatComment(i);
            spanStart = i + 1;
        }
        else if(isVerbatim(i)) {
            emitSpan(spanStart, i);
            i = emitVerbatim(i);
            spanStart = i + 1;
        }
        else if((i > start && isLeftOfMargin(i)) || isHeading(i)) {
            // terminating line: flush pending text and leave the line to the caller
            emitSpan(spanStart, i);
            spanStart = i + 1;
            break;
        }
        else if(isListStart(i)) {
            emitSpan(spanStart, i);
            i = emitList(i);
            spanStart = i + 1;
        }
        // any other line is plain text and becomes part of the pending span
        i++;
    }
    // text pending when the input ran out (after a break spanStart is already past i)
    emitSpan(spanStart, i);
    tokens.add(new ParagraphEndToken());
    return i - 1;
}

/**
 * Adds a span token for the lines <code>from</code> (inclusive) to <code>to</code> (exclusive),
 * unless that range is empty.
 */
private void emitSpan(int from, int to) {
    if(to > from)
        tokens.add(new SpanToken(from, to));
}
/**
 * Emits whatever starts at the given line: a list, a heading, or (by default) a paragraph.
 *
 * @param start
 *            index of the line to process
 * @return the index of the last line consumed
 */
private int emitParaOrList(int start) {
    if(isListStart(start)) {
        return emitList(start);
    }
    if(isHeading(start)) {
        return emitHeading(start);
    }
    return emitPara(start);
}
/**
 * Emits a verbatim section. The section extends from <code>start</code> up to (but not
 * including) the first non-blank line that is left of the verbatim margin, which is the
 * current natural margin plus 2. The previous margin is restored before returning.
 *
 * @param start
 *            index of the first verbatim line
 * @return the index of the last line consumed by the section
 */
private int emitVerbatim(int start) {
    pushMargin();
    // A fixed indent of 2 is used rather than the indent of the first line; taking the
    // indent from the first line would make it impossible for the first verbatim line to
    // be indented more than the lines that follow it.
    naturalMargin += 2;
    int end = start;
    // blank lines stay in the section; so does everything at or right of the margin
    while(end < lines.length && (isBlankLine(end) || !isLeftOfMargin(end))) {
        end++;
    }
    tokens.add(new VerbatimToken(start, end));
    popMargin();
    return end - 1;
}
/**
 * @return an unmodifiable view of the tokens produced by this lexer
 */
public List<Token> getTokens() {
    final List<Token> readOnly = Collections.unmodifiableList(tokens);
    return readOnly;
}
/**
 * @param i
 *            line index
 * @return true if the line has no non-whitespace character
 */
private boolean isBlankLine(int i) {
    final int firstNonWs = CharSequences.indexOfNonWhitespace(lines[i], 0);
    return firstNonWs < 0;
}
/**
 * @param i
 *            line index
 * @return true if the line starts with the comment end marker "++"
 */
private boolean isCommentEnd(int i) {
    final CharSequence line = lines[i];
    return CharSequences.startsWith(line, "++");
}
/**
 * @param i
 *            line index
 * @return true if the line starts with the comment start marker "--"
 */
private boolean isCommentStart(int i) {
    final CharSequence line = lines[i];
    return CharSequences.startsWith(line, "--");
}
/**
 * @param i
 *            line index
 * @return true if the line starts with the heading marker "="
 */
private boolean isHeading(int i) {
    final CharSequence line = lines[i];
    return CharSequences.startsWith(line, "=");
}
/**
 * @param i
 *            line index
 * @return true if the position of the first non-whitespace character of the line is smaller
 *         than the natural margin
 */
private boolean isLeftOfMargin(int i) {
    final int firstNonWs = CharSequences.indexOfNonWhitespace(lines[i], 0);
    return firstNonWs < naturalMargin;
}
/**
 * Returns true if the line holds an item of the list currently being emitted, i.e. if its
 * first non-whitespace character (searched from the margin stored at index 1 of the margin
 * stack) is a list start character that is followed by a space.
 *
 * @param i
 *            line index
 * @return true if the line starts a list item
 */
private boolean isListContinue(int i) {
    // NOTE(review): index 1 is the entry below the margin pushed by emitList - confirm that
    // this, and not index 0, is the intended margin for nested lists.
    int margin = marginStack.get(1);
    CharSequence line = lines[i];
    int nonWsPos = CharSequences.indexOfNonWhitespace(line, margin);
    if(nonWsPos < 0)
        return false;
    // A marker that is the last character of the line (e.g. "*") cannot be followed by a
    // space; checking the length first avoids reading past the end of the line.
    if(nonWsPos + 1 >= line.length())
        return false;
    return isListStartChar(line.charAt(nonWsPos)) && line.charAt(nonWsPos + 1) == ' ';
}
/**
 * Returns true if the first non-whitespace character at or after the natural margin is a
 * list start character that is followed by a space. (The space is required to not get a
 * false positive on *bold*.) The check starts at the natural margin so that a sublist
 * start such as "* * item" is detected.
 *
 * @param i
 *            line index
 * @return true if a list starts on the line
 */
private boolean isListStart(int i) {
    CharSequence line = lines[i];
    int nonWsPos = CharSequences.indexOfNonWhitespace(line, naturalMargin);
    if(nonWsPos < 0)
        return false;
    // A marker that is the last character of the line (e.g. "*") cannot be followed by a
    // space; checking the length first avoids reading past the end of the line.
    if(nonWsPos + 1 >= line.length())
        return false;
    return isListStartChar(line.charAt(nonWsPos)) && line.charAt(nonWsPos + 1) == ' ';
}
/**
 * @param c
 *            the character to check
 * @return true if the character is one of the list markers '*' or '-'
 */
private boolean isListStartChar(char c) {
    return c == '*' || c == '-';
}
/**
 * @param i
 *            line index
 * @return true if the first non-whitespace character of the line is at least two columns
 *         right of the natural margin
 */
private boolean isVerbatim(int i) {
    final int firstNonWs = CharSequences.indexOfNonWhitespace(lines[i], 0);
    final int verbatimMargin = naturalMargin + 2;
    return firstNonWs >= verbatimMargin;
}
/**
 * Restores the natural margin from the top (index 0) of the margin stack and removes that entry.
 */
private void popMargin() {
    final Integer saved = marginStack.remove(0);
    naturalMargin = saved;
}
/**
 * Saves the current natural margin on top (index 0) of the margin stack.
 */
private void pushMargin() {
    final Integer current = Integer.valueOf(naturalMargin);
    marginStack.add(0, current);
}
/**
 * Walks over all lines and emits the top level tokens. Blank lines between constructs are
 * skipped; every other line starts a comment, a verbatim section, a heading, or a
 * paragraph/list, checked in that order.
 */
private void tokenize() {
    int i = 0;
    while(i < lines.length) {
        // index of the last line consumed in this round; a blank line consumes only itself
        int last = i;
        if(!isBlankLine(i)) {
            if(isCommentStart(i)) {
                last = eatComment(i); // skip comment
            }
            else if(isVerbatim(i)) {
                last = emitVerbatim(i);
            }
            else if(isHeading(i)) {
                last = emitHeading(i);
            }
            else {
                last = emitParaOrList(i);
            }
        }
        i = last + 1;
    }
}
}
/**
 * Dispatcher that selects the <code>_html</code> method of this instance matching the
 * runtime types of the arguments it is invoked with. When no method matches, an HTML
 * fragment describing the internal error is produced instead.
 */
private PolymorphicDispatcher<String> htmlDispatcher = new PolymorphicDispatcher<String>(
    "_html", 1, 2, Collections.singletonList(this), PolymorphicDispatcher.NullErrorHandler.<String> get()) {
    @Override
    protected String handleNoSuchMethod(Object... params) {
        final String tokenType = params[0].getClass().getName();
        StringBuilder message = new StringBuilder("<pre>INTERNAL ERROR, missing _html method for token: ");
        message.append(tokenType);
        message.append("</pre>");
        return message.toString();
    }
};
/**
 * Renders a heading as an HTML heading element of the token's level.
 *
 * @param o
 *            the heading token
 * @return the heading text wrapped in the heading tags for the token's level
 */
protected String _html(HeadingToken o) {
    final String tag = "h" + o.getLevel();
    StringBuilder html = new StringBuilder();
    html.append("<").append(tag).append(">");
    html.append(o.getText());
    html.append("</").append(tag).append(">");
    return html.toString();
}
/**
 * Default separator between two consecutive tokens: nothing is inserted (join with "").
 *
 * @param o1
 *            the previous token (or the start-of-input placeholder)
 * @param o2
 *            the token about to be rendered
 * @return the empty string
 */
protected String _html(Object o1, Object o2) {
    final String noSeparator = "";
    return noSeparator;
}
/**
 * Renders the end of a list.
 *
 * @param listEnd
 *            the list end token (not inspected)
 * @return the closing tag of an unordered list
 */
protected String _html(RubyDocLexer.ListEndToken listEnd) {
    return "</ul>";
}
/**
 * Renders the end of a list item.
 *
 * @param itemEnd
 *            the list item end token (not inspected)
 * @return the closing tag of a list item
 */
protected String _html(RubyDocLexer.ListItemEndToken itemEnd) {
    return "</li>";
}
/**
 * Renders the start of a list item.
 *
 * @param itemStart
 *            the list item start token (not inspected)
 * @return the opening tag of a list item
 */
protected String _html(RubyDocLexer.ListItemStartToken itemStart) {
    return "<li>";
}
/**
 * Renders the start of a list. The marker character carried by the token is not used;
 * every list is rendered as an unordered list.
 *
 * @param listStart
 *            the list start token
 * @return the opening tag of an unordered list
 */
protected String _html(RubyDocLexer.ListStartToken listStart) {
    return "<ul>";
}
/**
 * Renders the end of a paragraph.
 *
 * @param paragraphEnd
 *            the paragraph end token (not inspected)
 * @return the closing paragraph tag
 */
protected String _html(RubyDocLexer.ParagraphEndToken paragraphEnd) {
    return "</p>";
}
/**
 * Renders the start of a paragraph.
 *
 * @param paragraphStart
 *            the paragraph start token (not inspected)
 * @return the opening paragraph tag
 */
protected String _html(RubyDocLexer.ParagraphStartToken paragraphStart) {
    return "<p>";
}
/**
 * Renders a text span. The token text already contains the HTML for inline markup, so it
 * is returned as is.
 *
 * @param span
 *            the span token
 * @return the text of the span
 */
protected String _html(RubyDocLexer.SpanToken span) {
    // this is really PCDATA - span is a container since spans can be nested
    final CharSequence content = span.getText();
    return content.toString();
}
/**
 * Separator between two consecutive text spans: a single space (join with " ").
 *
 * @param o1
 *            the previous span
 * @param o2
 *            the span about to be rendered
 * @return a string holding one space
 */
protected String _html(RubyDocLexer.SpanToken o1, RubyDocLexer.SpanToken o2) {
    final String singleSpace = " ";
    return singleSpace;
}
/**
 * Renders a verbatim section as preformatted text.
 *
 * @param verbatim
 *            the verbatim token
 * @return the token text wrapped in a <code>pre</code> element
 */
protected String _html(RubyDocLexer.VerbatimToken verbatim) {
    StringBuilder html = new StringBuilder("<pre>");
    html.append(verbatim.getText());
    return html.append("</pre>").toString();
}
/**
 * Converts documentation lines to HTML. The lines are tokenized, and for every token the
 * separator for the (previous token, token) pair is appended, followed by the rendering
 * of the token itself.
 *
 * @param lines
 *            the documentation lines
 * @return the generated HTML
 */
public String asHTML(CharSequence[] lines) {
    final RubyDocLexer lexer = new RubyDocLexer(lines);
    final StringBuilder html = new StringBuilder();
    Object previous = ""; // represents start of input
    for(Token current : lexer.getTokens()) {
        final String separator = htmlDispatcher.invoke(previous, current); // join on
        final String rendered = htmlDispatcher.invoke(current);
        html.append(separator).append(rendered);
        previous = current;
    }
    return html.toString();
}
/**
 * Converts documentation lines to HTML.
 *
 * @param lines
 *            the documentation lines
 * @return the generated HTML
 */
public String asHTML(List<CharSequence> lines) {
    final CharSequence[] asArray = lines.toArray(new CharSequence[lines.size()]);
    return asHTML(asArray);
}
/**
 * Converts a documentation string to HTML. The string is split into lines at newline
 * characters; the common left margin is computed from all lines except the first and is
 * passed to <code>CharSequences.trim</code> for each of those lines, and the first line is
 * trimmed separately.
 *
 * @param s
 *            the documentation text, may be null
 * @return the generated HTML, or <code>s</code> itself when it is null or empty
 */
public String asHTML(String s) {
    if(s == null || s.length() < 1) {
        return s;
    }
    String[] lines = s.split("\\n");

    // smallest position of a non-whitespace character on any line but the first
    int minPos = Integer.MAX_VALUE;
    for(int lineIdx = 1; lineIdx < lines.length; lineIdx++) {
        final int firstNonWs = CharSequences.indexOfNonWhitespace(lines[lineIdx], 0);
        if(firstNonWs >= 0 && firstNonWs < minPos) {
            minPos = firstNonWs;
        }
    }
    final int naturalMargin = minPos;

    // The first line is problematic, since initial whitespace is inconsistently used in
    // the source. It is always trimmed, i.e. assumed to be at the natural margin and
    // never a verbatim line (there is nothing to measure its indent against).
    if(lines.length > 0) {
        lines[0] = CharSequences.trim(lines[0]).toString();
    }

    // trim the left margin of the remaining lines
    for(int lineIdx = 1; lineIdx < lines.length; lineIdx++) {
        final String line = lines[lineIdx];
        lines[lineIdx] = CharSequences.trim(line, naturalMargin, line.length()).toString();
    }

    // lines where pos 0 is the natural margin
    return asHTML(lines);
}
}