package dk.brics.xact.operations;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import dk.brics.xact.Attribute;
import dk.brics.xact.AttributeGap;
import dk.brics.xact.Comment;
import dk.brics.xact.Element;
import dk.brics.xact.Node;
import dk.brics.xact.NodeVisitor;
import dk.brics.xact.ProcessingInstruction;
import dk.brics.xact.StringTypes;
import dk.brics.xact.TemplateGap;
import dk.brics.xact.Text;
import dk.brics.xact.XML;
import dk.brics.xact.XMLWellformednessException;
import org.apache.log4j.Logger;
// TODO: optionally compress whitespace in XHTML mode
/**
* Prints XML templates to output streams.
*/
public class XMLPrinter {
private static final Set<String> XHTML_EMPTY_ELEMENTS = new HashSet<String>();
private static Logger log = Logger.getLogger(XMLPrinter.class);
static {
for (String n : new String[]{"br", "hr", "img", "input", "base", "meta", "link", "basefont", "param", "area"})
XHTML_EMPTY_ELEMENTS.add(n);
}
private XMLPrinter() { }
/**
* Returns the string value of an element.
*/
public static String getElementStringValue(Element e) {
final StringBuilder b = new StringBuilder();
final Stack<Node> stack = new Stack<Node>(); // using heap stack, avoids deep recursive calls
if (e.getFirstChild() != null)
stack.push(e.getFirstChild());
while (!stack.isEmpty()) {
Node n = stack.pop();
n.visitBy(new NodeVisitor() {
@Override
public void visit(Text n) {
printEscaped(b, n.getString(), false);
if (n.getNextSibling() != null)
stack.push(n.getNextSibling());
}
@Override
public void visit(Element n) {
if (n.getNextSibling() != null)
stack.push(n.getNextSibling());
if (n.getFirstChild() != null)
stack.push(n.getFirstChild());
}
});
}
return b.toString();
}
/**
* Prints the given XML template.
*
* @param gaps if set, gaps are printed, otherwise they are skipped
* @param xmldecl if set, include XML declaration (and DOCTYPE if XHTML)
* @throws XMLWellformednessException if XML declaration is enabled and the result is not a wellformed XML document
* @throws UnsupportedEncodingException if the encoding is not supported
*/
public static void print(XML x, OutputStream out, String encoding, final boolean gaps, final boolean xmldecl)
throws UnsupportedEncodingException {
print(x,out,encoding,gaps,xmldecl,XMLIndentation.NEVER);
}
/**
* Prints the given XML template.
*
* @param gaps if set, gaps are printed, otherwise they are skipped
* @param xmldecl if set, include XML declaration (and DOCTYPE if XHTML)
* @param indentation how to insert indentation
* @throws XMLWellformednessException if XML declaration is enabled and the result is not a wellformed XML document
* @throws UnsupportedEncodingException if the encoding is not supported
*/
public static void print(XML x, OutputStream out, String encoding, final boolean gaps, final boolean xmldecl, final XMLIndentation indentation)
throws UnsupportedEncodingException {
final String linebreak = indentation.getLineBreak();
final PrintStream p = new PrintStream(out, false, encoding);
if (xmldecl)
p.print("<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>\r\n");
final NamespacePrefixTracker ns = new NamespacePrefixTracker();
if (xmldecl) {
int rootelems = 0;
for (XML r = x; r != null; r = r.getNextSibling())
if (r.isElement())
rootelems++;
else if (r.isText()) {
Text t = r.asText();
if (!StringTypes.isWhitespace(t.getString()))
throw new XMLWellformednessException("non-whitespace text at root", t.getOrigin());
}
if (rootelems != 1)
throw new XMLWellformednessException("wrong number of root elements", x.getOrigin());
}
final Stack<Entry> stack = new Stack<Entry>(); // using heap stack, avoids deep recursive calls
final boolean[] non_first_element = new boolean[1];
stack.push(new Entry(Entry.Kind.START_NODE, x, null, false, "", false));
final XML.XHTMLMode xhtmlDoctype = XML.getXHTMLDoctype();
while (!stack.isEmpty()) {
final Entry en = stack.pop();
switch (en.kind) {
case START_NODE:
en.node.visitBy(new NodeVisitor() {
@Override
public void visit(Text n) {
if (en.afterBlock) {
p.append(linebreak).append(en.indent);
}
String s = n.getString();
if (!en.whitespaceSensitive)
s = truncate(s, en.indent, linebreak);
printEscaped(p, s, false);
if (n.getNextSibling() != null)
stack.push(new Entry(Entry.Kind.START_NODE, n.getNextSibling(), null, en.whitespaceSensitive, en.indent, false));
}
@Override
public void visit(Comment n) {
if (en.afterBlock) {
p.append(linebreak).append(en.indent);
}
p.append("<!--").append(n.getValue()).append("-->");
if (n.getNextSibling() != null)
stack.push(new Entry(Entry.Kind.START_NODE, n.getNextSibling(), null, en.whitespaceSensitive, en.indent, false));
}
@Override
public void visit(ProcessingInstruction n) {
if (en.afterBlock) {
p.append(linebreak).append(en.indent);
}
p.append("<?").append(n.getTarget());
if (n.getData().length() > 0)
p.append(' ').append(n.getData());
p.append("?>");
if (n.getNextSibling() != null)
stack.push(new Entry(Entry.Kind.START_NODE, n.getNextSibling(), null, en.whitespaceSensitive, en.indent, false));
}
@Override
public void visit(Element n) {
boolean xhtml_mode = n.getNamespace().equals(xhtmlDoctype.getNamespace()); // http://www.w3.org/TR/xhtml1/#guidelines
boolean xhtml_empty = xhtml_mode && XHTML_EMPTY_ELEMENTS.contains(n.getLocalName());
boolean whitespaceSense = en.whitespaceSensitive || indentation.isWhitespaceSensitive(n);
boolean indentElem = !whitespaceSense && indentation.getIndentType(n) == XMLIndentation.IndentType.BLOCK;
boolean isBlock = !whitespaceSense && indentation.getIndentType(n) != XMLIndentation.IndentType.NONE;
boolean first = !non_first_element[0];
if (!non_first_element[0] && xhtml_mode) {
non_first_element[0] = true;
if (n.getLocalName().equals("html")) {
if (xmldecl) {
p.print(xhtmlDoctype.getDoctype());
}
} else {
log.debug("Attempting to print XHTML DOCTYPE but the root tag of the document is not 'html'. DOCTYPE will not be printed...", new RuntimeException());
}
}
Map<String, String> nsdecls = ns.pushNamespaceDeclarations(n);
if (indentElem || en.afterBlock) {
if (!first) {
p.append(linebreak).append(en.indent);
}
}
p.append('<');
String prefix = ns.getPrefix(n.getNamespace()).peek();
if (prefix.length() > 0)
p.append(prefix).append(':');
p.append(n.getLocalName());
for (Map.Entry<String, String> me : nsdecls.entrySet()) {
p.append(" xmlns");
if (me.getKey().length() > 0)
p.append(':').append(me.getKey());
p.append("=\"").append(me.getValue()).append('"');
}
if (n.getFirstAttr() != null)
n.getFirstAttr().visitBy(this);
if ((xhtml_mode && !xhtml_empty) || n.getFirstChild() != null) {
p.append('>');
stack.push(new Entry(Entry.Kind.END_ELEMENT, n, nsdecls, en.whitespaceSensitive, en.indent, false));
if (n.getFirstChild() != null) {
stack.push(new Entry(Entry.Kind.START_NODE, n.getFirstChild(), null, whitespaceSense, indentElem ? en.indent + indentation.getIndentation() : en.indent, indentElem));
}
} else {
if (xhtml_mode)
p.append(' ');
p.append("/>");
ns.popNamespaceDeclarations(nsdecls);
if (n.getNextSibling() != null)
stack.push(new Entry(Entry.Kind.START_NODE, n.getNextSibling(), null, en.whitespaceSensitive, en.indent, isBlock));
}
}
@Override
public void visit(TemplateGap n) {
if (gaps) {
p.append("<[");
if (n.getType() != null)
p.append(n.getType()).append(' ');
p.append(n.getGap()).append("]>");
}
if (n.getNextSibling() != null)
stack.push(new Entry(Entry.Kind.START_NODE, n.getNextSibling(), null, en.whitespaceSensitive, en.indent, false));
}
@Override
public void visit(Attribute n) {
p.append(' ');
if (n.getNamespace() != null)
p.append(ns.getPrefix(n.getNamespace()).peek()).append(':');
p.append(n.getLocalName()).append("=\"");
printEscaped(p, n.getValue(), true);
p.append('\"');
if (n.getNextAttr() != null)
n.getNextAttr().visitBy(this);
}
@Override
public void visit(AttributeGap n) {
if (gaps) {
p.append(' ');
if (n.getNamespace() != null)
p.append(ns.getPrefix(n.getNamespace()).peek()).append(':');
p.append(n.getLocalName()).append("=[");
if (n.getType() != null)
p.append(n.getType()).append(' ');
p.append(n.getGap()).append(']');
}
if (n.getNextAttr() != null)
n.getNextAttr().visitBy(this);
}
});
break;
case END_ELEMENT:
Element n = (Element) (en.node);
boolean whitespaceSense = en.whitespaceSensitive || indentation.isWhitespaceSensitive(n);
boolean indentElem = !whitespaceSense && indentation.getIndentType(n) == XMLIndentation.IndentType.BLOCK;
boolean isBlock = !whitespaceSense && indentation.getIndentType(n) != XMLIndentation.IndentType.NONE;
if (indentElem) {
p.append(linebreak).append(en.indent);
}
p.append("</");
String namespace = n.getNamespace();
String prefix = ns.getPrefix(namespace).peek();
if (prefix.length() > 0)
p.append(prefix).append(':');
p.append(n.getLocalName()).append('>');
ns.popNamespaceDeclarations(en.nsdecls);
if (n.getNextSibling() != null)
stack.push(new Entry(Entry.Kind.START_NODE, n.getNextSibling(), null, en.whitespaceSensitive, en.indent, isBlock));
break;
}
}
p.flush();
}
private static String truncate(String s, String indent, String linebreak) {
StringBuilder b = new StringBuilder();
boolean lastWhitespace = false;
boolean lastLinebreak = false;
for (int i=0; i<s.length(); i++) {
char c = s.charAt(i);
if (c == '\r' || c == '\n') {
if (!lastLinebreak) {
b.append(linebreak).append(indent);
lastLinebreak = true;
lastWhitespace = true;
}
}
else if (Character.isWhitespace(c)) {
if (!lastWhitespace) {
b.append(' '); // normalize all to standard whitespace, including linebreaks
lastWhitespace = true;
}
lastLinebreak = false;
} else {
b.append(c);
lastLinebreak = lastWhitespace = false;
}
}
return b.toString();
}
private static void printEscaped(Appendable p, String s, boolean attr) {
try {
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
if (attr && c == '"')
p.append(""");
else
switch (c) {
case '<':
p.append("<");
break;
case '>':
p.append(">");
break;
case '&':
p.append("&");
break;
case 0x9:
case 0xA:
case 0xD:
case 0x85:
p.append(c);
break;
default:
// allowed character ranges in XML
if (0x20 <= c && c <= 0xD7FF ||
0xE000 <= c && c <= 0xFFFD ||
0x10000 <= c && c <= 0x10FFFF) {
// discouraged characters in the XML specification.
// see XML specification: http://www.w3.org/TR/REC-xml/#charsets
if (0x7F <= c && c <= 0x9F)
continue; //0x85 is allowed but handled above
if (0xFDD0 <= c && c <= 0xFDEF)
continue;
int mask = 0x0FFF0;
int cm = c & mask;
if (cm == mask) {
int lastMask = 0x0000F;
int cl = c & lastMask;
if (cl >= 0xE)
continue;
}
p.append(c);
}
}
}
} catch (IOException e) {
// should never happen...
throw new RuntimeException(e);
}
}
private static class Entry {
enum Kind {
START_NODE,
END_ELEMENT
}
final Kind kind;
final Node node;
final Map<String, String> nsdecls;
final boolean whitespaceSensitive;
final String indent;
final boolean afterBlock;
Entry(Kind kind, Node node, Map<String, String> nsdecls, boolean whitespaceSensitive, String indent, boolean afterBlock) {
this.kind = kind;
this.node = node;
this.nsdecls = nsdecls;
this.whitespaceSensitive = whitespaceSensitive;
this.indent = indent;
this.afterBlock = afterBlock;
}
}
}