/*
* Copyright (c) 2007 Henri Sivonen
* Copyright (c) 2008-2009 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.htmlparser.sax;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
public class XmlSerializer implements ContentHandler, LexicalHandler {
private final class PrefixMapping {
public final String uri;
public final String prefix;
/**
* @param uri
* @param prefix
*/
public PrefixMapping(String uri, String prefix) {
this.uri = uri;
this.prefix = prefix;
}
/**
* @see java.lang.Object#equals(java.lang.Object)
*/
@Override public final boolean equals(Object obj) {
if (obj instanceof PrefixMapping) {
PrefixMapping other = (PrefixMapping) obj;
return this.prefix.equals(other.prefix);
} else {
return false;
}
}
/**
* @see java.lang.Object#hashCode()
*/
@Override public final int hashCode() {
return prefix.hashCode();
}
}
private final class StackNode {
public final String uri;
public final String prefix;
public final String qName;
public final Set<PrefixMapping> mappings = new HashSet<PrefixMapping>();
/**
* @param uri
* @param qName
*/
public StackNode(String uri, String qName, String prefix) {
this.uri = uri;
this.qName = qName;
this.prefix = prefix;
}
}
private final static Map<String, String> WELL_KNOWN_ATTRIBUTE_PREFIXES = new HashMap<String, String>();
static {
WELL_KNOWN_ATTRIBUTE_PREFIXES.put("adobe:ns:meta/", "x");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd",
"sodipodi");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://ns.adobe.com/AdobeIllustrator/10.0/", "i");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/", "a");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://ns.adobe.com/Extensibility/1.0/", "x");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://ns.adobe.com/illustrator/1.0/", "illustrator");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/pdf/1.3/", "pdf");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/photoshop/1.0/",
"photoshop");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/tiff/1.0/",
"tiff");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/", "xap");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/",
"xapG");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/mm/",
"xapMM");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://ns.adobe.com/xap/1.0/rights/", "xapRights");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://ns.adobe.com/xap/1.0/sType/Dimensions#", "stDim");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://ns.adobe.com/xap/1.0/sType/ResourceRef#", "stRef");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://ns.adobe.com/xap/1.0/t/pg/",
"xapTPg");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://purl.org/dc/elements/1.1/",
"dc");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://schemas.microsoft.com/visio/2003/SVGExtensions/", "v");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd",
"sodipodi");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://w3.org/1999/xlink", "xlink");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.carto.net/attrib/",
"attrib");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://www.iki.fi/pav/software/textext/", "textext");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://www.inkscape.org/namespaces/inkscape", "inkscape");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://www.justsystem.co.jp/hanako13/svg", "jsh");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.w3.org/1999/xlink",
"xlink");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put(
"http://www.w3.org/2001/XMLSchema-instance", "xsi");
WELL_KNOWN_ATTRIBUTE_PREFIXES.put("http://www.w3.org/1999/xlink",
"xlink");
}
private final static Map<String, String> WELL_KNOWN_ELEMENT_PREFIXES = new HashMap<String, String>();
static {
WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.w3.org/1999/XSL/Transform",
"xsl");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://purl.org/dc/elements/1.1/",
"dc");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://www.w3.org/2001/XMLSchema-instance", "xsi");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.ascc.net/xml/schematron",
"sch");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://purl.oclc.org/dsdl/schematron",
"sch");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://www.inkscape.org/namespaces/inkscape", "inkscape");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd",
"sodipodi");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://ns.adobe.com/AdobeSVGViewerExtensions/3.0/", "a");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://ns.adobe.com/AdobeIllustrator/10.0/", "i");
WELL_KNOWN_ELEMENT_PREFIXES.put("adobe:ns:meta/", "x");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/", "xap");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/pdf/1.3/", "pdf");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/tiff/1.0/", "tiff");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://creativecommons.org/ns#", "cc");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd",
"sodipodi");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/", "Iptc4xmpCore");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/exif/1.0/", "exif");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://ns.adobe.com/Extensibility/1.0/", "x");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/illustrator/1.0/",
"illustrator");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/pdfx/1.3/", "pdfx");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/photoshop/1.0/",
"photoshop");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/Variables/1.0/",
"v");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/",
"xapG");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/g/img/",
"xapGImg");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/mm/",
"xapMM");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/rights/",
"xapRights");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://ns.adobe.com/xap/1.0/sType/Dimensions#", "stDim");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://ns.adobe.com/xap/1.0/sType/Font#", "stFnt");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://ns.adobe.com/xap/1.0/sType/ResourceRef#", "stRef");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://ns.adobe.com/xap/1.0/t/pg/",
"xapTPg");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://product.corel.com/CGS/11/cddns/", "odm");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://schemas.microsoft.com/visio/2003/SVGExtensions/", "v");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://web.resource.org/cc/", "cc");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://www.freesoftware.fsf.org/bkchem/cdml", "cdml");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.opengis.net/gml", "gml");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.svgmaker.com/svgns",
"svgmaker");
WELL_KNOWN_ELEMENT_PREFIXES.put(
"http://www.w3.org/2000/01/rdf-schema#", "rdfs");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://xmlns.com/foaf/0.1/", "foaf");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.xml-cml.org/schema/stmml",
"stm");
WELL_KNOWN_ELEMENT_PREFIXES.put("http://www.iupac.org/foo/ichi", "ichi");
}
private final static Writer wrap(OutputStream out) {
Charset charset = Charset.forName("utf-8");
CharsetEncoder encoder = charset.newEncoder();
encoder.onMalformedInput(CodingErrorAction.REPLACE);
encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
try {
encoder.replaceWith("\uFFFD".getBytes("utf-8"));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
return new OutputStreamWriter(out, encoder);
}
// grows from head
private final LinkedList<StackNode> stack = new LinkedList<StackNode>();
private final Writer writer;
public XmlSerializer(OutputStream out) {
this(wrap(out));
}
public XmlSerializer(Writer out) {
this.writer = out;
}
protected void checkNCName(String name) throws SAXException {
}
private final void push(String uri, String local, String prefix) {
stack.addFirst(new StackNode(uri, local, prefix));
}
private final String pop() {
String rv = stack.removeFirst().qName;
stack.getFirst().mappings.clear();
return rv;
}
private final String lookupPrefixAttribute(String ns) {
if ("http://www.w3.org/XML/1998/namespace".equals(ns)) {
return "xml";
}
Set<String> hidden = new HashSet<String>();
for (StackNode node : stack) {
for (PrefixMapping mapping : node.mappings) {
if (mapping.prefix.length() != 0 && mapping.uri.equals(ns)
&& !hidden.contains(mapping.prefix)) {
return mapping.prefix;
}
hidden.add(mapping.prefix);
}
}
return null;
}
private final String lookupUri(String prefix) {
for (StackNode node : stack) {
for (PrefixMapping mapping : node.mappings) {
if (mapping.prefix.equals(prefix)) {
return mapping.uri;
}
}
}
return null;
}
private final boolean xmlNsQname(String name) {
if (name == null) {
return false;
} else if ("xmlns".equals(name)) {
return true;
} else if (name.startsWith("xmlns:")) {
return true;
} else {
return false;
}
}
private final void writeAttributeValue(String val) throws IOException {
boolean prevWasSpace = true;
int last = val.length() - 1;
for (int i = 0; i <= last; i++) {
char c = val.charAt(i);
switch (c) {
case '<':
writer.write("<");
prevWasSpace = false;
break;
case '>':
writer.write(">");
prevWasSpace = false;
break;
case '&':
writer.write("&");
prevWasSpace = false;
break;
case '"':
writer.write(""");
prevWasSpace = false;
break;
case '\r':
writer.write("
");
prevWasSpace = false;
break;
case '\t':
writer.write("	");
prevWasSpace = false;
break;
case '\n':
writer.write("
");
prevWasSpace = false;
break;
case ' ':
if (prevWasSpace || i == last) {
writer.write(" ");
prevWasSpace = false;
} else {
writer.write(' ');
prevWasSpace = true;
}
break;
case '\uFFFE':
writer.write('\uFFFD');
prevWasSpace = false;
break;
case '\uFFFF':
writer.write('\uFFFD');
prevWasSpace = false;
break;
default:
if (c < ' ') {
writer.write('\uFFFD');
} else {
writer.write(c);
}
prevWasSpace = false;
break;
}
}
}
private final void generatePrefix(String uri) throws SAXException {
int counter = 0;
String candidate = WELL_KNOWN_ATTRIBUTE_PREFIXES.get(uri);
if (candidate == null) {
candidate = "p" + (counter++);
}
while (lookupUri(candidate) != null) {
candidate = "p" + (counter++);
}
startPrefixMappingPrivate(candidate, uri);
}
public final void characters(char[] ch, int start, int length)
throws SAXException {
try {
for (int i = start; i < start + length; i++) {
char c = ch[i];
switch (c) {
case '<':
writer.write("<");
break;
case '>':
writer.write(">");
break;
case '&':
writer.write("&");
break;
case '\r':
writer.write("
");
break;
case '\t':
writer.write('\t');
break;
case '\n':
writer.write('\n');
break;
case '\uFFFE':
writer.write('\uFFFD');
break;
case '\uFFFF':
writer.write('\uFFFD');
break;
default:
if (c < ' ') {
writer.write('\uFFFD');
} else {
writer.write(c);
}
break;
}
}
} catch (IOException e) {
throw new SAXException(e);
}
}
public final void endDocument() throws SAXException {
try {
stack.clear();
writer.flush();
writer.close();
} catch (IOException e) {
throw new SAXException(e);
}
}
public final void endElement(String uri, String localName, String qName)
throws SAXException {
try {
writer.write('<');
writer.write('/');
writer.write(pop());
writer.write('>');
} catch (IOException e) {
throw new SAXException(e);
}
}
public final void ignorableWhitespace(char[] ch, int start, int length)
throws SAXException {
characters(ch, start, length);
}
public final void processingInstruction(String target, String data)
throws SAXException {
try {
checkNCName(target);
writer.write("<?");
writer.write(target);
writer.write(' ');
boolean prevWasQuestionmark = false;
for (int i = 0; i < data.length(); i++) {
char c = data.charAt(i);
switch (c) {
case '?':
writer.write('?');
prevWasQuestionmark = true;
break;
case '>':
if (prevWasQuestionmark) {
writer.write(" >");
} else {
writer.write('>');
}
prevWasQuestionmark = false;
break;
case '\t':
writer.write('\t');
prevWasQuestionmark = false;
break;
case '\r':
case '\n':
writer.write('\n');
prevWasQuestionmark = false;
break;
case '\uFFFE':
writer.write('\uFFFD');
prevWasQuestionmark = false;
break;
case '\uFFFF':
writer.write('\uFFFD');
prevWasQuestionmark = false;
break;
default:
if (c < ' ') {
writer.write('\uFFFD');
} else {
writer.write(c);
}
prevWasQuestionmark = false;
break;
}
}
writer.write("?>");
} catch (IOException e) {
throw new SAXException(e);
}
}
public final void setDocumentLocator(Locator locator) {
}
public final void startDocument() throws SAXException {
try {
writer.write("<?xml version='1.0' encoding='utf-8'?>\n");
} catch (IOException e) {
throw new SAXException(e);
}
stack.clear();
push(null, null, null);
}
public final void startElement(String uri, String localName, String q,
Attributes atts) throws SAXException {
checkNCName(localName);
String prefix;
String qName;
if (uri.length() == 0) {
prefix = "";
qName = localName;
// generate xmlns
startPrefixMappingPrivate(prefix, uri);
} else {
prefix = WELL_KNOWN_ELEMENT_PREFIXES.get(uri);
if (prefix == null) {
prefix = "";
}
String lookup = lookupUri(prefix);
if (lookup != null && !lookup.equals(uri)) {
prefix = "";
}
startPrefixMappingPrivate(prefix, uri);
if (prefix.length() == 0) {
qName = localName;
} else {
qName = prefix + ':' + localName;
}
}
int attLen = atts.getLength();
for (int i = 0; i < attLen; i++) {
String attUri = atts.getURI(i);
if (attUri.length() == 0
|| "http://www.w3.org/XML/1998/namespace".equals(attUri)
|| "http://www.w3.org/2000/xmlns/".equals(attUri)
|| atts.getLocalName(i).length() == 0
|| xmlNsQname(atts.getQName(i))) {
continue;
}
if (lookupPrefixAttribute(attUri) == null) {
generatePrefix(attUri);
}
}
try {
writer.write('<');
writer.write(qName);
for (PrefixMapping mapping : stack.getFirst().mappings) {
writer.write(' ');
if (mapping.prefix.length() == 0) {
writer.write("xmlns");
} else {
writer.write("xmlns:");
writer.write(mapping.prefix);
}
writer.write('=');
writer.write('"');
writeAttributeValue(mapping.uri);
writer.write('"');
}
for (int i = 0; i < attLen; i++) {
String attUri = atts.getURI(i);
if ("http://www.w3.org/XML/1998/namespace".equals(attUri)
|| "http://www.w3.org/2000/xmlns/".equals(attUri)
|| atts.getLocalName(i).length() == 0
|| xmlNsQname(atts.getQName(i))) {
continue;
}
writer.write(' ');
if (attUri.length() != 0) {
writer.write(lookupPrefixAttribute(attUri));
writer.write(':');
}
String attLocal = atts.getLocalName(i);
checkNCName(attLocal);
writer.write(attLocal);
writer.write('=');
writer.write('"');
writeAttributeValue(atts.getValue(i));
writer.write('"');
}
writer.write('>');
} catch (IOException e) {
throw new SAXException(e);
}
push(uri, qName, prefix);
}
public final void comment(char[] ch, int start, int length) throws SAXException {
try {
boolean prevWasHyphen = false;
writer.write("<!--");
for (int i = start; i < start + length; i++) {
char c = ch[i];
switch (c) {
case '-':
if (prevWasHyphen) {
writer.write(" -");
} else {
writer.write('-');
prevWasHyphen = true;
}
break;
case '\t':
writer.write('\t');
prevWasHyphen = false;
break;
case '\r':
case '\n':
writer.write('\n');
prevWasHyphen = false;
break;
case '\uFFFE':
writer.write('\uFFFD');
prevWasHyphen = false;
break;
case '\uFFFF':
writer.write('\uFFFD');
prevWasHyphen = false;
break;
default:
if (c < ' ') {
writer.write('\uFFFD');
} else {
writer.write(c);
}
prevWasHyphen = false;
break;
}
}
if (prevWasHyphen) {
writer.write(' ');
}
writer.write("-->");
} catch (IOException e) {
throw new SAXException(e);
}
}
public final void endCDATA() throws SAXException {
}
public final void endDTD() throws SAXException {
}
public final void endEntity(String name) throws SAXException {
}
public final void startCDATA() throws SAXException {
}
public final void startDTD(String name, String publicId, String systemId)
throws SAXException {
}
public final void startEntity(String name) throws SAXException {
}
public final void startPrefixMapping(String prefix, String uri)
throws SAXException {
if (prefix.length() == 0 || uri.equals(lookupUri(prefix))) {
return;
}
if (uri.equals(lookupUri(prefix))) {
return;
}
if ("http://www.w3.org/XML/1998/namespace".equals(uri)) {
if ("xml".equals(prefix)) {
return;
} else {
throw new SAXException("Attempt to declare a reserved NS uri.");
}
}
if ("http://www.w3.org/2000/xmlns/".equals(uri)) {
throw new SAXException("Attempt to declare a reserved NS uri.");
}
if (uri.length() == 0 && prefix.length() != 0) {
throw new SAXException("Can bind a prefix to no namespace.");
}
checkNCName(prefix);
Set<PrefixMapping> theSet = stack.getFirst().mappings;
PrefixMapping mapping = new PrefixMapping(uri, prefix);
if (theSet.contains(mapping)) {
throw new SAXException(
"Attempt to map one prefix to two URIs on one element.");
}
theSet.add(mapping);
}
public final void startPrefixMappingPrivate(String prefix, String uri)
throws SAXException {
if (uri.equals(lookupUri(prefix))) {
return;
}
stack.getFirst().mappings.add(new PrefixMapping(uri, prefix));
}
public final void endPrefixMapping(String prefix) throws SAXException {
}
public final void skippedEntity(String name) throws SAXException {
}
}