/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-04 Wolfgang M. Meier
* wolfgang@exist-db.org
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id$
*/
package org.exist.storage.serializers;
import org.exist.Namespaces;
import org.exist.dom.AttrImpl;
import org.exist.dom.CDATASectionImpl;
import org.exist.dom.CommentImpl;
import org.exist.dom.DocumentImpl;
import org.exist.dom.DocumentTypeImpl;
import org.exist.dom.ElementImpl;
import org.exist.dom.Match;
import org.exist.dom.NodeProxy;
import org.exist.dom.ProcessingInstructionImpl;
import org.exist.dom.QName;
import org.exist.dom.StoredNode;
import org.exist.dom.TextImpl;
import org.exist.numbering.NodeId;
import org.exist.storage.DBBroker;
import org.exist.util.Configuration;
import org.exist.util.serializer.AttrList;
import org.exist.xquery.value.Type;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Serializer implementation for the native database backend.
*
* @author wolf
*/
public class NativeSerializer extends Serializer {
// private final static AttributesImpl EMPTY_ATTRIBUTES = new AttributesImpl();
private final static QName TEXT_ELEMENT = new QName("text", Namespaces.EXIST_NS, "exist");
private final static QName ATTRIB_ELEMENT = new QName("attribute", Namespaces.EXIST_NS, "exist");
private final static QName SOURCE_ATTRIB = new QName("source", Namespaces.EXIST_NS, "exist");
private final static QName ID_ATTRIB = new QName("id", Namespaces.EXIST_NS, "exist");
public NativeSerializer(DBBroker broker, Configuration config) {
super(broker, config);
}
protected void serializeToReceiver(NodeProxy p, boolean generateDocEvent, boolean checkAttributes)
throws SAXException {
if(Type.subTypeOf(p.getType(), Type.DOCUMENT) || p.getNodeId() == NodeId.DOCUMENT_NODE) {
serializeToReceiver(p.getDocument(), generateDocEvent);
return;
}
setDocument(p.getDocument());
if (generateDocEvent) {receiver.startDocument();}
final Iterator<StoredNode> domIter = broker.getNodeIterator(new StoredNode(p));
serializeToReceiver(null, domIter, p.getDocument(), checkAttributes, p.getMatches(), new TreeSet<String>());
if (generateDocEvent) {receiver.endDocument();}
}
protected void serializeToReceiver(DocumentImpl doc, boolean generateDocEvent) throws SAXException {
final long start = System.currentTimeMillis();
setDocument(doc);
final NodeList children = doc.getChildNodes();
if (generateDocEvent)
{receiver.startDocument();}
if (doc.getDoctype() != null){
if ("yes".equals(getProperty(EXistOutputKeys.OUTPUT_DOCTYPE, "no"))) {
final StoredNode n = (StoredNode) doc.getDoctype();
serializeToReceiver(n, null, (DocumentImpl) n.getOwnerDocument(), true, null, new TreeSet<String>());
}
}
// iterate through children
for (int i = 0; i < children.getLength(); i++) {
final StoredNode node = (StoredNode) children.item(i);
final Iterator<StoredNode> domIter = broker.getNodeIterator(node);
domIter.next();
final NodeProxy p = new NodeProxy(node);
serializeToReceiver(node, domIter, (DocumentImpl)node.getOwnerDocument(),
true, p.getMatches(), new TreeSet<String>());
}
if (generateDocEvent) {receiver.endDocument();}
if (LOG.isDebugEnabled())
{LOG.debug("serializing document " + doc.getDocId() + " (" + doc.getURI() + ")"
+ " to SAX took " + (System.currentTimeMillis() - start) + " msec");}
}
protected void serializeToReceiver(StoredNode node, Iterator<StoredNode> iter,
DocumentImpl doc, boolean first, Match match, Set<String> namespaces) throws SAXException {
if (node == null)
{node = iter.next();}
if (node == null)
{return;}
// char ch[];
String cdata;
switch (node.getNodeType()) {
case Node.ELEMENT_NODE:
receiver.setCurrentNode(node);
String defaultNS = null;
if (((ElementImpl) node).declaresNamespacePrefixes()) {
// declare namespaces used by this element
String prefix, uri;
for (final Iterator<String> i = ((ElementImpl) node).getPrefixes(); i.hasNext();) {
prefix = i.next();
if (prefix.length() == 0) {
defaultNS = ((ElementImpl) node).getNamespaceForPrefix(prefix);
receiver.startPrefixMapping("", defaultNS);
namespaces.add(defaultNS);
} else {
uri = ((ElementImpl) node).getNamespaceForPrefix(prefix);
receiver.startPrefixMapping(prefix, uri);
namespaces.add(uri);
}
}
}
final String ns = defaultNS == null ? node.getNamespaceURI() : defaultNS;
if (ns.length() > 0 && (!namespaces.contains(ns)))
{receiver.startPrefixMapping(node.getPrefix(), ns);}
final AttrList attribs = new AttrList();
if ((first && showId == EXIST_ID_ELEMENT) || showId == EXIST_ID_ALL) {
attribs.addAttribute(ID_ATTRIB, node.getNodeId().toString());
/*
* This is a proposed fix-up that the serializer could do
* to make sure elements always have the namespace declarations
*
} else {
// This is fix-up for when the node has a namespace but there is no
// namespace declaration.
String elementNS = node.getNamespaceURI();
Node parent = node.getParentNode();
if (parent instanceof ElementImpl) {
ElementImpl parentElement = (ElementImpl)parent;
String declaredNS = parentElement.getNamespaceForPrefix(node.getPrefix());
if (elementNS!=null && declaredNS==null) {
// We need to declare the prefix as it was missed somehow
receiver.startPrefixMapping(node.getPrefix(), elementNS);
} else if (elementNS==null && declaredNS!=null) {
// We need to declare the default namespace to be the no namespace
receiver.startPrefixMapping(node.getPrefix(), elementNS);
} else if (!elementNS.equals(defaultNS)) {
// Same prefix but different namespace
receiver.startPrefixMapping(node.getPrefix(), elementNS);
}
} else if (elementNS!=null) {
// If the parent is the document, we must have a namespace
// declaration when there is a namespace URI.
receiver.startPrefixMapping(node.getPrefix(), elementNS);
}
*/
}
if (first && showId > 0) {
// String src = doc.getCollection().getName() + "/" + doc.getFileName();
attribs.addAttribute(SOURCE_ATTRIB, doc.getFileURI().toString());
}
final int children = node.getChildCount();
int count = 0;
// int childLen;
StoredNode child = null;
while (count < children) {
child = (StoredNode) iter.next();
if (child!=null && child.getNodeType() == Node.ATTRIBUTE_NODE) {
if ((getHighlightingMode() & TAG_ATTRIBUTE_MATCHES) > 0)
{cdata = processAttribute(((AttrImpl) child).getValue(), node.getNodeId(), match);}
else
{cdata = ((AttrImpl) child).getValue();}
attribs.addAttribute(child.getQName(), cdata);
count++;
child.release();
} else
{break;}
}
receiver.setCurrentNode(node);
receiver.startElement(node.getQName(), attribs);
while (count < children) {
serializeToReceiver(child, iter, doc, false, match, namespaces);
if (++count < children) {
child = (StoredNode) iter.next();
} else
{break;}
}
receiver.setCurrentNode(node);
receiver.endElement(node.getQName());
if (((ElementImpl) node).declaresNamespacePrefixes()) {
String prefix;
for (final Iterator<String> i = ((ElementImpl) node).getPrefixes(); i.hasNext();) {
prefix = i.next();
receiver.endPrefixMapping(prefix);
}
}
if (ns.length() > 0 && (!namespaces.contains(ns)))
{receiver.endPrefixMapping(node.getPrefix());}
node.release();
break;
case Node.TEXT_NODE:
if (first && createContainerElements) {
final AttrList tattribs = new AttrList();
if (showId > 0) {
tattribs.addAttribute(ID_ATTRIB, node.getNodeId().toString());
tattribs.addAttribute(SOURCE_ATTRIB, doc.getFileURI().toString());
}
receiver.startElement(TEXT_ELEMENT, tattribs);
}
receiver.setCurrentNode(node);
receiver.characters(((TextImpl) node).getXMLString());
if (first && createContainerElements)
{receiver.endElement(TEXT_ELEMENT);}
node.release();
break;
case Node.ATTRIBUTE_NODE:
if ((getHighlightingMode() & TAG_ATTRIBUTE_MATCHES) == TAG_ATTRIBUTE_MATCHES)
{cdata = processAttribute(((AttrImpl) node).getValue(), node.getNodeId(), match);}
else
{cdata = ((AttrImpl) node).getValue();}
if(first) {
if (createContainerElements) {
final AttrList tattribs = new AttrList();
if (showId > 0) {
tattribs.addAttribute(ID_ATTRIB, node.getNodeId().toString());
tattribs.addAttribute(SOURCE_ATTRIB, doc.getFileURI().toString());
}
tattribs.addAttribute(node.getQName(), cdata);
receiver.startElement(ATTRIB_ELEMENT, tattribs);
receiver.endElement(ATTRIB_ELEMENT);
}
else {
if (this.outputProperties.getProperty("output-method") != null &&
"text".equals(this.outputProperties.getProperty("output-method"))) {
receiver.characters(node.getNodeValue());
} else {
LOG.warn("Error SENR0001: attribute '" + node.getQName() + "' has no parent element. " +
"While serializing document " + doc.getURI());
throw new SAXException("Error SENR0001: attribute '" + node.getQName() + "' has no parent element");
}
}
} else
{receiver.attribute(node.getQName(), cdata);}
node.release();
break;
case Node.DOCUMENT_TYPE_NODE:
final String systemId = ((DocumentTypeImpl) node).getSystemId();
final String publicId = ((DocumentTypeImpl) node).getPublicId();
final String name = ((DocumentTypeImpl) node).getName();
receiver.documentType(name, publicId, systemId);
break;
case Node.PROCESSING_INSTRUCTION_NODE:
receiver.processingInstruction(
((ProcessingInstructionImpl) node).getTarget(),
((ProcessingInstructionImpl) node).getData());
node.release();
break;
case Node.COMMENT_NODE:
final String comment = ((CommentImpl) node).getData();
char data[] = new char[comment.length()];
comment.getChars(0, data.length, data, 0);
receiver.comment(data, 0, data.length);
node.release();
break;
case Node.CDATA_SECTION_NODE:
final String str = ((CDATASectionImpl)node).getData();
if (first)
{receiver.characters(str);}
else {
data = new char[str.length()];
str.getChars(0,str.length(), data, 0);
receiver.cdataSection(data, 0, data.length);
}
break;
//TODO : how to process other types ? -pb
}
}
private final String processAttribute(String data, NodeId nodeId, Match match) {
if (match == null) {return data;}
// prepare a regular expression to mark match-terms
StringBuilder expr = null;
Match next = match;
while (next != null) {
if (next.getNodeId().equals(nodeId)) {
if (expr == null) {
expr = new StringBuilder();
expr.append("\\b(");
}
if (expr.length() > 5) {expr.append('|');}
expr.append("");
}
next = next.getNextMatch();
}
if (expr != null) {
expr.append(")\\b");
final Pattern pattern = Pattern.compile(expr.toString(), Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
final Matcher matcher = pattern.matcher(data);
return matcher.replaceAll("||$1||");
}
return data;
}
}