Package nux.xom.xquery

Source Code of nux.xom.xquery.ResultSequenceSerializer$WrapSerializer

/*
* Copyright (c) 2005, The Regents of the University of California, through
* Lawrence Berkeley National Laboratory (subject to receipt of any required
* approvals from the U.S. Dept. of Energy). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* (1) Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* (2) Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* (3) Neither the name of the University of California, Lawrence Berkeley
* National Laboratory, U.S. Dept. of Energy nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* You are under no obligation whatsoever to provide any bug fixes, patches, or
* upgrades to the features, functionality or performance of the source code
* ("Enhancements") to anyone; however, if you choose to make your Enhancements
* available either publicly, or directly to Lawrence Berkeley National
* Laboratory, without imposing a separate written license agreement for such
* Enhancements, then you hereby grant the following license: a non-exclusive,
* royalty-free perpetual license to install, use, modify, prepare derivative
* works, incorporate into other computer software, distribute, and sublicense
* such enhancements or derivative works thereof, in binary and source code
* form.
*/
package nux.xom.xquery;

import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import nu.xom.Attribute;
import nu.xom.Comment;
import nu.xom.DocType;
import nu.xom.Document;
import nu.xom.Element;
//import nu.xom.Namespace;
import nu.xom.Node;
import nu.xom.Nodes;
import nu.xom.ParentNode;
import nu.xom.ProcessingInstruction;
import nu.xom.Serializer;
import nu.xom.Text;
import nu.xom.XMLException;

/**
* Serializes an XQuery/XPath result sequence onto a given output stream, using
* various configurable serialization options such as encoding, indentation and
* algorithm. The semantics of options are identical to the XOM
* {@link Serializer}, except the "algorithm" option.
* <p>
* The <b>W3C algorithm</b> serializes each item in the result sequence
* according to the XML Output Method of the <a target="_blank"
* href="http://www.w3.org/TR/xslt-xquery-serialization"> W3C XQuery/XSLT2
* Serialization Spec</a>, with sequence normalization as defined
* therein. As such, it may output data that is not a well-formed document. For
* example, if the result sequence contains more than one element then a
* document with more than one root element will be output. However, for some
* use cases the algorithm does indeed output a well-formed XML document. For
* example, if the result sequence contains a single document or element node.
* Finally, note that an exception is thrown if the result sequence contains a
* (top-level) attribute node.
* <p>
* In contrast, the <b>wrap algorithm</b> wraps each item in the result
* sequence into a decorated element wrapper, thereby ensuring that any
* arbitrary result sequence can always be output as a well-formed XML document.
* This enables easy processing in subsequent XML processing pipeline stages.
* Unlike the W3C algorithm, the wrap algorithm does not perform sequence
* normalization. Thus, wrapping is better suited for XQuery debugging purposes,
* because one can see exactly what items a query does (or does not) return.
* <p>
* Example usage:
* <pre>
* Document doc = new Builder().build(new File("samples/data/p2pio-receive.xml"));
* Nodes results = XQueryUtil.xquery(doc, "//*");
* // Nodes results = XQueryUtil.xquery(doc, "//node(), //@*, 'Hello World!'");
* ResultSequenceSerializer ser = new ResultSequenceSerializer();
* ser.setEncoding("UTF-8");
* ser.setIndent(4);
* ser.setAlgorithm(ResultSequenceSerializer.W3C_ALGORITHM);
* // ser.setAlgorithm(ResultSequenceSerializer.WRAP_ALGORITHM);
* ser.write(results, System.out);
* </pre>
*
* @author whoschek.AT.lbl.DOT.gov
* @author $Author: hoschek3 $
* @version $Revision: 1.49 $, $Date: 2005/12/05 06:53:04 $
*/
public class ResultSequenceSerializer {
 
  // Note: does not subclass XOM Serializer for maximum impl. flexibility,
  // and to reduce number of public classes (i.e. API complexity).
 
  // TODO: add lineSeparator, maxLength, preserveBaseURI?
  // TODO: add node ID decorations for wrapped algo?
  // TODO: add write(ResultSequence, OutputStream)? could have minimum overhead by
  // avoiding intermediate node and node list construction by pulling Saxon items
  // from the pipeline, and writing them directly one by one.
 
  /**
   * Serializes each item in the result sequence according to the XML Output
   * Method of the <a target="_blank"
   * href="http://www.w3.org/TR/xslt-xquery-serialization"> W3C XQuery/XSLT2
   * Serialization Draft Spec</a>, with sequence normalization as defined
   * therein.
   */
  public static final String W3C_ALGORITHM  = "w3c";
 
  /**
   * Serializes each item in the result sequence by wrapping it into a
   * decorated element, without sequence normalization.
   */
  public static final String WRAP_ALGORITHM = "wrap";
 
  // Current option values. The initializers below are the defaults that a
  // freshly constructed serializer starts with.

  /** Selected algorithm; setAlgorithm only accepts the two constants above. */
  private String algorithm = W3C_ALGORITHM;
  /** Number of spaces per nesting level; zero by default. */
  private int indent = 0;
  /** Character encoding name handed to the underlying XOM serializer. */
  private String encoding = "UTF-8";
  /** Whether Unicode normalization form C is requested on output. */
  private boolean nfc = false;
 
  /**
   * Constructs and returns a serializer with default options.
   */
  public ResultSequenceSerializer() {
  }
 
  /**
   * Returns the current serialization algorithm; Can be
   * {@link #W3C_ALGORITHM} or {@link #WRAP_ALGORITHM}; Defaults to
   * {@link #W3C_ALGORITHM}.
   *
   * @return the current algorithm
   */
  public String getAlgorithm() {
    return algorithm;
  }
 
  /**
   * Returns the number of spaces to insert for each nesting level for pretty
   * printing purposes; Defaults to zero; For details, see
   * {@link Serializer#setIndent(int)}.
   *
   * @return the current the number of spaces
   */
  public int getIndent() {
    return indent;
  }
 
  /**
   * Returns the current serialization character encoding; Defaults to
   * "UTF-8"; For details, see
   * {@link Serializer#Serializer(OutputStream, String)}.
   *
   * @return the current encoding
   */
  public String getEncoding() {
    return encoding;
  }

  /**
   * Returns whether or not to perform Unicode normalization form C (NFC);
   * Defaults to false; For details, see
   * {@link Serializer#setUnicodeNormalizationFormC(boolean)}
   *
   * @return whether or not to perform NFC
   */
  public boolean getUnicodeNormalizationFormC() {
    return nfc;
  }
 
  /**
   * Sets the serialization algorithm.
   *
   * @param algorithm
   *            the serialization algorithm to use
   */
  public void setAlgorithm(String algorithm) {
    if (!(W3C_ALGORITHM.equals(algorithm) || WRAP_ALGORITHM.equals(algorithm))) {
      throw new IllegalArgumentException(
        "Unrecognized XQuery serialization algorithm: " + algorithm);
    }
    this.algorithm = algorithm;
  }

  /**
   * Sets the character encoding for the serialization.
   *
   * @param encoding
   *            the encoding to use
   */
  public void setEncoding(String encoding) {
    if (encoding == null)
      throw new NullPointerException("Encoding must not be null");
    this.encoding = encoding;
  }

  /**
   * Sets the number of spaces to insert for each nesting level.
   *
   * @param indent
   *            the indentation to use (must be &gt;= 0)
   */
  public void setIndent(int indent) {
    this.indent = indent;
  }

  /**
   * Sets whether or not to perform Unicode normalization form C (NFC).
   *
   * @param nfc
   *            true to normalize with NFC, false otherwise.
   */
  public void setUnicodeNormalizationFormC(boolean nfc) {
    this.nfc = nfc;
  }
 
  /**
   * Returns a string representation for debugging purposes.
   *
   * @return a string representation
   */
  public String toString() {
    return "[" +
    "algorithm=" + getAlgorithm() +
    ", encoding=" + getEncoding() +
    ", indent=" + getIndent() +
    ", unicodeNormalizationFormC=" + getUnicodeNormalizationFormC() +
    "]";
  }
 
  /**
   * Serializes the given result sequence onto the given output stream.
   * This method does not auto-close the output stream.
   *
   * @param nodes
   *            the result sequence to serialize
   * @param out
   *            the stream to write to
   * @throws IOException
   *             if an I/O error occured
   */
  public void write(Nodes nodes, OutputStream out) throws IOException {
    SequenceSerializer ser;
    if (W3C_ALGORITHM.equals(getAlgorithm())) {
      ser = new W3CSerializer(out, getEncoding());
    } else {
      ser = new WrapSerializer(out, getEncoding());
    }
   
    ser.setIndent(getIndent());
    ser.setUnicodeNormalizationFormC(getUnicodeNormalizationFormC());
//    ser.setLineSeparator(getLineSeparator());
//    ser.setMaxLength(getMaxLength());
//    ser.setPreserveBaseURI(getPreserveBaseURI());
   
    ser.write(nodes);
  }
 
 
  ///////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  /////////////////////////////////////////////////////////////////////////////// 
  private static abstract class SequenceSerializer extends Serializer {
   
    public SequenceSerializer(OutputStream out, String encoding)
        throws UnsupportedEncodingException {
      super(out, encoding);
    }

    // override for specific implementations
    public abstract void write(Nodes nodes) throws IOException;

  }
 
 
  ///////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  ///////////////////////////////////////////////////////////////////////////////   
  private static final class W3CSerializer extends SequenceSerializer {
   
    // One-shot flag: when true, the next writeNamespaceDeclarations() call
    // emits all namespaces in scope for the element and then resets the flag.
    private boolean writeNamespaceDeclarationsInScope = false;
    // Cleared (for all instances) the first time the public XOM lookup of
    // in-scope namespace prefixes fails with an Error; from then on the
    // slower fallback is used.
    private static boolean isXom11Plus = true;
   
    public W3CSerializer(OutputStream out, String encoding)
        throws UnsupportedEncodingException {
      super(out, encoding);
    }

    protected void writeXMLDeclaration() throws IOException {
      writeRaw("<?xml version=\"1.0\" encoding=\"");
      writeRaw(getEncoding());
      writeRaw("\"?>");
//      breakLine();  // subtle but important: omit newline
    }
      
    /** Includes efficient impl of the W3C result sequence normalization spec. */
    public void write(Nodes nodes) throws IOException {
      final boolean indentYes = getIndent() > 0;
      boolean mayBreakLine = true
      boolean isPreviousAtomic = false;
      this.writeNamespaceDeclarationsInScope = false;
     
      writeXMLDeclaration();
     
      int size = nodes.size();
      for (int i=0; i < size; i++) {
        Node node = nodes.get(i);
        if (node instanceof Attribute) {
          throw new XMLException(
          "SENR0001: W3C XQuery Serialization spec forbids top-level attributes");
//        } else if (node instanceof Namespace) {
//          throw new XMLException(
//          "SENR0001: W3C XQuery Serialization spec forbids top-level namespaces");
        } else if (node instanceof Document) {
          // Replace document with its children
          // Note that a Document can't have an atomic value or Text as child
          Document doc = (Document) node;
          for (int j=0; j < doc.getChildCount(); j++) {
            Node child = doc.getChild(j);
            if (mayBreakLine && indentYes && child instanceof Element) {
              breakLine();
            }
            writeChild(child);
            mayBreakLine = true;
          }
          isPreviousAtomic = false;
        } else if (DefaultResultSequence.isAtomicValue(node)) {
          // Replace adjacent atomic values with their string
          // concatenation, separated by a space, forming a Text node
          if (isPreviousAtomic) writeEscaped(" ");
          writeEscaped(node.getChild(0).getValue()); // string value of atomic value
          mayBreakLine = false;
          isPreviousAtomic = true;
        } else if (node instanceof Text) {
          // replace adjacent Texts with their string concatenation, removing empty texts
          String value = node.getValue();
          if (value.length() > 0) {
            writeEscaped(value);
            mayBreakLine = false;
            isPreviousAtomic = false;
          }
        } else { // any other node type
          if (node instanceof Element) {
            if (mayBreakLine && indentYes) breakLine();
            // root elements need no special toplevel namespace treatment
            ParentNode parent = node.getParent();
            this.writeNamespaceDeclarationsInScope =
              parent != null && !(parent instanceof Document);
          }
          writeChild(node);
          this.writeNamespaceDeclarationsInScope = false;
          mayBreakLine = true;
          isPreviousAtomic = false;
        }       
      } // end for
     
      if (mayBreakLine && indentYes) breakLine();
      flush();
    }
       
    /**
    Ensures namespaces declared on ancestor-or-self are included in output. This is necessary
    because they may actually be in use somewhere in the subtree. Example:
    <pre>
   
    doc :=
    <SOAP:a xmlns:SOAP="http://schemas.xmlsoap.org/soap/envelope/" xmlns:foo="http://example.com">
      <b>
        <SOAP:c/>
      </b>
    </SOAP:a>
   
    result sequence := (b)
   
    The XOM Serializer assumes that an entire document is written, rather than arbitrary
    (subtree) nodes. It would generate this (wrong) output 1:
    <b>
      <SOAP:c/>
    </b>
    This is because its algorithm thinks it has previously already written the <SOAP:a>
    element, including its namespace declaration.
   
    Our method adds all namespaces-in-scope to the subforest's root, generating the following
    (expected) output 2:
    <b xmlns:SOAP="http://schemas.xmlsoap.org/soap/envelope/" xmlns:foo="http://example.com">
      <SOAP:c/> 
    </b>
   
    Note: According to http://xquery.com/pipermail/talk/2005-November/000883.html,
    incorrect output 3 would be:
    <b>
      <SOAP:c xmlns:SOAP="http://schemas.xmlsoap.org/soap/envelope/"/>
    </b>
   
    </pre>
    */
    protected void writeNamespaceDeclarations(Element element) throws IOException {
      if (this.writeNamespaceDeclarationsInScope) {
        this.writeNamespaceDeclarationsInScope = false;
        writeNamespaceDeclarationsInScope(element);
      } else {
        super.writeNamespaceDeclarations(element);
      }
    }

    private void writeNamespaceDeclarationsInScope(Element element) throws IOException {
      Map namespaces = getNamespacePrefixesInScope(element);
      int size = namespaces.size();

      // TODO: enable non-normative cosmetic sort by prefix ???
      // if (size > 1) namespaces = new java.util.TreeMap(namespaces);

      Iterator iter = namespaces.entrySet().iterator();
      for (int i = 0; i < size; i++) {
        Map.Entry entry = (Map.Entry) iter.next();
        String uri = (String) entry.getValue();
        if (uri.length() > 0) { // xmlns="" is unnecessary
          String prefix = (String) entry.getKey();
          writeRaw(" ");
          writeNamespaceDeclaration(prefix, uri);
        }
      }
    }
     
      /*
     * Awkward work-around to use XOM's
     * Element.getNamespacePrefixesInScope(), or, if that's not available as
     * a public method, a slow fallback solution.
     */     
    private static Map getNamespacePrefixesInScope(Element element) {
      if (isXom11Plus) { // fast path
        try {
          return getNamespacePrefixesInScopePublic(element);
        } catch (Error e) {
          isXom11Plus = false;
        }
      }
      // slow path
      return getNamespacePrefixesInScopeNonPublic(element);
    }
     
    private static Map getNamespacePrefixesInScopePublic(Element element) {
      return element.getNamespacePrefixesInScope(); // (xom-1.1+)
//       return XPathContext.makeNamespaceContext(element).toMap();
    }
     
    private static Map getNamespacePrefixesInScopeNonPublic(Element element) {
      HashMap namespaces = new HashMap();
     
      do {
        int size = element.getNamespaceDeclarationCount();
        for (int i = 0; i < size; i++) {
          String prefix = element.getNamespacePrefix(i);
          if (!namespaces.containsKey(prefix)) {
            String uri = element.getNamespaceURI(prefix);
            namespaces.put(prefix, uri);
          }
        }
        ParentNode parent = element.getParent();
        element = (parent instanceof Element ? (Element) parent : null);
      } while (element != null);
     
      return namespaces;
    }
   
  }
 

  ////////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  ////////////////////////////////////////////////////////////////////////////////
  private static final class WrapSerializer extends SequenceSerializer {

    /**
     * Prefabricated template elements for efficient WRAP_ALGORITHM, keyed
     * by the class name of the item type they wrap (the template for the
     * whole sequence is keyed by the class name of Nodes). Templates are
     * copied before use, never handed out directly.
     */
    private static final HashMap TEMPLATES = initTemplates();
   
    public WrapSerializer(OutputStream out, String encoding)
        throws UnsupportedEncodingException {
      super(out, encoding);
    }

    public void write(Nodes nodes) throws IOException {
      Document doc = wrapSequence(nodes);
      write(doc);     
    }
   
    /** Wraps each item in the result sequence into a decorated element wrapper. */
    private static Document wrapSequence(Nodes nodes) {
      // make a copy of the template for sequences:
      Element items = (Element) TEMPLATES.get(Nodes.class.getName());
      items = new Element(items);
     
      int size = nodes.size();
      for (int i=0; i < size; i++) {
        items.appendChild(wrap(nodes.get(i)));
      }
     
      return new Document(items);
    }

    /** Wraps (a copy of) an item into a decorated element. */
    private static Element wrap(Node node) {   
      if (DefaultResultSequence.isAtomicValue(node)) {
        return (Element) node.copy(); // atomic values are already properly wrapped
      }
     
      // make a copy of the template associated with the given node type:
      Element item = (Element) TEMPLATES.get(node.getClass().getName());
      if (item == null) // FIXME: also allow Node subclasses
        throw new IllegalArgumentException("Unrecognized node type: " + node.getClass());
      item = new Element(item);

      // add copy of content to wrapper:
      if (node instanceof Attribute) {
        Attribute attr = (Attribute) node;         
        item.addAttribute((Attribute) attr.copy());
      } else if (node instanceof Document) {
        Document doc = (Document) node;         
        for (int j=0; j < doc.getChildCount(); j++) {
          item.appendChild(doc.getChild(j).copy());
        }
//      } else if (node instanceof Namespace) { // xom >= 1.1 only
//        Namespace ns = (Namespace) node;
////        item.addNamespaceDeclaration(ns.getPrefix(), ns.getValue());
//        if (ns.getPrefix().length() > 0) {
//          item.addAttribute(new Attribute("prefix", ns.getPrefix()));
//        }
//        item.addAttribute(new Attribute("uri", ns.getValue()));
      } else if (node instanceof DocType) {
        DocType docType = (DocType) node;
        Element e;
       
        e = new Element("rootName");
        e.appendChild(docType.getRootElementName());
        item.appendChild(e);
       
        if (docType.getPublicID() != null) {
          e = new Element("publicID");
          e.appendChild(docType.getPublicID());
          item.appendChild(e);
        }
        if (docType.getSystemID() != null) {
          e = new Element("systemID");
          e.appendChild(docType.getSystemID());
          item.appendChild(e);
        }
        if (docType.getInternalDTDSubset().length() > 0) {
          e = new Element("internalDTDSubset");
          e.appendChild(docType.getInternalDTDSubset());
          item.appendChild(e);
        }
      } else { // Element, Text, Comment, ProcessingInstruction
        item.appendChild(node.copy());
      }
     
      return item;
    }
   
    /** Prefabricate template elements for efficient WRAPPED_ALGORITHM. */
    private static HashMap initTemplates() {
      HashMap templates = new HashMap();
      String ns = "http://dsd.lbl.gov/nux";
      Element template;
     
      template = new Element("item:document", ns);
      templates.put(Document.class.getName(), template);
     
      template = new Element("item:element", ns);
      templates.put(Element.class.getName(), template);
     
      template = new Element("item:attribute", ns);
      templates.put(Attribute.class.getName(), template);
     
      template = new Element("item:text", ns);
      templates.put(Text.class.getName(), template);
     
      template = new Element("item:comment", ns);
      templates.put(Comment.class.getName(), template);
     
      template = new Element("item:pi", ns);
      templates.put(ProcessingInstruction.class.getName(), template);

      template = new Element("item:docType", ns);
      templates.put(DocType.class.getName(), template);
     
//      template = new Element("item:namespace", ns); // xom >= 1.1 only
//      templates.put(Namespace.class.getName(), template);
     
      template = new Element("item:items", ns);
      template.addNamespaceDeclaration("xsi", "http://www.w3.org/2001/XMLSchema-instance");
      templates.put(Nodes.class.getName(), template);

      return templates;
    }
  }
 
}
TOP

Related Classes of nux.xom.xquery.ResultSequenceSerializer$WrapSerializer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.