// Package nux.xom.tests
//
// Source Code of nux.xom.tests.XQueryCommand$Statistics

/*
* Copyright (c) 2005, The Regents of the University of California, through
* Lawrence Berkeley National Laboratory (subject to receipt of any required
* approvals from the U.S. Dept. of Energy). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* (1) Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* (2) Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* (3) Neither the name of the University of California, Lawrence Berkeley
* National Laboratory, U.S. Dept. of Energy nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* You are under no obligation whatsoever to provide any bug fixes, patches, or
* upgrades to the features, functionality or performance of the source code
* ("Enhancements") to anyone; however, if you choose to make your Enhancements
* available either publicly, or directly to Lawrence Berkeley National
* Laboratory, without imposing a separate written license agreement for such
* Enhancements, then you hereby grant the following license: a non-exclusive,
* royalty-free perpetual license to install, use, modify, prepare derivative
* works, incorporate into other computer software, distribute, and sublicense
* such enhancements or derivative works thereof, in binary and source code
* form.
*/
package nux.xom.tests;

import gnu.getopt.Getopt;
import gnu.getopt.LongOpt;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.transform.TransformerException;

import nu.xom.Attribute;
import nu.xom.Builder;
import nu.xom.Comment;
import nu.xom.DocType;
import nu.xom.Document;
import nu.xom.Element;
import nu.xom.Node;
import nu.xom.Nodes;
import nu.xom.ParentNode;
import nu.xom.ParsingException;
import nu.xom.ProcessingInstruction;
import nu.xom.Text;
import nu.xom.xinclude.XIncludeException;
import nu.xom.xinclude.XIncluder;
import nux.xom.binary.BinaryXMLCodec;
import nux.xom.pool.BuilderFactory;
import nux.xom.pool.BuilderPool;
import nux.xom.pool.DocumentFactory;
import nux.xom.pool.DocumentMap;
import nux.xom.pool.DocumentPool;
import nux.xom.pool.DocumentURIResolver;
import nux.xom.pool.FileUtil;
import nux.xom.pool.PoolConfig;
import nux.xom.pool.XOMUtil;
import nux.xom.pool.XQueryFactory;
import nux.xom.pool.XQueryPool;
import nux.xom.xquery.ResultSequenceSerializer;
import nux.xom.xquery.StreamingPathFilter;
import nux.xom.xquery.StreamingPathFilterException;
import nux.xom.xquery.StreamingTransform;
import nux.xom.xquery.XQuery;
import nux.xom.xquery.XQueryUtil;

import org.xml.sax.EntityResolver;
import org.xml.sax.XMLReader;

/**
* Nux XQuery test tool with optional schema validation, XInclude and update
* facility; see the <a href="doc-files/fire-xquery-usage.txt">online help</a>
* for a description of all available options, plus examples.
* <p>
* Somewhat complex implementation due to the large number of flexible options,
* and robust handling of all potential issues. For a much simpler example see
* SimpleXQueryCommand.java.
* <p>
* Can also be used as a simple benchmark, measuring XML parsing, XQuery
* execution and XML serialization, either individually, or in combination (via
* --iterations, --runs, --out=/dev/null), with and without pooling.
* <p>
* For best parsing results, make sure to run with the latest stable Xerces
* release. (For JDK 1.5, copy the xerces jars into nux/lib/. For JDK 1.4, set
* environment variable via export
* JAVA_OPTS='-Djava.endorsed.dirs=/path/to/xerces/lib').
* <p>
* When using W3C XML Schema, RelaxNG and TagSoup, make sure to put the xerces
* jar, MSV jars and tagsoup jar onto the classpath, respectively.
*
* @author whoschek.AT.lbl.DOT.gov
* @author $Author: hoschek3 $
* @version $Revision: 1.68 $, $Date: 2006/05/01 22:53:36 $
*/
public final class XQueryCommand extends CmdLineProgram {
 
  /**
   * Main entry point; run this program without any arguments to get help
   * (including descriptions of all options).
   */
  public static void main(String[] args) {
    new XQueryCommand().doMain(args);
  }
 
  private XQueryCommand() {
    initOptions();
  }
   
  private String getLongUsage() {
    String text =
 
    "\n" + getProgramName() + " - Nux XQuery test tool with optional schema validation.\n\n" +
 
    "Usage: " + getProgramName() + " " + getOptions() + "\n\n"+
 
    "Option names can be abbreviated as long as they remain unambigous.\n"+
    "Option cardinalities: '?' = 0..1, '*' = 0..N, '+' = 1..N, 'def' = default.\n\n" +
 
    "Help options:\n"+
    "  ? --version             Display the version of this program and exit.\n"+
    "  ? --help                Print this help page and exit.\n\n"+
   
    "Query options:\n"+
    "  + --query={STRING}|FILE The XQuery to execute.\n"
    "  ? --base=FILE           Resolve relative URIs found in the XQuery (def='.').\n"
    "  * --var=NAME:VALUE      Pass external variables to XQuery (def=none).\n\n"
 
    "Output options:\n"+
    "  * --out=FILE|/dev/null  File(s) to serialize to (def=stdout).\n"
    "  ? --algo=w3c|wrap       Result sequence serialization algorithm (def=w3c).\n"
    "  ? --encoding=STRING     Character encoding to serialize with (def=UTF-8).\n"
    "  ? --indent=INT          Insert prettyprint indentation; disable=0 (def=4).\n\n"
 
    "Validation options for input documents:\n"+
    "  ? --validate=wf|dtd|schema|relaxng|html  Set validation language (def=wf).\n"
    "  ? --schema=FILE         e.g. foo.dtd|foo.xsd|foo.rng (def=undefined).\n"
    "  ? --namespace=URI       Namespace of schema (def=undefined).\n\n"
   
    "Misc options:\n"+
    "  ? --update={STRING}|FILE Apply update XQuery to each item in result sequence.\n"
    "  ? --xinclude            Perform W3C XInclude resolution on input files.\n"
    "  ? --strip               Remove whitespace-only text nodes from input files.\n"
    "  ? --noexternal          Disallow Java extension functions in XQuery.\n" +
    "  ? --filterpath=STRING   Streaming path filter, e.g. '/a/b/c' (def=none).\n" +
    "  ? --filterquery={STRING}|FILE XQuery transforming each filter match (def=.).\n" +
    "  ? --debug               Print full stack trace on exception.\n\n"+
 
    "Benchmarking options:\n"+
    "  ? --runs=INT            Repeat outer loop N times (def=1).\n"+
    "  ? --iterations=INT      Repeat inner loop M times (def=1).\n"+
    "  ? --docpoolcapacity=INT Allow at most N MB memory for document pool (def=0).\n" +
    "  ? --docpoolcompression=-1..9    Use document ZLIB compression level (def=-1).\n" +
    "  ? --nobuilderpool       Disable caching of SAX XMLReaders.\n" +
    "  ? --explain             Print description of optimized XQuery plan.\n"+
    "  ? --xomxpath            Use XOM's XPath engine instead of Nux's XPath engine.\n\n" +
 
//    "Logging:\n"+
//    "  --loglevel=all|trace|debug|info|warn|error|fatal|off (default='info').\n\n"+

    "Examples:\n"+
    "  " + "cd samples/data\n"+
    "  " + getProgramName() + " --query='{doc(\"periodic.xml\")/PERIODIC_TABLE/ATOM[NAME=\"Zinc\"]}'\n"+
    "  " + getProgramName() + " --query='{declare namespace atom = \"http://www.w3.org/2005/Atom\"; doc(\"http://www.tbray.org/ongoing/ongoing.atom\")/atom:feed/atom:entry/atom:title}'\n"+
    "  " + getProgramName() + " --query='{count(//*)}' *.xml\n"+
    "  " + getProgramName() + " --query='{count(//*)}' *.xml.bnux\n"+
    "  " + getProgramName() + " --algo=wrap --query='{//node(), //@*, \"Hello World!\"}' p2pio.xml\n"+
    "  " + getProgramName() + " --query=../xmark/q09.xq ../xmark/auction-0.01.xml --out=/tmp/results.out\n"+
    "  " + getProgramName() + " --var=x:2 --var=y:5 --query='{declare variable $x external; declare variable $y external; $x * $y}'\n"+
    "  " + getProgramName() + " --query='{/receive/timeout}' --update='{declare namespace system = \"java:java.lang.System\"; system:currentTimeMillis() + 10000}' p2pio.xml\n"+
    "  " + getProgramName() + " --xinclude --query='{.}' xinclude.xml\n"+
//    "  " + getProgramName() + " --xinclude --query='{//@xml:base}' --update='{()}' xinclude.xml\n"+
    "  " + getProgramName() + " --query=../fulltext/q2-06.xq ../fulltext/full-text.xml\n"+
    "  " + getProgramName() + " --validate=html --query='{//*:img/string(@src)}' ../../doc/index.html\n"+
    "  " + getProgramName() + " --query='{.}' --validate=relaxng --debug --schema=../data-atom/atom.rng ../data-atom/ongoing.xml\n"+
    "  " + getProgramName() + " --query='{.}' --validate=schema --namespace='http://openuri.org/easypo' --schema=ns-order.xsd ns-order.xml\n"+
    "  " + getProgramName() + " --query='{declare namespace util = \"java:nux.xom.pool.FileUtil\"; <files> {for $uri in util:listFiles(\"../shakespeare\", false(), \"*.xml\", \"\") let $kills := count(saxon:discard-document(doc(string($uri)))//LINE[contains(., \"kill\")]) order by $kills return <file><name>{$uri}</name> <killCount>{$kills}</killCount></file> }</files>}'\n" +
    "  " + getProgramName() + " --query='{.}' --validate=schema --namespace='http://openuri.org/easypo' --schema=ns-order.xsd ns-order.xml --out=/dev/null --iter=0 --runs=100000\n"+
    "  " + getProgramName() + " --query='{count(doc(\"periodic.xml\")//*)}' --out=/dev/null --indent=0 --iter=5000 --runs=5 --docpoolcapacity=100 --explain\n"+
    "";
   
    boolean isWindows = System.getProperty("os.name").startsWith("Windows");
    if (isWindows) { // swap escape apostrophes so examples work also on Windows command line:
      char z = (char) 0;
      text = text.replace('\'', z);    // ' --> 0
      text = text.replace('\"', '\''); // " --> '
      text = text.replace(z, '\"');    // 0 --> " 
    }
    return text;
  }
 
  /** Defines command line options. */
  private void initOptions() {
    // TODO: ???
    // additional input docs via --dir --includes --excludes --recursive
    // --loglevel=debug
    // --optionally validate output as well?
   
    this.sb = new StringBuffer();
    ArrayList options = new ArrayList();
    options.add( new LongOpt("help", LongOpt.NO_ARGUMENT, null, 'h') );
    options.add( new LongOpt("version", LongOpt.NO_ARGUMENT, null, 'v') );   
    options.add( new LongOpt("query", LongOpt.REQUIRED_ARGUMENT, sb, 'q') );
    options.add( new LongOpt("base", LongOpt.REQUIRED_ARGUMENT, sb, 'b') );
    options.add( new LongOpt("var", LongOpt.REQUIRED_ARGUMENT, sb, 'P') );
    options.add( new LongOpt("out", LongOpt.REQUIRED_ARGUMENT, sb, 'o') );
    options.add( new LongOpt("algo", LongOpt.REQUIRED_ARGUMENT, sb, 'S') );
    options.add( new LongOpt("encoding", LongOpt.REQUIRED_ARGUMENT, sb, 'E') );
    options.add( new LongOpt("indent", LongOpt.REQUIRED_ARGUMENT, sb, 'I') )
    options.add( new LongOpt("strip", LongOpt.NO_ARGUMENT, null, 's') );   
    options.add( new LongOpt("update", LongOpt.REQUIRED_ARGUMENT, sb, 'u') )
    options.add( new LongOpt("xinclude", LongOpt.NO_ARGUMENT, null, 'x') );   
    options.add( new LongOpt("explain", LongOpt.NO_ARGUMENT, null, 'e') );
    options.add( new LongOpt("noexternal", LongOpt.NO_ARGUMENT, null, 'n') );   
    options.add( new LongOpt("runs", LongOpt.REQUIRED_ARGUMENT, sb, 'r') );
    options.add( new LongOpt("iterations", LongOpt.REQUIRED_ARGUMENT, sb, 'i') );
    options.add( new LongOpt("docpoolcapacity", LongOpt.REQUIRED_ARGUMENT, sb, 'C') );
    options.add( new LongOpt("docpoolcompression", LongOpt.REQUIRED_ARGUMENT, sb, 'D') );
    options.add( new LongOpt("nobuilderpool", LongOpt.NO_ARGUMENT, null, 'p') );
    options.add( new LongOpt("debug", LongOpt.NO_ARGUMENT, null, 'd') );   
    options.add( new LongOpt("validate", LongOpt.REQUIRED_ARGUMENT, sb, 'V') );
    options.add( new LongOpt("namespace", LongOpt.REQUIRED_ARGUMENT, sb, 'W') );
    options.add( new LongOpt("schema", LongOpt.REQUIRED_ARGUMENT, sb, 'w') );
    options.add( new LongOpt("filterpath", LongOpt.REQUIRED_ARGUMENT, sb, 'f') );
    options.add( new LongOpt("filterquery", LongOpt.REQUIRED_ARGUMENT, sb, 'F') );
    options.add( new LongOpt("xomxpath", LongOpt.NO_ARGUMENT, null, 'N') );   
   
////    options.add( new LongOpt("loglevel", LongOpt.REQUIRED_ARGUMENT, sb, 'l') ); setLogLevels(Level.INFO);
     
    this.longOpts = new LongOpt[options.size()];
    options.toArray(this.longOpts);   
  }
 
  // Long option table; filled by initOptions() and passed to Getopt in parseArguments().
  private LongOpt[] longOpts = null;
  // Flag buffer shared by every argument-taking LongOpt; parseArguments() parses
  // its content as a decimal char code to find out which option was matched.
  private StringBuffer sb = null;
 
  // parsed command line options, initialized with defaults:
  // Non-option arguments; parseArguments() substitutes a single null entry when none are given.
  private String[] inputFiles = null;
  // --var NAME:VALUE pairs, handed to XQuery.execute() in run().
  private Map variables = new HashMap(); // String -> Object
  // --query values: inline query text (String), query file (File), or null for "nop".
  private List queries = new ArrayList(); // String or File
  // --base, converted to a URI.
  private URI baseURI = null;
 
  // --out targets; parseArguments() pads the list up to inputFiles.length
  // (with the first entry, or null when no --out was given).
  private List outputFiles = new ArrayList(); // File
  // --algo
  private String algorithm = ResultSequenceSerializer.W3C_ALGORITHM;
  // --encoding
  private String encoding = "UTF-8";
  // --indent; negative input is clamped to 0.
  private int indent = 4;
  // --strip
  private boolean stripWhitespace = false;
  // --explain
  private boolean explain = false;
  // --runs (outer loop count in run()).
  private int runs = 1;
  // --iterations (inner loop count in run()); negative input is clamped to 0.
  private int iterations = 1;
  // --docpoolcapacity, stored in bytes (the option value is multiplied by 1024*1024).
  private long docPoolCapacity = 0;
  // --docpoolcompression; raised to at least 0 by parseArguments() when --update is given.
  private int docPoolCompression = -1;
  // --nobuilderpool
  private boolean noBuilderPool = false;
  // --debug: run() rethrows exceptions as-is instead of wrapping them in UsageException.
  private boolean debug = false;
  // --validate: one of "wf", "dtd", "schema", "relaxng", "html".
  private String validate = "wf";
  // --namespace
  private String namespace = null;
  // --schema, stored as an absolute file.
  private File schema = null;
  // --xinclude
  private boolean xinclude = false;
  // --update: inline query text (String), query file (File), or null when absent.
  private Object update = null;
  // --filterpath
  private StreamingPathFilter filter = null;
  // --filterquery text; parseArguments() defaults it to "." when not given.
  private String filterQuery = null;
  // --xomxpath
  private boolean xomXPath = false;
 
  protected int parseArguments(String[] args) {
    if (args.length == 0) {
      System.out.println(getLongUsage());
      return -1;
    }
   
    Getopt getopt = new Getopt(getProgramName(), args, ":", longOpts, true);
    //Getopt getopt = new Getopt(getCommandString(), argv, "-:h:vf:b:pc:u:t:y:w:drq", longOpts, true);
    //getopt.setOpterr(false); // We'll do our own error handling
 
    int c;
    while ((c = getopt.getopt()) != -1) {
//      log.trace("longind="+g.getLongind());
      switch (c) {
        case 'h' : // --help
          System.out.println(getLongUsage());
          return -1;
        case 'v' : // --version
          System.out.println(getVersionInfo());
          return -1;
        case 's' : // --strip
          stripWhitespace = true;
          break;
        case 'x' : // --xinclude
          xinclude = true;
          break;
        case 'e' : // --explain
          explain = true;
          break;
        case 'n' : // --noexternal
          System.setProperty("nux.xom.xquery.XQuery.allowExternalFunctions", "false");
          break;
        case 'd' : // --debug
          debug = true;
          break;
        case 'p' : // --nobuilderpool
          noBuilderPool = true;
          break;
        case 'N' : // --xomxpath
          xomXPath = true;
          break;
        case 0
          String arg = getopt.getOptarg();
          char val = (char) (new Integer(sb.toString())).intValue();
          String optionName = longOpts[getopt.getLongind()].getName();
//          log.trace("Got long option with value '" + val + "' with argument " + ((arg != null) ? arg : "null"));
          switch (val) {
            case 'q' : // --query
              arg = arg.trim();
              if (arg.startsWith("{") && arg.endsWith("}")) {
                // query is given inline between curly brackets, ala Saxon command line tool
                queries.add(arg.substring(1, arg.length()-1));
              } else {
                if (arg.equals("nop"))
                  queries.add(null); // disable xquery for benchmarking
                else
                  queries.add(parsePath(arg));
              }
              break;
            case 'u' : // --update
              arg = arg.trim();
              if (arg.startsWith("{") && arg.endsWith("}")) {
                // update query is given inline between curly brackets, ala Saxon command line tool
                update = arg.substring(1, arg.length()-1);
              } else {
                update = parsePath(arg);
              }
              break;
            case 'b' : // --base
              baseURI = parsePath(arg).toURI();
              break;
            case 'P' : { // --var
              int i = arg.indexOf(':');
              if (i < 0) throw new UsageException("Missing name:value pair");
              String name = arg.substring(0, i).trim();
              String value = arg.substring(i+1);
              if (false && value.startsWith("doc(") && value.endsWith(")")) {
                try {
                  value = value.substring("doc(".length()-1);
                  value = value.substring(1, value.length()-1);
                  variables.put(name, new Builder().build(new File(value)));
                } catch (Exception e) {
                  throw new UsageException(e);
                }
              } else {
                variables.put(name, value);
              }
              break;
            }
            case 'o' : // --out
              outputFiles.add(parsePath(arg));
              break;
            case 'S' : // --algo
              arg = arg.trim();
              checkValidity(arg, new String[] {
                ResultSequenceSerializer.W3C_ALGORITHM,
                ResultSequenceSerializer.WRAP_ALGORITHM}, optionName);
              algorithm = arg;
              break;
            case 'E' : // --encoding
              encoding = arg.trim();
              break;
            case 'I' : // --indent
              indent = Math.max(0, parseInt(arg, optionName));
              break;
            case 'r' : // --runs
              runs = parseIntGreaterThanZero(arg, optionName);
              break;
            case 'i' : // --iterations
              iterations = Math.max(0, parseInt(arg, optionName));
              break;
            case 'C' : // --docpoolcapacity
              docPoolCapacity = 1024L * 1024L * parseInt(arg, optionName);
              break;         
            case 'D' : // --docpoolcompression
              docPoolCompression = parseInt(arg, optionName);
              break;         
            case 'V' // --validate
              arg = arg.trim();
              checkValidity(arg, new String[] {
                "wf", "dtd", "schema", "relaxng", "html"}, optionName);
              validate = arg;
              break
            case 'W' // --namespace
              namespace = arg.trim();
              break
            case 'w' : { // --schema
              // if the schema file location is a relative path,
              // xerces interprets it relative to the XML instance document file,
              // not the current working directory.
              // This may be surprising and errorprone, so we convert it to an absolute path
              // Also, there are some obscure work arounds to make this work both on Unix and Windows, in all cases...
              schema = parsePath(arg).getAbsoluteFile();
              break;
            }
            case 'f' : // --filterpath
              try {
                filter = new StreamingPathFilter(arg, null);
              } catch (StreamingPathFilterException e) {
                throw new UsageException(e);
              }
              break
            case 'F' : // --filterquery
              arg = arg.trim();
              if (arg.startsWith("{") && arg.endsWith("}")) {
                // query is given inline between curly brackets, ala Saxon command line tool
                filterQuery = arg.substring(1, arg.length()-1);
              } else {
                try {
                  filterQuery = FileUtil.toString(
                      new FileInputStream(parsePath(arg)), null);
                } catch (IOException e) {
                  throw new UsageException(e);
                }
              }
              break;
////            case 'l' : // --loglevel
////              setLogLevels(toLevel(arg));
////              break;
            default :
              throw new InternalError("Oops. Should never reach here. val='" + val + "'");
          }       
          break;
        case ':' :
          throw new UsageException("Option '" + longOpts[getopt.getLongind()].getName() + "' requires an argument");
          //throw new UsageException("Argument missing for option " + (char) g.getOptopt() + ", errname=" + longopts[g.getLongind()].getName());
        case '?' :
          System.err.println(getLongUsage());
          return -1;
//          throw new UsageException("The option '" + (char) g.getOptopt() + "' is not valid");
        default :
          throw new InternalError("Oops. Should never reach here. getopt() returned '" + (char) c + "'");
      }
    }
 
    if (queries.size() == 0) throw new UsageException("Missing required argument --query");
    inputFiles = parseNonOptionArguments(args, getopt.getOptind(), true, 0, Integer.MAX_VALUE);
    if (inputFiles.length == 0) inputFiles = new String[] { null };
   

    // fill in default, if necessary
    File file = null;
    if (outputFiles.size() > 0) file = (File) outputFiles.get(0);
    while (outputFiles.size() < inputFiles.length) outputFiles.add(file);
   
    if (filterQuery == null) filterQuery = ".";
    if (update != null) docPoolCompression = Math.max(0, docPoolCompression);
   
    if (xomXPath) {
      for (int i=0; i < queries.size(); i++) {
        Object query = queries.get(i);
        if (query instanceof File) {
          try {
            query = FileUtil.toString(new FileInputStream((File)query), null);
          } catch (IOException e) {
            throw new UsageException(e);
          }
        }
        queries.set(i, query);
      }
    }
   
    return 0;
  }
   
  /**
   * Execute the query.
   */ 
  protected void run() throws Exception {   
    try {
      final boolean isBench2 = (runs > 1);
      final boolean isBench = (runs > 1 && iterations > 0);     
      final DocumentPool docPool = createDocumentPool(isBench);

      DocumentURIResolver resolver = new DocumentURIResolver() {
        public Document resolve(String href, String baseURI) throws ParsingException, IOException, TransformerException {
          String systemID = new net.sf.saxon.StandardURIResolver(null).
            resolve(href, baseURI).getSystemId();
//          System.err.println(systemID);
          return docPool.getDocument(URI.create(systemID));
        }
      };

      // prepare XQuery pool
      XQueryPool queryPool = new XQueryPool(
          new PoolConfig(), new XQueryFactory(null, resolver));
     
      ResultSequenceSerializer serializer = new ResultSequenceSerializer();
      serializer.setAlgorithm(algorithm);
      serializer.setEncoding(encoding);
      serializer.setIndent(indent);
     
      // now do the real work
      long runsStart = System.currentTimeMillis();
      for (int run=0; run < runs; run++) {
        if (isBench) {
          System.out.println("\n\n******************************************");
          System.out.println("run = " + run + ":");
        }
        for (int i=0; i < queries.size(); i++) {
          long start = System.currentTimeMillis();
          long serializationTime = 0;
          Object query = queries.get(i);
          XQuery xquery;
          if (query instanceof String) {
            xquery = queryPool.getXQuery((String)query, baseURI);
          } else if (query instanceof File) {
            xquery = queryPool.getXQuery((File)query, baseURI);
          } else {
            xquery = null; // disable XQuery for benchmarking
          }
         
          if (isBench) {
            System.out.println("query = " +query);
          }
          if (explain && run == 0 && xquery != null) {
            System.out.println("explain = \n" + xquery.explain());
          }
         
          XQuery morpher;
          if (update instanceof String) {
            morpher = queryPool.getXQuery((String)update, null);
          } else if (update instanceof File) {
            morpher = queryPool.getXQuery((File)update, null);
          } else {
            morpher = null;
          }
         
          int numSerials = 0;
          for (int j=0; j < inputFiles.length; j++) {
            Document doc = null;
            if (inputFiles[j] != null) {
              doc = docPool.getDocument(new File(inputFiles[j]));
            }
            if (explain && doc != null) {
              System.out.println("stats=" + toStatisticsString(doc));
            }

            for (int iter=0; iter < iterations; iter++) {
              Document doc2 = doc;
              if (morpher != null && doc2 != null) {
                doc2 = new Document(doc2); // immutable for multiple iterations
              }
             
              // run the query
              Nodes results;
              if (xomXPath) {
                if (doc2 == null) throw new UsageException(
                  "A context node is required by XOM's XPath engine, but missing.");
                results = doc2.query((String)query);
              } else if (xquery != null) {
                results = xquery.execute(doc2, null, variables).toNodes();
              } else {
                results = new Nodes(); // disable XQuery for benchmarking
                results.append(doc2);
              }
             
              if (morpher != null) {
                // interpret --query as select, interpret --update as morpher
                for (int k=0; doc2 == null && k < results.size(); k++) {
                  doc2 = results.get(k).getDocument();
                }
                XQueryUtil.update(results, morpher, null);
               
                // serialize modified document if there is one
                results = new Nodes();
                if (doc2 != null) results.append(doc2);
              }
             
              // serialize results onto output, if any
              File f = (File) outputFiles.get(j);
              OutputStream out = System.out;
              if (f != null) {
                if (f.getAbsolutePath().equals("/dev/null")) continue;
                out = new FileOutputStream(f);
              }
             
              long serializationStart = System.currentTimeMillis();
              serializer.write(results, out);
              if (out != System.out && out != System.err) out.close();
              serializationTime += System.currentTimeMillis() - serializationStart;
              numSerials++;
            }
          }
          if (isBench && iterations > 0) {
            long end = System.currentTimeMillis();
            System.out.println("\nsecs = " + ((end-start) / 1000.0f));
            System.out.println("queries/sec = " + (inputFiles.length * iterations / ((end-start) / 1000.0f)));
            if (numSerials > 0) {
              System.out.println("\nserialization secs = " + (serializationTime / 1000.0f));
              System.out.println("serializations/sec = " + (numSerials / (serializationTime / 1000.0f)));
            }
          }
        }
      }
      if (isBench2) {
        long runsEnd = System.currentTimeMillis();
        System.out.println("\n\n******************************************");
        System.out.println("total secs = " + ((runsEnd-runsStart) / 1000.0f));
        System.out.println("runs/sec = " + (runs / ((runsEnd-runsStart) / 1000.0f)));
      }
    } catch (RuntimeException e) { // report stack trace only if requested
      if (debug) {
        if (e instanceof UsageException) e = new RuntimeException(e);
        throw e;
      }
      throw new UsageException(e);  
    } catch (Exception e) {
      if (debug) throw e;
      throw new UsageException(e);  
    }
  }

  private DocumentPool createDocumentPool(final boolean isBench) {
    // prepare BuilderPool
    PoolConfig config = new PoolConfig();
    if (noBuilderPool) config.setMaxEntries(0);
    final BuilderPool builderPool;
   
    if (filter == null) {
      builderPool = new BuilderPool(config, new BuilderFactory());
    } else {
      BuilderFactory builderFactory = new BuilderFactory() {
        protected Builder newBuilder(XMLReader parser, boolean validate) {
          StreamingTransform myTransform = new StreamingTransform() {
            public Nodes transform(Element subtree) {
              return XQueryUtil.xquery(subtree, filterQuery);
            }
          };
          return new Builder(parser, validate, filter.createNodeFactory(null, myTransform));    
        }
      };
      builderPool = new BuilderPool(config, builderFactory);
    }

    // prepare DocumentFactory and DocumentPool
    DocumentFactory docFactory = new DocumentFactory() {
      public Document createDocument(InputStream input, URI baseURI)
          throws ParsingException, IOException {
        long start = System.currentTimeMillis();
        Document doc;
        if (baseURI != null && baseURI.getPath().endsWith(".bnux")) {
          if (filter == null) {
            doc = getBinaryXMLFactory().createDocument(input, baseURI);
          } else {
            StreamingTransform myTransform = new StreamingTransform() {
              public Nodes transform(Element subtree) {
                return XQueryUtil.xquery(subtree, filterQuery);
              }
            };
 
            if (input == null && baseURI == null)
              throw new IllegalArgumentException("input and baseURI must not both be null");
            if (input == null) input = baseURI.toURL().openStream();
            try {
              doc = new BinaryXMLCodec().deserialize(input, filter.createNodeFactory(null, myTransform));
              if (baseURI != null) doc.setBaseURI(baseURI.toASCIIString());
            } finally {
              input.close(); // do what SAX XML parsers do
            }
          }
        } else {
          doc = super.createDocument(input, baseURI);
        }
        if (xinclude) {
          try {
            XIncluder.resolveInPlace(doc, newBuilder());
          } catch (XIncludeException e) {
            throw new ParsingException(e.getMessage(), e);
          }
        }
        if (stripWhitespace) XOMUtil.Normalizer.STRIP.normalize(doc);
        long end = System.currentTimeMillis();
        if (isBench || explain) System.out.println(baseURI + " parse [ms]=" + (end-start));
        return doc;
      }
     
      protected Builder newBuilder() {
        if (validate.equals("wf")) {
          return builderPool.getBuilder(false);
        } else if (validate.equals("dtd")) {
          if (schema == null) return builderPool.getBuilder(true);
          EntityResolver resolver;
          try {
            resolver = new BuilderFactory().createResolver(
                  new FileInputStream(schema));
          } catch (IOException e) {
            throw new UsageException(e);
          }
          return builderPool.getDTDBuilder(resolver);
        } else if (validate.equals("schema")) {
          HashMap map = new HashMap();
          if (schema != null) map.put(schema, namespace);
//          return new BuilderFactory().createW3CBuilder(map);
          return builderPool.getW3CBuilder(map);
        } else if (validate.equals("relaxng")) {
          if (schema == null) throw new UsageException(
              "Missing required argument --schema");
          return builderPool.getMSVBuilder(schema.toURI());
        } else if (validate.equals("html")) {
          XMLReader parser;
          try {
            parser = (XMLReader) Class.forName("org.ccil.cowan.tagsoup.Parser").newInstance();
          } catch (Exception e) {
            throw new UsageException(e);
          }
          return new Builder(parser);
        } else {
          throw new UsageException("Illegal validate option: " + validate);
        }
      }
    };
   
    return new DocumentPool(
      new DocumentMap(
        new PoolConfig().
          setCompressionLevel(docPoolCompression).
          setCapacity(docPoolCapacity)),
        docFactory);
  }
 
  protected String getMailAddress() { return "wolfgang.DOT.hoschek.AT.mac.DOT.com"; }
  protected String getProgramName() { return "fire-xquery"; }
  protected String getHomepage()    { return "http://dsd.lbl.gov/nux"; }
  protected String getVersion() {
    String s = "[";
    if (Package.getPackage("nux.xom.xquery") != null) {
      String version = Package.getPackage("nux.xom.xquery").getImplementationVersion();
      if (version != null) s += "nux-" + version + ", ";
    }
    s += "saxon-" +  net.sf.saxon.Version.getProductVersion();
    if (Package.getPackage("nu.xom") != null) {
      String version = Package.getPackage("nu.xom").getImplementationVersion();
      if (version != null) s += ", xom-" + version;
    }
    s += "]";
    return s;
  }
 
  /** Parses OS insensitive file path, stripping off leading URI scheme, if any */
  private static File parsePath(String path) {
    path = (path == null ? "" : path.trim());
    if (path.startsWith("file://"))  {
      path = path.substring("file://".length());
    } else if (path.startsWith("file:")) {
      path = path.substring("file:".length())
    }
   
    if (path.length() == 0 || path.equals(".")) {
      path = System.getProperty("user.dir", "."); // CWD
    } else
      // convert separators to native format
      path = path.replace('\\', File.separatorChar);
      path = path.replace('/',  File.separatorChar);
     
      if (path.startsWith("~")) {
        // substitute Unix style home dir: ~ --> user.home
        String home = System.getProperty("user.home", "~");
        path = home + path.substring(1);
      }
    }
   
    return new File(path);
  }

  /**
   * Returns a statistical summary of the given node (subtree) for
   * experimental/analytical purposes.
   *
   * @param node
   *            the node (subtree) for which to calculate statistics
   * @return a summary representation
   */
  private static String toStatisticsString(Node node) {
    Statistics stats = new Statistics();
    toStatisticsString(node, stats);
   
    NumberFormat f = NumberFormat.getPercentInstance();
    f.setMaximumFractionDigits(2);
    double nodes = stats.nodes * 1.0;
    double chars = stats.chars * 1.0;
   
    return
    "[" +
    "nodes=" + stats.nodes +
    ", elements=" + f.format(stats.elements / nodes) +
    ", attributes=" + f.format(stats.attributes / nodes) +
    ", texts=" + f.format(stats.texts / nodes) +
    ", comments=" + f.format(stats.comments / nodes) +
    ", pis=" + f.format(stats.pis / nodes) +
    ", docTypes=" + f.format(stats.docTypes / nodes) +
   
    ", chars=" + stats.chars +
    ", tagChars=" + f.format(stats.tagChars / chars) +
    ", whitespaceChars=" + f.format(stats.whitespaceChars / chars) +
    ", nonASCIIChars=" + f.format(stats.nonASCIIChars / chars) +
//    ", memorySizeMB=" + (getMemorySize(node) / (1024.0f * 1024.0f)) +
    "]";
  }
 
  private static void toStatisticsString(Node node, Statistics stats) {
    stats.nodes++;
    String value = "";
    if (node instanceof ParentNode) {
      ParentNode parent = (ParentNode) node;
      for (int i=0; i < parent.getChildCount(); i++) {
        toStatisticsString(parent.getChild(i), stats);
      }
      if (node instanceof Element) {
        stats.elements++;
        Element elem = (Element) node;
        value = elem.getQualifiedName();
        stats.tagChars += value.length();
        for (int j=0; j < elem.getAttributeCount(); j++) {
          toStatisticsString(elem.getAttribute(j), stats);
        }
        // TODO: include additional namespace declarations?
      }
    }
    else {
      if (node instanceof Text) {
        stats.texts++;
      } else if (node instanceof Attribute) {
        stats.attributes++;
        stats.tagChars += ((Attribute) node).getQualifiedName().length();
      } else if (node instanceof Comment) {
        stats.comments++;
      } else if (node instanceof ProcessingInstruction) {
        stats.pis++;
      } else if (node instanceof DocType) {
        stats.docTypes++;
      }
      value = node.toXML();
    }
   
    stats.chars += value.length();
    for (int i=0; i < value.length(); i++) {
      if (isWhitespace(value.charAt(i))) stats.whitespaceChars++;
      if (value.charAt(i) > 127 || value.charAt(i) < 0) stats.nonASCIIChars++;
    }
  }
 
  /** see XML spec */
  private static boolean isWhitespace(char c) {
    switch (c) {
      case '\t': return true;
      case '\n': return true;
      case '\r': return true;
      case ' ' : return true;
      default  : return false;     
    }
  }
 
  private static final class Statistics {
    private int nodes;
    private int elements;
    private int texts;
    private int comments;
    private int attributes;
    private int pis;
    private int docTypes;
//    private int namespaces;
    private long chars;
    private long tagChars;
    private long whitespaceChars;
    private long nonASCIIChars;
  }
 
}
TOP

Related Classes of nux.xom.tests.XQueryCommand$Statistics

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.