Package org.webharvest.utils

Source Code of org.webharvest.utils.XmlUtil

package org.webharvest.utils;

import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.webharvest.runtime.variables.ListVariable;
import org.webharvest.runtime.variables.NodeVariable;
import org.webharvest.runtime.RuntimeConfig;
import org.webharvest.exception.ScraperXPathException;

import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.transform.dom.DOMSource;
import java.io.*;

import net.sf.saxon.query.StaticQueryContext;
import net.sf.saxon.query.XQueryExpression;
import net.sf.saxon.query.DynamicQueryContext;
import net.sf.saxon.Configuration;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.om.SequenceIterator;
import net.sf.saxon.om.Item;

/**
* XML utils - contains common logic for XML handling
*/
public class XmlUtil {

    public static void prettyPrintXml(Document doc, Writer writer) throws IOException {
        try {
            DOMSource domSource = new DOMSource(doc);
            StreamResult streamResult = new StreamResult(writer);
            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer serializer = tf.newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            serializer.setOutputProperty(OutputKeys.METHOD, "xml");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.transform(domSource, streamResult);
        } catch (TransformerException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }

    public static String prettyPrintXml(String xmlAsString) throws IOException, ParserConfigurationException, SAXException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse( new InputSource(new StringReader(xmlAsString)) );
        StringWriter writer = new StringWriter();

        prettyPrintXml(doc, writer);

        String result = writer.toString();

        writer.close();
        return result;
    }

    /**
     * Evaluates specified XPath expression against given XML text and using given runtime configuration.
     * @param xpath
     * @param xml
     * @param runtimeConfig
     * @return Instance of ListVariable that contains results.
     * @throws XPathException
     */
    public static ListVariable evaluateXPath(String xpath, String xml, RuntimeConfig runtimeConfig) throws XPathException {
        StaticQueryContext sqc = runtimeConfig.getStaticQueryContext();
        Configuration config = sqc.getConfiguration();

        XQueryExpression exp = runtimeConfig.getXQueryExpressionPool().getCompiledExpression(xpath);
        DynamicQueryContext dynamicContext = new DynamicQueryContext(config);
        StringReader reader = new StringReader(xml);

        dynamicContext.setContextItem(sqc.buildDocument(new StreamSource(reader)));

        return createListOfXmlNodes(exp, dynamicContext);
    }

    /**
     * Creates list variable of resulting XML nodes.
     * @param exp
     * @param dynamicContext
     * @return
     * @throws XPathException
     */
    public static ListVariable createListOfXmlNodes(XQueryExpression exp, DynamicQueryContext dynamicContext) throws XPathException {
        final SequenceIterator iter = exp.iterator(dynamicContext);

        ListVariable listVariable = new ListVariable();
        while (true) {
            Item item = iter.next();
            if (item == null) {
                break;
            }

            XmlNodeWrapper value = new XmlNodeWrapper(item);
            listVariable.addVariable( new NodeVariable(value) );
        }

        return listVariable;
    }

}
TOP

Related Classes of org.webharvest.utils.XmlUtil

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.