Package net.sf.jpluck.handlers

Source Code of net.sf.jpluck.handlers.DOMHandler

package net.sf.jpluck.handlers;

import java.net.URI;

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;

import net.sf.jpluck.jxl.BookmarkProcessor;
import net.sf.jpluck.plucker.Bookmark;
import net.sf.jpluck.plucker.TextRecord;
import net.sf.jpluck.spider.Resource;
import net.sf.jpluck.xml.NamespaceURI;
import net.sf.jpluck.xml.TextRecordResult;

import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;


public abstract class DOMHandler extends ContentHandler {
    public DOMHandler(net.sf.jpluck.plucker.Document pluckerDocument, net.sf.jpluck.jxl.Document jxlDocument,
                      Resource resource) {
        super(pluckerDocument, jxlDocument, resource);
    }

    public void handle() throws HandlingException {
        try {
            org.w3c.dom.Document dom = parseDocument();
            dom = jxlDocument.transform(dom, resource.getURI(), pluckerDocument);
            Element element = dom.getDocumentElement();
            if (NamespaceURI.JPLUCK.equals(element.getNamespaceURI()) &&
                        element.getLocalName().equals("pages")) {
                NodeList nodeList = element.getElementsByTagNameNS(NamespaceURI.JPLUCK, "page");
                URI baseURI = URI.create(resource.getURI());
                for (int i = 0, n = nodeList.getLength(); i < n; i++) {
                    Element page = (Element) nodeList.item(i);
                    String uri = page.getAttribute("uri");
                    if (uri.length() == 0) {
                        throw new HandlingException("No uri found in <jpluck:page> element.");
                    }
                    uri = baseURI.resolve(uri).toString();
                    addTextRecord(uri, null, page);
                    String bookmark = page.getAttribute("bookmark");
                    if (bookmark.length() > 0) {
                      pluckerDocument.addBookmark(bookmark, uri);
                    }
                }
            } else {
                addTextRecord(resource.getURI(), resource.getRedirectionURI(), dom);
            }
        } catch (HandlingException e) {
            throw e;
        } catch (Exception e) {
            throw new HandlingException(e);
        }
    }

    private void addTextRecord(String uri, String redirectionURI, Node node)
                        throws Exception {
        TextRecord textRecord = new TextRecord(((redirectionURI != null) ? redirectionURI : uri),
                                               jxlDocument.getOutputEncoding(), jxlDocument.isUseHiresMargins());
        if (redirectionURI != null) {
            textRecord.setAlternateURI(uri);
        }
        TextRecordResult result = new TextRecordResult(textRecord, jxlDocument.getURIRewriter(),
                                                       jxlDocument.getTextColorBrightness(),
                                                       jxlDocument.isParseTables(), pluckerDocument);
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.transform(new DOMSource(node), result);

        pluckerDocument.addRecord(textRecord);

        String[] linkURIs = result.getLinkURIs();
        for (int i = 0; i < linkURIs.length; i++) {
            String linkURI = linkURIs[i];
            addPageLink(linkURI);
        }

        String[] imageURIs = result.getEmbeddedImageURIs();
        for (int i = 0; i < imageURIs.length; i++) {
            String imageURI = imageURIs[i];
            addImageLink(imageURI);
        }
       
        // Add bookmarks defined by the page.
        Bookmark[] bookmarks = result.getBookmarks();
        for (int i = 0; i < bookmarks.length; i++) {
      Bookmark bookmark = bookmarks[i];
      pluckerDocument.addBookmark(bookmark);
    }
       
    // Process AutoBookmarks
        BookmarkProcessor processor = jxlDocument.getBookmarkProcessor();
        String title = result.getTitle();
        processor.process(pluckerDocument, uri, title);
    }

    protected abstract org.w3c.dom.Document parseDocument()
                                                   throws SAXException;
}
TOP

Related Classes of net.sf.jpluck.handlers.DOMHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.