package net.sf.jpluck.handlers;
import java.net.URI;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import net.sf.jpluck.jxl.BookmarkProcessor;
import net.sf.jpluck.plucker.Bookmark;
import net.sf.jpluck.plucker.TextRecord;
import net.sf.jpluck.spider.Resource;
import net.sf.jpluck.xml.NamespaceURI;
import net.sf.jpluck.xml.TextRecordResult;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Content handler that works on a W3C DOM tree supplied by a subclass.
 *
 * <p>The tree returned by {@link #parseDocument()} is passed through the JXL
 * document's <code>transform</code> call and then written into the Plucker
 * document as one or more {@link TextRecord}s. Link URIs, embedded image URIs
 * and bookmarks reported while a record is generated are registered as well.</p>
 */
public abstract class DOMHandler extends ContentHandler {
    /**
     * Creates a handler; all three arguments are forwarded unchanged to the
     * superclass constructor.
     *
     * @param pluckerDocument the Plucker document that receives records and bookmarks
     * @param jxlDocument the JXL document supplying the transform and output settings
     * @param resource the resource whose content is being handled
     */
    public DOMHandler(net.sf.jpluck.plucker.Document pluckerDocument,
                      net.sf.jpluck.jxl.Document jxlDocument,
                      Resource resource) {
        super(pluckerDocument, jxlDocument, resource);
    }

    /**
     * Parses the resource, transforms it and stores the outcome as text records.
     *
     * <p>If the transformed document element is <code>pages</code> in the JPluck
     * namespace, every <code>page</code> element beneath it becomes a record of
     * its own. Otherwise the whole transformed document becomes a single record
     * for the resource.</p>
     *
     * @throws HandlingException if a <code>page</code> element has no
     *         <code>uri</code> attribute, or wrapping any other exception raised
     *         during processing
     */
    public void handle() throws HandlingException {
        try {
            org.w3c.dom.Document parsed = parseDocument();
            org.w3c.dom.Document transformed =
                jxlDocument.transform(parsed, resource.getURI(), pluckerDocument);
            Element root = transformed.getDocumentElement();

            boolean pageContainer = NamespaceURI.JPLUCK.equals(root.getNamespaceURI())
                && root.getLocalName().equals("pages");
            if (!pageContainer) {
                // Ordinary case: one record for the resource itself.
                addTextRecord(resource.getURI(), resource.getRedirectionURI(), transformed);
                return;
            }

            splitIntoPageRecords(root);
        } catch (HandlingException e) {
            // Already the declared exception type; pass it on without wrapping.
            throw e;
        } catch (Exception e) {
            throw new HandlingException(e);
        }
    }

    /**
     * Emits one text record per <code>page</code> element found beneath the
     * given <code>pages</code> element.
     *
     * <p>Each page's <code>uri</code> attribute is resolved against the resource
     * URI. A non-empty <code>bookmark</code> attribute adds a bookmark pointing
     * at the resolved URI.</p>
     *
     * @param container the JPluck <code>pages</code> element
     * @throws HandlingException if a page has an empty or missing <code>uri</code> attribute
     * @throws Exception if generating a record fails
     */
    private void splitIntoPageRecords(Element container) throws Exception {
        NodeList pages = container.getElementsByTagNameNS(NamespaceURI.JPLUCK, "page");
        URI base = URI.create(resource.getURI());
        int pageCount = pages.getLength();

        for (int index = 0; index < pageCount; index++) {
            Element page = (Element) pages.item(index);

            String declaredURI = page.getAttribute("uri");
            if (declaredURI.length() == 0) {
                throw new HandlingException("No uri found in <jpluck:page> element.");
            }
            String pageURI = base.resolve(declaredURI).toString();
            addTextRecord(pageURI, null, page);

            String bookmarkName = page.getAttribute("bookmark");
            if (bookmarkName.length() > 0) {
                pluckerDocument.addBookmark(bookmarkName, pageURI);
            }
        }
    }

    /**
     * Generates a text record from a DOM node and adds it to the Plucker document.
     *
     * <p>When a redirection URI is given, the record is created under that URI
     * and the original URI is set as its alternate URI. After the record has
     * been added, the links, images and bookmarks reported by the result are
     * registered and the JXL document's bookmark processor is run.</p>
     *
     * @param uri the URI of the content
     * @param redirectionURI the URI the content was redirected to, or <code>null</code>
     * @param node the DOM node to serialize into the record
     * @throws Exception if the transformation or any registration step fails
     */
    private void addTextRecord(String uri, String redirectionURI, Node node)
        throws Exception {
        String recordURI;
        if (redirectionURI != null) {
            recordURI = redirectionURI;
        } else {
            recordURI = uri;
        }
        TextRecord record = new TextRecord(recordURI, jxlDocument.getOutputEncoding(),
                                           jxlDocument.isUseHiresMargins());
        if (redirectionURI != null) {
            record.setAlternateURI(uri);
        }

        TextRecordResult result = new TextRecordResult(record,
                                                       jxlDocument.getURIRewriter(),
                                                       jxlDocument.getTextColorBrightness(),
                                                       jxlDocument.isParseTables(),
                                                       pluckerDocument);

        // A transformer created without a stylesheet copies the source to the result.
        Transformer copier = TransformerFactory.newInstance().newTransformer();
        copier.transform(new DOMSource(node), result);
        pluckerDocument.addRecord(record);

        registerReportedReferences(result);

        // Process AutoBookmarks
        BookmarkProcessor autoBookmarks = jxlDocument.getBookmarkProcessor();
        String pageTitle = result.getTitle();
        autoBookmarks.process(pluckerDocument, uri, pageTitle);
    }

    /**
     * Registers what a finished result reports, in this order: page links,
     * embedded images, then bookmarks defined by the page.
     *
     * @param result the result populated by the transformation
     * @throws Exception if registering a link or image fails
     */
    private void registerReportedReferences(TextRecordResult result) throws Exception {
        String[] links = result.getLinkURIs();
        for (int k = 0; k < links.length; k++) {
            addPageLink(links[k]);
        }

        String[] images = result.getEmbeddedImageURIs();
        for (int k = 0; k < images.length; k++) {
            addImageLink(images[k]);
        }

        // Add bookmarks defined by the page.
        Bookmark[] pageBookmarks = result.getBookmarks();
        for (int k = 0; k < pageBookmarks.length; k++) {
            pluckerDocument.addBookmark(pageBookmarks[k]);
        }
    }

    /**
     * Produces the DOM tree for the resource being handled.
     *
     * @return the parsed document
     * @throws SAXException if the content cannot be parsed
     */
    protected abstract org.w3c.dom.Document parseDocument()
        throws SAXException;
}