Package net.sf.jpluck.plucker.parsing.html

Examples of net.sf.jpluck.plucker.parsing.html.HTMLSerializer


public class TextRecordResult extends SAXResult {
    private HTMLSerializer serializer;

    public TextRecordResult(TextRecord textRecord, URIRewriter uriRewriter, int textColorBrightness, boolean parseTables,
    Document document) {
        serializer = new HTMLSerializer(textRecord, uriRewriter, textColorBrightness, parseTables, document);
        setHandler(serializer);
       
    }
View Full Code Here


        for (int i = 0, n = items.length; i < n; i++) {
          String uri = resource.getURI() + "_item-" + (i + 1);
          TextRecord textRecord = new TextRecord(uri,
                               jxlDocument.getOutputEncoding(),
                               jxlDocument.isUseHiresMargins());
          HTMLSerializer serializer = new HTMLSerializer(textRecord, jxlDocument.getURIRewriter(),
                                   settings.getTextColorBrightness(), settings.isParseTables(),
                                   pluckerDocument);
          Transformer transformer = TransformerFactory.newInstance().newTransformer();
          transformer.transform(new DOMSource(formatter.getItemDocuments()[i]), new SAXResult(serializer));
          pluckerDocument.addRecord(serializer.getDataRecord());
         
          String[] pageURIs = serializer.getLinkURIs();
          for (int j = 0; j < pageURIs.length; j++) {
            if (!pageURIs[j].startsWith(resource.getURI() + "_item-")) {
              addPageLink(pageURIs[j]);
            }
          }

          String[] imageURIs = serializer.getEmbeddedImageURIs();
          for (int j = 0; j < imageURIs.length; j++) {
            addImageLink(imageURIs[j]);
          }

          if (generateBookmarks) {
View Full Code Here

TOP

Related Classes of net.sf.jpluck.plucker.parsing.html.HTMLSerializer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.