Package net.sf.jpluck.spider

Examples of net.sf.jpluck.spider.Resource.parseHTML()


      in.read(data);
      in.close();
    }
    System.out.println("Parsing HTML");
    Resource resource = new Resource(html, null, new ContentType("text/html"), data, 0, false);
    Document document = resource.parseHTML();
    Transformer transformer = TransformerFactory.newInstance().newTransformer();
    File file = new File(xml);
    System.out.println("Writing " + file.getAbsolutePath());
    transformer.setOutputProperty(OutputKeys.METHOD, "xml");
    transformer.setOutputProperty(OutputKeys.ENCODING, encoding);
View Full Code Here


        }
        byte[] data = baos.toByteArray();
        Document document = new Document();
        if (mimeType.equals("text/html")) {
      Resource resource = new Resource("http://127.0.0.1/", null, new ContentType(mimeType), data, 0, false);
      org.w3c.dom.Document dom = resource.parseHTML();
      TextRecord textRecord = new TextRecord("http://127.0.0.1/");
      Transformer transformer = TransformerFactory.newInstance().newTransformer();
      TextRecordResult result = new TextRecordResult(textRecord, new URIRewriter(), 100, false, document);
      transformer.transform(new DOMSource(dom), result);
      document.addRecord(textRecord);
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.