Package org.apache.html.dom

Examples of org.apache.html.dom.HTMLDocumentImpl.createDocumentFragment()


    Metadata tikamd = new Metadata();

    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment root = doc.createDocumentFragment();
    DOMBuilder domhandler = new DOMBuilder(doc, root);
    ParseContext context = new ParseContext();
    tikamd.set(Metadata.CONTENT_TYPE, mimeType);
    try {
      parser.parse(new ByteArrayInputStream(raw), domhandler, tikamd,context);
View Full Code Here


    /** Main. */
    public static void main(String[] argv) throws Exception {
        DOMFragmentParser parser = new DOMFragmentParser();
        HTMLDocument document = new HTMLDocumentImpl();
        for (int i = 0; i < argv.length; i++) {
            DocumentFragment fragment = document.createDocumentFragment();
            parser.parse(argv[i], fragment);
            print(fragment, "");
        }
    } // main(String[])

View Full Code Here

    private void doTest(final String html, final String expected) throws Exception {
        DOMFragmentParser parser = new DOMFragmentParser();
        HTMLDocument document = new HTMLDocumentImpl();

        DocumentFragment fragment = document.createDocumentFragment();
        InputSource source = new InputSource(new StringReader(html));
        parser.parse(source, fragment);
//        final OutputFormat of = new OutputFormat();
//        of.setOmitXMLDeclaration(true);
//        XMLSerializer s = new XMLSerializer(of);
View Full Code Here

    else return parseNeko(input);
  }

  private DocumentFragment parseTagSoup(InputSource input) throws Exception {
    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    DocumentFragment frag = doc.createDocumentFragment();
    DOMBuilder builder = new DOMBuilder(doc, frag);
    org.ccil.cowan.tagsoup.Parser reader = new org.ccil.cowan.tagsoup.Parser();
    reader.setContentHandler(builder);
    reader.setFeature(org.ccil.cowan.tagsoup.Parser.ignoreBogonsFeature, true);
    reader.setFeature(org.ccil.cowan.tagsoup.Parser.bogonsEmptyFeature, false);
View Full Code Here

          LOG.isTraceEnabled());
    } catch (SAXException e) {}
    // convert Document to DocumentFragment
    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment res = doc.createDocumentFragment();
    DocumentFragment frag = doc.createDocumentFragment();
    parser.parse(input, frag);
    res.appendChild(frag);

    try {
View Full Code Here

    } catch (SAXException e) {}
    // convert Document to DocumentFragment
    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment res = doc.createDocumentFragment();
    DocumentFragment frag = doc.createDocumentFragment();
    parser.parse(input, frag);
    res.appendChild(frag);

    try {
      while(true) {
View Full Code Here

    parser.parse(input, frag);
    res.appendChild(frag);

    try {
      while(true) {
        frag = doc.createDocumentFragment();
        parser.parse(input, frag);
        if (!frag.hasChildNodes()) break;
        if (LOG.isInfoEnabled()) {
          LOG.info(" - new frag, " + frag.getChildNodes().getLength() + " nodes.");
        }
View Full Code Here

    Metadata tikamd = new Metadata();

    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment root = doc.createDocumentFragment();
    DOMBuilder domhandler = new DOMBuilder(doc, root);
    ParseContext context = new ParseContext();
    // to add once available in Tika
    // context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
    try {
View Full Code Here

        XMLDocumentFilter[] filters = { remover };
        parser.setProperty(
            "http://cyberneko.org/html/properties/filters",
            filters);
        HTMLDocument document = new HTMLDocumentImpl();
        DocumentFragment fragment = document.createDocumentFragment();

        InputSource inputSource = new InputSource(new StringReader(html));
        parser.parse(inputSource, fragment);
        StringWriter writer = new StringWriter();
        OutputFormat format = new OutputFormat();
View Full Code Here

    Metadata tikamd = new Metadata();

    HTMLDocumentImpl doc = new HTMLDocumentImpl();
    doc.setErrorChecking(false);
    DocumentFragment root = doc.createDocumentFragment();
    DOMBuilder domhandler = new DOMBuilder(doc, root);
    ParseContext context = new ParseContext();
    try {
      parser.parse(new ByteArrayInputStream(raw), domhandler, tikamd,context);
    } catch (Exception e) {
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.