Package org.cyberneko.html

Examples of org.cyberneko.html.HTMLTagBalancer$ElementEntry


   */
  private DocumentHandler parseHtmlImpl(String source) throws IOException {
    HTMLConfiguration config = newConfiguration();

    HTMLScanner htmlScanner = new HTMLScanner();
    HTMLTagBalancer tagBalancer = new HTMLTagBalancer();

    DocumentHandler handler = newDocumentHandler(source, htmlScanner);

    if (config.getFeature("http://xml.org/sax/features/namespaces")) {
      NamespaceBinder namespaceBinder = new NamespaceBinder();
      namespaceBinder.setDocumentHandler(handler);
      namespaceBinder.setDocumentSource(tagBalancer);
      namespaceBinder.reset(config);
      tagBalancer.setDocumentHandler(namespaceBinder);
    } else {
      tagBalancer.setDocumentHandler(handler);
    }

    tagBalancer.setDocumentSource(htmlScanner);
    htmlScanner.setDocumentHandler(tagBalancer);

    tagBalancer.reset(config);
    htmlScanner.reset(config);

    XMLInputSource inputSource = new XMLInputSource(null, null, null);
    inputSource.setEncoding("UTF-8");
    inputSource.setCharacterStream(new StringReader(source));
View Full Code Here


  }

  @Override
  protected Document parseDomImpl(String source) {
    HTMLScanner htmlScanner = new HTMLScanner();
    HTMLTagBalancer tagBalancer = new HTMLTagBalancer();
    DocumentHandler handler = new DocumentHandler(source);
    tagBalancer.setDocumentHandler(handler);
    htmlScanner.setDocumentHandler(tagBalancer);

    HTMLConfiguration config = new HTMLConfiguration();
    // Maintain original case for elements and attributes
    config.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
    config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
    // Parse as fragment.
    config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
    // Get notified of entity and character references
    config.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
    config.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);
    tagBalancer.reset(config);
    htmlScanner.reset(config);
    XMLInputSource inputSource = new XMLInputSource(null, null, null);
    inputSource.setEncoding("UTF-8");
    inputSource.setCharacterStream(new StringReader(source));
    try {
View Full Code Here

TOP

Related Classes of org.cyberneko.html.HTMLTagBalancer$ElementEntry

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.