Package org.cyberneko.html

Examples of org.cyberneko.html.HTMLConfiguration

This configuration recognizes the following properties:

For complete usage information, refer to the documentation. See also: HTMLScanner, HTMLTagBalancer, HTMLErrorReporter. Author: Andy Clark. Version: $Id: HTMLConfiguration.java,v 1.9 2005/02/14 03:56:54 andyc Exp $


    // Constructors
    //

    /**
     * Default constructor. Hands a newly created {@link HTMLConfiguration}
     * to the superclass constructor.
     */
    public SAXParser() {
        super(new HTMLConfiguration());
    } // <init>()
View Full Code Here


    // Constructors
    //

    /** Default constructor. */
    public DOMFragmentParser() {
        fParserConfiguration = new HTMLConfiguration();
        fParserConfiguration.addRecognizedFeatures(RECOGNIZED_FEATURES);
        fParserConfiguration.addRecognizedProperties(RECOGNIZED_PROPERTIES);
        fParserConfiguration.setFeature(DOCUMENT_FRAGMENT, true);
        fParserConfiguration.setDocumentHandler(this);
    } // <init>()
View Full Code Here

    // Constructors
    //

    /**
     * Default constructor. Hands a newly created {@link HTMLConfiguration}
     * to the superclass constructor.
     */
    public SAXParser() {
        super(new HTMLConfiguration());
    } // <init>()
View Full Code Here

    // Constructors
    //

    /** Default constructor. */
    public DOMFragmentParser() {
        fParserConfiguration = new HTMLConfiguration();
        fParserConfiguration.addRecognizedFeatures(RECOGNIZED_FEATURES);
        fParserConfiguration.addRecognizedProperties(RECOGNIZED_PROPERTIES);
        fParserConfiguration.setFeature(DOCUMENT_FRAGMENT, true);
        fParserConfiguration.setDocumentHandler(this);
    } // <init>()
View Full Code Here

    // Constructors
    //

    /** Default constructor. */
    public DOMParser() {
        super(new HTMLConfiguration());
        /*** extending DOMParser ***/
        try {
            setProperty("http://apache.org/xml/properties/dom/document-class-name",
                                       "org.apache.html.dom.HTMLDocumentImpl");
        }
View Full Code Here

    public static void main(String[] argv) throws Exception {
        if (argv.length == 0) {
            printUsage();
            System.exit(1);
        }
        XMLParserConfiguration parser = new HTMLConfiguration();
        parser.setFeature(NOTIFY_CHAR_REFS, true);
        parser.setFeature(NOTIFY_HTML_BUILTIN_REFS, true);
        String encoding = "Windows-1252";
        boolean identity = false;
        boolean purify = false;
        for (int i = 0; i < argv.length; i++) {
            String arg = argv[i];
            if (arg.equals("-e")) {
                encoding = argv[++i];
                continue;
            }
            if (arg.equals("-i")) {
                identity = true;
                continue;
            }
            if (arg.equals("-p")) {
                purify = true;
                continue;
            }
            if (arg.equals("-h")) {
                printUsage();
                System.exit(1);
            }
            java.util.Vector filtersVector = new java.util.Vector(2);
            if (identity) {
                filtersVector.addElement(new Identity());
            }
            else if (purify) {
                filtersVector.addElement(new Purifier());
            }
            filtersVector.addElement(new Writer(System.out, encoding));
            XMLDocumentFilter[] filters =
                new XMLDocumentFilter[filtersVector.size()];
            filtersVector.copyInto(filters);
            parser.setProperty(FILTERS, filters);
            parser.parse(new XMLInputSource(null, arg, null));
        }
    } // main(String[])
View Full Code Here

    private StringBuffer buffer;

    /**
     * Default constructor. Hands a newly created {@link HTMLConfiguration}
     * to the superclass constructor.
     */
    public HTMLParser() {

        super(new HTMLConfiguration());
    }
View Full Code Here

   *
   * @return
   * @throws ServletException
   */
  protected HTMLConfiguration getHtmlConfig() {
    HTMLConfiguration _config = new HTMLConfiguration();
    try {
      if (this.getPublicid() != null || this.getSystemid() != null) {
        _config.setFeature(
            "http://cyberneko.org/html/features/insert-doctype",
            true);
        _config.setFeature(
            "http://cyberneko.org/html/features/override-doctype",
            true);
      }
      if (this.getPublicid() != null) {
        _config.setProperty(
            "http://cyberneko.org/html/properties/doctype/pubid",
            getPublicid());

      }
      if (this.getSystemid() != null) {
        _config.setProperty(
            "http://cyberneko.org/html/properties/doctype/sysid",
            getSystemid());

      }
      if (this.getNamespace() != null) {
        _config.setFeature("http://xml.org/sax/features/namespaces",
            true);
        _config
            .setFeature(
                "http://cyberneko.org/html/features/override-namespaces",
                true);
        _config.setFeature(
            "http://cyberneko.org/html/features/insert-namespaces",
            true);
        _config.setProperty(
            "http://cyberneko.org/html/properties/namespaces-uri",
            getNamespace());

      }
      // config
      // .setFeature(
      // "http://cyberneko.org/html/features/balance-tags/ignore-outside-content",
      // true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/cdata-sections",
              true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/script/strip-comment-delims",
              true);
      _config
          .setFeature(
              "http://cyberneko.org/html/features/scanner/style/strip-comment-delims",
              true);
      _config.setFeature(
          "http://cyberneko.org/html/features/insert-doctype", true);
      _config.setFeature(
          "http://cyberneko.org/html/features/insert-namespaces",
          true);
      // 
      // Set properties
      // http://cyberneko.org/html/features/insert-namespaces
      // _config
      // .setProperty(
      // "http://cyberneko.org/html/properties/default-encoding",
      // encoding);
      _config
          .setProperty(
              "http://cyberneko.org/html/properties/names/elems",
              "lower");
      _config
          .setProperty(
              "http://cyberneko.org/html/properties/names/attrs",
              "lower");
      _config.setProperty("http://cyberneko.org/html/properties/filters",
          _filters);
    } catch (XMLConfigurationException e) {
      // throw new ServletException("error set Neko feature ", e);
    }
    return _config;
View Full Code Here

        /**
         * Creates a parser whose configuration is built by
         * {@link #getConfig(Properties)} from the given settings.
         *
         * @param properties settings forwarded to {@code getConfig};
         *                   may be {@code null}
         */
        public HtmlSaxParser(Properties properties) {
            super(getConfig(properties));
        }
   
        private static HTMLConfiguration getConfig(Properties properties) {
            HTMLConfiguration config = new HTMLConfiguration();
            config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
            if (properties != null) {
                for (Iterator i = properties.keySet().iterator();i.hasNext();) {
                    String name = (String) i.next();
                    if (name.indexOf("/features/") > -1) {
                        config.setFeature(name, Boolean.getBoolean(properties.getProperty(name)));
                    } else if (name.indexOf("/properties/") > -1) {
                        config.setProperty(name, properties.getProperty(name));
                    }
                }
            }
            return config;
        }
View Full Code Here

        /**
         * Creates a parser whose configuration is built by
         * {@link #getConfig(Properties)} from the given settings.
         *
         * @param properties settings forwarded to {@code getConfig};
         *                   may be {@code null}
         */
        public HtmlSaxParser(Properties properties) {
            super(getConfig(properties));
        }
   
        private static HTMLConfiguration getConfig(Properties properties) {
            HTMLConfiguration config = new HTMLConfiguration();
            config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
            if (properties != null) {
                for (Iterator i = properties.keySet().iterator();i.hasNext();) {
                    String name = (String) i.next();
                    if (name.indexOf("/features/") > -1) {
                        config.setFeature(name, Boolean.getBoolean(properties.getProperty(name)));
                    } else if (name.indexOf("/properties/") > -1) {
                        config.setProperty(name, properties.getProperty(name));
                    }
                }
            }
            return config;
        }
View Full Code Here

TOP

Related Classes of org.cyberneko.html.HTMLConfiguration

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.