Package org.htmlparser

Examples of org.htmlparser.Node


        if (tagContents.length() == 0)
            return this;
        try
        {
            boolean found = false;
            Node retVal = null;
            // Find the first word in the scanners
            String firstWord = extractWord(tagContents.toString());
            // Now, get the scanner associated with this.
            TagScanner scanner = (TagScanner) scanners.get(firstWord);
View Full Code Here


        } catch (final UnsupportedEncodingException e) {
            throw new ScimpiException(e);
        }
        final Lexer lexer = new Lexer(page);

        Node node = null;
        try {
            Stack<Snippet> tags = allTags;
            String lineNumbers = "1";
            String template = null;
            tags.push(new HtmlSnippet(lineNumbers, filePath));

            // NOTE done like this the tags can be cached for faster processing
            while ((node = lexer.nextNode()) != null) {
                if (node instanceof Remark) {
                    // TODO need to pick up on comments within tags; at the
                    // moment this splits a tag into two causing a
                    // failure later
                    continue;

                } else if (node instanceof TagNode && ((TagNode) node).getTagName().startsWith("SWF:")) {
                    final TagNode tagNode = (TagNode) node;
                    final String tagName = tagNode.getTagName().toUpperCase();
                    LOG.debug(tagName);

                    // TODO remove context & request from Attributes -- the tags
                    // will be re-used across
                    // requests
                    final Attributes attributes = new Attributes(tagNode, context);
                    int type = 0;
                    if (tagNode.isEndTag()) {
                        type = SwfTag.END;
                    } else {
                        type = tagNode.isEmptyXmlTag() ? SwfTag.EMPTY : SwfTag.START;
                    }
                    testForProcessorForTag(lexer, tagName);
                    lineNumbers = lineNumbering(node);
                    final SwfTag tag = new SwfTag(tagName, attributes, type, lineNumbers, loadFile.getCanonicalPath());
                    tags.push(tag);

                    if (tagName.equals("SWF:IMPORT")) {
                        if (!tagNode.isEmptyXmlTag()) {
                            throw new ScimpiException("Import tag must be empty");
                        }
                        String importFile = tagNode.getAttribute("file");
                        if (context.isDebug()) {
                            context.getWriter().println("<!-- " + "import file " + importFile + " -->");
                        }
                        importFile = context.replaceVariables(importFile);
                        parseHtmlFile(loadPath, importFile, context, tags, tagsForPreviousTemplate);
                    }

                    if (tagName.equals("SWF:TEMPLATE")) {
                        if (!tagNode.isEmptyXmlTag()) {
                            throw new ScimpiException("Template tag must be empty");
                        }
                        if (template != null) {
                            throw new ScimpiException("Template tag can only be used once within a file");
                        }
                        template = tagNode.getAttribute("file");
                        template = context.replaceVariables(template);
                        if (context.isDebug()) {
                            context.getWriter().println("<!-- " + "apply template " + template + " -->");
                        }
                        tags = new Stack<Snippet>();
                    }

                    if (tagName.equals("SWF:CONTENT")) {
                        if (!tagNode.isEmptyXmlTag()) {
                            throw new ScimpiException("Content tag must be empty");
                        }
                        if (context.isDebug()) {
                            context.getWriter().println("<!-- " + "insert content into template -->");
                        }
                        tags.addAll(tagsForPreviousTemplate);
                    }
                } else {
                    final Snippet snippet = tags.size() == 0 ? null : tags.peek();
                    if (snippet instanceof HtmlSnippet) {
                        ((HtmlSnippet) snippet).append(node.toHtml());
                    } else {
                        final HtmlSnippet htmlSnippet = new HtmlSnippet(lineNumbers, filePath);
                        htmlSnippet.append(node.toHtml());
                        tags.push(htmlSnippet);
                    }
                }

            }
View Full Code Here

     *
     * @param tag HTML tag
     */
    public void visitEndTag(Tag tag) {

        Node parent;

        // Get parent tag
        parent = tag.getParent();

        // Process orphan end tags
        if (parent == null)
            modifiedHTML.append(tag.toHtml());

        // Process top level tag with no parents
        else if (parent.getParent() == null)
            modifiedHTML.append(parent.toHtml());

    }
View Full Code Here

     */
    private void parseNodes(final NodeIterator e,
            final URLPointer baseUrl, final URLCollection urls)
        throws HTMLParseException, ParserException {
        while(e.hasMoreNodes()) {
            Node node = e.nextNode();
            // a url is always in a Tag.
            if (!(node instanceof Tag)) {
                continue;
            }
            Tag tag = (Tag) node;
View Full Code Here

     */
    private void parseNodes(final NodeIterator e,
        final URLPointer baseUrl, final URLCollection urls)
        throws HTMLParseException, ParserException {
        while(e.hasMoreNodes()) {
            Node node = e.nextNode();
            // a url is always in a Tag.
            if (!(node instanceof Tag)) {
                continue;
            }
            Tag tag = (Tag) node;
View Full Code Here

     */
    private void parseNodes(final NodeIterator e,
            final URLPointer baseUrl, final URLCollection urls)
        throws HTMLParseException, ParserException {
        while(e.hasMoreNodes()) {
            Node node = e.nextNode();
            // a url is always in a Tag.
            if (!(node instanceof Tag)) {
                continue;
            }
            Tag tag = (Tag) node;
View Full Code Here

    }

    public static String getGraphSourceString(String html) {
        try {
            Parser parser = new Parser(html);
            Node node = getElementById(parser, GRAPH_STRING_ID);
            return node != null ? node.toPlainTextString().replaceAll(
                    LINE_BREAK, "\n") : "";
        } catch (ParserException e) {
            e.printStackTrace();
        }
        return "";
View Full Code Here

    public void testGetElementById() {
        // Should output:
        // This is a level 2 span
        try {
            Parser parser = new Parser(TEST_STRING);
            Node node = getElementById(parser, "L2");
            if (node != null) {
                System.out.println(node.toPlainTextString());
            } else {
                System.out.println("null");
            }
        } catch (ParserException e) {
            // TODO Auto-generated catch block
View Full Code Here

  }
 
  public String getGraphSourceString(String html) {
    try {
      Parser parser = new Parser(html);
      Node node = getElementById(parser, GRAPH_STRING_ID);
      return node != null ? node.toPlainTextString().replaceAll(LINE_BREAK, "\n") : "";
    } catch (ParserException e) {
      e.printStackTrace();
    }
    return "";
  }
View Full Code Here

  public void testGetElementById() {
    // Should output:
    //      This is a level 2 span
    try {
      Parser parser = new Parser(TEST_STRING);
      Node node = getElementById(parser, "L2");
      if (node != null) {
        System.out.println(node.toPlainTextString());
      } else {
        System.out.println("null");
      }
    } catch (ParserException e) {
      // TODO Auto-generated catch block
View Full Code Here

TOP

Related Classes of org.htmlparser.Node

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.