Package org.htmlparser.util

Examples of org.htmlparser.util.NodeList


      factory.setTextPrototype(new HighlightTextNode("",t2hl,lt,rt));
      Parser htmlParser = new Parser();
      htmlParser.setNodeFactory(factory);
      Parser.createParser(content,"UTF-8");
      htmlParser.setInputHTML(content);
      NodeList nodeList = htmlParser.parse(null);
      content = nodeList.toHtml();
    }catch(Exception e){
      logger.error(e);
    }
    return content;
  }
View Full Code Here


    SortedSet<HtmlConverterPSpan> linebreaksFromHtmlTags = new TreeSet<HtmlConverterPSpan>();

    // process
    try {
      Parser parser = new Parser(documentText);
      NodeList list = parser.parse(null);
      HtmlConverterVisitor visitor = new HtmlConverterVisitor(newlineInducingTags);
      list.visitAllNodesWith(visitor);
      visibleSpansSoFar = visitor.getTextSpans();
      linebreaksFromHtmlTags = visitor.getLinebreaksFromHtmlTags();
    } catch (ParserException e) {
      throw new AnalysisEngineProcessException(e);
    }
View Full Code Here

    String html = Common.getInputHtmlGBK(urlStr);
    parser.setInputHTML(html);
    String filerStr = "li";
    NodeFilter filter = new TagNameFilter(filerStr);
    // Get the nodes in the page content whose tag is "li"
    NodeList nodeList = parser.extractAllNodesThatMatch(filter);

    Tag tag = (Tag) nodeList.elementAt(0);

    return tag.toHtml();
  }
View Full Code Here

   */
  public static <T extends TagNode> List<T> parseTags(String html, final Class<T> tagType, final String attrbuteName, final String attrbutValue) {
    try {
      Parser parser = new Parser();
      parser.setInputHTML(html);
      NodeList list = parser.parse(new NodeFilter() {

        public boolean accept(Node node) {
          if (node.getClass() == tagType) {
            T tagNode = (T) node;
            if (attrbuteName == null) {
              return true;
            }
            String attrValue = tagNode.getAttribute(attrbuteName);
            if (attrValue != null && attrValue.equals(attrbutValue)) {
              return true;
            }
          }

          return false;
        }
      });
      List<T> tagsList = new ArrayList<T>();
      for (int i = 0; i < list.size(); i++) {
        T t = (T) list.elementAt(i);
        tagsList.add(t);
      }
      return tagsList;
    } catch (Exception e) {
      e.printStackTrace();
View Full Code Here

        parser.visitAllNodesWith(page);
        assertStringEquals(
            "title",
            "Welcome to the HTMLParser website",
            page.getTitle());
        NodeList bodyNodes = page.getBody();
        assertEquals("number of nodes in body", 1, bodyNodes.size());
        Node node = bodyNodes.elementAt(0);
        assertTrue(
            "expected stringNode but was " + node.getClass().getName(),
            node instanceof StringNode);
        assertStringEquals(
            "body contents",
View Full Code Here

    public void testCreatePageWithTables() throws Exception
    {
        createParser(PAGE_WITH_TABLE);
        HtmlPage page = new HtmlPage(parser);
        parser.visitAllNodesWith(page);
        NodeList bodyNodes = page.getBody();
        assertEquals("number of nodes in body", 2, bodyNodes.size());
        assertXmlEquals(
            "body html",
            "Welcome to HTMLParser"
                + "<table>"
                + "<tr>"
                + "  <td>cell 1</td>"
                + "  <td>cell 2</td>"
                + "</tr>"
                + "</table>",
            bodyNodes.asHtml());
        TableTag tables[] = page.getTables();
        assertEquals("number of tables", 1, tables.length);
        assertEquals("number of rows", 1, tables[0].getRowCount());
        TableRow row = tables[0].getRow(0);
        assertEquals("number of columns", 2, row.getColumnCount());
View Full Code Here

        parser.addScanner(new TitleScanner(""));
        parser.addScanner(new HtmlScanner());
        parseAndAssertNodeCount(1);
        assertType("html tag", Html.class, node[0]);
        Html html = (Html) node[0];
        NodeList nodeList = new NodeList();
        html.collectInto(nodeList, TitleTag.class);
        assertEquals("nodelist size", 1, nodeList.size());
        Node node = nodeList.elementAt(0);
        assertType("expected title tag", TitleTag.class, node);
        TitleTag titleTag = (TitleTag) node;
        assertStringEquals("title", "Some Title", titleTag.getTitle());
    }
View Full Code Here

    public void testCreation()
    {
        StringNode stringNode =
            new StringNode(new StringBuffer("Script Code"), 0, 0);
        NodeList childVector = new NodeList();
        childVector.add(stringNode);
        ScriptTag scriptTag =
            new ScriptTag(
                new TagData(0, 10, "Tag Contents", "tagline"),
                new CompositeTagData(null, null, childVector));
View Full Code Here

        this.tag = tag;
        this.url = url;
        this.reader = reader;
        this.currLine = currLine;
        this.endTag = null;
        this.nodeList = new NodeList();
        this.endTagFound = false;
        this.balance_quotes = balance_quotes;
    }
View Full Code Here

    {
        super(tagData, compositeTagData);
        this.appletClass = compositeTagData.getStartTag().getAttribute("CODE");
        this.codeBase = compositeTagData.getStartTag().getAttribute("CODEBASE");
        this.archive = compositeTagData.getStartTag().getAttribute("ARCHIVE");
        NodeList children = compositeTagData.getChildren();
        appletParams = new Hashtable();
        createAppletParamsTable(children);
    }
View Full Code Here

TOP

Related Classes of org.htmlparser.util.NodeList

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.