Package org.htmlparser

Examples of org.htmlparser.Node


    StringBuffer prvContent = new StringBuffer();
    try {
      parser.setEncoding(Globals.ENC_8859_1);
      parser.setInputHTML(html);
      NodeList nodes = parser.extractAllNodesThatMatch(nfilter);
      Node node = null;
      for(int i=0;i<nodes.size();i++){
        if(prvContent.length() >= max_count){
          if(node instanceof TagNode){
            TagNode tmp_node = (TagNode)node;
            boolean isEnd = tmp_node.isEndTag();
            if(!isEnd){
              prvContent.setLength(prvContent.length()-tmp_node.getText().length()-2);
            }
          }
          // Close any tags that are still unclosed
          Node parent = node;
          //System.out.println("current node is . "+parent.getText());
          do{
            parent = parent.getParent()
            //System.out.println("parent = "+parent);         
            if(parent==null) break;
            if(!(parent instanceof TagNode)) continue;
            //System.out.println("Parent node is no ended. "+parent.getText());
            prvContent.append(((TagNode)parent).getEndTag().toHtml());
View Full Code Here


    parser.setInputHTML(new String(inputHtml.getBytes(),ISO8859_1));
    //Parser parser = Parser.createParser(new String(inputHtml.getBytes(),ISO8859_1));
    // Iterate over all of the nodes
    NodeList nodes = parser.extractAllNodesThatMatch(nodeFilter);
    for(int i=0;i<nodes.size();i++){
      Node node = nodes.elementAt(i);
      text.append(new String(node.toPlainTextString().getBytes(ISO8859_1)));
    }
    return text.toString();
  }
View Full Code Here

      parser.setEncoding("8859_1");
      parser.setInputHTML(html);
      // Iterate over all of the nodes
      NodeList nodes = parser.extractAllNodesThatMatch(scriptFilter);     
      for(int i=0;i<nodes.size();i++){
        Node node = nodes.elementAt(i);
        if(node instanceof TextNode)
          text.append(node.getText());
        else{
          text.append('<');
          text.append(node.getText());
          text.append('>');
        }
      }
      return text.toString();
    }catch(Exception e){
View Full Code Here

        }
        oBuffer.append(">");
        NodeList oChilds = oNode.getChildren();
        if (oChilds!=null) {
          for (NodeIterator i = oNode.getChildren().elements(); i.hasMoreNodes(); ) {
            Node oChildNode = i.nextNode();
            oBuffer.append(parseNode(oChildNode, oCompiler, oMatcher));
          } // next
        } // fi
        oBuffer.append(oCTag.getEndTag().toTagHtml());
       
View Full Code Here

    StringBuffer oRetVal = new StringBuffer(sHtml.length());

    try {
      for (NodeIterator i = parser.elements(); i.hasMoreNodes(); ) {
        Node oNode = i.nextNode();
    oRetVal.append(parseNode(oNode, oCompiler, oMatcher));
      } // next
    }
    catch (ParserException pe) {
      if (DebugFile.trace) {
View Full Code Here

     */
    private void parseNodes(final NodeIterator e,
            final URLPointer baseUrl, final URLCollection urls)
        throws HTMLParseException, ParserException {
        while(e.hasMoreNodes()) {
            Node node = e.nextNode();
            // a url is always in a Tag.
            if (!(node instanceof Tag)) {
                continue;
            }
            Tag tag = (Tag) node;
View Full Code Here

     * @param pageEncoding the encoding used for the page where the nodes are present
     */
    private void parseNodes(final NodeIterator e, Map<String, String> formEncodings, String pageEncoding)
        throws HTMLParseException, ParserException {
        while(e.hasMoreNodes()) {
            Node node = e.nextNode();
            // a url is always in a Tag.
            if (!(node instanceof Tag)) {
                continue;
            }
            Tag tag = (Tag) node;
View Full Code Here

        try
        {
            // we start to iterate through the elements
            for (NodeIterator e= htmlParser.elements(); e.hasMoreNodes();)
            {
                Node node= e.nextNode();
                String binUrlStr= null;

                // first we check to see if body tag has a
                // background set and we set the NodeIterator
                // to the child elements inside the body
View Full Code Here

            "title",
            "Welcome to the HTMLParser website",
            page.getTitle());
        NodeList bodyNodes = page.getBody();
        assertEquals("number of nodes in body", 1, bodyNodes.size());
        Node node = bodyNodes.elementAt(0);
        assertTrue(
            "expected stringNode but was " + node.getClass().getName(),
            node instanceof StringNode);
        assertStringEquals(
            "body contents",
            "Welcome to HTMLParser",
            page.getBody().asString());
View Full Code Here

        parser.addScanner(new TableScanner(parser));
        parser.addScanner(new SpanScanner());
        parseAndAssertNodeCount(1);
        assertType("node", TableColumn.class, node[0]);
        TableColumn col = (TableColumn) node[0];
        Node spans[] = col.searchFor(Span.class).toNodeArray();
        assertEquals("number of spans found", 2, spans.length);
        assertStringEquals(
            "span 1",
            "Flavor: small(90 to 120 minutes)",
            spans[0].toPlainTextString());
View Full Code Here

TOP

Related Classes of org.htmlparser.Node

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.