Examples of org.htmlparser.filters.TagNameFilter

org.htmlparser.filters.TagNameFilter
This class accepts all tags matching the tag name.


      // *****************************************
      // Iterate images from HTML and replace CIDs


      NodeList oCollectionList = new NodeList();
      TagNameFilter oImgFilter = new TagNameFilter ("IMG");
      for (NodeIterator e = oPrsr.elements(); e.hasMoreNodes();)
        e.nextNode().collectInto(oCollectionList, oImgFilter);


      final int nImgs = oCollectionList.size();

View Full Code Here

    int iSlash;
    Parser oPrsr;
    String sCid, sSrc;
    String sBodyCid = sBody;
    NodeList oCollectionList;
    TagNameFilter oImgFilter;


  // **********************************************************************
  // Replace <IMG SRC="..." >


    oPrsr = Parser.createParser(sBodyCid, sEnc);
    
    oCollectionList = new NodeList();
    oImgFilter = new TagNameFilter ("IMG");
    for (NodeIterator e = oPrsr.elements(); e.hasMoreNodes();)
      e.nextNode().collectInto(oCollectionList, oImgFilter);


    int nImgs = oCollectionList.size();


    if (DebugFile.trace) DebugFile.writeln("Images NodeList.size() = " + String.valueOf(nImgs));


    for (int i=0; i<nImgs; i++) {
    ImageTag oImgTag = (ImageTag) oCollectionList.elementAt(i);
      
        sSrc = oImgTag.extractImageLocn().replace('\\','/');
    
    if (DebugFile.trace) DebugFile.writeln("Processing image location "+sSrc);
    
        // Keep a reference to every related image name so that the same image is not included twice in the message
        if (!oImgs.containsKey(sSrc)) {


          // Find last slash from image url
          iSlash = sSrc.lastIndexOf('/');
      
          // Take image name
          if (iSlash>=0) {
            while (sSrc.charAt(iSlash)=='/') { if (++iSlash==sSrc.length()) break; }
              sCid = sSrc.substring(iSlash);
          }
          else {
            sCid = sSrc;
          }
          if (DebugFile.trace) DebugFile.writeln("HashMap.put("+sSrc+","+sCid+")");


          oImgs.put(sSrc, sCid);
        } // fi (!oImgs.containsKey(sSrc))
        
        sBodyCid = doSubstitution (sBodyCid, "Src", Gadgets.replace(Gadgets.replace(oImgTag.extractImageLocn(),'\\',"\\\\"),'.',"\\x2E"), sPreffix+oImgs.get(sSrc));
    } // next


  // **********************************************************************
  // Replace <TABLE BACKGROUND="..." >
    
    oCollectionList = new NodeList();
    TagNameFilter oTableFilter = new TagNameFilter("TABLE");
    oPrsr = Parser.createParser(sBodyCid, sEnc);
    for (NodeIterator e = oPrsr.elements(); e.hasMoreNodes();)
      e.nextNode().collectInto(oCollectionList, oTableFilter);
          
    nImgs = oCollectionList.size();


    if (DebugFile.trace) DebugFile.writeln("Tables NodeList.size() = " + String.valueOf(nImgs));


    for (int i=0; i<nImgs; i++) {


      sSrc = ((TableTag) oCollectionList.elementAt(i)).getAttribute("background");
      if (sSrc!=null) {
        if (sSrc.length()>0) {
          sSrc = sSrc.replace('\\','/');


      if (DebugFile.trace) DebugFile.writeln("Processing background location "+sSrc);


          // Keep a reference to every related image name so that the same image is not included twice in the message
          if (!oImgs.containsKey(sSrc)) {


            // Find last slash from image url
            iSlash = sSrc.lastIndexOf('/');


            // Take image name
            if (iSlash>=0) {
              while (sSrc.charAt(iSlash)=='/') { if (++iSlash==sSrc.length()) break; }
                sCid = sSrc.substring(iSlash);
            } // fi
            else {
              sCid = sSrc;
            }


            if (DebugFile.trace) DebugFile.writeln("HashMap.put("+sSrc+","+sCid+")");


            oImgs.put(sSrc, sCid);
          } // fi (!oImgs.containsKey(sSrc))
      
      sBodyCid = doSubstitution (sBodyCid, "Background", Gadgets.replace(Gadgets.replace(((TableTag) oCollectionList.elementAt(i)).getAttribute("background"),'\\',"\\\\"),'.',"\\x2E"), sPreffix+oImgs.get(sSrc));


        } // fi
      } // fi
    } // next


  // **********************************************************************
  // Replace <TD BACKGROUND="..." >
    
    oCollectionList = new NodeList();
    TagNameFilter oTDFilter = new TagNameFilter("TD");
    oPrsr = Parser.createParser(sBodyCid, sEnc);
    for (NodeIterator e = oPrsr.elements(); e.hasMoreNodes();)
      e.nextNode().collectInto(oCollectionList, oTDFilter);
          
    nImgs = oCollectionList.size();

View Full Code Here

    int iSlash;
    Parser oPrsr;
    String sCid, sSrc;
    String sBodyCid = sBody;
    NodeList oCollectionList;
    TagNameFilter oImgFilter;


  // **********************************************************************
  // Replace <IMG SRC="..." >


    oPrsr = Parser.createParser(sBodyCid, sEnc);
    
    oCollectionList = new NodeList();
    oImgFilter = new TagNameFilter ("IMG");
    for (NodeIterator e = oPrsr.elements(); e.hasMoreNodes();)
      e.nextNode().collectInto(oCollectionList, oImgFilter);


    int nImgs = oCollectionList.size();


    if (DebugFile.trace) DebugFile.writeln("Images NodeList.size() = " + String.valueOf(nImgs));


    for (int i=0; i<nImgs; i++) {


        sSrc = (((ImageTag) oCollectionList.elementAt(i)).extractImageLocn()).replace('\\','/');


        // Keep a reference to every related image name so that the same image is not included twice in the message
        if (!oImgs.containsKey(sSrc)) {


          // Find last slash from image url
          iSlash = sSrc.lastIndexOf('/');
      
          // Take image name
          if (iSlash>=0) {
            while (sSrc.charAt(iSlash)=='/') { if (++iSlash==sSrc.length()) break; }
              sCid = sSrc.substring(iSlash);
          }
          else {
            sCid = sSrc;
          }


          // String sUid = Gadgets.generateUUID();
          // sCid = sUid.substring(0,12)+"$"+sUid.substring(12,20)+"$"+sUid.substring(20,28)+"@hipergate.org";


          if (DebugFile.trace) DebugFile.writeln("HashMap.put("+sSrc+","+sCid+")");


          oImgs.put(sSrc, sCid);
        } // fi (!oImgs.containsKey(sSrc))
        
        String sImgSrc = ((ImageTag) oCollectionList.elementAt(i)).extractImageLocn();
        if (sImgSrc.startsWith(sPreffix)) {
          sBodyCid = doSubstitution(sBodyCid, "Src", Gadgets.replace(Gadgets.replace(sImgSrc,'\\',"\\\\"),'.',"\\x2E"), sImgSrc.substring(sPreffix.length()));
        }
        
    } // next


  // **********************************************************************
  // Replace <TABLE BACKGROUND="..." >
    
    oCollectionList = new NodeList();
    TagNameFilter oTableFilter = new TagNameFilter("TABLE");
    oPrsr = Parser.createParser(sBodyCid, sEnc);
    for (NodeIterator e = oPrsr.elements(); e.hasMoreNodes();)
      e.nextNode().collectInto(oCollectionList, oTableFilter);
          
    nImgs = oCollectionList.size();


    if (DebugFile.trace) DebugFile.writeln("Tables NodeList.size() = " + String.valueOf(nImgs));


    for (int i=0; i<nImgs; i++) {


      sSrc = ((TableTag) oCollectionList.elementAt(i)).getAttribute("background");
      if (sSrc!=null) {
        if (sSrc.length()>0) {
          sSrc = sSrc.replace('\\','/');


          // Keep a reference to every related image name so that the same image is not included twice in the message
          if (!oImgs.containsKey(sSrc)) {


            // Find last slash from image url
            iSlash = sSrc.lastIndexOf('/');


            // Take image name
            if (iSlash>=0) {
              while (sSrc.charAt(iSlash)=='/') { if (++iSlash==sSrc.length()) break; }
                sCid = sSrc.substring(iSlash);
            } // fi
            else {
              sCid = sSrc;
            }


            if (DebugFile.trace) DebugFile.writeln("HashMap.put("+sSrc+","+sCid+")");


            oImgs.put(sSrc, sCid);
          } // fi (!oImgs.containsKey(sSrc))


          String sBckGrnd = ((TableTag) oCollectionList.elementAt(i)).getAttribute("background");
          if (sBckGrnd.startsWith(sPreffix)) {
            sBodyCid = doSubstitution(sBodyCid, "Background", Gadgets.replace(Gadgets.replace(sBckGrnd,'\\',"\\\\"),'.',"\\x2E"), sBckGrnd.substring(sPreffix.length()));
          }          
          
        } // fi
      } // fi
    } // next


  // **********************************************************************
  // Replace <TD BACKGROUND="..." >
    
    oCollectionList = new NodeList();
    TagNameFilter oTDFilter = new TagNameFilter("TD");
    oPrsr = Parser.createParser(sBodyCid, sEnc);
    for (NodeIterator e = oPrsr.elements(); e.hasMoreNodes();)
      e.nextNode().collectInto(oCollectionList, oTDFilter);
          
    nImgs = oCollectionList.size();

View Full Code Here

    Parser parser = new Parser();


    String html = Common.getInputHtmlGBK(urlStr);
    parser.setInputHTML(html);
    String filerStr = "li";
    NodeFilter filter = new TagNameFilter(filerStr);
    // Get the nodes in the page content whose tag name is "li"
    NodeList nodeList = parser.extractAllNodesThatMatch(filter);


    Tag tag = (Tag) nodeList.elementAt(0);

View Full Code Here

    try {
      Parser parser = new Parser();
      parser.setInputHTML(inputHTML);
      parser.setEncoding("UTF-8");    
      NodeList nl = parser.parse(null); 
      NodeList trs = nl.extractAllNodesThatMatch(new TagNameFilter("tr"),true);
      String regex = "([a-z]+) *= *\"?((?:(?! [a-z]+ *=|/? *>|\").)+)";
        Pattern p = Pattern.compile(regex, Pattern.DOTALL);
        for(int i=0;i<trs.size();i++) {
          NodeList nodes = trs.elementAt(i).getChildren();
          NodeList tds  = nodes.extractAllNodesThatMatch(new TagNameFilter("td"),true);
          for(int j=0;j<tds.size();j++) {
            count++;
            String content = tds.elementAt(j).toHtml();
            Matcher fit =  p.matcher(content);
            if (fit.find()) {

View Full Code Here

            htmlBuffer = "<html>" + htmlBuffer + "</html>";
            Parser parser = new Parser();
            parser.setInputHTML(htmlBuffer);
            NodeList nodelist = parser.parse(null);


            NodeList tableList = nodelist.extractAllNodesThatMatch(new TagNameFilter("TABLE"), true);
            NodeList  headList = tableList.extractAllNodesThatMatch(new TagNameFilter("THEAD"), true);
            NodeList  footList = tableList.extractAllNodesThatMatch(new TagNameFilter("TFOOT"), true);
            NodeList  rowList = tableList.extractAllNodesThatMatch(new TagNameFilter("TR"), true);
            
            //Create a ParserUtils var
            ParserUtils pu = new ParserUtils();
            //Set rowCount to size of rowList
            rowCount = rowList.size();

View Full Code Here


        try
        {
            Parser parser = Parser.createParser(htmlData, null);


            NodeList heads = parser.parse(new TagNameFilter("HEAD"));
            if (heads.size() != 1)
                throw new DiscoveryException(
                        "HTML response must have exactly one HEAD element, " +
                                "found " + heads.size() + " : " + heads.toHtml());
            Node head = heads.elementAt(0);

View Full Code Here


      String s = loadFromUrl("http://" + BINGOBANKO_URL + "/print/?boardCount=9");


      Parser parser = new Parser(s);


      OrFilter filter = new OrFilter(new TagNameFilter("IMG"), new TagNameFilter("script"));


      if (bingoIndex == null) {
         bingoIndex = (int) ((System.currentTimeMillis() - 1317495600085l) / 604800000) + 40;
      }

View Full Code Here


        try
        {
            Parser parser = Parser.createParser(htmlData, null);


            NodeList heads = parser.parse(new TagNameFilter("HEAD"));
            if (heads.size() != 1)
                throw new DiscoveryException(
                        "HTML response must have exactly one HEAD element, " +
                                "found " + heads.size() + " : " + heads.toHtml());
            Node head = heads.elementAt(0);

View Full Code Here

            if (bytesRead <= 0)
                throw new YadisException("No data read from the HTML message",
                        YadisResult.HTMLMETA_DOWNLOAD_ERROR);


            Parser parser = Parser.createParser(new String(data, 0, bytesRead), null);
            NodeList heads = parser.parse(new TagNameFilter("HEAD"));


            if (heads.size() != 1)
                throw new YadisException(
                        "HTML response must have exactly one HEAD element, " +
                                "found " + heads.size() + " : "

View Full Code Here

0 1 2 3

TOP

Related Classes of org.htmlparser.filters.TagNameFilter

BoardDownloader

com.knowgate.hipermail.HtmlMimeBodyPart

com.knowgate.hipermail.SessionHandler

com.lanyuan.util.HttpClientUtils

com.structis.fichesst.server.util.HtmlTableStyleParser

com.zesped.util.MailSessionHandler

edu.umd.cloud9.collection.ExtractHTMLFieldCollection$MyMapper

fitnesse.fixtures.PageDriver

fitnesse.slim.converters.MapEditor

hudson.plugins.rubyMetrics.rcov.RcovParser

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.