Package org.vietspider.html.parser

Examples of org.vietspider.html.parser.HTMLParser2


    System.out.println(file.getPath());
    System.out.println("*********************************************************");
  }

  public void viewNode(String path) throws Exception {
    HTMLParser2 parser2 = new HTMLParser2();
    HTMLDocument document  = parser2.createDocument(file, null);
    NodePathParser pathParser = new NodePathParser();
    NodePath nodePath = pathParser.toPath(path);
    HTMLExtractor htmlExtractor = new HTMLExtractor();
    HTMLNode node = htmlExtractor.extract(document, new NodePath[]{nodePath}).getRoot();
    System.out.println(node.getTextValue());
View Full Code Here


    HTMLNode node = htmlExtractor.extract(document, new NodePath[]{nodePath}).getRoot();
    System.out.println(node.getTextValue());
  }

  public HTMLDocument removeNode(String namethrows Exception {
    HTMLParser2 parser2 = new HTMLParser2();
    HTMLDocument document  = parser2.createDocument(file, null);
    removeNode(name, document.getRoot());
    return document;
  }
View Full Code Here

      }
    }
  }

  public List<HTMLNode> getAllNode(String name) throws Exception {
    HTMLParser2 parser2 = new HTMLParser2();
    HTMLDocument document  = parser2.createDocument(file, null);
    List<HTMLNode> list = new ArrayList<HTMLNode>();
    getAllNode(name, document.getRoot(), list);
    return list;
  }
View Full Code Here

  public static void main(String[] args) {
    try{
      URL url = new URL("http://www.java.net");
//      System.out.println(URLDecoder.decode("video_id=http%3A%2F%2Fliveu-80.vo.llnwd.net%2Fflurl%2Fmb53%2Fnew_media3%2F2006%2F8%2F29%2F174380_media_flash8.flv&homeurl=http%3A%2F%2Fwww.flurl.com%2F&endmovies=http%3A%2F%2Fwww.flurl.com%2Fthumbs.php%3Fid%3D174380&embed=%3Ctable%20border%3D%270%27%20bgcolor%3D%27ffffff%27%20cellpadding%3D%270%27%20cellspacing%3D%270%27%3E%3Ctr%3E%3Ctd%3E%3Cembed%20id%3D%27flurl_media%27%20name%3D%27flurl_media%27%20width%3D%27519%27%20height%3D%27438%27%20src%3D%27http%3A%2F%2Fwww.flurl.com%2Fflvplayer2.swf%3Fvideo%3Dhttp%3A%2F%2Fwww.flurl.com%2Fflash_player_info.php%3Fid%3D174380%26flash%3D8%27%20quality%3D%27high%27%20bgcolor%3D%27white%27%20play%3D%27true%27%20loop%3D%27false%27%20allowScriptAccess%3D%27sameDomain%27%20type%3D%27application%2Fx-shockwave-flash%27%20pluginspage%3D%27http%3A%2F%2Fwww.macromedia.com%2Fgo%2Fgetflashplayer%27%3E%3C%2Fembed%3E%3C%2Ftd%3E%3C%2Ftr%3E%3Ctr%3E%3Ctd%20align%3D%27right%27%3E%3Cstrong%3E%3Ca%20href%3D%27http%3A%2F%2Fwww.flurl.com%2F%27%3EHosted%20on%20Flurl%20Video%20Search%3C%2Fa%3E%20-%20%3Ca%20href%3D%27http%3A%2F%2Fwww.flurl.com%2Fmedia%27%3EWatch%20More%20Videos%3C%2Fa%3E%20%3C%2Fstrong%3E%3C%2Ftd%3E%3C%2Ftr%3E%3C%2Ftable%3E"));
      HTMLParser2 parser2 = new HTMLParser2();
      HTMLDocument document = parser2.createDocument(url.openStream(), "utf-8");

      NodePathParser pathParser = new NodePathParser();
      NodePath nodePath = pathParser.toPath("BODY[0].DIV[0].TABLE[0].TBODY[0].TR[1].TD[3].DIV[10]");
      HTMLExtractor htmlExtractor = new HTMLExtractor();
      HTMLNode node = htmlExtractor.extract(document, new NodePath[]{nodePath}).getRoot();
View Full Code Here

    System.out.println(file.getPath());
    System.out.println("*********************************************************");   
  }
 
  public List<String> getAttribute(String nodeName, String attrName) throws Exception {
    HTMLParser2 parser2 = new HTMLParser2();
    HTMLDocument document  = parser2.createDocument(file, null);
   
    List<String> attrValues = new ArrayList<String>();
   
    NodeIterator iterator = document.getRoot().iterator();
    while(iterator.hasNext()) {
View Full Code Here

    }
    return attrValues;
  }
 
  public List<String> getLinks() throws Exception {
    HTMLParser2 parser2 = new HTMLParser2();
    HTMLDocument document  = parser2.createDocument(file, null);
    HyperLinkUtil linkUtil = new HyperLinkUtil() ;
    return linkUtil.scanSiteLink(document.getRoot());
  }
View Full Code Here

    webClient.setURL(homepage, new URL(homepage));
   
    String address = "http://www.hastc.org.vn/Ketqua_giaodich.asp?stocktype=2&menuid=103120";
    byte  [] bytes = download(homepage, address);
   
    HTMLParser2 parser2 = new HTMLParser2();
    HTMLDocument document  = parser2.createDocument(bytes, "utf-8");
   
    String path = "BODY[0].TABLE[0].TBODY[0].TR[0].TD[0].TABLE[0].TBODY[0].TR[0].TD[0]";
    path += ".TABLE[0].TBODY[0].TR[1].TD[1].TABLE[0].TBODY[0].TR[0].TD[0].TABLE[1].TBODY[0].TR[1]";
    path += ".TD[0].TABLE[1].TBODY[0].TR[i>0]";
   
View Full Code Here

  }

  public static void main(String[] args) {
    try{
      URL url = new URL("http://www.java.net");
      HTMLDocument document = new HTMLParser2().createDocument(url.openStream(), "utf-8");
      testGetLink(document.getRoot());
      System.out.println("\n\n\n\n*********************************************************************\n\n\n\n");
      testCreateFullLink(document.getRoot(), url);
      System.out.println("\n\n\n\n*********************************************************************\n\n\n\n");
      testCreateImageLink(document.getRoot(), url);
View Full Code Here

    webClient.setURL(homepage, new URL(homepage));
   
    String address = "http://forums.java.net/jive/thread.jspa?threadID=40523&tstart=0";
    byte  [] bytes = download(homepage, address);
   
    HTMLParser2 parser2 = new HTMLParser2();
    HTMLDocument document  = parser2.createDocument(bytes, "utf-8");
    String titlePathValue  = "BODY[0].DIV[1].TABLE[0].TBODY[0].TR[0].TD[0].P[1]";
   
    NodePathParser pathParser = new NodePathParser();
    HTMLExtractor htmlExtractor = new HTMLExtractor();
   
View Full Code Here

    }
  }

  public static void main(String[] args) throws Exception {
    URL url = new URL("http://vnexpress.net/Vietnam/Xa-hoi/2006/10/3B9EFB66/");
    HTMLDocument document = new HTMLParser2().createDocument(url.openStream(), null);
    StringBuilder builder = new StringBuilder();
    build(builder, document.getRoot());
    File file = new File("E:\\Temp\\a.txt");
    if(!file.exists()) file.createNewFile();
    FileOutputStream stream = new FileOutputStream(file);
View Full Code Here

TOP

Related Classes of org.vietspider.html.parser.HTMLParser2

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.