Package org.apache.nutch.crawl

Examples of org.apache.nutch.crawl.Inlink


      
      if (anchor.length() > this.nwMaxAnchorLength) {
        anchor = anchor.substring(0, this.nwMaxAnchorLength);
      }
      inlinks.add(new Inlink(fromUrl, anchor));   // collect inverted link     
      if (collectionType.equals(Global.COLLECTION_TYPE_MULTIPLE)) {
        try {         
            String toUrlNearTimestamp = sqlsearcher.selectNearTimestamp(toUrl, fromUrlTimestamp);
        LOG.debug("LinkDB: toUrlNearTimestamp: " + toUrlNearTimestamp + " toUrl:" + toUrl + " fromUrlTimestamp " + fromUrlTimestamp);
          if (toUrlNearTimestamp!=null) {
View Full Code Here


    filter.setConf(conf);
    Assert.assertNotNull(filter);
    NutchDocument doc = new NutchDocument();
    ParseImpl parse = new ParseImpl("foo bar", new ParseData());
    Inlinks inlinks = new Inlinks();
    inlinks.add(new Inlink("http://test1.com/", "text1"));
    inlinks.add(new Inlink("http://test2.com/", "text2"));
    inlinks.add(new Inlink("http://test3.com/", "text2"));
    try {
      filter.filter(doc, parse, new Text("http://nutch.apache.org/index.html"), new CrawlDatum(), inlinks);
    } catch(Exception e){
      e.printStackTrace();
      Assert.fail(e.getMessage());
View Full Code Here

    filter.setConf(conf);
    assertNotNull(filter);
    NutchDocument doc = new NutchDocument();
    ParseImpl parse = new ParseImpl("foo bar", new ParseData());
    Inlinks inlinks = new Inlinks();
    inlinks.add(new Inlink("http://test1.com/", "text1"));
    inlinks.add(new Inlink("http://test2.com/", "text2"));
    inlinks.add(new Inlink("http://test3.com/", "text2"));
    try {
      filter.filter(doc, parse, new Text("http://nutch.apache.org/index.html"), new CrawlDatum(), inlinks);
    } catch(Exception e){
      e.printStackTrace();
      fail(e.getMessage());
View Full Code Here

TOP

Related Classes of org.apache.nutch.crawl.Inlink

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.