Package org.archive.access.nutch.jobs

Examples of org.archive.access.nutch.jobs.NutchwaxCrawlDb


    if (!getFS().exists(dbPath))
    {
      getFS().mkdirs(dbPath);
    }
     
    CrawlDb cdb = new NutchwaxCrawlDb(getJobConf());
     
    if (segments != null)
    {
      List<Path> paths = new ArrayList<Path>(segments.length);
       
      for (int i = 0; i < segments.length; i++)
      {
        Path p = new Path(segments[i]);
         
        if (!getFS().exists(p))
        {
          throw new FileNotFoundException(p.toString());
        }

        paths.add(p);
      }
       
      cdb.update(od.getCrawlDb(), paths.toArray(new Path[paths.size()]),
        true, true);
    }
    else
    {
      Path[] allSegments = getSegments(od);
       
      // This just does the last segment created.
      cdb.update(od.getCrawlDb(),
        new Path[] {allSegments[allSegments.length - 1]}, true, true);
    }
  }
View Full Code Here

TOP

Related Classes of org.archive.access.nutch.jobs.NutchwaxCrawlDb

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.