Package org.apache.nutch.indexer.solr

Examples of org.apache.nutch.indexer.solr.SolrIndexer


      linkDbTool.invert(linkDb, segments, true, true, false); // invert links

      // index, dedup & merge
      FileStatus[] fstats = fs.listStatus(segments, HadoopFSUtil.getPassDirectoriesFilter(fs));
      if (isSolrIndex) {
        SolrIndexer indexer = new SolrIndexer(conf);
        indexer.indexSolr(solrUrl, crawlDb, linkDb,
            Arrays.asList(HadoopFSUtil.getPaths(fstats)));
      }
      else {
       
        DeleteDuplicates dedup = new DeleteDuplicates(conf);       
        if(indexes != null) {
          // Delete old indexes
          if (fs.exists(indexes)) {
            LOG.info("Deleting old indexes: " + indexes);
            fs.delete(indexes, true);
          }

          // Delete old index
          if (fs.exists(index)) {
            LOG.info("Deleting old merged index: " + index);
            fs.delete(index, true);
          }
        }
       
        Indexer indexer = new Indexer(conf);
        indexer.index(indexes, crawlDb, linkDb,
            Arrays.asList(HadoopFSUtil.getPaths(fstats)));
       
        IndexMerger merger = new IndexMerger(conf);
        if(indexes != null) {
          dedup.dedup(new Path[] { indexes });
View Full Code Here


      // NOTE(review): the meaning of the three boolean flags is not visible in this excerpt —
      // confirm against the invert(...) signature of the linkDbTool in use.
      linkDbTool.invert(linkDb, segments, true, true, false); // invert links

      // Indexing and deduplication only run when a Solr URL has been supplied.
      if (solrUrl != null) {
        // index & dedup (no merge step is performed in this branch)
        // List the entries under `segments` accepted by the pass-directories filter.
        FileStatus[] fstats = fs.listStatus(segments, HadoopFSUtil.getPassDirectoriesFilter(fs));
        SolrIndexer indexer = new SolrIndexer(getConf());
        // Index crawlDb, linkDb and the listed segment paths to solrUrl.
        indexer.indexSolr(solrUrl, crawlDb, linkDb,
          Arrays.asList(HadoopFSUtil.getPaths(fstats)));
        // Run SolrDeleteDuplicates against the same Solr URL, using the same configuration.
        SolrDeleteDuplicates dedup = new SolrDeleteDuplicates();
        dedup.setConf(getConf());
        dedup.dedup(solrUrl);
      }
View Full Code Here

      // NOTE(review): the meaning of the three boolean flags is not visible in this excerpt —
      // confirm against the invert(...) signature of the linkDbTool in use.
      linkDbTool.invert(linkDb, segments, true, true, false); // invert links

      // Indexing and deduplication only run when a Solr URL has been supplied.
      if (solrUrl != null) {
        // index & dedup (no merge step is performed in this branch)
        // List the entries under `segments` accepted by the pass-directories filter.
        FileStatus[] fstats = fs.listStatus(segments, HadoopFSUtil.getPassDirectoriesFilter(fs));
        SolrIndexer indexer = new SolrIndexer(getConf());
        // Index crawlDb, linkDb and the listed segment paths to solrUrl.
        indexer.indexSolr(solrUrl, crawlDb, linkDb,
          Arrays.asList(HadoopFSUtil.getPaths(fstats)));
        // Run SolrDeleteDuplicates against the same Solr URL, using the same configuration.
        SolrDeleteDuplicates dedup = new SolrDeleteDuplicates();
        dedup.setConf(getConf());
        dedup.dedup(solrUrl);
      }
View Full Code Here

      // NOTE(review): the meaning of the three boolean flags is not visible in this excerpt —
      // confirm against the invert(...) signature of the linkDbTool in use.
      linkDbTool.invert(linkDb, segments, true, true, false); // invert links

      // Indexing and deduplication only run when a Solr URL has been supplied.
      if (solrUrl != null) {
        // index & dedup (no merge step is performed in this branch)
        // List the entries under `segments` accepted by the pass-directories filter.
        FileStatus[] fstats = fs.listStatus(segments, HadoopFSUtil.getPassDirectoriesFilter(fs));
        SolrIndexer indexer = new SolrIndexer(getConf());
        // Index crawlDb, linkDb and the listed segment paths to solrUrl.
        indexer.indexSolr(solrUrl, crawlDb, linkDb,
          Arrays.asList(HadoopFSUtil.getPaths(fstats)));
        // Run SolrDeleteDuplicates against the same Solr URL, using the same configuration.
        SolrDeleteDuplicates dedup = new SolrDeleteDuplicates();
        dedup.setConf(getConf());
        dedup.dedup(solrUrl);
      }
View Full Code Here

TOP

Related Classes of org.apache.nutch.indexer.solr.SolrIndexer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.