Examples of Indexer


Examples of org.apache.nutch.indexer.Indexer

            LOG.info("Deleting old merged index: " + index);
            fs.delete(index, true);
          }
        }
       
        Indexer indexer = new Indexer(conf);
        indexer.index(indexes, crawlDb, linkDb,
            Arrays.asList(HadoopFSUtil.getPaths(fstats)));
       
        IndexMerger merger = new IndexMerger(conf);
        if(indexes != null) {
          dedup.dedup(new Path[] { indexes });
View Full Code Here

Examples of org.apache.nutch.indexer.Indexer

    Generator generator = new Generator(conf);
    Fetcher fetcher = new Fetcher(conf);
    ParseSegment parseSegment = new ParseSegment(conf);
    CrawlDb crawlDbTool = new CrawlDb(conf);
    LinkDb linkDbTool = new LinkDb(conf);
    Indexer indexer = new Indexer(conf);
    DeleteDuplicates dedup = new DeleteDuplicates(conf);
    IndexMerger merger = new IndexMerger(conf);
     
    // initialize crawlDb
    injector.inject(crawlDb, rootUrlDir);
    int i;
    for (i = 0; i < depth; i++) {             // generate new segment
      Path segment = generator.generate(crawlDb, segments, -1, topN, System
          .currentTimeMillis(), false, false);
      if (segment == null) {
        LOG.info("Stopping at depth=" + i + " - no more URLs to fetch.");
        break;
      }
      fetcher.fetch(segment, threads);  // fetch it
      if (!Fetcher.isParsing(job)) {
        parseSegment.parse(segment);    // parse it, if needed
      }
      crawlDbTool.update(crawlDb, new Path[]{segment}, true, true); // update crawldb
    }
    if (i > 0) {
      linkDbTool.invert(linkDb, segments, true, true, false); // invert links

      // index, dedup & merge
      indexer.index(indexes, crawlDb, linkDb, fs.listPaths(segments));
      dedup.dedup(new Path[] { indexes });
      merger.merge(fs.listPaths(indexes), index, tmpDir);
    } else {
      LOG.warn("No URLs to fetch - check your seed list and URL filters.");
    }
View Full Code Here

Examples of org.apache.nutch.indexer.Indexer

    Generator generator = new Generator(conf);
    Fetcher fetcher = new Fetcher(conf);
    ParseSegment parseSegment = new ParseSegment(conf);
    CrawlDb crawlDbTool = new CrawlDb(conf);
    LinkDb linkDbTool = new LinkDb(conf);
    Indexer indexer = new Indexer(conf);
    DeleteDuplicates dedup = new DeleteDuplicates(conf);
    IndexMerger merger = new IndexMerger(conf);
     
    // initialize crawlDb
    injector.inject(crawlDb, rootUrlDir);
    int i;
    for (i = 0; i < depth; i++) {             // generate new segment
      Path segment = generator.generate(crawlDb, segments, -1, topN, System
          .currentTimeMillis(), false, false);
      if (segment == null) {
        LOG.info("Stopping at depth=" + i + " - no more URLs to fetch.");
        break;
      }
      fetcher.fetch(segment, threads);  // fetch it
      if (!Fetcher.isParsing(job)) {
        parseSegment.parse(segment);    // parse it, if needed
      }
      crawlDbTool.update(crawlDb, new Path[]{segment}, true, true); // update crawldb
    }
    if (i > 0) {
      linkDbTool.invert(linkDb, segments, true, true, false); // invert links

      // index, dedup & merge
      indexer.index(indexes, crawlDb, linkDb, fs.listPaths(segments));
      dedup.dedup(new Path[] { indexes });
      merger.merge(fs.listPaths(indexes), index, tmpDir);
    } else {
      LOG.warn("No URLs to fetch - check your seed list and URL filters.");
    }
View Full Code Here

Examples of org.apache.phoenix.hbase.index.Indexer

    admin.createTable(desc);
    HTable primary = new HTable(UTIL.getConfiguration(), tableNameBytes);

    // overwrite the codec so we can verify the current state
    HRegion region = UTIL.getMiniHBaseCluster().getRegions(tableNameBytes).get(0);
    Indexer indexer =
        (Indexer) region.getCoprocessorHost().findCoprocessor(Indexer.class.getName());
    CoveredColumnsIndexBuilder builder =
        (CoveredColumnsIndexBuilder) indexer.getBuilderForTesting();
    VerifyingIndexCodec codec = new VerifyingIndexCodec();
    builder.setIndexCodecForTesting(codec);

    // setup the Puts we want to write
    final long ts = System.currentTimeMillis();
View Full Code Here

Examples of org.apache.stanbol.entityhub.indexing.core.Indexer

        args = line.getArgs();
        if(line.hasOption('h') || args.length <= 0){
            printHelp();
            System.exit(0);
        }
        Indexer indexer;
        IndexerFactory factory = IndexerFactory.getInstance();
        String path = null;
        if(args.length > 1){
            path = args[1];
        }
        if("init".equalsIgnoreCase(args[0]) ||
                "index".equalsIgnoreCase(args[0]) ||
                "postprocess".equalsIgnoreCase(args[0]) ||
                "finalise".equalsIgnoreCase(args[0])){
            if(path != null){
                indexer = factory.create(path);
            } else {
                indexer = factory.create();
            }
            if(line.hasOption('c')){
                int cunckSize = Integer.parseInt(line.getOptionValue('c'));
                indexer.setChunkSize(cunckSize);
            }
            if("index".equalsIgnoreCase(args[0])){
                indexer.index();
            } else if("postprocess".equalsIgnoreCase(args[0])){
                indexer.initialiseIndexing();
                indexer.skipIndexEntities();
                indexer.postProcessEntities();
                indexer.finaliseIndexing();
            } else if ("finalise".equalsIgnoreCase(args[0])){
                indexer.initialiseIndexing();
                indexer.skipIndexEntities();
                indexer.skipPostProcessEntities();
                indexer.finaliseIndexing();
            }
        } else {
            System.err.println("Unknown command "+args[0]+" (supported: init,index)\n\n");
            printHelp();
        }
View Full Code Here

Examples of org.apache.xindice.core.indexer.Indexer

      // TODO: figure out what goes here. done.
      try {

          Collection col = getCollection( (String) message.get(COLLECTION) );
          Indexer idx = col.getIndexer((String) message.get(NAME));

          if (idx != null) {

              result.put(RESULT, "yes");
          } else {
View Full Code Here

Examples of org.apache.xindice.core.indexer.Indexer

               if ( nk.keys != null )
                  ks.add(nk.keys);
               else if ( name != null ) {
                  // Try to use a NameIndex to resolve the path component
                  IndexPattern pattern = new IndexPattern(symbols, name, nsMap);
                  Indexer idx = context.getIndexManager().getBestIndexer(Indexer.STYLE_NODENAME, pattern);
                  if ( idx != null ) {
                     IndexMatch[] matches = idx.queryMatches(new IndexQueryANY(pattern));
                     Key[] keys = QueryEngine.getUniqueKeys(matches);
                     ks.add(keys);
                  }
               }
            }
View Full Code Here

Examples of org.apache.xindice.core.indexer.Indexer

            //       to do a collection scan in those cases where somebody
            //       typed an element or attribute name incorrectly.

            IndexPattern pattern = iq.getPattern();

            Indexer idx = context.getIndexManager().getBestIndexer(Indexer.STYLE_NODEVALUE, pattern);
            if ( idx != null )
               return new NamedKeys(nk.name, nk.attribute, QueryEngine.getUniqueKeys(idx.queryMatches(iq)));
            else if ( autoIndex ) {
               // TODO: This has to *not* be hardcoded
               Element e = new DocumentImpl().createElement("index");
               e.setAttribute("class", "org.apache.xindice.core.indexer.ValueIndexer");
               e.setAttribute("name", "xp_"+ps);
View Full Code Here

Examples of org.apache.xindice.core.indexer.Indexer

    * @param config The Indexer's configuration
    * @return The newly created Indexer
    */
   public final Indexer createIndexer(Configuration config) throws DBException {
      checkFiler(FaultCodes.COL_NO_INDEXMANAGER);
      Indexer idx = indexManager.create(config);
      getDatabase().flushConfig();
      return idx;
   }
View Full Code Here

Examples of org.apache.xindice.core.indexer.Indexer

        }

        Hashtable result = new Hashtable();
        try {
            Collection col = getCollection((String) message.get(COLLECTION));
            Indexer idx = col.getIndexer((String) message.get(NAME));

            if (idx != null) {
                result.put(RESULT, "yes");
            } else {
                result.put(RESULT, "no");
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.