Examples of AnalyzerFactory

org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory
A factory to create an analyzer. See {@link org.apache.lucene.benchmark.byTask.tasks.AnalyzerFactoryTask}
org.apache.nutch.analysis.AnalyzerFactory
Creates and caches {@link NutchAnalyzer} plugins. @author Jérôme Charron

Examples of org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory

      }
    } catch (Throwable t) {
      throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
    }


    final AnalyzerFactory analyzerFactory = new AnalyzerFactory
        (charFilterFactories, tokenizerFactory, tokenFilterFactories);
    analyzerFactory.setPositionIncrementGap(positionIncrementGap);
    analyzerFactory.setOffsetGap(offsetGap);
    getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
  }

View Full Code Here

Examples of org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory

      Analyzer analyzer = null;
      if (null == analyzerName || 0 == analyzerName.length()) {
        analyzerName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
      }
      // First, lookup analyzerName as a named analyzer factory
      AnalyzerFactory factory = getRunData().getAnalyzerFactories().get(analyzerName);
      if (null != factory) {
        analyzer = factory.create();
      } else {
        if (analyzerName.contains(".")) {
          if (analyzerName.startsWith("standard.")) {
            analyzerName = "org.apache.lucene.analysis." + analyzerName;
          }

View Full Code Here

Examples of org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory

      }
    } catch (Throwable t) {
      throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
    }


    final AnalyzerFactory analyzerFactory = new AnalyzerFactory
        (charFilterFactories, tokenizerFactory, tokenFilterFactories);
    analyzerFactory.setPositionIncrementGap(positionIncrementGap);
    analyzerFactory.setOffsetGap(offsetGap);
    getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
  }

View Full Code Here

Examples of org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory

      }
    } catch (Throwable t) {
      throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
    }


    final AnalyzerFactory analyzerFactory = new AnalyzerFactory
        (charFilterFactories, tokenizerFactory, tokenFilterFactories);
    analyzerFactory.setPositionIncrementGap(positionIncrementGap);
    analyzerFactory.setOffsetGap(offsetGap);
    getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
  }

View Full Code Here

Examples of org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory

      }
    } catch (Throwable t) {
      throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
    }


    final AnalyzerFactory analyzerFactory = new AnalyzerFactory
        (charFilterFactories, tokenizerFactory, tokenFilterFactories);
    analyzerFactory.setPositionIncrementGap(positionIncrementGap);
    analyzerFactory.setOffsetGap(offsetGap);
    getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
  }

View Full Code Here

Examples of org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory

      Analyzer analyzer = null;
      if (null == analyzerName || 0 == analyzerName.length()) {
        analyzerName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
      }
      // First, lookup analyzerName as a named analyzer factory
      AnalyzerFactory factory = getRunData().getAnalyzerFactories().get(analyzerName);
      if (null != factory) {
        analyzer = factory.create();
      } else {
        if (analyzerName.contains(".")) {
          if (analyzerName.startsWith("standard.")) {
            analyzerName = "org.apache.lucene.analysis." + analyzerName;
          }

View Full Code Here

Examples of org.apache.nutch.analysis.AnalyzerFactory

    perm = new Path(FileOutputFormat.getOutputPath(job), name);
    temp = job.getLocalPath("index/_"  +
                      Integer.toString(new Random().nextInt()));


    fs.delete(perm, true); // delete old, if any
    analyzerFactory = new AnalyzerFactory(job);
    writer = new IndexWriter(
        FSDirectory.open(new File(fs.startLocalOutput(perm, temp).toString())),
        new NutchDocumentAnalyzer(job), true, MaxFieldLength.UNLIMITED);


    writer.setMergeFactor(job.getInt("indexer.mergeFactor", 10));

View Full Code Here

Examples of org.apache.nutch.analysis.AnalyzerFactory

      final Path temp = job.getLocalPath("index/_"
        + Integer.toString(new Random().nextInt()));


      fs.delete(perm, true); // delete old, if any


      final AnalyzerFactory factory = new AnalyzerFactory(job);
      final IndexWriter writer = // build locally first
      new IndexWriter(
        FSDirectory.open(new File(fs.startLocalOutput(perm, temp).toString())),
        new NutchDocumentAnalyzer(job), true, 
        new MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));


      writer.setMergeFactor(job.getInt("indexer.mergeFactor", 10));
      writer.setMaxBufferedDocs(job.getInt("indexer.minMergeDocs", 100));
      writer.setMaxMergeDocs(job.getInt("indexer.maxMergeDocs",
        Integer.MAX_VALUE));
      writer.setTermIndexInterval(job.getInt("indexer.termIndexInterval", 128));
      writer.setMaxFieldLength(job.getInt("indexer.max.tokens", 10000));
      writer.setInfoStream(LogUtil.getInfoStream(LOG));
      writer.setUseCompoundFile(false);
      writer.setSimilarity(new NutchSimilarity());


      return new RecordWriter<WritableComparable, LuceneDocumentWrapper>() {
        boolean closed;


        public void write(WritableComparable key, LuceneDocumentWrapper value)
          throws IOException { // unwrap & index doc
          Document doc = value.get();
          NutchAnalyzer analyzer = factory.get(doc.get("lang"));
          if (LOG.isInfoEnabled()) {
            LOG.info(" Indexing [" + doc.getField("url").stringValue() + "]"
              + " with analyzer " + analyzer);
          }
          writer.addDocument(doc, analyzer);

View Full Code Here

Examples of org.apache.nutch.analysis.AnalyzerFactory

      final Path temp = job.getLocalPath("index/_"
        + Integer.toString(new Random().nextInt()));


      fs.delete(perm, true); // delete old, if any


      final AnalyzerFactory factory = new AnalyzerFactory(job);
      final IndexWriter writer = // build locally first
      new IndexWriter(fs.startLocalOutput(perm, temp).toString(),
        new NutchDocumentAnalyzer(job), true);


      writer.setMergeFactor(job.getInt("indexer.mergeFactor", 10));
      writer.setMaxBufferedDocs(job.getInt("indexer.minMergeDocs", 100));
      writer.setMaxMergeDocs(job.getInt("indexer.maxMergeDocs",
        Integer.MAX_VALUE));
      writer.setTermIndexInterval(job.getInt("indexer.termIndexInterval", 128));
      writer.setMaxFieldLength(job.getInt("indexer.max.tokens", 10000));
      writer.setInfoStream(LogUtil.getInfoStream(LOG));
      writer.setUseCompoundFile(false);
      writer.setSimilarity(new NutchSimilarity());


      return new RecordWriter<WritableComparable, LuceneDocumentWrapper>() {
        boolean closed;


        public void write(WritableComparable key, LuceneDocumentWrapper value)
          throws IOException { // unwrap & index doc
          Document doc = value.get();
          NutchAnalyzer analyzer = factory.get(doc.get("lang"));
          if (LOG.isInfoEnabled()) {
            LOG.info(" Indexing [" + doc.getField("url").stringValue() + "]"
              + " with analyzer " + analyzer);
          }
          writer.addDocument(doc, analyzer);

View Full Code Here

Examples of org.apache.nutch.analysis.AnalyzerFactory

    perm = new Path(FileOutputFormat.getOutputPath(job), name);
    temp = job.getLocalPath("index/_"  +
                      Integer.toString(new Random().nextInt()));


    fs.delete(perm, true); // delete old, if any
    analyzerFactory = new AnalyzerFactory(job);
    writer = new IndexWriter(fs.startLocalOutput(perm, temp).toString(),
        new NutchDocumentAnalyzer(job), true);


    writer.setMergeFactor(job.getInt("indexer.mergeFactor", 10));
    writer.setMaxBufferedDocs(job.getInt("indexer.minMergeDocs", 100));

View Full Code Here

0 1

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.