Package org.apache.lucene.analysis.core

Examples of org.apache.lucene.analysis.core.KeywordAnalyzer


  }

  @Test
  public void test3() throws IOException, InterruptedException {
    // Thread.sleep(30000);
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    IndexWriter writer = new IndexWriter(_cacheDirectory, conf);
    int docs = 100000;
    for (int i = 0; i < docs; i++) {
      if (i % 500 == 0) {
        System.out.println(i);
View Full Code Here


  }

  private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
      throws IOException {
    HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
    mergePolicy.setUseCompoundFile(false);
    IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);

    Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
View Full Code Here

          File file = new File(tableFile, shardName);
          file.mkdirs();
          directory = new MMapDirectory(file);
        }
        if (!DirectoryReader.indexExists(directory)) {
          new IndexWriter(directory, new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer())).close();
        }
        shards.put(shardName, openIndex(table, shardName, directory));
      }
      return shards;
    }
View Full Code Here

    @Test
    public void testMultipleAnalyzers() throws IOException {
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
                new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
                new StandardAnalyzer(TEST_VERSION_CURRENT),
                new KeywordAnalyzer()
        );
        for (int i = 0 ; i < 3 ; i++)
            assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test "+i)),
                    new String[]{"just", "just", "just a little test "+i, "a", "little", "little", "test", "test", Integer.toString(i), Integer.toString(i)},
                    new int[]{ 0, 0, 0, 5, 7, 7, 14, 14, 19, 19},
View Full Code Here

    @Test
    public void testMultipleAnalyzersDeduplication() throws IOException {
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
                new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
                new StandardAnalyzer(TEST_VERSION_CURRENT),
                new KeywordAnalyzer()
        );
        cb.enableDeduplication();
        for (int i = 0 ; i < 3 ; i++)
            assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test "+i)),
                    new String[]{"just", "just a little test "+i, "a", "little", "test", Integer.toString(i)},
View Full Code Here

    @Test
    public void testCascadeCombo() throws IOException {
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
                new ComboAnalyzer(TEST_VERSION_CURRENT,
                        new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
                        new KeywordAnalyzer()
                ),
                new StandardAnalyzer(TEST_VERSION_CURRENT),
                new KeywordAnalyzer()
        );
        for (int i = 0 ; i < 3 ; i++)
            assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test "+i)),
                    new String[]{"just", "just", "just a little test "+i, "just a little test "+i, "a", "little", "little", "test", "test", Integer.toString(i), Integer.toString(i)},
                    new int[]{ 0, 0, 0, 0, 5, 7, 7, 14, 14, 19, 19},
View Full Code Here

                    new int[]{ 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0});
    }

    @Test
    public void testCascadeComboTwiceSameInstanceSolvedByCaching() throws IOException {
        Analyzer analyzer = new KeywordAnalyzer();
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
                new ComboAnalyzer(TEST_VERSION_CURRENT,
                        new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
                        analyzer
                ).enableTokenStreamCaching(),
View Full Code Here

        this.docDef = def ;

        // create the analyzer as a wrapper that uses KeywordAnalyzer for
        // entity and graph fields and StandardAnalyzer for all other
        Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>() ;
        analyzerPerField.put(def.getEntityField(), new KeywordAnalyzer()) ;
        if ( def.getGraphField() != null )
            analyzerPerField.put(def.getGraphField(), new KeywordAnalyzer()) ;
       
        for (String field : def.fields()) {
          Analyzer analyzer = def.getAnalyzer(field);
          if (analyzer != null) {
            analyzerPerField.put(field, analyzer);
View Full Code Here

    this.searchStrategy = searchingStrategy;

    queryParser = new QueryParser(
        Version.LUCENE_44,
        FbIndexField.TEXT.fieldName(),
        searchingStrategy.equals("exact") ? new KeywordAnalyzer() : new StandardAnalyzer(Version.LUCENE_44));
    LogInfo.log("Opening index dir: " + indexDir);
    IndexReader indexReader = DirectoryReader.open(SimpleFSDirectory.open(new File(indexDir)));
    indexSearcher = new IndexSearcher(indexReader);
    LogInfo.log("Opened index with " + indexReader.numDocs() + " documents.");
View Full Code Here

  public FbEntityIndexer(String namefile, String outputDir, String indexingStrategy) throws IOException {

    if (!indexingStrategy.equals("exact") && !indexingStrategy.equals("inexact"))
      throw new RuntimeException("Bad indexing strategy: " + indexingStrategy);

    IndexWriterConfig config =  new IndexWriterConfig(Version.LUCENE_44 , indexingStrategy.equals("exact") ? new KeywordAnalyzer() : new StandardAnalyzer(Version.LUCENE_44));
    config.setOpenMode(OpenMode.CREATE);
    config.setRAMBufferSizeMB(256.0);
    indexer = new IndexWriter(new SimpleFSDirectory(new File(outputDir)),config);
   
    this.nameFile = namefile;
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.core.KeywordAnalyzer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.