Package org.apache.lucene.analysis.miscellaneous

Examples of org.apache.lucene.analysis.miscellaneous.PatternAnalyzer$FastStringReader


        analyzerProviderFactories.put("stop", new PreBuiltAnalyzerProviderFactory("stop", AnalyzerScope.INDICES, new StopAnalyzer(Lucene.ANALYZER_VERSION)));
        analyzerProviderFactories.put("whitespace", new PreBuiltAnalyzerProviderFactory("whitespace", AnalyzerScope.INDICES, new WhitespaceAnalyzer(Lucene.ANALYZER_VERSION)));
        analyzerProviderFactories.put("simple", new PreBuiltAnalyzerProviderFactory("simple", AnalyzerScope.INDICES, new SimpleAnalyzer(Lucene.ANALYZER_VERSION)));

        // extended ones
        analyzerProviderFactories.put("pattern", new PreBuiltAnalyzerProviderFactory("pattern", AnalyzerScope.INDICES, new PatternAnalyzer(Lucene.ANALYZER_VERSION, Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
        analyzerProviderFactories.put("snowball", new PreBuiltAnalyzerProviderFactory("snowball", AnalyzerScope.INDICES, new SnowballAnalyzer(Lucene.ANALYZER_VERSION, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
        analyzerProviderFactories.put("standard_html_strip", new PreBuiltAnalyzerProviderFactory("standard_html_strip", AnalyzerScope.INDICES, new StandardHtmlStripAnalyzer(Lucene.ANALYZER_VERSION)));

        analyzerProviderFactories.put("arabic", new PreBuiltAnalyzerProviderFactory("arabic", AnalyzerScope.INDICES, new ArabicAnalyzer(Lucene.ANALYZER_VERSION)));
        analyzerProviderFactories.put("armenian", new PreBuiltAnalyzerProviderFactory("armenian", AnalyzerScope.INDICES, new ArmenianAnalyzer(Lucene.ANALYZER_VERSION)));
View Full Code Here


        if (sPattern == null) {
            throw new ElasticSearchIllegalArgumentException("Analyzer [" + name + "] of type pattern must have a `pattern` set");
        }
        Pattern pattern = Regex.compile(sPattern, settings.get("flags"));

        analyzer = new PatternAnalyzer(version, pattern, lowercase, stopWords);
    }
View Full Code Here

 
  @Test
  public void TestMultiValueCollisionDetection() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(
        Lucene.VERSION, new PatternAnalyzer(Version.LUCENE_36, PatternAnalyzer.NON_WORD_PATTERN, false, null)));

    indexWriter.addDocument(DocumentBuilder.doc()
        .add(DocumentBuilder.field("mvalue", "FB"))
        .add(DocumentBuilder.field("mvalue", "Ea")).build());
View Full Code Here

   @Test
   public void TestMultiValueMaxTermsPerDoc() throws Exception {
      Directory dir = new RAMDirectory();
      IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(
              Lucene.VERSION, new PatternAnalyzer(Version.LUCENE_36, PatternAnalyzer.WHITESPACE_PATTERN, false, null)));

      DocumentBuilder d = DocumentBuilder.doc();

      for (int i=0;i<200;i++) d.add(DocumentBuilder.field("mvalue", "t" + i));
View Full Code Here

   @Test
   public void TestMultiValueMinDocsPerTerm() throws Exception {
      Directory dir = new RAMDirectory();
      IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(
              Lucene.VERSION, new PatternAnalyzer(Version.LUCENE_36, PatternAnalyzer.WHITESPACE_PATTERN, false, null)));

      DocumentBuilder d = DocumentBuilder.doc();

      d.add(DocumentBuilder.field("mvalue", "t1"));
View Full Code Here

        Version matchVersion = Version.valueOf(MapUtils.getString(initProperties, "matchVersion", "LUCENE_35"));
        Pattern pattern = Pattern.compile(MapUtils.getString(initProperties, "pattern", "\\s+"));
        boolean toLowerCase = MapUtils.getBoolean(initProperties, "toLowerCase", true);
        String stopWordsStr = MapUtils.getString(initProperties, "stopWords", "");
        Set<String> stopWords = new HashSet<String>(Arrays.asList(stopWordsStr.split("\\s*,\\s*")));
        return new PatternAnalyzer(matchVersion, pattern, toLowerCase, stopWords);
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.miscellaneous.PatternAnalyzer$FastStringReader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.