Package: org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Analyzer


  public static Analyzer getPorterStemmerAnalyzer(final Analyzer child) {
   
    if (child == null)
      throw new IllegalArgumentException("child analyzer must not be null");
 
    return new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new PorterStemFilter(
            child.tokenStream(fieldName, reader));
//        /* PorterStemFilter and SnowballFilter have the same behaviour,
View Full Code Here


    if (maxSynonyms < 0)
      throw new IllegalArgumentException("maxSynonyms must not be negative");
    if (maxSynonyms == 0)
      return child; // no need to wrap
 
    return new Analyzer() {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        return new SynonymTokenFilter(
          child.tokenStream(fieldName, reader), synonyms, maxSynonyms);
      }
View Full Code Here

  public static Analyzer getTokenCachingAnalyzer(final Analyzer child) {

    if (child == null)
      throw new IllegalArgumentException("child analyzer must not be null");

    return new Analyzer() {

      private final HashMap<String,ArrayList<AttributeSource.State>> cache = new HashMap<String,ArrayList<AttributeSource.State>>();

      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
View Full Code Here

    q = parser.parse("[a TO c]", null);
    assertEquals("f1:[a TO c] f2:[a TO c] f3:[a TO c]", q.toString());
  }

  public void testStopWordSearching() throws Exception {
    Analyzer analyzer = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT);
    Directory ramDir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(ramDir, analyzer, true,
        IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.add(new Field("body", "blah the footest blah", Field.Store.NO,
View Full Code Here

    q = parser.parse("[a TO c]");
    assertEquals("f1:[a TO c] f2:[a TO c] f3:[a TO c]", q.toString());
  }

  public void testStopWordSearching() throws Exception {
    Analyzer analyzer = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT);
    Directory ramDir = new RAMDirectory();
    IndexWriter iw = new IndexWriter(ramDir, analyzer, true,
        IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.add(new Field("body", "blah the footest blah", Field.Store.NO,
View Full Code Here

          int traversalSize = Math.min(scoreDocs.length, traversalSize());

          if (traversalSize > 0) {
            boolean retrieve = withRetrieve();
            int numHighlight = Math.min(numToHighlight(), scoreDocs.length);
            Analyzer analyzer = getRunData().getAnalyzer();
            BenchmarkHighlighter highlighter = null;
            if (numHighlight > 0) {
              highlighter = getBenchmarkHighlighter(q);
            }
            for (int m = 0; m < traversalSize; m++) {
View Full Code Here

    token.end = end;
    return token;
  }

  public void checkCJKToken(final String str, final TestToken[] out_tokens) throws IOException {
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
    String terms[] = new String[out_tokens.length];
    int startOffsets[] = new int[out_tokens.length];
    int endOffsets[] = new int[out_tokens.length];
    String types[] = new String[out_tokens.length];
    for (int i = 0; i < out_tokens.length; i++) {
View Full Code Here

    }
    assertAnalyzesTo(analyzer, str, terms, startOffsets, endOffsets, types, null);
  }
 
  public void checkCJKTokenReusable(final Analyzer a, final String str, final TestToken[] out_tokens) throws IOException {
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
    String terms[] = new String[out_tokens.length];
    int startOffsets[] = new int[out_tokens.length];
    int endOffsets[] = new int[out_tokens.length];
    String types[] = new String[out_tokens.length];
    for (int i = 0; i < out_tokens.length; i++) {
View Full Code Here

    };
    checkCJKToken(str, out_tokens);
  }
 
  public void testTokenStream() throws Exception {
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
    assertAnalyzesTo(analyzer, "\u4e00\u4e01\u4e02",
        new String[] { "\u4e00\u4e01", "\u4e01\u4e02"});
  }
View Full Code Here

    assertAnalyzesTo(analyzer, "\u4e00\u4e01\u4e02",
        new String[] { "\u4e00\u4e01", "\u4e01\u4e02"});
  }
 
  public void testReusableTokenStream() throws Exception {
    Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
    String str = "\u3042\u3044\u3046\u3048\u304aabc\u304b\u304d\u304f\u3051\u3053";
   
    TestToken[] out_tokens = {
      newToken("\u3042\u3044", 0, 2, CJKTokenizer.DOUBLE_TOKEN_TYPE),
      newToken("\u3044\u3046", 1, 3, CJKTokenizer.DOUBLE_TOKEN_TYPE),
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.Analyzer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.