Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Analyzer


    // LUCENE-1189
    assertQueryEquals("(\"a\\\\\") or (\"b\")", a ,"a\\ or b");
  }

  public void testQueryStringEscaping() throws Exception {
    Analyzer a = new WhitespaceAnalyzer();

    assertEscapedQueryEquals("a-b:c", a, "a\\-b\\:c");
    assertEscapedQueryEquals("a+b:c", a, "a\\+b\\:c");
    assertEscapedQueryEquals("a:b:c", a, "a\\:b\\:c");
    assertEscapedQueryEquals("a\\b:c", a, "a\\\\b\\:c");
View Full Code Here


  // LUCENE-2002: make sure defaults for StandardAnalyzer's
  // enableStopPositionIncr & QueryParser's enablePosIncr
  // "match"
  public void testPositionIncrements() throws Exception {
    Directory dir = new MockRAMDirectory();
    Analyzer a = new StandardAnalyzer(Version.LUCENE_CURRENT);
    IndexWriter w = new IndexWriter(dir, a, IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    doc.add(new Field("f", "the wizard of ozzy", Field.Store.NO, Field.Index.ANALYZED));
    w.addDocument(doc);
    IndexReader r = w.getReader();
View Full Code Here

    docCount++;
  }

  // LUCENE-38
  public void testExclusiveLowerNull() throws Exception {
    Analyzer analyzer = new SingleCharAnalyzer();
    //http://issues.apache.org/jira/browse/LUCENE-38
    Query query = new TermRangeQuery("content", null, "C",
                                 false, false);
    initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer);
    IndexSearcher searcher = new IndexSearcher(dir, true);
View Full Code Here

  }

  // LUCENE-38
  public void testInclusiveLowerNull() throws Exception {
    //http://issues.apache.org/jira/browse/LUCENE-38
    Analyzer analyzer = new SingleCharAnalyzer();
    Query query = new TermRangeQuery("content", null, "C", true, true);
    initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer);
    IndexSearcher searcher = new IndexSearcher(dir, true);
    int numHits = searcher.search(query, null, 1000).totalHits;
    // When Lucene-38 is fixed, use the assert on the next line:
View Full Code Here

    q = parser.parse("[a TO c]");
    assertEquals("f1:[a TO c] f2:[a TO c] f3:[a TO c]", q.toString());
  }

  public void testStopWordSearching() throws Exception {
    Analyzer analyzer = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT);
    Directory ramDir = new RAMDirectory();
    IndexWriter iw =  new IndexWriter(ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.add(new Field("body", "blah the footest blah", Field.Store.NO, Field.Index.ANALYZED));
    iw.addDocument(doc);
View Full Code Here

    assertTokenStreamContents(filter, new String[] { "12345", "#$%@#^%&", "HL" });
  }

  public void testRandom() throws Exception {
    final int codeLen = _TestUtil.nextInt(random(), 1, 8);
    Analyzer a = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, codeLen, false));
      }
     
    };
    checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
   
    Analyzer b = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, codeLen, true));
View Full Code Here

    };
    checkRandomData(random(), b, 1000 * RANDOM_MULTIPLIER);
  }
 
  public void testEmptyTerm() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, 8, random().nextBoolean()));
      }
View Full Code Here

    Document doc = searcher.doc(sd[0].doc);
    assertEquals("Should match most similar when using 2 words", "2", doc.get("id"));
  }

  public void testFuzzyLikeThisQueryEquals() {
    Analyzer analyzer = new MockAnalyzer(random());
    FuzzyLikeThisQuery fltq1 = new FuzzyLikeThisQuery(10, analyzer);
    fltq1.addTerms("javi", "subject", 0.5f, 2);
    FuzzyLikeThisQuery fltq2 = new FuzzyLikeThisQuery(10, analyzer);
    fltq2.addTerms("javi", "subject", 0.5f, 2);
    assertEquals("FuzzyLikeThisQuery with same attributes is not equal", fltq1,
View Full Code Here

import org.apache.lucene.util._TestUtil;

public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {

  static private Analyzer newTestAnalyzer() {
    return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, tokenizer);
      }
View Full Code Here

        (new StringReader(text.toString()));
    while (reader.read() != -1);
  }

  public void testUTF16Surrogates() throws Exception {
    Analyzer analyzer = newTestAnalyzer();
    // Paired surrogates
    assertAnalyzesTo(analyzer, " one two ��three",
        new String[] { "one", "two", "\uD86C\uDC01three" } );
    assertAnalyzesTo(analyzer, " ��", new String[] { "\uD86C\uDC01" } );
    assertAnalyzesTo(analyzer, " ��", new String[] { "\uD86C\uDC01" } );
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.Analyzer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.