Examples of CharacterRunAutomaton


Examples of org.apache.lucene.util.automaton.CharacterRunAutomaton

  @BeforeClass
  public static void beforeClass() throws Exception {
    Random random = random();
    directory = newDirectory();
    stopword = "" + randomChar();
    CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.makeString(stopword));
    analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
    RandomIndexWriter iw = new RandomIndexWriter(random, directory, analyzer);
    Document doc = new Document();
    Field id = new StringField("id", "", Field.Store.NO);
    Field field = new TextField("field", "", Field.Store.NO);
View Full Code Here

Examples of org.apache.lucene.util.automaton.CharacterRunAutomaton

    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new MockTokenizer(reader);
        TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
        stream = new MockTokenFilter(stream, new CharacterRunAutomaton(secondSet));
        return new TokenStreamComponents(tokenizer, stream);
      }
    };
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
    Document doc = new Document();
View Full Code Here

Examples of org.apache.lucene.util.automaton.CharacterRunAutomaton

      }
    }
    Automaton single = new Automaton(initial);
    single.reduce();
    Automaton repeat = BasicOperations.repeat(single);
    jvmLetter = new CharacterRunAutomaton(repeat);
  }
View Full Code Here

Examples of org.apache.lucene.util.automaton.CharacterRunAutomaton

  }
 
  // Test some regular expressions as tokenization patterns
  /** Test a configuration where each character is a term */
  public void testSingleChar() throws Exception {
    CharacterRunAutomaton single =
        new CharacterRunAutomaton(new RegExp(".").toAutomaton());
    Analyzer a = new MockAnalyzer(random(), single, false);
    assertAnalyzesTo(a, "foobar",
        new String[] { "f", "o", "o", "b", "a", "r" },
        new int[] { 0, 1, 2, 3, 4, 5 },
        new int[] { 1, 2, 3, 4, 5, 6 }
View Full Code Here

Examples of org.apache.lucene.util.automaton.CharacterRunAutomaton

    checkRandomData(random(), a, 100);
  }
 
  /** Test a configuration where two characters makes a term */
  public void testTwoChars() throws Exception {
    CharacterRunAutomaton single =
        new CharacterRunAutomaton(new RegExp("..").toAutomaton());
    Analyzer a = new MockAnalyzer(random(), single, false);
    assertAnalyzesTo(a, "foobar",
        new String[] { "fo", "ob", "ar"},
        new int[] { 0, 2, 4 },
        new int[] { 2, 4, 6 }
View Full Code Here

Examples of org.apache.lucene.util.automaton.CharacterRunAutomaton

    checkRandomData(random(), a, 100);
  }
 
  /** Test a configuration where three characters makes a term */
  public void testThreeChars() throws Exception {
    CharacterRunAutomaton single =
        new CharacterRunAutomaton(new RegExp("...").toAutomaton());
    Analyzer a = new MockAnalyzer(random(), single, false);
    assertAnalyzesTo(a, "foobar",
        new String[] { "foo", "bar"},
        new int[] { 0, 3 },
        new int[] { 3, 6 }
View Full Code Here

Examples of org.apache.lucene.util.automaton.CharacterRunAutomaton

    checkRandomData(random(), a, 100);
  }
 
  /** Test a configuration where word starts with one uppercase */
  public void testUppercase() throws Exception {
    CharacterRunAutomaton single =
        new CharacterRunAutomaton(new RegExp("[A-Z][a-z]*").toAutomaton());
    Analyzer a = new MockAnalyzer(random(), single, false);
    assertAnalyzesTo(a, "FooBarBAZ",
        new String[] { "Foo", "Bar", "B", "A", "Z"},
        new int[] { 0, 3, 6, 7, 8 },
        new int[] { 3, 6, 7, 8, 9 }
View Full Code Here

Examples of org.apache.lucene.util.automaton.CharacterRunAutomaton

        new int[] { 2, 1, 2 });
  }
 
  /** Test a configuration that behaves a lot like KeepWordFilter */
  public void testKeep() throws Exception {
    CharacterRunAutomaton keepWords =
      new CharacterRunAutomaton(
          BasicOperations.complement(
              Automaton.union(
                  Arrays.asList(BasicAutomata.makeString("foo"), BasicAutomata.makeString("bar")))));
    Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, keepWords);
    assertAnalyzesTo(a, "quick foo brown bar bar fox foo",
View Full Code Here

Examples of org.apache.lucene.util.automaton.CharacterRunAutomaton

        new int[] { 2, 2, 1, 2 });
  }
 
  /** Test a configuration that behaves a lot like LengthFilter */
  public void testLength() throws Exception {
    CharacterRunAutomaton length5 = new CharacterRunAutomaton(new RegExp(".{5,}").toAutomaton());
    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, length5);
    assertAnalyzesTo(a, "ok toolong fine notfine",
        new String[] { "ok", "fine" },
        new int[] { 1, 2 });
  }
View Full Code Here

Examples of org.apache.lucene.util.automaton.CharacterRunAutomaton

 
  /** blast some random strings through differently configured tokenizers */
  public void testRandomRegexps() throws Exception {
    int iters = atLeast(30);
    for (int i = 0; i < iters; i++) {
      final CharacterRunAutomaton dfa = new CharacterRunAutomaton(AutomatonTestUtil.randomAutomaton(random()));
      final boolean lowercase = random().nextBoolean();
      final int limit = _TestUtil.nextInt(random(), 0, 500);
      Analyzer a = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.