Package org.apache.lucene.util.automaton

Examples of org.apache.lucene.util.automaton.RegExp


   * @param term regular expression.
   * @param flags optional RegExp features from {@link RegExp}
   * @param provider custom AutomatonProvider for named automata
   */
  public NodeRegexpQuery(final Term term, final int flags, final AutomatonProvider provider) {
    super(term, new RegExp(term.text(), flags).toAutomaton(provider));
  }
View Full Code Here


        System.out.println("TEST: got termsEnum=" + termsEnum);
      }
      BytesRef term;
      int ord = 0;

      Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();   
      final TermsEnum termsEnum2 = terms.intersect(new CompiledAutomaton(automaton, false, false), null);

      while((term = termsEnum.next()) != null) {
        BytesRef term2 = termsEnum2.next();
        assertNotNull(term2);
View Full Code Here

  private class DumbRegexpQuery extends MultiTermQuery {
    private final Automaton automaton;
   
    DumbRegexpQuery(Term term, int flags) {
      super(term.field());
      RegExp re = new RegExp(term.text(), flags);
      automaton = re.toAutomaton();
    }
View Full Code Here

   
    if (deep) {
      int numIntersections = atLeast(3);
      for (int i = 0; i < numIntersections; i++) {
        String re = AutomatonTestUtil.randomRegexp(random());
        CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
        if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
          // TODO: test start term too
          TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
          TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
          assertTermsEnum(leftIntersection, rightIntersection, rarely());
View Full Code Here

   
    if (deep) {
      int numIntersections = atLeast(3);
      for (int i = 0; i < numIntersections; i++) {
        String re = AutomatonTestUtil.randomRegexp(random());
        CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
        if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
          // TODO: test start term too
          TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
          TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
          assertTermsEnumEquals(info, leftReader, leftIntersection, rightIntersection, rarely());
View Full Code Here

 
  // Test some regular expressions as tokenization patterns
  /** Test a configuration where each character is a term */
  public void testSingleChar() throws Exception {
    CharacterRunAutomaton single =
        new CharacterRunAutomaton(new RegExp(".").toAutomaton());
    Analyzer a = new MockAnalyzer(random(), single, false);
    assertAnalyzesTo(a, "foobar",
        new String[] { "f", "o", "o", "b", "a", "r" },
        new int[] { 0, 1, 2, 3, 4, 5 },
        new int[] { 1, 2, 3, 4, 5, 6 }
View Full Code Here

  }
 
  /** Test a configuration where two characters makes a term */
  public void testTwoChars() throws Exception {
    CharacterRunAutomaton single =
        new CharacterRunAutomaton(new RegExp("..").toAutomaton());
    Analyzer a = new MockAnalyzer(random(), single, false);
    assertAnalyzesTo(a, "foobar",
        new String[] { "fo", "ob", "ar"},
        new int[] { 0, 2, 4 },
        new int[] { 2, 4, 6 }
View Full Code Here

  }
 
  /** Test a configuration where three characters makes a term */
  public void testThreeChars() throws Exception {
    CharacterRunAutomaton single =
        new CharacterRunAutomaton(new RegExp("...").toAutomaton());
    Analyzer a = new MockAnalyzer(random(), single, false);
    assertAnalyzesTo(a, "foobar",
        new String[] { "foo", "bar"},
        new int[] { 0, 3 },
        new int[] { 3, 6 }
View Full Code Here

  }
 
  /** Test a configuration where word starts with one uppercase */
  public void testUppercase() throws Exception {
    CharacterRunAutomaton single =
        new CharacterRunAutomaton(new RegExp("[A-Z][a-z]*").toAutomaton());
    Analyzer a = new MockAnalyzer(random(), single, false);
    assertAnalyzesTo(a, "FooBarBAZ",
        new String[] { "Foo", "Bar", "B", "A", "Z"},
        new int[] { 0, 3, 6, 7, 8 },
        new int[] { 3, 6, 7, 8, 9 }
View Full Code Here

        new int[] { 2, 2, 1, 2 });
  }
 
  /** Test a configuration that behaves a lot like LengthFilter */
  public void testLength() throws Exception {
    CharacterRunAutomaton length5 = new CharacterRunAutomaton(new RegExp(".{5,}").toAutomaton());
    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, length5);
    assertAnalyzesTo(a, "ok toolong fine notfine",
        new String[] { "ok", "fine" },
        new int[] { 1, 2 });
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.automaton.RegExp

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.