Package org.apache.lucene.util.automaton

Examples of org.apache.lucene.util.automaton.State$TransitionsIterable


            // OK: this is the first token
            posData.leaving = a.getInitialState();
          } else {
            // This means there's a hole (eg, StopFilter
            // does this):
            posData.leaving = new State();
            addHoles(a.getInitialState(), positions, pos);
          }
        } else {
          posData.leaving = new State();
          posData.arriving.addTransition(new Transition(POS_SEP, posData.leaving));
          if (posInc > 1) {
            // A token spanned over a hole; add holes
            // "under" it:
            addHoles(a.getInitialState(), positions, pos);
          }
        }
        positions.freeBefore(pos);
      } else {
        // note: this isn't necessarily true. its just that we aren't surely det.
        // we could optimize this further (e.g. buffer and sort synonyms at a position)
        // but thats probably overkill. this is cheap and dirty
        deterministic = false;
      }

      final int endPos = pos + posLengthAtt.getPositionLength();

      termBytesAtt.fillBytesRef();
      final BytesRef termUTF8 = changeToken(term);
      int[] termUnicode = null;
      final Position endPosData = positions.get(endPos);
      if (endPosData.arriving == null) {
        endPosData.arriving = new State();
      }

      State state = posData.leaving;
      int termLen;
      if (unicodeArcs) {
        final String utf16 = termUTF8.utf8ToString();
        termUnicode = new int[utf16.codePointCount(0, utf16.length())];
        termLen = termUnicode.length;
        for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp))
          termUnicode[j++] = cp = utf16.codePointAt(i);
      } else {
        termLen = termUTF8.length;
      }

      for(int byteIDX=0;byteIDX<termLen;byteIDX++) {
        final State nextState = byteIDX == termLen-1 ? endPosData.arriving : new State();
        int c;
        if (unicodeArcs) {
          c = termUnicode[byteIDX];
        } else {
          c = termUTF8.bytes[termUTF8.offset + byteIDX] & 0xff;
        }
        state.addTransition(new Transition(c, nextState));
        state = nextState;
      }

      maxOffset = Math.max(maxOffset, offsetAtt.endOffset());
    }

    in.end();
    State endState = null;
    if (offsetAtt.endOffset() > maxOffset) {
      endState = new State();
      endState.setAccept(true);
    }

    pos++;
    while (pos <= positions.getMaxPos()) {
      posData = positions.get(pos);
View Full Code Here


    Position posData = positions.get(pos);
    Position prevPosData = positions.get(pos-1);

    while(posData.arriving == null || prevPosData.leaving == null) {
      if (posData.arriving == null) {
        posData.arriving = new State();
        posData.arriving.addTransition(new Transition(POS_SEP, posData.leaving));
      }
      if (prevPosData.leaving == null) {
        if (pos == 1) {
          prevPosData.leaving = startState;
        } else {
          prevPosData.leaving = new State();
        }
        if (prevPosData.arriving != null) {
          prevPosData.arriving.addTransition(new Transition(POS_SEP, prevPosData.leaving));
        }
      }
View Full Code Here

      BytesRef spare = new BytesRef();
      for (TermFreqPayload2 e : slowCompletor) {
        spare.copyChars(e.analyzedForm);
        Set<IntsRef> finiteStrings = suggester.toFiniteStrings(spare, tokenStreamToAutomaton);
        for (IntsRef intsRef : finiteStrings) {
          State p = automaton.getInitialState();
          BytesRef ref = Util.toBytesRef(intsRef, spare);
          boolean added = false;
          for (int i = ref.offset; i < ref.length; i++) {
            State q = p.step(ref.bytes[i] & 0xff);
            if (q == null) {
              break;
            } else if (q.isAccept()) {
              matches.add(new LookupResult(e.surfaceForm, e.weight));
              added = true;
              break;
            }
            p = q;
View Full Code Here

    State[] states = a.getNumberedStates();

    // Go in reverse topo sort so we know we only have to
    // make one pass:
    for(int stateNumber=states.length-1;stateNumber >=0;stateNumber--) {
      final State state = states[stateNumber];
      List<Transition> newTransitions = new ArrayList<>();
      for(Transition t : state.getTransitions()) {
        assert t.getMin() == t.getMax();
        if (t.getMin() == TokenStreamToAutomaton.POS_SEP) {
          if (preserveSep) {
            // Remap to SEP_LABEL:
            newTransitions.add(new Transition(SEP_LABEL, t.getDest()));
          } else {
            copyDestTransitions(state, t.getDest(), newTransitions);
            a.setDeterministic(false);
          }
        } else if (t.getMin() == TokenStreamToAutomaton.HOLE) {

          // Just remove the hole: there will then be two
          // SEP tokens next to each other, which will only
          // match another hole at search time.  Note that
          // it will also match an empty-string token ... if
          // that's somehow a problem we can always map HOLE
          // to a dedicated byte (and escape it in the
          // input).
          copyDestTransitions(state, t.getDest(), newTransitions);
          a.setDeterministic(false);
        } else {
          newTransitions.add(t);
        }
      }
      state.setTransitions(newTransitions.toArray(new Transition[newTransitions.size()]));
    }
  }
View Full Code Here

 
  @Override
  public void setUp() throws Exception {
    super.setUp();
    // build an automaton matching this jvm's letter definition
    State initial = new State();
    State accept = new State();
    accept.setAccept(true);
    for (int i = 0; i <= 0x10FFFF; i++) {
      if (Character.isLetter(i)) {
        initial.addTransition(new Transition(i, i, accept));
      }
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.automaton.State$TransitionsIterable

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.