Package com.flaptor.org.apache.lucene.util.automaton

Source Code of com.flaptor.org.apache.lucene.util.automaton.SpecialOperations

/*
* dk.brics.automaton
*
* Copyright (c) 2001-2009 Anders Moeller
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
*    derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package com.flaptor.org.apache.lucene.util.automaton;

import java.util.ArrayDeque;
import java.util.BitSet;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import com.flaptor.org.apache.lucene.util.BytesRef;

/**
* Special automata operations.
*
* @lucene.experimental
*/
final public class SpecialOperations {
 
  private SpecialOperations() {}
 
  /**
   * Finds the largest entry whose value is less than or equal to c, or 0 if
   * there is no such entry.
   */
  static int findIndex(int c, int[] points) {
    int a = 0;
    int b = points.length;
    while (b - a > 1) {
      int d = (a + b) >>> 1;
      if (points[d] > c) b = d;
      else if (points[d] < c) a = d;
      else return d;
    }
    return a;
  }
 
  /**
   * Returns true if the language of this automaton is finite.
   */
  public static boolean isFinite(Automaton a) {
    if (a.isSingleton()) return true;
    return isFinite(a.initial, new BitSet(a.getNumberOfStates()), new BitSet(a.getNumberOfStates()));
  }
 
  /**
   * Checks whether there is a loop containing s. (This is sufficient since
   * there are never transitions to dead states.)
   */
  // TODO: not great that this is recursive... in theory a
  // large automata could exceed java's stack
  private static boolean isFinite(State s, BitSet path, BitSet visited) {
    path.set(s.number);
    for (Transition t : s.getTransitions())
      if (path.get(t.to.number) || (!visited.get(t.to.number) && !isFinite(t.to, path, visited))) return false;
    path.clear(s.number);
    visited.set(s.number);
    return true;
  }
 
  /**
   * Returns the longest string that is a prefix of all accepted strings and
   * visits each state at most once.
   *
   * @return common prefix
   */
  public static String getCommonPrefix(Automaton a) {
    if (a.isSingleton()) return a.singleton;
    StringBuilder b = new StringBuilder();
    HashSet<State> visited = new HashSet<State>();
    State s = a.initial;
    boolean done;
    do {
      done = true;
      visited.add(s);
      if (!s.accept && s.numTransitions() == 1) {
        Transition t = s.getTransitions().iterator().next();
        if (t.min == t.max && !visited.contains(t.to)) {
          b.appendCodePoint(t.min);
          s = t.to;
          done = false;
        }
      }
    } while (!done);
    return b.toString();
  }
 
  // TODO: this currently requites a determinized machine,
  // but it need not -- we can speed it up by walking the
  // NFA instead.  it'd still be fail fast.
  public static BytesRef getCommonPrefixBytesRef(Automaton a) {
    if (a.isSingleton()) return new BytesRef(a.singleton);
    BytesRef ref = new BytesRef(10);
    HashSet<State> visited = new HashSet<State>();
    State s = a.initial;
    boolean done;
    do {
      done = true;
      visited.add(s);
      if (!s.accept && s.numTransitions() == 1) {
        Transition t = s.getTransitions().iterator().next();
        if (t.min == t.max && !visited.contains(t.to)) {
          ref.grow(++ref.length);
          ref.bytes[ref.length - 1] = (byte)t.min;
          s = t.to;
          done = false;
        }
      }
    } while (!done);
    return ref;
  }
 
  /**
   * Returns the longest string that is a suffix of all accepted strings and
   * visits each state at most once.
   *
   * @return common suffix
   */
  public static String getCommonSuffix(Automaton a) {
    if (a.isSingleton()) // if singleton, the suffix is the string itself.
      return a.singleton;
   
    // reverse the language of the automaton, then reverse its common prefix.
    Automaton r = a.clone();
    reverse(r);
    r.determinize();
    return new StringBuilder(SpecialOperations.getCommonPrefix(r)).reverse().toString();
  }
 
  public static BytesRef getCommonSuffixBytesRef(Automaton a) {
    if (a.isSingleton()) // if singleton, the suffix is the string itself.
      return new BytesRef(a.singleton);
   
    // reverse the language of the automaton, then reverse its common prefix.
    Automaton r = a.clone();
    reverse(r);
    r.determinize();
    BytesRef ref = SpecialOperations.getCommonPrefixBytesRef(r);
    reverseBytes(ref);
    return ref;
  }
 
  private static void reverseBytes(BytesRef ref) {
    if (ref.length <= 1) return;
    int num = ref.length >> 1;
    for (int i = ref.offset; i < ( ref.offset + num ); i++) {
      byte b = ref.bytes[i];
      ref.bytes[i] = ref.bytes[ref.offset * 2 + ref.length - i - 1];
      ref.bytes[ref.offset * 2 + ref.length - i - 1] = b;
    }
  }
 
  /**
   * Reverses the language of the given (non-singleton) automaton while returning
   * the set of new initial states.
   */
  public static Set<State> reverse(Automaton a) {
    a.expandSingleton();
    // reverse all edges
    HashMap<State, HashSet<Transition>> m = new HashMap<State, HashSet<Transition>>();
    State[] states = a.getNumberedStates();
    Set<State> accept = new HashSet<State>();
    for (State s : states)
      if (s.isAccept())
        accept.add(s);
    for (State r : states) {
      m.put(r, new HashSet<Transition>());
      r.accept = false;
    }
    for (State r : states)
      for (Transition t : r.getTransitions())
        m.get(t.to).add(new Transition(t.min, t.max, r));
    for (State r : states) {
      Set<Transition> tr = m.get(r);
      r.setTransitions(tr.toArray(new Transition[tr.size()]));
    }
    // make new initial+final states
    a.initial.accept = true;
    a.initial = new State();
    for (State r : accept)
      a.initial.addEpsilon(r); // ensures that all initial states are reachable
    a.deterministic = false;
    a.clearNumberedStates();
    return accept;
  }
}
TOP

Related Classes of com.flaptor.org.apache.lucene.util.automaton.SpecialOperations

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.