Package gnu.regexp

Source Code of gnu.regexp.RESyntax

/*
*  gnu/regexp/RESyntax.java
*  Copyright (C) 1998 Wes Biggs
*
*  This library is free software; you can redistribute it and/or modify
*  it under the terms of the GNU Library General Public License as published
*  by the Free Software Foundation; either version 2 of the License, or
*  (at your option) any later version.
*
*  This library is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU Library General Public License for more details.
*
*  You should have received a copy of the GNU Library General Public License
*  along with this program; if not, write to the Free Software
*  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

package gnu.regexp;
import java.util.BitSet;

/**
* An RESyntax specifies the way a regular expression will be compiled.
* This class provides a number of predefined useful constants for
* emulating popular regular expression syntaxes.  Additionally the
* user may construct his or her own syntax, using any combination of the
* syntax bit constants.  The syntax is an optional argument to any of the
* matching methods on class RE.
*
* @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
*/

public class RESyntax {
  private BitSet bits;

  // Values for constants are bit indexes

  /**
   * Syntax bit. Backslash is an escape character in lists.
   */
  public static final int RE_BACKSLASH_ESCAPE_IN_LISTS =  0;

  /**
   * Syntax bit. Use \? instead of ? and \+ instead of +.
   */
  public static final int RE_BK_PLUS_QM                =  1;

  /**
   * Syntax bit. POSIX character classes ([:...:]) in lists are allowed.
   */
  public static final int RE_CHAR_CLASSES              =  2;

  /**
   * Syntax bit. ^ and $ are special everywhere.
   * <B>Not implemented.</B>
   */
  public static final int RE_CONTEXT_INDEP_ANCHORS     =  3;

  /**
   * Syntax bit. Repetition operators are only special in valid positions.
   * <B>Not implemented.</B>
   */
  public static final int RE_CONTEXT_INDEP_OPS         =  4;

  /**
   * Syntax bit. Repetition and alternation operators are invalid
   * at start and end of pattern and other places.
   * <B>Not implemented</B>.
   */
  public static final int RE_CONTEXT_INVALID_OPS       =  5;

  /**
   * Syntax bit. Match-any-character operator (.) matches a newline.
   */
  public static final int RE_DOT_NEWLINE               =  6;

  /**
   * Syntax bit. Match-any-character operator (.) does not match a null.
   */
  public static final int RE_DOT_NOT_NULL              =  7;

  /**
   * Syntax bit. Intervals ({x}, {x,}, {x,y}) are allowed.
   */
  public static final int RE_INTERVALS                 =  8;

  /**
   * Syntax bit. No alternation (|), match one-or-more (+), or
   * match zero-or-one (?) operators.
   */
  public static final int RE_LIMITED_OPS               =  9;

  /**
   * Syntax bit. Newline is an alternation operator.
   */
  public static final int RE_NEWLINE_ALT               = 10; // impl.

  /**
   * Syntax bit. Intervals use { } instead of \{ \}
   */
  public static final int RE_NO_BK_BRACES              = 11;

  /**
   * Syntax bit. Grouping uses ( ) instead of \( \).
   */
  public static final int RE_NO_BK_PARENS              = 12;

  /**
   * Syntax bit. Backreferences not allowed.
   */
  public static final int RE_NO_BK_REFS                = 13;

  /**
   * Syntax bit. Alternation uses | instead of \|
   */
  public static final int RE_NO_BK_VBAR                = 14;

  /**
   * Syntax bit. <B>Not implemented</B>.
   */
  public static final int RE_NO_EMPTY_RANGES           = 15;

  /**
   * Syntax bit. An unmatched right parenthesis (')' or '\)', depending
   * on RE_NO_BK_PARENS) will throw an exception when compiling.
   */
  public static final int RE_UNMATCHED_RIGHT_PAREN_ORD = 16;

  /**
   * Syntax bit. <B>Not implemented.</B>
   */
  public static final int RE_HAT_LISTS_NOT_NEWLINE     = 17;

  /**
   * Syntax bit.  Stingy matching is allowed (+?, *?, ??, {x,y}?).
   */
  public static final int RE_STINGY_OPS                = 18;

  /**
   * Syntax bit. Allow character class escapes (\d, \D, \s, \S, \w, \W).
   */
  public static final int RE_CHAR_CLASS_ESCAPES        = 19;

  /**
   * Syntax bit. Allow use of (?:xxx) grouping (subexpression is not saved).
   */
  public static final int RE_PURE_GROUPING             = 20;

  /**
   * Syntax bit. <B>Not implemented</B>.
   */
  public static final int RE_LOOKAHEAD                 = 21;

  /**
   * Syntax bit. Allow beginning- and end-of-string anchors (\A, \Z).
   */
  public static final int RE_STRING_ANCHORS            = 22;

  /**
   * Syntax bit. Allow embedded comments, (#comment), as in Perl5.
   */
  public static final int RE_COMMENTS                  = 23;

  /**
   * Syntax bit. Allow character class escapes within lists, as in Perl5.
   */
  public static final int RE_CHAR_CLASS_ESC_IN_LISTS   = 24;

  private static final int BIT_TOTAL                   = 25;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the awk utility.
   */
  public static final RESyntax RE_SYNTAX_AWK;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the ed utility.
   */
  public static final RESyntax RE_SYNTAX_ED;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the egrep utility.
   */
  public static final RESyntax RE_SYNTAX_EGREP;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the GNU Emacs editor.
   */
  public static final RESyntax RE_SYNTAX_EMACS;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the grep utility.
   */
  public static final RESyntax RE_SYNTAX_GREP;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the POSIX awk specification.
   */
  public static final RESyntax RE_SYNTAX_POSIX_AWK;

  /**
   * Predefined syntax.
   * Emulates POSIX basic regular expression support.
   */
  public static final RESyntax RE_SYNTAX_POSIX_BASIC;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the POSIX egrep specification.
   */
  public static final RESyntax RE_SYNTAX_POSIX_EGREP;

  /**
   * Predefined syntax.
   * Emulates POSIX extended regular expression support.
   */
  public static final RESyntax RE_SYNTAX_POSIX_EXTENDED;

  /**
   * Predefined syntax.
   * Emulates POSIX basic minimal regular expressions.
   */
  public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_BASIC;

  /**
   * Predefined syntax.
   * Emulates POSIX extended minimal regular expressions.
   */
  public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_EXTENDED;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the sed utility.
   */
  public static final RESyntax RE_SYNTAX_SED;

  /**
   * Predefined syntax.
   * Emulates regular expression support in Larry Wall's perl, version 4,
   */
  public static final RESyntax RE_SYNTAX_PERL4;

  /**
   * Predefined syntax.
   * Emulates regular expression support in Larry Wall's perl, version 4,
   * using single line mode (/s modifier).
   */
  public static final RESyntax RE_SYNTAX_PERL4_S; // single line mode (/s)

  /**
   * Predefined syntax.
   * Emulates regular expression support in Larry Wall's perl, version 5.
   */
  public static final RESyntax RE_SYNTAX_PERL5; 

  /**
   * Predefined syntax.
   * Emulates regular expression support in Larry Wall's perl, version 5,
   * using single line mode (/s modifier).
   */
  public static final RESyntax RE_SYNTAX_PERL5_S;
 
  static {
    // Define syntaxes

    RE_SYNTAX_EMACS = new RESyntax();

    RESyntax RE_SYNTAX_POSIX_COMMON = new RESyntax()
      .set(RE_CHAR_CLASSES)
      .set(RE_DOT_NEWLINE)
      .set(RE_DOT_NOT_NULL)
      .set(RE_INTERVALS)
      .set(RE_NO_EMPTY_RANGES);

    RE_SYNTAX_POSIX_BASIC = new RESyntax(RE_SYNTAX_POSIX_COMMON)
      .set(RE_BK_PLUS_QM);

    RE_SYNTAX_POSIX_EXTENDED = new RESyntax(RE_SYNTAX_POSIX_COMMON)
      .set(RE_CONTEXT_INDEP_ANCHORS)
      .set(RE_CONTEXT_INDEP_OPS)
      .set(RE_NO_BK_BRACES)
      .set(RE_NO_BK_PARENS)
      .set(RE_NO_BK_VBAR)
      .set(RE_UNMATCHED_RIGHT_PAREN_ORD);

    RE_SYNTAX_AWK = new RESyntax()
      .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
      .set(RE_DOT_NOT_NULL)
      .set(RE_NO_BK_PARENS)
      .set(RE_NO_BK_REFS)
      .set(RE_NO_BK_VBAR)
      .set(RE_NO_EMPTY_RANGES)
      .set(RE_UNMATCHED_RIGHT_PAREN_ORD);
   
    RE_SYNTAX_POSIX_AWK = new RESyntax(RE_SYNTAX_POSIX_EXTENDED)
      .set(RE_BACKSLASH_ESCAPE_IN_LISTS);
   
    RE_SYNTAX_GREP = new RESyntax()
      .set(RE_BK_PLUS_QM)
      .set(RE_CHAR_CLASSES)
      .set(RE_HAT_LISTS_NOT_NEWLINE)
      .set(RE_INTERVALS)
      .set(RE_NEWLINE_ALT);

    RE_SYNTAX_EGREP = new RESyntax()
      .set(RE_CHAR_CLASSES)
      .set(RE_CONTEXT_INDEP_ANCHORS)
      .set(RE_CONTEXT_INDEP_OPS)
      .set(RE_HAT_LISTS_NOT_NEWLINE)
      .set(RE_NEWLINE_ALT)
      .set(RE_NO_BK_PARENS)
      .set(RE_NO_BK_VBAR);
   
    RE_SYNTAX_POSIX_EGREP = new RESyntax(RE_SYNTAX_EGREP)
      .set(RE_INTERVALS)
      .set(RE_NO_BK_BRACES);
   
    /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
   
    RE_SYNTAX_ED = new RESyntax(RE_SYNTAX_POSIX_BASIC);
   
    RE_SYNTAX_SED = new RESyntax(RE_SYNTAX_POSIX_BASIC);
   
    RE_SYNTAX_POSIX_MINIMAL_BASIC = new RESyntax(RE_SYNTAX_POSIX_COMMON)
      .set(RE_LIMITED_OPS);
   
    /* Differs from RE_SYNTAX_POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
       replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */

    RE_SYNTAX_POSIX_MINIMAL_EXTENDED = new RESyntax(RE_SYNTAX_POSIX_COMMON)
      .set(RE_CONTEXT_INDEP_ANCHORS)
      .set(RE_CONTEXT_INVALID_OPS)
      .set(RE_NO_BK_BRACES)
      .set(RE_NO_BK_PARENS)
      .set(RE_NO_BK_REFS)
      .set(RE_NO_BK_VBAR)
      .set(RE_UNMATCHED_RIGHT_PAREN_ORD);
   
    /* There is no official Perl spec, but here's a "best guess" */
   
    RE_SYNTAX_PERL4 = new RESyntax()
      .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
      .set(RE_CONTEXT_INDEP_ANCHORS)
      .set(RE_CONTEXT_INDEP_OPS)          // except for '{', apparently
      .set(RE_INTERVALS)
      .set(RE_NO_BK_BRACES)
      .set(RE_NO_BK_PARENS)
      .set(RE_NO_BK_VBAR)
      .set(RE_NO_EMPTY_RANGES)
      .set(RE_CHAR_CLASS_ESCAPES);    // \d,\D,\w,\W,\s,\S

    RE_SYNTAX_PERL4_S = new RESyntax(RE_SYNTAX_PERL4)
      .set(RE_DOT_NEWLINE);
   
    RE_SYNTAX_PERL5 = new RESyntax(RE_SYNTAX_PERL4)
      .set(RE_PURE_GROUPING)          // (?:)
      .set(RE_STINGY_OPS)             // *?,??,+?,{}?
      .set(RE_LOOKAHEAD)              // (?=)(?!) not implemented
      .set(RE_STRING_ANCHORS)         // \A,\Z
      .set(RE_CHAR_CLASS_ESC_IN_LISTS)// \d,\D,\w,\W,\s,\S within []
      .set(RE_COMMENTS);              // (?#)
   
    RE_SYNTAX_PERL5_S = new RESyntax(RE_SYNTAX_PERL5)
      .set(RE_DOT_NEWLINE);
  }


  /**
   * Construct a new syntax object with all bits turned off.
   * This is equivalent to RE_SYNTAX_EMACS.
   */
  public RESyntax() {
    bits = new BitSet(BIT_TOTAL);
  }

  /**
   * Construct a new syntax object with all bits set the same
   * as the other syntax.
   */
  public RESyntax(RESyntax other) {
    bits = (BitSet) other.bits.clone();
  }

  /**
   * Check if a given bit is set in this syntax.
   */
  public boolean get(int index) {
    return bits.get(index);
  }

  /**
   * Set a given bit in this syntax.  Returns a reference to this syntax
   * for easy chaining.
   */
  public RESyntax set(int index) {
    bits.set(index);
    return this;
  }
}
TOP

Related Classes of gnu.regexp.RESyntax

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.