Package de.susebox.jtopas.impl

Source Code of de.susebox.jtopas.impl.PatternMatcher$DataProviderCharSequence

/*
* PatternMatcher.java: Interface for pattern-aware tokenizers.
*
* Copyright (C) 2003 Heiko Blau
*
* This file belongs to the JTopas Library.
* JTopas is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation; either version 2.1 of the License, or (at your
* option) any later version.
*
* This software is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License along
* with JTopas. If not, write to the
*
*   Free Software Foundation, Inc.
*   59 Temple Place, Suite 330,
*   Boston, MA 02111-1307
*   USA
*
* or check the Internet: http://www.fsf.org
*
* Contact:
*   email: heiko@susebox.de
*/

package de.susebox.jtopas.impl;

//-----------------------------------------------------------------------------
// Imports
//
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.regex.PatternSyntaxException;

import de.susebox.jtopas.TokenizerProperty;
import de.susebox.jtopas.Flags;
import de.susebox.jtopas.TokenizerException;

import de.susebox.jtopas.spi.PatternHandler;
import de.susebox.jtopas.spi.DataProvider;


//-----------------------------------------------------------------------------
// Class PatternMatcher
//

/**<p>
* Implementation of the {@link PatternHandler} interface using the JDK 1.4
* package <code>java.util.regex</code>.
*</p>
*
* @author  Heiko Blau
*/
public class PatternMatcher implements PatternHandler {
 
  //---------------------------------------------------------------------------
  // Constructors
  //
 
  /**
   * The constructor takes a pattern and the {@link TokenizerProperty} object
   * associated with this instance of <code>PatternMatcher</code>. The global
   * flags are passed to control the behaviour for attributes that are not
   * specified in the property itself (e.g. case-sensitivity).
   *
   * @param   prop          the {@link TokenizerProperty} associated with this object
   * @param   globalFlags   flags that are to be used if not set explicitely in the property
   * @throws  NullPointerException if the given parameter is <code>null</code>
   */
  public PatternMatcher(TokenizerProperty prop, int globalFlags) throws NullPointerException {
    _globalFlags = globalFlags;
    setProperty(prop);
  }
 

  //---------------------------------------------------------------------------
  // Methods of the PatternHandler interface
  //
 
  /**
   * The method is a dummy implementation for the interface {@link PatternHandler}
   * and always returns <code>true</code>.
   *
   * @return  always <code>true</code>
   */
  public boolean hasPattern() {
    return true;
  }
 
  /**
   * This method checks if the start of a character range given through the
   * {@link DataProvider} matches a pattern. See {@link PatternHandler#matches}
   * for details.
   *
   * @param   dataProvider    the source to get the data from
   * @param   freePatternOnly if <code>true</code> only unbounded pattern should be
   *                          checked (pattern not enclosed in whitespaces, separators etc.)
   * @return  a {@link PatternHandler.Result} object or <code>null</code> if no
   *          match was found
   * @throws  TokenizerException    generic exception
   * @throws  NullPointerException  if no {@link DataProvider} is given
   */
  public PatternHandler.Result matches(DataProvider dataProvider)
    throws TokenizerException, NullPointerException
  {
    // invoke JDK 1.4 or jakarta regexp API
    try {
      String[]  groups;
     
      _matcher.reset(new DataProviderCharSequence(dataProvider));
      if (_matcher.lookingAt()) {
        if (_property.isFlagSet(Flags.F_RETURN_IMAGE_PARTS, (_globalFlags & Flags.F_RETURN_IMAGE_PARTS) != 0)) {
          // get the capturing groups
          groups = new String[_matcher.groupCount() + 1];
          for (int index = 0; index < groups.length; ++index) {
            groups[index] = _matcher.group(index);
          }
        } else {
          groups = new String[] {};
        }
        return new LocalResult(_property, _matcher.end(), groups);
      } else {
        return null;
      }
    } catch (Exception ex) {
      throw new TokenizerException(ex);
    }
  }

 
  //---------------------------------------------------------------------------
  // Methods
  //

  /**
   * Setting the {@link TokenizerProperty} for this <code>PatternMatcher</code>.
   * This method will recompile the regular expression pattern.
   *
   * @param   prop    the {@link TokenizerProperty} associated with this object
   * @throws  NullPointerException if the given parameter is <code>null</code>
   */
  public void setProperty(TokenizerProperty prop) throws NullPointerException {
    // no pattern given
    if (prop == null) {
      throw new NullPointerException("No property given.");
    } else if (prop.getImages() == null || prop.getImages().length < 1 || prop.getImages()[0] == null) {
      throw new NullPointerException("Property contains no pattern image.");
    }
   
    // compile the pattern
    int flags = Pattern.MULTILINE | Pattern.DOTALL;

    if (prop.isFlagSet(Flags.F_NO_CASE, (_globalFlags & Flags.F_NO_CASE) != 0)) {
      flags |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
    }
    _matcher = Pattern.compile(prop.getImages()[0], flags).matcher("");

    // set property
    _property = prop;
  }
 
  /**
   * Retrieving the {@link TokenizerProperty} of this <code>PatternMatcher</code>.
   *
   * @return  the {@link TokenizerProperty} associated with this object
   */
  public TokenizerProperty getProperty() {
    return _property;
  }
 

  //---------------------------------------------------------------------------
  // Inner Classes
  //
 
  /**
   * The result of a match operation.
   */
  private final class LocalResult implements PatternHandler.Result {
   
    /**
     * The constructor gets all the nessecary parameters.
     *
     * @param prop          the pattern property
     * @param lengthOfMatch the detected number of characters that match the pattern
     * @param groups        array with the capturing groups
     */
    protected LocalResult(TokenizerProperty prop, int lengthOfMatch, String[] groups) {
      _property       = prop;
      _lengthOfMatch  = lengthOfMatch;
      _groups         = groups;
    }
   
    /**
     * Returns the capturing groups of a match.
     *
     * @return  the capturing groups of the last pattern match in {@link #matches}.
     */
    public String[] getGroups() throws TokenizerException {
      return _groups;
    }
   
    /**
     * Returns the number of characters that are part of a match.
     *
     * @return length of match
     */
    public int getLengthOfMatch() {
      return _lengthOfMatch;
    }
   
    /**
     * Returns the {@link TokenizerProperty} that describes the pattern that
     * matches data passed to {@link PatternHandler#matches}.
     *
     * @return the pattern property of a successful match
     */
    public TokenizerProperty getProperty() {
      return _property;
    }
   
    // member
    private TokenizerProperty _property;
    private int               _lengthOfMatch;
    private String[]          _groups;
  }

  /**
   * An implementation of the JDK 1.4 {@link java.lang.CharSequence} interface
   * backed by a {@link DataProvider}.
   */
  private final class DataProviderCharSequence implements CharSequence {
   
    /**
     * The constructor takes the reference to the {@link DataProvider}.
     *
     * @param dataProvider  the backing <code>DataProvider</code>
     */
    public DataProviderCharSequence(DataProvider dataProvider) {
      this(dataProvider, dataProvider.getStartPosition(), dataProvider.getLength());
    }
   
    /**
     * The constructor takes the reference to the {@link DataProvider}, the
     * start position and length. It is nessecary for the {@link #subSequence}
     * method
     *
     * @param dataProvider  the backing <code>DataProvider</code>
     */
    private DataProviderCharSequence(DataProvider dataProvider, int start, int length) {
      _dataProvider = dataProvider;
      _start        = start;
      _length       = length;
    }
   
    /**
     * Returns the character at the specified index.  An index ranges from zero
     * to <code>length() - 1</code>.  The first character of the sequence is at
     * index zero, the next at index one, and so on, as for array
     * indexing. </p>
     *
     * @param   index   the index of the character to be returned
     * @return  the specified character
     * @throws  ArrayIndexOutOfBoundsException
     *          if the <code>index</code> argument is negative or not less than
     *          <code>length()</code>
     */
    public char charAt(int index) throws ArrayIndexOutOfBoundsException {
      return _dataProvider.getCharAt(_start + index - _dataProvider.getStartPosition());
    }
   
    /** Returns the length of this character sequence.  The length is the number
     * of 16-bit Unicode characters in the sequence. </p>
     *
     * @return  the number of characters in this sequence
     *
     */
    public int length() {
      return _length;
    }
   
    /**
     * Returns a new character sequence that is a subsequence of this sequence.
     * See {@link java.lang.CharSequence#subSequence} for details.
     *
     * @param   start   the start index, inclusive
     * @param   end     the end index, exclusive
     * @return  the specified subsequence
     * @throws  IndexOutOfBoundsException
     *          if <code>start</code> or <code>end</code> are negative,
     *          if <code>end</code> is greater than <code>length()</code>,
     *          or if <code>start</code> is greater than <code>end</code>
     */
    public CharSequence subSequence(int start, int end) {
      if (start < 0 || end < 0 || end > length() || start > end) {
        throw new IndexOutOfBoundsException();
      }
      return new DataProviderCharSequence(_dataProvider, _start + start, end - start);
    }
   
    /**
     * Returns the string representation for the <code>DataProvider</code>.
     *
     * @return the string consisting of all available data in the DataProvider.
     */
    public String toString() {
      int realStart = _start - _dataProvider.getStartPosition();
     
      return _dataProvider.toString().substring(realStart, realStart + _length);
    }
   
    // members
    private DataProvider  _dataProvider = null;
    private int           _start        = 0;
    private int           _length       = 0;
  }

 
  //---------------------------------------------------------------------------
  // Members
  //
  private TokenizerProperty _property    = null;
  private Matcher           _matcher     = null;
  private int               _globalFlags = 0;
}
 
TOP

Related Classes of de.susebox.jtopas.impl.PatternMatcher$DataProviderCharSequence

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.