// Source code of com.google.collide.shared.util.TextUtils

// Copyright 2012 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


package com.google.collide.shared.util;


import com.google.gwt.regexp.shared.MatchResult;
import com.google.gwt.regexp.shared.RegExp;


/**
 * Utility methods for text operations. This differs from {@link StringUtils} by
 * operating on a higher-level (for example, words and identifiers).
 */
public class TextUtils {


  /**
   * Finds the next character which is not a mark or other character. Will
   * return column if the end of the line is reached or column is a non-mark or
   * other character.
   */
  public static int findNextCharacterInclusive(String text, int column) {
    MatchResult result = RegExpUtils.findMatchAfterIndex(
        UnicodeUtils.regexpNotMarkOrOtherExcludingTabAndNewline, text, column - 1);
    // if result is null, then it's likely we're at the \n (I think).
    return result == null ? column : result.getIndex();
  }


  /**
   * Finds the next character which is not a combining character.
   */
  public static int findNonMarkNorOtherCharacter(String text, int column) {
    /*
     * If moving forward: if next character is combining mark, skip to next
     * non-combining mark character, else go forward one character.
     */
    if (column + 1 >= text.length()) {
      return text.length() + 1;
    }


    MatchResult match = RegExpUtils.findMatchAfterIndex(
        UnicodeUtils.regexpNotMarkOrOtherExcludingTabAndNewline, text, column);
    if (match == null) {
      return text.length() + 1;
    } else {
      return match.getIndex();
    }
  }
  
  /**
   * Finds the previous character which is not a combining character.
   */
  public static int findPreviousNonMarkNorOtherCharacter(String text, int column) {
    /*
     * If moving backward: if previous character is combining mark, skip to
     * before first non-combining mark character. If it isn't a combining mark,
     * proceed back one character.
     */
    if (column - 1 < 0) {
      return -1;
    }


    MatchResult match = RegExpUtils.findMatchBeforeIndex(
        UnicodeUtils.regexpNotMarkOrOtherExcludingTabAndNewline, text, column);
    if (match == null) {
      return -1;
    } else {
      return match.getIndex();
    }
  }


  /**
   * Finds the index of the next non-similar word. There are two groups of
   * words: Javascript identifiers and the remaining non-whitespace characters.
   *
   * Consider the text "hello there". With {@code skipWhitespaceBeforeWord}
   * true, the return value would be at the 't'. With it false, the return value
   * would be at the ' '.
   *
   * Consider the text "someFunction(foo); // Test" and 
   * {@code skipWhitespaceBeforeWord} is true. findNextWord(text, 0) will return
   * the index of the '(', since it is the first word that is not an identifier.
   * findNextWord(text, 12) will return the 13 ('f' from "foo").
   * findNextWord(text, 17) will return 19 ('/').
   *
   * @param skipWhitespaceBeforeWord true to skip the whitespace before the next
   *        word (thus returning the position of the first letter of the word),
   *        false to return the position of the first whitespace before the word
   * @return the index according to {@code skipWhitespaceBeforeWord}, or if the
   *         given {@code column} is beyond the string's length, this will
   *         return the length plus one.
   */
  public static int findNextWord(String text, int column, boolean skipWhitespaceBeforeWord) {
    if (column + 1 >= text.length()) {
      return text.length() + 1;
    }


    int initialColumn = column;
    if (skipWhitespaceBeforeWord) {
      column = skipNonwhitespaceSimilar(text, column, true);
      column = skipWhitespace(text, column, true);
    } else {
      column = skipWhitespace(text, column, true);
      column = skipNonwhitespaceSimilar(text, column, true);
    }


    return column;
  }


  /**
   * Counts number of whitespaces at the beginning of line.
   */
  public static int countWhitespacesAtTheBeginningOfLine(String text) {
    MatchResult result = RegExpUtils.findMatchAfterIndex(
        UnicodeUtils.regexpNotWhitespaceExcludingNewlineAndCarriageReturn, text, -1);


    return result == null ? text.length() : result.getIndex();
  }


  /**
   * Similar to {@link #findNextWord}, but searches backwards.
   *
   * <p>Character at {@code column} position is ignored, because it denotes the
   * symbol after "cursor".
   */
  public static int findPreviousWord(String text, int column, boolean skipWhitespaceBeforeWord) {
    column--;


    if (column < 0) {
      return -1;
    }


    if (skipWhitespaceBeforeWord) {
      column = skipNonwhitespaceSimilar(text, column, false);
      column = skipWhitespace(text, column, false);
    } else {
      column = skipWhitespace(text, column, false);
      if (column >= 0) {
        column = skipNonwhitespaceSimilar(text, column, false);
      }
    }


    column++;


    return column;
  }


  /**
   * Jumps to the previous or next best match given the parameters below. This
   * may be inside the current word. For example, if the cursor is at index 1 in
   * "hey bob", and moveByWord is called with returnCursorAtEnd=true, then the
   * returned value will be 2 (y). If returnCursorAtEnd is false, it would
   * return 4 (b).
   *
   * @param column the start column for the search
   * @param forward true for forward match, false for backwards match
   * @param returnCursorAtEnd if true, the cursor position returned will be for
   *        the last character of the next/previous word found
   * @return the calculated column, -1 for no valid match found
   */
  /*
   * TODO: Make sure we look at this, it is only used by the {@link
   * VimScheme} and I think it can be made significantly less complicated as
   * well as use the {@link #findNextWord(String, int, boolean)} and {@link
   * #findPreviousWord(String, int, boolean)} API.
   */
  public static int moveByWord(
      String text, int column, boolean forward, boolean returnCursorAtEnd) {
    int curColumn = column;
    int length = text.length();
    int direction = forward ? 1 : -1;
    boolean farWordEnd =
        ((direction == 1 && returnCursorAtEnd) || (direction == -1 && !returnCursorAtEnd));
    boolean foundEarlyMatch = false;


    if (!UnicodeUtils.isWhitespace(text.charAt(curColumn))) {
      // land on the first whitespace character after the last letter
      curColumn = skipNonwhitespaceSimilar(text, curColumn, forward);
      if (farWordEnd && curColumn - direction != column) {
        // found a match within the same word
        curColumn -= direction; // go back to last non-whitespace character
        foundEarlyMatch = true;
      }
    }


    if (!foundEarlyMatch && curColumn >= 0 && curColumn < length) {
      // land on the first non-whitespace character of the next word
      curColumn = skipWhitespace(text, curColumn, forward);
      if (farWordEnd && curColumn >= 0 && curColumn < length) {
        // land on the last non-whitespace character of the next word
        curColumn = skipNonwhitespaceSimilar(text, curColumn, forward) - direction;
      }
    }


    if (curColumn < 0 || curColumn >= length) {
      return -1;
    }
    return curColumn;
  }


  /**
   * Returns the entire word that the cursor at {@code column} falls into, or
   * null if the cursor is over whitespace.
   */
  public static String getWordAtColumn(String text, int column) {
    if (UnicodeUtils.isWhitespace(text.charAt(column))) {
      return null;
    }
    int leftColumn = skipNonwhitespaceSimilar(text, column, false) + 1;
    int rightColumn = skipNonwhitespaceSimilar(text, column, true);
    if (leftColumn >= 0 && rightColumn < text.length()) {
      return text.substring(leftColumn, rightColumn);
    }
    return null;
  }


  public static boolean isValidIdentifierCharacter(char c) {
    return !RegExpUtils.resetAndTest(
        UnicodeUtils.regexpNotJavascriptIdentifierCharacter, String.valueOf(c));
  }
  
  public static boolean isNonIdentifierAndNonWhitespace(char c) {
    return RegExpUtils.resetAndTest(UnicodeUtils.regexpIdentifierOrWhitespace, String.valueOf(c));
  }


  private static int skipIdentifier(String text, int column, boolean forward) {
    return directionalRegexp(
        forward, UnicodeUtils.regexpNotJavascriptIdentifierCharacter, text, column);
  }


  public static int skipNonwhitespaceNonidentifier(String text, int column, boolean forward) {
    if (column >= 0 && column < text.length()) {
      return directionalRegexp(forward, UnicodeUtils.regexpIdentifierOrWhitespace, text, column);
    }
    return column;
  }


  private static int skipNonwhitespaceSimilar(String text, int column, boolean forward) {
    if (isValidIdentifierCharacter(text.charAt(column))) {
      return skipIdentifier(text, column, forward);
    } else {
      return skipNonwhitespaceNonidentifier(text, column, forward);
    }
  }


  private static int skipWhitespace(String text, int column, boolean forward) {
    // we only execute the whitespace skip if the current character is in fact
    // whitespace
    if (column >= 0 && column < text.length() && UnicodeUtils.isWhitespace(text.charAt(column))) {
      return directionalRegexp(forward, UnicodeUtils.regexpNotWhitespace, text, column);
    }
    return column;
  }


  /**
   * Depending on the supplied direction, it will call either
   * findMatchAfterIndex or findMatchBeforeIndex. Once the result is obtained it
   * will return either the match index or the appropriate bound column
   * (text.length() or -1).
   */
  private static int directionalRegexp(boolean forward, RegExp regexp, String text, int column) {
    MatchResult result =
        forward ? RegExpUtils.findMatchAfterIndex(regexp, text, column)
            : RegExpUtils.findMatchBeforeIndex(regexp, text, column);
    int fallback = forward ? text.length() : -1;
    return result == null ? fallback : result.getIndex();
  }
}
// Source code of com.google.collide.shared.util.TextUtils

// Related classes of com.google.collide.shared.util.TextUtils