// Copyright 2012 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.collide.shared.util;
import com.google.gwt.regexp.shared.MatchResult;
import com.google.gwt.regexp.shared.RegExp;
/**
* Utility methods for text operations. This differs from {@link StringUtils} by
* operating at a higher level (for example, on words and identifiers).
*/
public class TextUtils {
/**
 * Locates the first character at or after {@code column} that matches
 * {@code UnicodeUtils.regexpNotMarkOrOtherExcludingTabAndNewline}, i.e. that
 * is not a mark or other character.
 *
 * @param text the line to search
 * @param column the first column that is considered
 * @return the index of the matching character, or {@code column} itself when
 *         nothing matches from that position on (for example at the end of
 *         the line)
 */
public static int findNextCharacterInclusive(String text, int column) {
  // Begin one column earlier so that the character at column itself takes
  // part in the search (the start index is presumably exclusive).
  int searchFrom = column - 1;
  MatchResult found = RegExpUtils.findMatchAfterIndex(
      UnicodeUtils.regexpNotMarkOrOtherExcludingTabAndNewline, text, searchFrom);
  if (found != null) {
    return found.getIndex();
  }
  // No match: most likely the cursor sits on the trailing newline, so stay put.
  return column;
}
/**
 * Moves forward from {@code column} to the next character that is not a
 * combining character: a run of combining marks following {@code column} is
 * skipped as a whole, otherwise the result is simply the next character.
 *
 * @param text the line to search
 * @param column the column to start after
 * @return the index of the next non-combining character, or
 *         {@code text.length() + 1} when {@code column} is already at or past
 *         the last character or no such character exists
 */
public static int findNonMarkNorOtherCharacter(String text, int column) {
  final int pastEnd = text.length() + 1;
  int following = column + 1;
  if (following >= text.length()) {
    return pastEnd;
  }
  MatchResult next = RegExpUtils.findMatchAfterIndex(
      UnicodeUtils.regexpNotMarkOrOtherExcludingTabAndNewline, text, column);
  return next != null ? next.getIndex() : pastEnd;
}
/**
 * Moves backward from {@code column} to the previous character that is not a
 * combining character: combining marks directly before {@code column} are
 * skipped together with the character they belong to, otherwise the result
 * is simply the preceding character.
 *
 * @param text the line to search
 * @param column the column to start before
 * @return the index of the previous non-combining character, or {@code -1}
 *         when {@code column} is at the start of the text or no such
 *         character exists
 */
public static int findPreviousNonMarkNorOtherCharacter(String text, int column) {
  final int beforeStart = -1;
  int preceding = column - 1;
  if (preceding < 0) {
    return beforeStart;
  }
  MatchResult previous = RegExpUtils.findMatchBeforeIndex(
      UnicodeUtils.regexpNotMarkOrOtherExcludingTabAndNewline, text, column);
  return previous != null ? previous.getIndex() : beforeStart;
}
/**
 * Finds the index of the next non-similar word. There are two groups of
 * words: Javascript identifiers and the remaining non-whitespace characters.
 *
 * <p>Consider the text "hello there". With {@code skipWhitespaceBeforeWord}
 * true, the return value would be at the 't'. With it false, the return value
 * would be at the ' '.
 *
 * <p>Consider the text "someFunction(foo); // Test" and
 * {@code skipWhitespaceBeforeWord} is true. findNextWord(text, 0) will return
 * the index of the '(', since it is the first word that is not an identifier.
 * findNextWord(text, 12) will return 13 ('f' from "foo").
 * findNextWord(text, 17) will return 19 ('/').
 *
 * @param text the line to search
 * @param column the column to start from; must not be negative
 * @param skipWhitespaceBeforeWord true to skip the whitespace before the next
 *        word (thus returning the position of the first letter of the word),
 *        false to return the position of the first whitespace before the word
 * @return the index according to {@code skipWhitespaceBeforeWord};
 *         {@code text.length()} if the scan runs off the end of the text; or
 *         {@code text.length() + 1} if {@code column} is already at or beyond
 *         the last character
 */
public static int findNextWord(String text, int column, boolean skipWhitespaceBeforeWord) {
  int length = text.length();
  if (column + 1 >= length) {
    return length + 1;
  }

  if (skipWhitespaceBeforeWord) {
    column = skipNonwhitespaceSimilar(text, column, true);
    // skipWhitespace checks its own bounds, so a column equal to length is fine.
    column = skipWhitespace(text, column, true);
  } else {
    column = skipWhitespace(text, column, true);
    // When only whitespace remains, skipWhitespace yields text.length();
    // skipNonwhitespaceSimilar reads text.charAt(column) and must not be
    // called with that value (mirrors the guard in findPreviousWord).
    if (column < length) {
      column = skipNonwhitespaceSimilar(text, column, true);
    }
  }
  return column;
}
/**
 * Counts the leading whitespace characters of a line.
 *
 * @param text the line to inspect
 * @return the index of the first character matched by
 *         {@code UnicodeUtils.regexpNotWhitespaceExcludingNewlineAndCarriageReturn},
 *         or {@code text.length()} when there is no such character
 */
public static int countWhitespacesAtTheBeginningOfLine(String text) {
  // -1 makes the search cover the line from its very first character.
  MatchResult firstNonWhitespace = RegExpUtils.findMatchAfterIndex(
      UnicodeUtils.regexpNotWhitespaceExcludingNewlineAndCarriageReturn, text, -1);
  if (firstNonWhitespace == null) {
    return text.length();
  }
  return firstNonWhitespace.getIndex();
}
/**
 * Backward counterpart of {@link #findNextWord}.
 *
 * <p>The character at {@code column} itself is not examined, because it is
 * the symbol after the "cursor"; scanning starts at {@code column - 1}.
 *
 * @param text the line to search
 * @param column the cursor column
 * @param skipWhitespaceBeforeWord true to first skip the run of similar
 *        characters and then any whitespace, false to first skip whitespace
 *        and then the run of similar characters
 * @return the column just after the position where the backward scan
 *         stopped, or {@code -1} when {@code column} is zero or negative
 */
public static int findPreviousWord(String text, int column, boolean skipWhitespaceBeforeWord) {
  int cursor = column - 1;
  if (cursor < 0) {
    return -1;
  }

  if (!skipWhitespaceBeforeWord) {
    cursor = skipWhitespace(text, cursor, false);
    // Skipping whitespace may run off the start of the text.
    if (cursor >= 0) {
      cursor = skipNonwhitespaceSimilar(text, cursor, false);
    }
  } else {
    cursor = skipNonwhitespaceSimilar(text, cursor, false);
    cursor = skipWhitespace(text, cursor, false);
  }

  // The scan stops one character before the target; step back onto it.
  return cursor + 1;
}
/**
* Jumps to the previous or next best match given the parameters below. The
* result may lie inside the current word. For example, with the cursor at
* index 1 in "hey bob" and moving forward, {@code returnCursorAtEnd=true}
* yields 2 ('y'), while {@code returnCursorAtEnd=false} yields 4 ('b').
*
* <p>NOTE(review): {@code column} is handed to {@code text.charAt} without a
* bounds check, so callers presumably must pass a valid index into
* {@code text} - confirm against the call sites.
*
* @param text the line to navigate in
* @param column the start column for the search
* @param forward true for forward match, false for backwards match
* @param returnCursorAtEnd if true, the cursor position returned will be for
* the last character of the next/previous word found
* @return the calculated column, or -1 when the computed position falls
* outside of {@code text} (no valid match found)
*/
/*
* TODO: Make sure we look at this, it is only used by the {@link
* VimScheme} and I think it can be made significantly less complicated as
* well as use the {@link #findNextWord(String, int, boolean)} and {@link
* #findPreviousWord(String, int, boolean)} API.
*/
public static int moveByWord(
String text, int column, boolean forward, boolean returnCursorAtEnd) {
int curColumn = column;
int length = text.length();
// +1 when scanning forward, -1 when scanning backward
int direction = forward ? 1 : -1;
// true when the target is the edge of a word that is reached last in the
// direction of travel: forward to a word end, or backward without
// returnCursorAtEnd
boolean farWordEnd =
((direction == 1 && returnCursorAtEnd) || (direction == -1 && !returnCursorAtEnd));
boolean foundEarlyMatch = false;
if (!UnicodeUtils.isWhitespace(text.charAt(curColumn))) {
// leave the current run of similar characters; this lands on the first
// character past it, or on the bound (length or -1) if the run reaches
// the edge of the text
curColumn = skipNonwhitespaceSimilar(text, curColumn, forward);
if (farWordEnd && curColumn - direction != column) {
// the far edge of the current word differs from the start column, so
// the match lies within the same word
curColumn -= direction; // go back to last non-whitespace character
foundEarlyMatch = true;
}
}
if (!foundEarlyMatch && curColumn >= 0 && curColumn < length) {
// land on the first non-whitespace character of the next word
// (skipWhitespace is a no-op when the current character is not whitespace)
curColumn = skipWhitespace(text, curColumn, forward);
if (farWordEnd && curColumn >= 0 && curColumn < length) {
// land on the last non-whitespace character of the next word: skip the
// whole run, then step back by one in the direction of travel
curColumn = skipNonwhitespaceSimilar(text, curColumn, forward) - direction;
}
}
// any position outside of the text means that nothing was found
if (curColumn < 0 || curColumn >= length) {
return -1;
}
return curColumn;
}
/**
 * Returns the entire word that the cursor at {@code column} falls into. A
 * word is a maximal run of similar characters around {@code column}: either
 * identifier characters or other non-whitespace characters.
 *
 * @param text the line to inspect
 * @param column the cursor column
 * @return the word under the cursor, or {@code null} if the cursor is over
 *         whitespace or {@code column} is not a valid index into
 *         {@code text}
 */
public static String getWordAtColumn(String text, int column) {
  if (column < 0 || column >= text.length()) {
    // Nothing is under the cursor; also avoids an out-of-range charAt.
    return null;
  }
  if (UnicodeUtils.isWhitespace(text.charAt(column))) {
    return null;
  }

  // The backward scan stops on the character before the word (or at -1),
  // so the word itself starts one position later.
  int wordStart = skipNonwhitespaceSimilar(text, column, false) + 1;
  // The forward scan stops just past the word; that is text.length() when
  // the word runs up to the end of the text, which is a valid exclusive end
  // index for substring and therefore has to be accepted here.
  int wordEnd = skipNonwhitespaceSimilar(text, column, true);

  if (wordStart >= 0 && wordEnd <= text.length()) {
    return text.substring(wordStart, wordEnd);
  }
  return null;
}
/**
 * Tells whether {@code c} is an identifier character, i.e. it is not matched
 * by {@code UnicodeUtils.regexpNotJavascriptIdentifierCharacter}.
 *
 * @param c the character to classify
 * @return true if {@code c} is not rejected by the non-identifier pattern
 */
public static boolean isValidIdentifierCharacter(char c) {
  String candidate = String.valueOf(c);
  boolean rejected = RegExpUtils.resetAndTest(
      UnicodeUtils.regexpNotJavascriptIdentifierCharacter, candidate);
  return !rejected;
}
/**
 * Tells whether {@code c} is neither an identifier character nor whitespace
 * (for example punctuation or an operator symbol).
 *
 * @param c the character to classify
 * @return true if {@code c} is not matched by
 *         {@code UnicodeUtils.regexpIdentifierOrWhitespace}
 */
public static boolean isNonIdentifierAndNonWhitespace(char c) {
  // The pattern matches the characters that ARE identifier characters or
  // whitespace (skipNonwhitespaceNonidentifier uses it as its stop
  // condition), so the test result has to be negated, in the same way as in
  // isValidIdentifierCharacter.
  return !RegExpUtils.resetAndTest(
      UnicodeUtils.regexpIdentifierOrWhitespace, String.valueOf(c));
}
/**
 * Scans from {@code column} in the given direction for the nearest character
 * matched by {@code UnicodeUtils.regexpNotJavascriptIdentifierCharacter}.
 *
 * @return the index of that character, or the bound reported by
 *         {@link #directionalRegexp} when there is none
 */
private static int skipIdentifier(String text, int column, boolean forward) {
  RegExp stopPattern = UnicodeUtils.regexpNotJavascriptIdentifierCharacter;
  return directionalRegexp(forward, stopPattern, text, column);
}
/**
 * Scans from {@code column} in the given direction for the nearest character
 * matched by {@code UnicodeUtils.regexpIdentifierOrWhitespace}.
 *
 * @param text the line to search
 * @param column the column to start from
 * @param forward true to scan forward, false to scan backward
 * @return {@code column} unchanged if it is not a valid index into
 *         {@code text}; otherwise the index of the match, or
 *         {@code text.length()} (forward) / {@code -1} (backward) when there
 *         is none
 */
public static int skipNonwhitespaceNonidentifier(String text, int column, boolean forward) {
  boolean outOfRange = column < 0 || column >= text.length();
  if (outOfRange) {
    return column;
  }
  return directionalRegexp(forward, UnicodeUtils.regexpIdentifierOrWhitespace, text, column);
}
/**
 * Skips the run of characters that belong to the same group as the character
 * at {@code column}: identifier characters are handled by
 * {@link #skipIdentifier}, everything else by
 * {@link #skipNonwhitespaceNonidentifier}.
 *
 * <p>{@code column} must be a valid index into {@code text}, because the
 * character at that position is read to pick the group.
 */
private static int skipNonwhitespaceSimilar(String text, int column, boolean forward) {
  char current = text.charAt(column);
  return isValidIdentifierCharacter(current)
      ? skipIdentifier(text, column, forward)
      : skipNonwhitespaceNonidentifier(text, column, forward);
}
/**
 * Skips a run of whitespace starting at {@code column} in the given
 * direction.
 *
 * @return {@code column} unchanged if it is out of range or the character
 *         there is not whitespace; otherwise the index of the nearest
 *         character matched by {@code UnicodeUtils.regexpNotWhitespace}, or
 *         the bound reported by {@link #directionalRegexp} when there is none
 */
private static int skipWhitespace(String text, int column, boolean forward) {
  if (column < 0 || column >= text.length()) {
    return column;
  }
  // The skip is only performed when the cursor actually sits on whitespace.
  if (!UnicodeUtils.isWhitespace(text.charAt(column))) {
    return column;
  }
  return directionalRegexp(forward, UnicodeUtils.regexpNotWhitespace, text, column);
}
/**
 * Runs {@code regexp} against {@code text} relative to {@code column},
 * delegating to {@code RegExpUtils.findMatchAfterIndex} when searching
 * forward and to {@code RegExpUtils.findMatchBeforeIndex} when searching
 * backward.
 *
 * @return the index of the match, or the bound for the chosen direction when
 *         nothing matches: {@code text.length()} going forward, {@code -1}
 *         going backward
 */
private static int directionalRegexp(boolean forward, RegExp regexp, String text, int column) {
  if (forward) {
    MatchResult after = RegExpUtils.findMatchAfterIndex(regexp, text, column);
    return after != null ? after.getIndex() : text.length();
  }
  MatchResult before = RegExpUtils.findMatchBeforeIndex(regexp, text, column);
  return before != null ? before.getIndex() : -1;
}
}