// Copyright 2012 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.collide.client.document.linedimensions;
import com.google.collide.client.document.linedimensions.ColumnOffsetCache.ColumnOffset;
import com.google.collide.client.util.dom.FontDimensionsCalculator;
import com.google.collide.client.util.dom.FontDimensionsCalculator.FontDimensions;
import com.google.collide.json.shared.JsonArray;
import com.google.collide.json.shared.JsonStringMap;
import com.google.collide.shared.Pair;
import com.google.collide.shared.document.Document;
import com.google.collide.shared.document.Line;
import com.google.collide.shared.document.TextChange;
import com.google.collide.shared.document.Document.PreTextListener;
import com.google.collide.shared.document.Document.TextListener;
import com.google.collide.shared.document.TextChange.Type;
import com.google.collide.shared.util.JsonCollections;
import com.google.collide.shared.util.StringUtils;
import com.google.collide.shared.util.UnicodeUtils;
import com.google.collide.shared.util.ListenerRegistrar.RemoverManager;
import com.google.gwt.regexp.shared.MatchResult;
import com.google.gwt.regexp.shared.RegExp;
/**
* An object which can accurately measure a {@link Line} and map X coordinates
* to columns and vice versa.
*/
/*
* TL;DR;
*
* We have a fast regex to tell whether a line can use the naive calculations
* for column/x. If it doesn't we have to put it in a span and measure special
* characters then store how much they affect the width of our columns. Tabs
* which are prefixing indentation and suffix carriage returns are special cased
* and don't require measurements.
*/
/*
* Implementation Details
*
* There are three states a line can be in:
*
* 1. Unknown, this is represented by the lack of a NEEDS_OFFSET tag on the
* Line. This is the default line state and means that a user (or this class)
* has yet to visit this line in the document.
*
* 2. No offset needed (false NEEDS_OFFSET value), this state indicates that
* we've visited the line and decided it has no characters which warrant special
* attention. Prefix tabs and suffix carriage returns are included in this state
* since they are a common case. If they exist they will be handled in some very
* simple offset code.
*
* 3. Offset needed (true NEEDS_OFFSET value), this state indicates that there
* are special characters within this line. This includes tabs which appear in
* the middle of a line or carriage-returns which aren't at the end followed by
* a \n. In this state a ColumnOffsetCache is built and put onto the line. It is
* lazily constructed as columns are requested.
*
* ColumnOffsetCache internals:
*
* Once built and attached to a line, the column offset cache maintains a cache
* entry for each special character on a line. The entry will be made for the
* column immediately following the special character and the x value of that
* entry will represent the left edge of that column (the right edge of the
* column with the special character).
*
* All examples assume double-wide characters are 10px and normal characters are
* 5px wide, combining marks are 0px.
*
* For example, if the line is "烏烏龍茶\n" the cache will contain:
*
* [{column: 1, x: 10px }, {column: 2, x: 20px }, {column: 3, x: 30px },
* {column: 4, x: 40px }]
*
* Note that this last entry exists regardless of the \n existing. So the last
* entry may be a column which does not exist in the string.
*
* Using the example, when column 2's x is requested (that is the third
* character in the string), entry [1] will be pulled from the cache and the
* returned left edge will be 20px.
*
* A cache entry is only created for special width characters, any interesting
* character that turns out to be our same width is ignored as well.
*
* For example in the case of "烏aaa烏" the cache will contain:
*
* [{column: 1, x: 10px }, {column: 5, x: 25px }]
*
* In this example if we were interested in column 2 (that is the 3rd character
* in the string or 2nd 'a' character), entry [1] will be pulled from cache,
* then 10px + 5px will be added together and a left edge of 15px will be
* returned.
*
* An important note on combining marks:
*
* Combining marks and some similar characters, show up in the cache as 0 width
* characters. That is when they are measured they add no additional width to
* the string and function as a zero width column. They will still contain an
* entry in cache but it will be marked as isZeroWidth and the x value will
* correspond to the same x value of the previous column.
*
* For example, if the line is "à=à" (note the first a has a combining mark so
* the string is really more like "a`=à") the cache will contain:
*
* [{column: 2, x: 5px, isZeroWidth: true }]
*
* That means if we look up, say, character 4 (the last à), we will take 5px and
* add 5px to get 10px which would put us to the right of the '=' when rendered.
*
* How the cache is built:
*
* We scan the string for any characters of interest (we vaguely define that as
* any character not part of the original latin alphabet so character code >
* 255). We then measure the entire string up to and including that character.
* If the string matches the length we'd expect, then we cache that the
* character is normal but do not create an offset cache entry. Otherwise we
* store the width of the character so we don't have to measure for it again,
* then create an offset cache entry to denote that this character affects
* columns past it since it has an odd width.
*
* Current Limitations:
*
* Some combining marks combine in both directions (this is mostly script type
* languages). This means they can affect the width of columns before themselves
* (aka make the character they combine with smaller). We have a hack to
* mitigate this but really there's only so much we can do.
*
* Measuring can be expensive in long strings containing a large number of
* different special characters. So if you paste a 500 character string of
* Katakana, then click at the very end, prepare for a small wait while we catch
* up. The good news is that will be mitigated the second time since each of
* those characters' widths is cached and we won't have to layout again.
*
* Zoom makes us wipe the entire cache (sucks). Unfortunately this can't be
* avoided as different characters scale at different factors (double sucks).
* So we just clear our cache and go about rebuilding.
*
* Further comments:
*
* I'm not sure how docs does it but they seem pretty quick, they bite it hard
* on combining marks (the cursor just moves over the letter), but they do
* handle wider characters fine. The one thing they do have is Arabic makes the
* cursor go right-to-left but still combining marks don't quite work and in
* fact some don't render correctly at all (which do otherwise).
*/
public class LineDimensionsCalculator {
/**
 * Builds a {@link LineDimensionsCalculator} that measures through a
 * {@link BrowserMeasurementProvider} backed by the given
 * {@link FontDimensionsCalculator}.
 *
 * <p>A callback is also registered on the font calculator so that the shared
 * character width cache is discarded whenever the font dimensions change.
 */
public static LineDimensionsCalculator create(FontDimensionsCalculator fontCalculator) {
  MeasurementProvider browserProvider = new BrowserMeasurementProvider(fontCalculator);
  LineDimensionsCalculator result = new LineDimensionsCalculator(browserProvider);

  // Cached character widths are only valid for the font dimensions they were measured at.
  FontDimensionsCalculator.Callback dimensionsListener = new FontDimensionsCalculator.Callback() {
    @Override
    public void onFontDimensionsChanged(FontDimensions fontDimensions) {
      LineDimensionsCalculator.clearCharacterCacheDueToZoomChange();
    }
  };
  fontCalculator.addCallback(dimensionsListener);

  return result;
}
/**
 * Builds a {@link LineDimensionsCalculator} that measures through the supplied
 * {@link MeasurementProvider}. Unlike {@link #create}, no font dimension
 * callback is registered.
 */
static LineDimensionsCalculator createWithCustomProvider(MeasurementProvider provider) {
  LineDimensionsCalculator calculator = new LineDimensionsCalculator(provider);
  return calculator;
}
/**
 * Selects how a fractional column produced by an X-to-column conversion is
 * turned into a whole column when the X does not sit exactly on a column
 * boundary.
 */
public enum RoundingStrategy {
  ROUND, FLOOR, CEIL;

  /**
   * Applies this strategy to {@code value}.
   *
   * @param value the fractional value to convert
   * @return {@code value} rounded to nearest, down, or up, narrowed to an int
   */
  public int apply(double value) {
    if (this == ROUND) {
      return (int) Math.round(value);
    }
    if (this == FLOOR) {
      return (int) Math.floor(value);
    }
    if (this == CEIL) {
      return (int) Math.ceil(value);
    }
    throw new IllegalStateException("Unexpected value for RoundingStrategy");
  }
}
/**
 * A cache used to cache the width of special characters. Would be final
 * except there isn't a fast way to clear a map.
 *
 * <p>Keys are single-character strings and each value is the width recorded
 * for that character (measured string width minus the x offset preceding the
 * character). The map is shared by every calculator instance and is replaced
 * with a fresh one when the font dimensions change.
 */
private static JsonStringMap<Double> characterWidthCache = JsonCollections.createMap();
/**
 * Listens for completed text changes so dirty lines can be re-examined. Only
 * changes whose end line differs from their start line are handled here; the
 * start line is taken care of by the pre-text listener.
 */
private static TextListener textListener = new TextListener() {
  @Override
  public void onTextChange(Document document, JsonArray<TextChange> textChanges) {
    for (int index = 0; index < textChanges.size(); index++) {
      TextChange textChange = textChanges.get(index);
      if (textChange.getEndLine() == textChange.getLine()) {
        // Single-line change: nothing to do for the end line.
        continue;
      }
      LineDimensionsUtils.isOffsetNeededAndCache(
          textChange.getEndLine(), textChange.getEndColumn(), textChange.getType());
    }
  }
};
/**
 * A listener which allows us to mark the cache dirty before a text change
 * actually takes place.
 *
 * <p>Every notification is forwarded to
 * {@code LineDimensionsUtils.preTextIsOffsetNeededAndCache} with the affected
 * line, column, change type and text; the document and line number arguments
 * are not used.
 */
private static PreTextListener preTextListener = new PreTextListener() {
@Override
public void onPreTextChange(Document document,
Type type,
Line line,
int lineNumber,
int column,
String text) {
/*
* In the case where text is deleted, we only need to mark ourselves dirty
* if there is already an OffsetCache. The insert case though requires
* looking at the newly typed text for special characters. Both cases are
* decided inside the helper called below, not in this listener.
*/
LineDimensionsUtils.preTextIsOffsetNeededAndCache(line, column, type, text);
}
};
/** Tracks the listener registrations made on the currently opened document. */
private final RemoverManager listenerManager = new RemoverManager();
/** Supplies the column (character) width and measures rendered string widths. */
private final MeasurementProvider measurementProvider;
/**
 * Private constructor; instances are obtained through the static
 * {@code create} and {@code createWithCustomProvider} factories.
 *
 * @param measurementProvider the provider used for all width measurements
 */
private LineDimensionsCalculator(MeasurementProvider measurementProvider) {
this.measurementProvider = measurementProvider;
}
/**
 * Sets the currently opened document so we can listen for mutations.
 *
 * <p>Registrations tracked for the previous document are removed first, then
 * the shared pre-text and text listeners are added to {@code newDocument} and
 * tracked so that a later call can remove them again.
 *
 * @param newDocument the document that is now open
 */
public void handleDocumentChange(Document newDocument) {
// Remove old document listener
listenerManager.remove();
// add the new ones
listenerManager.track(newDocument.getPreTextListenerRegistrar().add(preTextListener));
listenerManager.track(newDocument.getTextListenerRegistrar().add(textListener));
}
/**
 * Returns the x coordinate of the left edge of {@code column} on {@code line}.
 *
 * <p>Column 0 is always at x = 0. Lines flagged as needing offsets go through
 * the offset cache (measuring when required); all other lines use the simple
 * tab / carriage-return aware calculation.
 */
public double convertColumnToX(Line line, int column) {
  if (column == 0) {
    // Start of line, nothing to compute.
    return 0;
  }
  return LineDimensionsUtils.needsOffset(line)
      ? convertColumnToXMeasuringIfNeeded(line, column)
      : simpleConvertColumnToX(line, column);
}
/**
 * Returns the editor column located at pixel position {@code x} on
 * {@code line}, using {@code roundingStrategy} to resolve positions that fall
 * between column boundaries.
 *
 * <p>Non-positive x values map to column 0 (negative values can occur while
 * drag-selecting).
 */
public int convertXToColumn(Line line, double x, RoundingStrategy roundingStrategy) {
  if (x <= 0) {
    return 0;
  }
  if (LineDimensionsUtils.needsOffset(line)) {
    double fractionalColumn = convertXToColumnMeasuringIfNeeded(line, x);
    return roundingStrategy.apply(fractionalColumn);
  }
  return simpleConvertXToColumn(line, x, roundingStrategy);
}
/**
 * Resolves a column to its x position through the line's
 * {@link ColumnOffsetCache}, first extending the cache by measuring if it
 * does not yet cover {@code column}.
 */
private double convertColumnToXMeasuringIfNeeded(Line line, int column) {
  LineDimensionsUtils.markTimeline(getClass(), "Begin converting Column To X via offset cache.");

  ColumnOffsetCache offsetCache = ColumnOffsetCache.getOrCreate(line, getColumnWidth());
  checkColumnInCacheAndMeasureIfNeeded(offsetCache, line, column);
  ColumnOffset cachedEntry = offsetCache.getColumnOffsetForColumn(column);

  LineDimensionsUtils.markTimeline(getClass(), "End converting Column To X via offset cache.");
  return smartColumnToX(cachedEntry, column);
}
/**
 * Resolves an x position to a (possibly fractional) column through the line's
 * {@link ColumnOffsetCache}, first extending the cache by measuring if it
 * does not yet cover {@code x}.
 */
private double convertXToColumnMeasuringIfNeeded(Line line, double x) {
  LineDimensionsUtils.markTimeline(getClass(), "Begin converting X To column via offset cache.");

  ColumnOffsetCache offsetCache = ColumnOffsetCache.getOrCreate(line, getColumnWidth());
  checkXInCacheAndMeasureIfNeeded(offsetCache, line, x);
  Pair<ColumnOffset, Double> entryAndWidth = offsetCache.getColumnOffsetForX(x, getColumnWidth());

  LineDimensionsUtils.markTimeline(getClass(), "End converting X To column via offset cache.");
  return smartXToColumn(entryAndWidth.first, entryAndWidth.second, x);
}
/**
 * Converts {@code column} to an x position relative to a cached
 * {@link ColumnOffset}: the entry's x plus the naive width of the columns
 * between the entry and {@code column}.
 */
private double smartColumnToX(ColumnOffset offset, int column) {
  if (offset.column != column) {
    return offset.x + naiveColumnToX(column - offset.column);
  }
  // The requested column is the cached entry itself.
  return offset.x;
}
/**
 * Converts an x pixel position to a (possibly fractional) column relative to
 * a cached {@link ColumnOffset}.
 *
 * @param offset the cache entry the conversion is based on
 * @param width the width in pixels of the column at {@code offset}
 * @param x the pixel position to convert
 */
private double smartXToColumn(ColumnOffset offset, double width, double x) {
  if (x == offset.x) {
    return offset.column;
  }

  final double pixelsPastOffset = x - offset.x;
  if (x < offset.x + width) {
    /*
     * x lies inside the offset's own column, so take into account this
     * column's width, which may be special.
     */
    return offset.column + pixelsPastOffset / width;
  }

  // Beyond the offset's column: convert the remaining pixels naively.
  return offset.column + naiveXToColumn(pixelsPastOffset);
}
/**
 * Returns the x position of {@code column} assuming every column has the
 * standard column width (no special characters).
 */
private double naiveColumnToX(double column) {
  final double columnWidth = getColumnWidth();
  return column * columnWidth;
}
/**
 * Returns the (fractional) column for pixel position {@code x} assuming every
 * column has the standard column width (no special characters).
 */
private double naiveXToColumn(double x) {
  final double columnWidth = getColumnWidth();
  return x / columnWidth;
}
/**
 * Converts a column to x for lines that do not need an offset cache. The
 * column is adjusted for indentation tabs (each tab reported by
 * {@code LineDimensionsUtils.getLastIndentationTabCount} adds tab-width minus
 * one extra columns) and shifted back by one when it lies past a trailing
 * carriage return, as in the '\r\n' windows line format. Complex cases are
 * handled through the {@link ColumnOffsetCache}.
 */
private double simpleConvertColumnToX(Line line, int column) {
  if (column == 0) {
    // Start of the line.
    return 0;
  }

  LineDimensionsUtils.markTimeline(getClass(), "Calculating simple offset");

  // Extra columns contributed by indentation tabs that affect this column.
  int indentationTabs = LineDimensionsUtils.getLastIndentationTabCount(line.getText(), column);
  int extraColumnsPerTab = LineDimensionsUtils.getTabWidth() - 1;
  int tabAdjustment = indentationTabs * extraColumnsPerTab;

  // A carriage return occupies a column but no width.
  int carriageReturnAdjustment = isColumnAffectedByCarriageReturn(line, column) ? -1 : 0;

  LineDimensionsUtils.markTimeline(getClass(), "End calculating simple offset");
  return naiveColumnToX(tabAdjustment + carriageReturnAdjustment + column);
}
/**
 * Converts x to a column for lines that do not need an offset cache,
 * accounting for prefix tabs (which may be wider than one column) and a
 * suffix '\r' (which has no width).
 *
 * @param line the line being measured
 * @param x the pixel position to convert
 * @param roundingStrategy how to resolve a position between two columns
 */
private int simpleConvertXToColumn(Line line, double x, RoundingStrategy roundingStrategy) {
  if (x == 0) {
    return 0;
  }

  LineDimensionsUtils.markTimeline(getClass(), "Calculating simple offset from x");

  /*
   * Dividing x by the pixel width of a tab gives the number of columns the
   * position would span if it were all tabs, which overshoots the number of
   * indentation tabs actually involved.
   */
  double tabWidthPx = naiveColumnToX(LineDimensionsUtils.getTabWidth());
  int columnIfAllTabs = (int) Math.floor(x / tabWidthPx);
  int indentationTabs =
      LineDimensionsUtils.getLastIndentationTabCount(line.getText(), columnIfAllTabs);
  assert columnIfAllTabs >= indentationTabs : "You appear to be less tabs then you say you are";

  // Remove the pixels covered by tabs, convert the rest naively, then add the tabs back.
  double tabsWidthPx = indentationTabs * LineDimensionsUtils.getTabWidth() * getColumnWidth();
  double remainingPx = x - tabsWidthPx;
  int column = roundingStrategy.apply(naiveXToColumn(remainingPx) + indentationTabs);

  // Landing on the carriage return column means stepping over it.
  boolean landedOnCarriageReturn =
      column < line.length() && line.getText().charAt(column) == '\r';

  LineDimensionsUtils.markTimeline(getClass(), "End calculating simple offset from x");
  return landedOnCarriageReturn ? column + 1 : column;
}
/**
 * Measures the line up to {@code column} when the cache reports that the
 * column has not been measured yet.
 *
 * @return true if a measurement was performed.
 */
private boolean checkColumnInCacheAndMeasureIfNeeded(
    ColumnOffsetCache cache, Line line, int column) {
  if (!cache.isColumnMeasurementNeeded(column)) {
    return false;
  }
  measureLineStoppingAtColumn(cache, line, column);
  return true;
}
/**
 * Measures the line up to pixel position {@code x} when the cache reports
 * that the position has not been measured yet.
 *
 * @return true if a measurement was performed.
 */
private boolean checkXInCacheAndMeasureIfNeeded(ColumnOffsetCache cache, Line line, double x) {
  if (!cache.isXMeasurementNeeded(x)) {
    return false;
  }
  measureLineStoppingAtX(cache, line, x);
  return true;
}
/**
 * Extends the cache for a line until it reaches or passes {@code endColumn};
 * no x limit is applied.
 *
 * @see #measureLine(ColumnOffsetCache, Line, int, double)
 */
private void measureLineStoppingAtColumn(ColumnOffsetCache cache, Line line, int endColumn) {
  final double unboundedX = Double.MAX_VALUE;
  measureLine(cache, line, endColumn, unboundedX);
}
/**
 * Extends the cache for a line until it reaches or passes {@code endX}; no
 * column limit is applied.
 *
 * @see #measureLine(ColumnOffsetCache, Line, int, double)
 */
private void measureLineStoppingAtX(ColumnOffsetCache cache, Line line, double endX) {
  final int unboundedColumn = Integer.MAX_VALUE;
  measureLine(cache, line, unboundedColumn, endX);
}
/**
 * Builds the cache for a line up to a particular column. Should not be called
 * if the line has already been {@link ColumnOffsetCache#FULLY_MEASURED}.
 *
 * <p>
 * You should only rely on either endColumn or endX, one or the other should
 * be the max value for its data type.
 *
 * <p>
 * When the scan runs out of matches the cache is marked
 * {@link ColumnOffsetCache#FULLY_MEASURED}. The begin/end timeline marks are
 * emitted on every path, including the fully-measured one.
 *
 * @see #measureLineStoppingAtColumn(ColumnOffsetCache, Line, int)
 * @see #measureLineStoppingAtX(ColumnOffsetCache, Line, double)
 *
 * @param cache the offset cache of {@code line} to extend
 * @param line the line being measured
 * @param endColumn inclusive end column (we will end on or after end)
 * @param endX inclusive end x pixel width (we will end on or after endX)
 */
private void measureLine(ColumnOffsetCache cache, Line line, int endColumn, double endX) {
  /*
   * Starting at cache.measuredColumn we will use the regex to scan forward to
   * see if we hit an interesting character other than prefixed tab. if we do
   * we'll measure that to that point and append a {@link ColumnOffset} if it
   * is a special size. Rinse and repeat.
   */
  LineDimensionsUtils.markTimeline(getClass(), "Beginning measure line");
  RegExp regexp = UnicodeUtils.regexpNonAsciiTabOrCarriageReturn;
  // Resume scanning from where the previous measurement stopped.
  regexp.setLastIndex(cache.measuredOffset.column);
  MatchResult result = regexp.exec(line.getText());
  if (result != null) {
    double x = 0;
    do {
      // Calculate any x offset up to this point in the line
      ColumnOffset offset = cache.getLastColumnOffsetInCache();
      double baseXOffset = smartColumnToX(offset, result.getIndex());
      /*
       * TODO: we can be smarter here, if i > 1, then this character
       * is a mark. We could separate out the RegExp into non-spacing,
       * enclosing-marks v. spacing-marks and already know which are supposed
       * to be zero-width based on which groups are null.
       */
      String match = result.getGroup(0);
      for (int i = 0; i < match.length(); i++) {
        // Each matched character starts where the previous one ended.
        x = addOffsetForResult(cache, match.charAt(i), result.getIndex() + i, line, baseXOffset);
        baseXOffset = x;
      }
      result = regexp.exec(line.getText());
      // we have to ensure we measure through the last zero-width character.
    } while (result != null && result.getIndex() < endColumn && x < endX);
  }
  if (result == null) {
    // No further interesting characters: the whole line has been scanned.
    cache.measuredOffset = ColumnOffsetCache.FULLY_MEASURED;
  }
  LineDimensionsUtils.markTimeline(getClass(), "Ending measure line");
}
/**
 * Determines the width of the line up to and including the character at
 * {@code index} and appends an offset entry to {@code cache} when that
 * character is not exactly one standard column wide.
 *
 * @param cache the offset cache to append to
 * @param matchedCharacter the special character found at {@code index}
 * @param index the column of {@code matchedCharacter} within the line
 * @param line the line being measured
 * @param baseXOffset the x position of the left edge of {@code index}
 * @return the x position of the right edge of the character (never less than
 *         {@code baseXOffset})
 */
private double addOffsetForResult(
    ColumnOffsetCache cache, char matchedCharacter, int index, Line line, double baseXOffset) {
  /*
   * Take the text up to and including the current character. Tabs are
   * replaced with the appropriate number of hard-spaces so they render as the
   * correct number of columns and the browser doesn't trim them.
   */
  String prefix = line.getText().substring(0, index + 1);
  String tabAsHardSpaces = StringUtils.repeatString("\u00A0", LineDimensionsUtils.getTabWidth());
  String measurableText = prefix.replace("\t", tabAsHardSpaces);

  // Width the prefix would have if the character were a normal column.
  double normalWidth = baseXOffset + getColumnWidth();
  double measuredWidth = getStringWidth(matchedCharacter, baseXOffset, measurableText);

  /*
   * Certain combining characters can change how the previous character is
   * rendered, in some cases making the string narrower than it was before
   * (this happens with scripts that use heavy combining marks, such as
   * Arabic, and can also come from measurement inconsistencies). There is
   * not much we can do, so the width is clamped to keep the cursor behaviour
   * consistent even if it is not strictly correct.
   */
  double resultWidth = measuredWidth < baseXOffset ? baseXOffset : measuredWidth;

  if (resultWidth != normalWidth) {
    // The entry is recorded for the column immediately after the character.
    cache.appendOffset(index + 1, resultWidth, resultWidth - baseXOffset);
  }
  return resultWidth;
}
/**
 * Returns the width of a single standard column, as currently reported by the
 * {@link MeasurementProvider}.
 */
private double getColumnWidth() {
  final double characterWidth = measurementProvider.getCharacterWidth();
  return characterWidth;
}
/**
 * Determines the width of a string using either the cached width of a
 * character of interest or by measuring it using a
 * {@link MeasurementProvider}.
 *
 * <p>Tabs and carriage returns are never measured: a tab is the tab width in
 * columns and a carriage return is zero-width. Any other character is looked
 * up in the shared character width cache and only measured (and then cached)
 * on a miss.
 *
 * @param characterOfInterest The character we are interested in which should
 *        also be the last character of the textToMeasure string.
 * @param baseXOffset The base x offset of the column before the character of
 *        interest. The returned result will be this offset + the width of the
 *        characterOfInterest.
 * @param textToMeasureIncludingCharacterOfInterest The string of text to
 *        measure including the character of interest.
 *
 * @return The width of the string which is baseXOffset +
 *         characterOfInterestWidth
 */
private double getStringWidth(char characterOfInterest, double baseXOffset,
    String textToMeasureIncludingCharacterOfInterest) {
  switch (characterOfInterest) {
    case '\t':
      // base + columnWidth * tab_size_in_columns
      return baseXOffset + LineDimensionsUtils.getTabWidth() * getColumnWidth();
    case '\r':
      // zero-width just return the baseXOffset
      return baseXOffset;
    default:
      String cacheKey = String.valueOf(characterOfInterest);
      Double cachedCharacterWidth = characterWidthCache.get(cacheKey);
      // if we know the width already return it
      if (cachedCharacterWidth != null) {
        return baseXOffset + cachedCharacterWidth;
      }
      // Measure the whole string; the character's own width is the part past the base offset.
      double width =
          measurementProvider.measureStringWidth(textToMeasureIncludingCharacterOfInterest);
      // cache the width of this character
      characterWidthCache.put(cacheKey, width - baseXOffset);
      return width;
  }
}
/**
 * Returns true if the second-to-last character of the line is a carriage
 * return and {@code column} lies past it.
 */
private static boolean isColumnAffectedByCarriageReturn(Line line, int column) {
  final int lineLength = line.length();
  if (lineLength < 2) {
    // Too short to end in a carriage return followed by another character.
    return false;
  }
  final int carriageReturnColumn = lineLength - 2;
  if (column <= carriageReturnColumn) {
    return false;
  }
  return line.getText().charAt(carriageReturnColumn) == '\r';
}
/**
 * Discards every cached character width by swapping in a fresh map. Characters
 * do not all scale by the same factor across zoom levels, so the cached widths
 * cannot be adjusted and are simply rebuilt on demand.
 */
private static void clearCharacterCacheDueToZoomChange() {
  LineDimensionsUtils.markTimeline(LineDimensionsCalculator.class, "Cleared cache due to zoom");
  JsonStringMap<Double> freshCache = JsonCollections.createMap();
  characterWidthCache = freshCache;
}
}