// First pass over the text: tally how many characters carry a left-to-right
// directionality and how many carry a right-to-left one. Characters with any
// other directionality (neutral, weak, undefined, ...) are not counted.
int ltrCount = 0;
int rtlCount = 0;
while (textIter.hasNext())
{
    String unicode = textIter.next().getUnicode();
    for (char ch : unicode.toCharArray())
    {
        switch (Character.getDirectionality(ch))
        {
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
                ltrCount++;
                break;
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
                rtlCount++;
                break;
            default:
                // not a directional character we count
                break;
        }
    }
}
// Choose the dominant direction: right-to-left only when the RTL characters
// strictly outnumber the LTR ones; a tie (including no directional characters
// at all) is treated as not RTL-dominant.
boolean isRtlDominant = rtlCount > ltrCount;
// NOTE(review): the argument is the negated RTL flag, so it presumably means
// "is left-to-right" - confirm against startArticle's declaration.
startArticle(!isRtlDominant);
startOfArticle = true;
// True when at least one RTL character was counted.
// We will later use this to skip reordering.
boolean hasRtl = rtlCount > 0;
// Now cycle through to print the text.
// We queue up a line at a time before we print so that we can convert
// the line from presentation form to logical form (if needed).
List<LineItem> line = new ArrayList<LineItem>();
textIter = textList.iterator(); // start from the beginning again
// PDF files don't always store spaces, so we need to guess where to add them
// based on the distances between TextPositions. Historically, this was done
// using the size of the space character provided by the font. In general this
// worked, but there were cases where it did not. Using the average character
// width as the metric works better in some of those cases, but fails in others
// where the space-width approach worked. So we use both.
// NOTE: Adobe Reader also fails on some of these examples.
// Keeps track of the previous average character width.
// NOTE(review): -1 appears to act as the "no previous value" marker - confirm
// where the value is consumed.
float previousAveCharWidth = -1;
// Second pass over the text positions (the iterator was reset above).
while (textIter.hasNext())
{
TextPosition position = textIter.next();
PositionWrapper current = new PositionWrapper(position);
String characterValue = position.getUnicode();
// Resets the average character width when we see a change in font
// or a change in the font size relative to the last position.
// Nothing is reset when there is no last position yet.
// NOTE(review): the fonts are compared with != rather than equals() -
// presumably an intentional identity check; confirm.
if (lastPosition != null &&
(position.getFont() != lastPosition.getTextPosition().getFont() ||
position.getFontSize() != lastPosition.getTextPosition().getFontSize()))
{
previousAveCharWidth = -1;
}
// Geometry of the current position; which accessors supply these values
// depends on whether sorting by position is enabled (see below).
float positionX;
float positionY;
float positionWidth;
float positionHeight;
// If we are sorting, then we need to use the text direction
// adjusted coordinates, because they were used in the sorting.
if (getSortByPosition())
{
positionX = position.getXDirAdj();
positionY = position.getYDirAdj();
positionWidth = position.getWidthDirAdj();
positionHeight = position.getHeightDir();
} else
{
// Not sorting: use the position's plain (unadjusted) accessors.
positionX = position.getX();
positionY = position.getY();
positionWidth = position.getWidth();
positionHeight = position.getHeight();
}
// The current amount of characters in a word, taken as the number of
// entries in the position's individual-widths array.
int wordCharCount = position.getIndividualWidths().length;
// Estimate the expected width of the space based on the
// space character with some margin.
float wordSpacing = position.getWidthOfSpace();
float deltaSpace;
// A space width of zero or NaN gives no usable estimate, so fall back to
// Float.MAX_VALUE. NaN has to be detected with Float.isNaN(): the expression
// "x == Float.NaN" is always false because NaN never compares equal to
// anything, itself included.
if (wordSpacing == 0 || Float.isNaN(wordSpacing))
{
deltaSpace = Float.MAX_VALUE;
} else