Collections.sort(rawTextStrings, positionComparator);
}
// Aggregating and integrating the source text strings into the target ones...
TextString textString = null;
TextChar previousTextChar = null;
for(ContentScanner.TextStringWrapper rawTextString : rawTextStrings)
{
/*
NOTE: Contents on the same line are grouped together within the same text string.
*/
// Add a new text string in case of new line!
if(textString == null
|| (!textString.getTextChars().isEmpty()
&& !TextStringPositionComparator.isOnTheSameLine(
textString.getBox(),
rawTextString.getBox())))
{
textStrings.add(textString = new TextString());
previousTextChar = null;
}
TextStyle textStyle = rawTextString.getStyle();
float spaceWidth = 0;
try
{spaceWidth = textStyle.getFont().getWidth(' ', textStyle.getFontSize());}
catch(Exception e)
{ /* NOOP */ }
if(spaceWidth == 0)
{spaceWidth = textStyle.getFontSize() * .25f;} // NOTE: as a rule of thumb, space width is estimated according to the font size.
for(TextChar textChar : rawTextString.getTextChars())
{
if(previousTextChar != null)
{
/*
NOTE: PDF files may contain text that omits explicit space characters.
Such spaces are inferred from the horizontal gap between consecutive
characters and synthesized, and they are marked as virtual so that the
user can distinguish original contents from augmented ones.
*/
double characterSpace = textChar.getBox().getX() - previousTextChar.getBox().getMaxX();
if(characterSpace >= spaceWidth)
{
// Add synthesized space character!
textString.textChars.add(
new TextChar(
' ',
new Rectangle2D.Double(
previousTextChar.getBox().getMaxX(),
textChar.getBox().getY(),
characterSpace,
textChar.getBox().getHeight()
),
textStyle,