//
boolean suppressCharacter = false;
float tolerance = (text.getWidth()/textCharacter.length())/3.0f;
for( int i=0; i<sameTextCharacters.size() && textCharacter != null; i++ )
{
TextPosition character = sameTextCharacters.get( i );
String charCharacter = character.getCharacter();
float charX = character.getX();
float charY = character.getY();
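//only want to suppress this character when an already-recorded character
//passes the within() tolerance check against textX and textY in both x and y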
if( charCharacter != null &&
//charCharacter.equals( textCharacter ) &&
within( charX, textX, tolerance ) &&
within( charY,
textY,
tolerance ) )
{
suppressCharacter = true;
}
}
if( !suppressCharacter )
{
sameTextCharacters.add( text );
showCharacter = true;
}
}
if( showCharacter )
{
//if we are showing the character then we need to determine which
//article it belongs to.
int foundArticleDivisionIndex = -1;
int notFoundButFirstLeftAndAboveArticleDivisionIndex = -1;
int notFoundButFirstLeftArticleDivisionIndex = -1;
int notFoundButFirstAboveArticleDivisionIndex = -1;
float x = text.getX();
float y = text.getY();
if( shouldSeparateByBeads )
{
for( int i=0; i<pageArticles.size() && foundArticleDivisionIndex == -1; i++ )
{
PDThreadBead bead = (PDThreadBead)pageArticles.get( i );
if( bead != null )
{
PDRectangle rect = bead.getRectangle();
if( rect.contains( x, y ) )
{
foundArticleDivisionIndex = i*2+1;
}
else if( (x < rect.getLowerLeftX() ||
y < rect.getUpperRightY()) &&
notFoundButFirstLeftAndAboveArticleDivisionIndex == -1)
{
notFoundButFirstLeftAndAboveArticleDivisionIndex = i*2;
}
else if( x < rect.getLowerLeftX() &&
notFoundButFirstLeftArticleDivisionIndex == -1)
{
notFoundButFirstLeftArticleDivisionIndex = i*2;
}
else if( y < rect.getUpperRightY() &&
notFoundButFirstAboveArticleDivisionIndex == -1)
{
notFoundButFirstAboveArticleDivisionIndex = i*2;
}
}
else
{
foundArticleDivisionIndex = 0;
}
}
}
else
{
foundArticleDivisionIndex = 0;
}
int articleDivisionIndex = -1;
if( foundArticleDivisionIndex != -1 )
{
articleDivisionIndex = foundArticleDivisionIndex;
}
else if( notFoundButFirstLeftAndAboveArticleDivisionIndex != -1 )
{
articleDivisionIndex = notFoundButFirstLeftAndAboveArticleDivisionIndex;
}
else if( notFoundButFirstLeftArticleDivisionIndex != -1 )
{
articleDivisionIndex = notFoundButFirstLeftArticleDivisionIndex;
}
else if( notFoundButFirstAboveArticleDivisionIndex != -1 )
{
articleDivisionIndex = notFoundButFirstAboveArticleDivisionIndex;
}
else
{
articleDivisionIndex = charactersByArticle.size()-1;
}
List<TextPosition> textList = (List<TextPosition>) charactersByArticle.get( articleDivisionIndex );
/* In the wild, some PDF-encoded documents put diacritics (accents on
* top of characters) into a separate Tj element. When displaying them
* graphically, the two chunks get overlaid. With text output though,
* we need to do the overlay ourselves. This code recombines the diacritic
* with its associated character if the two are consecutive.
*/
if(textList.isEmpty())
{
textList.add(text);
}
else
{
/* test if we overlap the previous entry.
* Note that we are making an assumption that we need to only look back
* one TextPosition to find what we are overlapping.
* This may not always be true. */
TextPosition previousTextPosition = (TextPosition)textList.get(textList.size()-1);
if(text.isDiacritic() && previousTextPosition.contains(text))
{
previousTextPosition.mergeDiacritic(text, normalize);
}
/* If the previous TextPosition was the diacritic, merge it into this
* one and remove it from the list. */
else if(previousTextPosition.isDiacritic() && text.contains(previousTextPosition))
{
text.mergeDiacritic(previousTextPosition, normalize);
textList.remove(textList.size()-1);
textList.add(text);
}