Package org.pdfclown.documents.contents

Examples of org.pdfclown.documents.contents.TextChar


      Collections.sort(rawTextStrings, positionComparator);
    }

    // Aggregating and integrating the source text strings into the target ones...
    TextString textString = null;
    TextChar previousTextChar = null;
    for(ContentScanner.TextStringWrapper rawTextString : rawTextStrings)
    {
      /*
        NOTE: Contents on the same line are grouped together within the same text string.
      */
      // Add a new text string in case of new line!
      if(textString == null
        || (!textString.getTextChars().isEmpty()
          && !TextStringPositionComparator.isOnTheSameLine(
            textString.getBox(),
            rawTextString.getBox())))
      {
        textStrings.add(textString = new TextString());
        previousTextChar = null;
      }

      TextStyle textStyle = rawTextString.getStyle();
      float spaceWidth = 0;
      try
      {spaceWidth = textStyle.getFont().getWidth(' ', textStyle.getFontSize());}
      catch(Exception e)
      { /* NOOP */ }
      if(spaceWidth == 0)
      {spaceWidth = textStyle.getFontSize() * .25f;} // NOTE: as a rule of thumb, space width is estimated according to the font size.
      for(TextChar textChar : rawTextString.getTextChars())
      {
        if(previousTextChar != null)
        {
          /*
            NOTE: PDF files may have text contents omitting space characters,
            so they must be inferred and synthesized, marking them as virtual
            in order to allow the user to distinguish between original contents
            and augmented ones.
          */
          double characterSpace = textChar.getBox().getX() - previousTextChar.getBox().getMaxX();
          if(characterSpace >= spaceWidth)
          {
            // Add synthesized space character!
            textString.textChars.add(
              new TextChar(
                ' ',
                new Rectangle2D.Double(
                  previousTextChar.getBox().getMaxX(),
                  textChar.getBox().getY(),
                  characterSpace,
                  textChar.getBox().getHeight()
                  ),
                textStyle,
View Full Code Here

TOP

Related Classes of org.pdfclown.documents.contents.TextChar

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.