Package com.itextpdf.text.pdf.parser

Source Code of com.itextpdf.text.pdf.parser.PdfTextExtractorEncodingsTest

/*
* Created on Jul 2, 2009
* (c) 2009 Trumpet, Inc.
*
*/
package com.itextpdf.text.pdf.parser;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;

import junit.framework.Assert;

import org.junit.BeforeClass;
import org.junit.Test;

import com.itextpdf.text.Document;
import com.itextpdf.text.Font;
import com.itextpdf.text.FontFactory;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfWriter;

/**
* @author kevin
*/
public class PdfTextExtractorEncodingsTest
{

    /** Basic Latin characters, with Unicode values less than 128 */
    private static final String TEXT1 = "AZaz09*!";
    /** Latin-1 characters */
    private static final String TEXT2 = "\u0027\u0060\u00a4\u00a6";

    // the following will cause failures
    //  private static final String TEXT2 = "\u0027\u0060\u00a4\u00a6\00b5\u2019";

   
    @BeforeClass
    public static void initializeFontFactory(){
        FontFactory.registerDirectories();       
    }

    protected static Font getSomeTTFont(String encoding, boolean embedded, float size) {

        String fontNames[] = {"arial"};
        for (String name : fontNames) {
            Font foundFont = FontFactory.getFont(name, encoding, embedded, size);
            if (foundFont != null) {
                switch(foundFont.getBaseFont().getFontType()){
                    case BaseFont.FONT_TYPE_TT:
                    case BaseFont.FONT_TYPE_TTUNI:
                        return foundFont; // SUCCESS
                }
            }
        }
        throw new IllegalArgumentException("Unable to find TrueType font to test with - add the name of a TT font on the system to the fontNames array in this method");
    }

   
    private static byte[] createPdf(final Font font)
      throws Exception
    {
      final ByteArrayOutputStream byteStream = new ByteArrayOutputStream();

      final Document document = new Document();
      PdfWriter.getInstance(document, byteStream);
      document.open();
      document.add(new Paragraph(TEXT1, font));
      document.newPage();
      document.add(new Paragraph(TEXT2, font));
      document.close();

      final byte[] pdfBytes = byteStream.toByteArray();
     
      return pdfBytes;
    }
   
   
    /**
     * Used for testing only if we need to open the PDF itself
     * @param bytes
     * @param file
     * @throws Exception
     */
    private void saveBytesToFile(byte[] bytes, File file) throws Exception{
        final FileOutputStream outputStream = new FileOutputStream(file);
        outputStream.write(bytes);
        outputStream.close();
        System.out.println("PDF dumped to " + file.getAbsolutePath());
    }

    /**
     * Test parsing a document which uses a standard non-embedded font.
     *
     * @throws Exception any exception will cause the test to fail
     */
    @Test
    public void testStandardFont() throws Exception
    {
        Font font = new Font(Font.TIMES_ROMAN, 12);
        byte[] pdfBytes = createPdf(font);
       
        if (false){
            saveBytesToFile(pdfBytes, new File("test.pdf"));
        }

        checkPdf(pdfBytes);
       
    }


    /**
     * Test parsing a document which uses a font encoding which creates a /Differences
     * PdfArray in the PDF.
     *
     * @throws Exception any exception will cause the test to fail
     */
    @Test
    public void testEncodedFont()
      throws Exception
    {
        Font font = getSomeTTFont("ISO-8859-1", BaseFont.EMBEDDED, 12);
        byte[] pdfBytes = createPdf(font);
        checkPdf(pdfBytes);
    }


    /**
     * Test parsing a document which uses a Unicode font encoding which creates a /ToUnicode
     * PdfArray.
     *
     * @throws Exception any exception will cause the test to fail
     */
    @Test
    public void testUnicodeFont()
      throws Exception
    {
        Font font = getSomeTTFont(BaseFont.IDENTITY_H, BaseFont.EMBEDDED, 12);
        byte[] pdfBytes = createPdf(font);
        checkPdf(pdfBytes);
    }


    private void checkPdf(final byte[] pdfBytes) throws Exception {

      final PdfReader pdfReader = new PdfReader(pdfBytes);
      final PdfTextExtractor textExtractor = new PdfTextExtractor(pdfReader);
      // Characters from http://unicode.org/charts/PDF/U0000.pdf
      Assert.assertEquals(TEXT1, textExtractor.getTextFromPage(1));
      // Characters from http://unicode.org/charts/PDF/U0080.pdf
      Assert.assertEquals(TEXT2, textExtractor.getTextFromPage(2));
    }

  }
TOP

Related Classes of com.itextpdf.text.pdf.parser.PdfTextExtractorEncodingsTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.