Source Code of br.net.woodstock.rockframework.document.pdf.itextpdf.GetTextProcessor

/*
 * This file is part of rockframework.
 * 
 * rockframework is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 * 
 * rockframework is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package br.net.woodstock.rockframework.document.pdf.itextpdf;


import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintWriter;


import br.net.woodstock.rockframework.core.RockFrameworkVersion;
import br.net.woodstock.rockframework.core.util.Assert;
import br.net.woodstock.rockframework.document.DocumentException;
import br.net.woodstock.rockframework.document.DocumentInput;
import br.net.woodstock.rockframework.document.DocumentOutput;
import br.net.woodstock.rockframework.document.DocumentProcessor;
import br.net.woodstock.rockframework.document.pdf.PDFException;


import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;


public class GetTextProcessor implements DocumentProcessor {


  private static final long  serialVersionUID  = RockFrameworkVersion.VERSION;


  public static final String  SOURCE_PARAMETER  = "source";


  public GetTextProcessor() {
    super();
  }


  @Override
  public DocumentOutput process(final DocumentInput input) {
    Assert.notNull(input, "input");
    try {
      Object source = input.getParameter(GetTextProcessor.SOURCE_PARAMETER);


      if (source == null) {
        throw new DocumentException("Parameter 'source' + must be set");
      }


      PdfReader reader = IText.read(source);
      PdfReaderContentParser parser = new PdfReaderContentParser(reader);
      TextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
      int pageCount = reader.getNumberOfPages();
      ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
      PrintWriter writer = new PrintWriter(outputStream);


      for (int i = 1; i <= pageCount; i++) {
        TextExtractionStrategy result = parser.processContent(i, strategy);
        String pageText = result.getResultantText();
        writer.println(pageText);
      }


      reader.close();
      writer.close();


      String text = new String(outputStream.toByteArray());
      return new DocumentOutput(text);
    } catch (IOException e) {
      throw new PDFException(e);
    }
  }


}
Source Code of br.net.woodstock.rockframework.document.pdf.itextpdf.GetTextProcessor

Related Classes of br.net.woodstock.rockframework.document.pdf.itextpdf.GetTextProcessor