Package br.net.woodstock.rockframework.document.pdf.pdfbox

Source Code of br.net.woodstock.rockframework.document.pdf.pdfbox.GetTextProcessor

/*
* This file is part of rockframework.
*
* rockframework is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* rockframework is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
package br.net.woodstock.rockframework.document.pdf.pdfbox;

import java.io.IOException;

import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

import br.net.woodstock.rockframework.core.RockFrameworkVersion;
import br.net.woodstock.rockframework.core.util.Assert;
import br.net.woodstock.rockframework.document.DocumentException;
import br.net.woodstock.rockframework.document.DocumentInput;
import br.net.woodstock.rockframework.document.DocumentOutput;
import br.net.woodstock.rockframework.document.DocumentProcessor;
import br.net.woodstock.rockframework.document.pdf.PDFException;

public class GetTextProcessor implements DocumentProcessor {

  private static final long  serialVersionUID  = RockFrameworkVersion.VERSION;

  public static final String  SOURCE_PARAMETER  = "source";

  public GetTextProcessor() {
    super();
  }

  @Override
  public DocumentOutput process(final DocumentInput input) {
    Assert.notNull(input, "input");
    try {
      Object source = input.getParameter(GetTextProcessor.SOURCE_PARAMETER);

      if (source == null) {
        throw new DocumentException("Parameter 'source' + must be set");
      }

      PDFParser parser = PDFBox.read(source);
      parser.parse();

      PDDocument document = parser.getPDDocument();
      PDFTextStripper stripper = new PDFTextStripper();

      String text = stripper.getText(document);

      document.close();

      return new DocumentOutput(text);
    } catch (IOException e) {
      throw new PDFException(e);
    }
  }

}
TOP

Related Classes of br.net.woodstock.rockframework.document.pdf.pdfbox.GetTextProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.