/*
* This file is part of rockframework.
*
* rockframework is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* rockframework is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package br.net.woodstock.rockframework.document.pdf.pdfbox;
import java.io.IOException;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import br.net.woodstock.rockframework.core.RockFrameworkVersion;
import br.net.woodstock.rockframework.core.util.Assert;
import br.net.woodstock.rockframework.document.DocumentException;
import br.net.woodstock.rockframework.document.DocumentInput;
import br.net.woodstock.rockframework.document.DocumentOutput;
import br.net.woodstock.rockframework.document.DocumentProcessor;
import br.net.woodstock.rockframework.document.pdf.PDFException;
/**
 * {@link DocumentProcessor} that extracts the text content of a PDF using
 * PDFBox.
 *
 * <p>
 * The PDF is taken from the {@link #SOURCE_PARAMETER} parameter of the
 * {@link DocumentInput}; the extracted text is returned wrapped in a
 * {@link DocumentOutput}.
 * </p>
 */
public class GetTextProcessor implements DocumentProcessor {

    private static final long serialVersionUID = RockFrameworkVersion.VERSION;

    /** Name of the input parameter that carries the PDF to read. */
    public static final String SOURCE_PARAMETER = "source";

    public GetTextProcessor() {
        super();
    }

    /**
     * Reads the PDF given by the {@link #SOURCE_PARAMETER} parameter and
     * returns its text.
     *
     * @param input the processor input; must not be {@code null} and must
     *            carry a non-null {@link #SOURCE_PARAMETER} parameter.
     *            NOTE(review): the accepted source types are whatever
     *            {@code PDFBox.read(Object)} supports - confirm there.
     * @return a {@link DocumentOutput} built from the extracted text
     * @throws DocumentException if the source parameter is not set
     * @throws PDFException if parsing, text extraction or closing the
     *             document fails with an {@link IOException}
     */
    @Override
    public DocumentOutput process(final DocumentInput input) {
        Assert.notNull(input, "input");
        try {
            Object source = input.getParameter(GetTextProcessor.SOURCE_PARAMETER);
            if (source == null) {
                throw new DocumentException("Parameter '" + GetTextProcessor.SOURCE_PARAMETER + "' must be set");
            }

            PDFParser parser = PDFBox.read(source);
            parser.parse();

            PDDocument document = parser.getPDDocument();
            try {
                PDFTextStripper stripper = new PDFTextStripper();
                String text = stripper.getText(document);
                return new DocumentOutput(text);
            } finally {
                // Close on every path: a failure while creating the stripper
                // or extracting the text must not leak the parsed document.
                document.close();
            }
        } catch (IOException e) {
            throw new PDFException(e);
        }
    }

}