/*
* This file is part of rockframework.
*
* rockframework is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* rockframework is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package br.net.woodstock.rockframework.document.pdf.itextpdf;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import br.net.woodstock.rockframework.core.RockFrameworkVersion;
import br.net.woodstock.rockframework.core.util.Assert;
import br.net.woodstock.rockframework.document.DocumentException;
import br.net.woodstock.rockframework.document.DocumentInput;
import br.net.woodstock.rockframework.document.DocumentOutput;
import br.net.woodstock.rockframework.document.DocumentProcessor;
import br.net.woodstock.rockframework.document.pdf.PDFException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
/**
 * {@link DocumentProcessor} that extracts the text content of a PDF document
 * using the iText text-extraction parser.
 * <p>
 * The PDF is taken from the {@link #SOURCE_PARAMETER} parameter of the input and
 * handed to {@code IText.read(Object)}; the accepted source types are whatever
 * that helper supports (not visible from this class).
 */
public class GetTextProcessor implements DocumentProcessor {

    /** Serialization version, bound to the framework version constant. */
    private static final long serialVersionUID = RockFrameworkVersion.VERSION;

    /** Name of the input parameter that holds the PDF to be read. */
    public static final String SOURCE_PARAMETER = "source";

    /** Creates a new processor; the processor keeps no state between calls. */
    public GetTextProcessor() {
        super();
    }

    /**
     * Extracts the text of every page of the PDF referenced by the
     * {@link #SOURCE_PARAMETER} parameter.
     * <p>
     * Pages are processed in order (1-based) and the text of each page is
     * followed by the platform line separator.
     *
     * @param input the processor input; must not be {@code null} and must carry
     *              a non-null {@link #SOURCE_PARAMETER} parameter
     * @return a {@link DocumentOutput} wrapping the extracted text
     * @throws DocumentException if the {@link #SOURCE_PARAMETER} parameter is missing
     * @throws PDFException if reading or parsing the PDF fails with an {@link IOException}
     */
    @Override
    public DocumentOutput process(final DocumentInput input) {
        Assert.notNull(input, "input");
        PdfReader reader = null;
        try {
            Object source = input.getParameter(GetTextProcessor.SOURCE_PARAMETER);
            if (source == null) {
                throw new DocumentException("Parameter '" + GetTextProcessor.SOURCE_PARAMETER + "' must be set");
            }

            reader = IText.read(source);
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            int pageCount = reader.getNumberOfPages();

            // Accumulate characters directly instead of round-tripping through
            // bytes in the platform default charset, which could corrupt text
            // that the default charset cannot represent.
            String lineSeparator = System.getProperty("line.separator");
            StringBuilder text = new StringBuilder();

            for (int page = 1; page <= pageCount; page++) {
                // A fresh strategy per page: the strategy accumulates everything
                // it has rendered, so reusing one instance would make each page's
                // result also contain the text of all previous pages.
                TextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                TextExtractionStrategy result = parser.processContent(page, strategy);
                text.append(result.getResultantText()).append(lineSeparator);
            }

            return new DocumentOutput(text.toString());
        } catch (IOException e) {
            throw new PDFException(e);
        } finally {
            // Release the reader even when extraction fails part-way through.
            if (reader != null) {
                reader.close();
            }
        }
    }
}