// Extracts the text and basic metadata (author, title, subject) of a PDF read from 'in'.
// The document handle is declared before the try so it remains in scope once the try block ends.
// NOTE(review): presumably it is closed in a finally clause outside this view -- confirm.
PDDocument document = null;
try {
// Stripper that will emit "\n" as the line separator in the extracted text.
PDFTextStripper stripper = new PDFTextStripper();
stripper.setLineSeparator("\n");
//load the document
document = PDDocument.load(in);
// Metadata values default to the empty string; each is overwritten only when
// Util.isEmpty reports the corresponding document-information entry as not empty.
String author = "";
String title = "";
String summary = "";
//get the additional data
// Best effort: any exception raised while reading the document information is
// caught below, reported at info level, and processing continues with whatever
// values were assigned so far.
try {
PDDocumentInformation pdfinfo = document.getDocumentInformation();
if (!Util.isEmpty(pdfinfo.getAuthor())) {
author = pdfinfo.getAuthor();
}
if (!Util.isEmpty(pdfinfo.getTitle())) {
title = pdfinfo.getTitle();
}
// The PDF "subject" entry is what feeds the summary value.
if (!Util.isEmpty(pdfinfo.getSubject())) {
summary = pdfinfo.getSubject();
}
} catch (Exception eR) {
// NOTE(review): only the message built from the URI is logged; the caught
// exception eR itself is not passed to the logger, so its cause is lost.
String message = MessageUtil.getMessage("extractor.pdf.metadatamissing",
new Object[] { info.getUri() });
logger.info(message);
}
//set the buffer
// 'bout' and 'writer' are assigned here but declared outside this view.
// NOTE(review): the OutputStreamWriter is created without an explicit charset, so
// the default charset is used; whatever later decodes 'bout' must use the same
// one -- confirm.
bout = new ByteArrayOutputStream();
writer = new OutputStreamWriter(bout);
//strip the document to the buffer
stripper.writeText(document, writer);
// Flush so that all characters buffered by the writer reach the byte buffer before it is read.
// NOTE(review): 'bout' is flushed before 'writer'; flushing the writer is the call
// that actually pushes pending characters into 'bout' -- the order looks
// unintentional but is harmless for an in-memory stream; confirm.
bout.flush();
writer.flush();
//construct the patterns (to not ignore and replace)
// getNotIgnoreChars() supplies the regular expression compiled here.
// NOTE(review): the pattern is compiled on every execution of this code; consider
// caching it if the expression is constant -- confirm.
Pattern notIgnorePattern = Pattern.compile(getNotIgnoreChars());