}
else
{
Writer output = null;
PDDocument document = null;
try
{
long startTime = startProcessing("Loading PDF "+pdfFile);
if( outputFile == null && pdfFile.length() >4 )
{
outputFile = new File( pdfFile.substring( 0, pdfFile.length() -4 ) + ext ).getAbsolutePath();
}
if (useNonSeqParser)
{
document = PDDocument.loadNonSeq(new File( pdfFile ), password);
}
else
{
document = PDDocument.load(pdfFile, force);
if( document.isEncrypted() )
{
StandardDecryptionMaterial sdm = new StandardDecryptionMaterial( password );
document.openProtection( sdm );
}
}
AccessPermission ap = document.getCurrentAccessPermission();
if( ! ap.canExtractContent() )
{
throw new IOException( "You do not have permission to extract text" );
}
stopProcessing("Time for loading: ", startTime);
if( toConsole )
{
output = new OutputStreamWriter( System.out, encoding );
}
else
{
output = new OutputStreamWriter( new FileOutputStream( outputFile ), encoding );
}
PDFTextStripper stripper;
if(toHTML)
{
stripper = new PDFText2HTML();
}
else
{
stripper = new PDFTextStripper();
}
stripper.setForceParsing( force );
stripper.setSortByPosition( sort );
stripper.setShouldSeparateByBeads( separateBeads );
stripper.setStartPage( startPage );
stripper.setEndPage( endPage );
startTime = startProcessing("Starting text extraction");
if (debug)
{
System.err.println("Writing to "+outputFile);
}
// Extract text for main document:
stripper.writeText( document, output );
// ... also for any embedded PDFs:
PDDocumentCatalog catalog = document.getDocumentCatalog();
PDDocumentNameDictionary names = catalog.getNames();
if (names != null)
{
PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
if (embeddedFiles != null)
{
Map<String,COSObjectable> embeddedFileNames = embeddedFiles.getNames();
if (embeddedFileNames != null) {
for (Map.Entry<String,COSObjectable> ent : embeddedFileNames.entrySet())
{
if (debug)
{
System.err.println("Processing embedded file " + ent.getKey() + ":");
}
PDComplexFileSpecification spec = (PDComplexFileSpecification) ent.getValue();
PDEmbeddedFile file = spec.getEmbeddedFile();
if (file != null && file.getSubtype().equals("application/pdf"))
{
if (debug)
{
System.err.println(" is PDF (size=" + file.getSize() + ")");
}
InputStream fis = file.createInputStream();
PDDocument subDoc = null;
try
{
subDoc = PDDocument.load(fis);
}
finally
{
fis.close();
}
try
{
stripper.writeText( subDoc, output );
}
finally
{
subDoc.close();
}
}
}
}
}