Examples of org.pdfbox.util.PDFTextStripper.writeText()

org.pdfbox.util.PDFTextStripper.writeText()
@deprecated @see PDFTextStripper#writeText(PDDocument,Writer) @param doc The document to extract the text from. @param outputStream The stream to write the text to. @throws IOException If there is an error extracting the text.

       file = File.createTempFile("extract", ".tmp");
       tempFiles.markForDeletion(file);
       Writer output = null;
       output = new OutputStreamWriter(new FileOutputStream(file), DEFAULT_ENCODING);
       PDFTextStripper stripper = new PDFTextStripper();
       stripper.writeText(document, output);
       output.close();
       if(document != null)
          document.close();
      
     } catch (Exception e) {

View Full Code Here

       file = File.createTempFile("extract", ".tmp");
       tempFiles.markForDeletion(file);
       Writer output = null;
       output = new OutputStreamWriter(new FileOutputStream(file), "UTF-8");
       PDFTextStripper stripper = new PDFTextStripper();
       stripper.writeText(document, output);
       output.close();
       if(document != null)
          document.close();
      
     } catch (Exception e) {

View Full Code Here

       file = File.createTempFile("extract", ".tmp");
       tempFiles.markForDeletion(file);
       Writer output = null;
       output = new OutputStreamWriter(new FileOutputStream(file), "UTF-8");
       PDFTextStripper stripper = new PDFTextStripper();
       stripper.writeText(document, output);
       output.close();
      
      
     } catch (Exception e) {
         throw new ExtractionException("failed to extract pdf (probable password protected document)",e,logger);

View Full Code Here

                output = new OutputStreamWriter(
                    new FileOutputStream( textFile ), encoding );
            }


            start = System.currentTimeMillis();
            stripper.writeText( document, output );
            stop = System.currentTimeMillis();
            LOG.info( "Time to extract text time=" +(stop-start) );
        }
        finally
        {

View Full Code Here


            //create a tmp output stream with the size of the content.
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            OutputStreamWriter writer = new OutputStreamWriter( out );
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.writeText( pdfDocument.getDocument(), writer );
            writer.close();


            byte[] contents = out.toByteArray();
            InputStreamReader input = new InputStreamReader( new ByteArrayInputStream( contents ) );
            // Add the tag-stripped contents as a Reader-valued Text field so it will

View Full Code Here

                      PDFTextStripper stripper = new PDFTextStripper();
  
                      //stripper.setSortByPosition( sort );
                      //stripper.setStartPage( startPage );
                      //stripper.setEndPage( endPage );
                      stripper.writeText( document, output );
                      text = baos.toString("UTF-8");
                      if(logger.isInfoEnabled())
                logger.info("PDF Document has " + text.length() + " chars\n\n" + text);
                  }
              }

View Full Code Here

0 1 2

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.