Package org.pdfbox.util

Examples of org.pdfbox.util.PDFTextStripper.writeText()


       file = File.createTempFile("extract", ".tmp");
       tempFiles.markForDeletion(file);
       Writer output = null;
       output = new OutputStreamWriter(new FileOutputStream(file), DEFAULT_ENCODING);
       PDFTextStripper stripper = new PDFTextStripper();
       stripper.writeText(document, output);
       output.close();
       if(document != null)
          document.close();
     
     } catch (Exception e) {
View Full Code Here


       file = File.createTempFile("extract", ".tmp");
       tempFiles.markForDeletion(file);
       Writer output = null;
       output = new OutputStreamWriter(new FileOutputStream(file), "UTF-8");
       PDFTextStripper stripper = new PDFTextStripper();
       stripper.writeText(document, output);
       output.close();
       if(document != null)
          document.close();
     
     } catch (Exception e) {
View Full Code Here

       file = File.createTempFile("extract", ".tmp");
       tempFiles.markForDeletion(file);
       Writer output = null;
       output = new OutputStreamWriter(new FileOutputStream(file), "UTF-8");
       PDFTextStripper stripper = new PDFTextStripper();
       stripper.writeText(document, output);
       output.close();
     
     
     } catch (Exception e) {
         throw new ExtractionException("failed to extract pdf (probable password protected document)",e,logger);
View Full Code Here

                output = new OutputStreamWriter(
                    new FileOutputStream( textFile ), encoding );
            }

            start = System.currentTimeMillis();
            stripper.writeText( document, output );
            stop = System.currentTimeMillis();
            LOG.info( "Time to extract text time=" +(stop-start) );
        }
        finally
        {
View Full Code Here

            //create a tmp output stream with the size of the content.
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            OutputStreamWriter writer = new OutputStreamWriter( out );
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.writeText( pdfDocument.getDocument(), writer );
            writer.close();

            byte[] contents = out.toByteArray();
            InputStreamReader input = new InputStreamReader( new ByteArrayInputStream( contents ) );
            // Add the tag-stripped contents as a Reader-valued Text field so it will
View Full Code Here

                      PDFTextStripper stripper = new PDFTextStripper();
 
                      //stripper.setSortByPosition( sort );
                      //stripper.setStartPage( startPage );
                      //stripper.setEndPage( endPage );
                      stripper.writeText( document, output );
                      text = baos.toString("UTF-8");
                      if(logger.isInfoEnabled())
                logger.info("PDF Document has " + text.length() + " chars\n\n" + text);
                  }
              }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.