Examples of PDFTextStripper


Examples of org.pdfbox.util.PDFTextStripper

    this.wordReader = new FastBufferedReader();
  }

  public PdfDocumentFactory( final String[] property ) throws IOException, ConfigurationException {
    super( property );
    this.textStripper= new PDFTextStripper();
    this.wordReader = new FastBufferedReader();
  }
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

    return FieldType.TEXT;
  }

  private void readObject( final ObjectInputStream s ) throws IOException, ClassNotFoundException {
    s.defaultReadObject();
    textStripper = new PDFTextStripper();
  }
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

  return; }

  //*-- extract PDF document's textual content
  String docText = null;
  try
  { PDFTextStripper stripper = new PDFTextStripper();
    docText = stripper.getText(new PDDocument(cosDoc));
  }
  catch (OutOfMemoryError exc)
  { closeCOSDocument(cosDoc);
    logger.error("Ran out of memory for " + ifile + " or could be corrupt file " + exc.getMessage());
  }
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

    {
        URL url = new URL("http://localhost:8080/xwiki/bin/export/Main/WebHome?format=pdf");
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        InputStream is = connection.getInputStream();
        PDDocument pdd = PDDocument.load(is);
        PDFTextStripper stripper = new PDFTextStripper();
        String text = stripper.getText(pdd);
        pdd.close();
        is.close();

        assertTrue("Invalid content", text.contains("Welcome to your wiki"));
    }
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

      document = PDDocument.load(bis);
      if (document.isEncrypted()) {
        throw new DocumentAccessException("PDF is encrypted. Can not read content file=" + leaf.getName());
      }     
      if (log.isDebug()) log.debug("readContent PDDocument loaded");
      PDFTextStripper stripper = new PDFTextStripper();
      return stripper.getText(document);
    } finally {
      if (document != null) {
        document.close();
      }
      if (bis != null) {
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

            //create a writer where to append the text content.
            StringWriter writer = new StringWriter();
            if( stripper == null )
            {
                stripper = new PDFTextStripper();
            }
            else
            {
                stripper.resetEngine();
            }
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

     * @throws IOException If there is an error creating the test.
     */
    public TestTextStripper( String name ) throws IOException
    {
        super( name );
        stripper = new PDFTextStripper();
        stripper.setLineSeparator("\n");
    }
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

     */
    public void doTestFile(File file, boolean bLogResult)
        throws Exception
    {

        PDFTextStripper stripper = new PDFTextStripper();
        OutputStream os = null;
        Writer writer = null;
        PDDocument document = null;
        try
        {
            document = PDDocument.load(file);

            File outFile = new File(file.getParentFile().getParentFile(), "output/" + file.getName() + ".txt");
            os = new FileOutputStream(outFile);
            writer = new OutputStreamWriter(os);

            stripper.writeText(document, writer);
        }
        finally
        {
            if( writer != null )
            {
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

                        output = new OutputStreamWriter(
                            new FileOutputStream( textFile ) );
                    }
                }
   
                PDFTextStripper stripper = null;
                if(toHTML)
                {
                   stripper = new PDFText2HTML();
                }
                else
                {
                   stripper = new PDFTextStripper();
                }
                stripper.setSortByPosition( sort );
                stripper.setStartPage( startPage );
                stripper.setEndPage( endPage );
                stripper.writeText( document, output );
            }
            finally
            {
                if( output != null )
                {
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

                        PDDocument document = parser.getPDDocument();
                        try {
                            CharArrayWriter writer = new CharArrayWriter();

                            PDFTextStripper stripper = new PDFTextStripper();
                            stripper.setLineSeparator("\n");
                            stripper.writeText(document, writer);

                            delegate = new CharArrayReader(writer.toCharArray());
                        } finally {
                            document.close();
                        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.