Examples of PDFTextStripper


Examples of org.pdfbox.util.PDFTextStripper

            try {
                log.debug("parse() Attempting to extract text from (" + filename + ")");

                output = new StringWriter();

                PDFTextStripper stripper = new PDFTextStripper();
                stripper.writeText(document, output);

                log.debug("parse() Successfully stripped out text from (" + filename + ")");
            }
            catch (IOException ioe) {
                log.error("parse() failed", ioe);
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

    public Reader convertToPlainText(InputStream source, WVTDocumentInfo d) {

        String plainText = null;
        try {
            PDDocument document = PDDocument.load(source);
            PDFTextStripper stripper = new PDFTextStripper();
            plainText = stripper.getText(document);
            document.close();
        } catch (IOException e) {
            WVToolLogger.getGlobalLogger().logException("Could not read or convert PDF Document", e);
            plainText = new String();
        }
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

        //Just try using the default password and move on
        decryptor.decryptDocument("");
      }

      // collect text
      PDFTextStripper stripper = new PDFTextStripper();
      text = stripper.getText(pdf);

      // collect title
      PDDocumentInformation info = pdf.getDocumentInformation();
      title = info.getTitle();
      // more useful info, currently not used. please keep them for future use.
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

        //Just try using the default password and move on
        decryptor.decryptDocument("");
      }

      // collect text
      PDFTextStripper stripper = new PDFTextStripper();
      text = stripper.getText(pdf);

      // collect title
      PDDocumentInformation info = pdf.getDocumentInformation();
      title = info.getTitle();
      // more useful info, currently not used. please keep them for future use.
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

            PDDocument document = parser.getPDDocument();

            CharArrayWriter writer = new CharArrayWriter();

            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setLineSeparator("\n");
            stripper.writeText(document, writer);

            document.close();
            writer.close();

            return new CharArrayReader(writer.toCharArray());
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

    return getTextInternal(startPage, endPage);
  }

  protected String getTextInternal(int startPage, int endPage) {
    try {
      final PDFTextStripper textStripper = new PDFTextStripper();
      textStripper.setStartPage(startPage);
      textStripper.setEndPage(endPage);
      return textStripper.getText(getPDFDocument());
    } catch (final IOException e) {
      throw new RuntimeException("Problem extracting text", e);
    }
  }
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

        final String lineSeparator, final String pageSeparator)
    {
        final StringWriter output = new StringWriter();
        try
        {
            final PDFTextStripper textStripper = new PDFTextStripper();
            textStripper.setPageSeparator(pageSeparator);
            textStripper.setLineSeparator(lineSeparator);
            textStripper.setStartPage(startPage);
            textStripper.setEndPage(endPage);
            textStripper.writeText(getPDFDocument(), output);
            return output.toString();
        }
        catch (final Exception e)
        {
            throw new RuntimeException("Error while extracting text from document.", e);
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

        final List fragments = new ArrayList();

        final StringWriter output = new StringWriter();
        try
        {
            final PDFTextStripper textStripper = new PDFTextStripper()
            {
                protected void showCharacter(TextPosition textPosition) {
                    fragments.add(textPosition);
                }
            };
            textStripper.setLineSeparator(lineSeparator);
            textStripper.setStartPage(page);
            textStripper.setEndPage(page);
            textStripper.writeText(getPDFDocument(), output);
            return fragments;
        }
        catch (final Exception e)
        {
            throw new RuntimeException("Error while extracting text from document.", e);
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

        this.contentHandler.startPrefixMapping(PREFIX, NAMESPACE);
        this.contentHandler.startElement(NAMESPACE, "document", PREFIX + ":document",
                new AttributesImpl());

        try {
            PDFTextStripper stripper = new PDFTextStripper();
            PDFParser parser = new PDFParser(this.content.getInputStream());
            parser.parse();
            PDDocument doc = parser.getPDDocument();
            String text = stripper.getText(doc);
            doc.close();
            char[] chars = text.toCharArray();
            this.contentHandler.characters(chars, 0, chars.length);
        } catch (Exception e) {
            throw new ProcessingException(e);
View Full Code Here

Examples of org.pdfbox.util.PDFTextStripper

            try {
                parser.parse();
                PDDocument document = parser.getPDDocument();
                CharArrayWriter writer = new CharArrayWriter();

                PDFTextStripper stripper = new PDFTextStripper();
                stripper.setLineSeparator("\n");
                stripper.writeText(document, writer);

                return new CharArrayReader(writer.toCharArray());
            } finally {
                try {
                    PDDocument doc = parser.getPDDocument();
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Inc. Contact coftware#gmail.com.