Package com.itextpdf.text.pdf

Examples of com.itextpdf.text.pdf.PdfReader


      PdfCopy writer = new PdfCopy(document, outputStream);

      document.open();

      for (InputStream source : sources) {
        PdfReader reader = new PdfReader(source);
        int pageCount = reader.getNumberOfPages();
        for (int i = 1; i <= pageCount; i++) {
          PdfImportedPage page = writer.getImportedPage(reader, i);
          writer.addPage(page);
        }
        reader.close();
      }

      document.close();
      writer.close();
View Full Code Here


  public InputStream[] split(final InputStream source, final int size) throws IOException {
    try {
      Assert.notNull(source, "source");
      Assert.greaterThan(size, 0, "size");

      PdfReader reader = new PdfReader(source);
      int pageCount = reader.getNumberOfPages();
      List<InputStream> list = new LinkedList<InputStream>();

      Document document = null;
      InputOutputStream outputStream = null;
      PdfCopy writer = null;
      for (int i = 1; i <= pageCount; i++) {
        if ((document == null) || (i % size == 0)) {
          if (document != null) {
            document.close();
            writer.close();
            list.add(outputStream.getInputStream());
          }
          document = new Document(reader.getPageSizeWithRotation(1));
          outputStream = new InputOutputStream();
          writer = new PdfCopy(document, outputStream);
        }
        PdfImportedPage page = writer.getImportedPage(reader, i);
        writer.addPage(page);
      }

      if (document != null) {
        document.close();
        writer.close();
        list.add(outputStream.getInputStream());
      }

      reader.close();

      return list.toArray(new InputStream[list.size()]);
    } catch (DocumentException e) {
      throw new br.net.woodstock.rockframework.office.DocumentException(e);
    }
View Full Code Here

  @Override
  public String getText(final InputStream source) throws IOException {
    Assert.notNull(source, "source");

    PdfReader reader = new PdfReader(source);
    PdfReaderContentParser parser = new PdfReaderContentParser(reader);
    TextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
    int pageCount = reader.getNumberOfPages();
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    PrintWriter writer = new PrintWriter(outputStream);

    for (int i = 1; i <= pageCount; i++) {
      TextExtractionStrategy result = parser.processContent(i, strategy);
      String pageText = result.getResultantText();
      writer.println(pageText);
    }

    reader.close();
    writer.close();

    String text = new String(outputStream.toByteArray());
    return text;
  }
View Full Code Here

          document.add(img);
          document.close();

          // Second - read the memory based PDF file and read the image. Write
          // this into PNG file.
          PdfReader reader = new PdfReader(baos.toByteArray());
          for (int j = 0; j < reader.getXrefSize(); j++) {
            PdfObject pdfobj = reader.getPdfObject(j);
            if (pdfobj != null) {
              if (pdfobj.isStream()) {
                PdfStream stream = (PdfStream) pdfobj;
                PdfObject pdfsubtype = stream.get(PdfName.SUBTYPE);
                if (pdfsubtype != null) {
                  if (pdfsubtype.toString().equals(PdfName.IMAGE.toString())) {
                    PdfImageObject image = new PdfImageObject((PRStream) stream);
                    BufferedImage bufferedImage = image.getBufferedImage();
                    if (bufferedImage != null) {
                      String pngName = IdUtils.getUniqueID() + ".png";
                      File pngFile = new File(tmpDirectory, pngName);
                      FileOutputStream out = new FileOutputStream(pngFile);
                      ImageIO.write(bufferedImage, "png", out);
                      out.close();
                      list.add(pngFile);
                    }
                  }
                }
              }
            }
          }
          reader.close();
        } catch (Exception exception) {
          // Ignore - the page may be corrupt.
        }
      }
    }
View Full Code Here

   * @param pdfFile File object with pdf file.
   * @return Text content of the pdf file.
   * @throws IOException
   */
  public static String getTextFromPdfFile(File pdfFile) throws IOException {
    PdfReader pdfReader = new PdfReader(pdfFile.getCanonicalPath());
    String content = "";
    for (int i = 0; i < pdfReader.getNumberOfPages(); i++) {
      String pageContent;
      try {
        pageContent = PdfTextExtractor.getTextFromPage(pdfReader, i + 1);
      }
      catch (Exception e) {
        // LogUtils.log(e);
        pageContent = null;
      }
      if (pageContent != null) {
        pageContent = pageContent.trim();
        if (pageContent.length() > 0) {
          if (i > 0) {
            content += "\r\n\r\n";
          }
          content += pageContent;
        }
      }
    }
    pdfReader.close();
    return content;
  }
View Full Code Here

                InputStream is = resolveAndOpenStream(uriStr);
                if (is != null) {
                    try {
                        URI uri = new URI(uriStr);
                        if (uri.getPath() != null && uri.getPath().toLowerCase().endsWith(".pdf")) {
                            PdfReader reader = _outputDevice.getReader(uri);
                            PDFAsImage image = new PDFAsImage(uri);
                            Rectangle rect = reader.getPageSizeWithRotation(1);
                            image.setInitialWidth(rect.getWidth() * _outputDevice.getDotsPerPoint());
                            image.setInitialHeight(rect.getHeight() * _outputDevice.getDotsPerPoint());
                            resource = new ImageResource(uriStr, image);
                        } else {
                            Image image = Image.getInstance(readStream(is));
View Full Code Here

        }
    }

    private void drawPDFAsImage(PDFAsImage image, int x, int y) {
        URI uri = image.getURI();
        PdfReader reader = null;

        try {
            reader = getReader(uri);
        } catch (IOException e) {
            throw new XRRuntimeException("Could not load " + uri + ": " + e.getMessage(), e);
View Full Code Here

        _currentPage.addTemplate(page, (float) mx[0], (float) mx[1], (float) mx[2], (float) mx[3], (float) mx[4], (float) mx[5]);
        _currentPage.saveState();
    }

    public PdfReader getReader(URI uri) throws IOException {
        PdfReader result = (PdfReader) _readerCache.get(uri);
        if (result == null) {
            result = new PdfReader(getSharedContext().getUserAgentCallback().getBinaryResource(uri.toString()));
            _readerCache.put(uri, result);
        }
        return result;
    }
View Full Code Here

   
    public ExtractorPDFContent() {
    }
   
    protected boolean innerExtract(CrawlURI curi){
        PdfReader documentReader;
        ArrayList<String> uris = new ArrayList<String>();
       
        try {
            documentReader = new PdfReader(curi.getRecorder().getContentReplayInputStream());

            for(int i=1; i<= documentReader.getNumberOfPages(); i++) { //Page numbers start at 1
                String pageParseText = extractPageText(documentReader,i);
                Matcher matcher = URLPattern.matcher(pageParseText);

                while(matcher.find()) {
                    String prospectiveURL = pageParseText.substring(matcher.start(),matcher.end()).trim();
View Full Code Here

    static int countPagesInPdf(String candidate) {

        // LOGGER.log(Level.INFO, System.getProperty("user.dir"));

        PdfReader reader = null;
        try {
            reader = new PdfReader(candidate);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Exception: file not found - therefore I cannot count pages", e);
        }

        return reader.getNumberOfPages();
    }
View Full Code Here

TOP

Related Classes of com.itextpdf.text.pdf.PdfReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.