Examples of PDDocument

com.dotcms.repackage.org.apache.pdfbox.pdmodel.PDDocument
de.intarsys.pdf.pd.PDDocument
org.apache.pdfbox.pdmodel.PDDocument
This is the in-memory representation of the PDF document. You need to call close() on this object when you are done using it!! @author Ben Litchfield @version $Revision: 1.47 $
org.pdfbox.pdmodel.PDDocument
This is the in-memory representation of the PDF document. You need to call close() on this object when you are done using it!! @author Ben Litchfield @version $Revision: 1.43 $

Examples of org.pdfbox.pdmodel.PDDocument


      assertEquals(originalAuthors, AuthorList.getAuthorList(b.getField(
          "author").toString()));


      // Next check from Document Information
      PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile());
      try {


        assertEquals(originalAuthors, AuthorList.getAuthorList(document
            .getDocumentInformation().getAuthor()));


        b = XMPUtil.getBibtexEntryFromDocumentInformation(document
            .getDocumentInformation());
        assertEquals(originalAuthors, AuthorList.getAuthorList(b
            .getField("author").toString()));


        // Now check from Dublin Core
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();


        if (metaRaw == null) {
          fail();
        }


        XMPMetadata meta = new XMPMetadata(XMLUtil.parse(metaRaw
            .createInputStream()));
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE,
            XMPSchemaBibtex.class);


        List<XMPSchema> schemas = meta
            .getSchemasByNamespaceURI("http://purl.org/dc/elements/1.1/");


        assertEquals(1, schemas.size());


        XMPSchemaDublinCore dcSchema = (XMPSchemaDublinCore) schemas
            .iterator().next();
        assertNotNull(dcSchema);


        assertEquals("David Patterson", dcSchema.getCreators().get(0));
        assertEquals("Arvind", dcSchema.getCreators().get(1));
        assertEquals("Krste Asanov\\'\\i{}c", dcSchema.getCreators()
            .get(2));


        b = XMPUtil.getBibtexEntryFromDublinCore(dcSchema);
        assertEquals(originalAuthors, AuthorList.getAuthorList(b
            .getField("author").toString()));
      } finally {
        document.close();
      }


    } finally {
      pdfFile.delete();
    }

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument


  //*-- extract PDF document's textual content
  String docText = null;
  try 
  { PDFTextStripper stripper = new PDFTextStripper();
    docText = stripper.getText(new PDDocument(cosDoc));
  }
  catch (OutOfMemoryError exc) 
  { closeCOSDocument(cosDoc);
    logger.error("Ran out of memory for " + ifile + " or could be corrupt file " + exc.getMessage()); 
  }
  catch (Exception e) 
  { closeCOSDocument(cosDoc);
    logger.error("Cannot get text from PDF document " + ifile + " " + e.getMessage());
    return;
  }
  //*-- Extract the entire text and save in the contents 
  if (docText != null) 
  { docText = StringTools.filterChars(docText); doc.setContents(new StringBuffer(docText) ); }


  //*-- Extract PDF document's meta-data
  PDDocument pdDoc = null;
  try 
  {
   logger.info("Extracting metadata from PDF file " + ifile);
   pdDoc = new PDDocument(cosDoc);
   PDDocumentInformation docInfo = pdDoc.getDocumentInformation();
   String author   = StringTools.filterChars(docInfo.getAuthor());
   String title    = StringTools.filterChars(docInfo.getTitle());
   String keywords = StringTools.filterChars(docInfo.getKeywords());
   String summary  = StringTools.filterChars(docInfo.getSubject());
   if ((author != null) && (!author.equals("")))     { doc.setAuthor(author); }

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

  /**
   * Reads BibTeX entries from the metadata of the PDF supplied on the given
   * stream.
   *
   * The sources are consulted in this order, stopping at the first one that
   * yields at least one entry: XMP schemas in the BibTeX namespace, XMP
   * Dublin Core schemas, and finally the PDF document information.
   *
   * @param inputStream
   *            stream the PDF document is loaded from
   * @return the entries that were found, or null if none were found
   * @throws IOException
   *             declared by this method; the document is loaded and closed
   *             inside it
   * @throws EncryptionNotSupportedException
   *             if the loaded document reports that it is encrypted
   */
  public static List<BibtexEntry> readXMP(InputStream inputStream)
      throws IOException {


    List<BibtexEntry> result = new LinkedList<BibtexEntry>();


    PDDocument document = null;


    try {
      document = PDDocument.load(inputStream);
      if (document.isEncrypted()) {
        throw new EncryptionNotSupportedException(
            "Error: Cannot read metadata from encrypted document.");
      }


      XMPMetadata meta = getXMPMetadata(document);


      // XMP metadata is only inspected when the document has some; if it
      // has none (or it yields no entry) the document-information fallback
      // further down is used instead.
      if (meta != null) {


              // First choice: schemas registered under the BibTeX namespace
              List<XMPSchema> schemas = meta
                  .getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
        
              Iterator<XMPSchema> it = schemas.iterator();
              while (it.hasNext()) {
                XMPSchemaBibtex bib = (XMPSchemaBibtex) it.next();
        
                result.add(bib.getBibtexEntry());
              }
        
              // If we did not find anything have a look if a Dublin Core exists
              if (result.size() == 0) {
                schemas = meta
                    .getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
                it = schemas.iterator();
                while (it.hasNext()) {
                  XMPSchemaDublinCore dc = (XMPSchemaDublinCore) it.next();
        
                  BibtexEntry entry = getBibtexEntryFromDublinCore(dc);
        
                  // a Dublin Core schema that converts to null is skipped
                  if (entry != null)
                    result.add(entry);
                }
              }
      }
      // Last resort: the (non-XMP) PDF document information
      if (result.size() == 0) {
        BibtexEntry entry = getBibtexEntryFromDocumentInformation(document
            .getDocumentInformation());


        if (entry != null)
          result.add(entry);
      }
    } finally {
      // close the document on every path, including the encrypted case
      if (document != null)
        document.close();
    }
    
    // return null, if no metadata was found
    if (result.size()==0) return null;
    return result;

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

   *         found.
   * @throws IOException
   */
  public static XMPMetadata readRawXMP(InputStream inputStream)
      throws IOException {
    PDDocument document = null;


    try {
      document = PDDocument.load(inputStream);
      if (document.isEncrypted()) {
        throw new EncryptionNotSupportedException(
            "Error: Cannot read metadata from encrypted document.");
      }


      return getXMPMetadata(document);


    } finally {
      if (document != null)
        document.close();
    }
  }

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

      boolean writePDFInfo) throws IOException, TransformerException {


    if (databasee != null)
      bibtexEntries = databasee.resolveForStrings(bibtexEntries, false);


    PDDocument document = null;


    try {
      document = PDDocument.load(file.getAbsoluteFile());
      if (document.isEncrypted()) {
        throw new EncryptionNotSupportedException(
            "Error: Cannot add metadata to encrypted document.");
      }


      if (writePDFInfo && bibtexEntries.size() == 1) {
        writeDocumentInformation(document, bibtexEntries
            .iterator().next(), null);
        writeDublinCore(document, bibtexEntries, null);
      }


      PDDocumentCatalog catalog = document.getDocumentCatalog();
      PDMetadata metaRaw = catalog.getMetadata();


      XMPMetadata meta;
      if (metaRaw != null) {
        meta = new XMPMetadata(XMLUtil.parse(metaRaw
            .createInputStream()));
      } else {
        meta = new XMPMetadata();
      }
      meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE,
          XMPSchemaBibtex.class);


      // Remove all current Bibtex-schemas
      List schemas = meta
          .getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
      Iterator it = schemas.iterator();
      while (it.hasNext()) {
        XMPSchemaBibtex bib = (XMPSchemaBibtex) it.next();
        bib.getElement().getParentNode().removeChild(bib.getElement());
      }


      it = bibtexEntries.iterator();
      while (it.hasNext()) {
        BibtexEntry e = (BibtexEntry) it.next();
        XMPSchemaBibtex bibtex = new XMPSchemaBibtex(meta);
        meta.addSchema(bibtex);
        bibtex.setBibtexEntry(e, null);
      }


      // Save to stream and then input that stream to the PDF
      ByteArrayOutputStream os = new ByteArrayOutputStream();
      meta.save(os);
      ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
      PDMetadata metadataStream = new PDMetadata(document, is, false);
      catalog.setMetadata(metadataStream);


      // Save
      try {
        document.save(file.getAbsolutePath());
      } catch (COSVisitorException e) {
        throw new TransformerException("Could not write XMP-metadata: "
            + e.getLocalizedMessage());
      }


    } finally {
      if (document != null) {
        document.close();
      }
    }
  }

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

    public void testExportSingleSimplePageAsPDF() throws Exception
    {
        URL url = new URL("http://localhost:8080/xwiki/bin/export/Main/WebHome?format=pdf");
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        InputStream is = connection.getInputStream();
        PDDocument pdd = PDDocument.load(is);
        PDFTextStripper stripper = new PDFTextStripper();
        String text = stripper.getText(pdd);
        pdd.close();
        is.close();


        assertTrue("Invalid content", text.contains("Welcome to your wiki"));
    }

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

    bos.close();    
  }


  /**
   * Extracts the text of the PDF stored in the given leaf.
   *
   * @param leaf the leaf whose input stream is read as a PDF document
   * @return the text produced by a PDFTextStripper for the loaded document
   * @throws IOException declared by this method; the stream and the document
   *           are opened and closed inside it
   * @throws DocumentAccessException if the loaded document reports that it is encrypted
   */
  private String extractTextFromPdf(VFSLeaf leaf) throws IOException, DocumentAccessException {
    if (log.isDebug()) log.debug("readContent from pdf starts...");
    PDDocument document = null;
    BufferedInputStream bis = null;
    try {
      bis = new BufferedInputStream(leaf.getInputStream());      
      document = PDDocument.load(bis);
      // encrypted documents are refused rather than decrypted
      if (document.isEncrypted()) {
        throw new DocumentAccessException("PDF is encrypted. Can not read content file=" + leaf.getName());
      }      
      if (log.isDebug()) log.debug("readContent PDDocument loaded");
      PDFTextStripper stripper = new PDFTextStripper();
      return stripper.getText(document);
    } finally {
      // NOTE(review): if document.close() throws, bis.close() below is skipped;
      // consider nesting the two closes in try/finally.
      if (document != null) {
        document.close();
      }
      if (bis != null) {
        bis.close();
      }
    }

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

     *
     * @throws IOException If there is an error writing the data.
     */
    public PDDocument createPDFFromText( Reader text ) throws IOException
    {
        PDDocument doc = null;
        try
        {
            
            int margin = 40;
            float height = font.getFontDescriptor().getFontBoundingBox().getHeight()/1000;
            
            //calculate font height and increase by 5 percent.
            height = height*fontSize*1.05f;
            doc = new PDDocument();
            BufferedReader data = new BufferedReader( text );
            String nextLine = null;
            PDPage page = new PDPage();
            PDPageContentStream contentStream = null;
            float y = -1;
            float maxStringLength = page.getMediaBox().getWidth() - 2*margin;
            while( (nextLine = data.readLine()) != null )
            {
                
                String[] lineWords = nextLine.trim().split( " " );
                int lineIndex = 0;
                while( lineIndex < lineWords.length )
                {   
                    StringBuffer nextLineToDraw = new StringBuffer();
                    float lengthIfUsingNextWord = 0;
                    do
                    {
                        nextLineToDraw.append( lineWords[lineIndex] );
                        nextLineToDraw.append( " " );
                        lineIndex++;
                        if( lineIndex < lineWords.length )
                        {
                            String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
                            lengthIfUsingNextWord = 
                                (font.getStringWidth( lineWithNextWord )/1000) * fontSize;
                        }
                    }
                    while( lineIndex < lineWords.length && 
                           lengthIfUsingNextWord < maxStringLength );
                    if( y < margin )
                    {
                        page = new PDPage();
                        doc.addPage( page );
                        if( contentStream != null )
                        {
                            contentStream.endText();
                            contentStream.close();
                        }
                        contentStream = new PDPageContentStream(doc, page);
                        contentStream.setFont( font, fontSize );
                        contentStream.beginText();
                        y = page.getMediaBox().getHeight() - margin + height;
                        contentStream.moveTextPositionByAmount( 
                            margin, y );
                        
                    }
                    //System.out.println( "Drawing string at " + x + "," + y );
                    
                    if( contentStream == null )
                    {
                        throw new IOException( "Error:Expected non-null content stream." );
                    }
                    contentStream.moveTextPositionByAmount( 0, -height);
                    y -= height;
                    contentStream.drawString( nextLineToDraw.toString() );
                }
                
                
            }  
            if( contentStream != null )
            {
                contentStream.endText();
                contentStream.close();
            }
        }
        catch( IOException io )
        {
            if( doc != null )
            {
                doc.close();
            }
            throw io;
        }
        return doc;
    }

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

     * @throws IOException If there is an error with the PDF.
     */
    public static void main(String[] args) throws IOException
    {
        TextToPDF app = new TextToPDF();
        PDDocument doc = null;
        try
        {
            if( args.length < 2 )
            {
                app.usage();
            }
            else
            {
                for( int i=0; i<args.length-2; i++ )
                {
                    if( args[i].equals( "-standardFont" ))
                    {
                        i++;
                        app.setFont( PDType1Font.getStandardFont( args[i] ));
                    }
                    else if( args[i].equals( "-ttf" ))
                    {
                        i++;
                        PDTrueTypeFont font = PDTrueTypeFont.loadTTF( doc, new File( args[i]));
                        app.setFont( font );
                    }
                    else if( args[i].equals( "-fontSize" ))
                    {
                        i++;
                        app.setFontSize( Integer.parseInt( args[i] ) );
                    }
                    else
                    {
                        throw new IOException( "Unknown argument:" + args[i] );
                    }
                }
                doc = app.createPDFFromText( new FileReader( args[args.length-1] ) );
                doc.save( args[args.length-2] );
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            if( doc != null )
            {
                doc.close();
            }
        }
    }

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

        {
            usage();
        }
        else
        {
            PDDocument document = null;
            try
            {
                document = PDDocument.load( args[0] );
                if( document.isEncrypted() )
                {
                    try
                    {
                        document.decrypt( "" );
                    }
                    catch( InvalidPasswordException e )
                    {
                        System.err.println( "Error: Document is encrypted with a password." );
                        System.exit( 1 );
                    }
                }
                PDFTextStripperByArea stripper = new PDFTextStripperByArea();
                stripper.setSortByPosition( true );
                Rectangle rect = new Rectangle( 10, 280, 275, 60 );
                stripper.addRegion( "class1", rect );
                List allPages = document.getDocumentCatalog().getAllPages();
                PDPage firstPage = (PDPage)allPages.get( 0 );
                stripper.extractRegions( firstPage );
                System.out.println( "Text in the area:" + rect );
                System.out.println( stripper.getTextForRegion( "class1" ) );
                
            }
            finally
            {
                if( document != null )
                {
                    document.close();
                }
            }
        }
    }

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.