Examples of PDDocument


Examples of org.pdfbox.pdmodel.PDDocument

      assertEquals(originalAuthors, AuthorList.getAuthorList(b.getField(
          "author").toString()));

      // Next check from Document Information
      PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile());
      try {

        assertEquals(originalAuthors, AuthorList.getAuthorList(document
            .getDocumentInformation().getAuthor()));

        b = XMPUtil.getBibtexEntryFromDocumentInformation(document
            .getDocumentInformation());
        assertEquals(originalAuthors, AuthorList.getAuthorList(b
            .getField("author").toString()));

        // Now check from Dublin Core
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();

        if (metaRaw == null) {
          fail();
        }

        XMPMetadata meta = new XMPMetadata(XMLUtil.parse(metaRaw
            .createInputStream()));
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE,
            XMPSchemaBibtex.class);

        List<XMPSchema> schemas = meta
            .getSchemasByNamespaceURI("http://purl.org/dc/elements/1.1/");

        assertEquals(1, schemas.size());

        XMPSchemaDublinCore dcSchema = (XMPSchemaDublinCore) schemas
            .iterator().next();
        assertNotNull(dcSchema);

        assertEquals("David Patterson", dcSchema.getCreators().get(0));
        assertEquals("Arvind", dcSchema.getCreators().get(1));
        assertEquals("Krste Asanov\\'\\i{}c", dcSchema.getCreators()
            .get(2));

        b = XMPUtil.getBibtexEntryFromDublinCore(dcSchema);
        assertEquals(originalAuthors, AuthorList.getAuthorList(b
            .getField("author").toString()));
      } finally {
        document.close();
      }

    } finally {
      pdfFile.delete();
    }
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

  //*-- extract PDF document's textual content
  String docText = null;
  try
  { PDFTextStripper stripper = new PDFTextStripper();
    docText = stripper.getText(new PDDocument(cosDoc));
  }
  catch (OutOfMemoryError exc)
  { closeCOSDocument(cosDoc);
    logger.error("Ran out of memory for " + ifile + " or could be corrupt file " + exc.getMessage());
  }
  catch (Exception e)
  { closeCOSDocument(cosDoc);
    logger.error("Cannot get text from PDF document " + ifile + " " + e.getMessage());
    return;
  }
  //*-- Extract the entire text and save in the contents
  if (docText != null)
  { docText = StringTools.filterChars(docText); doc.setContents(new StringBuffer(docText) ); }

  //*-- Extract PDF document's meta-data
  PDDocument pdDoc = null;
  try
  {
   logger.info("Extracting metadata from PDF file " + ifile);
   pdDoc = new PDDocument(cosDoc);
   PDDocumentInformation docInfo = pdDoc.getDocumentInformation();
   String author   = StringTools.filterChars(docInfo.getAuthor());
   String title    = StringTools.filterChars(docInfo.getTitle());
   String keywords = StringTools.filterChars(docInfo.getKeywords());
   String summary  = StringTools.filterChars(docInfo.getSubject());
   if ((author != null) && (!author.equals("")))     { doc.setAuthor(author); }
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

  public static List<BibtexEntry> readXMP(InputStream inputStream)
      throws IOException {

    List<BibtexEntry> result = new LinkedList<BibtexEntry>();

    PDDocument document = null;

    try {
      document = PDDocument.load(inputStream);
      if (document.isEncrypted()) {
        throw new EncryptionNotSupportedException(
            "Error: Cannot read metadata from encrypted document.");
      }

      XMPMetadata meta = getXMPMetadata(document);

      // If we did not find any XMP metadata, search for non XMP metadata
      if (meta != null) {

              List<XMPSchema> schemas = meta
                  .getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
       
              Iterator<XMPSchema> it = schemas.iterator();
              while (it.hasNext()) {
                XMPSchemaBibtex bib = (XMPSchemaBibtex) it.next();
       
                result.add(bib.getBibtexEntry());
              }
       
              // If we did not find anything have a look if a Dublin Core exists
              if (result.size() == 0) {
                schemas = meta
                    .getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
                it = schemas.iterator();
                while (it.hasNext()) {
                  XMPSchemaDublinCore dc = (XMPSchemaDublinCore) it.next();
       
                  BibtexEntry entry = getBibtexEntryFromDublinCore(dc);
       
                  if (entry != null)
                    result.add(entry);
                }
              }
      }
      if (result.size() == 0) {
        BibtexEntry entry = getBibtexEntryFromDocumentInformation(document
            .getDocumentInformation());

        if (entry != null)
          result.add(entry);
      }
    } finally {
      if (document != null)
        document.close();
    }
   
    // return null, if no metadata was found
    if (result.size()==0) return null;
    return result;
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

   *         found.
   * @throws IOException
   */
  public static XMPMetadata readRawXMP(InputStream inputStream)
      throws IOException {
    PDDocument document = null;

    try {
      document = PDDocument.load(inputStream);
      if (document.isEncrypted()) {
        throw new EncryptionNotSupportedException(
            "Error: Cannot read metadata from encrypted document.");
      }

      return getXMPMetadata(document);

    } finally {
      if (document != null)
        document.close();
    }
  }
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

      boolean writePDFInfo) throws IOException, TransformerException {

    if (databasee != null)
      bibtexEntries = databasee.resolveForStrings(bibtexEntries, false);

    PDDocument document = null;

    try {
      document = PDDocument.load(file.getAbsoluteFile());
      if (document.isEncrypted()) {
        throw new EncryptionNotSupportedException(
            "Error: Cannot add metadata to encrypted document.");
      }

      if (writePDFInfo && bibtexEntries.size() == 1) {
        writeDocumentInformation(document, bibtexEntries
            .iterator().next(), null);
        writeDublinCore(document, bibtexEntries, null);
      }

      PDDocumentCatalog catalog = document.getDocumentCatalog();
      PDMetadata metaRaw = catalog.getMetadata();

      XMPMetadata meta;
      if (metaRaw != null) {
        meta = new XMPMetadata(XMLUtil.parse(metaRaw
            .createInputStream()));
      } else {
        meta = new XMPMetadata();
      }
      meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE,
          XMPSchemaBibtex.class);

      // Remove all current Bibtex-schemas
      List schemas = meta
          .getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
      Iterator it = schemas.iterator();
      while (it.hasNext()) {
        XMPSchemaBibtex bib = (XMPSchemaBibtex) it.next();
        bib.getElement().getParentNode().removeChild(bib.getElement());
      }

      it = bibtexEntries.iterator();
      while (it.hasNext()) {
        BibtexEntry e = (BibtexEntry) it.next();
        XMPSchemaBibtex bibtex = new XMPSchemaBibtex(meta);
        meta.addSchema(bibtex);
        bibtex.setBibtexEntry(e, null);
      }

      // Save to stream and then input that stream to the PDF
      ByteArrayOutputStream os = new ByteArrayOutputStream();
      meta.save(os);
      ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
      PDMetadata metadataStream = new PDMetadata(document, is, false);
      catalog.setMetadata(metadataStream);

      // Save
      try {
        document.save(file.getAbsolutePath());
      } catch (COSVisitorException e) {
        throw new TransformerException("Could not write XMP-metadata: "
            + e.getLocalizedMessage());
      }

    } finally {
      if (document != null) {
        document.close();
      }
    }
  }
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

    public void testExportSingleSimplePageAsPDF() throws Exception
    {
        URL url = new URL("http://localhost:8080/xwiki/bin/export/Main/WebHome?format=pdf");
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        InputStream is = connection.getInputStream();
        PDDocument pdd = PDDocument.load(is);
        PDFTextStripper stripper = new PDFTextStripper();
        String text = stripper.getText(pdd);
        pdd.close();
        is.close();

        assertTrue("Invalid content", text.contains("Welcome to your wiki"));
    }
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

    bos.close();   
  }

  private String extractTextFromPdf(VFSLeaf leaf) throws IOException, DocumentAccessException {
    if (log.isDebug()) log.debug("readContent from pdf starts...");
    PDDocument document = null;
    BufferedInputStream bis = null;
    try {
      bis = new BufferedInputStream(leaf.getInputStream());     
      document = PDDocument.load(bis);
      if (document.isEncrypted()) {
        throw new DocumentAccessException("PDF is encrypted. Can not read content file=" + leaf.getName());
      }     
      if (log.isDebug()) log.debug("readContent PDDocument loaded");
      PDFTextStripper stripper = new PDFTextStripper();
      return stripper.getText(document);
    } finally {
      if (document != null) {
        document.close();
      }
      if (bis != null) {
        bis.close();
      }
    }
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

     *
     * @throws IOException If there is an error writing the data.
     */
    public PDDocument createPDFFromText( Reader text ) throws IOException
    {
        PDDocument doc = null;
        try
        {
           
            int margin = 40;
            float height = font.getFontDescriptor().getFontBoundingBox().getHeight()/1000;
           
            //calculate font height and increase by 5 percent.
            height = height*fontSize*1.05f;
            doc = new PDDocument();
            BufferedReader data = new BufferedReader( text );
            String nextLine = null;
            PDPage page = new PDPage();
            PDPageContentStream contentStream = null;
            float y = -1;
            float maxStringLength = page.getMediaBox().getWidth() - 2*margin;
            while( (nextLine = data.readLine()) != null )
            {
               
                String[] lineWords = nextLine.trim().split( " " );
                int lineIndex = 0;
                while( lineIndex < lineWords.length )
                {  
                    StringBuffer nextLineToDraw = new StringBuffer();
                    float lengthIfUsingNextWord = 0;
                    do
                    {
                        nextLineToDraw.append( lineWords[lineIndex] );
                        nextLineToDraw.append( " " );
                        lineIndex++;
                        if( lineIndex < lineWords.length )
                        {
                            String lineWithNextWord = nextLineToDraw.toString() + lineWords[lineIndex];
                            lengthIfUsingNextWord =
                                (font.getStringWidth( lineWithNextWord )/1000) * fontSize;
                        }
                    }
                    while( lineIndex < lineWords.length &&
                           lengthIfUsingNextWord < maxStringLength );
                    if( y < margin )
                    {
                        page = new PDPage();
                        doc.addPage( page );
                        if( contentStream != null )
                        {
                            contentStream.endText();
                            contentStream.close();
                        }
                        contentStream = new PDPageContentStream(doc, page);
                        contentStream.setFont( font, fontSize );
                        contentStream.beginText();
                        y = page.getMediaBox().getHeight() - margin + height;
                        contentStream.moveTextPositionByAmount(
                            margin, y );
                       
                    }
                    //System.out.println( "Drawing string at " + x + "," + y );
                   
                    if( contentStream == null )
                    {
                        throw new IOException( "Error:Expected non-null content stream." );
                    }
                    contentStream.moveTextPositionByAmount( 0, -height);
                    y -= height;
                    contentStream.drawString( nextLineToDraw.toString() );
                }
               
               
            } 
            if( contentStream != null )
            {
                contentStream.endText();
                contentStream.close();
            }
        }
        catch( IOException io )
        {
            if( doc != null )
            {
                doc.close();
            }
            throw io;
        }
        return doc;
    }
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

     * @throws IOException If there is an error with the PDF.
     */
    public static void main(String[] args) throws IOException
    {
        TextToPDF app = new TextToPDF();
        PDDocument doc = null;
        try
        {
            if( args.length < 2 )
            {
                app.usage();
            }
            else
            {
                for( int i=0; i<args.length-2; i++ )
                {
                    if( args[i].equals( "-standardFont" ))
                    {
                        i++;
                        app.setFont( PDType1Font.getStandardFont( args[i] ));
                    }
                    else if( args[i].equals( "-ttf" ))
                    {
                        i++;
                        PDTrueTypeFont font = PDTrueTypeFont.loadTTF( doc, new File( args[i]));
                        app.setFont( font );
                    }
                    else if( args[i].equals( "-fontSize" ))
                    {
                        i++;
                        app.setFontSize( Integer.parseInt( args[i] ) );
                    }
                    else
                    {
                        throw new IOException( "Unknown argument:" + args[i] );
                    }
                }
                doc = app.createPDFFromText( new FileReader( args[args.length-1] ) );
                doc.save( args[args.length-2] );
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            if( doc != null )
            {
                doc.close();
            }
        }
    }
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocument

        {
            usage();
        }
        else
        {
            PDDocument document = null;
            try
            {
                document = PDDocument.load( args[0] );
                if( document.isEncrypted() )
                {
                    try
                    {
                        document.decrypt( "" );
                    }
                    catch( InvalidPasswordException e )
                    {
                        System.err.println( "Error: Document is encrypted with a password." );
                        System.exit( 1 );
                    }
                }
                PDFTextStripperByArea stripper = new PDFTextStripperByArea();
                stripper.setSortByPosition( true );
                Rectangle rect = new Rectangle( 10, 280, 275, 60 );
                stripper.addRegion( "class1", rect );
                List allPages = document.getDocumentCatalog().getAllPages();
                PDPage firstPage = (PDPage)allPages.get( 0 );
                stripper.extractRegions( firstPage );
                System.out.println( "Text in the area:" + rect );
                System.out.println( stripper.getTextForRegion( "class1" ) );
               
            }
            finally
            {
                if( document != null )
                {
                    document.close();
                }
            }
        }
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.