// Obtain the parsed document object from the parser.
cos = parser.getDocument();
// sanity check: PDFBox breaks on encrypted documents, so give up.
// A non-null encryption dictionary is taken as the sign of an encrypted file.
if(cos.getEncryptionDictionary() != null)
{
throw new MetadataValidationException("This packager cannot accept an encrypted PDF document.");
}
/* PDF to DC "crosswalk":
*
* NOTE: This is not in a crosswalk plugin because (a) it isn't
* useful anywhere else, and more importantly, (b) the source
* data is not XML so it doesn't fit the plugin's interface.
*
* pattern of crosswalk -- PDF dict entries to DC:
* Title -> title.null
* Author -> contributor.author
* CreationDate -> date.created
* ModDate -> date.created
* Creator -> description.provenance (application that created orig)
* Producer -> description.provenance (converter to pdf)
* Subject -> description.abstract
* Keywords -> subject.other
* NOTE: the date values are of type java.util.Calendar
*/
// Wrap the parsed document to reach its document information (the
// "Info dictionary" named in the error message below).
PDDocument pd = new PDDocument(cos);
PDDocumentInformation docinfo = pd.getDocumentInformation();
String title = docinfo.getTitle();
// sanity check: item must have a title.
// A null title is reported as a metadata validation failure.
if (title == null)
{
throw new MetadataValidationException("This PDF file is unacceptable, it does not have a value for \"Title\" in its Info dictionary.");
}
// Guard so the message string is only concatenated when debug logging is on.
if (log.isDebugEnabled())
{
log.debug("PDF Info dict title=\"" + title + "\"");
}