}
/**
 * Reads the Info dictionary of a PDF document and records its entries as
 * Dublin Core values on the given item, then calls {@code item.update()}.
 *
 * <p>Mapping of PDF Info dictionary entries to DC (element.qualifier):
 * <pre>
 *   Title        -> title (no qualifier, language "en")
 *   Author       -> contributor.author
 *   Creator      -> description.provenance (application that created orig)
 *   Producer     -> description.provenance (convertor to pdf)
 *   Subject      -> description.abstract
 *   Keywords     -> subject.other
 *   CreationDate -> date.created
 *   ModDate      -> date.created (only when CreationDate is absent)
 * </pre>
 *
 * <p>NOTE: This is not in a crosswalk plugin because (a) it isn't
 * useful anywhere else, and more importantly, (b) the source
 * data is not XML so it doesn't fit the plugin's interface.
 *
 * <p>A {@code MetadataValidationException} is thrown when the document has
 * an encryption dictionary, or when its Title entry is missing or blank.
 * The parsed {@code COSDocument} is closed before returning, whether or not
 * an exception was thrown.
 *
 * @param context  not referenced by this method
 * @param item     item that receives the DC values and is updated
 * @param metadata stream holding the PDF document to parse
 * @throws CrosswalkException declared for the validation failures above
 * @throws IOException        declared; presumably raised while parsing or
 *                            closing the PDF -- confirm against PDFBox
 * @throws SQLException       declared; presumably from the item calls
 * @throws AuthorizeException declared; presumably from the item calls
 */
private void crosswalkPDF(Context context, Item item, InputStream metadata)
    throws CrosswalkException, IOException, SQLException, AuthorizeException
{
    COSDocument cos = null;

    try
    {
        PDFParser parser = new PDFParser(metadata);
        parser.parse();
        cos = parser.getDocument();

        // sanity check: PDFBox breaks on encrypted documents, so give up.
        if (cos.getEncryptionDictionary() != null)
        {
            throw new MetadataValidationException("This packager cannot accept an encrypted PDF document.");
        }

        PDDocument pd = new PDDocument(cos);
        PDDocumentInformation docinfo = pd.getDocumentInformation();
        String title = docinfo.getTitle();

        // sanity check: item must have a title.  A blank Title entry is
        // treated the same as a missing one, since it would otherwise
        // yield an item whose title is an empty string.
        if (title == null || title.trim().length() == 0)
        {
            throw new MetadataValidationException("This PDF file is unacceptable, it does not have a value for \"Title\" in its Info dictionary.");
        }
        log.debug("PDF Info dict title=\""+title+"\"");
        item.addDC("title", null, "en", title);

        String value;

        if ((value = docinfo.getAuthor()) != null)
        {
            item.addDC("contributor", "author", null, value);
            log.debug("PDF Info dict author=\""+value+"\"");
        }

        if ((value = docinfo.getCreator()) != null)
        {
            item.addDC("description", "provenance", "en",
                       "Application that created the original document: "+value);
        }

        if ((value = docinfo.getProducer()) != null)
        {
            item.addDC("description", "provenance", "en",
                       "Original document converted to PDF by: "+value);
        }

        if ((value = docinfo.getSubject()) != null)
        {
            item.addDC("description", "abstract", null, value);
        }

        if ((value = docinfo.getKeywords()) != null)
        {
            item.addDC("subject", "other", null, value);
        }

        // Take either CreationDate or ModDate as "date.created",
        // Too bad there's no place to put "last modified" in the DC.
        // (PDFBox hands these dates back as java.util.Calendar.)
        Calendar created = docinfo.getCreationDate();
        if (created == null)
        {
            created = docinfo.getModificationDate();
        }

        if (created != null)
        {
            item.addDC("date", "created", null,
                       (new DCDate(created.getTime())).toString());
        }

        item.update();
    }
    finally
    {
        // Release the parsed document even when validation or a DC
        // call above threw.
        if (cos != null)
        {
            cos.close();
        }
    }
}