* @param record a JDOM Element containing the actual PMH record with descriptive metadata.
* @param OREPrefix the metadataPrefix value used by the remote PMH server to disseminate ORE. Only used for collections that are set up to harvest content.
*/
private void processRecord(Element record, String OREPrefix) throws SQLException, AuthorizeException, IOException, CrosswalkException, HarvestingException, ParserConfigurationException, SAXException, TransformerException
{
WorkspaceItem wi = null;
Date timeStart = new Date();
// grab the oai identifier
String itemOaiID = record.getChild("header", OAI_NS).getChild("identifier", OAI_NS).getText();
Element header = record.getChild("header",OAI_NS);
// look up the item corresponding to the OAI identifier
Item item = HarvestedItem.getItemByOAIId(ourContext, itemOaiID, targetCollection.getID());
// Make sure the item hasn't been deleted in the meantime
if (header.getAttribute("status") != null && header.getAttribute("status").getValue().equals("deleted")) {
log.info("Item " + itemOaiID + " has been marked as deleted on the OAI server.");
if (item != null)
{
targetCollection.removeItem(item);
}
ourContext.restoreAuthSystemState();
return;
}
// If we are only harvesting descriptive metadata, the record should already contain all we need
List<Element> descMD = record.getChild("metadata", OAI_NS).getChildren();
IngestionCrosswalk MDxwalk = (IngestionCrosswalk)PluginManager.getNamedPlugin(IngestionCrosswalk.class, this.metadataKey);
// Otherwise, obtain the ORE ReM and initiate the ORE crosswalk
IngestionCrosswalk ORExwalk = null;
Element oreREM = null;
if (harvestRow.getHarvestType() > 1) {
oreREM = getMDrecord(harvestRow.getOaiSource(), itemOaiID, OREPrefix).get(0);
ORExwalk = (IngestionCrosswalk)PluginManager.getNamedPlugin(IngestionCrosswalk.class, this.ORESerialKey);
}
// Ignore authorization
ourContext.turnOffAuthorisationSystem();
HarvestedItem hi;
if (item != null) // found an item so we modify
{
log.debug("Item " + item.getHandle() + " was found locally. Using it to harvest " + itemOaiID + ".");
// FIXME: check for null pointer if for some odd reason we don't have a matching hi
hi = HarvestedItem.find(ourContext, item.getID());
// Compare last-harvest on the item versus the last time the item was updated on the OAI provider side
// If ours is more recent, forgo this item, since it's probably a left-over from a previous harvesting attempt
Date OAIDatestamp = Utils.parseISO8601Date(header.getChildText("datestamp", OAI_NS));
Date itemLastHarvest = hi.getHarvestDate();
if (itemLastHarvest != null && OAIDatestamp.before(itemLastHarvest)) {
log.info("Item " + item.getHandle() + " was harvested more recently than the last update time reporetd by the OAI server; skipping.");
return;
}
// Otherwise, clear and re-import the metadata and bitstreams
item.clearMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);
if (descMD.size() == 1)
{
MDxwalk.ingest(ourContext, item, descMD.get(0));
}
else
{
MDxwalk.ingest(ourContext, item, descMD);
}
// Import the actual bitstreams
if (harvestRow.getHarvestType() == 3) {
log.info("Running ORE ingest on: " + item.getHandle());
Bundle[] allBundles = item.getBundles();
for (Bundle bundle : allBundles) {
item.removeBundle(bundle);
}
ORExwalk.ingest(ourContext, item, oreREM);
}
scrubMetadata(item);
}
else
// NOTE: did not find, so we create (presumably, there will never be a case where an item already
// exists in a harvest collection but does not have an OAI_id)
{
wi = WorkspaceItem.create(ourContext, targetCollection, false);
item = wi.getItem();
hi = HarvestedItem.create(ourContext, item.getID(), itemOaiID);
//item.setOaiID(itemOaiID);
if (descMD.size() == 1)
{
MDxwalk.ingest(ourContext, item, descMD.get(0));
}
else
{
MDxwalk.ingest(ourContext, item, descMD);
}
if (harvestRow.getHarvestType() == 3) {
ORExwalk.ingest(ourContext, item, oreREM);
}
// see if we can do something about the wonky metadata
scrubMetadata(item);
// see if a handle can be extracted for the item
String handle = extractHandle(item);
if (handle != null)
{
DSpaceObject dso = HandleManager.resolveToObject(ourContext, handle);
if (dso != null)
{
throw new HarvestingException("Handle collision: attempted to re-assign handle '" + handle + "' to an incoming harvested item '" + hi.getOaiID() + "'.");
}
}
try {
item = InstallItem.installItem(ourContext, wi, handle);
//item = InstallItem.installItem(ourContext, wi);
}
// clean up the workspace item if something goes wrong before
catch(SQLException se) {
wi.deleteWrapper();
throw se;
}
catch(IOException ioe) {
wi.deleteWrapper();
throw ioe;
}
catch(AuthorizeException ae) {
wi.deleteWrapper();
throw ae;
}
}
// Now create the special ORE bundle and drop the ORE document in it