// grab the oai identifier
String itemOaiID = record.getChild("header", OAI_NS).getChild("identifier", OAI_NS).getText();
Element header = record.getChild("header",OAI_NS);
// look up the item corresponding to the OAI identifier
Item item = HarvestedItem.getItemByOAIId(ourContext, itemOaiID, targetCollection.getID());
// Make sure the item hasn't been deleted in the meantime
if (header.getAttribute("status") != null && header.getAttribute("status").getValue().equals("deleted")) {
log.info("Item " + itemOaiID + " has been marked as deleted on the OAI server.");
if (item != null)
targetCollection.removeItem(item);
ourContext.restoreAuthSystemState();
return;
}
// If we are only harvesting descriptive metadata, the record should already contain all we need
List<Element> descMD = record.getChild("metadata", OAI_NS).getChildren();
IngestionCrosswalk MDxwalk = (IngestionCrosswalk)PluginManager.getNamedPlugin(IngestionCrosswalk.class, this.metadataKey);
// Otherwise, obtain the ORE ReM and initiate the ORE crosswalk
IngestionCrosswalk ORExwalk = null;
Element oreREM = null;
if (harvestRow.getHarvestType() > 1) {
oreREM = getMDrecord(harvestRow.getOaiSource(), itemOaiID, OREPrefix).get(0);
ORExwalk = (IngestionCrosswalk)PluginManager.getNamedPlugin(IngestionCrosswalk.class, this.ORESeialKey);
}
// Ignore authorization
ourContext.turnOffAuthorisationSystem();
HarvestedItem hi;
if (item != null) // found an item so we modify
{
log.debug("Item " + item.getHandle() + " was found locally. Using it to harvest " + itemOaiID + ".");
// FIXME: check for null pointer if for some odd reason we don't have a matching hi
hi = HarvestedItem.find(ourContext, item.getID());
// Compare the item's last-harvest date with the last time the item was updated on the OAI provider side.
// If ours is more recent, forgo this item, since it's probably a left-over from a previous harvesting attempt
Date OAIDatestamp = Utils.parseISO8601Date(header.getChildText("datestamp", OAI_NS));
Date itemLastHarvest = hi.getHarvestDate();
if (itemLastHarvest != null && OAIDatestamp.before(itemLastHarvest)) {
log.info("Item " + item.getHandle() + " was harvested more recently than the last update time reporetd by the OAI server; skipping.");
return;
}
// Otherwise, clear and re-import the metadata and bitstreams
item.clearMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);
if (descMD.size() == 1)
MDxwalk.ingest(ourContext, item, descMD.get(0));
else
MDxwalk.ingest(ourContext, item, descMD);
// Import the actual bitstreams
if (harvestRow.getHarvestType() == 3) {
log.info("Running ORE ingest on: " + item.getHandle());
Bundle[] allBundles = item.getBundles();
for (Bundle bundle : allBundles) {
item.removeBundle(bundle);
}
ORExwalk.ingest(ourContext, item, oreREM);
}
scrubMetadata(item);
}
else
// NOTE: did not find, so we create (presumably, there will never be a case where an item already
// exists in a harvest collection but does not have an OAI_id)
{
wi = WorkspaceItem.create(ourContext, targetCollection, false);
item = wi.getItem();
hi = HarvestedItem.create(ourContext, item.getID(), itemOaiID);
//item.setOaiID(itemOaiID);
if (descMD.size() == 1)
MDxwalk.ingest(ourContext, item, descMD.get(0));
else
MDxwalk.ingest(ourContext, item, descMD);
if (harvestRow.getHarvestType() == 3) {
ORExwalk.ingest(ourContext, item, oreREM);
}
// see if we can do something about the wonky metadata
scrubMetadata(item);
// see if a handle can be extracted for the item
String handle = extractHandle(item);
if (handle != null)
{
DSpaceObject dso = HandleManager.resolveToObject(ourContext, handle);
if (dso != null)
throw new HarvestingException("Handle collision: attempted to re-assign handle '" + handle + "' to an incoming harvested item '" + hi.getOaiID() + "'.");
}
try {
item = InstallItem.installItem(ourContext, wi, handle);
//item = InstallItem.installItem(ourContext, wi);
}
// clean up the workspace item if something goes wrong during installation
catch(SQLException se) {
wi.deleteWrapper();
throw se;
}
catch(IOException ioe) {
wi.deleteWrapper();
throw ioe;
}
catch(AuthorizeException ae) {
wi.deleteWrapper();
throw ae;
}
}
// Now create the special ORE bundle and drop the ORE document in it
if (harvestRow.getHarvestType() == 2 || harvestRow.getHarvestType() == 3)
{
Bundle OREBundle = item.createBundle("ORE");
XMLOutputter outputter = new XMLOutputter();
String OREString = outputter.outputString(oreREM);
ByteArrayInputStream OREStream = new ByteArrayInputStream(OREString.getBytes());
Bitstream OREBitstream = OREBundle.createBitstream(OREStream);
OREBitstream.setName("ORE.xml");
BitstreamFormat bf = FormatIdentifier.guessFormat(ourContext, OREBitstream);
OREBitstream.setFormat(bf);
OREBitstream.update();
OREBundle.addBitstream(OREBitstream);
OREBundle.update();
}
//item.setHarvestDate(new Date());
hi.setHarvestDate(new Date());
// Add provenance that this item was harvested via OAI
String provenanceMsg = "Item created via OAI harvest from source: "
+ this.harvestRow.getOaiSource() + " on " + new DCDate(hi.getHarvestDate())
+ " (GMT). Item's OAI Record identifier: " + hi.getOaiID();
item.addMetadata("dc", "description", "provenance", "en", provenanceMsg);
item.update();
hi.update();
long timeTaken = new Date().getTime() - timeStart.getTime();
log.info("Item " + item.getHandle() + "(" + item.getID() + ")" + " has been ingested. The whole process took: " + timeTaken + " ms. ");
// Un-ignore authorization
ourContext.restoreAuthSystemState();
}