* @param record a JDOM Element containing the actual PMH record with descriptive metadata.
* @param OREPrefix the metadataPrefix value used by the remote PMH server to disseminate ORE. Only used for collections set up to harvest content.
private void processRecord(Element record, String OREPrefix) throws SQLException, AuthorizeException, IOException, CrosswalkException, HarvestingException, ParserConfigurationException, SAXException, TransformerException
WorkspaceItem wi = null;
Date timeStart = new Date();
// grab the oai identifier
String itemOaiID = record.getChild("header", OAI_NS).getChild("identifier", OAI_NS).getText();
Element header = record.getChild("header",OAI_NS);
// look up the item corresponding to the OAI identifier
Item item = HarvestedItem.getItemByOAIId(ourContext, itemOaiID, targetCollection.getID());
// Make sure the item hasn't been deleted in the meantime
if (header.getAttribute("status") != null && header.getAttribute("status").getValue().equals("deleted")) {
log.info("Item " + itemOaiID + " has been marked as deleted on the OAI server.");
if (item != null)
// If we are only harvesting descriptive metadata, the record should already contain all we need
List<Element> descMD = record.getChild("metadata", OAI_NS).getChildren();
IngestionCrosswalk MDxwalk = (IngestionCrosswalk)PluginManager.getNamedPlugin(IngestionCrosswalk.class, this.metadataKey);
// Otherwise, obtain the ORE ReM and initiate the ORE crosswalk
IngestionCrosswalk ORExwalk = null;
Element oreREM = null;
if (harvestRow.getHarvestType() > 1) {
oreREM = getMDrecord(harvestRow.getOaiSource(), itemOaiID, OREPrefix).get(0);
ORExwalk = (IngestionCrosswalk)PluginManager.getNamedPlugin(IngestionCrosswalk.class, this.ORESerialKey);
// Ignore authorization
HarvestedItem hi;
if (item != null) // found an item so we modify
log.debug("Item " + item.getHandle() + " was found locally. Using it to harvest " + itemOaiID + ".");
// FIXME: check for null pointer if for some odd reason we don't have a matching hi
hi = HarvestedItem.find(ourContext, item.getID());
// Compare last-harvest on the item versus the last time the item was updated on the OAI provider side
// If ours is more recent, forgo this item, since it's probably a left-over from a previous harvesting attempt
Date OAIDatestamp = Utils.parseISO8601Date(header.getChildText("datestamp", OAI_NS));
Date itemLastHarvest = hi.getHarvestDate();
if (itemLastHarvest != null && OAIDatestamp.before(itemLastHarvest)) {
log.info("Item " + item.getHandle() + " was harvested more recently than the last update time reporetd by the OAI server; skipping.");
// Otherwise, clear and re-import the metadata and bitstreams
item.clearMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);
if (descMD.size() == 1)
MDxwalk.ingest(ourContext, item, descMD.get(0));
MDxwalk.ingest(ourContext, item, descMD);
// Import the actual bitstreams
if (harvestRow.getHarvestType() == 3) {
log.info("Running ORE ingest on: " + item.getHandle());
Bundle[] allBundles = item.getBundles();
for (Bundle bundle : allBundles) {
ORExwalk.ingest(ourContext, item, oreREM);
// NOTE: did not find, so we create (presumably, there will never be a case where an item already
// exists in a harvest collection but does not have an OAI_id)
wi = WorkspaceItem.create(ourContext, targetCollection, false);
item = wi.getItem();
hi = HarvestedItem.create(ourContext, item.getID(), itemOaiID);
if (descMD.size() == 1)
MDxwalk.ingest(ourContext, item, descMD.get(0));
MDxwalk.ingest(ourContext, item, descMD);
if (harvestRow.getHarvestType() == 3) {
ORExwalk.ingest(ourContext, item, oreREM);
// see if we can do something about the wonky metadata
// see if a handle can be extracted for the item
String handle = extractHandle(item);
if (handle != null)
DSpaceObject dso = HandleManager.resolveToObject(ourContext, handle);
if (dso != null)
throw new HarvestingException("Handle collision: attempted to re-assign handle '" + handle + "' to an incoming harvested item '" + hi.getOaiID() + "'.");
try {
item = InstallItem.installItem(ourContext, wi, handle);
//item = InstallItem.installItem(ourContext, wi);
// clean up the workspace item if something goes wrong before
catch(SQLException se) {
throw se;
catch(IOException ioe) {
throw ioe;
catch(AuthorizeException ae) {
throw ae;
// Now create the special ORE bundle and drop the ORE document in it