descMDPrefix = OAIResolveNamespaceToPrefix(oaiSource, metadataNS.getURI());
OREPrefix = OAIResolveNamespaceToPrefix(oaiSource, ORESeialNS.getURI());
}
catch (FileNotFoundException fe) {
log.error("The OAI server did not respond.");
throw new HarvestingException("The OAI server did not respond.");
}
catch (ConnectException fe) {
log.error("The OAI server did not respond.");
throw new HarvestingException("The OAI server did not respond.");
}
if (descMDPrefix == null) {
log.error("The OAI server does not support this metadata format");
throw new HarvestingException("The OAI server does not support this metadata format: " + metadataNS.getURI());
}
if (OREPrefix == null && harvestRow.getHarvestType() != HarvestedCollection.TYPE_DMD) {
throw new HarvestingException("The OAI server does not support ORE dissemination in the configured serialization format: " + ORESeialNS.getURI());
}
Document oaiResponse = null;
Element root = null;
String resumptionToken;
// set the status indicating the collection is currently being processed
harvestRow.setHarvestStatus(HarvestedCollection.STATUS_BUSY);
harvestRow.setHarvestMessage("Collection is currently being harvested");
harvestRow.setHarvestStartTime(startTime);
harvestRow.update();
ourContext.commit();
// expiration timer starts
int expirationInterval = ConfigurationManager.getIntProperty("harvester.threadTimeout");
if (expirationInterval == 0) expirationInterval = 24;
Calendar calendar = Calendar.getInstance();
calendar.setTime(startTime);
calendar.add(Calendar.HOUR, expirationInterval);
expirationTime = calendar.getTime();
// main loop to keep requesting more objects until we're done
List<Element> records;
Set<String> errorSet = new HashSet<String>();
ListRecords listRecords = new ListRecords(oaiSource, fromDate, toDate, oaiSetId, descMDPrefix);
log.debug("Harvesting request parameters: listRecords " + oaiSource + " " + fromDate + " " + toDate + " " + oaiSetId + " " + descMDPrefix);
if (listRecords != null)
log.info("HTTP Request: " + listRecords.getRequestURL());
while (listRecords != null)
{
records = new ArrayList<Element>();
oaiResponse = db.build(listRecords.getDocument());
if (listRecords.getErrors() != null && listRecords.getErrors().getLength() > 0)
{
for (int i=0; i<listRecords.getErrors().getLength(); i++)
{
String errorCode = listRecords.getErrors().item(i).getAttributes().getNamedItem("code").getTextContent();
errorSet.add(errorCode);
}
if (errorSet.contains("noRecordsMatch"))
{
log.info("noRecordsMatch: OAI server did not contain any updates");
harvestRow.setHarvestResult(new Date(), "OAI server did not contain any updates");
harvestRow.setHarvestStatus(HarvestedCollection.STATUS_READY);
harvestRow.update();
return;
} else {
throw new HarvestingException(errorSet.toString());
}
}
else
{
root = oaiResponse.getRootElement();
records.addAll(root.getChild("ListRecords", OAI_NS).getChildren("record", OAI_NS));
}
// Process the obtained records
if (records != null && records.size()>0)
{
log.info("Found " + records.size() + " records to process");
for (Element record : records) {
// check for STOP interrupt from the scheduler
if (HarvestScheduler.interrupt == HarvestScheduler.HARVESTER_INTERRUPT_STOP)
throw new HarvestingException("Harvest process for " + targetCollection.getID() + " interrupted by stopping the scheduler.");
// check for timeout
if (expirationTime.before(new Date()))
throw new HarvestingException("runHarvest method timed out for collection " + targetCollection.getID());
processRecord(record,OREPrefix);
ourContext.commit();
}
}