// Setup for the harvest run below: adjust the source, build a standalone harvester,
// and prepare the result lists and harvest status.
// When real dedup is requested and the source has an RSS config, force a 1 second update cycle.
if (bRealDedup) { // Want to test update code, so ignore update cycle
if (null != source.getRssConfig()) {
source.getRssConfig().setUpdateCycle_secs(1); // always update
}
}
// NOTE(review): the meaning of the `true` constructor argument is not visible here - confirm against HarvestController
HarvestController harvester = new HarvestController(true);
// Cap the number of documents requested at 100
if (nNumDocsToReturn > 100) { // (seems reasonable)
nNumDocsToReturn = 100;
}
harvester.setStandaloneMode(nNumDocsToReturn, bRealDedup);
// Result lists: all three are handed to harvester.harvestSource() on the non-federated path;
// the federated path appends converted documents to toAdd only.
List<DocumentPojo> toAdd = new LinkedList<DocumentPojo>();
List<DocumentPojo> toUpdate = new LinkedList<DocumentPojo>();
List<DocumentPojo> toRemove = new LinkedList<DocumentPojo>();
// Make sure a harvest status object exists so the message can be read/written without null checks
if (null == source.getHarvestStatus()) {
source.setHarvestStatus(new SourceHarvestStatusPojo());
}
// Remember the message present before the run (may be null for a freshly created status);
// it is compared against the post-run message further down.
String oldMessage = source.getHarvestStatus().getHarvest_message();
// SPECIAL CASE: FOR FEDERATED QUERIES
// Sources whose extract type is "Federated" are not passed to the harvester: instead the endpoint's
// test query is run through a SimpleFederatedQueryEngine and the resulting documents are added to toAdd.
// Every other source goes through harvester.harvestSource() in the else branch at the bottom.
if ((null != source.getExtractType()) && source.getExtractType().equals("Federated")) {
// Running total of entities across all returned documents (drives the status message below)
int federatedQueryEnts = 0;
SourceFederatedQueryConfigPojo endpoint = null;
// Best-effort lookup: the federated query config is read from the first pipeline element.
// Any exception is deliberately swallowed - endpoint stays null and is reported just below.
// NOTE(review): presumably this guards against a null/empty processing pipeline - confirm
try {
endpoint = source.getProcessingPipeline().get(0).federatedQuery;
}
catch (Exception e) {}
if (null == endpoint) {
// No endpoint available: report the error on rp and return early
rp.setResponse(new ResponseObject("Test Source",false,"source error: no federated query specified"));
return rp;
}
// Parse the endpoint's test query. errMessage keeps its default if fromApi returns null without
// throwing; otherwise it is replaced by the exception's message.
AdvancedQueryPojo testQuery = null;
String errMessage = "no query specified";
try {
testQuery = AdvancedQueryPojo.fromApi(endpoint.testQueryJson, AdvancedQueryPojo.class);
}
catch (Exception e) {
errMessage = e.getMessage();
}
if (null == testQuery) {
// NOTE(review): the message below contains the typo "specifiy"; it is runtime text so it is left unchanged here
rp.setResponse(new ResponseObject("Test Source",false,"source error: need to specifiy a valid IKANOW query to test federated queries, error: " + errMessage));
return rp;
}
// OK if we're here then we can test the query
SimpleFederatedQueryEngine testFederatedQuery = new SimpleFederatedQueryEngine();
// Link the endpoint back to the source being tested before registering it with the engine
endpoint.parentSource = source;
testFederatedQuery.addEndpoint(endpoint);
// Fresh id for this one-off test query
ObjectId queryId = new ObjectId();
// Convert the source's community ids to their string form for preQueryActivities
String[] communityIdStrs = new String[source.getCommunityIds().size()];
int i = 0;
for (ObjectId commId: source.getCommunityIds()) {
communityIdStrs[i] = commId.toString();
i++;
}
testFederatedQuery.setTestMode(true);
testFederatedQuery.preQueryActivities(queryId, testQuery, communityIdStrs);
// Attach a stats object (saved scores set to 0, 0) to rp before the post-query step
StatisticsPojo stats = new StatisticsPojo();
stats.setSavedScores(0, 0);
rp.setStats(stats);
// toAddTemp starts empty; presumably postQueryActivities fills it with the query's documents - confirm
ArrayList<BasicDBObject> toAddTemp = new ArrayList<BasicDBObject>(1);
testFederatedQuery.postQueryActivities(queryId, toAddTemp, rp);
// Convert each raw DB object to a DocumentPojo, count its entities, and queue it in toAdd
for (BasicDBObject docObj: toAddTemp) {
DocumentPojo doc = DocumentPojo.fromDb(docObj, DocumentPojo.class);
if (null != doc.getEntities()) {
federatedQueryEnts += doc.getEntities().size();
}
// The "json" metadata entry is re-read from the raw DB object and, if it is an Object[],
// added to the converted document.
// NOTE(review): the unchecked cast assumes the stored metadata is a LinkedHashMap - confirm
//Metadata workaround:
@SuppressWarnings("unchecked")
LinkedHashMap<String, Object[]> meta = (LinkedHashMap<String, Object[]>) docObj.get(DocumentPojo.metadata_);
if (null != meta) {
Object metaJson = meta.get("json");
if (metaJson instanceof Object[]) { // (in this case ... non-cached, need to recopy in, I forget why)
doc.addToMetadata("json", (Object[])metaJson);
}
}
toAdd.add(doc);
}
// (currently can't run harvest source federated query)
// Summarise the outcome in the harvest message: a warning when no entities were found,
// otherwise the total entity count
if (0 == federatedQueryEnts) { // (more fed query exceptions)
source.getHarvestStatus().setHarvest_message("Warning: no entities extracted, probably docConversionMap is wrong?");
}
else {
source.getHarvestStatus().setHarvest_message(federatedQueryEnts + " entities extracted");
}
}//TESTED (END FEDERATED QUERY TEST MODE, WHICH IS A BIT DIFFERENT)
else {
// Normal (non-federated) path: the harvester fills the add/update/remove lists
harvester.harvestSource(source, toAdd, toUpdate, toRemove);
}
// (don't parrot the old message back - v confusing)
if (oldMessage == source.getHarvestStatus().getHarvest_message()) { // (ptr ==)
source.getHarvestStatus().setHarvest_message("(no documents extracted - likely a source or configuration error)");