public ResponsePojo testSource(String sourceJson, int nNumDocsToReturn, boolean bReturnFullText, boolean bRealDedup, String userIdStr)
{
ResponsePojo rp = new ResponsePojo();
try
{
SourcePojo source = null;
SourcePojoSubstitutionApiMap apiMap = new SourcePojoSubstitutionApiMap(new ObjectId(userIdStr));
try {
source = ApiManager.mapFromApi(sourceJson, SourcePojo.class, apiMap);
source.fillInSourcePipelineFields();
}
catch (Exception e) {
rp.setResponse(new ResponseObject("Test Source",false,"Error deserializing source (JSON is valid but does not match schema): " + e.getMessage()));
return rp;
}
if (null == source.getKey()) {
source.setKey(source.generateSourceKey()); // (a dummy value, not guaranteed to be unique)
}
if ((null == source.getExtractType()) || !source.getExtractType().equals("Federated")) {
String testUrl = source.getRepresentativeUrl();
if (null == testUrl) {
rp.setResponse(new ResponseObject("Test Source",false,"Error, source contains no URL to harvest"));
return rp;
}
}
// This is the only field that you don't normally need to specify in save but will cause
// problems if it's not populated in test.
ObjectId userId = new ObjectId(userIdStr);
// Set owner (overwrite, for security reasons)
source.setOwnerId(userId);
if (null == source.getCommunityIds()) {
source.setCommunityIds(new TreeSet<ObjectId>());
}
if (!source.getCommunityIds().isEmpty()) { // need to check that I'm allowed the specified community...
if ((1 == source.getCommunityIds().size()) && (userId.equals(source.getCommunityIds().iterator().next())))
{
// we're OK only community id is user community
}//TESTED
else {
HashSet<ObjectId> communities = SocialUtils.getUserCommunities(userIdStr);
Iterator<ObjectId> it = source.getCommunityIds().iterator();
while (it.hasNext()) {
ObjectId src = it.next();
if (!communities.contains(src)) {
rp.setResponse(new ResponseObject("Test Source",false,"Authentication error: you don't belong to this community: " + src));
return rp;
}//TESTED
}
}//TESTED
}
// Always add the userId to the source community Id (so harvesters can tell if they're running in test mode or not...)
source.addToCommunityIds(userId); // (ie user's personal community, always has same _id - not that it matters)
// Check the source's admin status
source.setOwnedByAdmin(RESTTools.adminLookup(userId.toString(), false));
if (bRealDedup) { // Want to test update code, so ignore update cycle
if (null != source.getRssConfig()) {
source.getRssConfig().setUpdateCycle_secs(1); // always update
}
}
HarvestController harvester = new HarvestController(true);
if (nNumDocsToReturn > 100) { // (seems reasonable)
nNumDocsToReturn = 100;
}
harvester.setStandaloneMode(nNumDocsToReturn, bRealDedup);
List<DocumentPojo> toAdd = new LinkedList<DocumentPojo>();
List<DocumentPojo> toUpdate = new LinkedList<DocumentPojo>();
List<DocumentPojo> toRemove = new LinkedList<DocumentPojo>();
if (null == source.getHarvestStatus()) {
source.setHarvestStatus(new SourceHarvestStatusPojo());
}
String oldMessage = source.getHarvestStatus().getHarvest_message();
// SPECIAL CASE: FOR FEDERATED QUERIES
if ((null != source.getExtractType()) && source.getExtractType().equals("Federated")) {
int federatedQueryEnts = 0;
SourceFederatedQueryConfigPojo endpoint = null;
try {
endpoint = source.getProcessingPipeline().get(0).federatedQuery;
}
catch (Exception e) {}
if (null == endpoint) {
rp.setResponse(new ResponseObject("Test Source",false,"source error: no federated query specified"));
return rp;
}
AdvancedQueryPojo testQuery = null;
String errMessage = "no query specified";
try {
testQuery = AdvancedQueryPojo.fromApi(endpoint.testQueryJson, AdvancedQueryPojo.class);
}
catch (Exception e) {
errMessage = e.getMessage();
}
if (null == testQuery) {
rp.setResponse(new ResponseObject("Test Source",false,"source error: need to specifiy a valid IKANOW query to test federated queries, error: " + errMessage));
return rp;
}
// OK if we're here then we can test the query
SimpleFederatedQueryEngine testFederatedQuery = new SimpleFederatedQueryEngine();
endpoint.parentSource = source;
testFederatedQuery.addEndpoint(endpoint);
ObjectId queryId = new ObjectId();
String[] communityIdStrs = new String[source.getCommunityIds().size()];
int i = 0;
for (ObjectId commId: source.getCommunityIds()) {
communityIdStrs[i] = commId.toString();
i++;
}
testFederatedQuery.setTestMode(true);
testFederatedQuery.preQueryActivities(queryId, testQuery, communityIdStrs);
StatisticsPojo stats = new StatisticsPojo();
stats.setSavedScores(0, 0);
rp.setStats(stats);
ArrayList<BasicDBObject> toAddTemp = new ArrayList<BasicDBObject>(1);
testFederatedQuery.postQueryActivities(queryId, toAddTemp, rp);
for (BasicDBObject docObj: toAddTemp) {
DocumentPojo doc = DocumentPojo.fromDb(docObj, DocumentPojo.class);
if (null != doc.getEntities()) {
federatedQueryEnts += doc.getEntities().size();
}
//Metadata workaround:
@SuppressWarnings("unchecked")
LinkedHashMap<String, Object[]> meta = (LinkedHashMap<String, Object[]>) docObj.get(DocumentPojo.metadata_);
if (null != meta) {
Object metaJson = meta.get("json");
if (metaJson instanceof Object[]) { // (in this case ... non-cached, need to recopy in, I forget why)
doc.addToMetadata("json", (Object[])metaJson);
}
}
toAdd.add(doc);
}
// (currently can't run harvest source federated query)
if (0 == federatedQueryEnts) { // (more fed query exceptions)
source.getHarvestStatus().setHarvest_message("Warning: no entities extracted, probably docConversionMap is wrong?");
}
else {
source.getHarvestStatus().setHarvest_message(federatedQueryEnts + " entities extracted");
}
}//TESTED (END FEDERATED QUERY TEST MODE, WHICH IS A BIT DIFFERENT)
else {
harvester.harvestSource(source, toAdd, toUpdate, toRemove);
}
// (don't parrot the old message back - v confusing)
if (oldMessage == source.getHarvestStatus().getHarvest_message()) { // (ptr ==)
source.getHarvestStatus().setHarvest_message("(no documents extracted - likely a source or configuration error)");
}//TESTED
String message = null;
if ((null != source.getHarvestStatus()) && (null != source.getHarvestStatus().getHarvest_message())) {
message = source.getHarvestStatus().getHarvest_message();
}
else {
message = "";
}
List<String> errMessagesFromSourceDeser = apiMap.getErrorMessages();
if (null != errMessagesFromSourceDeser) {
StringBuffer sbApiMapErr = new StringBuffer("Substitution errors:\n");
for (String err: errMessagesFromSourceDeser) {
sbApiMapErr.append(err).append("\n");
}
message = message + "\n" + sbApiMapErr.toString();
}//TESTED (by hand)
if ((null != source.getHarvestStatus()) && (HarvestEnum.error == source.getHarvestStatus().getHarvest_status())) {
rp.setResponse(new ResponseObject("Test Source",false,"source error: " + message));
rp.setData(toAdd, new DocumentPojoApiMap());
}
else {
if ((null == message) || message.isEmpty()) {
message = "no messages from harvester";
}
rp.setResponse(new ResponseObject("Test Source",true,"successfully returned " + toAdd.size() + " docs: " + message));
try {
// If grabbing full text
// Also some logstash specific logic - these aren't docs so just output the entire record
boolean isLogstash = (null != source.getExtractType()) && source.getExtractType().equalsIgnoreCase("logstash");
List<BasicDBObject> logstashRecords = null;
if (bReturnFullText || isLogstash) {
for (DocumentPojo doc: toAdd) {
if (isLogstash) {
if (null == logstashRecords) {