Package com.ikanow.infinit.e.api.knowledge.federated

Source Code of com.ikanow.infinit.e.api.knowledge.federated.SimpleFederatedQueryEngine$FederatedHarvest

/*******************************************************************************
* Copyright 2012, The Infinit.e Open Source Project.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
package com.ikanow.infinit.e.api.knowledge.federated;

import java.net.URI;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Future;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;

import net.minidev.json.JSONArray;

import org.bson.types.ObjectId;

import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import com.ikanow.infinit.e.data_model.api.ResponsePojo;
import com.ikanow.infinit.e.data_model.api.knowledge.AdvancedQueryPojo;
import com.ikanow.infinit.e.data_model.interfaces.query.IQueryExtension;
import com.ikanow.infinit.e.data_model.store.DbManager;
import com.ikanow.infinit.e.data_model.store.MongoDbManager;
import com.ikanow.infinit.e.data_model.store.MongoDbUtil;
import com.ikanow.infinit.e.data_model.store.config.source.SimpleFederatedCache;
import com.ikanow.infinit.e.data_model.store.config.source.SourceFederatedQueryConfigPojo;
import com.ikanow.infinit.e.data_model.store.document.DocumentPojo;
import com.ikanow.infinit.e.data_model.store.document.EntityPojo;
import com.ikanow.infinit.e.data_model.utils.IkanowSecurityManager;
import com.jayway.jsonpath.JsonPath;
import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.ning.http.client.AsyncHttpClient;
import com.ning.http.client.AsyncHttpClient.BoundRequestBuilder;
import com.ning.http.client.Response;

import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;

public class SimpleFederatedQueryEngine implements IQueryExtension {

  //DEBUG
  static private boolean _DEBUG = false;
  //static private boolean _DEBUG = true;
 
  static Logger _logger = Logger.getLogger(SimpleFederatedQueryEngine.class);

  private boolean _cacheMode = true;
  private boolean _testMode = false;
  public void setTestMode(boolean testMode) {
    _testMode = testMode;
    _cacheMode = !_testMode; // (don't cache in some places if in test mode)
  }
 
  public static class FederatedRequest {
    // Endpoint version
    public SourceFederatedQueryConfigPojo.FederatedQueryEndpointUrl subRequest;
    public Future<Response> responseFuture;
    public AsyncHttpClient asyncClient;
    public String mergeKey;
   
    // Script import version
    public FederatedHarvest importThread;
    public AdvancedQueryPojo fullQuery;
    public Throwable errorMessage;
    public BasicDBObject scriptResult = null;
   
    // Joint
    public SourceFederatedQueryConfigPojo endpointInfo;
    public BasicDBObject cachedResult = null;
    public BasicDBObject cachedDoc = null;
    public BasicDBObject cachedDoc_expired = null;
    public String requestParameter;
    public String[] communityIdStrs;
    public String queryIndex;
  }
 
  public void addEndpoint(SourceFederatedQueryConfigPojo newEndpoint) {
    if (null == _endpoints) {
      _endpoints = new LinkedList<SourceFederatedQueryConfigPojo>();
    }
    _endpoints.add(newEndpoint);
  }
 
  LinkedList<FederatedRequest> _asyncRequestsPerQuery = null;
  LinkedList<SourceFederatedQueryConfigPojo> _endpoints = null;
 
  IkanowSecurityManager _scriptingSecurityManager = new IkanowSecurityManager();
  private ScriptEngine _pyEngine = null;
 
  @Override
  public void preQueryActivities(ObjectId queryId, AdvancedQueryPojo query, String[] communityIdStrs) {
   
    _asyncRequestsPerQuery = null;
   
    // 1] Check whether this makes sense to query, get the (sole) entity if so
    String entityType = null;
    String entityValue = null;
    String entityIndex = null;
    String textToTest = null;
    if ((null != query.qt) && (query.qt.size() > 0) && (query.qt.size() < 4)) {

      String logic = query.logic;
      if (null != logic) {
        logic = logic.toLowerCase();
      }
      if ((null != logic) && (logic.contains("or") || logic.contains("not"))) {
        //DEBUG
        if (_DEBUG) _logger.debug("DEB: preQA1: Logic too complex: " + query.logic);
        if (_testMode) {
          throw new RuntimeException("Bad testQueryJson: Logic too complex: " + query.logic);
        }
       
        return; // logic too complex
      }//TESTED (1.3)
      for (AdvancedQueryPojo.QueryTermPojo qt: query.qt) {
        if ((null != qt.entity) || ((null != qt.entityType) && (null != qt.entityValue))) {
          if (null == entityType) { // we now have == 1 entity
            if (null != qt.entityValue) {
              entityValue = qt.entityValue;
              entityType = qt.entityType;
              entityIndex = entityValue.toLowerCase() + "/" + entityType.toLowerCase();
            }//TESTED (1.5)
            else {
              entityIndex = qt.entity.toLowerCase();
              int index = qt.entity.lastIndexOf('/');
              if (index > 0) {
                entityValue = qt.entity.substring(0, index);
                entityType = qt.entity.substring(index + 1).toLowerCase();
              }
            }//TESTED (1.6)
          }
          else { // >1 entity, not supported
            //DEBUG
            if (_DEBUG) _logger.debug("DEB: preQA2a: >1 entity: " + qt.entity + " / " + entityType + " / " + query.toApi());
            if (_testMode) {
              throw new RuntimeException("Bad testQueryJson: >1 entity: " + qt.entity + " / " + entityType + " / " + query.toApi());
            }
           
            return;
          }//TESTED (1.4)
        }//TESTED
        else if ((null != qt.etext) && (qt.etext.equals("*"))) {
          //this is fine provided it's only ANDed together (eg above logic case)
        }
        else if (null != qt.etext) { // Only work if it matches one of the regexes
          if (null == entityType) {
            textToTest = qt.etext;
            entityType = "etext";
          }
          else { // >1 entity, not supported
            //DEBUG
            if (_DEBUG) _logger.debug("DEB: preQA2b: >1 entity: " + qt.entity + " / " + entityType + " / " + query.toApi());
            if (_testMode) {
              throw new RuntimeException("Bad testQueryJson: >1 entity: " + qt.entity + " / " + entityType + " / " + query.toApi());
            }
           
            return;
          }//TESTED (1.4)         
        }
        else if (null == qt.time) { // temporal
          //DEBUG
          if (_DEBUG) _logger.debug("DEB: preQA3: non-entity/date " + query.toApi());
          if (_testMode) {
            throw new RuntimeException("Bad testQueryJson: non-entity/date " + query.toApi());
          }
          return;
        }//TESTED (1.1)
      }//(end loop over query terms)
     
    }//TESTED (1.*)
    if (null == entityType) { // Query too complex
      //DEBUG
      if (_DEBUG) _logger.debug("DEB: preQA4: query missing entity " + query.toApi());
      if (_testMode) {
        throw new RuntimeException("Bad testQueryJson: query missing entity " + query.toApi());
      }
     
      return;
    }//TESTED (1.2)
    entityType = entityType.toLowerCase();
   
    // 2] If so, query across all the end
   
    for (SourceFederatedQueryConfigPojo endpoint: _endpoints) {
     
      // Endpoint validation:
      if (null == endpoint.entityTypes) {
        if (_testMode) {
          throw new RuntimeException("No entity types specified");
        }
        else {
          continue;
        }
      }
      if (null != textToTest) { // This is text, see if you can convert to an entity
        entityValue = null; //(reset for different endpoints - used in the check to decide whether to continue)
       
        for (String entityTypeRegex: endpoint.entityTypes) {
          if (entityTypeRegex.startsWith("/")) {
            int regexIndex = entityTypeRegex.lastIndexOf('/'); // (guaranteed to be >= 0)
            try {
              Pattern regex = Pattern.compile(entityTypeRegex.substring(1, regexIndex));
              if (regex.matcher(textToTest).matches()) {
                entityType = entityTypeRegex.substring(1+regexIndex);
                if (entityType.length() > 0) {
                  entityValue = textToTest;
                  entityIndex = entityValue.toLowerCase() + "/" + entityType.toLowerCase();
                }
              }
            }
            catch (Exception e) { // if not in test mode, carry on
              if (_testMode) {
                throw new RuntimeException(e);
              }
            }
          }
        }//(end loop over entity regexes)
      }//TESTED
      if (null == entityValue) { // None of the regexes matched
        if (_testMode) {
          throw new RuntimeException("Text specified, does not match any of the regexes: " + Arrays.toString(endpoint.entityTypes.toArray()) + " ... text = " + textToTest);
        }
        continue;
      }
     
      //DEBUG
      if (_DEBUG) _logger.debug("DEB: preQA5: ENDPOINT: " + Arrays.toString(endpoint.entityTypes.toArray()) + " / " + entityType);

      if ((null != endpoint.importScript) && !endpoint.importScript.isEmpty()) {
        if (null == endpoint.scriptlang) {
          endpoint.scriptlang = "python"; // python ==default
        }
        if (endpoint.scriptlang.equalsIgnoreCase("python")) {
          _pyEngine = new ScriptEngineManager().getEngineByName("python");
          if (null == _pyEngine) {
            _logger.error("Python not installed - copy jython 2.5+ into /opt/infinite-home/lib/unbundled");
            if (_testMode) {
              throw new RuntimeException("Python not installed - copy jython 2.5+ into /opt/infinite-home/lib/unbundled");
            }
          }//TESTED (by hand, importScript != null and scriptlang: "python", jython not on classpath)
        }
        else {
          _logger.error("Python is currently the only supported scriptlang");
          if (_testMode) {
            throw new RuntimeException("Python is currently the only supported scriptlang");
          }
        }//TESTED (by hand, importScript != null and scriptlang: "none")
      }//TESTED
     
      if ((null != endpoint.bypassSimpleQueryParsing) && endpoint.bypassSimpleQueryParsing) {
        throw new RuntimeException("Currently only simple query parsing is supported");       
      }
      if ((null != endpoint.entityTypes) && endpoint.entityTypes.contains(entityType)) {
       
        // Check if the *doc* (not *API response*) generated from this endpoint/entity has been cached, check expiry if so
        String cachedDocUrl = buildScriptUrl(endpoint.parentSource.getKey(), entityIndex);
        BasicDBObject cachedDoc = null;
        BasicDBObject cachedDoc_expired = null;
        if (_cacheMode && ((null == endpoint.cacheTime_days) || (endpoint.cacheTime_days >= 0))) {
          BasicDBObject cachedDocQuery = new BasicDBObject(DocumentPojo.url_, cachedDocUrl);
          cachedDocQuery.put(DocumentPojo.sourceKey_, endpoint.parentSource.getKey());
          cachedDoc = (BasicDBObject) DbManager.getDocument().getMetadata().findOne(cachedDocQuery);
         
          if ((null != cachedDoc) && checkDocCache_isExpired(cachedDoc, endpoint)) {
            cachedDoc_expired = cachedDoc;
            cachedDoc = null;
          }
        }//TESTED (by hand)
       
        if (null == _asyncRequestsPerQuery) {
          // If we've got this far create a list to store the async requests
          _asyncRequestsPerQuery = new LinkedList<FederatedRequest>();
        }

        if (null != cachedDoc) {
          // Common params:
          FederatedRequest requestOverview = new FederatedRequest();
          requestOverview.endpointInfo = endpoint;
          requestOverview.communityIdStrs = communityIdStrs;
          requestOverview.requestParameter = entityValue;
          requestOverview.queryIndex = entityIndex;
          requestOverview.mergeKey = endpoint.parentSource.getKey();
         
          if (_DEBUG) _logger.debug("DEB: preQA6z: Doc Cache: " + cachedDocUrl + " , " + cachedDoc);                     
         
          requestOverview.cachedDoc = cachedDoc;
          _asyncRequestsPerQuery.add(requestOverview);
        }//TESTED (by hand)
        else if (null != endpoint.importScript) {
          BasicDBObject cachedVal = null;
          if (_cacheMode) { // (source key not static, plus not sure it's desirable, so for simplicity just don't cache requests in test mode)
            cachedVal = this.getCache(cachedDocUrl, endpoint);
          }
         
          // Common params:
          FederatedRequest requestOverview = new FederatedRequest();
          requestOverview.endpointInfo = endpoint;
          requestOverview.communityIdStrs = communityIdStrs;
          requestOverview.requestParameter = entityValue;
          requestOverview.queryIndex = entityIndex;
          requestOverview.mergeKey = endpoint.parentSource.getKey();
          requestOverview.cachedDoc_expired = cachedDoc_expired;
         
          if (null == cachedVal) {
            requestOverview.importThread = new FederatedHarvest();
            requestOverview.importThread.queryEngine = this;
            requestOverview.importThread.request = requestOverview;
            requestOverview.importThread.start();
          }
          else {
            if (_DEBUG) _logger.debug("DEB: preQA6a: Cache: " + cachedDocUrl + " , " + cachedVal);           
           
            requestOverview.cachedResult = cachedVal;           
          }
          // Launch thread
          _asyncRequestsPerQuery.add(requestOverview);
        }//TESTED (by hand)
        else {
          AsyncHttpClient asyncHttpClient = null;
          try {
            for (SourceFederatedQueryConfigPojo.FederatedQueryEndpointUrl request: endpoint.requests) {
              asyncHttpClient = new AsyncHttpClient();
              BoundRequestBuilder asyncRequest = null;
              String postContent = null;
              if (null != request.httpFields) {
                postContent = request.httpFields.get("Content");
                if (null == postContent)
                  postContent = request.httpFields.get("content");
              }//TESTED (by hand, "http://httpbin.org/post", "httpFields": { "Content": "test" }
             
              if (null == postContent) {
                asyncRequest = asyncHttpClient.prepareGet(request.endPointUrl.replace("$1", entityValue));
              }
              else {
                asyncRequest = asyncHttpClient.preparePost(request.endPointUrl.replace("$1", entityValue));             
              }//TESTED (by hand, "http://httpbin.org/post", "httpFields": { "Content": "test" }
             
              if (null != request.urlParams) {
                for (Map.Entry<String, String> keyValue: request.urlParams.entrySet()) {
                  asyncRequest = asyncRequest.addQueryParameter(keyValue.getKey(), keyValue.getValue().replace("$1", entityValue));
                }
              }//TESTED (1.5, 1.6, 3.*, 4.*)
              if (null != request.httpFields) {
                for (Map.Entry<String, String> keyValue: request.httpFields.entrySet()) {
                  if (!keyValue.getKey().equalsIgnoreCase("content")) {
                    asyncRequest = asyncRequest.addHeader(keyValue.getKey(), keyValue.getValue().replace("$1", entityValue));
                  }
                }             
              }//TESTED (by hand, "http://httpbin.org/cookies", "httpFields": { "Cookie": "mycookie=test" }
              if (null != postContent) {
                asyncRequest = asyncRequest.setBody(postContent.replace("$1", entityValue));
              }//TESTED (by hand, "http://httpbin.org/post", "httpFields": { "Content": "$1" }
             
              // Common params:
              FederatedRequest requestOverview = new FederatedRequest();
              requestOverview.endpointInfo = endpoint;
              requestOverview.communityIdStrs = communityIdStrs;
              requestOverview.requestParameter = entityValue;
              requestOverview.asyncClient = asyncHttpClient;
              requestOverview.queryIndex = entityIndex;
              requestOverview.mergeKey = endpoint.parentSource.getKey();
              requestOverview.subRequest = request;
   
              // Now check out the cache:
              URI rawUri = asyncRequest.build().getRawURI();
              String url = rawUri.toString();
              if ((null == endpoint.parentSource.getOwnedByAdmin()) || !endpoint.parentSource.getOwnedByAdmin())
              {
                //TODO (INF-2798): Make this consistent with the how security is handled elsewhere
                int port = rawUri.getPort();
                if ((80 != port) && (443 != port) && (-1 != port)) {
                  _logger.error("Only admin can make requests on non-standard ports: " + url + ": " + port);
                  if (_testMode) {
                    asyncHttpClient.close();
                    throw new RuntimeException("Only admin can make requests on non-standard ports: " + url + ": " + port);
                  }                 
                }
              }//TESTED (by hand)
             
              BasicDBObject cachedVal = this.getCache(url, endpoint);
              requestOverview.cachedResult = cachedVal;
              requestOverview.cachedDoc_expired = cachedDoc_expired;
             
              if (null == cachedVal) {
                requestOverview.responseFuture = asyncRequest.execute();           
              }         
              else {
                //DEBUG
                if (_DEBUG) _logger.debug("DEB: preQA6b: Cache: " + url + " , " + cachedVal);
               
                requestOverview.responseFuture = null;
                asyncHttpClient.close();
              }
              _asyncRequestsPerQuery.add(requestOverview);
            }//(end loop over multiple requests
          }
          catch (Exception e) {
            _logger.error("Unknown error creating federated query for " + endpoint.titlePrefix + ": " + e.getMessage());
            if (_testMode) {
              throw new RuntimeException("Unknown error creating federated query for " + endpoint.titlePrefix + ": " + e.getMessage(), e);
            }
          }
        }//(end cached doc vs script vs request mode for queries)
       
      }//(end if this request is for this entity type)
      else { // no entity matches - if in test mode then bomb out with useful error
        if (_testMode) {
          throw new RuntimeException("Specified entity: " + entityIndex + " not in set: " + Arrays.toString(endpoint.entityTypes.toArray()));
        }
      }
    }//(end loop over endpoints)
  }

  @Override
  public void postQueryActivities(ObjectId queryId, List<BasicDBObject> docs, ResponsePojo response)
  {
    boolean grabbedScores = false;
    double aggregateSignif = 100.0;
    double queryRelevance = 100.0;
    double score = 100.0;
   
    if (null != _asyncRequestsPerQuery) {
      int added = 0;
      BasicDBObject doc = null;
      BasicDBList bsonArray = new BasicDBList();
      PeekingIterator<FederatedRequest> it = Iterators.peekingIterator(_asyncRequestsPerQuery.iterator());
      while (it.hasNext()) {
       
        FederatedRequest request = it.next();
        if (null == request.cachedDoc) { // no cached doc
          try {
            if (null == request.cachedResult) {  // no cached value       
              if (null != request.importThread) {
                // 1) wait for the thread to finish
                if (null == request.endpointInfo.queryTimeout_secs) {
                  request.endpointInfo.queryTimeout_secs = 300;
                }
                for (int timer = 0; timer < request.endpointInfo.queryTimeout_secs; timer++) {
                  try {
                    request.importThread.join(1000L);
                    if (!request.importThread.isAlive()) {
                      break;
                    }
                  }//TESTED (by hand)
                  catch (Exception e) {
                   
                  }
                }
                if (request.importThread.isAlive()) {
                  request.errorMessage = new RuntimeException("Script timed out");
                }//TESTED (by hand)
               
                // 2) Get the results
                if (null != request.errorMessage) {
                  if (_testMode) {
                    throw new RuntimeException(request.errorMessage);
                  }
                }
                else if (null == request.scriptResult) {               
                  if (_testMode) {
                    throw new RuntimeException("Script mode: no cached result found from: " + request.requestParameter);
                  }
                }
                else {
                  String url = buildScriptUrl(request.mergeKey, request.queryIndex);
                  if (_cacheMode) { // (don't cache python federated queries in test mode)
                    this.cacheApiResponse(url, request.scriptResult, request.endpointInfo);
                  }
                  bsonArray.add(request.scriptResult);               
                }
              } // end script mode
              else { // HTTP mode
                Response endpointResponse = request.responseFuture.get();
                request.asyncClient.close();
                request.asyncClient = null;
   
                String jsonStr = endpointResponse.getResponseBody();
                String url = endpointResponse.getUri().toURL().toString();
               
                Object bsonUnknownType = com.mongodb.util.JSON.parse(jsonStr);
                BasicDBObject bson = null;
                if (bsonUnknownType instanceof BasicDBObject) {
                  bson = (BasicDBObject) bsonUnknownType;
                }
                else if (bsonUnknownType instanceof BasicDBList) {
                  bson = new BasicDBObject("array", bsonUnknownType);
                }
                else if (bsonUnknownType instanceof String) {
                  bson = new BasicDBObject("value", bsonUnknownType);                 
                }
               
                //DEBUG
                if (_DEBUG) _logger.debug("DEB: postQA1: " + url + ": " + jsonStr);
               
                if (null != bson) {
                  MongoDbUtil.enforceTypeNamingPolicy(bson, 0);
                  this.cacheApiResponse(url, bson, request.endpointInfo);
                  bsonArray.add(bson);
                }
              }//(end script vs request method)
            }//TESTED (3.1, 4.2)
            else { // (just used cached value)
              //DEBUG
              if (_DEBUG) _logger.debug("DEB: postQA2: " + request.cachedResult.toString());
             
              bsonArray.add((BasicDBObject)request.cachedResult.get(SimpleFederatedCache.cachedJson_));
            }//TESTED (4.1, 4.3)
          }
          catch (Exception e) {
            //DEBUG
            if (null == request.subRequest) {
              _logger.error("Error with script: " + e.getMessage());
              if (_testMode) {
                throw new RuntimeException("Error with script: " + e.getMessage(), e);
              }           
            }
            else {
              _logger.error("Error with " + request.subRequest.endPointUrl + ": " + e.getMessage());
              if (_testMode) {
                throw new RuntimeException("Error with " + request.subRequest.endPointUrl + ": " + e.getMessage(), e);
              }
            }
          }
          if (!it.hasNext() || (request.mergeKey != it.peek().mergeKey)) { // deliberate ptr arithmetic
            String url = buildScriptUrl(request.mergeKey, request.queryIndex);
           
            //DEBUG
            if (_DEBUG) _logger.debug("DEB: postQA3: " + url + ": " + bsonArray);         
           
            doc = createDocFromJson(bsonArray, url, request, request.endpointInfo);
          }
        } // (end if no cached doc)
        else { // cached doc, bypass lots of processing because no merging and doc already built
          doc = request.cachedDoc;
        }//TESTED (by hand)
         
        if (null != doc) {
          // Cache the document unless already cached (or caching disabled)
          if ((null == request.cachedDoc) && _cacheMode &&
              ((null == request.endpointInfo.cacheTime_days) || (request.endpointInfo.cacheTime_days >= 0)))
          {
            if (null != request.cachedDoc_expired) {
              ObjectId updateId = request.cachedDoc_expired.getObjectId(DocumentPojo.updateId_);
              if (null != updateId) {
                doc.put(DocumentPojo.updateId_, updateId);
              }
              else {
                doc.put(DocumentPojo.updateId_, request.cachedDoc_expired.getObjectId(DocumentPojo._id_));
              }
              BasicDBObject docUpdate = new BasicDBObject(DocumentPojo.url_, doc.getString(DocumentPojo.url_));
              docUpdate.put(DocumentPojo.sourceKey_, doc.getString(DocumentPojo.sourceKey_));
              DbManager.getDocument().getMetadata().remove(docUpdate);
             
              //DEBUG
              if (_DEBUG) _logger.debug("DEB: postQA4a: re-cached ... " + docUpdate.toString() + ": " + doc.getObjectId(DocumentPojo.updateId_));
            }
            //DEBUG
            if (_DEBUG) _logger.debug("DEB: postQA4b: cached ... " + doc);         
            DbManager.getDocument().getMetadata().save(doc);

          }//TESTED (by hand, 3 cases: cached not expired, cached expired first time, cached expired multiple times)
         
          if (!grabbedScores) {
            if (!docs.isEmpty()) {
              BasicDBObject topDoc = docs.get(0);
              aggregateSignif = topDoc.getDouble(DocumentPojo.aggregateSignif_, aggregateSignif);
              queryRelevance = topDoc.getDouble(DocumentPojo.queryRelevance_, queryRelevance);
              score = topDoc.getDouble(DocumentPojo.score_, score);
              grabbedScores = true;
             
              // OK would also like to grab the original matching entity
              BasicDBList ents = (BasicDBList) topDoc.get(DocumentPojo.entities_);
              if (null != ents) {
                for (Object entObj: ents) {
                  BasicDBObject ent = (BasicDBObject)entObj;
                  String entIndex = ent.getString(EntityPojo.index_, "");
                  if (entIndex.equals(request.queryIndex)) {
                    ents = (BasicDBList) doc.get(DocumentPojo.entities_);
                    if (null != ents) {
                      ents.add(ent);
                    }
                    break;
                  }
                }
              }//TESTED (by hand)
            }
          }
          doc.put(DocumentPojo.aggregateSignif_, aggregateSignif);
          doc.put(DocumentPojo.queryRelevance_, queryRelevance);
          doc.put(DocumentPojo.score_, score);

          // If we're returning to a query then we'll adjust the doc format (some of the atomic fields become arrays)
          if (!_testMode) {
            convertDocToQueryFormat(doc, request.communityIdStrs);             
          }//TESTED (by hand)
         
          docs.add(0, doc);
          added++;
          doc = null; //(reset)
         
          //(end if built a doc from the last request/set of requests)
        }//TESTED (3.1)   
       
      }//(end loop over federated requests)
     
      if (null != response.getStats()) {
        response.getStats().found += added;
      }//TESTED (by hand)     
    }
  }

  /////////////////////////////////////////////////
 
  // UTILITY
 
  public BasicDBObject createDocFromJson(BasicDBList jsonList, String url, FederatedRequest request, SourceFederatedQueryConfigPojo endpointInfo) {
    BasicDBObject doc = null; // (don't create unless needed)
    BasicDBList ents = null;
    StringBuffer entVals = null;
    HashSet<String> entDedup = null;
   
    if (_testMode) { // In test mode, need to return the JSON even if no entities are specified
      doc = new BasicDBObject();
    }
    if (null != endpointInfo.docConversionMap) {
      for (Map.Entry<String, String> docInfo: endpointInfo.docConversionMap.entrySet()) {
        for (Object jsonObj: jsonList) {
          BasicDBObject json = (BasicDBObject)jsonObj;
          try {
            String key = docInfo.getKey();
            // (allow user to not prepend array: if they don't want to)
            if ((1 == json.size()) && json.containsKey((Object)"array")) {
              if (!key.startsWith("array:") &&
                  !key.startsWith(":array") && !key.startsWith("$:array") &&
                  !key.startsWith("::") && !key.startsWith("$::"))
              {
                if (key.startsWith(":")) { // jpath
                  key = ":array" + key;
                }
                else if (key.startsWith("$:")) { // jpath
                  key = "$:array" + key.substring(1);
                }
                else {
                  key = "array:" + key;
                }                 
              }               
            }//TESTED (by hand)
            if (key.startsWith(":")) { // jpath
              key = "$" + key;
            }
            // NOTE: *not* org.json.JSONArray
            JSONArray candidateEntities = null;
            if (key.startsWith("$")) {
              JSONArray candidateEntities_tmp = JsonPath.read(json.toString(), key.replace(':', '.'));
              if (null != candidateEntities_tmp) {
                candidateEntities = new JSONArray();
                for (Object o: candidateEntities_tmp) {
                  if (o instanceof String) {
                    candidateEntities.add(o);
                  }
                  else if (o instanceof JSONArray) {
                    candidateEntities.addAll((JSONArray)o);
                  }
                }//TESTED (displayUrl vs entities, 3.2)
              }             
              //DEBUG
              //System.out.println(candidateEntities);
             
            }//(TESTED (permutations above by hand))
            else {
              String s = (String) MongoDbUtil.getProperty(json, key.replace(':', '.'));
              if (null != s) {
                candidateEntities = new JSONArray();
                candidateEntities.add(s);
              }
            }//TESTED (3.1)                     
           
            if (null != candidateEntities) for (int i = 0; i < candidateEntities.size(); ++i) {
              Object o = candidateEntities.get(i);
              if (!(o instanceof String)) {
                continue;
              }
              String s = o.toString();
              if (null == doc) {
                doc = new BasicDBObject();
                //(various fields added below)
              }
              if (docInfo.getValue().equalsIgnoreCase(DocumentPojo.displayUrl_)) {
                doc.put(DocumentPojo.displayUrl_, s);
              }//TESTED (3.1, 4.*)
              else { // Entities!
                if (null == ents) {
                  ents = new BasicDBList();
                }
                String index = s.toLowerCase() + "/" + docInfo.getValue().toLowerCase();
               
                if (null == entDedup) {
                  entDedup = new HashSet<String>();
                }
                else if (entDedup.contains(index)) { // Entity deduplication
                  continue;
                }//TESTED (3.2)
                entDedup.add(index);
               
                if (null == entVals) {
                  entVals = new StringBuffer(": ");
                }
                else {
                  entVals.append(", ");
                }
                entVals.append(s);
               
                String dimension = null;
                if (null != endpointInfo.typeToDimensionMap) {
                  try {
                    dimension = EntityPojo.Dimension.valueOf(endpointInfo.typeToDimensionMap.get(docInfo.getValue())).toString();
                  }
                  catch (Exception e) {}
                }
                if (null == dimension) {
                  dimension = EntityPojo.Dimension.What.toString();
                }//TESTED (by hand)
               
                // (alternative to "made up" values would be to go looking in the existing docs/ents?)
                // (we'll try to avoid that for now...)
                BasicDBObject ent = new BasicDBObject();
                ent.put(EntityPojo.disambiguated_name_, s);
                ent.put(EntityPojo.type_, docInfo.getValue());
                ent.put(EntityPojo.dimension_, dimension);
                ent.put(EntityPojo.relevance_, 1.0);
                ent.put(EntityPojo.doccount_, 1L); // (ie relative to this query)
                ent.put(EntityPojo.averageFreq_, 1.0);
                ent.put(EntityPojo.datasetSignificance_, 10.0); // (ie relative to this query)
                ent.put(EntityPojo.frequency_, 1.0);
                ent.put(EntityPojo.index_, index);
                ent.put(EntityPojo.queryCoverage_, 100.0); // (ie relative to this query)
                ent.put(EntityPojo.totalfrequency_, 1.0); // (ie relative to this query)
                ents.add(ent);
              }//TESTED (3.1, 4.*)
            }
          }
          catch (Exception e) {
            //(do nothing? null or the wrong type)
            //e.printStackTrace();
          }
        }//end loop over various JSON objects retrieved
      }//(End loop over doc conversion elements)
    }//TESTED (3.*, 4.*)
   
    if ((null == ents) && !_testMode) { // don't return unless there are any entities
      return null;
    }
    else if (null != doc) {
      // Insert mandatory fields:
      // (Note the query format is a little bit different, the following fields are converted to arrays:
      //  sourceKey, source, communityId, mediaType)
      doc.put(DocumentPojo._id_, new ObjectId());
      doc.put(DocumentPojo.url_, url);
      doc.put(DocumentPojo.created_, new Date());
      doc.put(DocumentPojo.modified_, new Date());
      doc.put(DocumentPojo.publishedDate_, new Date());
      doc.put(DocumentPojo.sourceKey_, endpointInfo.parentSource.getKey());
      doc.put(DocumentPojo.source_, endpointInfo.parentSource.getTitle());
      doc.put(DocumentPojo.communityId_, new ObjectId(request.communityIdStrs[0]));
      doc.put(DocumentPojo.mediaType_, endpointInfo.parentSource.getMediaType());
      doc.put(DocumentPojo.metadata_, new BasicDBObject("json", jsonList.toArray()));
     
      if ((null != entVals) && (entVals.length() > 165)) { // (arbitrary length)
        entVals.setLength(165);
        entVals.append("...");
      }
      doc.put(DocumentPojo.title_, new StringBuffer(endpointInfo.titlePrefix).append(": ").append(request.requestParameter).append(entVals).toString());
      doc.put(DocumentPojo.entities_, ents);
      Gson gson = new GsonBuilder().setPrettyPrinting().create();
      JsonParser jp = new JsonParser();
      JsonElement je = jp.parse(jsonList.toString());
      doc.put(DocumentPojo.description_, gson.toJson(je)); // (prettified JSON)       
    }//TESTED (3.*, 4.*)
   
    return doc;
  }
 
  // Convert some of the doc fields to
  private static BasicDBObject convertDocToQueryFormat(BasicDBObject doc, String[] communityIdStrs) {
    doc.put(DocumentPojo.sourceKey_, Arrays.asList(doc.get(DocumentPojo.sourceKey_)));
    doc.put(DocumentPojo.source_, Arrays.asList(doc.get(DocumentPojo.source_)));
    doc.put(DocumentPojo.mediaType_, Arrays.asList(doc.get(DocumentPojo.mediaType_)));
    doc.put(DocumentPojo.communityId_, communityIdStrs);

    return doc;
  }//TESTED (by hand)
 
  private static String buildScriptUrl(String mergeKey, String entityIndex) {
    String url = new StringBuilder().append("inf://federated/").append(mergeKey).append("/").append(entityIndex).toString();
    return url;
  }//TESTED (by hand)

  /////////////////////////////////////////////////
 
  // CACHE UTILITIES
 
  public static boolean TEST_MODE_ONLY = false
  public static DBCollection getCacheCollection() {
    if (TEST_MODE_ONLY) {
      return MongoDbManager.getCollection("test", "fed_query_cache");
    }
    else {
      return MongoDbManager.getIngest().getFederatedCache();
    }
  }
 
  private static boolean _staticInitializer = false;
  private static long _lastChecked = -1L;
  private BasicDBObject getCache(String url, SourceFederatedQueryConfigPojo endpoint) {
    if ((null != endpoint.cacheTime_days) && (endpoint.cacheTime_days <= 0)) { // cache disabled
      return null;
    }
   
    DBCollection endpointCacheCollection = getCacheCollection();
   
    if (!_staticInitializer) {
      _staticInitializer = true;
      endpointCacheCollection.ensureIndex(new BasicDBObject(SimpleFederatedCache.expiryDate_, 1));
    }
    BasicDBObject cacheObj = (BasicDBObject) endpointCacheCollection.findOne(new BasicDBObject(SimpleFederatedCache._id_, url));
    if (null == cacheObj) {
      return null;
    }
    // else found something, means there's stuff in the DB
    // so check it's not too big:
    Date now = new Date();         
    if ((-1 == _lastChecked) || (now.getTime() > (_lastChecked + (600L*1000L)))) { // (only check every 10 minutes)
     
      if (endpointCacheCollection.count() > SimpleFederatedCache.QUERY_FEDERATION_CACHE_CLEANSE_SIZE) {
        _lastChecked = now.getTime();
        // Remove everything with expiry date older than now       
        endpointCacheCollection.remove(new BasicDBObject(SimpleFederatedCache.expiryDate_, new BasicDBObject(DbManager.lt_, new Date())));
      }
    }//TESTED (4.3)
    Date expiryDate = cacheObj.getDate(SimpleFederatedCache.expiryDate_, now);
    if (now.getTime() < expiryDate.getTime()) {
      return cacheObj;
    }
    else {
      return null;
    }//TESTED (4.2)
  }//TESTED (4.*)
 
  private static final int DEFAULT_CACHE_TIME_DAYS = 5;
 
  private void cacheApiResponse(String url, BasicDBObject toCacheJson, SourceFederatedQueryConfigPojo endpoint) {
   
    int cacheTime_days = DEFAULT_CACHE_TIME_DAYS;
    if (null != endpoint.cacheTime_days) {
      cacheTime_days = endpoint.cacheTime_days;
    }
    if (cacheTime_days <= 0) { // Disable _request_ cache (to disable all caching include doc caching use -1)
      return;
    }
   
    DBCollection endpointCacheCollection = getCacheCollection();
   
    BasicDBObject toCacheObj = new BasicDBObject(SimpleFederatedCache._id_, url);
    toCacheObj.put(SimpleFederatedCache.cachedJson_, toCacheJson);
    toCacheObj.put(SimpleFederatedCache.expiryDate_, new Date(new Date().getTime() + cacheTime_days*3600L*24L*1000L));
    endpointCacheCollection.save(toCacheObj);
  }//TESTED (3.1, 4.*)

  // Document level caching, although it effectively serves as a mostly redundant request cache,
  // It's actually used to allow users to save federated query documents in their buckets
 
  public static boolean checkDocCache_isExpired(BasicDBObject cachedDoc, SourceFederatedQueryConfigPojo endpoint) {
    if (null == endpoint.cacheTime_days)
      endpoint.cacheTime_days = DEFAULT_CACHE_TIME_DAYS; 
   
    Date now = new Date();
    long cacheThreshold = cachedDoc.getDate(DocumentPojo.created_, now).getTime() + endpoint.cacheTime_days*3600L*24L*1000L;
   
    if (cacheThreshold < now.getTime()) // (ie doc-creation-time + cache is earlier than now => time to decache)
    {
      if (_DEBUG) _logger.debug("DEB: preQA6zz: Cache expired: " + cachedDoc.getString(DocumentPojo.url_) + ": " + new Date(cacheThreshold) + " vs " + now);           
     
      return true;
    }
    else
      return false;
  }//TESTED
 
  ///////////////////////////////////////////////////////////////////////////////
  ///////////////////////////////////////////////////////////////////////////////

  // (my own threading)
 
  public static class FederatedHarvest extends Thread {
    public FederatedRequest request;
    public SimpleFederatedQueryEngine queryEngine;
    public void run() {
      String scriptResult = null;
      LinkedList<String> debugLog = queryEngine._testMode ? new LinkedList<String>() : null;
      try {
        scriptResult = queryEngine.performImportPythonScript(request.endpointInfo.importScript, request.requestParameter, request.fullQuery, request.endpointInfo.parentSource.getOwnedByAdmin(), debugLog);
      }
      catch (Exception e) {
        request.errorMessage = e;
        return;
      }     
      Object parsedScriptResult = null;
      try {
        parsedScriptResult = com.mongodb.util.JSON.parse(scriptResult.toString());
      }
      catch (Exception e) {
        request.errorMessage = new RuntimeException("Error deserializing " + scriptResult + ": " + e.getMessage());
        return;
      }     
      if (parsedScriptResult instanceof BasicDBObject) {
        request.scriptResult = (BasicDBObject) parsedScriptResult;
      }
      else if (parsedScriptResult instanceof BasicDBList) {
        request.scriptResult = new BasicDBObject("array", parsedScriptResult);
      }
      else if (parsedScriptResult instanceof String) {
        request.scriptResult = new BasicDBObject("value", parsedScriptResult);
      }
      else {
        request.errorMessage = new RuntimeException("Error deserializing " + scriptResult + ": " + parsedScriptResult);
        return;       
      }
      try {
        MongoDbUtil.enforceTypeNamingPolicy(request.scriptResult, 0);
        if ((null != debugLog) && !debugLog.isEmpty()) {
          request.scriptResult.put("$logs", debugLog);
        }
      }
      catch (Exception ee) {
        request.errorMessage = new RuntimeException("Error deserializing " + scriptResult + ": " + ee.getMessage());
        return;
      }     
    }
  }//TESTED (by hand)
 
  ///////////////////////////////////////////////////////////////////////////////

  private String performImportPythonScript(String importScript, String entityValue, AdvancedQueryPojo query, Boolean isSrcAdmin, LinkedList<String> debugLog) {
    String modCode = importScript;
    try {         
      // Create a return value
      String fnName = "var" + new ObjectId();
     
      // Pull all the imports, evaluate them outside the security manager
      Pattern importRegex = Pattern.compile("^\\s*(?:import|from)\\s[^\n]+", Pattern.MULTILINE);
      Matcher m = importRegex.matcher(importScript);
      StringBuffer sbImports = new StringBuffer();
      while (m.find()) {
        sbImports.append(m.group()).append('\n');
      }           
      if (null != query) {
        _pyEngine.put("_query", query.toApi()); // full query       
      }
      if (null != entityValue) {
        _pyEngine.put("_entityValue", entityValue); // allow either entityValue       
      }
      _pyEngine.eval(sbImports.toString());

      // Logging function
      if (null != debugLog) {
        String logName = "log" + new ObjectId();
        _pyEngine.put(logName, debugLog);
        _pyEngine.eval("def ikanow_log(logmsg):\n   " + logName + ".add(logmsg)\n\n");
      }
      else {
        _pyEngine.eval("def ikanow_log(logmsg):\n   pass\n\n");       
      }
     
      // Enable SSL everywhere (https://wiki.python.org/jython/NewSocketModule#SSLSupport)
      //http://tech.pedersen-live.com/2010/10/trusting-all-certificates-in-jython/
      //didn't work: http://jython.xhaus.com/installing-an-all-trusting-security-provider-on-java-and-jython/
      _pyEngine.eval(IOUtils.toString(SimpleFederatedQueryEngine.class.getResourceAsStream("JythonTrustManager.py")));
     
      // Now run the script
      modCode = m.replaceAll("");
      modCode = modCode.replaceAll("\n([^\n]+)$", "\n" + fnName + " = " + "$1");
      if ((null == isSrcAdmin) || !isSrcAdmin) {
        _scriptingSecurityManager.eval(_pyEngine, modCode);
      }
      else {
        //TODO (INF-2798): Make this consistent with the how security is handled elsewhere
        _pyEngine.eval(modCode);
      }

      // Get return value
      Object result = _pyEngine.get(fnName.toString());
     
      //DEBUG
      if (_DEBUG) _logger.debug("DEB: T1: Return val from script: " + result);     
     
      if (null == result) {
        throw new RuntimeException("Null return from script - final line needs to evaluate expression to return, eg 'response.read()' or 'varname'");
      }//TESTED (by hand)
     
      return result.toString();
     
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    catch (Error ee) {
      throw new RuntimeException(ee);
    }   
  }//TESTED
 
  ///////////////////////////////////////////////////////////////////////////////
  ///////////////////////////////////////////////////////////////////////////////
  ///////////////////////////////////////////////////////////////////////////////
 
  // TEST UTILITIES
 
  // Check 1.x test results
 
  public void test_CheckIfQueryLaunched(String testName, boolean shouldWork) {
   
    FederatedRequest request = null;
    if (null != this._asyncRequestsPerQuery) {
      if (!this._asyncRequestsPerQuery.isEmpty()) {
        request = this._asyncRequestsPerQuery.peek();
        if (this._asyncRequestsPerQuery.size() > 1) {
          System.out.println("*** " + testName + ":  + too many async requests");
          System.exit(-1);
        }
      }
    }
   
    if (shouldWork) {
      if (null == request) {
        System.out.println("*** " + testName + ": Expected request, got none");       
        System.exit(-1);
      }
    }
    else {
      if (null != request) {
        if (null != request.responseFuture) {
          System.out.println("*** " + testName + ": incorrectly added " + request.requestParameter);
          System.exit(-1);
        }
        else if ((null == request.cachedResult) && (null == request.cachedDoc)) {
          System.out.println("*** " + testName + ": no request, not cached " + request.cachedResult);
          System.exit(-1);         
        }
      }
    }
  }//TESTED (by hand - who tests the tester?!)

  public void test_CheckIfDocAdded(String testName, List<BasicDBObject> docs) {
   
    // 1) Did a doc get added?
   
    if (2 != docs.size()) {
      System.out.println("*** " + testName + ": didn't add doc? " + docs.size());     
      System.exit(-1);
    }
   
    // 2) Was it the right doc?
   
    BasicDBObject doc = docs.get(0);
    doc.remove(DocumentPojo._id_);
    doc.remove(DocumentPojo.created_);
    doc.remove(DocumentPojo.modified_);
    doc.remove(DocumentPojo.publishedDate_);

    String docToCheck =
      "{ \"displayUrl\" : \"http://test3_1\" , \"url\" : \"inf://federated/fakeendpoint.123/test3_1/testentityin\" , \"sourceKey\" : [ \"fakeendpoint.123\"] , \"source\" : [ \"fakeendpoint\"] , \"communityId\" : [ \"4c927585d591d31d7b37097a\"] , \"mediaType\" : [ \"Report\"] , \"metadata\" : { \"json\" : [ { \"test\" : { \"field\" : [ \"test3_1\" , \"test3_1\"] , \"field2\" : \"http://test3_1\"}}]} , \"title\" : \"fake endpoint: : test3_1: test3_1\" , \"entities\" : [ { \"disambiguated_name\" : \"test3_1\" , \"type\" : \"TestEntityOut\" , \"dimension\" : \"What\" , \"relevance\" : 1.0 , \"doccount\" : 1 , \"averageFreq\" : 1.0 , \"datasetSignificance\" : 10.0 , \"frequency\" : 1.0 , \"index\" : \"test3_1/testentityout\" , \"queryCoverage\" : 100.0 , \"totalfrequency\" : 1.0}] , \"description\" : \"[\\n  {\\n    \\\"test\\\": {\\n      \\\"field\\\": [\\n        \\\"test3_1\\\",\\n        \\\"test3_1\\\"\\n      ],\\n      \\\"field2\\\": \\\"http://test3_1\\\"\\n    }\\n  }\\n]\" , \"aggregateSignif\" : 115.0 , \"queryRelevance\" : 105.0 , \"score\" : 110.0}";
   
    if (!docToCheck.equals(doc.toString())) {
      System.out.println("*** " + testName + ": document incorrect:\n" + docToCheck + "\nVS\n" + doc.toString());
      System.exit(-1);
    }
   
    // 3) Did the doc get cached?
   
    DBCollection endpointCacheCollection = getCacheCollection();
   
    String hardwiredCacheId = "http://localhost:8186/test/test3_1/?param1=test3_1";
    BasicDBObject cachedVal = (BasicDBObject) endpointCacheCollection.findOne(new BasicDBObject(SimpleFederatedCache._id_, hardwiredCacheId));
   
    if (null == cachedVal) {
      System.out.println("*** " + testName + ": no cache for: " + doc.get(DocumentPojo.url_) + " / " + hardwiredCacheId);
      System.exit(-1);
    }
    else {
      Date expiryDate = cachedVal.getDate(SimpleFederatedCache.expiryDate_, null);
      if ((null == expiryDate) || ((new Date().getTime() - expiryDate.getTime()) > 10*1000L)) {
        System.out.println("*** " + testName + ": expiry date for: " + doc.get(DocumentPojo.url_) + ": " + expiryDate + " vs now= " + new Date());       
        System.exit(-1);
      }
      BasicDBObject cachedJson = (BasicDBObject) cachedVal.get(SimpleFederatedCache.cachedJson_);
      Object docVal = null;
      try {
        Object[] docVals = (Object[] ) ((BasicDBObject)doc.get(DocumentPojo.metadata_)).get("json");
        docVal = docVals[0];
      }
      catch (Exception e) {
        @SuppressWarnings("rawtypes")
        Collection docVals = (Collection) ((BasicDBObject)doc.get(DocumentPojo.metadata_)).get("json");
        docVal = docVals.iterator().next();
       
      }
      if ((null == cachedJson) || !cachedJson.equals(docVal)) {
        System.out.println("*** " + testName + ": cache: " + doc.get(DocumentPojo.url_) + ": cached val incorrect:\n" + docVal + "\nVS\n" + cachedJson);
        System.exit(-1);       
      }
    }
  }//TESTED (by hand)
 
  ///////////////////////////////////////////////////////////////////////////////
 
  // (general test utils)
 
  // Reset state ready for next test
 
  public void test_queryClear(boolean clearCacheAlso) {
    if (null != this._asyncRequestsPerQuery) {
      for (FederatedRequest req: this._asyncRequestsPerQuery) {
        try {
          req.responseFuture.get();
          if (null != req.asyncClient) {
            req.asyncClient.close();
          }
        }
        catch (Exception e) {}
      }
      this._asyncRequestsPerQuery.clear();
    }
    if (clearCacheAlso) {
      DBCollection endpointCacheCollection = getCacheCollection();
      endpointCacheCollection.remove(new BasicDBObject());
    }
  }//TESTED (by hand - who tests the tester?!)
 
  public void test_cacheExpire() {
    DBCollection endpointCacheCollection = getCacheCollection();

    DBCursor dbc = endpointCacheCollection.find();
    for (DBObject cacheEntryObj: dbc) {
      BasicDBObject cacheEntry = (BasicDBObject) cacheEntryObj;
      cacheEntry.put(SimpleFederatedCache.expiryDate_, new Date(new Date().getTime() - 3600L*1000L)); // (ie expired an hour ago)
      endpointCacheCollection.save(cacheEntry);
    }
  }
  public void test_cacheClear(boolean clearApiCache, boolean clearDocCache, String key) {
    // (DO NOT COMMENT THIS OUT)
    SimpleFederatedQueryEngine.TEST_MODE_ONLY = true;   
   
    // Clear doc_metadata table before running
    if (clearDocCache) {
      MongoDbManager.getDocument().getMetadata().remove(new BasicDBObject(DocumentPojo.sourceKey_, key));
    }
    if (clearApiCache) {
      // (note: this only deletes the test collection because of TEST_MODE_ONLY code)
      SimpleFederatedQueryEngine.getCacheCollection().remove(new BasicDBObject()); // (clear test collection for testing)   
    }
  }
  public void test_cacheFill(String testName, boolean fill, boolean shouldBeFull) {
    DBCollection endpointCacheCollection = getCacheCollection();
    if (fill) {
      for (long i = 0; i < (1 + SimpleFederatedCache.QUERY_FEDERATION_CACHE_CLEANSE_SIZE); ++i) {
        SimpleFederatedCache fakeCacheElement = new SimpleFederatedCache();
        fakeCacheElement.expiryDate = new Date(new Date().getTime() - 3600L*1000L); // (ie expired an hour ago)
        fakeCacheElement._id = testName + "_" + i;
        fakeCacheElement.cachedJson = new BasicDBObject();
        endpointCacheCollection.save(fakeCacheElement.toDb());
      }
      _lastChecked = new Date(new Date().getTime() - 602L*1000L).getTime();
    }
    long count = endpointCacheCollection.count();
    if (shouldBeFull) {
      if (count < SimpleFederatedCache.QUERY_FEDERATION_CACHE_CLEANSE_SIZE) {
        System.out.println("*** " + testName + ": cache should just contain many elements, not: " + count);
        System.exit(-1);                       
      }
    }
    else {
      if (1 != count) {
        System.out.println("*** " + testName + ": cache should just contain one element, not: " + count);
        System.exit(-1);               
      }
    }
  }
}
TOP

Related Classes of com.ikanow.infinit.e.api.knowledge.federated.SimpleFederatedQueryEngine$FederatedHarvest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.