// (NOTE CAN'T ACCESS "query" UNTIL AFTER 0.1 BECAUSE THAT CAN CHANGE IT)
long nSysTime = (_nNow = System.currentTimeMillis());
ResponsePojo rp = new ResponsePojo();
// communityIdList is CSV
String[] communityIdStrs = SocialUtils.getCommunityIds(userIdStr, communityIdStrList);
//(timing)
long nQuerySetupTime = System.currentTimeMillis();
ElasticSearchManager indexMgr = getIndexManager(communityIdStrs);
SearchRequestBuilder searchSettings = indexMgr.getSearchOptions();
StringBuffer querySummary = new StringBuffer();
BaseQueryBuilder queryObj = null;
InternalTempFilterInfo tempFilterInfo = null;
try {
queryObj = getBaseQuery(query, communityIdStrs, communityIdStrList, userIdStr, querySummary);
if (null == queryObj) { // only occurs if has 1 element with ftext starting $cache:
return getSavedQueryInstead(query.qt.get(0).ftext.substring(7), communityIdStrs, query); // (step over cache preamble)
}
tempFilterInfo = getBaseFilter(query, communityIdStrs);
}
catch (Exception e) {
Globals.populateStackTrace(errorString, e);
if (null != e.getCause()) {
errorString.append("[CAUSE=").append(e.getCause().getMessage()).append("]");
Globals.populateStackTrace(errorString, e.getCause());
}
errorString.append(": " + e.getMessage());
return null;
}
//DEBUG
//querySummary.append(new Gson().toJson(query, AdvancedQueryPojo.class));
// 0.4] Pre-Lucene Scoring
// 0.4.1] General
// Different options:
// a] Get the most recent N documents matching the query, score post-query
// b] Get the N highest (Lucene) scoring documents, incorporate significance post-query if desired
// In both cases, N depends on whether significance calculation is taking place (and on the "skip" param)
// Work out how many documents to request from the index:
//  - nRecordsToOutput: number of docs the caller asked to have returned
//  - nRecordsToSkip:   paging offset requested by the caller
//  - nRecordsToGet:    number of docs to pull back (starts at score.numAnalyze, adjusted below)
int nRecordsToOutput = query.output.docs.numReturn;
int nRecordsToSkip = query.output.docs.skip;
int nRecordsToGet = query.score.numAnalyze;
final int nMAXRECORDSTOOUTPUT = 10000;
final int nMAXRECORDSTOGET = 20000;
// Some sanity checking on doc numbers:
if (nRecordsToOutput > nMAXRECORDSTOOUTPUT) { // Upper limit...
    errorString.append(": Max # docs to return is 10000.");
    return null;
}
if (nRecordsToGet < nRecordsToOutput) {
    // (never fetch fewer docs than we have been asked to output)
    nRecordsToGet = nRecordsToOutput;
}
else if (nRecordsToGet > nMAXRECORDSTOGET) { // Upper limit...
    nRecordsToGet = nMAXRECORDSTOGET; // (we can do something sensible with this so carry on regardless)
}
boolean bUseSignificance = (query.score.sigWeight > 0.0);
boolean bNeedExtraResultsForEnts =
    ((query.output.aggregation != null) && (query.output.aggregation.entsNumReturn != null) && (query.output.aggregation.entsNumReturn > 0))
    ||
    (query.output.docs.enable && (query.output.docs.numReturn > 0) && (query.output.docs.ents) && (query.score.scoreEnts));
if (bUseSignificance || bNeedExtraResultsForEnts) {
    // Some logic taken from the original "knowledge/search"
    // Sum in long: a very large "skip" must not wrap negative and slip past the limit check below.
    final long nRecordsNeeded = (long) nRecordsToSkip + (long) nRecordsToOutput;
    // Keep doubling the number of docs to fetch until it covers skip+output (or exceeds the cap).
    // The (nRecordsToGet > 0) guard is required: doubling zero (or a negative value) never grows,
    // so without it the loop never terminates when numAnalyze/numReturn are 0 and skip is non-zero.
    while ( (nRecordsToGet > 0) && (nRecordsNeeded > nRecordsToGet) && (nRecordsToGet <= nMAXRECORDSTOGET) )
    {
        nRecordsToGet += nRecordsToGet;
    }
    if (nRecordsToGet > nMAXRECORDSTOGET) {
        errorString.append(": Can only skip through to 20000 documents.");
        return null;
    }
    searchSettings.setSize(nRecordsToGet);
    //TESTED
}
else if (query.output.docs.enable) { // In this case we just need the minimum number of records
    // (whether searching by date or by relevance)
    searchSettings.setFrom(nRecordsToSkip);
    nRecordsToSkip = 0; // (so it isn't double counted in the processing module)
    nRecordsToGet = nRecordsToOutput;
    searchSettings.setSize(nRecordsToGet);
    //TESTED
}
else { // In this case we're just outputting aggregations, and not even ones that come from the docs
    nRecordsToGet = 0; // (use this variable everywhere we care about bringing docs back, either to output or for suitable aggregation)
    searchSettings.setSize(0);
}
// Sort on score if relevance is being used
if (nRecordsToGet > 0) {
if (query.score.relWeight > 0.0) { // (b) above
// Using score is default, nothing to do
}
else { // (a) above
// Debug code, if rel weight negative then use date to check Lucene score is better...
if (query.score.relWeight < 0.0) {
query.score.relWeight = -query.score.relWeight;
}
// Set Lucene to order:
searchSettings.addSort(DocumentPojo.publishedDate_, SortOrder.DESC);
}//TOTEST
}//(if docs aren't enabled, don't need to worry about sorting)
// 0.4.2] Prox scoring (needs to happen after [0.3])
// Add proximity scoring:
boolean bLowAccuracyDecay = false;
if ((nRecordsToGet > 0) || (null == _scoringParams.adjustAggregateSig) || _scoringParams.adjustAggregateSig) {
// (ie if we're getting docs or applying scores to entities)
if (!_aggregationAccuracy.equals("full")) {
bLowAccuracyDecay = true;
}
queryObj = addProximityBasedScoring(queryObj, searchSettings, query.score, tempFilterInfo.parentFilterObj, bLowAccuracyDecay);
if (null == _scoringParams.adjustAggregateSig) { // auto-decide .. if ftext is set and is non-trivial
if ((null != query.score.timeProx) || (null != query.score.geoProx)) {
// (These are set to null above if badly formed)
_scoringParams.adjustAggregateSig = true;
}
}
}// (else not worth the effort)
// 0.4.3] Source weightings (if any)
queryObj = applyManualWeights(queryObj, query.score);
// 0.5] Pre-lucene output options
// only return the id field and score
// (Both _id and score come back as default options, SearchHit:: getId and getScore, don't need anything else)
// Facets
// (These are needed for the case where we need to perform aggregations manually)
// Aggregation sizes that have to be computed manually (from the returned docs) rather than by
// index facets; they stay null unless the aggregation accuracy is "low" (see below).
Integer manualEntsNumReturn = null;
Integer manualEventsNumReturn = null;
Integer manualFactsNumReturn = null;
Integer manualGeoNumReturn = null;
//DEBUG
//System.out.println(new Gson().toJson(query.output.aggregation));
if ((null != query.output.aggregation) && (null != query.output.aggregation.raw)) { // Like query, specify raw aggregation (Facets)
    // Gross raw handling for facets
    if ((null != query.raw) && (null != query.raw.query)) {
        // Don't currently support raw query and raw facets because I can't work out how to apply
        // the override on group/source!
        errorString.append(": Not currently allowed raw query and raw facets");
        return null;
    }
    else { // Normal code
        // Encode explicitly as UTF-8: the no-arg getBytes() uses the platform default charset,
        // which mangles non-ASCII characters in the facet spec on hosts that aren't UTF-8.
        // (Fully qualified so no extra import is needed.)
        searchSettings.setFacets(query.output.aggregation.raw.getBytes(java.nio.charset.Charset.forName("UTF-8")));
    }
}
else { // Apply various aggregation (=="facet") outputs to searchSettings
    boolean bSpecialCase = (null != query.raw) && (null != query.raw.query);
    if (!_aggregationAccuracy.equals("full")) {
        if (null != query.output.aggregation) {
            if (_aggregationAccuracy.equals("low")) {
                // Remember the requested sizes so they can be restored after the facets are parsed
                manualEntsNumReturn = query.output.aggregation.entsNumReturn;
                manualEventsNumReturn = query.output.aggregation.eventsNumReturn;
                manualFactsNumReturn = query.output.aggregation.factsNumReturn;
                manualGeoNumReturn = query.output.aggregation.geoNumReturn;
            }
            // Blank these out before parseOutputAggregation sees the aggregation settings
            query.output.aggregation.entsNumReturn = null;
            query.output.aggregation.eventsNumReturn = null;
            query.output.aggregation.factsNumReturn = null;
            query.output.aggregation.geoNumReturn = null;
            // (allow time aggregation)
            // (allow source aggregation)
        }
    }
    AggregationUtils.parseOutputAggregation(query.output.aggregation, _aliasLookup,
            tempFilterInfo.entityTypeFilterStrings, tempFilterInfo.assocVerbFilterStrings,
            searchSettings, bSpecialCase?tempFilterInfo.parentFilterObj:null);
    // In partial accuracy case, restore aggregation
    // (the manual* values are only non-null if query.output.aggregation was non-null above)
    if (null != manualEntsNumReturn) {
        query.output.aggregation.entsNumReturn = manualEntsNumReturn;
    }
    if (null != manualEventsNumReturn) {
        query.output.aggregation.eventsNumReturn = manualEventsNumReturn;
    }
    if (null != manualFactsNumReturn) {
        query.output.aggregation.factsNumReturn = manualFactsNumReturn;
    }
    if (null != manualGeoNumReturn) {
        query.output.aggregation.geoNumReturn = manualGeoNumReturn;
    }
    //TESTED
}
//TESTED x2
//(timing)
nQuerySetupTime = System.currentTimeMillis() - nQuerySetupTime;
// 0.6] Perform Lucene query
// 0.6.1: query extensions: pre-query hook
ArrayList<IQueryExtension> queryExtensions = null;
if (null != _queryExtensions) {
queryId = new ObjectId();
queryExtensions = new ArrayList<IQueryExtension>(_queryExtensions.size());
for (Class<IQueryExtension> queryExtensionClass: _queryExtensions) {
// Don't catch any exceptions thrown here - let it bubble upwards
IQueryExtension queryExtension = queryExtensionClass.newInstance();
queryExtension.preQueryActivities(queryId, query, communityIdStrs);
queryExtensions.add(queryExtension);
}
}//TESTED (see test.QueryExtensionsTestCode)
// Built-in federated query engine ...
if (null != _federatedQueryCache) {
// 2 modes:
// 1) If srcInclude is true(default) then check each source vs the table
// 2) If srcInclude is false, or no sources specified, then check each community vs the table
// 1:
if ((null != query.input) && (null != query.input.sources) && ((null == query.input.srcInclude) || query.input.srcInclude))
{
for (String srcKey: query.input.sources) {
FederatedQueryInMemoryCache fedQueryCacheEl = _federatedQueryCache.get(srcKey);
if (null != fedQueryCacheEl) {
if (null == this._builtInFederatedQueryEngine) {
_builtInFederatedQueryEngine = new SimpleFederatedQueryEngine();
}
_builtInFederatedQueryEngine.addEndpoint(fedQueryCacheEl.source);
}
}
}//TESTED (//TESTED (http://localhost:8184/knowledge/document/query/53ab42a2e4b04bcfe2de4387?qt[0].entity=%22garyhart.com/externaldomain%22&output.docs.numReturn=10&input.sources=inf...federated.externaldomain.&input.srcInclude=true))
// 2:
else { //Get federated queries from communities
HashSet<String> excludeSrcs = null;
for (String commIdStr: communityIdStrs) {
FederatedQueryInMemoryCache fedQueryCacheEl = _federatedQueryCache.get(commIdStr);
if (null != fedQueryCacheEl) {
if ((null != query.input) && (null != query.input.sources)) { // (there are exclude sources)
if (null == excludeSrcs) {
excludeSrcs = new HashSet<String>(query.input.sources);
}
}//TESTED (http://localhost:8184/knowledge/document/query/53ab42a2e4b04bcfe2de4387?qt[0].entity=%22garyhart.com/externaldomain%22&output.docs.numReturn=10&input.sources=inf...federated.externaldomain.&input.srcInclude=false)
for (Map.Entry<String, SourceFederatedQueryConfigPojo> fedQueryKV: fedQueryCacheEl.sources.entrySet()) {
if ((null == excludeSrcs) || !excludeSrcs.contains(fedQueryKV.getKey())) {
if (null == this._builtInFederatedQueryEngine) {
_builtInFederatedQueryEngine = new SimpleFederatedQueryEngine();
}
_builtInFederatedQueryEngine.addEndpoint(fedQueryKV.getValue());
}
}
}
}//TESTED (by hand)
}
if (null != _builtInFederatedQueryEngine) {
_builtInFederatedQueryEngine.preQueryActivities(queryId, query, communityIdStrs);
}
}
// 0.6.2: the main query
if ((null != query.explain) && query.explain) { // (for diagnostic - will return lucene explanation)
searchSettings.setExplain(true);
}
SearchResponse queryResults = null;
// (_source can now be enabled, so this is necessary to avoid returning it)
searchSettings.addFields();
if ((null != query.raw) && (null != query.raw.query))
{
// (Can bypass all other settings)
searchSettings.setQuery(query.raw.query);
queryResults = indexMgr.doQuery(null, tempFilterInfo.parentFilterObj, searchSettings);
}//TESTED '{ "raw": { "match_all": {} } }'
else
{
// Where I can, use the source filter as part of the query so that
// facets will apply to query+filter, not just filter
queryObj = QueryBuilders.boolQuery().must(queryObj).must(QueryBuilders.constantScoreQuery(tempFilterInfo.parentFilterObj).boost(0.0F));
queryResults = indexMgr.doQuery(queryObj, null, searchSettings);
}//TESTED '{}' etc
long nLuceneTime = queryResults.getTookInMillis();
// 0.7] Lucene scores
long nProcTime = 0;
long nProcTime_tmp = System.currentTimeMillis();
StatisticsPojo stats = new StatisticsPojo();
stats.found = queryResults.getHits().getTotalHits();
stats.start = (long)nRecordsToSkip;
if (nRecordsToGet > 0) {
stats.setScore(queryResults.getHits(), (null != query.score.geoProx)||(null != query.score.timeProx), (null != query.explain) && query.explain);
}
//DEBUG
//System.out.println(new Gson().toJson(queryResults));
nProcTime += (System.currentTimeMillis() - nProcTime_tmp);
// 0.8] Get data from Mongo + handle scoring
//(timing)
// Fetch the matching documents and score/filter them. nMongoTime is the time spent getting the
// cursor; the scoring step is added to nProcTime.
long nMongoTime = System.currentTimeMillis();
List<BasicDBObject> docs = null;
//(aggregation)
// Each container below stays null unless the corresponding output was requested; the scorer and
// the output stage both treat null as "not wanted".
LinkedList<BasicDBObject> lowAccuracyAggregatedEntities = null; // (always low accuracy)
LinkedList<BasicDBObject> standaloneEvents = null;
LinkedList<BasicDBObject> lowAccuracyAggregatedEvents = null;
LinkedList<BasicDBObject> lowAccuracyAggregatedFacts = null;
AggregationUtils.GeoContainer lowAccuracyAggregatedGeo = null;
AggregationUtils.GeoContainer extraAliasAggregatedGeo = null;
ScoringUtils scoreStats = null;
if (null != stats.getIds()) {
    DBCursor docs0 = this.getDocIds(DbManager.getDocument().getMetadata(), stats.getIds(), nRecordsToGet, query.output, query.score);
    nMongoTime = System.currentTimeMillis() - nMongoTime;
    nProcTime_tmp = System.currentTimeMillis();
    // Entity aggregation (CURRENTLY ALWAYS LOW AGGREGATION):
    if ((null != query.output.aggregation) && (null != query.output.aggregation.entsNumReturn) && (query.output.aggregation.entsNumReturn > 0)) {
        lowAccuracyAggregatedEntities = new LinkedList<BasicDBObject>();
    }
    // Standalone events:
    if ((query.output.docs != null) && (query.output.docs.eventsTimeline != null) && query.output.docs.eventsTimeline) {
        standaloneEvents = new LinkedList<BasicDBObject>();
    }
    // Low accuracy aggregations:
    if ((null != manualEventsNumReturn) && (manualEventsNumReturn > 0)) {
        lowAccuracyAggregatedEvents = new LinkedList<BasicDBObject>();
    }
    if ((null != manualFactsNumReturn) && (manualFactsNumReturn > 0)) {
        lowAccuracyAggregatedFacts = new LinkedList<BasicDBObject>();
    }
    if ((null != manualGeoNumReturn) && (manualGeoNumReturn > 0)) {
        lowAccuracyAggregatedGeo = new AggregationUtils.GeoContainer();
    }
    else if ((null != query.output.aggregation) && (null != query.output.aggregation.geoNumReturn) && (query.output.aggregation.geoNumReturn > 0))
    {
        // (only if not using low accuracy aggregation ... otherwise it all gets dumped in lowAccuracyAggregatedGeo)
        extraAliasAggregatedGeo = new AggregationUtils.GeoContainer();
    }
    scoreStats = new ScoringUtils();
    // Tracks whether THIS call actually obtained the concurrent-access lock, so that the finally
    // block only releases what was acquired. Previously the release ran unconditionally, i.e.
    // also on the "engine busy" early return where the lock was never taken.
    // NOTE(review): the lock implementation is not visible here - confirm that release must be
    // paired 1:1 with a successful acquire (true for e.g. a counting semaphore).
    boolean lockAcquired = false;
    try {
        try {
            lockAcquired = this.acquireConcurrentAccessLock();
        } catch (InterruptedException e) {
            //(that's fine just carry on - treated the same as failing to get the lock)
            // NOTE(review): the interrupt flag is deliberately not restored, matching the
            // existing behaviour - reconsider if callers rely on interruption.
            lockAcquired = false;
        }
        if (!lockAcquired) {
            rp.setResponse(new ResponseObject("Query", false, "Query engine busy, please try again later."));
            return rp;
        }
        scoreStats.setAliasLookupTable(_aliasLookup);
        docs = scoreStats.calcTFIDFAndFilter(DbManager.getDocument().getMetadata(),
                docs0, query.score, query.output, stats, bLowAccuracyDecay,
                nRecordsToSkip, nRecordsToOutput,
                communityIdStrs,
                tempFilterInfo.entityTypeFilterStrings, tempFilterInfo.assocVerbFilterStrings,
                standaloneEvents,
                lowAccuracyAggregatedEntities,
                lowAccuracyAggregatedGeo, extraAliasAggregatedGeo,
                lowAccuracyAggregatedEvents, lowAccuracyAggregatedFacts);
    }
    finally {
        try {
            scoreStats.clearAsMuchMemoryAsPossible();
        }
        finally {
            // (nested finally so a failure in the cleanup above cannot leak the lock)
            if (lockAcquired) {
                this.releaseConcurrentAccessLock();
            }
        }
    }
    nProcTime += (System.currentTimeMillis() - nProcTime_tmp);
}
else {
    // (no ids came back, so nothing was fetched)
    nMongoTime = 0;
}
//TESTED (all queries)
// 0.9] Output:
rp.setResponse(new ResponseObject("Query", true, querySummary.toString()));
// 0.9.1] Stats:
stats.resetArrays();
rp.setStats(stats); // (only actually uses the response pojo, but get rid of big fields anyway...)
// 0.9.2] Facets:
if (null != lowAccuracyAggregatedEntities) { // Entity aggregation
rp.setEntities(lowAccuracyAggregatedEntities);
}
if (null != standaloneEvents) {
rp.setEventsTimeline(standaloneEvents);
}
if (null != lowAccuracyAggregatedGeo) {
rp.setGeo(lowAccuracyAggregatedGeo.geotags, (int)lowAccuracyAggregatedGeo.maxCount, (int)lowAccuracyAggregatedGeo.minCount);
}
if (null != lowAccuracyAggregatedEvents) {
rp.setEvents(lowAccuracyAggregatedEvents);
}
if (null != lowAccuracyAggregatedFacts) {
rp.setFacts(lowAccuracyAggregatedFacts);
}
if ((null != query.output.aggregation) && (null != query.output.aggregation.raw)) {
rp.setFacets(queryResults.getFacets().facetsAsMap());
}
else if ((null != queryResults.getFacets()) && (null != queryResults.getFacets().getFacets())) { // "Logical" aggregation
if (0.0 == query.score.sigWeight) {
scoreStats = null; // (don't calculate event/fact aggregated significance if it's not wanted)
}
AggregationUtils.loadAggregationResults(rp, queryResults.getFacets().getFacets(), query.output.aggregation, scoreStats, _aliasLookup, tempFilterInfo.entityTypeFilterStrings, tempFilterInfo.assocVerbFilterStrings, extraAliasAggregatedGeo);
} // (end facets not overwritten)
scoreStats = null; // (now definitely never need scoreStats)
// 0.9.3] Documents
if (query.output.docs.enable) {
if ((null != docs) && (docs.size() > 0)) {
rp.setData(docs, (BasePojoApiMap<BasicDBObject>)null);
}
else { // (ensure there's always an empty list)
docs = new ArrayList<BasicDBObject>(0);
rp.setData(docs, (BasePojoApiMap<BasicDBObject>)null);
}
}
else { // (ensure there's always an empty list)
docs = new ArrayList<BasicDBObject>(0);
rp.setData(docs, (BasePojoApiMap<BasicDBObject>)null);
}
// 0.9.4] query extensions: post-query hook
if (null != queryExtensions) {
for (IQueryExtension queryExtension: queryExtensions) {
// Don't catch any exceptions thrown here - let it bubble upwards
queryExtension.postQueryActivities(queryId, docs, rp);
}
}//TESTED (see test.QueryExtensionsTestCode)
// (Built-in version)
if (null != _builtInFederatedQueryEngine) {
_builtInFederatedQueryEngine.postQueryActivities(queryId, docs, rp);
}
// 0.9.5] Timing/logging
long nTotalTime = System.currentTimeMillis() - nSysTime;
rp.getResponse().setTime(nTotalTime);
_logMsg.setLength(0);
_logMsg.append("knowledge/query querylen=").append(querySummary.length());
_logMsg.append(" query=").append(querySummary.toString());
_logMsg.append(" userid=").append(userIdStr);