//(timing)
long nQuerySetupTime = System.currentTimeMillis();
ElasticSearchManager indexMgr = getIndexManager(communityIdStrs);
SearchRequestBuilder searchSettings = indexMgr.getSearchOptions();
StringBuffer querySummary = new StringBuffer();
BaseQueryBuilder queryObj = null;
InternalTempFilterInfo tempFilterInfo = null;
try {
queryObj = getBaseQuery(query, communityIdStrs, communityIdStrList, userIdStr, querySummary);
if (null == queryObj) { // only occurs if has 1 element with ftext starting $cache:
return getSavedQueryInstead(query.qt.get(0).ftext.substring(7), communityIdStrs, query); // (step over cache preamble)
}
tempFilterInfo = getBaseFilter(query, communityIdStrs);
}
catch (Exception e) {
Globals.populateStackTrace(errorString, e);
if (null != e.getCause()) {
errorString.append("[CAUSE=").append(e.getCause().getMessage()).append("]");
Globals.populateStackTrace(errorString, e.getCause());
}
errorString.append(": " + e.getMessage());
return null;
}
//DEBUG
//querySummary.append(new Gson().toJson(query, AdvancedQueryPojo.class));
// 0.4] Pre-Lucene Scoring
// 0.4.1] General
// Different options:
// a] Get the most recent N documents matching the query, score post-query
// b] Get the N highest (Lucene) scoring documents, incorporate significance post-query if desired
// In both cases, N depends on whether significance calculation is taking place (and on the "skip" param)
int nRecordsToOutput = query.output.docs.numReturn;
int nRecordsToSkip = query.output.docs.skip;
int nRecordsToGet = query.score.numAnalyze;
final int nMAXRECORDSTOOUTPUT = 10000;
final int nMAXRECORDSTOGET = 20000;
// Some sanity checking on doc numbers:
if (nRecordsToOutput > nMAXRECORDSTOOUTPUT) { // Upper limit...
errorString.append(": Max # docs to return is 10000.");
return null;
}
if (nRecordsToGet < nRecordsToOutput) {
nRecordsToGet = nRecordsToOutput;
}
else if (nRecordsToGet > nMAXRECORDSTOGET) { // Upper limit...
nRecordsToGet = nMAXRECORDSTOGET; // (we can do something sensible with this so carry on regardless)
}
boolean bUseSignificance = (query.score.sigWeight > 0.0);
boolean bNeedExtraResultsForEnts =
((query.output.aggregation != null) && (query.output.aggregation.entsNumReturn != null) && (query.output.aggregation.entsNumReturn > 0))
||
(query.output.docs.enable && (query.output.docs.numReturn > 0) && (query.output.docs.ents) && (query.score.scoreEnts));
if (bUseSignificance || bNeedExtraResultsForEnts) {
// Some logic taken from the original "knowledge/search"
while ( (nRecordsToSkip + nRecordsToOutput > nRecordsToGet) && (nRecordsToGet <= nMAXRECORDSTOGET) )
{
nRecordsToGet += nRecordsToGet;
}
if (nRecordsToGet > nMAXRECORDSTOGET) {
errorString.append(": Can only skip through to 20000 documents.");
return null;
}
searchSettings.setSize(nRecordsToGet);
//TESTED
}
else if (query.output.docs.enable) { // In this case we just need the minimum number of records
// (whether searching by date or by relevance)
searchSettings.setFrom(nRecordsToSkip);
nRecordsToSkip = 0; // (so it isn't double counted in the processing module)
nRecordsToGet = nRecordsToOutput;
searchSettings.setSize(nRecordsToGet);
//TESTED
}
else { // In this case we're just outputting aggregations, and not even ones that come from the docs
nRecordsToGet = 0; // (use this variable everywhere we care about bringing docs back, either to output or for suitable aggregation)
searchSettings.setSize(0);
}
// Sort on score if relevance is being used
if (nRecordsToGet > 0) {
if (query.score.relWeight > 0.0) { // (b) above
// Using score is default, nothing to do
}
else { // (a) above
// Debug code, if rel weight negative then use date to check Lucene score is better...
if (query.score.relWeight < 0.0) {
query.score.relWeight = -query.score.relWeight;
}
// Set Lucene to order:
searchSettings.addSort(DocumentPojo.publishedDate_, SortOrder.DESC);
}//TOTEST
}//(if docs aren't enabled, don't need to worry about sorting)
// 0.4.2] Prox scoring (needs to happen after [0.3])
// Add proximity scoring:
boolean bLowAccuracyDecay = false;
if ((nRecordsToGet > 0) || (null == _scoringParams.adjustAggregateSig) || _scoringParams.adjustAggregateSig) {
// (ie if we're getting docs or applying scores to entities)
if (!_aggregationAccuracy.equals("full")) {
bLowAccuracyDecay = true;
}
queryObj = addProximityBasedScoring(queryObj, searchSettings, query.score, tempFilterInfo.parentFilterObj, bLowAccuracyDecay);
if (null == _scoringParams.adjustAggregateSig) { // auto-decide .. if ftext is set and is non-trivial
if ((null != query.score.timeProx) || (null != query.score.geoProx)) {
// (These are set to null above if badly formed)
_scoringParams.adjustAggregateSig = true;
}
}
}// (else not worth the effort)
// 0.4.3] Source weightings (if any)
queryObj = applyManualWeights(queryObj, query.score);
// 0.5] Pre-lucene output options
// only return the id field and score
// (Both _id and score come back as default options, SearchHit::getId and SearchHit::getScore, so don't need anything else)
// Facets
// (These are needed for the case where we need to perform aggregations manually)
Integer manualEntsNumReturn = null;
Integer manualEventsNumReturn = null;
Integer manualFactsNumReturn = null;
Integer manualGeoNumReturn = null;
//DEBUG
//System.out.println(new Gson().toJson(query.output.aggregation));
if ((null != query.output.aggregation) && (null != query.output.aggregation.raw)) { // Like query, specify raw aggregation (Facets)
// Gross raw handling for facets
if ((null != query.raw) && (null != query.raw.query)) {
// Don't currently support raw query and raw facets because I can't work out how to apply
// the override on group/source!
errorString.append(": Not currently allowed raw query and raw facets");
return null;
}
else { // Normal code
searchSettings.setFacets(query.output.aggregation.raw.getBytes());
}
}
else { // Apply various aggregation (=="facet") outputs to searchSettings
boolean bSpecialCase = (null != query.raw) && (null != query.raw.query);
if (!_aggregationAccuracy.equals("full")) {
if (null != query.output.aggregation) {
if (_aggregationAccuracy.equals("low")) {
manualEntsNumReturn = query.output.aggregation.entsNumReturn;
manualEventsNumReturn = query.output.aggregation.eventsNumReturn;
manualFactsNumReturn = query.output.aggregation.factsNumReturn;
manualGeoNumReturn = query.output.aggregation.geoNumReturn;
}
query.output.aggregation.entsNumReturn = null;
query.output.aggregation.eventsNumReturn = null;
query.output.aggregation.factsNumReturn = null;
query.output.aggregation.geoNumReturn = null;
// (allow time aggregation)
// (allow source aggregation)
}
}
AggregationUtils.parseOutputAggregation(query.output.aggregation, _aliasLookup,
tempFilterInfo.entityTypeFilterStrings, tempFilterInfo.assocVerbFilterStrings,
searchSettings, bSpecialCase?tempFilterInfo.parentFilterObj:null);
// In partial accuracy case, restore aggregation
if (null != manualEntsNumReturn) {
query.output.aggregation.entsNumReturn = manualEntsNumReturn;
}
if (null != manualEventsNumReturn) {
query.output.aggregation.eventsNumReturn = manualEventsNumReturn;
}
if (null != manualFactsNumReturn) {
query.output.aggregation.factsNumReturn = manualFactsNumReturn;
}
if (null != manualGeoNumReturn) {
query.output.aggregation.geoNumReturn = manualGeoNumReturn;
}
//TESTED
}
//TESTED x2
//(timing)
nQuerySetupTime = System.currentTimeMillis() - nQuerySetupTime;
// 0.6] Perform Lucene query
// 0.6.1: query extensions: pre-query hook
ArrayList<IQueryExtension> queryExtensions = null;
if (null != _queryExtensions) {
queryId = new ObjectId();
queryExtensions = new ArrayList<IQueryExtension>(_queryExtensions.size());
for (Class<IQueryExtension> queryExtensionClass: _queryExtensions) {
// Don't catch any exceptions thrown here - let it bubble upwards
IQueryExtension queryExtension = queryExtensionClass.newInstance();
queryExtension.preQueryActivities(queryId, query, communityIdStrs);
queryExtensions.add(queryExtension);
}
}//TESTED (see test.QueryExtensionsTestCode)
// Built-in federated query engine ...
if (null != _federatedQueryCache) {
// 2 modes:
// 1) If srcInclude is true(default) then check each source vs the table
// 2) If srcInclude is false, or no sources specified, then check each community vs the table
// 1:
if ((null != query.input) && (null != query.input.sources) && ((null == query.input.srcInclude) || query.input.srcInclude))
{
for (String srcKey: query.input.sources) {
FederatedQueryInMemoryCache fedQueryCacheEl = _federatedQueryCache.get(srcKey);
if (null != fedQueryCacheEl) {
if (null == this._builtInFederatedQueryEngine) {
_builtInFederatedQueryEngine = new SimpleFederatedQueryEngine();
}
_builtInFederatedQueryEngine.addEndpoint(fedQueryCacheEl.source);
}
}
}//TESTED (//TESTED (http://localhost:8184/knowledge/document/query/53ab42a2e4b04bcfe2de4387?qt[0].entity=%22garyhart.com/externaldomain%22&output.docs.numReturn=10&input.sources=inf...federated.externaldomain.&input.srcInclude=true))
// 2:
else { //Get federated queries from communities
HashSet<String> excludeSrcs = null;
for (String commIdStr: communityIdStrs) {
FederatedQueryInMemoryCache fedQueryCacheEl = _federatedQueryCache.get(commIdStr);
if (null != fedQueryCacheEl) {
if ((null != query.input) && (null != query.input.sources)) { // (there are exclude sources)
if (null == excludeSrcs) {
excludeSrcs = new HashSet<String>(query.input.sources);
}
}//TESTED (http://localhost:8184/knowledge/document/query/53ab42a2e4b04bcfe2de4387?qt[0].entity=%22garyhart.com/externaldomain%22&output.docs.numReturn=10&input.sources=inf...federated.externaldomain.&input.srcInclude=false)
for (Map.Entry<String, SourceFederatedQueryConfigPojo> fedQueryKV: fedQueryCacheEl.sources.entrySet()) {
if ((null == excludeSrcs) || !excludeSrcs.contains(fedQueryKV.getKey())) {
if (null == this._builtInFederatedQueryEngine) {
_builtInFederatedQueryEngine = new SimpleFederatedQueryEngine();
}
_builtInFederatedQueryEngine.addEndpoint(fedQueryKV.getValue());
}
}
}
}//TESTED (by hand)
}
if (null != _builtInFederatedQueryEngine) {
_builtInFederatedQueryEngine.preQueryActivities(queryId, query, communityIdStrs);
}
}
// 0.6.2: the main query
if ((null != query.explain) && query.explain) { // (for diagnostic - will return lucene explanation)
searchSettings.setExplain(true);
}
SearchResponse queryResults = null;
// (_source can now be enabled, so this is necessary to avoid returning it)
searchSettings.addFields();
if ((null != query.raw) && (null != query.raw.query))
{
// (Can bypass all other settings)
searchSettings.setQuery(query.raw.query);
queryResults = indexMgr.doQuery(null, tempFilterInfo.parentFilterObj, searchSettings);
}//TESTED '{ "raw": { "match_all": {} } }'
else
{
// Where I can, use the source filter as part of the query so that