// NOTE(review): this is the interior of a larger query-execution method;
// log, query, connector, auths, typeFilter, etc. are declared outside this view.
Results results = new Results();
// Get the query string
String queryString = query;
// Stopwatches for the per-phase timing summary logged at the end.
StopWatch abstractQueryLogic = new StopWatch();
StopWatch optimizedQuery = new StopWatch();
StopWatch queryGlobalIndex = new StopWatch();
StopWatch optimizedEventQuery = new StopWatch();
StopWatch fullScanQuery = new StopWatch();
StopWatch processResults = new StopWatch();
abstractQueryLogic.start();
StopWatch parseQuery = new StopWatch();
parseQuery.start();
// Parse the query; a parse failure is surfaced to the caller as an
// IllegalArgumentException with the original ParseException as cause.
QueryParser parser;
try {
if (log.isDebugEnabled()) {
log.debug("ShardQueryLogic calling QueryParser.execute");
}
parser = new QueryParser();
parser.execute(queryString);
} catch (org.apache.commons.jexl2.parser.ParseException e1) {
throw new IllegalArgumentException("Error parsing query", e1);
}
// Hash of the parsed query, used purely as a correlation id in log messages.
int hash = parser.getHashValue();
parseQuery.stop();
if (log.isDebugEnabled()) {
log.debug(hash + " Query: " + queryString);
}
// Collect the field names referenced by the query.
Set<String> fields = new HashSet<String>();
for (String f : parser.getQueryIdentifiers()) {
fields.add(f);
}
if (log.isDebugEnabled()) {
log.debug("getQueryIdentifiers: " + parser.getQueryIdentifiers().toString());
}
// Remove any negated fields from the fields list, we don't want to lookup negated fields
// in the index.
fields.removeAll(parser.getNegatedTermsForOptimizer());
// NOTE(review): same log label as above, and it logs the parser's identifiers
// again rather than the pruned 'fields' set — consider logging 'fields' here.
if (log.isDebugEnabled()) {
log.debug("getQueryIdentifiers: " + parser.getQueryIdentifiers().toString());
}
// Get the mapping of field name to QueryTerm object from the query. The query term object
// contains the operator, whether its negated or not, and the literal to test against.
Multimap<String,QueryTerm> terms = parser.getQueryTerms();
// Find out which terms are indexed
// TODO: Should we cache indexed terms or does that not make sense since we are always
// loading data.
StopWatch queryMetadata = new StopWatch();
queryMetadata.start();
Map<String,Multimap<String,Class<? extends Normalizer>>> metadataResults;
try {
metadataResults = findIndexedTerms(connector, auths, fields, typeFilter);
} catch (Exception e1) {
throw new RuntimeException("Error in metadata lookup", e1);
}
// Create a map of indexed term to set of normalizers for it
Multimap<String,Normalizer> indexedTerms = HashMultimap.create();
for (Entry<String,Multimap<String,Class<? extends Normalizer>>> entry : metadataResults.entrySet()) {
// Get the normalizer from the normalizer cache
for (Class<? extends Normalizer> clazz : entry.getValue().values()) {
indexedTerms.put(entry.getKey(), normalizerCacheMap.get(clazz));
}
}
queryMetadata.stop();
if (log.isDebugEnabled()) {
log.debug(hash + " Indexed Terms: " + indexedTerms.toString());
}
Set<String> orTerms = parser.getOrTermsForOptimizer();
// Iterate over the query terms to get the operators specified in the query.
ArrayList<String> unevaluatedExpressions = new ArrayList<String>();
boolean unsupportedOperatorSpecified = false;
for (Entry<String,QueryTerm> entry : terms.entries()) {
if (null == entry.getValue()) {
continue;
}
// Terms on configured "unevaluated" fields are collected so they can be
// handed to the server-side iterator as an option instead of evaluated here.
if (null != this.unevaluatedFields && this.unevaluatedFields.contains(entry.getKey().trim())) {
unevaluatedExpressions.add(entry.getKey().trim() + " " + entry.getValue().getOperator() + " " + entry.getValue().getValue());
}
// Only the EQ/NE/LE/LT/GE/GT/ER JJT node types are supported by the
// optimized path; any other operator forces the full-scan path below.
int operator = JexlOperatorConstants.getJJTNodeType(entry.getValue().getOperator());
if (!(operator == ParserTreeConstants.JJTEQNODE || operator == ParserTreeConstants.JJTNENODE || operator == ParserTreeConstants.JJTLENODE
|| operator == ParserTreeConstants.JJTLTNODE || operator == ParserTreeConstants.JJTGENODE || operator == ParserTreeConstants.JJTGTNODE || operator == ParserTreeConstants.JJTERNODE)) {
unsupportedOperatorSpecified = true;
break;
}
}
// NOTE(review): unevaluatedExpressions is initialized above and can never be
// null here; the null check is redundant (left as-is in this documentation pass).
if (null != unevaluatedExpressions)
unevaluatedExpressions.trimToSize();
if (log.isDebugEnabled()) {
log.debug(hash + " unsupportedOperators: " + unsupportedOperatorSpecified + " indexedTerms: " + indexedTerms.toString() + " orTerms: "
+ orTerms.toString() + " unevaluatedExpressions: " + unevaluatedExpressions.toString());
}
// We can use the intersecting iterator over the field index as an optimization under the
// following conditions
//
// 1. No unsupported operators in the query.
// 2. No 'or' operators and at least one term indexed
// or
// 1. No unsupported operators in the query.
// 2. and all terms indexed
// or
// 1. All or'd terms are indexed. NOTE, this will potentially skip some queries and push to a full table scan
// // WE should look into finding a better way to handle whether we do an optimized query or not.
boolean optimizationSucceeded = false;
boolean orsAllIndexed = false;
if (orTerms.isEmpty()) {
orsAllIndexed = false;
} else {
orsAllIndexed = indexedTerms.keySet().containsAll(orTerms);
}
if (log.isDebugEnabled()) {
// BUG FIX: previously this logged "All or terms are indexed" unconditionally,
// which was misleading whenever orsAllIndexed was false; log the actual value.
log.debug("All or terms are indexed: " + orsAllIndexed);
}
// Enter the optimized (field-index) path only when no unsupported operator was
// seen AND one of: no or-terms with at least one indexed term, every field
// indexed, or every or'd term indexed.
if (!unsupportedOperatorSpecified
&& (((null == orTerms || orTerms.isEmpty()) && indexedTerms.size() > 0) || (fields.size() > 0 && indexedTerms.size() == fields.size()) || orsAllIndexed)) {
optimizedQuery.start();
// Set up intersecting iterator over field index.
// Get information from the global index for the indexed terms. The results object will contain the term
// mapped to an object that contains the total count, and partitions where this term is located.
// TODO: Should we cache indexed term information or does that not make sense since we are always loading data
queryGlobalIndex.start();
IndexRanges termIndexInfo;
try {
// If fields is null or zero, then it's probably the case that the user entered a value
// to search for with no fields. Check for the value in index.
if (fields.isEmpty()) {
termIndexInfo = this.getTermIndexInformation(connector, auths, queryString, typeFilter);
if (null != termIndexInfo && termIndexInfo.getRanges().isEmpty()) {
// Then we didn't find anything in the index for this query. This may happen for an indexed term that has wildcards
// in unhandled locations.
// Break out of here by throwing a named exception and do full scan
throw new DoNotPerformOptimizedQueryException();
}
// We need to rewrite the query string here so that it's valid.
if (termIndexInfo instanceof UnionIndexRanges) {
UnionIndexRanges union = (UnionIndexRanges) termIndexInfo;
StringBuilder buf = new StringBuilder();
String sep = "";
// Rewrite the field-less value into "FIELD1 == 'value' or FIELD2 == 'value' ..."
// over every field name the index matched for this value.
for (String fieldName : union.getFieldNamesAndValues().keySet()) {
buf.append(sep).append(fieldName).append(" == ");
// Quote the value unless the user already supplied surrounding quotes.
if (!(queryString.startsWith("'") && queryString.endsWith("'"))) {
buf.append("'").append(queryString).append("'");
} else {
buf.append(queryString);
}
sep = " or ";
}
if (log.isDebugEnabled()) {
log.debug("Rewrote query for non-fielded single term query: " + queryString + " to " + buf.toString());
}
queryString = buf.toString();
} else {
throw new RuntimeException("Unexpected IndexRanges implementation");
}
} else {
// Fielded query: compute scan ranges for the indexed terms from the
// global index and reverse index tables.
RangeCalculator calc = this.getTermIndexInformation(connector, auths, indexedTerms, terms, this.getIndexTableName(), this.getReverseIndexTableName(),
queryString, this.queryThreads, typeFilter);
if (null == calc.getResult() || calc.getResult().isEmpty()) {
// Then we didn't find anything in the index for this query. This may happen for an indexed term that has wildcards
// in unhandled locations.
// Break out of here by throwing a named exception and do full scan
throw new DoNotPerformOptimizedQueryException();
}
// Repackage the calculator output into a UnionIndexRanges for the scanner setup.
termIndexInfo = new UnionIndexRanges();
termIndexInfo.setIndexValuesToOriginalValues(calc.getIndexValues());
termIndexInfo.setFieldNamesAndValues(calc.getIndexEntries());
termIndexInfo.getTermCardinality().putAll(calc.getTermCardinalities());
for (Range r : calc.getResult()) {
// foo is a placeholder and is ignored.
termIndexInfo.add("foo", r);
}
}
} catch (TableNotFoundException e) {
// BUG FIX: added the missing space before "not found" in the log and
// exception messages (previously rendered e.g. "indexTablenot found").
log.error(this.getIndexTableName() + " not found", e);
throw new RuntimeException(this.getIndexTableName() + " not found", e);
} catch (org.apache.commons.jexl2.parser.ParseException e) {
throw new RuntimeException("Error determining ranges for query: " + queryString, e);
} catch (DoNotPerformOptimizedQueryException e) {
// Named escape hatch from the index lookup above: nothing usable in the
// index, so fall through to the full-scan path with termIndexInfo == null.
log.info("Indexed fields not found in index, performing full scan");
termIndexInfo = null;
}
queryGlobalIndex.stop();
// Determine if we should proceed with optimized query based on results from the global index
boolean proceed = false;
// NOTE(review): once the first branch rules out null/empty, the third branch
// (values().size() > 0) is always true, so the orsAllIndexed and final else
// branches are effectively unreachable — worth simplifying in a later pass.
if (null == termIndexInfo || termIndexInfo.getFieldNamesAndValues().values().size() == 0) {
proceed = false;
} else if (null != orTerms && orTerms.size() > 0 && (termIndexInfo.getFieldNamesAndValues().values().size() == indexedTerms.size())) {
proceed = true;
} else if (termIndexInfo.getFieldNamesAndValues().values().size() > 0) {
proceed = true;
} else if (orsAllIndexed) {
proceed = true;
} else {
proceed = false;
}
if (log.isDebugEnabled()) {
log.debug("Proceed with optimized query: " + proceed);
if (null != termIndexInfo)
log.debug("termIndexInfo.getTermsFound().size(): " + termIndexInfo.getFieldNamesAndValues().values().size() + " indexedTerms.size: "
+ indexedTerms.size() + " fields.size: " + fields.size());
}
if (proceed) {
if (log.isDebugEnabled()) {
log.debug(hash + " Performing optimized query");
}
// Use the scan ranges from the GlobalIndexRanges object as the ranges for the batch scanner
ranges = termIndexInfo.getRanges();
if (log.isDebugEnabled()) {
log.info(hash + " Ranges: count: " + ranges.size() + ", " + ranges.toString());
}
// Create BatchScanner, set the ranges, and setup the iterators.
optimizedEventQuery.start();
BatchScanner bs = null;
try {
bs = connector.createBatchScanner(this.getTableName(), auths, queryThreads);
bs.setRanges(ranges);
IteratorSetting si = new IteratorSetting(21, "eval", OptimizedQueryIterator.class);
if (log.isDebugEnabled()) {
log.debug("Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString);
}
// Set the query option
si.addOption(EvaluatingIterator.QUERY_OPTION, queryString);
// Set the Indexed Terms List option. This is the field name and normalized field value pair separated
// by a comma.
// NOTE(review): despite the comment above, entries are actually encoded as
// fieldName:originalValue:normalizedValue and joined with ';', not ','.
StringBuilder buf = new StringBuilder();
String sep = "";
for (Entry<String,String> entry : termIndexInfo.getFieldNamesAndValues().entries()) {
buf.append(sep);
buf.append(entry.getKey());
buf.append(":");
buf.append(termIndexInfo.getIndexValuesToOriginalValues().get(entry.getValue()));
buf.append(":");
buf.append(entry.getValue());
if (sep.equals("")) {
sep = ";";
}
}
if (log.isDebugEnabled()) {
log.debug("Setting scan option: " + FieldIndexQueryReWriter.INDEXED_TERMS_LIST + " to " + buf.toString());
}
// Rewrite the query for the field index: normalize case, drop terms that
// are not indexed, and substitute the normalized values.
FieldIndexQueryReWriter rewriter = new FieldIndexQueryReWriter();
String q = "";
try {
q = queryString;
q = rewriter.applyCaseSensitivity(q, true, false);// Set upper/lower case for fieldname/fieldvalue
Map<String,String> opts = new HashMap<String,String>();
opts.put(FieldIndexQueryReWriter.INDEXED_TERMS_LIST, buf.toString());
q = rewriter.removeNonIndexedTermsAndInvalidRanges(q, opts);
q = rewriter.applyNormalizedTerms(q, opts);
if (log.isDebugEnabled()) {
log.debug("runServerQuery, FieldIndex Query: " + q);
}
} catch (org.apache.commons.jexl2.parser.ParseException ex) {
// NOTE(review): rewrite failures are swallowed here and q keeps its last
// good value (possibly ""); the scan still runs with that field-index query.
log.error("Could not parse query, Jexl ParseException: " + ex);
} catch (Exception ex) {
log.error("Problem rewriting query, Exception: " + ex.getMessage());
}
si.addOption(BooleanLogicIterator.FIELD_INDEX_QUERY, q);
// Set the term cardinality option
// Encoded as term:cardinality pairs joined with ','.
sep = "";
buf.delete(0, buf.length());
for (Entry<String,Long> entry : termIndexInfo.getTermCardinality().entrySet()) {
buf.append(sep);
buf.append(entry.getKey());
buf.append(":");
buf.append(entry.getValue());
sep = ",";
}
if (log.isDebugEnabled())
log.debug("Setting scan option: " + BooleanLogicIterator.TERM_CARDINALITIES + " to " + buf.toString());
si.addOption(BooleanLogicIterator.TERM_CARDINALITIES, buf.toString());
if (this.useReadAheadIterator) {
if (log.isDebugEnabled()) {
log.debug("Enabling read ahead iterator with queue size: " + this.readAheadQueueSize + " and timeout: " + this.readAheadTimeOut);
}
si.addOption(ReadAheadIterator.QUEUE_SIZE, this.readAheadQueueSize);
si.addOption(ReadAheadIterator.TIMEOUT, this.readAheadTimeOut);
}
if (null != unevaluatedExpressions) {
StringBuilder unevaluatedExpressionList = new StringBuilder();
String sep2 = "";
for (String exp : unevaluatedExpressions) {
unevaluatedExpressionList.append(sep2).append(exp);
sep2 = ",";
}
if (log.isDebugEnabled())
log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to " + unevaluatedExpressionList.toString());
si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS, unevaluatedExpressionList.toString());
}
bs.addScanIterator(si);
// processResults measures only per-entry document creation: it is
// started+suspended here, then resumed/suspended around each entry below.
processResults.start();
processResults.suspend();
long count = 0;
for (Entry<Key,Value> entry : bs) {
count++;
// The key that is returned by the EvaluatingIterator is not the same key that is in
// the table. The value that is returned by the EvaluatingIterator is a kryo
// serialized EventFields object.
processResults.resume();
Document d = this.createDocument(entry.getKey(), entry.getValue());
results.getResults().add(d);
processResults.suspend();
}
log.info(count + " matching entries found in optimized query.");
optimizationSucceeded = true;
processResults.stop();
} catch (TableNotFoundException e) {
// BUG FIX: added the missing space before "not found", and the thrown
// exception previously named getIndexTableName() even though the scanner
// that failed was created on getTableName() — report the correct table.
log.error(this.getTableName() + " not found", e);
throw new RuntimeException(this.getTableName() + " not found", e);
} finally {
// Always release the batch scanner's threads, even on failure.
if (bs != null) {
bs.close();
}
}
optimizedEventQuery.stop();
}
optimizedQuery.stop();
}
// WE should look into finding a better way to handle whether we do an optimized query or not.
// We are not setting up an else condition here because we may have aborted the logic early in the if statement.
if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0) && (indexedTerms.size() != fields.size()) && !orsAllIndexed)) {
// if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0) && (indexedTerms.size() != fields.size()))) {
fullScanQuery.start();
if (log.isDebugEnabled()) {
log.debug(hash + " Performing full scan query");
}
// Set up a full scan using the date ranges from the query
// Create BatchScanner, set the ranges, and setup the iterators.
BatchScanner bs = null;
try {
// The ranges are the start and end dates
Collection<Range> r = getFullScanRange(beginDate, endDate, terms);
ranges.addAll(r);
if (log.isDebugEnabled()) {
log.debug(hash + " Ranges: count: " + ranges.size() + ", " + ranges.toString());
}
bs = connector.createBatchScanner(this.getTableName(), auths, queryThreads);
bs.setRanges(ranges);
IteratorSetting si = new IteratorSetting(22, "eval", EvaluatingIterator.class);
// Create datatype regex if needed
if (null != typeFilter) {
// Build an alternation regex ("type1.*|type2.*") to filter column
// families by datatype before evaluation.
StringBuilder buf = new StringBuilder();
String s = "";
for (String type : typeFilter) {
buf.append(s).append(type).append(".*");
s = "|";
}
if (log.isDebugEnabled())
log.debug("Setting colf regex iterator to: " + buf.toString());
// Priority 21 so the filter is applied before the evaluating iterator (22).
IteratorSetting ri = new IteratorSetting(21, "typeFilter", RegExFilter.class);
RegExFilter.setRegexs(ri, null, buf.toString(), null, null, false);
bs.addScanIterator(ri);
}
if (log.isDebugEnabled()) {
log.debug("Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString);
}
si.addOption(EvaluatingIterator.QUERY_OPTION, queryString);
// Pass along any expressions that must not be evaluated server-side.
if (null != unevaluatedExpressions) {
StringBuilder unevaluatedExpressionList = new StringBuilder();
String sep2 = "";
for (String exp : unevaluatedExpressions) {
unevaluatedExpressionList.append(sep2).append(exp);
sep2 = ",";
}
if (log.isDebugEnabled())
log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to " + unevaluatedExpressionList.toString());
si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS, unevaluatedExpressionList.toString());
}
bs.addScanIterator(si);
long count = 0;
// As in the optimized path, processResults tracks only document-creation time.
processResults.start();
processResults.suspend();
for (Entry<Key,Value> entry : bs) {
count++;
// The key that is returned by the EvaluatingIterator is not the same key that is in
// the partition table. The value that is returned by the EvaluatingIterator is a kryo
// serialized EventFields object.
processResults.resume();
Document d = this.createDocument(entry.getKey(), entry.getValue());
results.getResults().add(d);
processResults.suspend();
}
processResults.stop();
log.info(count + " matching entries found in full scan query.");
} catch (TableNotFoundException e) {
// BUG FIX: added the missing space before "not found".
// NOTE(review): unlike the optimized path, this failure is not rethrown —
// the method continues and returns whatever results were gathered so far.
log.error(this.getTableName() + " not found", e);
} finally {
// Always release the batch scanner's threads, even on failure.
if (bs != null) {
bs.close();
}
}
fullScanQuery.stop();
}
// Per-phase timing summary.
// NOTE(review): within this view abstractQueryLogic.stop() is never called
// before getTime() is read, and the numeric labels (1/2/3) repeat — the
// numbering looks stale relative to the phases actually listed.
log.info("AbstractQueryLogic: " + queryString + " " + timeString(abstractQueryLogic.getTime()));
log.info("  1) parse query " + timeString(parseQuery.getTime()));
log.info("  2) query metadata " + timeString(queryMetadata.getTime()));
log.info("  3) full scan query " + timeString(fullScanQuery.getTime()));
log.info("  3) optimized query " + timeString(optimizedQuery.getTime()));
log.info("  1) process results " + timeString(processResults.getTime()));
log.info("  1) query global index " + timeString(queryGlobalIndex.getTime()));
log.info(hash + " Query completed.");