/*
* JBoss, Home of Professional Open Source
* Copyright 2012 Red Hat Inc. and/or its affiliates and other contributors
* as indicated by the @authors tag. All rights reserved.
*/
package org.jboss.elasticsearch.tools.content;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.SettingsException;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHitField;
/**
* Content preprocessor which looks up values using an Elasticsearch search request built from a value taken from the
* data structure. This preprocessor requires an Elasticsearch client to be passed into
* {@link #init(String, org.elasticsearch.client.Client, Map)}! Example of configuration for this preprocessor for
* single value lookup:
*
* <pre>
* {
* "name" : "EL Lookup transformer",
* "class" : "org.jboss.elasticsearch.tools.content.ESLookupValuePreprocessor",
* "settings" : {
* "index_name" : "projects",
* "index_type" : "project",
* "source_field" : "jira_project_code",
* "idx_search_field" : "jira_project_code",
* "result_mapping" : [
* {"idx_result_field":"universal_code", "target_field":"project_code", "value_default":"unknown"},
* {"idx_result_field":"name", "target_field":"project_name"}
* ]
* }
* }
* </pre>
*
* Options are:
* <ul>
* <li><code>index_name</code> - name of search index to lookup values in
* <li><code>index_type</code> - name of type in search index to lookup values in
* <li><code>source_field</code> - source field in input data to be used as 'lookup key'. Dot notation for nested values can be used here.
* <li><code>source_value</code> - value to be used as 'lookup key'. Can be used as an alternative to <code>source_field</code> (it is read only if <code>source_field</code> is not defined).
* You can use a pattern here, where keys are replaced with values from the input data. Keys are enclosed in curly braces, dot notation for deeper nesting may be used in keys.
* <li><code>idx_search_field</code> - field in search index document to be asked for 'lookup key' obtained from source field. Elasticsearch <code>match</code>
* query (wrapped in a query filter) is used against this field. Search is not performed if 'lookup key' is missing (<code>null</code>). This configuration can also contain
* an array of lookup field names. They are then looked up in sequence, the next one being used if the previous one does not provide any concrete
* value - so it is a sort of fallback mechanism.
* <li><code>result_multiple_ignore</code> - defines what to do if the lookup returns multiple values. If `false` (default
* value) then the first result is used. If `true` then the lookup result is ignored and the default value is used if any. A warning
* message is produced in both cases.
* <li>
* <code>result_mapping</code> - array of mappings from lookup result to the data. Each mapping definition may contain these fields:
* <ul>
* <li><code>idx_result_field</code> - field in found search index document to be placed into target field. You can use `_source` value there to get whole content of document found in ES index.
* <li><code>target_field</code> - target field in data to store looked up value into. Can be same as input field. Dot
* notation can be used here for structure nesting.
* <li><code>value_default</code> - optional default value used if the lookup does not provide a value. If not set then the target
* field is left empty for values not found in the mapping. In the default value you can use a pattern where keys are replaced with other values from the
* data. Keys are enclosed in curly braces, dot notation for deeper nesting may be used in keys.
* Special key '<code>__original</code>' means that original value from source field will be used here. Example of
* default value with replacement keys ' <code>No mapping found for value {__original}.</code> '.
* </ul>
* <li><code>source_bases</code> - list of fields in source data which are used as bases for lookups evaluation. If
* defined then lookup is performed for each of these fields, and <code>source_field</code>, <code>target_field</code> and
* keys in <code>value_default</code> and <code>source_value</code> are resolved relatively against this base. Base must
* provide object or list of objects. See example later. </ul>
*
*
* Example of configuration for this preprocessor for lookup of multiple values of same structure:
*
* <pre>
* {
* "name" : "EL Lookup transformer",
* "class" : "org.jboss.elasticsearch.tools.content.ESLookupValuePreprocessor",
* "settings" : {
* "index_name" : "peoples",
* "index_type" : "person",
* "source_field" : "username",
* "idx_search_field" : "user_name",
* "result_mapping" : [
* {"idx_result_field":"full_name", "target_field":"name"},
* {"idx_result_field":"email_company", "target_field":"email"},
* {"idx_result_field":"phone_company", "target_field":"phone"}
* ],
* "source_bases" : ["author", "editor", "comments.author"]
* }
* }
* </pre>
*
* So for example next input document:
*
* <pre>
* {
* "author": {
* "username" : "joe"
* },
* "editor": {
* "username" : "dan"
* },
* "comments" : [
* {
* "text": "Posted first version",
* "author": {
* "username" : "joe"
* }
* },{
* "text": "We need to work on it little more.",
* "author": {
* "username" : "dan"
* }
* }
* ]
* }
* </pre>
*
* will be transformed to the:
*
* <pre>
* {
* "author": {
* "username" : "joe",
* "name" : "Joe Gulf",
* "email" : "joe@edit.com",
* "phone" : "11223355"
* },
* "editor": {
* "username" : "dan",
* "name" : "Dan Doe",
* "email" : "dan@edit.com",
* "phone" : "11223356"
* },
* "comments" : [
* {
* "text": "Posted first version",
* "author": {
* "username" : "joe",
* "name" : "Joe Gulf",
* "email" : "joe@edit.com",
* "phone" : "11223355"
* }
* },{
* "text": "We need to work on it little more.",
* "author": {
* "username" : "dan",
* "name" : "Dan Doe",
* "email" : "dan@edit.com",
* "phone" : "11223356"
* }
* }
* ]
* }
*
* </pre>
* if the Elasticsearch index called <code>peoples</code> contains the necessary documents of type <code>person</code>
* with content like:
*
* <pre>
* {
* "user_name" : "joe",
* "full_name" : "Joe Gulf",
* "email_company" : "joe@edit.com",
* "phone_company" : "11223355"
* }
* </pre>
*
* and
*
* <pre>
* {
* "user_name" : "dan",
* "full_name" : "Dan Doe",
* "email_company" : "dan@edit.com",
* "phone_company" : "11223356"
* }
* </pre>
*
* @author Vlastimil Elias (velias at redhat dot com)
* @see StructuredContentPreprocessorFactory
*/
public class ESLookupValuePreprocessor extends
    StructuredContentPreprocessorWithSourceBasesBase<ESLookupValuePreprocessor.LookupContenxt> {

  // Names of the keys read from the preprocessor 'settings' structure.
  protected static final String CFG_index_name = "index_name";
  protected static final String CFG_index_type = "index_type";
  protected static final String CFG_source_field = "source_field";
  protected static final String CFG_source_value = "source_value";
  protected static final String CFG_idx_search_field = "idx_search_field";
  protected static final String CFG_result_mapping = "result_mapping";
  // Names of the keys read from each record of the 'result_mapping' array.
  protected static final String CFG_idx_result_field = "idx_result_field";
  protected static final String CFG_ignore_multiple_results = "result_multiple_ignore";
  protected static final String CFG_target_field = "target_field";
  protected static final String CFG_value_default = "value_default";

  /** Name of the search index the lookup is performed in. */
  protected String indexName;
  /** Name of the type in the search index the lookup is performed in. */
  protected String indexType;
  /** Field in the data holding the lookup key, <code>null</code> if {@link #sourceValuePattern} is used instead. */
  protected String sourceField;
  /** Pattern used to build the lookup key, read only if no source field is configured. */
  protected String sourceValuePattern;
  /** Index fields the lookup key is searched in, tried in this order. */
  protected List<String> idxSearchField;
  /** Mapping records describing which index result field goes into which target field. */
  protected List<Map<String, String>> resultMapping;
  /** If <code>true</code> then a lookup returning more than one hit is ignored instead of using the first hit. */
  protected boolean ignoreMultipleResults = false;

  /**
   * Guard used to report an Elasticsearch failure only once for a series of consecutive failed searches. It is reset
   * as soon as a search request completes without an exception.
   */
  private boolean esExceptionWarned = false;

  /**
   * Read and validate configuration of this preprocessor.
   *
   * @param settings configuration structure to read values from
   * @throws SettingsException if the Elasticsearch client is not available, if neither source field nor source value
   *           is configured, or if the result mapping is missing or malformed. Other values are checked by inherited
   *           validation helpers.
   */
  @SuppressWarnings("unchecked")
  @Override
  public void init(Map<String, Object> settings) throws SettingsException {
    super.init(settings);
    if (client == null) {
      throw new SettingsException("ElasticSearch client is required for preprocessor " + name);
    }
    indexName = XContentMapValues.nodeStringValue(settings.get(CFG_index_name), null);
    validateConfigurationStringNotEmpty(indexName, CFG_index_name);
    indexType = XContentMapValues.nodeStringValue(settings.get(CFG_index_type), null);
    validateConfigurationStringNotEmpty(indexType, CFG_index_type);

    // source value pattern is an alternative which is read only if no source field is configured
    sourceField = XContentMapValues.nodeStringValue(settings.get(CFG_source_field), null);
    if (ValueUtils.isEmpty(sourceField)) {
      sourceField = null;
      sourceValuePattern = XContentMapValues.nodeStringValue(settings.get(CFG_source_value), null);
    }
    if (ValueUtils.isEmpty(sourceField) && ValueUtils.isEmpty(sourceValuePattern)) {
      throw new SettingsException("At least one of 'settings/" + CFG_source_field + "' or 'settings/"
          + CFG_source_value + "' configuration value must be defined for '" + name + "' preprocessor");
    }

    // report bad type of the configuration value as configuration error instead of ClassCastException
    Object resultMappingCfg = settings.get(CFG_result_mapping);
    if (resultMappingCfg != null && !(resultMappingCfg instanceof List)) {
      throw new SettingsException("Value of 'settings/" + CFG_result_mapping + "' configuration must be array for '"
          + name + "' preprocessor");
    }
    resultMapping = (List<Map<String, String>>) resultMappingCfg;
    validateResultMappingConfiguration(resultMapping, CFG_result_mapping);

    idxSearchField = StructureUtils.getListOfStringValues(settings, CFG_idx_search_field);
    validateConfigurationObjectNotEmpty(idxSearchField, CFG_idx_search_field);
    ignoreMultipleResults = XContentMapValues.nodeBooleanValue(settings.get(CFG_ignore_multiple_results), false);
  }

  /**
   * Validate result mapping configuration part. The array must contain at least one record, and each record must be
   * an object with non empty index result field and target field.
   *
   * @param value to check
   * @param configFieldName name of field in preprocessor settings structure. Used for error message.
   * @throws SettingsException thrown if value is not valid
   */
  @SuppressWarnings("unchecked")
  protected void validateResultMappingConfiguration(List<Map<String, String>> value, String configFieldName)
      throws SettingsException {
    if (value == null || value.isEmpty()) {
      throw new SettingsException("Missing or empty 'settings/" + configFieldName + "' configuration array for '"
          + name + "' preprocessor");
    }
    // iterate as Object so null or non-object record is reported as configuration error, not as NPE/CCE
    for (Object record : value) {
      if (!(record instanceof Map)) {
        throw new SettingsException("Invalid record in 'settings/" + configFieldName + "' configuration array for '"
            + name + "' preprocessor, object expected");
      }
      Map<String, String> mappingRecord = (Map<String, String>) record;
      if (ValueUtils.isEmpty(mappingRecord.get(CFG_idx_result_field))) {
        throw new SettingsException("Missing or empty 'settings/" + configFieldName + "/" + CFG_idx_result_field
            + "' configuration value for '" + name + "' preprocessor");
      }
      if (ValueUtils.isEmpty(mappingRecord.get(CFG_target_field))) {
        throw new SettingsException("Missing or empty 'settings/" + configFieldName + "/" + CFG_target_field
            + "' configuration value for '" + name + "' preprocessor");
      }
    }
  }

  /**
   * Create new context with empty lookup cache.
   *
   * @param data not used by this implementation
   * @return new context instance
   */
  @Override
  protected LookupContenxt createContext(Map<String, Object> data) {
    return new LookupContenxt();
  }

  /**
   * Obtain lookup key from the data, perform lookup for it and store looked up values into the data. If the lookup key
   * is a collection then lookup is performed for each of its items and non null results are collected into lists, one
   * list per target field.
   *
   * @param data to read lookup key from and to store results into
   * @param context holding lookup cache, may be <code>null</code>
   * @param base not used by this implementation
   * @param chainContext passed to the warning reporting
   */
  @SuppressWarnings("unchecked")
  @Override
  protected void processOneSourceValue(Map<String, Object> data, LookupContenxt context, String base,
      PreprocessChainContext chainContext) {
    Object sourceValue;
    if (sourceField != null) {
      sourceValue = XContentMapValues.extractValue(sourceField, data);
    } else {
      sourceValue = ValueUtils.processStringValuePatternReplacement(sourceValuePattern, data, null);
    }

    Map<String, Object> targetValues;
    if (sourceValue instanceof Collection) {
      // make sure we have cache here so repeated keys in the collection are looked up only once
      if (context == null) {
        context = new LookupContenxt();
      }
      Collection<Object> sourceCollection = (Collection<Object>) sourceValue;
      targetValues = new HashMap<String, Object>();
      for (Object sourceObject : sourceCollection) {
        Map<String, Object> looked = lookupValue(sourceObject, data, context, chainContext);
        if (looked == null) {
          continue;
        }
        for (Map.Entry<String, Object> entry : looked.entrySet()) {
          if (entry.getValue() == null) {
            continue;
          }
          List<Object> colTarget = (List<Object>) targetValues.get(entry.getKey());
          if (colTarget == null) {
            colTarget = new ArrayList<Object>();
            targetValues.put(entry.getKey(), colTarget);
          }
          colTarget.add(entry.getValue());
        }
      }
    } else {
      targetValues = lookupValue(sourceValue, data, context, chainContext);
    }

    if (targetValues != null) {
      for (Map.Entry<String, Object> entry : targetValues.entrySet()) {
        StructureUtils.putValueIntoMapOfMaps(data, entry.getKey(), entry.getValue());
      }
    }
  }

  /**
   * Perform lookup for one value in ES with default handling. Configured index search fields are tried in sequence
   * and the first one providing usable result wins, remaining fields are not searched then. Default values are used
   * if no search field provides result. Result is stored into the context cache if context is available.
   *
   * @param sourceValue to be looked up. Empty Map is returned for <code>null</code>.
   * @param data used in default pattern evaluation
   * @param context holding lookup cache, may be <code>null</code>
   * @param chainContext passed to the warning reporting
   * @return Map with looked up values (defaults handled already) and target_field names as keys
   */
  protected Map<String, Object> lookupValue(Object sourceValue, Map<String, Object> data, LookupContenxt context,
      PreprocessChainContext chainContext) {
    Map<String, Object> value = new HashMap<String, Object>();
    if (sourceValue == null) {
      return value;
    }
    if (context != null && context.lookupCache.containsKey(sourceValue)) {
      return context.lookupCache.get(sourceValue);
    }

    boolean found = false;
    for (String idxSf : idxSearchField) {
      try {
        SearchResponse resp = prepareLookupRequest(idxSf, sourceValue).execute().actionGet();
        // search passed, so next failure has to be reported again
        esExceptionWarned = false;

        long totalHits = resp.getHits().getTotalHits();
        if (totalHits <= 0) {
          addDataWarning(chainContext, "No result found during lookup for value '" + sourceValue + "'.");
          continue;
        }
        if (totalHits > 1) {
          String message = "More results found during lookup for value '" + sourceValue + "' using index field '"
              + idxSf;
          if (ignoreMultipleResults) {
            message += "', so we ignore them.";
          } else {
            message += "', so first one is used.";
          }
          addDataWarning(chainContext, message);
          logger.debug(message);
          if (ignoreMultipleResults) {
            continue;
          }
        }

        collectHitValues(resp.getHits().hits()[0], idxSf, sourceValue, data, value, chainContext);
        found = true;
        // remaining search fields are fallback only, so do not query them and do not let them override this result
        break;
      } catch (ElasticSearchException e) {
        if (!esExceptionWarned) {
          esExceptionWarned = true;
          String message = "Lookup failed due '" + e.getClass().getName() + ":" + e.getMessage()
              + "', so default value handling is used.";
          addDataWarning(chainContext, message);
          logger.warn(message);
        }
      }
    }

    if (!found) {
      processDefaultValues(sourceValue, data, value, chainContext);
    }
    if (context != null) {
      context.lookupCache.put(sourceValue, value);
    }
    return value;
  }

  /**
   * Build search request looking for the value in one index field, asking for all mapped result fields.
   *
   * @param idxSf name of index field to search the value in
   * @param sourceValue value to search for
   * @return search request prepared for execution
   */
  private SearchRequestBuilder prepareLookupRequest(String idxSf, Object sourceValue) {
    SearchRequestBuilder req = client.prepareSearch(indexName).setTypes(indexType)
        .setQuery(QueryBuilders.matchAllQuery())
        .setFilter(FilterBuilders.queryFilter(QueryBuilders.matchQuery(idxSf, sourceValue)));
    for (Map<String, String> mappingRecord : resultMapping) {
      String idxResultField = mappingRecord.get(CFG_idx_result_field);
      // '_source' is read from the hit directly, it is not requested as a field
      if (idxResultField != null && !"_source".equals(idxResultField)) {
        req.addField(idxResultField);
      }
    }
    return req;
  }

  /**
   * Copy values of mapped result fields from the search hit into the result Map. Warning is produced for each mapped
   * result field which is not present in the hit, nothing is stored for it then.
   *
   * @param hit search hit to read values from
   * @param idxSf name of index field used to find the hit. Used for warning message.
   * @param sourceValue looked up value. Used for warning message and default pattern evaluation.
   * @param data used in default pattern evaluation
   * @param value result Map to store values into, target_field names are used as keys
   * @param chainContext passed to the warning reporting
   */
  private void collectHitValues(SearchHit hit, String idxSf, Object sourceValue, Map<String, Object> data,
      Map<String, Object> value, PreprocessChainContext chainContext) {
    for (Map<String, String> mappingRecord : resultMapping) {
      String idxResultField = mappingRecord.get(CFG_idx_result_field);
      Object v = null;
      SearchHitField shf = null;
      if ("_source".equals(idxResultField)) {
        v = hit.getSource();
      } else {
        shf = hit.field(idxResultField);
        if (shf != null) {
          v = shf.getValue();
        }
      }

      if (shf == null && v == null) {
        String message = "Result found during lookup for value '" + sourceValue + "' using index field '"
            + idxSf + ", but result field '" + idxResultField + "' is not present there";
        addDataWarning(chainContext, message);
        logger.debug(message);
        continue;
      }

      // field is present in the hit but holds no value, so use default if configured
      if (v == null && mappingRecord.get(CFG_value_default) != null) {
        v = ValueUtils.processStringValuePatternReplacement(mappingRecord.get(CFG_value_default), data,
            sourceValue);
      }
      value.put(mappingRecord.get(CFG_target_field), v);
    }
  }

  /**
   * Fill result Map with default values for all mapping records. <code>null</code> is stored for records with no
   * default value configured.
   *
   * @param sourceValue looked up value, passed to the default pattern evaluation
   * @param data used in default pattern evaluation
   * @param value result Map to store values into, target_field names are used as keys
   * @param chainContext not used
   */
  private void processDefaultValues(Object sourceValue, Map<String, Object> data, Map<String, Object> value,
      PreprocessChainContext chainContext) {
    for (Map<String, String> mappingRecord : resultMapping) {
      Object v = null;
      String defaultPattern = mappingRecord.get(CFG_value_default);
      if (defaultPattern != null) {
        v = ValueUtils.processStringValuePatternReplacement(defaultPattern, data, sourceValue);
      }
      value.put(mappingRecord.get(CFG_target_field), v);
    }
  }

  /**
   * Context holding cache of lookup results. Looked up source value is used as a key, Map returned from
   * {@link ESLookupValuePreprocessor#lookupValue(Object, Map, LookupContenxt, PreprocessChainContext)} as a value.
   */
  protected class LookupContenxt {
    Map<Object, Map<String, Object>> lookupCache = new HashMap<Object, Map<String, Object>>();
  }

  /**
   * @return configured source bases
   */
  public List<String> getSourceBases() {
    return sourceBases;
  }

  /**
   * @return configured name of search index
   */
  public String getIndexName() {
    return indexName;
  }

  /**
   * @return configured name of type in search index
   */
  public String getIndexType() {
    return indexType;
  }

  /**
   * @return configured source field, <code>null</code> if not configured
   */
  public String getSourceField() {
    return sourceField;
  }

  /**
   * @return configured source value pattern. It is read only if source field is not configured.
   */
  public String getSourceValuePattern() {
    return sourceValuePattern;
  }

  /**
   * @return configured index search fields
   */
  public List<String> getIdxSearchField() {
    return idxSearchField;
  }

  /**
   * @return configured result mapping records
   */
  public List<Map<String, String>> getResultMapping() {
    return resultMapping;
  }
}