/*
* Copyright 2013 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ngdata.hbaseindexer.mr;
import static com.ngdata.hbaseindexer.indexer.SolrServerFactory.createHttpSolrServers;
import static com.ngdata.hbaseindexer.indexer.SolrServerFactory.createSharder;
import static com.ngdata.hbaseindexer.util.solr.SolrConnectionParamUtil.getSolrMaxConnectionsPerRoute;
import static com.ngdata.hbaseindexer.util.solr.SolrConnectionParamUtil.getSolrMaxConnectionsTotal;
import static com.ngdata.hbaseindexer.util.solr.SolrConnectionParamUtil.getSolrMode;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.TreeMap;
import com.google.common.base.Charsets;
import com.google.common.collect.Maps;
import com.ngdata.hbaseindexer.conf.IndexerComponentFactory;
import com.ngdata.hbaseindexer.conf.IndexerComponentFactoryUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.io.Text;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.hadoop.SolrInputDocumentWritable;
import org.apache.solr.hadoop.SolrOutputFormat;
import org.apache.solr.hadoop.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.codahale.metrics.Counting;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.SharedMetricRegistries;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.ngdata.hbaseindexer.SolrConnectionParams;
import com.ngdata.hbaseindexer.conf.IndexerConf;
import com.ngdata.hbaseindexer.conf.IndexerConf.RowReadMode;
import com.ngdata.hbaseindexer.conf.IndexerConfBuilder;
import com.ngdata.hbaseindexer.indexer.DirectSolrClassicInputDocumentWriter;
import com.ngdata.hbaseindexer.indexer.DirectSolrInputDocumentWriter;
import com.ngdata.hbaseindexer.indexer.Indexer;
import com.ngdata.hbaseindexer.indexer.ResultToSolrMapperFactory;
import com.ngdata.hbaseindexer.indexer.ResultWrappingRowData;
import com.ngdata.hbaseindexer.indexer.RowData;
import com.ngdata.hbaseindexer.indexer.Sharder;
import com.ngdata.hbaseindexer.indexer.SharderException;
import com.ngdata.hbaseindexer.indexer.SolrInputDocumentWriter;
import com.ngdata.hbaseindexer.metrics.IndexerMetricsUtil;
import com.ngdata.hbaseindexer.morphline.MorphlineResultToSolrMapper;
import com.ngdata.hbaseindexer.parse.ResultToSolrMapper;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Counter;
import com.yammer.metrics.core.Meter;
import com.yammer.metrics.core.Metric;
import com.yammer.metrics.core.MetricName;
import com.yammer.metrics.core.Timer;
import org.apache.http.client.HttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.solr.client.solrj.SolrServer;
/**
 * Mapper for converting HBase {@link Result} objects into index documents.
 * <p>
 * The mapper is configured entirely through the job {@link Configuration}; see the
 * {@code *_CONF_KEY} constants for the recognized keys. When {@link #INDEX_DIRECT_WRITE_CONF_KEY}
 * is set to {@code true} the documents are sent to Solr through a buffered direct writer
 * ("cloud" or "classic" mode), otherwise they are handed to a
 * {@code MapReduceSolrInputDocumentWriter} bound to the task context.
 */
public class HBaseIndexerMapper extends TableMapper<Text, SolrInputDocumentWritable> {

    /**
     * Configuration key for setting the name of the indexer.
     */
    public static final String INDEX_NAME_CONF_KEY = "hbase.indexer.indexname";

    /**
     * Configuration key identifying the indexer component factory; its value is passed to
     * {@link IndexerComponentFactoryUtil#getComponentFactory} and may be absent.
     */
    public static final String INDEX_COMPONENT_FACTORY_KEY = "hbase.indexer.factory";

    /**
     * Configuration key for setting the contents of the indexer config.
     */
    public static final String INDEX_CONFIGURATION_CONF_KEY = "hbase.indexer.configuration";

    /**
     * Configuration key for setting the free-form index connection parameters.
     */
    public static final String INDEX_CONNECTION_PARAMS_CONF_KEY = "hbase.indexer.index.connectionparams";

    /**
     * Configuration key for setting the direct write flag.
     */
    public static final String INDEX_DIRECT_WRITE_CONF_KEY = "hbase.indexer.directwrite";

    /**
     * Configuration key for setting the HBase table name.
     */
    public static final String TABLE_NAME_CONF_KEY = "hbase.indexer.table.name";

    private static final String CONF_KEYVALUE_SEPARATOR = "=";
    private static final String CONF_VALUE_SEPARATOR = ";";

    /** Hadoop counter group under which all indexing metrics are published. */
    private static final String METRICS_COUNTER_GROUP = "HBase Indexer Metrics";

    private static final Logger LOG = LoggerFactory.getLogger(HBaseIndexerMapper.class);

    private Indexer indexer;
    private SolrInputDocumentWriter solrDocWriter;

    /**
     * Add the given index connection parameters to a Configuration.
     * <p>
     * The parameters are stored as a single {@code key=value;key=value} string without any
     * escaping, so keys and values must not contain {@code ;} and keys must not contain {@code =}.
     *
     * @param conf the configuration in which to add the parameters
     * @param connectionParams index connection parameters
     */
    public static void configureIndexConnectionParams(Configuration conf, Map<String, String> connectionParams) {
        String confValue = Joiner.on(CONF_VALUE_SEPARATOR)
                .withKeyValueSeparator(CONF_KEYVALUE_SEPARATOR)
                .join(connectionParams);
        conf.set(INDEX_CONNECTION_PARAMS_CONF_KEY, confValue);
    }

    /**
     * Retrieve index connection parameters from a Configuration.
     * <p>
     * A missing or empty configuration value yields an empty map. Only the first {@code =} of an
     * entry separates the key from the value, so values may themselves contain {@code =}.
     *
     * @param conf configuration containing index connection parameters
     * @return index connection parameters, never {@code null}
     * @throws IllegalArgumentException if an entry has no {@code =} or a key occurs twice
     */
    public static Map<String, String> getIndexConnectionParams(Configuration conf) {
        String confValue = conf.get(INDEX_CONNECTION_PARAMS_CONF_KEY);
        // An empty parameter map is serialized as "" by configureIndexConnectionParams; feeding that
        // to the map splitter would fail because the single empty chunk has no key/value separator.
        if (confValue == null || confValue.isEmpty()) {
            LOG.warn("No connection parameters found in configuration");
            return ImmutableMap.of();
        }
        return Splitter.on(CONF_VALUE_SEPARATOR)
                .withKeyValueSeparator(Splitter.on(CONF_KEYVALUE_SEPARATOR).limit(2))
                .split(confValue);
    }

    /**
     * Reads the indexer definition from the job configuration and builds the {@link Indexer}
     * (and its document writer) used by {@link #map}.
     *
     * @param context the task context supplying the configuration
     * @throws IllegalStateException if the index name, indexer configuration or table name is missing
     * @throws RuntimeException wrapping a {@link SharderException} raised while creating the indexer
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);

        Configuration conf = context.getConfiguration();
        Utils.getLogConfigFile(conf);

        if (LOG.isTraceEnabled()) {
            traceConfiguration(conf);
        }

        String indexName = requireConfValue(conf, INDEX_NAME_CONF_KEY);
        String indexerComponentFactory = conf.get(INDEX_COMPONENT_FACTORY_KEY);
        String indexConfiguration = requireConfValue(conf, INDEX_CONFIGURATION_CONF_KEY);
        String tableName = requireConfValue(conf, TABLE_NAME_CONF_KEY);

        Map<String, String> indexConnectionParams = getIndexConnectionParams(conf);
        IndexerComponentFactory factory = IndexerComponentFactoryUtil.getComponentFactory(
                indexerComponentFactory,
                new ByteArrayInputStream(indexConfiguration.getBytes(Charsets.UTF_8)),
                indexConnectionParams);
        IndexerConf indexerConf = factory.createIndexerConf();

        // Job-level morphline settings are layered on top of the indexer's own global parameters.
        Map<String, String> params = indexerConf.getGlobalParams();
        applyMorphlineParams(conf, params);

        ResultToSolrMapper mapper = factory.createMapper(indexName);

        // TODO This would be better-placed in the top-level job setup -- however, there isn't currently any
        // infrastructure to handle converting an in-memory model into XML (we can only interpret an
        // XML doc into the internal model), so we need to do this here for now
        if (indexerConf.getRowReadMode() != RowReadMode.NEVER) {
            LOG.warn("Changing row read mode from {} to {}", indexerConf.getRowReadMode(), RowReadMode.NEVER);
            indexerConf = new IndexerConfBuilder(indexerConf).rowReadMode(RowReadMode.NEVER).build();
        }
        indexerConf.setGlobalParams(params);

        try {
            indexer = createIndexer(indexName, context, indexerConf, tableName, mapper, indexConnectionParams);
        } catch (SharderException e) {
            throw new RuntimeException(e);
        }
    }

    /** Returns the configuration value for {@code key}, failing if it is not set. */
    private static String requireConfValue(Configuration conf, String key) {
        String value = conf.get(key);
        if (value == null) {
            throw new IllegalStateException("No configuration value supplied for " + key);
        }
        return value;
    }

    /** Logs the working directory and the complete configuration, sorted by key, at trace level. */
    private static void traceConfiguration(Configuration conf) throws IOException {
        LOG.trace("CWD is {}", new File(".").getCanonicalPath());
        Map<String, String> sortedEntries = new TreeMap<String, String>();
        for (Map.Entry<String, String> entry : conf) {
            sortedEntries.put(entry.getKey(), entry.getValue());
        }
        LOG.trace("Mapper configuration:\n{}", Joiner.on("\n").join(sortedEntries.entrySet()));
    }

    /** Copies the morphline file, id, variable and field settings from {@code conf} into {@code params}. */
    private static void applyMorphlineParams(Configuration conf, Map<String, String> params) {
        String morphlineFile = conf.get(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM);
        if (morphlineFile != null) {
            params.put(MorphlineResultToSolrMapper.MORPHLINE_FILE_PARAM, morphlineFile);
        }

        String morphlineId = conf.get(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM);
        if (morphlineId != null) {
            params.put(MorphlineResultToSolrMapper.MORPHLINE_ID_PARAM, morphlineId);
        }

        String variablePrefix = MorphlineResultToSolrMapper.MORPHLINE_VARIABLE_PARAM + ".";
        String fieldPrefix = MorphlineResultToSolrMapper.MORPHLINE_FIELD_PARAM + ".";
        for (Map.Entry<String, String> entry : conf) {
            String key = entry.getKey();
            if (key.startsWith(variablePrefix) || key.startsWith(fieldPrefix)) {
                params.put(key, entry.getValue());
            }
        }
    }

    /**
     * Creates the indexer together with the document writer matching the configured output mode,
     * and stores that writer in {@link #solrDocWriter} so that {@link #cleanup} can close it.
     *
     * @throws RuntimeException if direct write is enabled and the Solr mode is neither
     *         {@code cloud} nor {@code classic}
     */
    private Indexer createIndexer(String indexName, Context context, IndexerConf indexerConf, String tableName,
                                  ResultToSolrMapper mapper, Map<String, String> indexConnectionParams)
            throws IOException, SharderException {
        Configuration conf = context.getConfiguration();

        if (!conf.getBoolean(INDEX_DIRECT_WRITE_CONF_KEY, false)) {
            solrDocWriter = new MapReduceSolrInputDocumentWriter(context);
            return Indexer.createIndexer(indexName, indexerConf, tableName, mapper, null, null, solrDocWriter);
        }

        String solrMode = getSolrMode(indexConnectionParams);
        // Constant-first comparison so that a missing mode reaches the descriptive error below.
        if ("cloud".equals(solrMode)) {
            DirectSolrInputDocumentWriter cloudSolrWriter = createCloudSolrWriter(context, indexConnectionParams);
            solrDocWriter = wrapInBufferedWriter(context, cloudSolrWriter);
            return Indexer.createIndexer(indexName, indexerConf, tableName, mapper, null, null, solrDocWriter);
        }
        if ("classic".equals(solrMode)) {
            DirectSolrClassicInputDocumentWriter classicSolrWriter =
                    createClassicSolrWriter(context, indexConnectionParams);
            Sharder sharder = createSharder(indexConnectionParams, classicSolrWriter.getNumServers());
            solrDocWriter = wrapInBufferedWriter(context, classicSolrWriter);
            return Indexer.createIndexer(indexName, indexerConf, tableName, mapper, null, sharder, solrDocWriter);
        }
        throw new RuntimeException("Only 'cloud' and 'classic' are valid values for solr.mode, but got " + solrMode);
    }

    /**
     * Creates a direct writer backed by a {@link CloudSolrServer} for the ZooKeeper host and
     * collection named in the connection parameters.
     *
     * @throws IllegalStateException if the ZooKeeper host or the collection name is not defined
     */
    private DirectSolrInputDocumentWriter createCloudSolrWriter(Context context,
                                                                Map<String, String> indexConnectionParams)
            throws IOException {
        String indexZkHost = indexConnectionParams.get(SolrConnectionParams.ZOOKEEPER);
        String collectionName = indexConnectionParams.get(SolrConnectionParams.COLLECTION);

        if (indexZkHost == null) {
            throw new IllegalStateException("No index ZK host defined");
        }
        if (collectionName == null) {
            throw new IllegalStateException("No collection name defined");
        }

        CloudSolrServer solrServer = new CloudSolrServer(indexZkHost);
        solrServer.setDefaultCollection(collectionName);
        return new DirectSolrInputDocumentWriter(context.getConfiguration().get(INDEX_NAME_CONF_KEY), solrServer);
    }

    /**
     * Creates a direct writer for classic (non-cloud) Solr. All Solr servers share one HTTP client
     * whose pooled connection limits are taken from the connection parameters.
     */
    private DirectSolrClassicInputDocumentWriter createClassicSolrWriter(Context context,
                                                                        Map<String, String> indexConnectionParams)
            throws IOException {
        PoolingClientConnectionManager connectionManager = new PoolingClientConnectionManager();
        connectionManager.setDefaultMaxPerRoute(getSolrMaxConnectionsPerRoute(indexConnectionParams));
        connectionManager.setMaxTotal(getSolrMaxConnectionsTotal(indexConnectionParams));

        HttpClient httpClient = new DefaultHttpClient(connectionManager);
        List<SolrServer> solrServers = createHttpSolrServers(indexConnectionParams, httpClient);

        return new DirectSolrClassicInputDocumentWriter(
                context.getConfiguration().get(INDEX_NAME_CONF_KEY), solrServers);
    }

    /**
     * Wraps {@code writer} in a {@code BufferedSolrInputDocumentWriter}. The buffer size is read from
     * {@link SolrOutputFormat#SOLR_RECORD_WRITER_BATCH_SIZE} (default 100), and the job's
     * document and batch counters are handed to the buffered writer.
     */
    private SolrInputDocumentWriter wrapInBufferedWriter(Context context, SolrInputDocumentWriter writer)
            throws MalformedURLException {
        int bufferSize = context.getConfiguration().getInt(SolrOutputFormat.SOLR_RECORD_WRITER_BATCH_SIZE, 100);
        return new BufferedSolrInputDocumentWriter(
                writer,
                bufferSize,
                context.getCounter(HBaseIndexerCounters.OUTPUT_INDEX_DOCUMENTS),
                context.getCounter(HBaseIndexerCounters.OUTPUT_INDEX_DOCUMENT_BATCHES));
    }

    /**
     * Indexes a single HBase row.
     *
     * @param key the row key (unused; the row is taken from {@code result})
     * @param result the HBase row to index
     * @param context the task context; its input split must be a {@link TableSplit}
     * @throws IOException if the input split is not a {@link TableSplit}
     * @throws RuntimeException wrapping a {@link SolrServerException} or {@link SharderException}
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result result, Context context) throws IOException,
            InterruptedException {
        context.progress();
        context.getCounter(HBaseIndexerCounters.INPUT_ROWS).increment(1L);

        if (!(context.getInputSplit() instanceof TableSplit)) {
            throw new IOException("Input split not of type " + TableSplit.class + " but " +
                    context.getInputSplit().getClass());
        }
        TableSplit tableSplit = (TableSplit) context.getInputSplit();

        try {
            RowData rowData = new ResultWrappingRowData(result, tableSplit.getTableName());
            indexer.indexRowData(ImmutableList.<RowData>of(rowData));
        } catch (SolrServerException e) {
            // These will only be thrown through if there is an exception on the server side.
            // Document-based errors will be swallowed and the counter will be incremented
            throw new RuntimeException(e);
        } catch (SharderException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes the document writer and then publishes the collected indexing metrics as Hadoop counters.
     *
     * @throws RuntimeException wrapping a {@link SolrServerException} raised while closing the writer;
     *         in that case no metrics are copied
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        try {
            solrDocWriter.close();
        } catch (SolrServerException e) {
            throw new RuntimeException(e);
        }
        copyIndexingMetricsToCounters(context);
        copyIndexingMetrics3ToCounters(context);
    }

    /**
     * Publishes the indexer metrics of the Yammer (Metrics 2) default registry as Hadoop counters
     * named {@code "<type>: <name>"}. Counters and meters contribute their count, timers their
     * summed duration truncated to a {@code long}; other metric kinds are skipped.
     */
    private void copyIndexingMetricsToCounters(Context context) {
        SortedMap<String, SortedMap<MetricName, Metric>> groupedMetrics = Metrics.defaultRegistry().groupedMetrics(
                new IndexerMetricsUtil.IndexerMetricPredicate());

        for (SortedMap<MetricName, Metric> metricsGroup : groupedMetrics.values()) {
            for (Entry<MetricName, Metric> metricEntry : metricsGroup.entrySet()) {
                MetricName metricName = metricEntry.getKey();
                Metric metric = metricEntry.getValue();
                String counterName = metricName.getType() + ": " + metricName.getName();

                if (metric instanceof Counter) {
                    context.getCounter(METRICS_COUNTER_GROUP, counterName).increment(((Counter) metric).count());
                } else if (metric instanceof Meter) {
                    context.getCounter(METRICS_COUNTER_GROUP, counterName).increment(((Meter) metric).count());
                } else if (metric instanceof Timer) {
                    context.getCounter(METRICS_COUNTER_GROUP, counterName)
                            .increment((long) ((Timer) metric).sum());
                }
            }
        }
    }

    /**
     * Publishes the counters, histograms, meters and timers of every shared Codahale (Metrics 3)
     * registry as Hadoop counters named after the metric.
     */
    private void copyIndexingMetrics3ToCounters(Context context) {
        for (String name : SharedMetricRegistries.names()) {
            MetricRegistry metricRegistry = SharedMetricRegistries.getOrCreate(name);
            for (Map.Entry<String, com.codahale.metrics.Counter> entry : metricRegistry.getCounters().entrySet()) {
                addCounting(context, entry.getKey(), entry.getValue(), 1);
            }
            for (Map.Entry<String, com.codahale.metrics.Histogram> entry : metricRegistry.getHistograms().entrySet()) {
                addCounting(context, entry.getKey(), entry.getValue(), 1);
            }
            for (Map.Entry<String, com.codahale.metrics.Meter> entry : metricRegistry.getMeters().entrySet()) {
                addCounting(context, entry.getKey(), entry.getValue(), 1);
            }
            // NOTE(review): the timer's getCount() is divided by 1e6 as if it were a nanosecond total;
            // confirm that Timer.getCount() is a duration here and not the number of recorded events.
            long nanosPerMilliSec = 1000 * 1000;
            for (Map.Entry<String, com.codahale.metrics.Timer> entry : metricRegistry.getTimers().entrySet()) {
                addCounting(context, entry.getKey(), entry.getValue(), nanosPerMilliSec);
            }
        }
    }

    /** Increments the Hadoop counter named {@code metricName} by {@code value.getCount() / scale}. */
    private void addCounting(Context context, String metricName, Counting value, long scale) {
        context.getCounter(METRICS_COUNTER_GROUP, metricName).increment(value.getCount() / scale);
    }
}