* Copyright 2013 NGDATA nv
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
package com.ngdata.hbaseindexer.cli;
import static com.ngdata.hbaseindexer.model.api.IndexerDefinition.BatchIndexingState;
import static com.ngdata.hbaseindexer.model.api.IndexerDefinition.IncrementalIndexingState;
import static com.ngdata.hbaseindexer.model.api.IndexerDefinition.LifecycleState;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import com.ngdata.hbaseindexer.SolrConnectionParams;
import com.ngdata.hbaseindexer.conf.DefaultIndexerComponentFactory;
import com.ngdata.hbaseindexer.conf.IndexerComponentFactoryUtil;
import com.ngdata.hbaseindexer.conf.IndexerConfException;
import com.ngdata.hbaseindexer.model.api.IndexerDefinition;
import com.ngdata.hbaseindexer.model.api.IndexerDefinitionBuilder;
import com.ngdata.hbaseindexer.util.IndexerNameValidator;
import com.ngdata.hbaseindexer.util.solr.SolrConnectionParamUtil;
import joptsimple.ArgumentAcceptingOptionSpec;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import joptsimple.OptionSpec;
import joptsimple.ValueConversionException;
import joptsimple.ValueConverter;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.hbase.util.Pair;
/**
 * Base class for the {@link AddIndexerCli} and {@link UpdateIndexerCli}.
 */
// Holds the command-line option specs shared by the add and update indexer commands.
// Every field below is assigned in setupOptionParser(); the quoted names are the aliases registered there.
public abstract class AddOrUpdateIndexerCli extends BaseIndexCli {
// "n" / "name": a name for the index
protected OptionSpec<String> nameOption;
// "c" / "indexer-conf": indexer configuration; getIndexerConf() treats its value as a file name
protected ArgumentAcceptingOptionSpec<String> indexerConfOption;
// "r" / "indexer-component-factory": indexer component factory class
protected ArgumentAcceptingOptionSpec<String> indexerComponentFactoryOption;
// "cp" / "connection-param": repeatable key=value pairs, converted by StringPairConverter
protected OptionSpec<Pair<String, String>> connectionParamOption;
// "lifecycle": lifecycle state, converted by EnumConverter
protected OptionSpec<IndexerDefinition.LifecycleState> lifecycleStateOption;
// "incremental": incremental indexing state, converted by EnumConverter
protected OptionSpec<IndexerDefinition.IncrementalIndexingState> incrementalIdxStateOption;
// "batch": batch indexing state, converted by EnumConverter
protected OptionSpec<IndexerDefinition.BatchIndexingState> batchIdxStateOption;
// "dbc" / "default-batch-cli-arguments": default batch indexing cli arguments for this indexer
protected OptionSpec<String> defaultBatchIndexCliArgumentsOption;
// "bc" / "batch-cli-arguments": batch indexing cli arguments for the next batch index build
protected OptionSpec<String> batchIndexCliArgumentsOption;
/**
 * Registers the options shared by the add and update commands on the parser obtained from the
 * superclass and stores each resulting option spec in the matching field of this class.
 *
 * @return the parser returned by {@code super.setupOptionParser()}, with the additional options registered
 */
protected OptionParser setupOptionParser() {
OptionParser parser = super.setupOptionParser();
// NOTE(review): none of the acceptsAll(...) chains below shows its terminating call(s) in this view
// (argument requirement, value type, statement end) -- confirm against the complete file.
nameOption = parser
.acceptsAll(Lists.newArrayList("n", "name"), "a name for the index")
indexerComponentFactoryOption = parser
.acceptsAll(Lists.newArrayList("r", "indexer-component-factory"), "Indexer component factory class")
indexerConfOption = parser
.acceptsAll(Lists.newArrayList("c", "indexer-conf"), "Indexer configuration")
// Connection parameters arrive as raw "key=value" strings and are split by StringPairConverter.
connectionParamOption = parser
.acceptsAll(Lists.newArrayList("cp", "connection-param"),
"A connection parameter in the form key=value. This option can be specified multiple"
+ " times. Example: -cp solr.zk=host1,host2 -cp solr.collection=products. In case"
+ " of update, use an empty value to remove a key: -cp solr.collection=")
.withValuesConvertedBy(new StringPairConverter())
// The three state options are parsed into their enum types by EnumConverter.
lifecycleStateOption = parser
.acceptsAll(Lists.newArrayList("lifecycle"), "Lifecycle state, one of "
+ LifecycleState.ACTIVE + ", " + LifecycleState.DELETE_REQUESTED)
.withValuesConvertedBy(new EnumConverter<LifecycleState>(LifecycleState.class))
incrementalIdxStateOption = parser
.acceptsAll(Lists.newArrayList("incremental"), "Incremental indexing state, one of "
+ IncrementalIndexingState.SUBSCRIBE_AND_CONSUME
+ ", " + IncrementalIndexingState.SUBSCRIBE_DO_NOT_CONSUME
+ ", " + IncrementalIndexingState.DO_NOT_SUBSCRIBE)
.withValuesConvertedBy(new EnumConverter<IncrementalIndexingState>(IncrementalIndexingState.class))
batchIdxStateOption = parser
.acceptsAll(Lists.newArrayList("batch"), "Batch indexing state, can only be set to "
+ BatchIndexingState.BUILD_REQUESTED + (". This will trigger a batch rebuild of the index in "
+ "\"direct write\" mode (scanning over all records and sending the results to a live solr cluster)."))
.withValuesConvertedBy(new EnumConverter<BatchIndexingState>(BatchIndexingState.class))
// The two batch-argument options name a file; getBatchIndexingCliArguments() reads it.
defaultBatchIndexCliArgumentsOption = parser
.acceptsAll(Lists.newArrayList("dbc", "default-batch-cli-arguments"),
"Default batch indexing cli arguments for this indexer. On update, use this option without"
+ " filename argument to clear the setting. Note that not all options of the map reduce"
+ " batch index job make sense in this context, because it only supports direct write to"
+ " a running solr cluster (i.e. --reducers 0)")
batchIndexCliArgumentsOption = parser
.acceptsAll(Lists.newArrayList("bc", "batch-cli-arguments"),
"Batch indexing cli arguments to use for the next batch index build triggered, this overrides"
+ " the default batch index cli arguments (if any). On update, use this option without"
+ " filename argument to clear the setting. Note that not all options of the map reduce"
+ " batch index job make sense in this context, because it only supports direct write to"
+ " a running solr cluster (i.e. --reducers 0)")
return parser;
/**
 * Builds an {@link IndexerDefinition} based on the CLI options provided, and optionally starting from
 * an initial state.
 */
// Reads every shared CLI option from `options` and returns the IndexerDefinitionBuilder created here.
// `oldIndexerDef` may be null: each use below is null-checked, and when it is non-null its connection
// parameters are the starting point for the new ones.
// NOTE(review): the statements guarded by the brace-less `if` lines and the empty if/else branches
// below (presumably the calls that apply each value to the builder) are not visible in this view --
// confirm against the complete file.
protected IndexerDefinitionBuilder buildIndexerDefinition(OptionSet options, IndexerDefinition oldIndexerDef)
throws IOException {
IndexerDefinitionBuilder builder = new IndexerDefinitionBuilder();
if (oldIndexerDef != null)
// name option is always required, so don't need to check for nulliness
String indexerName = nameOption.value(options);
// Each state option yields null when it was not given on the command line, hence the null checks.
LifecycleState lifecycleState = lifecycleStateOption.value(options);
if (lifecycleState != null)
IncrementalIndexingState incrementalIdxState = incrementalIdxStateOption.value(options);
if (incrementalIdxState != null)
BatchIndexingState batchIdxState = batchIdxStateOption.value(options);
if (batchIdxState != null)
// connection type is a hardcoded setting
Map<String, String> connectionParams = getConnectionParams(options,
oldIndexerDef != null ? oldIndexerDef.getConnectionParams() : null);
if (connectionParams != null)
if (oldIndexerDef == null || oldIndexerDef.getIndexerComponentFactory() == null)
// The configuration is parsed against the freshly computed connection params as a validity check.
byte[] indexerConf = getIndexerConf(options, indexerComponentFactoryOption, indexerConfOption, connectionParams);
if (indexerConf != null)
// For both argument sets a zero-length array is distinguished from a non-empty one; per the option
// help text, giving the option without a file name is meant to clear the setting.
String[] defaultBatchIndexCliArguments = getBatchIndexingCliArguments(options, defaultBatchIndexCliArgumentsOption);
if (defaultBatchIndexCliArguments != null) {
if (defaultBatchIndexCliArguments.length == 0) {
} else {
String[] batchIndexCliArguments = getBatchIndexingCliArguments(options, batchIndexCliArgumentsOption);
if (batchIndexCliArguments != null) {
if (batchIndexCliArguments.length == 0) {
} else {
return builder;
/**
 * Loads the indexer configuration from the file named by {@code configOption} and checks that it can
 * be parsed before handing back its raw bytes.
 *
 * @param options parsed command-line options
 * @param readerOption option holding the indexer component factory name
 * @param configOption option holding the name of the configuration file
 * @param connectionParams connection parameters passed along to the component factory lookup
 * @return the bytes of the configuration file; {@code data} starts out null and is only assigned after
 *         the file-name null check, so presumably null when no file name was given
 * @throws CliException if the file does not exist, or if an {@link IndexerConfException} is caught
 *         while parsing the configuration
 * @throws IOException declared by the signature; presumably raised while reading the file
 */
protected byte[] getIndexerConf(OptionSet options, OptionSpec<String> readerOption, OptionSpec<String> configOption,
Map<String, String> connectionParams)
throws IOException {
String componentFactory = readerOption.value(options);
String fileName = configOption.value(options);
byte[] data = null;
if (fileName != null) {
File file = new File(fileName);
if (!file.exists()) {
StringBuilder msg = new StringBuilder();
// NOTE(review): the message ends in ":\n" but no path is appended in this view -- confirm.
msg.append("Specified indexer configuration file not found:\n");
throw new CliException(msg.toString());
// NOTE(review): nothing visible here closes the stream obtained from getInput() -- confirm.
data = ByteStreams.toByteArray(Files.newInputStreamSupplier(file).getInput());
try {
// NOTE(review): the receiver of this call is not visible in this view.
.getComponentFactory(componentFactory, new ByteArrayInputStream(data), connectionParams);
} catch (IndexerConfException e) {
// Turn the parse failure into a CLI error that names the offending file.
StringBuilder msg = new StringBuilder();
msg.append("Failed to parse configuration ").append(fileName).append('\n');
addExceptionMessages(e, msg);
throw new CliException(msg.toString());
return data;
/**
 * Walks the cause chain starting at {@code throwable} itself and following {@code getCause()} until it
 * is null.
 *
 * <p>NOTE(review): the statement that writes into {@code builder} is not visible in this view;
 * presumably each throwable's message is appended -- confirm against the complete file.
 *
 * @param throwable first throwable of the chain
 * @param builder buffer passed in by the caller to receive the output
 */
private void addExceptionMessages(Throwable throwable, StringBuilder builder) {
Throwable cause = throwable;
while (cause != null) {
cause = cause.getCause();
/**
 * Computes the connection parameters from the connection-param options, starting from a copy of
 * {@code oldParams} when those are given.
 *
 * <p>As far as visible here, the method applies each key=value pair (an empty value is a request to
 * remove the key), warns on stderr about keys that {@code isValidConnectionParam} rejects, drops
 * parameters of the previous Solr mode when the mode switches between "cloud" and "classic", and then
 * validates the result for the effective mode. A missing mode is treated as "cloud".
 *
 * @param options parsed command-line options
 * @param oldParams connection parameters of an existing definition, or null
 * @return the resulting connection parameters
 * @throws CliException if solr.collection is missing in cloud mode, if no shard is configured in
 *         classic mode, or if the mode is neither "cloud" nor "classic"
 */
private Map<String, String> getConnectionParams(OptionSet options, Map<String, String> oldParams) {
Map<String, String> connectionParams = Maps.newHashMap();
if (oldParams != null) {
// Work on a copy so the caller's map is not modified.
connectionParams = Maps.newHashMap(oldParams);
// Remember the mode before the new options are applied, to detect a mode switch further down.
String oldSolrMode = connectionParams.get(SolrConnectionParams.MODE);
if (oldSolrMode == null) {
oldSolrMode = "cloud";
// NOTE(review): the statements that fill `explicit` are not visible in this view; presumably it
// collects the keys given on the command line -- confirm.
List<String> explicit = Lists.newArrayList();
for (Pair<String, String> param : connectionParamOption.values(options)) {
// An empty value indicates a request to remove the key
if (param.getSecond().length() == 0) {
} else {
// Unrecognized keys only produce a warning; they are still stored.
if (!isValidConnectionParam(param.getFirst())) {
System.err.println("WARNING: the following is not a recognized Solr connection parameter: "
+ param.getFirst());
connectionParams.put(param.getFirst(), param.getSecond());
String newSolrMode = connectionParams.get(SolrConnectionParams.MODE);
if (newSolrMode == null) {
newSolrMode = "cloud";
if (oldSolrMode.equals("cloud") && newSolrMode.equals("classic")) {
// Switch from cloud to classic -- remove any cloud specific parameters
removeUnlessExplicit(explicit, connectionParams, SolrConnectionParams.COLLECTION);
removeUnlessExplicit(explicit, connectionParams, SolrConnectionParams.ZOOKEEPER);
} else if (oldSolrMode.equals("classic") && newSolrMode.equals("cloud")) {
// Switch from classic to cloud -- remove any classic specific parameters
removeUnlessExplicit(explicit, connectionParams, SolrConnectionParams.SHARDER_TYPE);
removeUnlessExplicit(explicit, connectionParams, SolrConnectionParams.MAX_CONNECTIONS);
removeUnlessExplicit(explicit, connectionParams, SolrConnectionParams.MAX_CONNECTIONS_PER_HOST);
// remove any solr.shard.* parameter that wasn't set explicitly
// The matching keys are gathered in a separate list first; NOTE(review): the statement adding to
// shardParams is not visible here -- presumably done to avoid removing while iterating the key set.
List<String> shardParams = Lists.newArrayList();
Pattern pattern = Pattern.compile(Pattern.quote(SolrConnectionParams.SOLR_SHARD_PREFIX) + "\\d+");
for (String param : connectionParams.keySet()) {
if (pattern.matcher(param).matches()) {
for (String shardParam : shardParams) {
removeUnlessExplicit(explicit, connectionParams, shardParam);
// if we detect a switch from classic to cloud,
// automatically clear solr zk param and solr collection param
// NOTE(review): no code follows the comment above in this view, and the zk/collection keys are
// cleared on the cloud-to-classic switch earlier -- the comment looks stale; confirm.
// Validate that the minimum required connection params are present
if (!connectionParams.containsKey(SolrConnectionParams.MODE)
|| connectionParams.get(SolrConnectionParams.MODE).equals("cloud")) {
// handle cloud params
if (!connectionParams.containsKey(SolrConnectionParams.ZOOKEEPER)) {
// Fall back to the indexer's own ZooKeeper connection string with a "/solr" suffix.
String solrZk = getZkConnectionString() + "/solr";
System.err.println("WARNING: no -cp solr.zk specified, will use " + solrZk);
// NOTE(review): literal key here, constant in the check above -- presumably the same key; confirm.
connectionParams.put("solr.zk", solrZk);
if (!connectionParams.containsKey(SolrConnectionParams.COLLECTION)) {
throw new CliException(
"ERROR: no -cp solr.collection=collectionName specified (this is required when solr.mode=cloud)");
// TODO: throw error if sharder type is specified or if shards are listed
} else if (connectionParams.get(SolrConnectionParams.MODE).equals("classic")) {
// handle classic params
// Check that there is at least one shard, and that the shards are valid
if (SolrConnectionParamUtil.getShards(connectionParams).size() == 0) {
throw new CliException("ERROR: You need at least one shard when using solr classic");
} else {
// NOTE(review): the remainder of this message expression is not visible in this view.
throw new CliException("ERROR: solr.mode should be 'cloud' or 'classic'. Invalid value: " +
return connectionParams;
/**
 * Removes a connection parameter unless it was set explicitly.
 *
 * @param explicit List of parameters that were set explicitly
 * @param connectionParams Current connectionParams
 * @param param The parameter to remove
 */
// Acts only when `param` is absent from `explicit`.
// NOTE(review): the guarded statement is not visible in this view; presumably it removes `param` from
// `connectionParams` -- confirm against the complete file.
private void removeUnlessExplicit(List<String> explicit, Map<String, String> connectionParams, String param) {
if (!explicit.contains(param)) {
/**
 * Tells whether {@code param} is a recognized connection parameter name.
 *
 * <p>A name is accepted when it is contained in the {@code fixed} list or when it consists of
 * {@code SolrConnectionParams.SOLR_SHARD_PREFIX} followed by one or more digits.
 *
 * @param param the parameter name to check
 * @return true for a recognized name, false otherwise
 */
private boolean isValidConnectionParam(String param) {
// NOTE(review): the elements of this list are not visible in this view.
List<String> fixed = Lists.newArrayList(
if (fixed.contains(param)) {
return true;
// The prefix is quoted so that it is matched literally rather than as a regular expression.
if (param.matches(Pattern.quote(SolrConnectionParams.SOLR_SHARD_PREFIX) + "\\d+")) {
return true;
return false;
* Returns a zero-length array in case the configuration should be removed.
protected String[] getBatchIndexingCliArguments(OptionSet options, OptionSpec<String> option) throws IOException {
String fileName = option.value(options);
if (fileName == null) {
return new String[0];
File file = new File(fileName);
if (!file.exists()) {
StringBuilder msg = new StringBuilder();
msg.append("Specified batch cli arguments configuration file not found:\n");
throw new CliException(msg.toString());
return Iterables.toArray(Splitter.on(" ").split(FileUtils.readFileToString(file)), String.class);
* Converter for jopt-simple that parses key=value pairs.
private static class StringPairConverter implements ValueConverter<Pair<String, String>> {
public Pair<String, String> convert(String input) {
int eqPos = input.indexOf('=');
if (eqPos == -1) {
throw new ValueConversionException("Parameter should be in the form key=value, which the " +
"following is not: '" + input + "'.");
String key = input.substring(0, eqPos).trim();
String value = input.substring(eqPos + 1).trim();
return Pair.newPair(key, value);
public Class<Pair<String, String>> valueType() {
Class<?> pairClass = Pair.class;
return (Class<Pair<String, String>>) pairClass;
public String valuePattern() {
return "key=value";
private static class EnumConverter<T extends Enum<T>> implements ValueConverter<T> {
Class<T> enumClass;
EnumConverter(Class<T> enumClass) {
this.enumClass = enumClass;
public T convert(String input) {
try {
return Enum.valueOf(enumClass, input.toUpperCase());
} catch (IllegalArgumentException e) {
throw new ValueConversionException("Unrecognized value for enum " + enumClass.getSimpleName()
+ ": '" + input + "'.");
public Class<T> valueType() {
return enumClass;
public String valuePattern() {
return null;