/*
* Copyright 2013 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package voldemort.tools;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Queue;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import joptsimple.OptionException;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import org.apache.commons.codec.DecoderException;
import org.apache.log4j.Logger;
import voldemort.client.ClientConfig;
import voldemort.client.protocol.admin.AdminClient;
import voldemort.client.protocol.admin.AdminClientConfig;
import voldemort.cluster.Cluster;
import voldemort.routing.BaseStoreRoutingPlan;
import voldemort.store.StoreDefinition;
import voldemort.utils.ByteUtils;
import voldemort.utils.Utils;
import voldemort.utils.ByteArray;
import voldemort.versioning.Version;
import voldemort.versioning.Versioned;
/**
* The KeyVersionFetcherCLI is a rudimentary tool that outputs the versions of
* a sample of existing keys from a cluster. For each store to be sampled, a
* distinct input file of keys is expected. And, for each of these, a distinct
* file of key-versions is generated.
*
*/
public class KeyVersionFetcherCLI {
private static Logger logger = Logger.getLogger(KeyVersionFetcherCLI.class);
private final static int DEFAULT_KEY_PARALLELISM = 4;
private final static int DEFAULT_PROGRESS_PERIOD_OPS = 1000;
private final static int DEFAULT_OUTPUT_BATCH_SIZE = 100;
private final AdminClient adminClient;
private final Cluster cluster;
private final List<StoreDefinition> storeDefinitions;
private final Set<String> storeNamesSet;
private final String inDir;
private final String outDir;
private final ExecutorService kvFetcherService;
private final int progressPeriodOps;
private final int outputBatchSize;
private final boolean details;
private final long startTimeMs;
private static AtomicInteger fetches = new AtomicInteger(0);
public KeyVersionFetcherCLI(String url,
String inDir,
String outDir,
List<String> storeNames,
int keyParallelism,
int progressPeriodOps,
int outputBatchSize,
boolean details) {
if(logger.isInfoEnabled()) {
logger.info("Connecting to bootstrap server: " + url);
}
Properties clientProps = new Properties();
clientProps.put("connection_timeout_ms", "2500");
clientProps.put("max_connections", Integer.toString(keyParallelism));
clientProps.put("routing_timeout_ms", "10000");
clientProps.put("socket_timeout_ms", "10000");
clientProps.put("failuredetector_threshold", "10");
this.adminClient = new AdminClient(url,
new AdminClientConfig(),
new ClientConfig(clientProps));
this.cluster = adminClient.getAdminClientCluster();
this.storeDefinitions = adminClient.metadataMgmtOps.getRemoteStoreDefList(cluster.getNodeIds()
.iterator()
.next())
.getValue();
this.storeNamesSet = new HashSet<String>();
for(StoreDefinition storeDefinition: storeDefinitions) {
String storeName = storeDefinition.getName();
if(storeNames != null) {
if(!storeNames.contains(storeName)) {
logger.debug("Will not sample store "
+ storeName
+ " since it is not in list of storeNames provided on command line.");
continue;
}
}
this.storeNamesSet.add(storeName);
}
if(storeNames != null) {
List<String> badStoreNames = new LinkedList<String>();
for(String storeName: storeNames) {
if(!this.storeNamesSet.contains(storeName)) {
badStoreNames.add(storeName);
}
}
if(badStoreNames.size() > 0) {
Utils.croak("Some storeNames provided on the command line were not found on this cluster: "
+ badStoreNames);
}
}
this.inDir = inDir;
this.outDir = outDir;
this.kvFetcherService = Executors.newFixedThreadPool(keyParallelism);
this.progressPeriodOps = progressPeriodOps;
this.outputBatchSize = outputBatchSize;
this.details = details;
this.startTimeMs = System.currentTimeMillis();
}
public boolean sampleStores() {
for(StoreDefinition storeDefinition: storeDefinitions) {
if(storeNamesSet.contains(storeDefinition.getName())) {
if(!sampleStore(storeDefinition)) {
logger.info("Problem sampling store " + storeDefinition.getName()
+ ".. Bailing..");
return false;
}
}
}
return true;
}
public void updateFetchProgress(String storeName) {
int curFetches = fetches.incrementAndGet();
if(0 == curFetches % progressPeriodOps) {
if(logger.isInfoEnabled()) {
long durationS = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()
- startTimeMs);
logger.info("Fetched " + curFetches + " in " + durationS + " seconds for store "
+ storeName);
}
}
}
public class ZoneToNaryToString {
Map<Integer, Map<Integer, Set<String>>> zoneToNaryToString;
ZoneToNaryToString() {
zoneToNaryToString = new HashMap<Integer, Map<Integer, Set<String>>>();
}
public void addZoneNaryString(int zoneId, int zoneNAry, String string) {
if(!zoneToNaryToString.containsKey(zoneId)) {
zoneToNaryToString.put(zoneId, new HashMap<Integer, Set<String>>());
}
if(!zoneToNaryToString.get(zoneId).containsKey(zoneNAry)) {
zoneToNaryToString.get(zoneId).put(zoneNAry, new TreeSet<String>());
}
zoneToNaryToString.get(zoneId).get(zoneNAry).add(string);
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
Set<Integer> sortedZoneIds = new TreeSet<Integer>(zoneToNaryToString.keySet());
for(int zoneId: sortedZoneIds) {
Set<Integer> sortedZoneNAries = new TreeSet<Integer>(zoneToNaryToString.get(zoneId)
.keySet());
for(int zoneNary: sortedZoneNAries) {
for(String string: zoneToNaryToString.get(zoneId).get(zoneNary)) {
sb.append(zoneId)
.append(" : ")
.append(zoneNary)
.append(" : ")
.append(string)
.append("\n");
}
}
}
return sb.toString();
}
}
public class FetchKeyVersionsTask implements Callable<String> {
private final BaseStoreRoutingPlan storeRoutingPlan;
private final byte[] key;
FetchKeyVersionsTask(BaseStoreRoutingPlan storeRoutingPlan, byte[] key) {
this.storeRoutingPlan = storeRoutingPlan;
this.key = key;
}
@Override
public String call() throws Exception {
String storeName = storeRoutingPlan.getStoreDefinition().getName();
List<Integer> replicatingNodeIds = storeRoutingPlan.getReplicationNodeList(key);
int masterPartitionId = storeRoutingPlan.getMasterPartitionId(key);
ZoneToNaryToString zoneToNaryToString = new ZoneToNaryToString();
for(int replicatingNodeId: replicatingNodeIds) {
// TODO Not sure why we can't do getVersions(..) here. Seems
// wasteful to fetch the value all the way over from the server
// and discard it here
List<Versioned<byte[]>> values = adminClient.storeOps.getNodeKey(storeName,
replicatingNodeId,
new ByteArray(key));
int zoneId = storeRoutingPlan.getCluster()
.getNodeById(replicatingNodeId)
.getZoneId();
int zoneNAry = storeRoutingPlan.getZoneNAry(zoneId, replicatingNodeId, key);
// Sort the versions so that on-disk order of concurrent
// versions is not visible.
// FIXME this will break since VectorClock is not a
// 'Comparable'.
/*
* TreeSet<Version> sortedVersions = new TreeSet<Version>();
* for(Versioned<byte[]> value: values) {
* sortedVersions.add(value.getVersion()); }
*/
StringBuilder sb = new StringBuilder();
sb.append(ByteUtils.toHexString(key));
for(Versioned<byte[]> value: values) {
// TODO : This needs to be fixed for RO stores
Version version = value.getVersion();
sb.append(" : ").append(version.toString());
}
if(details) {
sb.append(" : ")
.append("PartitionId:")
.append(masterPartitionId)
.append(" : ")
.append("NodeId:")
.append(replicatingNodeId)
.append(" : ")
.append("host:")
.append(storeRoutingPlan.getCluster()
.getNodeById(replicatingNodeId)
.getHost());
}
zoneToNaryToString.addZoneNaryString(zoneId, zoneNAry, sb.toString());
}
updateFetchProgress(storeName);
return zoneToNaryToString.toString();
}
}
public boolean sampleStore(StoreDefinition storeDefinition) {
String storeName = storeDefinition.getName();
String keysFileName = inDir + System.getProperty("file.separator") + storeName + ".keys";
File keysFile = new File(keysFileName);
if(!keysFile.exists()) {
logger.error("Keys file " + keysFileName + " does not exist!");
return false;
}
String kvFileName = outDir + System.getProperty("file.separator") + storeName + ".kvs";
File kvFile = new File(kvFileName);
if(kvFile.exists()) {
logger.info("Key-Version file " + kvFileName
+ " exists, so will not sample keys from file " + keysFileName + ".");
return true;
}
BaseStoreRoutingPlan storeRoutingPlan = new BaseStoreRoutingPlan(cluster, storeDefinition);
BufferedReader keyReader = null;
BufferedWriter kvWriter = null;
try {
keyReader = new BufferedReader(new FileReader(keysFileName));
kvWriter = new BufferedWriter(new FileWriter(kvFileName));
boolean readAllKeys = false;
while(!readAllKeys) {
Queue<Future<String>> futureKVs = new LinkedList<Future<String>>();
for(int numFetchTasks = 0; numFetchTasks < this.outputBatchSize; numFetchTasks++) {
String keyLine = keyReader.readLine();
if(keyLine == null) {
readAllKeys = true;
break;
}
byte[] keyInBytes = ByteUtils.fromHexString(keyLine.trim());
FetchKeyVersionsTask kvFetcher = new FetchKeyVersionsTask(storeRoutingPlan,
keyInBytes);
Future<String> future = kvFetcherService.submit(kvFetcher);
futureKVs.add(future);
}
if(futureKVs.size() > 0) {
while(!futureKVs.isEmpty()) {
Future<String> future = futureKVs.poll();
String keyVersions = future.get();
kvWriter.append(keyVersions);
}
}
}
return true;
} catch(DecoderException de) {
logger.error("Could not decode key to sample for store " + storeName, de);
return false;
} catch(IOException ioe) {
logger.error("IOException caught while sampling store " + storeName, ioe);
return false;
} catch(InterruptedException ie) {
logger.error("InterruptedException caught while sampling store " + storeName, ie);
return false;
} catch(ExecutionException ee) {
logger.error("Encountered an execution exception while sampling " + storeName, ee);
ee.printStackTrace();
return false;
} finally {
if(keyReader != null) {
try {
keyReader.close();
} catch(IOException e) {
logger.error("IOException caught while trying to close keyReader for store "
+ storeName, e);
e.printStackTrace();
}
}
if(kvWriter != null) {
try {
kvWriter.close();
} catch(IOException e) {
logger.error("IOException caught while trying to close kvWriter for store "
+ storeName, e);
e.printStackTrace();
}
}
}
}
public void stop() {
if(adminClient != null) {
adminClient.close();
}
kvFetcherService.shutdown();
}
/**
* Return args parser
*
* @return program parser
* */
private static OptionParser getParser() {
OptionParser parser = new OptionParser();
parser.accepts("help", "print help information");
parser.accepts("url", "[REQUIRED] bootstrap URL")
.withRequiredArg()
.describedAs("bootstrap-url")
.ofType(String.class);
parser.accepts("in-dir",
"[REQUIRED] Directory in which to find the input key files (named \"{storeName}.kvs\", generated by KeyFetcherCLI.")
.withRequiredArg()
.describedAs("inputDirectory")
.ofType(String.class);
parser.accepts("out-dir",
"[REQUIRED] Directory in which to output the key files (named \"{storeName}.kvs\".")
.withRequiredArg()
.describedAs("outputDirectory")
.ofType(String.class);
parser.accepts("store-names",
"Store names to sample. Comma delimited list or singleton. [Default: ALL]")
.withRequiredArg()
.describedAs("storeNames")
.withValuesSeparatedBy(',')
.ofType(String.class);
parser.accepts("parallelism",
"Number of key-versions to sample in parallel. [Default: "
+ DEFAULT_KEY_PARALLELISM + " ]")
.withRequiredArg()
.describedAs("storeParallelism")
.ofType(Integer.class);
parser.accepts("progress-period-ops",
"Number of operations between progress info is displayed. [Default: "
+ DEFAULT_PROGRESS_PERIOD_OPS + " ]")
.withRequiredArg()
.describedAs("progressPeriodOps")
.ofType(Integer.class);
parser.accepts("output-batch-size",
"Number of keys fetched and written out in sorted order at once. [Default: "
+ DEFAULT_OUTPUT_BATCH_SIZE + " ]")
.withRequiredArg()
.describedAs("outputBatchSize")
.ofType(Integer.class);
parser.accepts("details",
"print details of each key-version: partition ID, node ID, & hostname");
return parser;
}
/**
* Print Usage to STDOUT
*/
private static void printUsage() {
StringBuilder help = new StringBuilder();
help.append("KeyFetcherCLI Tool\n");
help.append(" Find one key from each store-partition. Output keys per store.\n");
help.append("Options:\n");
help.append(" Required:\n");
help.append(" --url <bootstrap-url>\n");
help.append(" --in-dir <inputDirectory>\n");
help.append(" --out-dir <outputDirectory>\n");
help.append(" Optional:\n");
help.append(" --store-names <storeName>[,<storeName>...]\n");
help.append(" --parallelism <keyParallelism>\n");
help.append(" --progress-period-ops <progressPeriodOps>\n");
help.append(" --output-batch-size <operationsInOutputBatch>\n");
help.append(" --details\n");
help.append(" --help\n");
System.out.print(help.toString());
}
private static void printUsageAndDie(String errMessage) {
printUsage();
Utils.croak("\n" + errMessage);
}
// In the future, this tool could be expanded with the following options:
// - fetch value in addition to version
// - choose between printing human readable data (.toString()) or computer
// readable data (ByteUtils.toHexString(byte[])).
public static void main(String[] args) throws Exception {
OptionParser parser = null;
OptionSet options = null;
try {
parser = getParser();
options = parser.parse(args);
} catch(OptionException oe) {
parser.printHelpOn(System.out);
printUsageAndDie("Exception when parsing arguments : " + oe.getMessage());
return;
}
/* validate options */
if(options.has("help")) {
parser.printHelpOn(System.out);
printUsage();
return;
}
if(!options.hasArgument("url") || !options.hasArgument("in-dir")
|| !options.hasArgument("out-dir")) {
parser.printHelpOn(System.out);
printUsageAndDie("Missing a required argument.");
return;
}
String url = (String) options.valueOf("url");
String inDir = (String) options.valueOf("in-dir");
Utils.mkdirs(new File(inDir));
String outDir = (String) options.valueOf("out-dir");
Utils.mkdirs(new File(outDir));
List<String> storeNames = null;
if(options.hasArgument("store-names")) {
@SuppressWarnings("unchecked")
List<String> list = (List<String>) options.valuesOf("store-names");
storeNames = list;
}
Integer keyParallelism = DEFAULT_KEY_PARALLELISM;
if(options.hasArgument("parallelism")) {
keyParallelism = (Integer) options.valueOf("parallelism");
}
Integer progressPeriodOps = DEFAULT_PROGRESS_PERIOD_OPS;
if(options.hasArgument("progress-period-ops")) {
progressPeriodOps = (Integer) options.valueOf("progress-period-ops");
}
Integer outputBatchSize = DEFAULT_OUTPUT_BATCH_SIZE;
if(options.hasArgument("output-batch-size")) {
outputBatchSize = (Integer) options.valueOf("output-batch-size");
}
boolean details = options.has("details");
try {
KeyVersionFetcherCLI sampler = new KeyVersionFetcherCLI(url,
inDir,
outDir,
storeNames,
keyParallelism,
progressPeriodOps,
outputBatchSize,
details);
try {
if(!sampler.sampleStores()) {
logger.error("Key-versions were not successfully sampled from some stores.");
}
} finally {
sampler.stop();
}
} catch(Exception e) {
logger.error("Exception during key-version sampling: ", e);
}
}
}