// Prepare this splitter before any work is done (what init() sets up is not visible here).
this.init();
// True when the job is configured to read directly from the shards
// (see the mongo.input.split.read_from_shards setting named in the error below).
boolean targetShards = MongoConfigUtil.canReadSplitsFromShards(getConfiguration());
// Look up the "chunks" collection in the "config" database; the loop below
// reads "min", "max" and "shard" from each document it returns.
DB configDB = this.mongo.getDB("config");
DBCollection chunksCollection = configDB.getCollection("chunks");
MongoClientURI inputURI = MongoConfigUtil.getInputURI(getConfiguration());
// Fully qualified namespace ("<database>.<collection>") of the input collection,
// used to select only the chunk documents belonging to it.
String inputNS = inputURI.getDatabase() + "." + inputURI.getCollection();
// NOTE(review): this cursor is not closed anywhere in the visible code — confirm it is closed later.
DBCursor cur = chunksCollection.find(new BasicDBObject("ns", inputNS));
// Used below as the round-robin index into the mongos host list.
// NOTE(review): the increment is not visible in this excerpt — confirm it happens once per chunk.
int numChunks = 0;
// Shard ID -> host string for that shard; only populated when targeting shards,
// otherwise it stays null and must not be dereferenced.
Map<String, String> shardsMap = null;
if (targetShards) {
try {
shardsMap = this.getShardsMap();
} catch (Exception e) {
// Something went wrong when trying to read the shards data from the
// config server, so abort the splitting (the original cause is preserved).
throw new SplitFailedException("Couldn't get shards information from config server", e);
}
}
List<String> mongosHostNames = MongoConfigUtil.getInputMongosHosts(this.getConfiguration());
// Reading from shards and spreading reads over several mongos hosts both rewrite
// the split URI, so having both enabled is rejected as a configuration error.
// NOTE(review): this check runs after the shard lookup above, so a failing lookup
// is reported before the conflicting-settings error.
if (targetShards && mongosHostNames.size() > 0) {
throw new SplitFailedException("Setting both mongo.input.split.read_from_shards and mongo.input.mongos_hosts"
+ " does not make sense. ");
}
if (mongosHostNames.size() > 0) {
LOG.info("Using multiple mongos instances (round robin) for reading input.");
}
// Splits grouped by the "shard" value of the chunk they were created from.
Map<String, LinkedList<InputSplit>> shardToSplits = new HashMap<String, LinkedList<InputSplit>>();
// Build one input split per chunk document returned by the query.
while (cur.hasNext()) {
final BasicDBObject row = (BasicDBObject) cur.next();
// The chunk's "min" and "max" documents become the bounds of the split.
BasicDBObject chunkLowerBound = (BasicDBObject) row.get("min");
BasicDBObject chunkUpperBound = (BasicDBObject) row.get("max");
MongoInputSplit chunkSplit = createSplitFromBounds(chunkLowerBound, chunkUpperBound);
// Default to the configured input URI; one of the branches below may replace it.
chunkSplit.setInputURI(inputURI);
String shard = (String) row.get("shard");
if (targetShards) {
// The job is configured to target shards, so replace the
// mongos hostname with the host of the shard's servers.
// shardsMap is non-null here: it was loaded earlier whenever targetShards is true.
String shardHosts = shardsMap.get(shard);
if (shardHosts == null) {
// The chunk names a shard that the shards map does not contain.
throw new SplitFailedException("Couldn't find shard ID: " + shard + " in config.shards.");
}
MongoClientURI newURI = rewriteURI(inputURI, shardHosts);
chunkSplit.setInputURI(newURI);
} else if (mongosHostNames.size() > 0) {
// Multiple mongos hosts are specified, so choose a host name in
// round-robin fashion (numChunks modulo the host count) and rewrite
// the URI using that hostname. This evenly distributes the load to
// avoid pegging a single mongos instance.
String roundRobinHost = mongosHostNames.get(numChunks % mongosHostNames.size());
MongoClientURI newURI = rewriteURI(inputURI, roundRobinHost);
chunkSplit.setInputURI(newURI);
}
// Find the list of splits collected so far for this chunk's shard.
LinkedList<InputSplit> shardList = shardToSplits.get(shard);
if (shardList == null) {
// No list exists yet for this shard, so create one.
shardList = new LinkedList<InputSplit>();