// Note: data written to the JobConf will be silently discarded
@Override
public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
Settings settings = SettingsManager.loadFrom(job);
InitializationUtils.discoverNodesIfNeeded(settings, log);
InitializationUtils.discoverEsVersion(settings, log);
String savedSettings = settings.save();
RestRepository client = new RestRepository(settings);
boolean indexExists = client.indexExists(true);
Map<Shard, Node> targetShards = null;
if (!indexExists) {
if (settings.getIndexReadMissingAsEmpty()) {
log.info(String.format("Index [%s] missing - treating it as empty", settings.getResourceRead()));
targetShards = Collections.emptyMap();
}
else {
client.close();
throw new EsHadoopIllegalArgumentException(
String.format("Index [%s] missing and settings [%s] is set to false", settings.getResourceRead(), ConfigurationOptions.ES_FIELD_READ_EMPTY_AS_NULL));
}
}
else {
targetShards = client.getReadTargetShards();
if (log.isTraceEnabled()) {
log.trace("Creating splits for shards " + targetShards);
}
}
Version.logVersion();
log.info(String.format("Reading from [%s]", settings.getResourceRead()));
String savedMapping = null;
if (!targetShards.isEmpty()) {
Field mapping = client.getMapping();
log.info(String.format("Discovered mapping {%s} for [%s]", mapping, settings.getResourceRead()));
// validate if possible
FieldPresenceValidation validation = settings.getFieldExistanceValidation();
if (validation.isRequired()) {
MappingUtils.validateMapping(settings.getScrollFields(), mapping, validation, log);
}
//TODO: implement this more efficiently
savedMapping = IOUtils.serializeToBase64(mapping);