.get(VALUE_REGEX);
Set<Pair<Text,Text>> fetchedColumns = getFetchedColumns(conf);
Authorizations auths = getAuthorizations(conf);
byte[] password = getPassword(conf);
String username = getUsername(conf);
Instance instance = getInstance(conf);
List<AccumuloIterator> iterators = getIterators(conf);
List<AccumuloIteratorOption> options = getIteratorOptions(conf);
Level logLevel = getLogLevel(conf);
if (ranges.isEmpty()) {
ranges = new ArrayList<Range>(1);
ranges.add(new Range());
}
// get the metadata information for these ranges
Map<String,Map<KeyExtent,List<Range>>> binnedRanges = new HashMap<String,Map<KeyExtent,List<Range>>>();
TabletLocator tl;
try {
if (isOfflineScan(InputFormatBase.getConfiguration(job))) {
binnedRanges = binOfflineTable(job, tableName, ranges);
while (binnedRanges == null) {
// Some tablets were still online, try again
UtilWaitThread.sleep(100 + (int) (Math.random() * 100)); // sleep randomly between 100 and 200 ms
binnedRanges = binOfflineTable(job, tableName, ranges);
}
} else {
String tableId = null;
tl = getTabletLocator(InputFormatBase.getConfiguration(job));
// it's possible that the cache could contain complete, but old information about a table's tablets... so clear it
tl.invalidateCache();
while (!tl.binRanges(ranges, binnedRanges).isEmpty()) {
if (!(instance instanceof MockInstance)) {
if (tableId == null)
tableId = Tables.getTableId(instance, tableName);
if (!Tables.exists(instance, tableId))
throw new TableDeletedException(tableId);
if (Tables.getTableState(instance, tableId) == TableState.OFFLINE)
throw new TableOfflineException(instance, tableId);
}
binnedRanges.clear();
log.warn("Unable to locate bins for specified ranges. Retrying.");
UtilWaitThread.sleep(100 + (int) (Math.random() * 100)); // sleep randomly between 100 and 200 ms
tl.invalidateCache();
}
}
} catch (Exception e) {
throw new IOException(e);
}
ArrayList<InputSplit> splits = new ArrayList<InputSplit>(ranges.size());
HashMap<Range,ArrayList<String>> splitsToAdd = null;
if (!autoAdjust)
splitsToAdd = new HashMap<Range,ArrayList<String>>();
HashMap<String,String> hostNameCache = new HashMap<String,String>();
for (Entry<String,Map<KeyExtent,List<Range>>> tserverBin : binnedRanges.entrySet()) {
String ip = tserverBin.getKey().split(":", 2)[0];
String location = hostNameCache.get(ip);
if (location == null) {
InetAddress inetAddress = InetAddress.getByName(ip);
location = inetAddress.getHostName();
hostNameCache.put(ip, location);
}
for (Entry<KeyExtent,List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
Range ke = extentRanges.getKey().toDataRange();
for (Range r : extentRanges.getValue()) {
if (autoAdjust) {
// divide ranges into smaller ranges, based on the
// tablets
splits.add(new RangeInputSplit(ke.clip(r), new String[] {location}));
} else {
// don't divide ranges
ArrayList<String> locations = splitsToAdd.get(r);
if (locations == null)
locations = new ArrayList<String>(1);
locations.add(location);
splitsToAdd.put(r, locations);
}
}
}
}
if (!autoAdjust)
for (Entry<Range,ArrayList<String>> entry : splitsToAdd.entrySet())
splits.add(new RangeInputSplit(entry.getKey(), entry.getValue().toArray(new String[0])));
for (InputSplit inputSplit : splits) {
RangeInputSplit split = (RangeInputSplit) inputSplit;
split.setTable(tableName);
split.setOffline(offline);
split.setIsolatedScan(isolated);
split.setUsesLocalIterators(localIterators);
split.setMockInstance(mockInstance);
split.setMaxVersions(maxVersions);
split.setRowRegex(rowRegex);
split.setColfamRegex(colfamRegex);
split.setColqualRegex(colqualRegex);
split.setValueRegex(valueRegex);
split.setFetchedColumns(fetchedColumns);
split.setUsername(username);
split.setPassword(password);
split.setInstanceName(instance.getInstanceName());
split.setZooKeepers(instance.getZooKeepers());
split.setAuths(auths);
split.setIterators(iterators);
split.setOptions(options);
split.setLogLevel(logLevel);
}