}
log.debug("fs " + fs + " files: " + Arrays.toString(paths) + " location: " + location);
Collection<String> goodPaths = cleanUpFiles(fs, files, location, true);
for (String path : goodPaths) {
String filename = new Path(path).getName();
DataFileValue dfv = new DataFileValue(0, 0);
datafiles.put(locText.toString() + "/" + filename, dfv);
}
} else {
SortedMap<Key,Value> datafilesMetadata;
Text rowName = extent.getMetadataEntry();
if (tabletsKeyValues != null && tabletsKeyValues.size() > 0) {
datafilesMetadata = new TreeMap<Key,Value>();
for (Entry<Key,Value> entry : tabletsKeyValues.entrySet()) {
if (entry.getKey().compareRow(rowName) == 0 && entry.getKey().compareColumnFamily(Constants.METADATA_DATAFILE_COLUMN_FAMILY) == 0) {
datafilesMetadata.put(new Key(entry.getKey()), new Value(entry.getValue()));
}
}
} else {
ScannerImpl mdScanner = new ScannerImpl(HdfsZooInstance.getInstance(), SecurityConstants.getSystemCredentials(), Constants.METADATA_TABLE_ID,
Constants.NO_AUTHS);
// Commented out because when no data file is present, each tablet will scan through metadata table and return nothing
// reduced batch size to improve performance
// changed here after endKeys were implemented from 10 to 1000
mdScanner.setBatchSize(1000);
// leave these in, again, now using endKey for safety
mdScanner.fetchColumnFamily(Constants.METADATA_DATAFILE_COLUMN_FAMILY);
mdScanner.setRange(new Range(rowName));
datafilesMetadata = new TreeMap<Key,Value>();
for (Entry<Key,Value> entry : mdScanner) {
if (entry.getKey().compareRow(rowName) != 0) {
break;
}
datafilesMetadata.put(new Key(entry.getKey()), new Value(entry.getValue()));
}
}
Iterator<Entry<Key,Value>> dfmdIter = datafilesMetadata.entrySet().iterator();
while (dfmdIter.hasNext()) {
Entry<Key,Value> entry = dfmdIter.next();
datafiles.put(entry.getKey().getColumnQualifier().toString(), new DataFileValue(entry.getValue().get()));
}
}
return datafiles;
}