KeyExtent last = new KeyExtent();
String directory = null;
Set<String> knownFiles = new HashSet<String>();
int count = 0;
final MultiTableBatchWriter writer = opts.getConnector().createMultiTableBatchWriter(bwOpts.getBatchWriterConfig());
// collect the list of known files and the directory for each extent
for (Entry<Key,Value> entry : scanner) {
Key key = entry.getKey();
KeyExtent ke = new KeyExtent(key.getRow(), (Text) null);
// when the key extent changes
if (!ke.equals(last)) {
if (directory != null) {
// add any files in the directory unknown to the key extent
count += addUnknownFiles(fs, directory, knownFiles, last, writer, opts.update);
}
directory = null;
knownFiles.clear();
last = ke;
}
if (Constants.METADATA_DIRECTORY_COLUMN.hasColumns(key)) {
directory = entry.getValue().toString();
log.debug("Found directory " + directory + " for row " + key.getRow().toString());
} else if (key.compareColumnFamily(Constants.METADATA_DATAFILE_COLUMN_FAMILY) == 0) {
String filename = key.getColumnQualifier().toString();
knownFiles.add(filename);
log.debug("METADATA file found: " + filename);
}
}
if (directory != null) {
// catch the last key extent
count += addUnknownFiles(fs, directory, knownFiles, last, writer, opts.update);
}
log.info("There were " + count + " files that are unknown to the metadata table");
writer.close();
}