// This isn't good (who would ever want to use a regex to check string length?)
// However, short of writing an entire input format, this is the best we can do.
// It seems to improve performance by >50% with NSRL loaded in, so it's better
// than nothing.
scan.setFilter(new RowFilter(CompareOp.EQUAL, new RegexStringComparator(".{20,}")));
HBaseConfiguration.addHbaseResources(j.getConfiguration());
j.getConfiguration().set(TableInputFormat.INPUT_TABLE, "hash");
j.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));