@Override
public void run() throws Exception {
Preconditions.checkState(getInputs().size() == 0);
Preconditions.checkState(getOutputs().size() == 3);
KeyValueWriter streamOutputWriter = (KeyValueWriter) getOutputs().get(STREAM_OUTPUT_NAME)
.getWriter();
KeyValueWriter hashOutputWriter = (KeyValueWriter) getOutputs().get(HASH_OUTPUT_NAME)
.getWriter();
KeyValueWriter expectedOutputWriter = (KeyValueWriter) getOutputs().get(EXPECTED_OUTPUT_NAME)
.getWriter();
float fileSizeFraction = hashOutputFileSize / (float) streamOutputFileSize;
Preconditions.checkState(fileSizeFraction > 0.0f && fileSizeFraction <= 1.0f);
int mod = 1;
int extraKeysMod = 0;
if (fileSizeFraction > overlapApprox) {
// Common keys capped by overlap. Additional ones required in the hashFile.
mod = (int) (1 / overlapApprox);
extraKeysMod = (int) (1 / (fileSizeFraction - overlapApprox));
} else {
// All keys in hashFile must exist in stream file.
mod = (int) (1 / fileSizeFraction);
}
LOG.info("Using mod=" + mod + ", extraKeysMod=" + extraKeysMod);
long count = 0;
long sizeLarge = 0;
long sizeSmall = 0;
long numLargeFileKeys = 0;
long numSmallFileKeys = 0;
long numExpectedKeys = 0;
while (sizeLarge < streamOutputFileSize) {
String str = createOverlapString(13, count);
Text text = new Text(str);
int size = text.getLength();
streamOutputWriter.write(text, NullWritable.get());
sizeLarge += size;
numLargeFileKeys++;
if (count % mod == 0) {
hashOutputWriter.write(text, NullWritable.get());
sizeSmall += size;
numSmallFileKeys++;
expectedOutputWriter.write(text, NullWritable.get());
numExpectedKeys++;
}
if (extraKeysMod != 0 && count % extraKeysMod == 0) {
String nStr = createNonOverlaptring(13, count);
Text nText = new Text(nStr);