}
public void textTest(int numRegularRecords, int numPartitions, long availableMemory,
int numLargeKeys, int numLargevalues, int numLargeKvPairs) throws IOException,
InterruptedException {
Partitioner partitioner = new HashPartitioner();
ApplicationId appId = ApplicationId.newInstance(10000, 1);
TezCounters counters = new TezCounters();
String uniqueId = UUID.randomUUID().toString();
OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);
Random random = new Random();
Configuration conf = createConfiguration(outputContext, Text.class, Text.class, shouldCompress,
-1, HashPartitioner.class);
CompressionCodec codec = null;
if (shouldCompress) {
codec = new DefaultCodec();
((Configurable) codec).setConf(conf);
}
int numRecordsWritten = 0;
Map<Integer, Multimap<String, String>> expectedValues = new HashMap<Integer, Multimap<String, String>>();
for (int i = 0; i < numPartitions; i++) {
expectedValues.put(i, LinkedListMultimap.<String, String> create());
}
UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext,
conf, numPartitions, availableMemory);
int sizePerBuffer = kvWriter.sizePerBuffer;
BitSet partitionsWithData = new BitSet(numPartitions);
Text keyText = new Text();
Text valText = new Text();
for (int i = 0; i < numRegularRecords; i++) {
String key = createRandomString(Math.abs(random.nextInt(10)));
String val = createRandomString(Math.abs(random.nextInt(20)));
keyText.set(key);
valText.set(val);
int partition = partitioner.getPartition(keyText, valText, numPartitions);
partitionsWithData.set(partition);
expectedValues.get(partition).put(key, val);
kvWriter.write(keyText, valText);
numRecordsWritten++;
}
// Write Large key records
for (int i = 0; i < numLargeKeys; i++) {
String key = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
String val = createRandomString(Math.abs(random.nextInt(20)));
keyText.set(key);
valText.set(val);
int partition = partitioner.getPartition(keyText, valText, numPartitions);
partitionsWithData.set(partition);
expectedValues.get(partition).put(key, val);
kvWriter.write(keyText, valText);
numRecordsWritten++;
}
// Write Large val records
for (int i = 0; i < numLargevalues; i++) {
String key = createRandomString(Math.abs(random.nextInt(10)));
String val = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
keyText.set(key);
valText.set(val);
int partition = partitioner.getPartition(keyText, valText, numPartitions);
partitionsWithData.set(partition);
expectedValues.get(partition).put(key, val);
kvWriter.write(keyText, valText);
numRecordsWritten++;
}
// Write records where key + val are large (but both can fit in the buffer individually)
for (int i = 0; i < numLargeKvPairs; i++) {
String key = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
String val = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
keyText.set(key);
valText.set(val);
int partition = partitioner.getPartition(keyText, valText, numPartitions);
partitionsWithData.set(partition);
expectedValues.get(partition).put(key, val);
kvWriter.write(keyText, valText);
numRecordsWritten++;
}
List<Event> events = kvWriter.close();
verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));
TezCounter outputLargeRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_LARGE_RECORDS);
assertEquals(numLargeKeys + numLargevalues + numLargeKvPairs,
outputLargeRecordsCounter.getValue());
// Validate the event
assertEquals(1, events.size());
assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
assertEquals(0, cdme.getSourceIndexStart());
assertEquals(numPartitions, cdme.getCount());
DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(
ByteString.copyFrom(cdme
.getUserPayload()));
assertFalse(eventProto.hasData());
BitSet emptyPartitionBits = null;
if (partitionsWithData.cardinality() != numPartitions) {
assertTrue(eventProto.hasEmptyPartitions());
byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto
.getEmptyPartitions());
emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
assertEquals(numPartitions - partitionsWithData.cardinality(),
emptyPartitionBits.cardinality());
} else {
assertFalse(eventProto.hasEmptyPartitions());
emptyPartitionBits = new BitSet(numPartitions);
}
assertEquals(HOST_STRING, eventProto.getHost());
assertEquals(SHUFFLE_PORT, eventProto.getPort());
assertEquals(uniqueId, eventProto.getPathComponent());
// Verify the actual data written to the output file against the expected values
TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
Path outputFilePath = null;
Path spillFilePath = null;
try {
outputFilePath = taskOutput.getOutputFile();
} catch (DiskErrorException e) {
if (numRecordsWritten > 0) {
fail();
} else {
// Record checking not required.
return;
}
}
try {
spillFilePath = taskOutput.getOutputIndexFile();
} catch (DiskErrorException e) {
if (numRecordsWritten > 0) {
fail();
} else {
// Record checking not required.
return;
}
}
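// Load the index file to locate each partition's data within the output file.
// (A run with 0 records returns above when the output or index file is missing.)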
TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
DataInputBuffer keyBuffer = new DataInputBuffer();
DataInputBuffer valBuffer = new DataInputBuffer();
Text keyDeser = new Text();
Text valDeser = new Text();
for (int i = 0; i < numPartitions; i++) {
if (emptyPartitionBits.get(i)) {
continue;
}
TezIndexRecord indexRecord = spillRecord.getIndex(i);
FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
inStream.seek(indexRecord.getStartOffset());
IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null,
null, false, 0, -1);
while (reader.nextRawKey(keyBuffer)) {
reader.nextRawValue(valBuffer);
keyDeser.readFields(keyBuffer);
valDeser.readFields(valBuffer);
int partition = partitioner.getPartition(keyDeser, valDeser, numPartitions);
assertTrue(expectedValues.get(partition).remove(keyDeser.toString(), valDeser.toString()));
}
inStream.close();
}
for (int i = 0; i < numPartitions; i++) {