Package org.apache.tez.runtime.library.partitioner

Examples of org.apache.tez.runtime.library.partitioner.HashPartitioner


  }

  public void textTest(int numRegularRecords, int numPartitions, long availableMemory,
      int numLargeKeys, int numLargevalues, int numLargeKvPairs) throws IOException,
      InterruptedException {
    Partitioner partitioner = new HashPartitioner();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);
    Random random = new Random();

    Configuration conf = createConfiguration(outputContext, Text.class, Text.class, shouldCompress,
        -1, HashPartitioner.class);
    CompressionCodec codec = null;
    if (shouldCompress) {
      codec = new DefaultCodec();
      ((Configurable) codec).setConf(conf);
    }

    int numRecordsWritten = 0;

    Map<Integer, Multimap<String, String>> expectedValues = new HashMap<Integer, Multimap<String, String>>();
    for (int i = 0; i < numPartitions; i++) {
      expectedValues.put(i, LinkedListMultimap.<String, String> create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext,
        conf, numPartitions, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;

    BitSet partitionsWithData = new BitSet(numPartitions);
    Text keyText = new Text();
    Text valText = new Text();
    for (int i = 0; i < numRegularRecords; i++) {
      String key = createRandomString(Math.abs(random.nextInt(10)));
      String val = createRandomString(Math.abs(random.nextInt(20)));
      keyText.set(key);
      valText.set(val);
      int partition = partitioner.getPartition(keyText, valText, numPartitions);
      partitionsWithData.set(partition);
      expectedValues.get(partition).put(key, val);
      kvWriter.write(keyText, valText);
      numRecordsWritten++;
    }

    // Write Large key records
    for (int i = 0; i < numLargeKeys; i++) {
      String key = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
      String val = createRandomString(Math.abs(random.nextInt(20)));
      keyText.set(key);
      valText.set(val);
      int partition = partitioner.getPartition(keyText, valText, numPartitions);
      partitionsWithData.set(partition);
      expectedValues.get(partition).put(key, val);
      kvWriter.write(keyText, valText);
      numRecordsWritten++;
    }

    // Write Large val records
    for (int i = 0; i < numLargevalues; i++) {
      String key = createRandomString(Math.abs(random.nextInt(10)));
      String val = createRandomString(sizePerBuffer + Math.abs(random.nextInt(100)));
      keyText.set(key);
      valText.set(val);
      int partition = partitioner.getPartition(keyText, valText, numPartitions);
      partitionsWithData.set(partition);
      expectedValues.get(partition).put(key, val);
      kvWriter.write(keyText, valText);
      numRecordsWritten++;
    }

    // Write records where key + val are large (but both can fit in the buffer individually)
    for (int i = 0; i < numLargeKvPairs; i++) {
      String key = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
      String val = createRandomString(sizePerBuffer / 2 + Math.abs(random.nextInt(100)));
      keyText.set(key);
      valText.set(val);
      int partition = partitioner.getPartition(keyText, valText, numPartitions);
      partitionsWithData.set(partition);
      expectedValues.get(partition).put(key, val);
      kvWriter.write(keyText, valText);
      numRecordsWritten++;
    }

    List<Event> events = kvWriter.close();
    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    TezCounter outputLargeRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_LARGE_RECORDS);
    assertEquals(numLargeKeys + numLargevalues + numLargeKvPairs,
        outputLargeRecordsCounter.getValue());

    // Validate the event
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numPartitions, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto.parseFrom(
        ByteString.copyFrom(cdme
            .getUserPayload()));
    assertFalse(eventProto.hasData());
    BitSet emptyPartitionBits = null;
    if (partitionsWithData.cardinality() != numPartitions) {
      assertTrue(eventProto.hasEmptyPartitions());
      byte[] emptyPartitions = TezCommonUtils.decompressByteStringToByteArray(eventProto
          .getEmptyPartitions());
      emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
      assertEquals(numPartitions - partitionsWithData.cardinality(),
          emptyPartitionBits.cardinality());
    } else {
      assertFalse(eventProto.hasEmptyPartitions());
      emptyPartitionBits = new BitSet(numPartitions);
    }
    assertEquals(HOST_STRING, eventProto.getHost());
    assertEquals(SHUFFLE_PORT, eventProto.getPort());
    assertEquals(uniqueId, eventProto.getPathComponent());

    // Verify the data
    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = null;
    Path spillFilePath = null;
    try {
      outputFilePath = taskOutput.getOutputFile();
    } catch (DiskErrorException e) {
      if (numRecordsWritten > 0) {
        fail();
      } else {
        // Record checking not required.
        return;
      }
    }
    try {
      spillFilePath = taskOutput.getOutputIndexFile();
    } catch (DiskErrorException e) {
      if (numRecordsWritten > 0) {
        fail();
      } else {
        // Record checking not required.
        return;
      }
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    Text keyDeser = new Text();
    Text valDeser = new Text();
    for (int i = 0; i < numPartitions; i++) {
      if (emptyPartitionBits.get(i)) {
        continue;
      }
      TezIndexRecord indexRecord = spillRecord.getIndex(i);
      FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
      inStream.seek(indexRecord.getStartOffset());
      IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null,
          null, false, 0, -1);
      while (reader.nextRawKey(keyBuffer)) {
        reader.nextRawValue(valBuffer);
        keyDeser.readFields(keyBuffer);
        valDeser.readFields(valBuffer);
        int partition = partitioner.getPartition(keyDeser, valDeser, numPartitions);
        assertTrue(expectedValues.get(partition).remove(keyDeser.toString(), valDeser.toString()));
      }
      inStream.close();
    }
    for (int i = 0; i < numPartitions; i++) {
View Full Code Here

TOP

Related Classes of org.apache.tez.runtime.library.partitioner.HashPartitioner

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.