Examples of org.apache.tez.runtime.library.api.KeyValueWriter

Package org.apache.tez.runtime.library.api

Examples of org.apache.tez.runtime.library.api.KeyValueWriter

org.apache.tez.runtime.library.api.KeyValueWriter
A key/value(s) pair based {@link Writer}

  /**
   * Get a key/value writer for writing MapReduce-compatible output
   */
  @Override
  public KeyValueWriter getWriter() throws IOException {
    return new KeyValueWriter() {
      private final boolean useNewWriter = useNewApi;


      @SuppressWarnings("unchecked")
      @Override
      public void write(Object key, Object value) throws IOException {

View Full Code Here

      for (Entry<String, String> entry : incrementalConf) {
        jobConf.set(entry.getKey(), entry.getValue());
      }
    }


    KeyValueWriter kvWriter = null;
    if ((out instanceof MROutputLegacy)) {
      kvWriter = ((MROutputLegacy)out).getWriter();
    } else if ((out instanceof OrderedPartitionedKVOutput)){
      kvWriter = ((OrderedPartitionedKVOutput)out).getWriter();
    } else {

View Full Code Here

      throw new IOException("Illegal input to reduce: " + in.getClass());
    }
    OrderedGroupedInputLegacy shuffleInput = (OrderedGroupedInputLegacy)in;
    KeyValuesReader kvReader = shuffleInput.getReader();


    KeyValueWriter kvWriter = null;
    if((out instanceof MROutputLegacy)) {
      kvWriter = ((MROutputLegacy) out).getWriter();
    } else if ((out instanceof OrderedPartitionedKVOutput)) {
      kvWriter = ((OrderedPartitionedKVOutput) out).getWriter();
    } else {

View Full Code Here

      // of casting the input/output. This allows the actual input/output type to be replaced
      // without affecting the semantic guarantees of the data type that are represented by
      // the reader and writer.
      // The inputs/outputs are referenced via the names assigned in the DAG.
      KeyValueReader kvReader = (KeyValueReader) getInputs().get(INPUT).getReader();
      KeyValueWriter kvWriter = (KeyValueWriter) getOutputs().get(SUMMATION).getWriter();
      while (kvReader.next()) {
        StringTokenizer itr = new StringTokenizer(kvReader.getCurrentValue().toString());
        while (itr.hasMoreTokens()) {
          word.set(itr.nextToken());
          // Count 1 every time a word is observed. The word is the key and 1 is the value.
          kvWriter.write(word, one);
        }
      }
    }

View Full Code Here


    @Override
    public void run() throws Exception {
      Preconditions.checkArgument(getInputs().size() == 1);
      Preconditions.checkArgument(getOutputs().size() == 1);
      KeyValueWriter kvWriter = (KeyValueWriter) getOutputs().get(OUTPUT).getWriter();
      // The KeyValues reader provides all values for a given key. The aggregation of values per key
      // is done by the LogicalInput. Since the key is the word and the values are its counts in 
      // the different TokenProcessors, summing all values per key provides the sum for that word.
      KeyValuesReader kvReader = (KeyValuesReader) getInputs().get(TOKENIZER).getReader();
      while (kvReader.next()) {
        Text word = (Text) kvReader.getCurrentKey();
        int sum = 0;
        for (Object value : kvReader.getCurrentValues()) {
          sum += ((IntWritable) value).get();
        }
        kvWriter.write(word, new IntWritable(sum));
      }
      // deriving from SimpleMRProcessor takes care of committing the output
      // It automatically invokes the commit logic for the OutputFormat if necessary.
    }

View Full Code Here

      Reader rawReader = input.getReader();
      Preconditions.checkState(rawReader instanceof KeyValueReader);
      LogicalOutput output = getOutputs().values().iterator().next();


      KeyValueReader reader = (KeyValueReader) rawReader;
      KeyValueWriter writer = (KeyValueWriter) output.getWriter();


      while (reader.next()) {
        Object val = reader.getCurrentValue();
        // The data value itself is the join key. Simply write it out as the key.
        // The output value is null.
        writer.write(val, NullWritable.get());
      }
    }

View Full Code Here

      Reader rawHashReader = hashInput.getReader();
      Preconditions.checkState(rawStreamReader instanceof KeyValueReader);
      Preconditions.checkState(rawHashReader instanceof KeyValueReader);
      LogicalOutput lo = getOutputs().get(joinOutput);
      Preconditions.checkState(lo.getWriter() instanceof KeyValueWriter);
      KeyValueWriter writer = (KeyValueWriter) lo.getWriter();


      // create a hash table for the hash side
      KeyValueReader hashKvReader = (KeyValueReader) rawHashReader;
      Set<Text> keySet = new HashSet<Text>();
      while (hashKvReader.next()) {
        keySet.add(new Text((Text) hashKvReader.getCurrentKey()));
      }


      // read the stream side and join it using the hash table
      KeyValueReader streamKvReader = (KeyValueReader) rawStreamReader;
      while (streamKvReader.next()) {
        Text key = (Text) streamKvReader.getCurrentKey();
        if (keySet.contains(key)) {
          writer.write(key, NullWritable.get());
        }
      }
    }

View Full Code Here

      // the recommended approach is to cast the reader/writer to a specific type instead
      // of casting the input/output. This allows the actual input/output type to be replaced
      // without affecting the semantic guarantees of the data type that are represented by
      // the reader and writer.
      // The inputs/outputs are referenced via the names assigned in the DAG.
      KeyValueWriter kvWriter = (KeyValueWriter) getOutputs().get(SORTER).getWriter();
      KeyValuesReader kvReader = (KeyValuesReader) getInputs().get(TOKENIZER).getReader();
      while (kvReader.next()) {
        Text word = (Text) kvReader.getCurrentKey();
        int sum = 0;
        for (Object value : kvReader.getCurrentValues()) {
          sum += ((IntWritable) value).get();
        }
        // write the sum as the key and the word as the value
        kvWriter.write(new IntWritable(sum), word);
      }
    }

View Full Code Here


    @Override
    public void run() throws Exception {
      Preconditions.checkArgument(getInputs().size() == 1);
      Preconditions.checkArgument(getOutputs().size() == 1);
      KeyValueWriter kvWriter = (KeyValueWriter) getOutputs().get(OUTPUT).getWriter();
      KeyValuesReader kvReader = (KeyValuesReader) getInputs().get(SUMMATION).getReader();
      while (kvReader.next()) {
        Object sum = kvReader.getCurrentKey();
        for (Object word : kvReader.getCurrentValues()) {
          kvWriter.write(word, sum);
        }
      }
      // deriving from SimpleMRProcessor takes care of committing the output
    }

View Full Code Here

    @Override
    public void run() throws Exception {
      Preconditions.checkState(getInputs().size() == 0);
      Preconditions.checkState(getOutputs().size() == 3);


      KeyValueWriter streamOutputWriter = (KeyValueWriter) getOutputs().get(STREAM_OUTPUT_NAME)
          .getWriter();
      KeyValueWriter hashOutputWriter = (KeyValueWriter) getOutputs().get(HASH_OUTPUT_NAME)
          .getWriter();
      KeyValueWriter expectedOutputWriter = (KeyValueWriter) getOutputs().get(EXPECTED_OUTPUT_NAME)
          .getWriter();


      float fileSizeFraction = hashOutputFileSize / (float) streamOutputFileSize;
      Preconditions.checkState(fileSizeFraction > 0.0f && fileSizeFraction <= 1.0f);
      int mod = 1;
      int extraKeysMod = 0;
      if (fileSizeFraction > overlapApprox) {
        // Common keys capped by overlap. Additional ones required in the hashFile.
        mod = (int) (1 / overlapApprox);
        extraKeysMod = (int) (1 / (fileSizeFraction - overlapApprox));
      } else {
        // All keys in hashFile must exist in stream file.
        mod = (int) (1 / fileSizeFraction);
      }
      LOG.info("Using mod=" + mod + ", extraKeysMod=" + extraKeysMod);


      long count = 0;
      long sizeLarge = 0;
      long sizeSmall = 0;
      long numLargeFileKeys = 0;
      long numSmallFileKeys = 0;
      long numExpectedKeys = 0;
      while (sizeLarge < streamOutputFileSize) {
        String str = createOverlapString(13, count);
        Text text = new Text(str);
        int size = text.getLength();
        streamOutputWriter.write(text, NullWritable.get());
        sizeLarge += size;
        numLargeFileKeys++;
        if (count % mod == 0) {
          hashOutputWriter.write(text, NullWritable.get());
          sizeSmall += size;
          numSmallFileKeys++;
          expectedOutputWriter.write(text, NullWritable.get());
          numExpectedKeys++;
        }
        if (extraKeysMod != 0 && count % extraKeysMod == 0) {
          String nStr = createNonOverlaptring(13, count);
          Text nText = new Text(nStr);

View Full Code Here

0 1 2 3 4

TOP

Related Classes of org.apache.tez.runtime.library.api.KeyValueWriter

org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POValueOutputTez

org.apache.tez.examples.JoinDataGen$GenDataProcessor

org.apache.tez.examples.JoinExample$ForwardingProcessor

org.apache.tez.examples.JoinExample$JoinProcessor

org.apache.tez.examples.OrderedWordCount$NoOpSorter

org.apache.tez.examples.OrderedWordCount$SumProcessor

org.apache.tez.examples.WordCount$SumProcessor

org.apache.tez.examples.WordCount$TokenProcessor

org.apache.tez.mapreduce.examples.BroadcastAndOneToOneExample$InputProcessor

org.apache.tez.mapreduce.examples.processor.FilterByWordInputProcessor

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.