Package org.apache.tez.runtime.library.common.sort.impl.IFile

Examples of org.apache.tez.runtime.library.common.sort.impl.IFile.Writer
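Before the merge and test excerpts below, here is a minimal, self-contained sketch of the basic Writer lifecycle: open a writer over a local file, append a few key/value pairs, close it, then read back its raw and compressed lengths. The output path, the Text/IntWritable types, and the uncompressed (null codec) setup are assumptions for illustration only; the seven-argument constructor and the length accessors mirror the ones used in the excerpts on this page (some excerpts use an eight-argument variant from a different Tez version).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.tez.runtime.library.common.sort.impl.IFile;

public class IFileWriterSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem localFs = FileSystem.getLocal(conf);
    Path file = new Path("/tmp/ifile-writer-sketch.out");   // hypothetical output path

    // Same argument order as the on-disk merge excerpts below:
    // (conf, fs, path, keyClass, valueClass, codec, writesCounter).
    // A null codec leaves the stream uncompressed; a null counter skips bookkeeping.
    IFile.Writer writer = new IFile.Writer(conf, localFs, file,
        Text.class, IntWritable.class, null, null);
    try {
      writer.append(new Text("apple"), new IntWritable(1));
      writer.append(new Text("banana"), new IntWritable(2));
    } finally {
      writer.close();   // must be closed before the length accessors are meaningful
    }

    System.out.println("raw length = " + writer.getRawLength()
        + ", compressed length = " + writer.getCompressedLength());
  }
}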


      int noInMemorySegments = inMemorySegments.size();
     
      MapOutput mergedMapOutputs =
        unconditionalReserve(dummyMapId, mergeOutputSize, false);
     
      Writer writer =
        new InMemoryWriter(mergedMapOutputs.getArrayStream());
     
      LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
               " segments of total-size: " + mergeOutputSize);

      TezRawKeyValueIterator rIter =
        TezMerger.merge(conf, rfs,
                       ConfigUtils.getIntermediateInputKeyClass(conf),
                       ConfigUtils.getIntermediateInputValueClass(conf),
                       inMemorySegments, inMemorySegments.size(),
                       new Path(inputContext.getUniqueIdentifier()),
                       (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf),
                       nullProgressable, null, null, null);
      TezMerger.writeFile(rIter, writer, nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
      writer.close();

      LOG.info(inputContext.getUniqueIdentifier() +
               " Memory-to-Memory merge of the " + noInMemorySegments +
               " files in-memory complete.");


      Path outputPath = mapOutputFile.getInputFileForWrite(
          srcTaskIdentifier.getInputIdentifier().getSrcTaskIndex(),
          mergeOutputSize).suffix(Constants.MERGED_OUTPUT_PREFIX);

      Writer writer = null;
      try {
        writer =
            new Writer(conf, rfs, outputPath,
                (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
                (Class)ConfigUtils.getIntermediateInputValueClass(conf),
                codec, null);

        TezRawKeyValueIterator rIter = null;
        LOG.info("Initiating in-memory merge with " + noInMemorySegments +
            " segments...");

        rIter = TezMerger.merge(conf, rfs,
            (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
            (Class)ConfigUtils.getIntermediateInputValueClass(conf),
            inMemorySegments, inMemorySegments.size(),
            new Path(inputContext.getUniqueIdentifier()),
            (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf),
            nullProgressable, spilledRecordsCounter, null, null);

        if (null == combiner) {
          TezMerger.writeFile(rIter, writer, nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
        } else {
          runCombineProcessor(rIter, writer);
        }
        writer.close();
        writer = null;

        LOG.info(inputContext.getUniqueIdentifier() +
            " Merge of the " + noInMemorySegments +
            " files in-memory complete." +
            " Local file is " + outputPath + " of size " +
            localFS.getFileStatus(outputPath).getLen());
      } catch (IOException e) {
        //make sure that we delete the ondisk file that we created
        //earlier when we invoked cloneFileAttributes
        localFS.delete(outputPath, true);
        throw e;
      } finally {
        if (writer != null) {
          writer.close();
        }
      }

      // Note the output of the merge
      closeOnDiskFile(outputPath);

      // 2. Start the on-disk merge process
      Path outputPath =
        localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(),
            approxOutputSize, conf).suffix(Constants.MERGED_OUTPUT_PREFIX);
      Writer writer =
        new Writer(conf, rfs, outputPath,
                        (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
                        (Class)ConfigUtils.getIntermediateInputValueClass(conf),
                        codec, null);
      TezRawKeyValueIterator iter  = null;
      Path tmpDir = new Path(inputContext.getUniqueIdentifier());
      try {
        iter = TezMerger.merge(conf, rfs,
                            (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
                            (Class)ConfigUtils.getIntermediateInputValueClass(conf),
                            codec, ifileReadAhead, ifileReadAheadLength, ifileBufferSize,
                            inputs.toArray(new Path[inputs.size()]), true, ioSortFactor, tmpDir,
                            (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf),
                            nullProgressable, spilledRecordsCounter, null,
                            mergedMapOutputsCounter, null);

        TezMerger.writeFile(iter, writer, nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
        writer.close();
      } catch (IOException e) {
        localFS.delete(outputPath, true);
        throw e;
      }

  @Test
  public void testRepeatedKeysInMemReaderNoRLE() throws IOException {
    String outputFileName = "ifile.out";
    Path outputPath = new Path(workDir, outputFileName);
    List<KVPair> data = KVDataGen.generateTestData(true);
    Writer writer = writeTestFile(outputPath, false, data);

    FSDataInputStream inStream =  localFs.open(outputPath);
    byte[] bytes = new byte[(int)writer.getRawLength()];

    readDataToMem(inStream, bytes);
    inStream.close();

    InMemoryReader inMemReader = new InMemoryReader(null, new InputAttemptIdentifier(0, 0), bytes, 0, bytes.length);
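
    // The excerpt stops right after the InMemoryReader is constructed. As a hedged
    // continuation sketch (assuming the nextRawKey/nextRawValue iteration API that the
    // in-memory reader inherits from IFile.Reader; names can differ across Tez versions,
    // and DataInputBuffer comes from org.apache.hadoop.io), the records could be walked
    // back out of 'bytes' like this:
    DataInputBuffer keyIn = new DataInputBuffer();
    DataInputBuffer valIn = new DataInputBuffer();
    int records = 0;
    while (inMemReader.nextRawKey(keyIn)) {   // assumed reader API, see note above
      inMemReader.nextRawValue(valIn);
      records++;
    }
    inMemReader.close();
    assertTrue("expected at least one record", records > 0);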

  @Test
  public void testRepeatedKeysInMemReaderRLE() throws IOException {
    String outputFileName = "ifile.out";
    Path outputPath = new Path(workDir, outputFileName);
    List<KVPair> data = KVDataGen.generateTestData(true);
    Writer writer = writeTestFile(outputPath, true, data);

    FSDataInputStream inStream =  localFs.open(outputPath);
    byte[] bytes = new byte[(int)writer.getRawLength()];

    readDataToMem(inStream, bytes);
    inStream.close();


                                             inMemToDiskBytes).suffix(
                                                 Constants.MERGED_OUTPUT_PREFIX);
        final TezRawKeyValueIterator rIter = TezMerger.merge(job, fs, keyClass, valueClass,
            memDiskSegments, numMemDiskSegments, tmpDir, comparator, nullProgressable,
            spilledRecordsCounter, null, additionalBytesRead, null);
        final Writer writer = new Writer(job, fs, outputPath,
            keyClass, valueClass, codec, null, null);
        try {
          TezMerger.writeFile(rIter, writer, nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
          // add to list of final disk outputs.
          onDiskMapOutputs.add(outputPath);
        } catch (IOException e) {
          if (null != outputPath) {
            try {
              fs.delete(outputPath, true);
            } catch (IOException ie) {
              // NOTHING
            }
          }
          throw e;
        } finally {
          if (null != writer) {
            writer.close();
            additionalBytesWritten.increment(writer.getCompressedLength());
          }
        }
        LOG.info("Merged " + numMemDiskSegments + " segments, " +
                 inMemToDiskBytes + " bytes to disk to satisfy " +
                 "reduce memory limit");

      int noInMemorySegments = inMemorySegments.size();
     
      MapOutput mergedMapOutputs =
        unconditionalReserve(dummyMapId, mergeOutputSize, false);
     
      Writer writer =
        new InMemoryWriter(mergedMapOutputs.getArrayStream());

      LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
               " segments of total-size: " + mergeOutputSize);

      // Nothing will be materialized to disk because the sort factor is being
      // set to the number of in memory segments.
      // TODO Is this doing any combination ?
      TezRawKeyValueIterator rIter =
        TezMerger.merge(conf, rfs,
                       ConfigUtils.getIntermediateInputKeyClass(conf),
                       ConfigUtils.getIntermediateInputValueClass(conf),
                       inMemorySegments, inMemorySegments.size(),
                       new Path(inputContext.getUniqueIdentifier()),
                       (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf),
                       nullProgressable, null, null, null, null);
      TezMerger.writeFile(rIter, writer, nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
      writer.close();

      LOG.info(inputContext.getUniqueIdentifier() +
               " Memory-to-Memory merge of the " + noInMemorySegments +
               " files in-memory complete.");

      // adequate memory to keep all segments in memory.
      Path outputPath = mapOutputFile.getInputFileForWrite(
          srcTaskIdentifier.getInputIdentifier().getInputIndex(),
          mergeOutputSize).suffix(Constants.MERGED_OUTPUT_PREFIX);

      Writer writer = null;
      try {
        writer =
            new Writer(conf, rfs, outputPath,
                (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
                (Class)ConfigUtils.getIntermediateInputValueClass(conf),
                codec, null, null);

        TezRawKeyValueIterator rIter = null;
        LOG.info("Initiating in-memory merge with " + noInMemorySegments +
            " segments...");

        // Nothing actually materialized to disk - controlled by setting sort-factor to #segments.
        rIter = TezMerger.merge(conf, rfs,
            (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
            (Class)ConfigUtils.getIntermediateInputValueClass(conf),
            inMemorySegments, inMemorySegments.size(),
            new Path(inputContext.getUniqueIdentifier()),
            (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf),
            nullProgressable, spilledRecordsCounter, null, additionalBytesRead, null);
        // spilledRecordsCounter is tracking the number of keys that will be
        // read from each of the segments being merged - which is essentially
        // what will be written to disk.

        if (null == combiner) {
          TezMerger.writeFile(rIter, writer, nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
        } else {
          // TODO Counters for Combine
          runCombineProcessor(rIter, writer);
        }
        writer.close();
        additionalBytesWritten.increment(writer.getCompressedLength());
        writer = null;

        LOG.info(inputContext.getUniqueIdentifier() +
            " Merge of the " + noInMemorySegments +
            " files in-memory complete." +
            " Local file is " + outputPath + " of size " +
            localFS.getFileStatus(outputPath).getLen());
      } catch (IOException e) {
        //make sure that we delete the ondisk file that we created
        //earlier when we invoked cloneFileAttributes
        localFS.delete(outputPath, true);
        throw e;
      } finally {
        if (writer != null) {
          writer.close();
        }
      }

      // Note the output of the merge
      closeOnDiskFile(outputPath);

      // 2. Start the on-disk merge process
      Path outputPath =
        localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(),
            approxOutputSize, conf).suffix(Constants.MERGED_OUTPUT_PREFIX);
      Writer writer =
        new Writer(conf, rfs, outputPath,
                        (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
                        (Class)ConfigUtils.getIntermediateInputValueClass(conf),
                        codec, null, null);
      TezRawKeyValueIterator iter  = null;
      Path tmpDir = new Path(inputContext.getUniqueIdentifier());
      try {
        iter = TezMerger.merge(conf, rfs,
                            (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
                            (Class)ConfigUtils.getIntermediateInputValueClass(conf),
                            codec, ifileReadAhead, ifileReadAheadLength, ifileBufferSize,
                            inputs.toArray(new Path[inputs.size()]), true, ioSortFactor, tmpDir,
                            (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf),
                            nullProgressable, spilledRecordsCounter, null,
                            mergedMapOutputsCounter, null);

        // TODO Maybe differentiate between data written because of Merges and
        // the finalMerge (i.e. final mem available may be different from
        // initial merge mem)
        TezMerger.writeFile(iter, writer, nullProgressable, TezJobConfig.DEFAULT_RECORDS_BEFORE_PROGRESS);
        writer.close();
        additionalBytesWritten.increment(writer.getCompressedLength());
      } catch (IOException e) {
        localFS.delete(outputPath, true);
        throw e;
      }

      final InMemValBytes value = createInMemValBytes();
      for (int i = 0; i < partitions; ++i) {
        IFile.Writer writer = null;
        try {
          long segmentStart = out.getPos();
          writer = new Writer(conf, out, keyClass, valClass, codec,
                                    spilledRecordsCounter, null);
          if (combiner == null) {
            // spill directly
            DataInputBuffer key = new DataInputBuffer();
            while (spindex < mend &&
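
This last excerpt is cut off inside the direct-spill loop. The technique it illustrates is opening one Writer per partition over a shared spill stream, appending already-serialized records, and recording the segment boundaries. A simplified, hedged sketch of that pattern follows; keyBytes/valBytes are stand-ins for the serialized record data that the real loop reads out of the sorter's internal buffers, and the surrounding combiner and metadata handling is omitted.

          // Simplified sketch of the per-partition spill pattern (not the exact Tez loop).
          long segmentStart = out.getPos();
          Writer writer = new Writer(conf, out, keyClass, valClass, codec,
                                     spilledRecordsCounter, null);
          try {
            // keyBytes/valBytes: already-serialized record data (stand-ins here).
            DataInputBuffer key = new DataInputBuffer();
            DataInputBuffer val = new DataInputBuffer();
            key.reset(keyBytes, 0, keyBytes.length);
            val.reset(valBytes, 0, valBytes.length);
            writer.append(key, val);                   // append raw serialized bytes
          } finally {
            writer.close();
          }
          long rawLength = writer.getRawLength();          // uncompressed payload written
          long partLength = out.getPos() - segmentStart;   // bytes this partition occupies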
