Examples of org.apache.hadoop.zebra.tfile.RawComparable

org.apache.hadoop.zebra.tfile.RawComparable
Interface for objects that can be compared through {@link RawComparator}. This is useful in places where we need a single object reference to specify a range of bytes in a byte array, such as {@link Comparable} or {@link Collections#binarySearch(java.util.List,Object,Comparator)}. The actual comparison among RawComparables requires an external RawComparator, and it is the application's responsibility to ensure that two RawComparables are semantically comparable with the same RawComparator.

      }
      if (!isSorted()) {
        throw new IOException(
            "Cannot get key-bounded scanner for unsorted table");
      }
      RawComparable begin =
          (beginKey != null) ? new ByteArray(beginKey.getBytes(), 0, beginKey
              .getLength()) : null;
      RawComparable end =
          (endKey != null) ? new ByteArray(endKey.getBytes(), 0, endKey.getLength())
              : null;
      if (begin != null && end != null) {
        if (comparator.compare(begin, end) >= 0) {
          throw new IOException("Zero-key-range split");

View Full Code Here

      final long minSize = getMinSplitSize(conf);
      final long EPSILON = (long) (minSize * (SPLIT_SLOP - 1));
      long goalSize = totalBytes / n;
      long batchSize = 0;
      BlockDistribution bd = new BlockDistribution();;
      RawComparable prevKey = null;


      long minStepSize = -1;
      FSDataInputStream nextFsdis = null;
      TFile.Reader nextReader = null;
      for (int i = 0; i < paths.length; ++i) {
        FileStatus fstatus = tfileStatus[i];
        long blkSize = fstatus.getBlockSize();
        long fileLen = fstatus.getLen();
        long stepSize = Math.max(minSize,
            (goalSize < blkSize) ? goalSize : blkSize);
        if (minStepSize== -1 || minStepSize > stepSize)
          minStepSize = stepSize;
        // adjust the block size by the scaling factor
        blkSize /= nTables;
        stepSize = Math.max(minSize,
          (goalSize < blkSize) ? goalSize : blkSize);
        FSDataInputStream fsdis = null;
        TFile.Reader reader = null;
        long remainLen = fileLen;
        try {
          if (nextReader == null)
          {
            fsdis = fs.open(paths[i]);
            reader = new TFile.Reader(fsdis, fileLen, conf);
          } else {
            fsdis = nextFsdis;
            reader = nextReader;
          }
          BlockLocation[] locations =
              fs.getFileBlockLocations(fstatus, 0, fileLen);
          if (locations.length == 0) {
            throw new AssertionError(
                "getFileBlockLocations returns 0 location");
          }


          Arrays.sort(locations, new Comparator<BlockLocation>() {
            @Override
            public int compare(BlockLocation o1, BlockLocation o2) {
              long diff = o1.getOffset() - o2.getOffset();
              if (diff < 0) return -1;
              if (diff > 0) return 1;
              return 0;
            }
          });
          
          long[] startOffsets = new long[locations.length];


          for (int ii = 0; ii < locations.length; ii++)
            startOffsets[ii] = locations[ii].getOffset();


          boolean done = false;
          while ((remainLen > 0) && !done) {
            long splitBytes =
                remainLen > stepSize ? stepSize : remainLen;
            long offsetBegin = fileLen - remainLen;
            long offsetEnd = offsetBegin + splitBytes;
            int indexBegin = getStartBlockIndex(startOffsets, offsetBegin);
            int indexEnd = getEndBlockIndex(startOffsets, offsetEnd);
            BlockLocation firstBlock = locations[indexBegin];
            BlockLocation lastBlock = locations[indexEnd-1];
            long lastBlockOffsetBegin = lastBlock.getOffset();
            long lastBlockOffsetEnd =
                lastBlockOffsetBegin + lastBlock.getLength();
            if ((firstBlock.getOffset() > offsetBegin)
                || (lastBlockOffsetEnd < offsetEnd)) {
              throw new AssertionError(
                  "Block locations returned by getFileBlockLocations do not cover requested range");
            }


            // Adjust offsets
            if ((offsetEnd > lastBlockOffsetBegin)
                && (offsetEnd - lastBlockOffsetBegin < EPSILON)) {
              // the split includes a bit of the next block, remove it.
              if (offsetEnd != fileLen)
              {
              // only if this is not the last chunk
                offsetEnd = lastBlockOffsetBegin;
                splitBytes = offsetEnd - offsetBegin;
                indexEnd--;
              }
            }
            else if ((lastBlockOffsetEnd > offsetEnd)
                && (lastBlockOffsetEnd - offsetEnd < EPSILON)) {
              // the split includes almost the whole block, fill it.
              offsetEnd = lastBlockOffsetEnd;
              splitBytes = offsetEnd - offsetBegin;
            }


            RawComparable key = reader.getKeyNear(offsetEnd);
            if (key == null) {
              offsetEnd = fileLen;
              splitBytes = offsetEnd - offsetBegin;
              if (i < paths.length-1)
              {

View Full Code Here

    
    keyDistri.resize(lastBd);
    
    RawComparable[] keys = keyDistri.getKeys();
    for (int i = 0; i <= keys.length; ++i) {
      RawComparable begin = (i == 0) ? null : keys[i - 1];
      RawComparable end = (i == keys.length) ? null : keys[i];
      BlockDistribution bd;
      if (i < keys.length)
        bd = keyDistri.getBlockDistribution(keys[i]);
      else
        bd = lastBd;
      BytesWritable beginB = null, endB = null;
      if (begin != null)
        beginB = new BytesWritable(begin.buffer());
      if (end != null)
        endB = new BytesWritable(end.buffer());
      SortedTableSplit split = new SortedTableSplit(beginB, endB, bd, conf);
      splits.add(split);
    }


    return splits.toArray(new InputSplit[splits.size()]);

View Full Code Here


    keyDistri.resize(lastBd);


    RawComparable[] keys = keyDistri.getKeys();
    for (int i = 0; i <= keys.length; ++i) {
      RawComparable begin = (i == 0) ? null : keys[i - 1];
      RawComparable end = (i == keys.length) ? null : keys[i];
      BlockDistribution bd;
      if (i < keys.length)
        bd = keyDistri.getBlockDistribution(keys[i]);
      else
        bd = lastBd;
      BytesWritable beginB = null, endB = null;
      if (begin != null)
        beginB = new BytesWritable(begin.buffer());
      if (end != null)
        endB = new BytesWritable(end.buffer());
      SortedTableSplit split = new SortedTableSplit(i, beginB, endB, bd, conf);
      splits.add(split);
    }
    LOG.info("getSplits : returning " + splits.size() + " sorted splits.");
    return splits;

View Full Code Here

          boolean first, last, realFirst = true;
          Path myPath;
          for (int i = beginIndex; i < endIndex; ++i) {
            first = (i == beginIndex);
            last = (i == endIndex -1);
            RawComparable begin = first ? beginKey : null;
            RawComparable end = last ? endKey : null;
            TFileScannerInfo scanner;
            if (rowRange == null)
              myPath = cgindex.getPath(i, path);
            else
              myPath = new Path(path, rowRange.names[i]);

View Full Code Here


    keyDistri.resize(lastBd);


    RawComparable[] keys = keyDistri.getKeys();
    for (int i = 0; i <= keys.length; ++i) {
      RawComparable begin = (i == 0) ? null : keys[i - 1];
      RawComparable end = (i == keys.length) ? null : keys[i];
      BlockDistribution bd;
      if (i < keys.length)
        bd = keyDistri.getBlockDistribution(keys[i]);
      else
        bd = lastBd;
      BytesWritable beginB = null, endB = null;
      if (begin != null)
        beginB = new BytesWritable(begin.buffer());
      if (end != null)
        endB = new BytesWritable(end.buffer());
      SortedTableSplit split = new SortedTableSplit(i, beginB, endB, bd, conf);
      splits.add(split);
    }
    LOG.info("getSplits : returning " + splits.size() + " sorted splits.");
    return splits;

View Full Code Here

      }
      if (!isSorted()) {
        throw new IOException(
            "Cannot get key-bounded scanner for unsorted table");
      }
      RawComparable begin =
          (beginKey != null) ? new ByteArray(beginKey.getBytes(), 0, beginKey
              .getLength()) : null;
      RawComparable end =
          (endKey != null) ? new ByteArray(endKey.getBytes(), 0, endKey.getLength())
              : null;
      if (begin != null && end != null) {
        if (comparator.compare(begin, end) >= 0) {
          throw new IOException("Zero-key-range split");

View Full Code Here

      final long minSize = getMinSplitSize(conf);
      final long EPSILON = (long) (minSize * (SPLIT_SLOP - 1));
      long goalSize = totalBytes / n;
      long batchSize = 0;
      BlockDistribution bd = new BlockDistribution();;
      RawComparable prevKey = null;


      long minStepSize = -1;
      FSDataInputStream nextFsdis = null;
      TFile.Reader nextReader = null;
      for (int i = 0; i < paths.length; ++i) {
        FileStatus fstatus = tfileStatus[i];
        long blkSize = fstatus.getBlockSize();
        long fileLen = fstatus.getLen();
        long stepSize = Math.max(minSize,
            (goalSize < blkSize) ? goalSize : blkSize);
        if (minStepSize== -1 || minStepSize > stepSize)
          minStepSize = stepSize;
        // adjust the block size by the scaling factor
        blkSize /= nTables;
        stepSize = Math.max(minSize,
          (goalSize < blkSize) ? goalSize : blkSize);
        FSDataInputStream fsdis = null;
        TFile.Reader reader = null;
        long remainLen = fileLen;
        try {
          if (nextReader == null)
          {
            fsdis = fs.open(paths[i]);
            reader = new TFile.Reader(fsdis, fileLen, conf);
          } else {
            fsdis = nextFsdis;
            reader = nextReader;
          }
          BlockLocation[] locations =
              fs.getFileBlockLocations(fstatus, 0, fileLen);
          if (locations.length == 0) {
            throw new AssertionError(
                "getFileBlockLocations returns 0 location");
          }


          Arrays.sort(locations, new Comparator<BlockLocation>() {
            @Override
            public int compare(BlockLocation o1, BlockLocation o2) {
              long diff = o1.getOffset() - o2.getOffset();
              if (diff < 0) return -1;
              if (diff > 0) return 1;
              return 0;
            }
          });
          
          long[] startOffsets = new long[locations.length];


          for (int ii = 0; ii < locations.length; ii++)
            startOffsets[ii] = locations[ii].getOffset();


          boolean done = false;
          while ((remainLen > 0) && !done) {
            long splitBytes =
                remainLen > stepSize ? stepSize : remainLen;
            long offsetBegin = fileLen - remainLen;
            long offsetEnd = offsetBegin + splitBytes;
            int indexBegin = getStartBlockIndex(startOffsets, offsetBegin);
            int indexEnd = getEndBlockIndex(startOffsets, offsetEnd);
            BlockLocation firstBlock = locations[indexBegin];
            BlockLocation lastBlock = locations[indexEnd-1];
            long lastBlockOffsetBegin = lastBlock.getOffset();
            long lastBlockOffsetEnd =
                lastBlockOffsetBegin + lastBlock.getLength();
            if ((firstBlock.getOffset() > offsetBegin)
                || (lastBlockOffsetEnd < offsetEnd)) {
              throw new AssertionError(
                  "Block locations returned by getFileBlockLocations do not cover requested range");
            }


            // Adjust offsets
            if ((offsetEnd > lastBlockOffsetBegin)
                && (offsetEnd - lastBlockOffsetBegin < EPSILON)) {
              // the split includes a bit of the next block, remove it.
              if (offsetEnd != fileLen)
              {
              // only if this is not the last chunk
                offsetEnd = lastBlockOffsetBegin;
                splitBytes = offsetEnd - offsetBegin;
                indexEnd--;
              }
            }
            else if ((lastBlockOffsetEnd > offsetEnd)
                && (lastBlockOffsetEnd - offsetEnd < EPSILON)) {
              // the split includes almost the whole block, fill it.
              offsetEnd = lastBlockOffsetEnd;
              splitBytes = offsetEnd - offsetBegin;
            }


            RawComparable key = reader.getKeyNear(offsetEnd);
            if (key == null) {
              offsetEnd = fileLen;
              splitBytes = offsetEnd - offsetBegin;
              if (i < paths.length-1)
              {

View Full Code Here

        result.minStepSize = sourceKeys[i].minStepSize;
    
    RawComparable[][] its = new RawComparable[srcSize][];
    for (int i = 0; i < srcSize; i++)
      its[i] = sourceKeys[i].getKeys();
    RawComparable min, current;
    int minIndex = -1;
    int[] index = new int[srcSize];
    boolean[] dirty = new boolean[srcSize];
    while (true)
    {

View Full Code Here

  {
    Iterator<Map.Entry<RawComparable, BlockDistribution>> it =
      data.entrySet().iterator();
    KeyDistribution adjusted = new KeyDistribution(data.comparator());
    long realSize = 0, mySize = 0;
    RawComparable key = null;
    BlockDistribution bd = null, bd0 = null;
    while (it.hasNext())
    {
      Map.Entry<RawComparable, BlockDistribution> mapEntry = it.next();
      bd0 = mapEntry.getValue();

View Full Code Here

0 1 2

TOP

Related Classes of org.apache.hadoop.zebra.tfile.RawComparable

org.apache.hadoop.zebra.io.ColumnGroup$CGIndex

org.apache.hadoop.zebra.io.ColumnGroup$Reader

org.apache.hadoop.zebra.io.ColumnGroup$Reader$CGScanner

org.apache.hadoop.zebra.io.KeyDistribution

org.apache.hadoop.zebra.mapred.TableInputFormat

org.apache.hadoop.zebra.mapreduce.TableInputFormat

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.