Package org.apache.hadoop.hbase.util.byterange

Source Code of org.apache.hadoop.hbase.util.byterange.ByteRangeSet

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hbase.util.byterange;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;

import com.google.common.collect.Lists;

/**
* Performance oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted
* order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage.
* <p>
* Current implementations are {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet} and
* {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might be a
* trie-oriented ByteRangeTrieSet, etc
*/
@InterfaceAudience.Private
public abstract class ByteRangeSet {

  /******************** fields **********************/

  protected byte[] byteAppender;
  protected int numBytes;

  protected Map<ByteRange, Integer> uniqueIndexByUniqueRange;

  protected ArrayList<ByteRange> uniqueRanges;
  protected int numUniqueRanges = 0;

  protected int[] uniqueRangeIndexByInsertionId;
  protected int numInputs;

  protected List<Integer> sortedIndexByUniqueIndex;
  protected int[] sortedIndexByInsertionId;
  protected ArrayList<ByteRange> sortedRanges;


  /****************** construct **********************/

  protected ByteRangeSet() {
    this.byteAppender = new byte[0];
    this.uniqueRanges = Lists.newArrayList();
    this.uniqueRangeIndexByInsertionId = new int[0];
    this.sortedIndexByUniqueIndex = Lists.newArrayList();
    this.sortedIndexByInsertionId = new int[0];
    this.sortedRanges = Lists.newArrayList();
  }

  public void reset() {
    numBytes = 0;
    uniqueIndexByUniqueRange.clear();
    numUniqueRanges = 0;
    numInputs = 0;
    sortedIndexByUniqueIndex.clear();
    sortedRanges.clear();
  }


  /*************** abstract *************************/

  public abstract void addToSortedRanges();


  /**************** methods *************************/

  /**
   * Check if the incoming byte range exists.  If not, add it to the backing byteAppender[] and
   * insert it into the tracking Map uniqueIndexByUniqueRange.
   */
  public void add(ByteRange bytes) {
    Integer index = uniqueIndexByUniqueRange.get(bytes);
    if (index == null) {
      index = store(bytes);
    }
    int minLength = numInputs + 1;
    uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId,
        minLength, 2 * minLength);
    uniqueRangeIndexByInsertionId[numInputs] = index;
    ++numInputs;
  }

  protected int store(ByteRange bytes) {
    int indexOfNewElement = numUniqueRanges;
    if (uniqueRanges.size() <= numUniqueRanges) {
      uniqueRanges.add(new ByteRange());
    }
    ByteRange storedRange = uniqueRanges.get(numUniqueRanges);
    int neededBytes = numBytes + bytes.getLength();
    byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes);
    bytes.deepCopyTo(byteAppender, numBytes);
    storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet
    numBytes += bytes.getLength();
    uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement);
    int newestUniqueIndex = numUniqueRanges;
    ++numUniqueRanges;
    return newestUniqueIndex;
  }

  public ByteRangeSet compile() {
    addToSortedRanges();
    for (int i = 0; i < sortedRanges.size(); ++i) {
      sortedIndexByUniqueIndex.add(null);// need to grow the size
    }
    // TODO move this to an invert(int[]) util method
    for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) {
      int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i));
      sortedIndexByUniqueIndex.set(uniqueIndex, i);
    }
    sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs,
        numInputs);
    for (int i = 0; i < numInputs; ++i) {
      int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i];
      int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex);
      sortedIndexByInsertionId[i] = sortedIndex;
    }
    return this;
  }

  public int getSortedIndexForInsertionId(int insertionId) {
    return sortedIndexByInsertionId[insertionId];
  }

  public int size() {
    return uniqueIndexByUniqueRange.size();
  }


  /***************** standard methods ************************/

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    int i = 0;
    for (ByteRange r : sortedRanges) {
      if (i > 0) {
        sb.append("\n");
      }
      sb.append(i + " " + Bytes.toStringBinary(r.deepCopyToNewArray()));
      ++i;
    }
    sb.append("\ntotalSize:" + numBytes);
    sb.append("\navgSize:" + getAvgSize());
    return sb.toString();
  }


  /**************** get/set *****************************/

  public ArrayList<ByteRange> getSortedRanges() {
    return sortedRanges;
  }

  public long getAvgSize() {
    return numBytes / numUniqueRanges;
  }

}
TOP

Related Classes of org.apache.hadoop.hbase.util.byterange.ByteRangeSet

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle. Contact coftware#gmail.com.