// Package eu.stratosphere.pact.runtime.hash
//
// Source code of eu.stratosphere.pact.runtime.hash.MultiLevelHashTester$BucketBoundaries

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.pact.runtime.hash;

import java.util.ArrayList;
import java.util.Formatter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.Assert;
import org.junit.Test;

import eu.stratosphere.pact.runtime.hash.MultiLevelHashTester.BucketBoundaries;
import eu.stratosphere.pact.runtime.hash.util.LastBitsToRange;
import eu.stratosphere.pact.runtime.hash.util.RandomIterator;
import eu.stratosphere.pact.runtime.hash.util.RangeCalculator;
import eu.stratosphere.pact.runtime.hash.util.RangeIterator;
import eu.stratosphere.pact.runtime.hash.util.StepRangeIterator;

/**
* Test distribution of hash function for multiple levels
*
*
*/
public class HashFunctionCollisionBenchmark {

  private static final Log LOG = LogFactory.getLog(HashFunctionCollisionBenchmark.class);

  private static final long SEED = 561349061987311L;

  @Test
  public void testStepSeventeen() {

    // Define numbers of buckets on each level
    RangeCalculator[] rangeCalculators = {
        new LastBitsToRange(10), // 2^10=1024 Buckets on level 0
        new LastBitsToRange(10), // 2^10=1024 Buckets on level 1
        new LastBitsToRange(10) }; // 2^10=1024 Buckets on level 2

    Iterator<Integer> importIterator = new StepRangeIterator(-30000000,
        30000000, 17);

    MultiLevelHashTester ht = new MultiLevelHashTester(importIterator,
        rangeCalculators);

    BucketBoundaries[] boundaries = {
        new BucketBoundaries(3000, 3700, 5, 0.01),
        new BucketBoundaries(0, 20,
            BucketBoundaries.MAX_EMPTY_UNBOUNDED, 0.0001),
        new BucketBoundaries(0, 3,
            BucketBoundaries.MAX_EMPTY_UNBOUNDED, 0.000001) };

    LOG.debug("Start Step Seventeen hash test");
    ht.runTest(boundaries);
    LOG.debug("End Step Seventeen hash test");
  }

  @Test
  public void testThreeLevel() {

    // Define numbers of buckets on each level
    RangeCalculator[] rangeCalculators = {
        new LastBitsToRange(10), // 2^10=1024 Buckets on level 0
        new LastBitsToRange(10), // 2^10=1024 Buckets on level 1
        new LastBitsToRange(10) }; // 2^10=1024 Buckets on level 2

    Iterator<Integer> importIterator = new RangeIterator(-1000000, 1000000);

    MultiLevelHashTester ht = new MultiLevelHashTester(importIterator,
        rangeCalculators);

    BucketBoundaries[] boundaries = {
        new BucketBoundaries(1800, 2110, 5, 0.01),
        new BucketBoundaries(0, 15,
            BucketBoundaries.MAX_EMPTY_UNBOUNDED, 0.0001),
        new BucketBoundaries(0, 2,
            BucketBoundaries.MAX_EMPTY_UNBOUNDED, 0.000001) };

    LOG.debug("Start Three Level hash test");
    ht.runTest(boundaries);
    LOG.debug("End Three Level hash test");
  }

  @Test
  public void testRandom() {

    // Define numbers of buckets on each level
    RangeCalculator[] rangeCalculators = {
        new LastBitsToRange(10), // 2^10=1024 Buckets on level 0
        new LastBitsToRange(10), // 2^10=1024 Buckets on level 1
        new LastBitsToRange(10) }; // 2^10=1024 Buckets on level 2

    Iterator<Integer> importIterator = new RandomIterator(SEED, 2000000);

    MultiLevelHashTester ht = new MultiLevelHashTester(importIterator,
        rangeCalculators);

    BucketBoundaries[] boundaries = {
        new BucketBoundaries(1800, 2110, 5, 0.01),
        new BucketBoundaries(0, 15,
            BucketBoundaries.MAX_EMPTY_UNBOUNDED, 0.0001),
        new BucketBoundaries(0, 2,
            BucketBoundaries.MAX_EMPTY_UNBOUNDED, 0.000001) };

    LOG.debug("Start Random hash test");
    ht.runTest(boundaries);
    LOG.debug("End Random hash test");
  }

  @Test
  public void testTwoLevel() {

    // Define numbers of buckets on each level
    RangeCalculator[] rangeCalculators = {
        new LastBitsToRange(12)// 2^12=4096 Buckets on level 0
        new LastBitsToRange(12) }// 2^12=4096 Buckets on level 1

    Iterator<Integer> importIterator = new RangeIterator(-1000000, 1000000);

    MultiLevelHashTester ht = new MultiLevelHashTester(importIterator,
        rangeCalculators);

    BucketBoundaries[] boundaries = {
        new BucketBoundaries(400, 600, 5, 0.01),
        new BucketBoundaries(0, 4,
            BucketBoundaries.MAX_EMPTY_UNBOUNDED, 0.0001) };

    LOG.debug("Start Two Level hash test");
    ht.runTest(boundaries);
    LOG.debug("End Two Level hash test");
  }

}

class MultiLevelHashTester {

  private static final Log LOG = LogFactory.getLog(MultiLevelHashTester.class);

  private final int maxLevel;
  private final Iterator<Integer> importIterator;
  private final RangeCalculator[] rangeCalculators;
  private final HashMap<Integer, Object> rootMap = new HashMap<Integer, Object>();
  private final ArrayList<SortedMap<Integer, Integer>> bucketSizesPerLevel;

  /**
   *
   * @param hashFunction
   *            HashFunction to be tested
   * @param importIterator
   *            Iterator over values to be used in test run
   * @param rangeCalculators
   *            For each level a range calculator which defines how to map
   *            from hash to bucket
   */
  public MultiLevelHashTester(Iterator<Integer> importIterator,
      RangeCalculator[] rangeCalculators) {
    this.importIterator = importIterator;
    this.rangeCalculators = rangeCalculators;
    this.maxLevel = rangeCalculators.length;
    this.bucketSizesPerLevel = new ArrayList<SortedMap<Integer, Integer>>(
        maxLevel);

    for (int i = 0; i < maxLevel; i++) {
      bucketSizesPerLevel.add(i, new TreeMap<Integer, Integer>());
    }
  }

  /**
   * Run the test by: - Adding values from iterator to map - Creating
   * histogram over bucket sizes per level - Printing histogram informations
   *
   * @param boundaries
   *            Expected results for each level
   */
  public void runTest(BucketBoundaries[] boundaries) {
    addValues();
    collectStatistics(rootMap, 0);
    if (LOG.isDebugEnabled() == true) {
      printStatistics();
    }
    checkBoundaries(boundaries);
  }

  private void checkBoundaries(BucketBoundaries[] boundaries) {
    for (int level = 0; level < boundaries.length; level++) {
      int lowerBound = boundaries[level].getLowerBound();
      int upperBound = boundaries[level].getUpperBound();
      int bucketCountInLevel = 0;
      int bucketCountOutOfRange = 0;

      SortedMap<Integer, Integer> levelMap = bucketSizesPerLevel
          .get(level);
      Iterator<Integer> bucketSizeIterator = levelMap.keySet().iterator();

      while (bucketSizeIterator.hasNext()) {
        int bucketSize = bucketSizeIterator.next();
        if (bucketSize != 0) {
          int countForBucketSize = levelMap.get(bucketSize);
          bucketCountInLevel += countForBucketSize;
          if (lowerBound > bucketSize || upperBound < bucketSize) {
            bucketCountOutOfRange += countForBucketSize;
          }

        }
      }
      double bucketsOutOfRange = (double) bucketCountOutOfRange
          / (double) bucketCountInLevel;
      double maxBucketsOutOfRange = boundaries[level]
          .getPercentOutOfRange();
      Assert.assertTrue("More than " + (maxBucketsOutOfRange * 100)
          + "% of buckets out of range in level " + level,
          bucketsOutOfRange <= maxBucketsOutOfRange);

      int maxEmpty = boundaries[level].getMaxEmpty();
      Assert.assertTrue(
          "More than " + maxEmpty + " empty buckets in level "
              + level,
          (maxEmpty == BucketBoundaries.MAX_EMPTY_UNBOUNDED)
              || (levelMap.get(0) <= boundaries[level]
                  .getMaxEmpty()));
    }
  }

  /**
   * Find for each value the right bucket on the deepest level and increase
   * its count
   */
  @SuppressWarnings("unchecked")
  private void addValues() {

    while (importIterator.hasNext()) {
      int nextValue = importIterator.next();

      HashMap<Integer, Object> mapForCurrentLevel = rootMap;

      for (int i = 0; i < maxLevel - 1; i++) {
        int hashValue = MutableHashTable.hash(nextValue, i);
        int bucket = rangeCalculators[i].getBucket(hashValue);
        Object nextObject = mapForCurrentLevel.get(bucket);
        if (nextObject == null) {
          HashMap<Integer, Object> mapForNextLevel = new HashMap<Integer, Object>();
          mapForCurrentLevel.put(bucket, mapForNextLevel);
          mapForCurrentLevel = mapForNextLevel;

        } else {
          mapForCurrentLevel = (HashMap<Integer, Object>) nextObject;
        }
      }

      int lastHashValue = MutableHashTable.hash(nextValue, maxLevel - 1);
      int deepestBucketNr = rangeCalculators[maxLevel - 1]
          .getBucket(lastHashValue);
      Object countOnDeepestLevel = mapForCurrentLevel
          .get(deepestBucketNr);
      if (countOnDeepestLevel == null) {
        mapForCurrentLevel.put(deepestBucketNr, 1);
      } else {
        mapForCurrentLevel.put(deepestBucketNr,
            ((Integer) countOnDeepestLevel) + 1);
      }

    }
  }

  private void printStatistics() {
    for (int level = 0; level < maxLevel; level++) {
      int bucketCountInLevel = 0;

      SortedMap<Integer, Integer> levelMap = bucketSizesPerLevel
          .get(level);
      Iterator<Integer> bucketSizeIterator = levelMap.keySet().iterator();

      LOG.debug("Statistics for level: " + level);
      LOG.debug("----------------------------------------------");
      LOG.debug("");
      LOG.debug("Bucket Size |      Count");
      LOG.debug("------------------------");

      int i = 0;
      while (bucketSizeIterator.hasNext()) {
        int bucketSize = bucketSizeIterator.next();
        if (bucketSize != 0) {
          int countForBucketSize = levelMap.get(bucketSize);
          bucketCountInLevel += countForBucketSize;
          Formatter formatter = new Formatter();
          formatter.format(" %10d | %10d", bucketSize, countForBucketSize);

          if (levelMap.size() < 20 || i < 3 || i >= (levelMap.size() - 3)) {
            LOG.debug(formatter.out());
          } else if (levelMap.size() / 2 == i) {
            LOG.debug("         .. |         ..");
            LOG.debug(formatter.out());
            LOG.debug("         .. |         ..");
          }
          i++;
          formatter.close();
        }
      }

      LOG.debug("");
      LOG.debug("Number of non-empty buckets in level: "
          + bucketCountInLevel);
      LOG.debug("Number of empty buckets in level    : "
          + levelMap.get(0));
      LOG.debug("Number of different bucket sizes    : "
          + (levelMap.size() - 1));
      LOG.debug("");
      LOG.debug("");
      LOG.debug("");
    }
  }

  /**
   * Create histogram over bucket sizes
   *
   * @param map
   *            Map to be analyzed
   * @param level
   *            Level on which the map is located in
   * @return The total count of hashed values in the map
   */
  private int collectStatistics(HashMap<Integer, Object> map, int level) {
    SortedMap<Integer, Integer> bucketSizesForLevel = bucketSizesPerLevel
        .get(level);

    Iterator<Object> bucketIterator = map.values().iterator();
    int bucketCount = 0;
    int totalValueCount = 0;

    while (bucketIterator.hasNext()) {
      bucketCount++;

      Integer hashValuesInBucket;
      // If we are already on the deepest level, get the count in the
      // bucket, otherwise
      // recursively examine the subtree
      if (level == maxLevel - 1) {
        hashValuesInBucket = (Integer) bucketIterator.next();
      } else {
        @SuppressWarnings("unchecked")
        HashMap<Integer, Object> nextMap = (HashMap<Integer, Object>) bucketIterator
            .next();
        hashValuesInBucket = collectStatistics(nextMap, level + 1);
      }
      totalValueCount += hashValuesInBucket;
      Integer countOfBucketSizes = bucketSizesForLevel
          .get(hashValuesInBucket);
      if (countOfBucketSizes == null) {
        countOfBucketSizes = 1;
      } else {
        countOfBucketSizes += 1;
      }
      bucketSizesForLevel.put(hashValuesInBucket, countOfBucketSizes);
    }

    Integer countOfEmptyBuckets = bucketSizesForLevel.get(0);
    if (countOfEmptyBuckets == null) {
      countOfEmptyBuckets = rangeCalculators[level].getBucketCount()
          - bucketCount;
    } else {
      countOfEmptyBuckets += rangeCalculators[level].getBucketCount()
          - bucketCount;
    }
    bucketSizesForLevel.put(0, countOfEmptyBuckets);

    return totalValueCount;
  }

  /**
   * Expected results for bucket sizes per level
   *
   *
   */
  static class BucketBoundaries {

    public static final int MAX_EMPTY_UNBOUNDED = -1;
    private int lowerBound;
    private int upperBound;
    private int maxEmpty;
    private double percentOutOfRange;

    /**
     *
     *
     * @param lowerBound Lower bound for bucket sizes
     * @param upperBound Upper bound for bucket sizes
     * @param maxEmpty Maximum number of empty buckets
     * @param percentOutOfRange Maximum percentage of buckets out of range
     */
    public BucketBoundaries(int lowerBound, int upperBound, int maxEmpty,
        double percentOutOfRange) {
      this.lowerBound = lowerBound;
      this.upperBound = upperBound;
      this.maxEmpty = maxEmpty;
      this.percentOutOfRange = percentOutOfRange;
    }

    /**
     *
     * @return Lower bound for bucket sizes
     */
    public int getLowerBound() {
      return lowerBound;
    }
   
    /**
     *
     * @return Upper bound for bucket sizes
     */
    public int getUpperBound() {
      return upperBound;
    }

    /**
     *
     * @return Maximum number of empty buckets
     */
    public int getMaxEmpty() {
      return maxEmpty;
    }

    /**
     *
     * @return Maximum percentage of buckets out of range
     */
    public double getPercentOutOfRange() {
      return percentOutOfRange;
    }
  }
}
// TOP
//
// Related Classes of eu.stratosphere.pact.runtime.hash.MultiLevelHashTester$BucketBoundaries
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.