Package org.apache.hadoop.hive.ql.io

Examples of org.apache.hadoop.hive.ql.io.HiveKey
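
The examples below exercise HiveKey from Hive's ReduceSink/TopN path and from bucket-number computation code. HiveKey is a BytesWritable subclass whose hash code is assigned explicitly (rather than derived from the bytes) and which also records a "distribution key" length. A minimal sketch of just that API, using only the calls that appear in the snippets; the byte contents and hash values are made-up sample data:

import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.io.BytesWritable;

public class HiveKeySketch {
  public static void main(String[] args) {
    byte[] serializedKey = {1, 2, 3}; // made-up sample bytes

    // Construct with the bytes and a precomputed hash, as the flush() example does.
    HiveKey withHash = new HiveKey(serializedKey, 42);

    // Or build it up in steps, as the ReduceSink examples do.
    HiveKey stepwise = new HiveKey();
    stepwise.set(serializedKey, 0, serializedKey.length); // inherited from BytesWritable
    stepwise.setHashCode(42);
    stepwise.setDistKeyLength(serializedKey.length);

    BytesWritable value = new BytesWritable(new byte[] {4, 5});
    System.out.println(withHash.hashCode() + " / " + stepwise.getDistKeyLength()
        + " / " + value.getLength());
  }
}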


  }

  protected void flush(int index) throws IOException {
    if (index != evicted && values[index] != null) {
      // BytesWritable copies the array in set(), so just create a new one here.
      HiveKey keyWritable = new HiveKey(keys[index], hashes[index]);
      BytesWritable valueWritable = new BytesWritable(values[index]);
      collector.collect(keyWritable, valueWritable);
      usage -= values[index].length;
      values[index] = null;
    }


            ObjectInspector udfInspector = udf.initialize(objectInspectors);
            checkArgument(udfInspector instanceof IntObjectInspector, "expected IntObjectInspector: %s", udfInspector);
            IntObjectInspector inspector = (IntObjectInspector) udfInspector;

            Object result = udf.evaluate(deferredObjects);
            HiveKey hiveKey = new HiveKey();
            hiveKey.setHashCode(inspector.get(result));

            int bucketNumber = new DefaultHivePartitioner<>().getBucket(hiveKey, null, bucketCount);

            return Optional.of(new HiveBucket(bucketNumber, bucketCount));
        }

            ObjectInspector udfInspector = udf.initialize(objectInspectors);
            checkArgument(udfInspector instanceof IntObjectInspector, "expected IntObjectInspector: %s", udfInspector);
            IntObjectInspector inspector = (IntObjectInspector) udfInspector;

            Object result = udf.evaluate(deferredObjects);
            HiveKey hiveKey = new HiveKey();
            hiveKey.setHashCode(inspector.get(result));

            return Optional.of(new DefaultHivePartitioner<>().getBucket(hiveKey, null, bucketCount));
        }
        catch (HiveException e) {
            log.debug(e, "Error evaluating bucket number");

            ObjectInspector udfInspector = udf.initialize(objectInspectors);
            IntObjectInspector inspector = checkType(udfInspector, IntObjectInspector.class, "udfInspector");

            Object result = udf.evaluate(deferredObjects);
            HiveKey hiveKey = new HiveKey();
            hiveKey.setHashCode(inspector.get(result));

            int bucketNumber = new DefaultHivePartitioner<>().getBucket(hiveKey, null, bucketCount);

            return Optional.of(new HiveBucket(bucketNumber, bucketCount));
        }
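
The three bucket-computation variants above follow the same recipe: evaluate the bucketing UDF, read the resulting int through an IntObjectInspector, stamp it onto a HiveKey via setHashCode, and let DefaultHivePartitioner map that hash to a bucket. A condensed sketch of just the partitioning step, assuming DefaultHivePartitioner lives in the same org.apache.hadoop.hive.ql.io package; the hash value and bucket count are made-up inputs and the UDF evaluation is elided:

import org.apache.hadoop.hive.ql.io.DefaultHivePartitioner;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.io.BytesWritable;

public class BucketSketch {
  public static void main(String[] args) {
    int bucketCount = 32;   // made-up table bucket count
    int udfHash = 123456;   // would come from inspector.get(result) in the examples

    HiveKey hiveKey = new HiveKey();
    hiveKey.setHashCode(udfHash);

    // Same call as in the examples; the value argument is unused by a hash partitioner.
    int bucketNumber = new DefaultHivePartitioner<HiveKey, BytesWritable>()
        .getBucket(hiveKey, null, bucketCount);

    System.out.println("bucket = " + bucketNumber);
  }
}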

        int buckNum = 0;
        if (bucketEval != null && bucketEval.length != 0) {
          buckNum = computeBucketNumber(vrg, rowIndex, conf.getNumBuckets());
          cachedKeys[0][buckColIdxInKey] = new IntWritable(buckNum);
        }
        HiveKey firstKey = toHiveKey(cachedKeys[0], tag, null);
        int distKeyLength = firstKey.getDistKeyLength();
        // Add first distinct expression, if any.
        if (numDistinctExprs > 0) {
          populateCachedDistinctKeys(vrg, rowIndex, 0);
          firstKey = toHiveKey(cachedKeys[0], tag, distKeyLength);
        }

        if (useTopN) {
          reducerHash.tryStoreVectorizedKey(firstKey, batchIndex);
        } else {
          // No TopN, just forward the first key and all others.
          int hashCode = 0;
          if (bucketEval != null && bucketEval.length != 0) {
            hashCode = computeHashCode(vrg, rowIndex, buckNum);
          } else {
            hashCode = computeHashCode(vrg, rowIndex);
          }
          firstKey.setHashCode(hashCode);
          BytesWritable value = makeValueWritable(vrg, rowIndex);
          collect(firstKey, value);
          forwardExtraDistinctRows(vrg, rowIndex, hashCode, value, distKeyLength, tag, 0);
        }
      }

      if (!useTopN) return; // All done.

      // If we use topN, we have called tryStore on every key now. We can process the results.
      for (int batchIndex = 0 ; batchIndex < vrg.size; ++batchIndex) {
        int result = reducerHash.getVectorizedBatchResult(batchIndex);
        if (result == TopNHash.EXCLUDE) continue;
        int rowIndex = batchIndex;
        if (vrg.selectedInUse) {
          rowIndex = vrg.selected[batchIndex];
        }
        // Compute value and hashcode - we'd either store or forward them.
        int hashCode = computeHashCode(vrg, rowIndex);
        BytesWritable value = makeValueWritable(vrg, rowIndex);
        int distKeyLength = -1;
        if (result == TopNHash.FORWARD) {
          HiveKey firstKey = reducerHash.getVectorizedKeyToForward(batchIndex);
          firstKey.setHashCode(hashCode);
          distKeyLength = firstKey.getDistKeyLength();
          collect(firstKey, value);
        } else {
          reducerHash.storeValue(result, value, hashCode, true);
          distKeyLength = reducerHash.getVectorizedKeyDistLength(batchIndex);
        }

    for (int i = 1; i < numDistinctExprs; i++) {
      if (i != baseIndex) {
        System.arraycopy(cachedKeys[baseIndex], 0, cachedKeys[i], 0, numDistributionKeys);
      }
      populateCachedDistinctKeys(vrg, rowIndex, i);
      HiveKey hiveKey = toHiveKey(cachedKeys[i], tag, distKeyLength);
      hiveKey.setHashCode(hashCode);
      collect(hiveKey, value);
    }
  }

  /**
   * Returns a copy of the stored key for the given batch position, ready to forward.
   * @param batchIndex index of the key in the batch.
   * @return The key corresponding to the index.
   */
  public HiveKey getVectorizedKeyToForward(int batchIndex) {
    int index = MAY_FORWARD - batchIndexToResult[batchIndex];
    HiveKey hk = new HiveKey();
    hk.set(keys[index], 0, keys[index].length);
    hk.setDistKeyLength(distKeyLengths[index]);
    return hk;
  }

      if (bucketEval != null) {
        buckNum = computeBucketNumber(row, conf.getNumBuckets());
        cachedKeys[0][buckColIdxInKey] = new IntWritable(buckNum);
      }

      HiveKey firstKey = toHiveKey(cachedKeys[0], tag, null);
      int distKeyLength = firstKey.getDistKeyLength();
      if (numDistinctExprs > 0) {
        populateCachedDistinctKeys(row, 0);
        firstKey = toHiveKey(cachedKeys[0], tag, distKeyLength);
      }

      // Try to store the first key. If it's not excluded, we will proceed.
      int firstIndex = reducerHash.tryStoreKey(firstKey);
      if (firstIndex == TopNHash.EXCLUDE) return; // Nothing to do.
      // Compute value and hashcode - we'd either store or forward them.
      BytesWritable value = makeValueWritable(row);
      int hashCode = 0;
      if (bucketEval == null) {
        hashCode = computeHashCode(row);
      } else {
        hashCode = computeHashCode(row, buckNum);
      }

      if (firstIndex == TopNHash.FORWARD) {
        firstKey.setHashCode(hashCode);
        collect(firstKey, value);
      } else {
        assert firstIndex >= 0;
        reducerHash.storeValue(firstIndex, value, hashCode, false);
      }

      // All other distinct keys will just be forwarded. This could be optimized...
      for (int i = 1; i < numDistinctExprs; i++) {
        System.arraycopy(cachedKeys[0], 0, cachedKeys[i], 0, numDistributionKeys);
        populateCachedDistinctKeys(row, i);
        HiveKey hiveKey = toHiveKey(cachedKeys[i], tag, distKeyLength);
        hiveKey.setHashCode(hashCode);
        collect(hiveKey, value);
      }
    } catch (HiveException e) {
      throw e;
    } catch (Exception e) {

    keyWritable.setDistKeyLength((distLength == null) ? keyLength : distLength);
    return keyWritable;
  }

  public void collect(byte[] key, byte[] value, int hash) throws IOException {
    HiveKey keyWritable = new HiveKey(key, hash);
    BytesWritable valueWritable = new BytesWritable(value);
    collect(keyWritable, valueWritable);
  }
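
The collect(byte[], byte[], int) helper above wraps raw key/value bytes into a (HiveKey, BytesWritable) pair, which is then handed to a collector, much like the flush() example at the top. A small sketch of that hand-off, assuming a mapred OutputCollector as the sink and using an in-memory lambda collector purely for illustration:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapred.OutputCollector;

public class CollectSketch {
  public static void main(String[] args) throws IOException {
    List<String> seen = new ArrayList<>();
    OutputCollector<HiveKey, BytesWritable> collector =
        (key, value) -> seen.add(key.hashCode() + ":" + value.getLength());

    byte[] keyBytes = {1, 2, 3};  // made-up sample data
    byte[] valueBytes = {4, 5};

    // Mirrors the helper above: wrap the bytes and pass them on.
    HiveKey keyWritable = new HiveKey(keyBytes, 77);
    BytesWritable valueWritable = new BytesWritable(valueBytes);
    collector.collect(keyWritable, valueWritable);

    System.out.println(seen);
  }
}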

        // In the non-partitioned case we still want to compute the bucket number for updates and
        // deletes.
        bucketNumber = computeBucketNumber(row, conf.getNumBuckets());
      }

      HiveKey firstKey = toHiveKey(cachedKeys[0], tag, null);
      int distKeyLength = firstKey.getDistKeyLength();
      if (numDistinctExprs > 0) {
        populateCachedDistinctKeys(row, 0);
        firstKey = toHiveKey(cachedKeys[0], tag, distKeyLength);
      }

      final int hashCode;

      // distKeyLength doesn't include tag, but includes buckNum in cachedKeys[0]
      if (useUniformHash && partitionEval.length > 0) {
        hashCode = computeMurmurHash(firstKey);
      } else {
        hashCode = computeHashCode(row, bucketNumber);
      }

      firstKey.setHashCode(hashCode);

      /*
       * in case of TopN for windowing, we need to distinguish between rows with
       * null partition keys and rows with value 0 for partition keys.
       */
      boolean partKeyNull = conf.isPTFReduceSink() && partitionKeysAreNull(row);

      // Try to store the first key. If it's not excluded, we will proceed.
      int firstIndex = reducerHash.tryStoreKey(firstKey, partKeyNull);
      if (firstIndex == TopNHash.EXCLUDE) return; // Nothing to do.
      // Compute value and hashcode - we'd either store or forward them.
      BytesWritable value = makeValueWritable(row);

      if (firstIndex == TopNHash.FORWARD) {
        collect(firstKey, value);
      } else {
        assert firstIndex >= 0;
        reducerHash.storeValue(firstIndex, firstKey.hashCode(), value, false);
      }

      // All other distinct keys will just be forwarded. This could be optimized...
      for (int i = 1; i < numDistinctExprs; i++) {
        System.arraycopy(cachedKeys[0], 0, cachedKeys[i], 0, numDistributionKeys);
        populateCachedDistinctKeys(row, i);
        HiveKey hiveKey = toHiveKey(cachedKeys[i], tag, distKeyLength);
        hiveKey.setHashCode(hashCode);
        collect(hiveKey, value);
      }
    } catch (HiveException e) {
      throw e;
    } catch (Exception e) {
