Examples of HyperLogLog


Examples of com.clearspring.analytics.stream.cardinality.HyperLogLog

    }

    @Override
    public void prepare(FlowProcess flowProcess, OperationCall operationCall) {
      JobConf conf = (JobConf) flowProcess.getConfigCopy();
      approxCounter = new HyperLogLog(BloomProps.getHllErr(conf));
      sampleRate = BloomProps.getKeySampleRate(conf);
      tupleSerializationUtil = new TupleSerializationUtil((JobConf) flowProcess.getConfigCopy());
    }
View Full Code Here

Examples of com.clearspring.analytics.stream.cardinality.HyperLogLog

    List<HyperLogLog> countParts = new LinkedList<HyperLogLog>();

    long totalSum = 0;
    while (in.hasNext()) {
      TupleEntry tuple = in.next();
      HyperLogLog card = HyperLogLog.Builder.build(Bytes.getBytes((BytesWritable) tuple.getObject("bytes")));
      countParts.add(card);
      totalSum += card.cardinality();
    }

    HyperLogLog merged = (HyperLogLog) new HyperLogLog(BloomProps.getHllErr(conf)).merge(countParts.toArray(new ICardinality[countParts.size()]));
    long cardinality = merged.cardinality();

    //  HLL estimation doesn't work over 2^32, and the cardinality code just returns 0.
    //  Honestly if you get this high, your bloom filter is probably saturated anyway, so just return that max.
    if (cardinality == 0 && totalSum != 0) {
      LOG.info("HyperLogLog likely reached its max estimation of 2^32! Returning that max, but true count likely higher.");
View Full Code Here

Examples of io.airlift.stats.cardinality.HyperLogLog

    @ScalarFunction
    @SqlType(StandardTypes.HYPER_LOG_LOG)
    public static Slice createHll(@SqlType(StandardTypes.BIGINT) long value)
    {
        HyperLogLog hll = HyperLogLog.newInstance(4096);
        hll.add(value);
        return hll.serialize();
    }
View Full Code Here

Examples of io.airlift.stats.cardinality.HyperLogLog

    }

    @Override
    protected void processInput(HyperLogLogState state, Block block, int index)
    {
        HyperLogLog input = HyperLogLog.newInstance(block.getSlice(index));

        HyperLogLog previous = state.getHyperLogLog();
        if (previous == null) {
            state.setHyperLogLog(input);
            state.addMemoryUsage(input.estimatedInMemorySize());
        }
        else {
            state.addMemoryUsage(-previous.estimatedInMemorySize());
            previous.mergeWith(input);
            state.addMemoryUsage(previous.estimatedInMemorySize());
        }
    }
View Full Code Here

Examples of io.airlift.stats.cardinality.HyperLogLog

    }

    @Override
    protected void processInput(HyperLogLogState state, Block block, int index)
    {
        HyperLogLog hll = state.getHyperLogLog();
        if (hll == null) {
            hll = HyperLogLog.newInstance(NUMBER_OF_BUCKETS);
            state.setHyperLogLog(hll);
            state.addMemoryUsage(hll.estimatedInMemorySize());
        }

        state.addMemoryUsage(-hll.estimatedInMemorySize());
        add(block, index, parameterType, hll);
        state.addMemoryUsage(hll.estimatedInMemorySize());
    }
View Full Code Here

Examples of io.airlift.stats.cardinality.HyperLogLog

    }

    @Override
    protected void combineState(HyperLogLogState state, HyperLogLogState otherState)
    {
        HyperLogLog input = otherState.getHyperLogLog();

        HyperLogLog previous = state.getHyperLogLog();
        if (previous == null) {
            state.setHyperLogLog(input);
            state.addMemoryUsage(input.estimatedInMemorySize());
        }
        else {
            state.addMemoryUsage(-previous.estimatedInMemorySize());
            previous.mergeWith(input);
            state.addMemoryUsage(previous.estimatedInMemorySize());
        }
    }
View Full Code Here

Examples of io.airlift.stats.cardinality.HyperLogLog

                // skip null values
                if (!values.isNull() && (masks == null || masks.getBoolean())) {
                    long groupId = groupIdsBlock.getGroupId(position);

                    HyperLogLog hll = estimators.get(groupId);
                    if (hll == null) {
                        hll = HyperLogLog.newInstance(NUMBER_OF_BUCKETS);
                        estimators.set(groupId, hll);
                        sizeOfValues += hll.estimatedInMemorySize();
                    }

                    sizeOfValues -= hll.estimatedInMemorySize();
                    add(values, parameterType, hll);
                    sizeOfValues += hll.estimatedInMemorySize();
                }
            }
            checkState(!values.advanceNextPosition(), "group id and value blocks have different number of entries");
        }
View Full Code Here

Examples of io.airlift.stats.cardinality.HyperLogLog

                // skip null values
                if (!intermediates.isNull()) {
                    long groupId = groupIdsBlock.getGroupId(position);

                    HyperLogLog input = HyperLogLog.newInstance(intermediates.getSlice());

                    HyperLogLog previous = estimators.get(groupId);
                    if (previous == null) {
                        estimators.set(groupId, input);
                        sizeOfValues += input.estimatedInMemorySize();
                    }
                    else {
                        sizeOfValues -= previous.estimatedInMemorySize();
                        previous.mergeWith(input);
                        sizeOfValues += previous.estimatedInMemorySize();
                    }
                }
            }
            checkState(!intermediates.advanceNextPosition());
        }
View Full Code Here

Examples of io.airlift.stats.cardinality.HyperLogLog

        }

        @Override
        public void evaluateFinal(int groupId, BlockBuilder output)
        {
            HyperLogLog estimator = estimators.get(groupId);
            if (estimator == null) {
                output.appendNull();
            }
            else {
                output.appendSlice(estimator.serialize());
            }
        }
View Full Code Here

Examples of io.airlift.stats.cardinality.HyperLogLog

            BlockCursor intermediates = block.cursor();

            for (int position = 0; position < block.getPositionCount(); position++) {
                checkState(intermediates.advanceNextPosition());
                if (!intermediates.isNull()) {
                    HyperLogLog instance = HyperLogLog.newInstance(intermediates.getSlice());

                    if (estimator == null) {
                        estimator = instance;
                    }
                    else {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.