Package com.cloudera.flume.handlers.debug

Source Code of com.cloudera.flume.handlers.debug.BloomCheckDecorator

/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.cloudera.flume.handlers.debug;

import java.io.IOException;
import java.util.Arrays;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.flume.conf.Context;
import com.cloudera.flume.conf.FlumeSpecException;
import com.cloudera.flume.conf.SinkFactory.SinkDecoBuilder;
import com.cloudera.flume.core.Attributes;
import com.cloudera.flume.core.CompositeSink;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventSink;
import com.cloudera.flume.core.EventSinkDecorator;
import com.cloudera.flume.reporter.ReportEvent;
import com.cloudera.util.bloom.BloomSet;
import com.google.common.base.Preconditions;

/**
* BloomCheckDeco records checksums of the body of all the events received in a
* set represented as a bloom filter. This is intended to be downstream from a
* BloomGeneratorDeco. The generator may eventually send a specially tagged
* event with a bloom filter's backing data. The generator's bloom filter
* is then compared against the checker's. Ideally the incoming set is equal to
* or a subset of this checker's bloom set.
*
* Some metrics are reported by the decorator's getReport method.
*
* Initially the decorator keeps track of the bloom inclusion state. It starts
* as UNKNOWN. If a generator bloom set is a subset, the decorator takes on the
* SUCCESS state. If at any time a bloom set fails the containment check, the
* decorator enters and gets stuck in FAIL mode until it is closed and opened.
* The number of successful and failed bloom checks are also recorded.
*/
public class BloomCheckDecorator extends EventSinkDecorator<EventSink> {
  public static final Logger LOG = LoggerFactory
      .getLogger(BloomCheckDecorator.class);
  protected BloomSet bloom;
  final int size; // size of bloom bit array in bytes
  final int hashes; // number of hashes per insertion/membership test
  final EventSink reportSink;

  public final static String A_SUCCESS = "bloomCheckSuccesses";
  public final static String A_FAILS = "bloomCheckFails";
  public final static String A_STATE = "bloomCheckState";

  public static enum BloomCheckState {
    UNKNOWN, // unknown means no bloom check msg received,
    SUCCESS, // all received bloom checks are ok and at least one bloock check
    // msg received.
    FAIL, // at least one received bloom check failed.
  };

  // metrics
  BloomCheckState state = BloomCheckState.UNKNOWN;
  int successCount = 0;
  int failCount = 0;

  /**
   * This checker must have the same size and # hash as the upstream
   * BloomGeneratorDeco.
   */
  public BloomCheckDecorator(EventSink s, int size, int hashes, EventSink rpt) {
    super(s);
    this.size = size;
    this.hashes = hashes;
    this.reportSink = rpt;
  }

  public BloomCheckDecorator(EventSink s, int size, int hashes) {
    super(s);
    this.size = size;
    this.hashes = hashes;
    this.reportSink = new NullSink();
  }

  /**
   * The default sink is null and must be set to be used.
   *
   * This checker must have the same size and # hash as the upstream
   * BloomGeneratorDeco.
   */
  public BloomCheckDecorator(int size, int hashes) {
    this(null, size, hashes);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  synchronized public void open() throws IOException, InterruptedException {
    bloom = new BloomSet(size, hashes);
    state = BloomCheckState.UNKNOWN;
    successCount = 0;
    failCount = 0;
    super.open();
    reportSink.open();
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void append(Event e) throws IOException, InterruptedException {
    byte[] data = e.get(BloomGeneratorDeco.A_BLOOMSETDATA);
    // if has BloomSet Data is present
    if (data != null) {
      // process and then drop the event.
      BloomSet subset = new BloomSet(data);
      boolean contained = bloom.contains(subset);

      if (LOG.isDebugEnabled()) {
        LOG.debug("received bloom set: " + Arrays.toString(subset.getBytes()));
        LOG.debug("local bloom set:    " + Arrays.toString(bloom.getBytes()));
      }

      synchronized (this) {
        switch (state) {
        case UNKNOWN:
        case SUCCESS:
          state = contained ? BloomCheckState.SUCCESS : BloomCheckState.FAIL;
          break;

        case FAIL:
        default:
          state = BloomCheckState.FAIL;
          LOG
              .info("received bloom set was not contained by local set! entering FAILed state");
        }

        if (contained) {
          successCount++;
        } else {
          failCount++;
        }
      }

      ReportEvent rpt = getMetrics();
      LOG.info(rpt.toText());
      reportSink.append(e);
      // record info but do not pass the message on.
      return;
    }

    // track the message.
    BloomGeneratorDeco.includeEvent(bloom, e);
    // and then just send the data downstream
    super.append(e);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  synchronized public void close() throws IOException, InterruptedException {
    reportSink.close();
  }

  /**
   * {@inheritDoc}
   */
  @Override
  synchronized public ReportEvent getMetrics() {
    ReportEvent evt = super.getMetrics();
    evt.set(A_STATE, state.toString().getBytes());
    Attributes.setInt(evt, A_SUCCESS, successCount);
    Attributes.setInt(evt, A_FAILS, failCount);
    return evt;
  }

  /**
   * Builds a BloomCheckDeco with optional specified number of bits and number
   * of hash functions.
   */
  public static SinkDecoBuilder builder() {
    return new SinkDecoBuilder() {
      @Override
      public EventSinkDecorator<EventSink> build(Context ctx, String... argv) {
        Preconditions.checkArgument(argv.length <= 3,
            "usage: bloomGen[(sz[,hashes[,rptSink]])]");
        int sz = 100000000; // default: 100M bits => 12.5MB
        int hashes = 2; // default: # of hashes per insert/lookup
        if (argv.length >= 1) {
          sz = Integer.parseInt(argv[0]);
        }
        if (argv.length >= 2) {
          hashes = Integer.parseInt(argv[1]);
        }
        EventSink rptSink = new NullSink();
        if (argv.length >= 3) {
          String rptSpec = argv[2];
          try {
            rptSink = new CompositeSink(ctx, rptSpec);
          } catch (FlumeSpecException e) {
            LOG.debug("failed to parse rpt spec", e);
            throw new IllegalArgumentException(e.getMessage());
          }
        }

        return new BloomCheckDecorator(null, sz, hashes, rptSink);
      }
    };
  }
}
TOP

Related Classes of com.cloudera.flume.handlers.debug.BloomCheckDecorator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.