Package io.druid.examples.rand

Source Code of io.druid.examples.rand.RandomFirehoseFactory

/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013  Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
*/

package io.druid.examples.rand;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.google.common.collect.Maps;
import com.metamx.common.logger.Logger;
import io.druid.data.input.Firehose;
import io.druid.data.input.FirehoseFactory;
import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;
import io.druid.data.input.impl.InputRowParser;

import java.io.IOException;
import java.util.LinkedList;
import java.util.Map;
import java.util.Random;

import static java.lang.Thread.sleep;

/**
* Random value sequence Firehost Factory named "rand".
* Builds a Firehose that emits a stream of random numbers (outColumn, a positive double)
* with timestamps along with an associated token (target).  This provides a timeseries
* that requires no network access for demonstration, characterization, and testing.
* The generated tuples can be thought of as asynchronously
* produced triples (timestamp, outColumn, target) where the timestamp varies depending on
* speed of processing.
*
* <p>
* InputRows are produced as fast as requested, so this can be used to determine the
* upper rate of ingest if sleepUsec is set to 0; nTokens specifies how many associated
* target labels are used.  Generation is round-robin for nTokens and sleep occurs
* every nPerSleep values generated.  A random number seed can be used by setting the
* firehose parameter "seed" to a non-zero value so that values can be reproducible
* (but note that timestamp is not deterministic because timestamps are obtained at
* the moment an event is delivered.)
* Values are offset by adding the modulus of the token number to the random number
* so that token values have distinct, non-overlapping ranges.
* </p>
*
* Example spec file:
* <pre>
* [{
* "schema" : { "dataSource":"randseq",
* "aggregators":[ {"type":"count", "name":"events"},
* {"type":"doubleSum","name":"outColumn","fieldName":"inColumn"} ],
* "indexGranularity":"minute",
* "shardSpec" : { "type": "none" } },
* "config" : { "maxRowsInMemory" : 50000,
* "intermediatePersistPeriod" : "PT2m" },
*
* "firehose" : { "type" : "rand",
* "sleepUsec": 100000,
* "maxGeneratedRows" : 5000000,
* "seed" : 0,
* "nTokens" : 19,
* "nPerSleep" : 3
* },
*
* "plumber" : { "type" : "realtime",
* "windowPeriod" : "PT5m",
* "segmentGranularity":"hour",
* "basePersistDirectory" : "/tmp/realtime/basePersist" }
* }]
* </pre>
*
* Example query using POST to /druid/v2/  (where UTC date and time MUST include the current hour):
* <pre>
* {
* "queryType": "groupBy",
* "dataSource": "randSeq",
* "granularity": "all",
* "dimensions": [],
* "aggregations":[
* { "type": "count", "name": "rows"},
* { "type": "doubleSum", "fieldName": "events", "name": "e"},
* { "type": "doubleSum", "fieldName": "outColumn", "name": "randomNumberSum"}
* ],
* "postAggregations":[
* {  "type":"arithmetic",
* "name":"avg_random",
* "fn":"/",
* "fields":[ {"type":"fieldAccess","name":"randomNumberSum","fieldName":"randomNumberSum"},
* {"type":"fieldAccess","name":"rows","fieldName":"rows"} ]}
* ],
* "intervals":["2012-10-01T00:00/2020-01-01T00"]
* }
* </pre>
*/
@JsonTypeName("rand")
public class RandomFirehoseFactory implements FirehoseFactory<InputRowParser>
{
  private static final Logger log = new Logger(RandomFirehoseFactory.class);
  /**
   * msec to sleep before generating a new row; if this and delayNsec are 0, then go as fast as possible.
   * json param sleepUsec (microseconds) is used to initialize this.
   */
  private final long delayMsec;
  /**
   * nsec to sleep before generating a new row; if this and delayMsec are 0, then go as fast as possible.
   * json param sleepUsec (microseconds) is used to initialize this.
   */
  private final int delayNsec;
  /**
   * max rows to generate, -1 is infinite, 0 means nothing is generated; use this to prevent
   * infinite space consumption or to see what happens when a Firehose stops delivering
   * values, or to have hasMore() return false.
   */
  private final long maxGeneratedRows;
  /**
   * seed for random number generator; if 0, then no seed is used.
   */
  private final long seed;
  /**
   * number of tokens to randomly associate with values (no heap limits). This can be used to
   * stress test the number of tokens.
   */
  private final int nTokens;
  /**
   * Number of token events per sleep interval.
   */
  private final int nPerSleep;

  @JsonCreator
  public RandomFirehoseFactory(
      @JsonProperty("sleepUsec") Long sleepUsec,
      @JsonProperty("maxGeneratedRows") Long maxGeneratedRows,
      @JsonProperty("seed") Long seed,
      @JsonProperty("nTokens") Integer nTokens,
      @JsonProperty("nPerSleep") Integer nPerSleep
  )
  {
    long nsec = (sleepUsec > 0) ? sleepUsec * 1000L : 0;
    long msec = nsec / 1000000L;
    this.delayMsec = msec;
    this.delayNsec = (int) (nsec - (msec * 1000000L));
    this.maxGeneratedRows = maxGeneratedRows;
    this.seed = seed;
    this.nTokens = nTokens;
    this.nPerSleep = nPerSleep;
    if (nTokens <= 0) {
      log.warn("nTokens parameter " + nTokens + " ignored; must be greater than or equal to 1");
      nTokens = 1;
    }
    if (nPerSleep <= 0) {
      log.warn("nPerSleep parameter " + nPerSleep + " ignored; must be greater than or equal to 1");
      nPerSleep = 1;
    }
    log.info("maxGeneratedRows=" + maxGeneratedRows);
    log.info("seed=" + ((seed == 0L) ? "random value" : seed));
    log.info("nTokens=" + nTokens);
    log.info("nPerSleep=" + nPerSleep);
    double dmsec = (double) delayMsec + ((double) this.delayNsec) / 1000000.;
    if (dmsec > 0.0) {
      log.info("sleep period=" + dmsec + "msec");
      log.info(
          "approximate max rate of record generation=" + (nPerSleep * 1000. / dmsec) + "/sec" +
          "  or  " + (60. * nPerSleep * 1000. / dmsec) + "/minute"
      );
    } else {
      log.info("sleep period= NONE");
      log.info("approximate max rate of record generation= as fast as possible");
    }
  }

  @Override
  public Firehose connect(InputRowParser parser) throws IOException
  {
    final LinkedList<String> dimensions = new LinkedList<String>();
    dimensions.add("inColumn");
    dimensions.add("target");

    return new Firehose()
    {
      private final java.util.Random rand = (seed == 0L) ? new Random() : new Random(seed);

      private long rowCount = 0L;
      private boolean waitIfmaxGeneratedRows = true;

      @Override
      public boolean hasMore()
      {
        if (maxGeneratedRows >= 0 && rowCount >= maxGeneratedRows) {
          return waitIfmaxGeneratedRows;
        } else {
          return true; // there are always more random numbers
        }
      }

      @Override
      public InputRow nextRow()
      {
        final long modulus = rowCount % nPerSleep;
        final long nth = (rowCount % nTokens) + 1;
        long sleepMsec = delayMsec;
        // all done?
        if (maxGeneratedRows >= 0 && rowCount >= maxGeneratedRows && waitIfmaxGeneratedRows) {
          // sleep a long time instead of terminating
          sleepMsec = 2000000000L;
        }
        if (sleepMsec > 0L || delayNsec > 0) {
          try {
            if (modulus == 0) {
              sleep(sleepMsec, delayNsec);
            }
          }
          catch (InterruptedException e) {
            throw new RuntimeException("InterruptedException");
          }
        }
        if (++rowCount % 1000 == 0) {
          log.info("%,d events created.", rowCount);
        }

        final Map<String, Object> theMap = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
        theMap.put("inColumn", anotherRand((int) nth));
        theMap.put("target", ("a" + nth));
        return new MapBasedInputRow(System.currentTimeMillis(), dimensions, theMap);
      }

      private Float anotherRand(int scale)
      {
        double f = rand.nextDouble(); // [0.0,1.0]
        return new Float(f + (double) scale);
      }

      @Override
      public Runnable commit()
      {
        // Do nothing.
        return new Runnable()
        {
          @Override
          public void run()
          {

          }
        };
      }

      @Override
      public void close() throws IOException
      {
        // do nothing
      }
    };
  }

  @Override
  public InputRowParser getParser()
  {
    return null;
  }
}
TOP

Related Classes of io.druid.examples.rand.RandomFirehoseFactory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.