Package com.facebook.LinkBench

Source Code of com.facebook.LinkBench.LinkBenchLoad

/*
* Copyright 2012, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.LinkBench;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

import com.facebook.LinkBench.distributions.ID2Chooser;
import com.facebook.LinkBench.distributions.LogNormalDistribution;
import com.facebook.LinkBench.generators.DataGenerator;
import com.facebook.LinkBench.stats.LatencyStats;
import com.facebook.LinkBench.stats.SampledStats;
import com.facebook.LinkBench.util.ClassLoadUtil;


/*
* Multi-threaded loader for loading graph edges (but not nodes) into
* LinkStore. The range from startid1 to maxid1 is chunked up into equal sized
* disjoint ranges.  These are then enqueued for processing by a number
* of loader threads to be loaded in parallel. The #links generated for
* an id1 is based on the configured distribution.  The # of link types,
* and link payload data is also controlled by the configuration file.
*  The actual counts of #links generated is tracked in nlinks_counts.
*/

public class LinkBenchLoad implements Runnable {

  private final Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER);

  private long maxid1;   // max id1 to generate
  private long startid1; // id1 at which to start
  private int  loaderID; // ID for this loader
  private LinkStore store;// store interface (several possible implementations
                          // like mysql, hbase etc)

  private final LogNormalDistribution linkDataSize;
  private final DataGenerator linkDataGen; // Generate link data
  private SampledStats stats;
  private LatencyStats latencyStats;

  Level debuglevel;
  String dbid;

  private ID2Chooser id2chooser;

  // Counters for load statistics
  long sameShuffle;
  long diffShuffle;
  long linksloaded;

  /**
   * special case for single hot row benchmark. If singleAssoc is set,
   * then make this method not print any statistics message, all statistics
   * are collected at a higher layer. */
  boolean singleAssoc;

  private BlockingQueue<LoadChunk> chunk_q;

  // Track last time stats were updated in ms
  private long lastDisplayTime;
  // How often stats should be reported
  private final long displayFreq_ms;

  private LoadProgress prog_tracker;

  private Properties props;

  /**
   * Convenience constructor
   * @param store2
   * @param props
   * @param latencyStats
   * @param loaderID
   * @param nloaders
   */
  public LinkBenchLoad(LinkStore store, Properties props,
      LatencyStats latencyStats, PrintStream csvStreamOut,
      int loaderID, boolean singleAssoc,
      int nloaders, LoadProgress prog_tracker, Random rng) {
    this(store, props, latencyStats, csvStreamOut, loaderID, singleAssoc,
              new ArrayBlockingQueue<LoadChunk>(2), prog_tracker);

    // Just add a single chunk to the queue
    chunk_q.add(new LoadChunk(loaderID, startid1, maxid1, rng));
    chunk_q.add(LoadChunk.SHUTDOWN);
  }


  public LinkBenchLoad(LinkStore linkStore,
                       Properties props,
                       LatencyStats latencyStats,
                       PrintStream csvStreamOut,
                       int loaderID,
                       boolean singleAssoc,
                       BlockingQueue<LoadChunk> chunk_q,
                       LoadProgress prog_tracker) throws LinkBenchConfigError {
    /*
     * Initialize fields from arguments
     */
    this.store = linkStore;
    this.props = props;
    this.latencyStats = latencyStats;
    this.loaderID = loaderID;
    this.singleAssoc = singleAssoc;
    this.chunk_q = chunk_q;
    this.prog_tracker = prog_tracker;


    /*
     * Load settings from properties
     */
    maxid1 = ConfigUtil.getLong(props, Config.MAX_ID);
    startid1 = ConfigUtil.getLong(props, Config.MIN_ID);

    // math functions may cause problems for id1 = 0. Start at 1.
    if (startid1 <= 0) {
      throw new LinkBenchConfigError("startid1 must be >= 1");
    }

    debuglevel = ConfigUtil.getDebugLevel(props);

    double medianLinkDataSize = ConfigUtil.getDouble(props,
                                              Config.LINK_DATASIZE);
    linkDataSize = new LogNormalDistribution();
    linkDataSize.init(0, LinkStore.MAX_LINK_DATA, medianLinkDataSize,
                                         Config.LINK_DATASIZE_SIGMA);

    try {
      linkDataGen = ClassLoadUtil.newInstance(
          ConfigUtil.getPropertyRequired(props, Config.LINK_ADD_DATAGEN),
          DataGenerator.class);
      linkDataGen.init(props, Config.LINK_ADD_DATAGEN_PREFIX);
    } catch (ClassNotFoundException ex) {
      logger.error(ex);
      throw new LinkBenchConfigError("Error loading data generator class: "
            + ex.getMessage());
    }

    displayFreq_ms = ConfigUtil.getLong(props, Config.DISPLAY_FREQ) * 1000;
    int maxsamples = ConfigUtil.getInt(props, Config.MAX_STAT_SAMPLES);

    dbid = ConfigUtil.getPropertyRequired(props, Config.DBID);

    /*
     * Initialize statistics
     */
    linksloaded = 0;
    sameShuffle = 0;
    diffShuffle = 0;
    stats = new SampledStats(loaderID, maxsamples, csvStreamOut);

    id2chooser = new ID2Chooser(props, startid1, maxid1, 1, 1);
  }

  public long getLinksLoaded() {
    return linksloaded;
  }

  @Override
  public void run() {
    try {
      this.store.initialize(props, Phase.LOAD, loaderID);
    } catch (Exception e) {
      logger.error("Error while initializing store", e);
      throw new RuntimeException(e);
    }

    int bulkLoadBatchSize = store.bulkLoadBatchSize();
    boolean bulkLoad = bulkLoadBatchSize > 0;
    ArrayList<Link> loadBuffer = null;
    ArrayList<LinkCount> countLoadBuffer = null;
    if (bulkLoad) {
      loadBuffer = new ArrayList<Link>(bulkLoadBatchSize);
      countLoadBuffer = new ArrayList<LinkCount>(bulkLoadBatchSize);
    }

    logger.info("Starting loader thread  #" + loaderID + " loading links");
    lastDisplayTime = System.currentTimeMillis();

    while (true) {
      LoadChunk chunk;
      try {
        chunk = chunk_q.take();
        //logger.info("chunk end="+chunk.end);
      } catch (InterruptedException ie) {
        logger.warn("InterruptedException not expected, try again", ie);
        continue;
      }

      // Shutdown signal is received though special chunk type
      if (chunk.shutdown) {
        break;
      }

      // Load the link range specified in the chunk
      processChunk(chunk, bulkLoad, bulkLoadBatchSize,
                    loadBuffer, countLoadBuffer);
    }

    if (bulkLoad) {
      // Load any remaining links or counts
      loadLinks(loadBuffer);
      loadCounts(countLoadBuffer);
    }

    if (!singleAssoc) {
      logger.debug(" Same shuffle = " + sameShuffle +
                         " Different shuffle = " + diffShuffle);
      displayStats(lastDisplayTime, bulkLoad);
    }

    store.close();
  }


  private void displayStats(long startTime, boolean bulkLoad) {
    long endTime = System.currentTimeMillis();
    if (bulkLoad) {
      stats.displayStats(startTime, endTime,
          Arrays.asList(LinkBenchOp.LOAD_LINKS_BULK,
          LinkBenchOp.LOAD_COUNTS_BULK, LinkBenchOp.LOAD_LINKS_BULK_NLINKS,
          LinkBenchOp.LOAD_COUNTS_BULK_NLINKS));
    } else {
      stats.displayStats(startTime, endTime,
                         Arrays.asList(LinkBenchOp.LOAD_LINK));
    }
  }

  private void processChunk(LoadChunk chunk, boolean bulkLoad,
      int bulkLoadBatchSize, ArrayList<Link> loadBuffer,
      ArrayList<LinkCount> countLoadBuffer) {
    if (Level.DEBUG.isGreaterOrEqual(debuglevel)) {
      logger.debug("Loader thread  #" + loaderID + " processing "
                  + chunk.toString());
    }

    // Counter for total number of links loaded in chunk;
    long links_in_chunk = 0;

    Link link = null;
    if (!bulkLoad) {
      // When bulk-loading, need to have multiple link objects at a time
      // otherwise reuse object
      link = initLink();
    }

    long prevPercentPrinted = 0;
    for (long id1 = chunk.start; id1 < chunk.end; id1 += chunk.step) {
      long added_links= createOutLinks(chunk.rng, link, loadBuffer, countLoadBuffer,
          id1, singleAssoc, bulkLoad, bulkLoadBatchSize);
      links_in_chunk += added_links;

      if (!singleAssoc) {
        long nloaded = (id1 - chunk.start) / chunk.step;
        if (bulkLoad) {
          nloaded -= loadBuffer.size();
        }
        long percent = 100 * nloaded/(chunk.size);
        if ((percent % 10 == 0) && (percent > prevPercentPrinted)) {
          logger.debug(chunk.toString() ": Percent done = " + percent);
          prevPercentPrinted = percent;
        }
      }

      // Check if stats should be flushed and reset
      long now = System.currentTimeMillis();
      if (lastDisplayTime + displayFreq_ms <= now) {
        displayStats(lastDisplayTime, bulkLoad);
        stats.resetSamples();
        lastDisplayTime = now;
      }
    }

    // Update progress and maybe print message
    prog_tracker.update(chunk.size, links_in_chunk);
  }

  /**
   * Create the out links for a given id1
   * @param link
   * @param loadBuffer
   * @param id1
   * @param singleAssoc
   * @param bulkLoad
   * @param bulkLoadBatchSize
   * @return total number of links added
   */
  private long createOutLinks(Random rng,
      Link link, ArrayList<Link> loadBuffer,
      ArrayList<LinkCount> countLoadBuffer,
      long id1, boolean singleAssoc, boolean bulkLoad,
      int bulkLoadBatchSize) {
    Map<Long, LinkCount> linkTypeCounts = null;
    if (bulkLoad) {
      linkTypeCounts = new HashMap<Long, LinkCount>();
    }
    long nlinks_total = 0;

    for (long link_type: id2chooser.getLinkTypes()) {
      long nlinks = id2chooser.calcLinkCount(id1, link_type);
      nlinks_total += nlinks;
      if (id2chooser.sameShuffle) {
        sameShuffle++;
      } else {
        diffShuffle++;
      }

      if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
        logger.trace("id1 = " + id1 + " link_type = " + link_type +
                           " nlinks = " + nlinks);
      }
      for (long j = 0; j < nlinks; j++) {
        if (bulkLoad) {
          // Can't reuse link object
          link = initLink();
        }
        constructLink(rng, link, id1, link_type, j, singleAssoc);

        if (bulkLoad) {
          loadBuffer.add(link);
          if (loadBuffer.size() >= bulkLoadBatchSize) {
            loadLinks(loadBuffer);
          }

          // Update link counts for this type
          LinkCount count = linkTypeCounts.get(link.link_type);
          if (count == null) {
            count = new LinkCount(id1, link.link_type,
                                  link.time, link.version, 1);
            linkTypeCounts.put(link.link_type, count);
          } else {
            count.count++;
            count.time = Math.max(count.time, link.time);
            count.version = link.version;
          }
        } else {
          loadLink(link, j, nlinks, singleAssoc);
        }
      }

    }

    // Maintain the counts separately
    if (bulkLoad) {
      for (LinkCount count: linkTypeCounts.values()) {
        countLoadBuffer.add(count);
        if (countLoadBuffer.size() >= bulkLoadBatchSize) {
          loadCounts(countLoadBuffer);
        }
      }
    }
    return nlinks_total;
  }

  private Link initLink() {
    Link link = new Link();
    link.link_type = LinkStore.DEFAULT_LINK_TYPE;
    link.visibility = LinkStore.VISIBILITY_DEFAULT;
    link.version = 0;
    link.data = new byte[0];
    link.time = System.currentTimeMillis();
    return link;
  }

  /**
   * Helper method to fill in link data
   * @param link this link is filled in.  Should have been initialized with
   *            initLink() earlier
   * @param outlink_ix the number of this link out of all outlinks from
   *                    id1
   * @param singleAssoc whether we are in singleAssoc mode
   */
  private void constructLink(Random rng, Link link, long id1,
      long link_type, long outlink_ix, boolean singleAssoc) {
    link.id1 = id1;
    link.link_type = link_type;

    // Using random number generator for id2 means we won't know
    // which id2s exist. So link id1 to
    // maxid1 + id1 + 1 thru maxid1 + id1 + nlinks(id1) UNLESS
    // config randomid2max is nonzero.
    if (singleAssoc) {
      link.id2 = 45; // some constant
    } else {
      link.id2 = id2chooser.chooseForLoad(rng, id1, link_type,outlink_ix);
      int datasize = (int)linkDataSize.choose(rng);
      link.data = linkDataGen.fill(rng, new byte[datasize]);
    }

    if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
      logger.trace("id2 chosen is " + link.id2);
    }

    // Randomize time so that id2 and timestamp aren't closely correlated
    link.time = chooseInitialTimestamp(rng);
  }


  private long chooseInitialTimestamp(Random rng) {
    // Choose something from now back to about 50 days
    return (System.currentTimeMillis() - Integer.MAX_VALUE - 1L)
                                        + rng.nextInt();
  }

  /**
   * Load an individual link into the db.
   *
   * If an error occurs during loading, this method will log it,
   *  add stats, and reset the connection.
   * @param link
   * @param outlink_ix
   * @param nlinks
   * @param singleAssoc
   */
  private void loadLink(Link link, long outlink_ix, long nlinks,
      boolean singleAssoc) {
    long timestart = 0;
    if (!singleAssoc) {
      timestart = System.nanoTime();
    }

    try {
      // no inverses for now
      store.addLink(dbid, link, true);
      linksloaded++;

      if (!singleAssoc && outlink_ix == nlinks - 1) {
        long timetaken = (System.nanoTime() - timestart);

        // convert to microseconds
        stats.addStats(LinkBenchOp.LOAD_LINK, timetaken/1000, false);

        latencyStats.recordLatency(loaderID,
                      LinkBenchOp.LOAD_LINK, timetaken);
      }

    } catch (Throwable e){//Catch exception if any
        long endtime2 = System.nanoTime();
        long timetaken2 = (endtime2 - timestart)/1000;
        logger.error("Error: " + e.getMessage(), e);
        stats.addStats(LinkBenchOp.LOAD_LINK, timetaken2, true);
        store.clearErrors(loaderID);
    }
  }

  private void loadLinks(ArrayList<Link> loadBuffer) {
    long timestart = System.nanoTime();
    try {
      // no inverses for now
      int nlinks = loadBuffer.size();
      store.addBulkLinks(dbid, loadBuffer, true);
      linksloaded += nlinks;
      loadBuffer.clear();

      long timetaken = (System.nanoTime() - timestart);

      // convert to microseconds
      stats.addStats(LinkBenchOp.LOAD_LINKS_BULK, timetaken/1000, false);
      stats.addStats(LinkBenchOp.LOAD_LINKS_BULK_NLINKS, nlinks, false);

      latencyStats.recordLatency(loaderID, LinkBenchOp.LOAD_LINKS_BULK,
                                                             timetaken);
    } catch (Throwable e){//Catch exception if any
        long endtime2 = System.nanoTime();
        long timetaken2 = (endtime2 - timestart)/1000;
        logger.error("Error: " + e.getMessage(), e);
        stats.addStats(LinkBenchOp.LOAD_LINKS_BULK, timetaken2, true);
        store.clearErrors(loaderID);
    }
  }

  private void loadCounts(ArrayList<LinkCount> loadBuffer) {
    long timestart = System.nanoTime();

    try {
      // no inverses for now
      int ncounts = loadBuffer.size();
      store.addBulkCounts(dbid, loadBuffer);
      loadBuffer.clear();

      long timetaken = (System.nanoTime() - timestart);

      // convert to microseconds
      stats.addStats(LinkBenchOp.LOAD_COUNTS_BULK, timetaken/1000, false);
      stats.addStats(LinkBenchOp.LOAD_COUNTS_BULK_NLINKS, ncounts, false);

      latencyStats.recordLatency(loaderID, LinkBenchOp.LOAD_COUNTS_BULK,
                                                             timetaken);
    } catch (Throwable e){//Catch exception if any
        long endtime2 = System.nanoTime();
        long timetaken2 = (endtime2 - timestart)/1000;
        logger.error("Error: " + e.getMessage(), e);
        stats.addStats(LinkBenchOp.LOAD_COUNTS_BULK, timetaken2, true);
        store.clearErrors(loaderID);
    }
  }

  /**
   * Represents a portion of the id space, starting with
   * start, going up until end (non-inclusive) with step size
   * step
   *
   */
  public static class LoadChunk {
    public static LoadChunk SHUTDOWN = new LoadChunk(true,
                                              0, 0, 0, 1, null);

    public LoadChunk(long id, long start, long end, Random rng) {
      this(false, id, start, end, 1, rng);
    }
    public LoadChunk(boolean shutdown,
                      long id, long start, long end, long step, Random rng) {
      super();
      this.shutdown = shutdown;
      this.id = id;
      this.start = start;
      this.end = end;
      this.step = step;
      this.size = (end - start) / step;
      this.rng = rng;
    }
    public final boolean shutdown;
    public final long id;
    public final long start;
    public final long end;
    public final long step;
    public final long size;
    public Random rng;

    public String toString() {
      if (shutdown) {
        return "chunk SHUTDOWN";
      }
      String range;
      if (step == 1) {
        range = "[" + start + ":" + end + "]";
      } else {
        range = "[" + start + ":" + step + ":" + end + "]";
      }
      return "chunk " + id + range;
    }
  }
  public static class LoadProgress {
    /** report progress at intervals of progressReportInterval links */
    private final long progressReportInterval;

    public LoadProgress(Logger progressLogger,
                        long id1s_total, long progressReportInterval) {
      super();
      this.progressReportInterval = progressReportInterval;
      this.progressLogger = progressLogger;
      this.id1s_total = id1s_total;
      this.starttime_ms = 0;
      this.id1s_loaded = new AtomicLong();
      this.links_loaded = new AtomicLong();
    }

    public static LoadProgress create(Logger progressLogger, Properties props) {
      long maxid1 = ConfigUtil.getLong(props, Config.MAX_ID);
      long startid1 = ConfigUtil.getLong(props, Config.MIN_ID);
      long nids = maxid1 - startid1;
      long progressReportInterval = ConfigUtil.getLong(props,
                           Config.LOAD_PROG_INTERVAL, 50000L);
      return new LoadProgress(progressLogger, nids, progressReportInterval);
    }

    private final Logger progressLogger;
    private final AtomicLong id1s_loaded; // progress
    private final AtomicLong links_loaded; // progress
    private final long id1s_total; // goal
    private long starttime_ms;

    /** Mark current time as start time for load */
    public void startTimer() {
      starttime_ms = System.currentTimeMillis();
    }

    /**
     * Update progress
     * @param id1_incr number of additional id1s loaded since last call
     * @param links_incr number of links loaded since last call
     */
    public void update(long id1_incr, long links_incr) {
      long curr_id1s = id1s_loaded.addAndGet(id1_incr);

      long curr_links = links_loaded.addAndGet(links_incr);
      long prev_links = curr_links - links_incr;

      if ((curr_links / progressReportInterval) >
          (prev_links / progressReportInterval) || curr_id1s == id1s_total) {
        double percentage = (curr_id1s / (double)id1s_total) * 100.0;

        // Links per second loaded
        long now = System.currentTimeMillis();
        double link_rate = ((curr_links) / ((double) now - starttime_ms))*1000;
        double id1_rate = ((curr_id1s) / ((double) now - starttime_ms))*1000;
        progressLogger.info(String.format(
            "%d/%d id1s loaded (%.1f%% complete) at %.2f id1s/sec avg. " +
            "%d links loaded at %.2f links/sec avg.",
            curr_id1s, id1s_total, percentage, id1_rate,
            curr_links, link_rate));
      }
    }
  }
}
TOP

Related Classes of com.facebook.LinkBench.LinkBenchLoad

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.