/***************************************************************************
* Copyright (C) 2010 by H-Store Project *
* Brown University *
* Massachusetts Institute of Technology *
* Yale University *
* *
* Permission is hereby granted, free of charge, to any person obtaining *
* a copy of this software and associated documentation files (the *
* "Software"), to deal in the Software without restriction, including *
* without limitation the rights to use, copy, modify, merge, publish, *
* distribute, sublicense, and/or sell copies of the Software, and to *
* permit persons to whom the Software is furnished to do so, subject to *
* the following conditions: *
* *
* The above copyright notice and this permission notice shall be *
* included in all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, *
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF *
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
* OTHER DEALINGS IN THE SOFTWARE. *
***************************************************************************/
package edu.brown.benchmark.markov;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.log4j.Logger;
import org.voltdb.VoltTable;
import edu.brown.api.BenchmarkComponent;
import edu.brown.hstore.conf.HStoreConf;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.rand.AbstractRandomGenerator;
import edu.brown.rand.RandomDistribution;
import edu.brown.rand.WrappingRandomDistribution;
import edu.brown.statistics.ObjectHistogram;
public class MarkovLoader extends BenchmarkComponent {
private static final Logger LOG = Logger.getLogger(MarkovLoader.class);
private final static LoggerBoolean debug = new LoggerBoolean();
private final static LoggerBoolean trace = new LoggerBoolean();
static {
LoggerUtil.attachObserver(LOG, debug, trace);
}
// Composite Id
private static final long COMPOSITE_ID_MASK = 4294967295l; // (2^32)-1
private static final int COMPOSITE_ID_OFFSET = 32;
// scale all table cardinalities by this factor
private double m_scalefactor = 1;
// used internally
private final AbstractRandomGenerator m_rng;
// Histograms
// TableName -> Histogram for A_ID
private final Map<String, ObjectHistogram<Long>> histograms = new HashMap<String, ObjectHistogram<Long>>();
// Data Generator Classes
// TableName -> AbstactTableGenerator
private final Map<String, AbstractTableGenerator> generators = new HashMap<String, AbstractTableGenerator>();
// Table Sizes
// TableName -> Tuple Count
private final Map<String, AtomicLong> table_sizes = new HashMap<String, AtomicLong>();
public static void main(String args[]) throws Exception {
edu.brown.api.BenchmarkComponent.main(MarkovLoader.class, args, true);
}
/**
* Constructor
*
* @param args
*/
public MarkovLoader(String[] args) {
super(args);
int seed = 0;
String randGenClassName = RandomGenerator.class.getName();
String randGenProfilePath = null;
double scaleFactor = HStoreConf.singleton().client.scalefactor;
for (String key : m_extraParams.keySet()) {
String value = m_extraParams.get(key);
// Scale Factor
if (key.equalsIgnoreCase("CLIENT.SCALEFACTOR")) { // FIXME
scaleFactor = Double.parseDouble(value);
}
} // FOR
m_scalefactor = scaleFactor;
AbstractRandomGenerator rng = null;
try {
rng = AbstractRandomGenerator.factory(randGenClassName, seed);
if (randGenProfilePath != null)
rng.loadProfile(randGenProfilePath);
} catch (Exception ex) {
ex.printStackTrace();
System.exit(1);
}
m_rng = rng;
// Histograms + Table Sizes + Generators
for (String tableName : MarkovConstants.TABLENAMES) {
this.histograms.put(tableName, new ObjectHistogram<Long>());
this.table_sizes.put(tableName, new AtomicLong(0l));
if (tableName.equals(MarkovConstants.TABLENAME_TABLEA)) {
this.generators.put(tableName, new TABLEAGenerator());
} else if (tableName.equals(MarkovConstants.TABLENAME_TABLEB)) {
this.generators.put(tableName, new TABLEBGenerator());
} else if (tableName.equals(MarkovConstants.TABLENAME_TABLEC)) {
this.generators.put(tableName, new TABLECGenerator());
} else if (tableName.equals(MarkovConstants.TABLENAME_TABLED)) {
this.generators.put(tableName, new TABLEDGenerator());
}
} // FOR
}
@Override
public String[] getTransactionDisplayNames() {
return new String[] {};
}
/**
* Main execution loop for invoking all the data generator threads
*/
@Override
public void runLoop() {
List<Thread> load_threads = new ArrayList<Thread>();
for (final String tableName : MarkovConstants.TABLENAMES) {
load_threads.add(new Thread() {
@Override
public void run() {
generateTableData(tableName);
}
});
} // FOR
try {
for (Thread thread : load_threads) {
thread.start();
thread.join();
}
for (Thread thread : load_threads)
thread.join();
} catch (InterruptedException e) {
e.printStackTrace();
System.exit(-1);
}
LOG.info("Finished generating data for all tables");
}
/**
* Load the tuples for the given table name
*
* @param tableName
*/
protected void generateTableData(String tableName) {
LOG.info("Starting data generator for '" + tableName + "'");
final AbstractTableGenerator generator = this.generators.get(tableName);
assert (generator != null);
long tableSize = generator.getTableSize();
long batchSize = generator.getBatchSize();
VoltTable table = generator.getVoltTable();
LOG.info("Loading " + tableSize + " tuples for table '" + tableName + "'");
while (generator.hasMore()) {
generator.addRow();
if (table.getRowCount() >= batchSize) {
if (debug.val)
LOG.debug(String.format(tableName + ": loading %d rows (id %d of %d)", table.getRowCount(), generator.getCount(), tableSize));
loadTable(tableName, table);
table.clearRowData();
}
this.table_sizes.get(tableName).incrementAndGet();
} // WHILE
if (table.getRowCount() > 0) {
if (debug.val)
LOG.debug(tableName + ": loading final " + table.getRowCount() + " rows.");
loadTable(tableName, table);
table.clearRowData();
}
LOG.info(tableName + ": Inserted " + this.table_sizes.get(tableName) + " tuples");
}
protected static Long encodeCompositeId(long a_id, long id) {
return (a_id | id << COMPOSITE_ID_OFFSET);
}
/**
* Returns the pieces of a composite id The first element of the returned
* array will be the A_ID portion of the composite and the second element
* will be the ID portion
*
* @param composite_id
* @return
*/
protected static long[] decodeCompositeId(long composite_id) {
long values[] = { composite_id & COMPOSITE_ID_MASK, composite_id >> COMPOSITE_ID_OFFSET };
return (values);
}
// ----------------------------------------------------------------
// DATA GENERATION
// ----------------------------------------------------------------
protected abstract class AbstractTableGenerator {
protected final String tableName;
protected final VoltTable table;
protected final ObjectHistogram<Long> hist;
protected Long tableSize;
protected Long batchSize;
protected final Object[] row;
protected long count = 0;
public AbstractTableGenerator(String tableName, VoltTable table) {
this.tableName = tableName;
this.table = table;
this.hist = MarkovLoader.this.histograms.get(this.tableName);
assert (hist != null);
this.row = new Object[this.table.getColumnCount()];
// Initialize dynamic parameters
try {
String field_name = "TABLESIZE_" + tableName;
Field field_handle = MarkovConstants.class.getField(field_name);
assert (field_handle != null);
this.tableSize = Math.round((Long) field_handle.get(null) * MarkovLoader.this.m_scalefactor);
field_name = "BATCHSIZE_" + tableName;
field_handle = MarkovConstants.class.getField(field_name);
assert (field_handle != null);
this.batchSize = (Long) field_handle.get(null);
} catch (Exception ex) {
LOG.error(ex);
System.exit(1);
}
LOG.info("Preparing to load " + this.tableSize + " tuples for '" + this.tableName + "' [batchSize=" + this.batchSize + "]");
}
public boolean hasMore() {
return (this.count < this.tableSize);
}
public VoltTable getVoltTable() {
return this.table;
}
public Long getTableSize() {
return this.tableSize;
}
public Long getBatchSize() {
return this.batchSize;
}
public String getTableName() {
return this.tableName;
}
public long getCount() {
return this.count;
}
public void addRow() {
this.populateRow();
this.count++;
this.table.addRow(this.row);
}
protected abstract void populateRow();
} // END CLASS
/**
* TABLEA Generator
*/
protected class TABLEAGenerator extends AbstractTableGenerator {
public TABLEAGenerator() {
super(MarkovConstants.TABLENAME_TABLEA, MarkovTables.initializeTableA());
}
@Override
protected void populateRow() {
int col = 0;
// A_ID
row[col++] = new Integer((int) this.count);
// A_SATTR##
for (int j = 0; j < 20; ++j) {
row[col++] = m_rng.astring(6, 32);
}
// A_IATTR##
for (int j = 0; j < 20; ++j) {
row[col++] = m_rng.number(0, 1 << 30);
}
assert (col == this.table.getColumnCount());
// LOG.info(this.tableName + "[" + this.count + "]: " +
// Arrays.toString(row));
}
} // END CLASS
/**
* TABLEB Generator
*/
protected class TABLEBGenerator extends AbstractTableGenerator {
private RandomDistribution.DiscreteRNG rands[];
public TABLEBGenerator() {
super(MarkovConstants.TABLENAME_TABLEB, MarkovTables.initializeTableB());
// TABLEB has a Zipfian distribution on B_A_ID
// We alternate between a curve starting at zero and a curve
// starting at the middle of A_ID
// It's kind of lame but it's something for now
long num_a_records = Math.round(MarkovConstants.TABLESIZE_TABLEA * m_scalefactor);
RandomDistribution.Zipf zipf = new RandomDistribution.Zipf(m_rng, 0, (int) num_a_records, 1.001d);
this.rands = new WrappingRandomDistribution[] { new WrappingRandomDistribution(zipf, 0), new WrappingRandomDistribution(zipf, (int) (num_a_records / 2.0)), };
}
@Override
protected void populateRow() {
int col = 0;
// Use a composite id containing both the A_ID and the index of
// this B_ID record for said A_ID: <B_ID><A_ID>
long a_id = new Long(rands[(int) this.count % 2].nextInt());
hist.put(a_id);
long b_id = encodeCompositeId(a_id, hist.get(a_id)); // - 1);
// B_ID
row[col++] = b_id;
// B_A_ID
row[col++] = a_id;
// B_SATTR##
for (int j = 0; j < 16; ++j) {
row[col++] = m_rng.astring(6, 32);
}
// B_IATTR##
for (int j = 0; j < 16; ++j) {
row[col++] = m_rng.number(0, 1 << 30);
}
assert (col == this.table.getColumnCount());
// LOG.info(this.tableName + "[" + this.count + "]: " +
// Arrays.toString(row));
// for (int i = 0; i < col; i++) {
// LOG.info(" [" + i + "]: " + row[i].toString() + " (len=" +
// row[i].toString().length() + ")");
// }
// LOG.info(StringUtil.SINGLE_LINE);
}
} // END CLASS
/**
* TABLEC Generator
*/
protected class TABLECGenerator extends AbstractTableGenerator {
private RandomDistribution.DiscreteRNG rand;
public TABLECGenerator() {
super(MarkovConstants.TABLENAME_TABLEC, MarkovTables.initializeTableC());
// TABLEC has a uniform distribution on C_A_ID
long num_a_records = Math.round(MarkovConstants.TABLESIZE_TABLEA * m_scalefactor);
this.rand = new RandomDistribution.Flat(m_rng, 0, (int) num_a_records);
}
@Override
protected void populateRow() {
int col = 0;
// Use a composite id containing both the A_ID and the index of
// this C_ID record for said A_ID: <C_ID><A_ID>
long a_id = new Long(rand.nextInt());
hist.put(a_id);
// C_ID
row[col++] = encodeCompositeId(a_id, hist.get(a_id)); // - 1);
// C_A_ID
row[col++] = a_id;
// C_SATTR##
for (int j = 0; j < 16; ++j) {
row[col++] = m_rng.astring(32, 128);
}
// C_IATTR##
for (int j = 0; j < 16; ++j) {
row[col++] = m_rng.number(0, 1 << 30);
}
assert (col == this.table.getColumnCount());
// LOG.info(this.tableName + "[" + this.count + "]: " +
// Arrays.toString(row));
}
} // END CLASS
/**
* TABLED Generator
*/
protected class TABLEDGenerator extends AbstractTableGenerator {
private final long num_a_records;
private final long num_b_records;
private final long num_c_records;
public TABLEDGenerator() {
super(MarkovConstants.TABLENAME_TABLED, MarkovTables.initializeTableD());
this.num_a_records = MarkovLoader.this.table_sizes.get(MarkovConstants.TABLENAME_TABLEA).get();
this.num_b_records = MarkovLoader.this.table_sizes.get(MarkovConstants.TABLENAME_TABLEB).get();
this.num_c_records = MarkovLoader.this.table_sizes.get(MarkovConstants.TABLENAME_TABLEC).get();
}
@Override
protected void populateRow() {
int col = 0;
long id = this.count;
// Pick a random values for the parent B record
Long b_id = new Long(m_rng.number(0, (int) num_b_records));
Long b_a_id = new Long(m_rng.number(0, (int) num_a_records));
// Now generate a parent C record using the affinity generator on
// B_A_ID
// The C_ID value can be random
Long c_id = new Long(m_rng.number(0, (int) num_c_records));
Long c_a_id = new Long(m_rng.numberAffinity(0, (int) num_a_records, (int) id, MarkovConstants.TABLENAME_TABLEC, MarkovConstants.TABLENAME_TABLEB));
row[col++] = new Long(id); // D_ID
row[col++] = b_id; // D_B_ID
row[col++] = b_a_id; // D_B_A_ID
row[col++] = c_id; // D_C_ID
row[col++] = c_a_id; // D_C_A_ID
// D_SATTR##
for (int j = 0; j < 16; ++j) {
assert (col < row.length) : "Record got too big (" + col + " <=> " + row.length + ")";
row[col++] = m_rng.astring(6, 32);
} // FOR
// D_IATTR##
for (int j = 0; j < 16; ++j) {
try {
assert (col < row.length) : "Record got too big (" + col + " <=> " + row.length + ")";
row[col++] = m_rng.number(0, 1 << 30);
} catch (ArrayIndexOutOfBoundsException ex) {
LOG.info(col + " ==> " + row.length);
ex.printStackTrace();
System.exit(1);
}
} // FOR
assert (col == this.table.getColumnCount());
}
} // END CLASS
/**
* @param tablename
* @param table
*/
protected void loadTable(String tablename, VoltTable table) {
// LOG.info("Loading " + table.getRowCount() + " tuples for table " +
// tablename + " [bytes=" + table.getUnderlyingBufferSize() + "]");
// Load up this dirty mess...
try {
this.getClientHandle().callProcedure("@LoadMultipartitionTable", tablename, table);
} catch (Exception e) {
e.printStackTrace();
System.exit(-1);
}
}
}