package edu.brown.costmodel;
import java.io.File;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.log4j.Logger;
import org.voltdb.CatalogContext;
import org.voltdb.catalog.Database;
import org.voltdb.catalog.Procedure;
import org.voltdb.catalog.Statement;
import org.voltdb.utils.Pair;
import edu.brown.catalog.CatalogKey;
import edu.brown.catalog.CatalogUtil;
import edu.brown.catalog.ClusterConfiguration;
import edu.brown.catalog.FixCatalog;
import edu.brown.costmodel.SingleSitedCostModel.QueryCacheEntry;
import edu.brown.costmodel.SingleSitedCostModel.TransactionCacheEntry;
import edu.brown.designer.DesignerHints;
import edu.brown.designer.partitioners.plan.PartitionPlan;
import edu.brown.hstore.HStoreConstants;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.statistics.Histogram;
import edu.brown.statistics.ObjectHistogram;
import edu.brown.utils.ArgumentsParser;
import edu.brown.utils.ClassUtil;
import edu.brown.utils.CollectionUtil;
import edu.brown.utils.Consumer;
import edu.brown.utils.MathUtil;
import edu.brown.utils.PartitionEstimator;
import edu.brown.utils.PartitionSet;
import edu.brown.utils.Producer;
import edu.brown.utils.StringUtil;
import edu.brown.utils.ThreadUtil;
import edu.brown.workload.TransactionTrace;
import edu.brown.workload.Workload;
import edu.brown.workload.filters.Filter;
/**
 * A workload cost model that divides the workload into a fixed number of time
 * intervals and maintains a separate inner cost model for each interval. The
 * final cost combines a txn-weighted execution cost with a txn-weighted skew
 * factor computed across all of the intervals.
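 *
 * <p>Example usage (a minimal sketch; assumes that a {@code CatalogContext}
 * and a {@code Workload} have already been loaded elsewhere):
 * <pre>{@code
 * TimeIntervalCostModel<SingleSitedCostModel> costModel =
 *     new TimeIntervalCostModel<SingleSitedCostModel>(
 *         catalogContext, SingleSitedCostModel.class, 100);
 * double cost = costModel.estimateWorkloadCost(catalogContext, workload);
 * }</pre>
 *
 * @author pavlo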
*/
public class TimeIntervalCostModel<T extends AbstractCostModel> extends AbstractCostModel {
private static final Logger LOG = Logger.getLogger(TimeIntervalCostModel.class);
private static final LoggerBoolean debug = new LoggerBoolean();
private static final LoggerBoolean trace = new LoggerBoolean();
static {
LoggerUtil.attachObserver(LOG, debug, trace);
}
/**
* Internal cost models (one per interval)
*/
private final int num_intervals;
private final T cost_models[];
private PartitionSet all_partitions;
/**
* For testing
*/
protected double last_execution_cost;
protected double last_skew_cost;
protected Double last_final_cost;
protected final Map<String, ObjectHistogram<?>> debug_histograms = new LinkedHashMap<String, ObjectHistogram<?>>();
final ObjectHistogram<Integer> target_histogram = new ObjectHistogram<Integer>();
/** The number of single-partition txns per interval */
final int singlepartition_ctrs[];
/**
 * The number of single-partition txns for which we calculated the actual
 * partitions. This differs from incomplete txns, which we have to mark as
 * single-partition because we don't know what they're actually going to do.
 */
final int singlepartition_with_partitions_ctrs[];
/** The number of multi-partition txns per interval */
final int multipartition_ctrs[];
final int incomplete_txn_ctrs[];
/**
 * The number of txns per interval where the Java control code did not
 * execute on the same partition as the one where its queries went
 **/
final int exec_mismatch_ctrs[];
/** The number of partitions touched per interval **/
final int partitions_touched[];
final double interval_weights[];
final double total_interval_txns[];
final double total_interval_queries[];
final double txn_skews[];
final double exec_skews[];
final double total_skews[];
/**
 * This histogram keeps track of the partitions that we need to add to the
 * access histogram in the entropy calculation. If a txn is incomplete
 * (i.e., it has queries that we did not calculate an estimate for), then
 * we need to mark it as going to all partitions. So we have to make sure
 * that we don't count the partitions that we *do* know the incomplete txn
 * is going to more than once.
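 * For example (illustrative): if an incomplete txn is known to touch
 * partitions {0, 1} in a four-partition cluster, only the missing
 * partitions {2, 3} are added to this histogram.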
*/
final ObjectHistogram<Integer> incomplete_txn_histogram[];
final ObjectHistogram<Integer> exec_histogram[];
final ObjectHistogram<Integer> missing_txn_histogram[];
/** Temporary Data Structures */
final List<Integer> tmp_touched = new ArrayList<Integer>();
final List<Long> tmp_total = new ArrayList<Long>();
final List<Double> tmp_penalties = new ArrayList<Double>();
final List<Long> tmp_potential = new ArrayList<Long>();
/** Temporary mapping from intervals to Consumers */
final Map<Integer, Consumer<Pair<TransactionTrace, Integer>>> tmp_consumers = new HashMap<Integer, Consumer<Pair<TransactionTrace, Integer>>>();
/**
 * Constructor. The inner_class must provide a constructor that accepts a
 * CatalogContext and a PartitionEstimator, since that is what gets invoked
 * for each interval below.
 */
@SuppressWarnings("unchecked")
public TimeIntervalCostModel(CatalogContext catalogContext, Class<? extends T> inner_class, int num_intervals) {
super(TimeIntervalCostModel.class, catalogContext, new PartitionEstimator(catalogContext));
this.num_intervals = num_intervals;
this.cost_models = (T[]) (new AbstractCostModel[num_intervals]);
try {
// Look up the constructor that matches the newInstance() invocation below
Constructor<?> constructor = ClassUtil.getConstructor(inner_class, CatalogContext.class, PartitionEstimator.class);
for (int i = 0; i < this.cost_models.length; i++) {
this.cost_models[i] = (T) constructor.newInstance(catalogContext, this.p_estimator);
} // FOR
} catch (Exception ex) {
LOG.fatal("Failed to create the inner cost models", ex);
System.exit(1);
}
assert (this.num_intervals > 0);
if (trace.val)
LOG.trace("TimeIntervalCostModel: " + this.num_intervals + " intervals");
singlepartition_ctrs = new int[num_intervals];
singlepartition_with_partitions_ctrs = new int[num_intervals];
multipartition_ctrs = new int[num_intervals];
incomplete_txn_ctrs = new int[num_intervals];
exec_mismatch_ctrs = new int[num_intervals];
partitions_touched = new int[num_intervals];
interval_weights = new double[num_intervals];
total_interval_txns = new double[num_intervals];
total_interval_queries = new double[num_intervals];
incomplete_txn_histogram = new ObjectHistogram[num_intervals];
exec_histogram = new ObjectHistogram[num_intervals];
missing_txn_histogram = new ObjectHistogram[num_intervals];
txn_skews = new double[num_intervals];
exec_skews = new double[num_intervals];
total_skews = new double[num_intervals];
for (int i = 0; i < num_intervals; i++) {
incomplete_txn_histogram[i] = new ObjectHistogram<Integer>();
exec_histogram[i] = new ObjectHistogram<Integer>();
missing_txn_histogram[i] = new ObjectHistogram<Integer>();
} // FOR
}
// @Override
// public AbstractCostModel clone(CatalogContext catalogContext) throws
// CloneNotSupportedException {
// TimeIntervalCostModel<T> clone = new TimeIntervalCostModel<T>(catalog_db,
// this.inner_class, this.cost_models.length);
// return (clone);
// }
@Override
public void applyDesignerHints(DesignerHints hints) {
super.applyDesignerHints(hints);
for (T cm : this.cost_models) {
cm.applyDesignerHints(hints);
} // FOR
}
public double getLastSkewCost() {
return last_skew_cost;
}
public double getLastExecutionCost() {
return last_execution_cost;
}
public Double getLastFinalCost() {
return last_final_cost;
}
@Override
public void clear(boolean force) {
super.clear(force);
if (force || this.isCachingEnabled() == false) {
if (debug.val)
LOG.debug("Clearing out all interval cost models");
for (int i = 0; i < this.num_intervals; i++) {
this.cost_models[i].clear(force);
} // FOR
}
tmp_penalties.clear();
tmp_potential.clear();
tmp_total.clear();
tmp_touched.clear();
}
@Override
public void setCachingEnabled(boolean useCaching) {
super.setCachingEnabled(useCaching);
for (int i = 0; i < this.num_intervals; i++) {
this.cost_models[i].setCachingEnabled(useCaching);
} // FOR
assert (this.use_caching == useCaching);
}
/**
 * Return the inner cost model for the given time interval
 *
 * @param interval the time interval index (0 through the interval count - 1)
 * @return the cost model for that interval
 */
public T getCostModel(int interval) {
return (this.cost_models[interval]);
}
/**
 * Return the number of time intervals
 *
 * @return the interval count
 */
public int getIntevalCount() {
return (this.cost_models.length);
}
@Override
public void prepareImpl(final CatalogContext catalogContext) {
this.all_partitions = catalogContext.getAllPartitionIds();
assert (this.all_partitions.isEmpty() == false) : "No partitions???";
for (int i = 0; i < num_intervals; i++) {
this.cost_models[i].prepare(catalogContext);
if (!this.use_caching) {
this.cost_models[i].clear(true);
assert (this.cost_models[i].getTxnPartitionAccessHistogram().isEmpty());
assert (this.cost_models[i].getQueryPartitionAccessHistogram().isEmpty());
}
} // FOR
// Note that we want to clear our counters but not our internal cost
// model data structures
this.clear();
}
/*
 * (non-Javadoc)
 * @see edu.brown.costmodel.AbstractCostModel#estimateTransactionCost(org.voltdb.CatalogContext,
 * edu.brown.workload.Workload, edu.brown.workload.filters.Filter,
 * edu.brown.workload.TransactionTrace)
 */
@Override
public double estimateTransactionCost(CatalogContext catalogContext, Workload workload, Filter filter, TransactionTrace xact) throws Exception {
assert (workload != null) : "The workload handle is null";
// First figure out the time interval that this txn falls into
int interval = workload.getTimeInterval(xact, this.cost_models.length);
return (this.cost_models[interval].estimateTransactionCost(catalogContext, workload, filter, xact));
}
/**
 * Estimate the total cost of the workload by computing a per-interval
 * execution cost and a per-interval skew factor, and then combining them
 * into a single txn-weighted cost.
 */
@Override
protected double estimateWorkloadCostImpl(final CatalogContext catalogContext, final Workload workload, final Filter filter, final Double upper_bound) throws Exception {
if (debug.val)
LOG.debug("Calculating workload execution cost across " + num_intervals + " intervals for " + num_partitions + " partitions");
// (1) Grab the costs at the different time intervals
// Also create the ratios that we will use to weight the interval costs
final AtomicLong total_txns = new AtomicLong(0);
// final HashSet<Long> trace_ids[] = new HashSet[num_intervals];
for (int i = 0; i < num_intervals; i++) {
total_interval_txns[i] = 0;
total_interval_queries[i] = 0;
singlepartition_ctrs[i] = 0;
singlepartition_with_partitions_ctrs[i] = 0;
multipartition_ctrs[i] = 0;
partitions_touched[i] = 0;
incomplete_txn_ctrs[i] = 0;
exec_mismatch_ctrs[i] = 0;
incomplete_txn_histogram[i].clear();
missing_txn_histogram[i].clear();
exec_histogram[i].clear();
} // FOR
// (2) Now go through the workload and estimate the partitions that each txn
// will touch for the given catalog setups
if (trace.val) {
LOG.trace("Total # of Txns in Workload: " + workload.getTransactionCount());
if (filter != null)
LOG.trace("Workload Filter Chain: " + StringUtil.join(" ", "\n", filter.getFilters()));
}
// QUEUING THREAD
tmp_consumers.clear();
Producer<TransactionTrace, Pair<TransactionTrace, Integer>> producer = new Producer<TransactionTrace, Pair<TransactionTrace, Integer>>(CollectionUtil.iterable(workload.iterator(filter))) {
@Override
public Pair<Consumer<Pair<TransactionTrace, Integer>>, Pair<TransactionTrace, Integer>> transform(TransactionTrace txn_trace) {
int i = workload.getTimeInterval(txn_trace, num_intervals);
assert (i >= 0) : "Invalid time interval '" + i + "'\n" + txn_trace.debug(catalogContext.database);
assert (i < num_intervals) : "Invalid interval: " + i + "\n" + txn_trace.debug(catalogContext.database);
total_txns.incrementAndGet();
Pair<TransactionTrace, Integer> p = Pair.of(txn_trace, i);
return (Pair.of(tmp_consumers.get(i), p));
}
};
// PROCESSING THREADS
final int num_threads = ThreadUtil.getMaxGlobalThreads();
int interval_ctr = 0;
for (int thread = 0; thread < num_threads; thread++) {
// First create a new IntervalProcessor/Consumer
IntervalProcessor ip = new IntervalProcessor(catalogContext, workload, filter);
// Then assign it to some number of intervals
for (int i = 0, cnt = (int) Math.ceil(num_intervals / (double) num_threads); i < cnt; i++) {
if (interval_ctr >= num_intervals)
break;
tmp_consumers.put(interval_ctr++, ip);
if (trace.val)
LOG.trace(String.format("Interval #%02d => IntervalProcessor #%02d", interval_ctr - 1, thread));
} // FOR
// And make sure that we queue it up too
producer.addConsumer(ip);
} // FOR (threads)
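// For example (illustrative numbers): with num_intervals=100 and
// num_threads=8, each IntervalProcessor is assigned ceil(100/8) = 13
// intervals, so the last processor ends up with only the 9 that remain.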
ThreadUtil.runGlobalPool(producer.getRunnablesList()); // BLOCKING
if (debug.val) {
int processed = 0;
for (Consumer<?> c : producer.getConsumers()) {
processed += c.getProcessedCounter();
} // FOR
assert (total_txns.get() == processed) : String.format("Expected[%d] != Processed[%d]", total_txns.get(), processed);
}
// We have to convert all of the costs into the range of [0.0, 1.0]
// For each interval, divide the number of partitions touched by the total number
// of partitions that the interval could have touched (worst case scenario)
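// For example (illustrative numbers): with 4 partitions and 100 txns in an
// interval, the worst case is 100 * 4 = 400 partition touches; if the txns
// actually touched 160 partitions, that interval's execution cost is
// 160 / 400 = 0.40 (before the optional multi-partition penalty).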
final double execution_costs[] = new double[num_intervals];
StringBuilder sb = (this.isDebugEnabled() || debug.val ? new StringBuilder() : null);
Map<String, Object> debug_m = null;
if (sb != null) {
debug_m = new LinkedHashMap<String, Object>();
}
if (debug.val)
LOG.debug("Calculating execution cost for " + this.num_intervals + " intervals...");
long total_multipartition_txns = 0;
for (int i = 0; i < this.num_intervals; i++) {
interval_weights[i] = total_interval_txns[i] / (double) total_txns.get();
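// For example (illustrative): an interval containing 250 of the workload's
// 1000 total txns gets interval weight 250/1000 = 0.25.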
long total_txns_in_interval = (long) total_interval_txns[i];
long total_queries_in_interval = (long) total_interval_queries[i];
long num_txns = this.cost_models[i].txn_ctr.get();
long potential_txn_touches = (total_txns_in_interval * num_partitions); // TXNS
double penalty = 0.0d;
total_multipartition_txns += multipartition_ctrs[i];
// Divide the total number of partitions that the txns actually touched
// by the total number of partitions that they could have touched in
// this interval (the worst case)
if (multipartition_ctrs[i] > 0) {
assert (partitions_touched[i] > 0) : "No touched partitions for interval " + i;
double cost = (partitions_touched[i] / (double) potential_txn_touches);
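// For example (illustrative): with multipartition_penalty = 10.0 and
// 20 multi-partition txns out of 100 txns in this interval, the penalty
// is 10.0 * (1 + 20/100) = 12.0, which scales up the cost from above.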
if (this.use_multitpartition_penalty) {
penalty = this.multipartition_penalty * (1.0d + (multipartition_ctrs[i] / (double) total_txns_in_interval));
assert (penalty >= 1.0) : "The multipartition penalty is less than one: " + penalty;
cost *= penalty;
}
execution_costs[i] = Math.min(cost, (double) potential_txn_touches);
}
// For each txn that wasn't even evaluated, add all of the
// partitions to the incomplete histogram
if (num_txns < total_txns_in_interval) {
if (trace.val)
LOG.trace("Adding " + (total_txns_in_interval - num_txns) + " entries to the incomplete histogram for interval #" + i);
for (long ii = num_txns; ii < total_txns_in_interval; ii++) {
missing_txn_histogram[i].put(all_partitions);
} // FOR
}
if (sb != null) {
tmp_penalties.add(penalty);
tmp_total.add(total_txns_in_interval);
tmp_touched.add(partitions_touched[i]);
tmp_potential.add(potential_txn_touches);
Map<String, Object> inner = new LinkedHashMap<String, Object>();
inner.put("Partitions Touched", partitions_touched[i]);
inner.put("Potential Touched", potential_txn_touches);
inner.put("Multi-Partition Txns", multipartition_ctrs[i]);
inner.put("Total Txns", total_txns_in_interval);
inner.put("Total Queries", total_queries_in_interval);
inner.put("Missing Txns", (total_txns_in_interval - num_txns));
inner.put("Cost", String.format("%.05f", execution_costs[i]));
inner.put("Exec Txns", exec_histogram[i].getSampleCount());
debug_m.put("Interval #" + i, inner);
}
} // FOR
if (sb != null) {
Map<String, Object> m0 = new LinkedHashMap<String, Object>();
m0.put("SinglePartition Txns", (total_txns.get() - total_multipartition_txns));
m0.put("MultiPartition Txns", total_multipartition_txns);
m0.put("Total Txns", String.format("%d [%.06f]", total_txns.get(), (1.0d - (total_multipartition_txns / (double) total_txns.get()))));
Map<String, Object> m1 = new LinkedHashMap<String, Object>();
m1.put("Touched Partitions", tmp_touched);
m1.put("Potential Partitions", tmp_potential);
m1.put("Total Partitions", tmp_total);
m1.put("Penalties", tmp_penalties);
sb.append(StringUtil.formatMaps(debug_m, m0, m1));
if (debug.val)
LOG.debug("**** Execution Cost ****\n" + sb);
this.appendDebugMessage(sb);
}
// LOG.debug("Execution By Intervals:\n" + sb.toString());
// (3) We then need to go through and grab the histograms of partitions that were accessed
if (sb != null) {
if (debug.val)
LOG.debug("Calculating skew factor for " + this.num_intervals + " intervals...");
debug_histograms.clear();
sb = new StringBuilder();
}
for (int i = 0; i < this.num_intervals; i++) {
Histogram<Integer> histogram_txn = this.cost_models[i].getTxnPartitionAccessHistogram();
Histogram<Integer> histogram_query = this.cost_models[i].getQueryPartitionAccessHistogram();
this.histogram_query_partitions.put(histogram_query);
long num_queries = this.cost_models[i].query_ctr.get();
this.query_ctr.addAndGet(num_queries);
// DEBUG
SingleSitedCostModel inner_costModel = (SingleSitedCostModel) this.cost_models[i];
boolean is_valid = (partitions_touched[i] + singlepartition_with_partitions_ctrs[i]) == (this.cost_models[i].getTxnPartitionAccessHistogram().getSampleCount() + exec_mismatch_ctrs[i]);
if (!is_valid) {
LOG.error("Transaction Entries: " + inner_costModel.getTransactionCacheEntries().size());
ObjectHistogram<Integer> check = new ObjectHistogram<Integer>();
for (TransactionCacheEntry tce : inner_costModel.getTransactionCacheEntries()) {
check.put(tce.getTouchedPartitions());
// LOG.error(tce.debug() + "\n");
}
LOG.error("Check Touched Partitions: sample=" + check.getSampleCount() + ", values=" + check.getValueCount());
LOG.error("Cache Touched Partitions: sample=" + this.cost_models[i].getTxnPartitionAccessHistogram().getSampleCount() + ", values="
+ this.cost_models[i].getTxnPartitionAccessHistogram().getValueCount());
int qtotal = inner_costModel.getAllQueryCacheEntries().size();
int ctr = 0;
int multip = 0;
for (QueryCacheEntry qce : inner_costModel.getAllQueryCacheEntries()) {
ctr += (qce.getAllPartitions().isEmpty() ? 0 : 1);
multip += (qce.getAllPartitions().size() > 1 ? 1 : 0);
} // FOR
LOG.error("# of QueryCacheEntries with Touched Partitions: " + ctr + " / " + qtotal);
LOG.error("# of MultiP QueryCacheEntries: " + multip);
}
assert (is_valid) : String.format("Partitions Touched by Txns Mismatch in Interval #%d\n" + "(partitions_touched[%d] + singlepartition_with_partitions_ctrs[%d]) != "
+ "(histogram_txn[%d] + exec_mismatch_ctrs[%d])", i, partitions_touched[i], singlepartition_with_partitions_ctrs[i], this.cost_models[i].getTxnPartitionAccessHistogram()
.getSampleCount(), exec_mismatch_ctrs[i]);
this.histogram_java_partitions.put(this.cost_models[i].getJavaExecutionHistogram());
this.histogram_txn_partitions.put(histogram_txn);
long num_txns = this.cost_models[i].txn_ctr.get();
assert (num_txns >= 0) : "The transaction counter at interval #" + i + " is " + num_txns;
this.txn_ctr.addAndGet(num_txns);
// Calculate the skew factor at this time interval
// XXX: Should the number of txns be the total number of unique txns
// that were executed or the total number of times a txn touched the partitions?
// XXX: What do we do when the number of elements that we are examining is zero?
// I guess the cost just needs to be zero?
// XXX: What histogram do we want to use?
target_histogram.clear();
target_histogram.put(histogram_txn);
// For each txn that we haven't gotten an estimate for at this interval,
// we're going to mark it as being broadcast to all partitions. That way
// the access histogram will look uniform. Then as more complete information
// is added, the histogram can only become more skewed. This is an attempt
// to make sure that the skew cost never decreases but only increases.
long total_txns_in_interval = (long) total_interval_txns[i];
if (sb != null) {
debug_histograms.put("Incomplete Txns", incomplete_txn_histogram[i]);
debug_histograms.put("Missing Txns", missing_txn_histogram[i]);
debug_histograms.put("Target Partitions (BEFORE)", new ObjectHistogram<Integer>(target_histogram));
debug_histograms.put("Target Partitions (AFTER)", target_histogram);
}
// Merge the values from incomplete histogram into the target
// histogram
target_histogram.put(incomplete_txn_histogram[i]);
target_histogram.put(missing_txn_histogram[i]);
exec_histogram[i].put(missing_txn_histogram[i]);
long num_elements = target_histogram.getSampleCount();
// The number of partition touches should never be greater than our
// potential touches
assert(num_elements <= (total_txns_in_interval * num_partitions)) :
    "New Partitions Touched Sample Count [" + num_elements + "] > " +
    "Maximum Potential Touched Count [" + (total_txns_in_interval * num_partitions) + "]";
if (sb != null) {
Map<String, Object> m = new LinkedHashMap<String, Object>();
for (String key : debug_histograms.keySet()) {
ObjectHistogram<?> h = debug_histograms.get(key);
m.put(key, String.format("[Sample=%d, Value=%d]\n%s", h.getSampleCount(), h.getValueCount(), h));
} // FOR
sb.append(String.format("INTERVAL #%d [total_txns_in_interval=%d, num_txns=%d, incomplete_txns=%d]\n%s", i, total_txns_in_interval, num_txns, incomplete_txn_ctrs[i],
StringUtil.formatMaps(m)));
}
// Txn Skew
if (num_elements == 0) {
txn_skews[i] = 0.0d;
} else {
txn_skews[i] = SkewFactorUtil.calculateSkew(num_partitions, num_elements, target_histogram);
}
// Exec Skew
if (exec_histogram[i].getSampleCount() == 0) {
exec_skews[i] = 0.0d;
} else {
exec_skews[i] = SkewFactorUtil.calculateSkew(num_partitions, exec_histogram[i].getSampleCount(), exec_histogram[i]);
}
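// The interval's total skew gives equal weight to where txns execute their
// control code and to which partitions their queries touch. For example
// (illustrative): exec_skews[i] = 0.2 and txn_skews[i] = 0.6 give
// total_skews[i] = (0.5 * 0.2) + (0.5 * 0.6) = 0.4.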
total_skews[i] = (0.5 * exec_skews[i]) + (0.5 * txn_skews[i]);
if (sb != null) {
sb.append("Txn Skew = " + MathUtil.roundToDecimals(txn_skews[i], 6) + "\n");
sb.append("Exec Skew = " + MathUtil.roundToDecimals(exec_skews[i], 6) + "\n");
sb.append("Total Skew = " + MathUtil.roundToDecimals(total_skews[i], 6) + "\n");
sb.append(StringUtil.DOUBLE_LINE);
}
} // FOR
if (sb != null && sb.length() > 0) {
if (debug.val)
LOG.debug("**** Skew Factor ****\n" + sb);
this.appendDebugMessage(sb);
}
if (trace.val) {
for (int i = 0; i < num_intervals; i++) {
LOG.trace("Time Interval #" + i + "\n" + "Total # of Txns: " + this.cost_models[i].txn_ctr.get() + "\n" + "Multi-Partition Txns: " + multipartition_ctrs[i] + "\n" + "Execution Cost: "
+ execution_costs[i] + "\n" + "ProcHistogram:\n" + this.cost_models[i].getProcedureHistogram().toString() + "\n" +
// "TransactionsPerPartitionHistogram:\n" +
// this.cost_models[i].getTxnPartitionAccessHistogram()
// + "\n" +
StringUtil.SINGLE_LINE);
}
}
// (4) We can now calculate the final total estimated cost of this workload as the following
// Just take the simple ratio of mp txns / all txns
this.last_execution_cost = MathUtil.weightedMean(execution_costs, total_interval_txns);
// The final skew cost needs to be weighted by the percentage of txns running
// in each interval. This causes intervals with few txns to contribute
// proportionally less to the overall skew cost.
this.last_skew_cost = MathUtil.weightedMean(total_skews, total_interval_txns);
double new_final_cost = (this.use_execution ? (this.execution_weight * this.last_execution_cost) : 0) + (this.use_skew ? (this.skew_weight * this.last_skew_cost) : 0);
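// For example (illustrative numbers): with use_execution=true,
// execution_weight=1.0, last_execution_cost=0.40, use_skew=true,
// skew_weight=0.5, and last_skew_cost=0.20, the final cost is
// (1.0 * 0.40) + (0.5 * 0.20) = 0.50.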
if (sb != null) {
Map<String, Object> m = new LinkedHashMap<String, Object>();
m.put("Total Txns", total_txns.get());
m.put("Interval Txns", Arrays.toString(total_interval_txns));
m.put("Execution Costs", Arrays.toString(execution_costs));
m.put("Skew Factors", Arrays.toString(total_skews));
m.put("Txn Skew", Arrays.toString(txn_skews));
m.put("Exec Skew", Arrays.toString(exec_skews));
m.put("Interval Weights", Arrays.toString(interval_weights));
m.put("Final Cost", String.format("%f = %f + %f", new_final_cost, this.last_execution_cost, this.last_skew_cost));
if (debug.val)
LOG.debug(StringUtil.formatMaps(m));
this.appendDebugMessage(StringUtil.formatMaps(m));
}
this.last_final_cost = new_final_cost;
return (MathUtil.roundToDecimals(this.last_final_cost, 5));
}
/**
 * Consumer that computes the per-interval counters and histograms for the
 * TransactionTraces that the producer hands to it.
 */
private class IntervalProcessor extends Consumer<Pair<TransactionTrace, Integer>> {
final Set<Integer> tmp_missingPartitions = new HashSet<Integer>();
final CatalogContext catalogContext;
final Workload workload;
final Filter filter;
public IntervalProcessor(CatalogContext catalogContext, final Workload workload, final Filter filter) {
this.catalogContext = catalogContext;
this.workload = workload;
this.filter = filter;
}
@Override
public void process(Pair<TransactionTrace, Integer> p) {
assert (p != null);
final TransactionTrace txn_trace = p.getFirst();
final int i = p.getSecond(); // Interval
final int txn_weight = (use_txn_weights ? txn_trace.getWeight() : 1);
final String proc_key = CatalogKey.createKey(CatalogUtil.DEFAULT_DATABASE_NAME, txn_trace.getCatalogItemName());
// Terrible Hack: Assume that we are using the SingleSitedCostModel and
// that it will return fixed values based on whether the txn is
// single-partitioned or not
SingleSitedCostModel singlesited_cost_model = (SingleSitedCostModel) cost_models[i];
total_interval_txns[i] += txn_weight;
total_interval_queries[i] += (txn_trace.getQueryCount() * txn_weight);
histogram_procs.put(proc_key, txn_weight);
try {
singlesited_cost_model.estimateTransactionCost(catalogContext, workload, filter, txn_trace);
TransactionCacheEntry txn_entry = singlesited_cost_model.getTransactionCacheEntry(txn_trace);
assert (txn_entry != null) : "No txn entry for " + txn_trace;
Collection<Integer> partitions = txn_entry.getTouchedPartitions();
// If the txn runs on only one partition, then the cost is
// nothing
if (txn_entry.isSinglePartitioned()) {
singlepartition_ctrs[i] += txn_weight;
if (!partitions.isEmpty()) {
assert (txn_entry.getAllTouchedPartitionsHistogram().getValueCount() == 1) : txn_entry + " says it was single-partitioned but the partition count says otherwise:\n"
+ txn_entry.debug();
singlepartition_with_partitions_ctrs[i] += txn_weight;
}
histogram_sp_procs.put(proc_key, txn_weight);
// If the txn runs on multiple partitions, then the cost is...
// XXX 2010-06-28: The number of partitions that the txn touches
// divided by the total number of partitions
// XXX 2010-07-02: The histogram for the total number of partitions
// touched by all of the queries in the transaction. This ensures that
// a txn with just one multi-partition query isn't weighted the same
// as a txn with many multi-partition queries
} else {
assert (!partitions.isEmpty()) : "No touched partitions for " + txn_trace;
if (partitions.size() == 1 && txn_entry.getExecutionPartition() != HStoreConstants.NULL_PARTITION_ID) {
assert (CollectionUtil.first(partitions) != txn_entry.getExecutionPartition()) : txn_entry.debug();
exec_mismatch_ctrs[i] += txn_weight;
partitions_touched[i] += txn_weight;
} else {
assert (partitions.size() > 1) : String.format("%s is not marked as single-partition but it only touches one partition\n%s", txn_trace, txn_entry.debug());
}
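// Each touched partition counts once per unit of txn weight. For
// example (illustrative): a txn with weight 2 that touches 3
// partitions adds 6 to partitions_touched[i].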
partitions_touched[i] += (partitions.size() * txn_weight); // Txns
multipartition_ctrs[i] += txn_weight;
histogram_mp_procs.put(proc_key, txn_weight);
}
Integer base_partition = txn_entry.getExecutionPartition();
if (base_partition != null) {
exec_histogram[i].put(base_partition, txn_weight);
} else {
exec_histogram[i].put(all_partitions, txn_weight);
}
if (debug.val) {
    // Previously also filtered on:
    // txn_trace.getCatalogItemName().equalsIgnoreCase("DeleteCallForwarding")
Procedure catalog_proc = txn_trace.getCatalogItem(catalogContext.database);
Map<String, Object> inner = new LinkedHashMap<String, Object>();
for (Statement catalog_stmt : catalog_proc.getStatements()) {
inner.put(catalog_stmt.fullName(), CatalogUtil.getReferencedTables(catalog_stmt));
}
Map<String, Object> m = new LinkedHashMap<String, Object>();
m.put(txn_trace.toString(), null);
m.put("Interval", i);
m.put("Single-Partition", txn_entry.isSinglePartitioned());
m.put("Base Partition", base_partition);
m.put("Touched Partitions", partitions);
m.put(catalog_proc.fullName(), inner);
LOG.debug(StringUtil.formatMaps(m));
}
// We need to keep a count of the number of txns that didn't have all
// of their queries estimated completely so that we can update the
// access histograms below for the entropy calculations.
// Note that this is at the txn level, not the query level.
if (!txn_entry.isComplete()) {
incomplete_txn_ctrs[i] += txn_weight;
tmp_missingPartitions.clear();
tmp_missingPartitions.addAll(all_partitions);
tmp_missingPartitions.removeAll(txn_entry.getTouchedPartitions());
// Update the histogram for this interval to keep track of
// how many times we need to
// increase the partition access histogram
incomplete_txn_histogram[i].put(tmp_missingPartitions, txn_weight);
if (trace.val) {
Map<String, Object> m = new LinkedHashMap<String, Object>();
m.put(String.format("Marking %s as incomplete in interval #%d", txn_trace, i), null);
m.put("Examined Queries", txn_entry.getExaminedQueryCount());
m.put("Total Queries", txn_entry.getTotalQueryCount());
m.put("Touched Partitions", txn_entry.getTouchedPartitions());
m.put("Missing Partitions", tmp_missingPartitions);
LOG.trace(StringUtil.formatMaps(m));
}
}
} catch (Exception ex) {
CatalogUtil.saveCatalog(catalogContext.catalog, CatalogUtil.CATALOG_FILENAME);
throw new RuntimeException("Failed to estimate cost for " + txn_trace.getCatalogItemName() + " at interval " + i, ex);
}
}
}
/*
* (non-Javadoc)
* @see
* edu.brown.costmodel.AbstractCostModel#invalidateCache(java.lang.String)
*/
@Override
public void invalidateCache(String catalog_key) {
for (T cm : this.cost_models) {
cm.invalidateCache(catalog_key);
} // FOR
}
/**
* MAIN!
*
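 * Example invocation (a sketch; the parameter keys are assumptions based on
 * the ArgumentsParser constants referenced below, and the file paths are
 * hypothetical):
 * <pre>
 * java edu.brown.costmodel.TimeIntervalCostModel \
 *     catalog=tpcc.jar \
 *     workload=tpcc.trace \
 *     partitionplan=tpcc.pplan \
 *     designer.intervals=100
 * </pre>
 *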
* @param vargs
* @throws Exception
*/
public static void main(String[] vargs) throws Exception {
ArgumentsParser args = ArgumentsParser.load(vargs);
args.require(ArgumentsParser.PARAM_CATALOG, ArgumentsParser.PARAM_WORKLOAD, ArgumentsParser.PARAM_PARTITION_PLAN, ArgumentsParser.PARAM_DESIGNER_INTERVALS
// ArgumentsParser.PARAM_DESIGNER_HINTS
);
assert (args.workload.getTransactionCount() > 0) : "No transactions were loaded from " + args.workload;
if (args.hasParam(ArgumentsParser.PARAM_CATALOG_HOSTS)) {
ClusterConfiguration cc = new ClusterConfiguration(args.getParam(ArgumentsParser.PARAM_CATALOG_HOSTS));
args.updateCatalog(FixCatalog.cloneCatalog(args.catalog, cc), null);
}
// If given a PartitionPlan, then update the catalog
File pplan_path = new File(args.getParam(ArgumentsParser.PARAM_PARTITION_PLAN));
if (pplan_path.exists()) {
PartitionPlan pplan = new PartitionPlan();
pplan.load(pplan_path, args.catalog_db);
if (args.getBooleanParam(ArgumentsParser.PARAM_PARTITION_PLAN_REMOVE_PROCS, false)) {
for (Procedure catalog_proc : pplan.proc_entries.keySet()) {
pplan.setNullProcParameter(catalog_proc);
} // FOR
}
if (args.getBooleanParam(ArgumentsParser.PARAM_PARTITION_PLAN_RANDOM_PROCS, false)) {
for (Procedure catalog_proc : pplan.proc_entries.keySet()) {
pplan.setRandomProcParameter(catalog_proc);
} // FOR
}
pplan.apply(args.catalog_db);
System.out.println("Applied PartitionPlan '" + pplan_path + "' to catalog\n" + pplan);
System.out.print(StringUtil.DOUBLE_LINE);
if (args.hasParam(ArgumentsParser.PARAM_PARTITION_PLAN_OUTPUT)) {
String output = args.getParam(ArgumentsParser.PARAM_PARTITION_PLAN_OUTPUT);
if (output.equals("-"))
output = pplan_path.getAbsolutePath();
pplan.save(new File(output));
System.out.println("Saved PartitionPlan to '" + output + "'");
}
} else {
System.err.println("PartitionPlan file '" + pplan_path + "' does not exist. Ignoring...");
}
System.out.flush();
int num_intervals = args.num_intervals; // getIntParam(ArgumentsParser.PARAM_DESIGNER_INTERVALS);
TimeIntervalCostModel<SingleSitedCostModel> costmodel = new TimeIntervalCostModel<SingleSitedCostModel>(args.catalogContext, SingleSitedCostModel.class, num_intervals);
if (args.hasParam(ArgumentsParser.PARAM_DESIGNER_HINTS))
costmodel.applyDesignerHints(args.designer_hints);
double cost = costmodel.estimateWorkloadCost(args.catalogContext, args.workload);
Map<String, Object> m = new LinkedHashMap<String, Object>();
m.put("PARTITIONS", args.catalogContext.numberOfPartitions);
m.put("INTERVALS", args.num_intervals);
m.put("EXEC COST", costmodel.last_execution_cost);
m.put("SKEW COST", costmodel.last_skew_cost);
m.put("TOTAL COST", cost);
m.put("PARTITIONS TOUCHED", costmodel.getTxnPartitionAccessHistogram().getSampleCount());
System.out.println(StringUtil.formatMaps(m));
// long total = 0;
m.clear();
// for (int i = 0; i < num_intervals; i++) {
// SingleSitedCostModel cm = costmodel.getCostModel(i);
// Histogram<Integer> h = cm.getTxnPartitionAccessHistogram();
// m.put(String.format("Interval %02d", i),
// cm.getTxnPartitionAccessHistogram());
// total += h.getSampleCount();
// h.setKeepZeroEntries(true);
// for (Integer partition :
// CatalogUtil.getAllPartitionIds(args.catalog_db)) {
// if (h.contains(partition) == false) h.put(partition, 0);
// }
// System.out.println(StringUtil.box("Interval #" + i, "+", 100) + "\n"
// + h);
// System.out.println();
// } // FOR
// System.out.println(StringUtil.formatMaps(m));
// System.err.println("TOTAL: " + total);
}
}