/**
*
*/
package edu.brown.costmodel;

import java.io.File;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.log4j.Logger;
import org.voltdb.CatalogContext;
import org.voltdb.catalog.Database;
import org.voltdb.catalog.Procedure;
import org.voltdb.catalog.Statement;
import org.voltdb.utils.Pair;

import edu.brown.catalog.CatalogKey;
import edu.brown.catalog.CatalogUtil;
import edu.brown.catalog.ClusterConfiguration;
import edu.brown.catalog.FixCatalog;
import edu.brown.costmodel.SingleSitedCostModel.QueryCacheEntry;
import edu.brown.costmodel.SingleSitedCostModel.TransactionCacheEntry;
import edu.brown.designer.DesignerHints;
import edu.brown.designer.partitioners.plan.PartitionPlan;
import edu.brown.hstore.HStoreConstants;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.statistics.Histogram;
import edu.brown.statistics.ObjectHistogram;
import edu.brown.utils.ArgumentsParser;
import edu.brown.utils.ClassUtil;
import edu.brown.utils.CollectionUtil;
import edu.brown.utils.Consumer;
import edu.brown.utils.MathUtil;
import edu.brown.utils.PartitionEstimator;
import edu.brown.utils.PartitionSet;
import edu.brown.utils.Producer;
import edu.brown.utils.StringUtil;
import edu.brown.utils.ThreadUtil;
import edu.brown.workload.TransactionTrace;
import edu.brown.workload.Workload;
import edu.brown.workload.filters.Filter;

/**
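 * <p>
 * Usage sketch (hypothetical variable names; mirrors the {@code main()} method at the
 * bottom of this file):
 * <pre>
 * TimeIntervalCostModel&lt;SingleSitedCostModel&gt; costmodel =
 *     new TimeIntervalCostModel&lt;SingleSitedCostModel&gt;(catalogContext, SingleSitedCostModel.class, 100);
 * double cost = costmodel.estimateWorkloadCost(catalogContext, workload);
 * </pre>
 *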
* @author pavlo
*/
public class TimeIntervalCostModel<T extends AbstractCostModel> extends AbstractCostModel {
    private static final Logger LOG = Logger.getLogger(TimeIntervalCostModel.class);
    private static final LoggerBoolean debug = new LoggerBoolean();
    private static final LoggerBoolean trace = new LoggerBoolean();
    static {
        LoggerUtil.attachObserver(LOG, debug, trace);
    }

    /**
     * Internal cost models (one per interval)
     */
    private final int num_intervals;
    private final T cost_models[];

    private PartitionSet all_partitions;

    /**
     * For testing
     */
    protected double last_execution_cost;
    protected double last_skew_cost;
    protected Double last_final_cost;

    protected final Map<String, ObjectHistogram<?>> debug_histograms = new LinkedHashMap<String, ObjectHistogram<?>>();

    final ObjectHistogram<Integer> target_histogram = new ObjectHistogram<Integer>();

    /** The number of single-partition txns per interval */
    final int singlepartition_ctrs[];

    /**
     * The number of single-partition txns per interval for which we actually
     * calculated the partitions. This differs from incomplete txns, which we
     * have to mark as single-partition because we don't know what they're
     * actually going to do
     */
    final int singlepartition_with_partitions_ctrs[];

    /**
     * The number of multi-partition txns per interval
     */
    final int multipartition_ctrs[];
    final int incomplete_txn_ctrs[];

    /**
     * The number of txns per interval where the Java control code doesn't execute on the same partition as where the queries go
     **/
    final int exec_mismatch_ctrs[];

    /** The number of partitions touched per interval **/
    final int partitions_touched[];
    final double interval_weights[];
    final double total_interval_txns[];
    final double total_interval_queries[];

    final double txn_skews[];
    final double exec_skews[];
    final double total_skews[];

    /**
     * This histogram keeps track of the partitions that we need to add to the
     * access histogram in the entropy calculation. If a txn is incomplete
     * (i.e., it has queries that we did not calculate an estimate for), then
     * we need to mark it as going to all partitions. So we have to make sure
     * we don't count the partitions that we *do* know the incomplete txn is
     * going to more than once
     */
    final ObjectHistogram<Integer> incomplete_txn_histogram[];
    final ObjectHistogram<Integer> exec_histogram[];
    final ObjectHistogram<Integer> missing_txn_histogram[];

    /** Temporary Data Structures */
    final List<Integer> tmp_touched = new ArrayList<Integer>();
    final List<Long> tmp_total = new ArrayList<Long>();
    final List<Double> tmp_penalties = new ArrayList<Double>();
    final List<Long> tmp_potential = new ArrayList<Long>();

    /** Temporary mapping from intervals to Consumers */
    final Map<Integer, Consumer<Pair<TransactionTrace, Integer>>> tmp_consumers = new HashMap<Integer, Consumer<Pair<TransactionTrace, Integer>>>();

    /**
     * Constructor
     */
    @SuppressWarnings("unchecked")
    public TimeIntervalCostModel(CatalogContext catalogContext, Class<? extends T> inner_class, int num_intervals) {
        super(TimeIntervalCostModel.class, catalogContext, new PartitionEstimator(catalogContext));
        this.num_intervals = num_intervals;
        this.cost_models = (T[]) (new AbstractCostModel[num_intervals]);

        try {
            Constructor<?> constructor = ClassUtil.getConstructor(inner_class, CatalogContext.class, PartitionEstimator.class);
            for (int i = 0; i < this.cost_models.length; i++) {
                this.cost_models[i] = (T) constructor.newInstance(catalogContext, this.p_estimator);
            } // FOR
        } catch (Exception ex) {
            LOG.fatal("Failed to create the inner cost models", ex);
            System.exit(1);
        }
        assert (this.num_intervals > 0);
        if (trace.val)
            LOG.trace("TimeIntervalCostModel: " + this.num_intervals + " intervals");

        singlepartition_ctrs = new int[num_intervals];
        singlepartition_with_partitions_ctrs = new int[num_intervals];
        multipartition_ctrs = new int[num_intervals];
        incomplete_txn_ctrs = new int[num_intervals];
        exec_mismatch_ctrs = new int[num_intervals];
        partitions_touched = new int[num_intervals];
        interval_weights = new double[num_intervals];
        total_interval_txns = new double[num_intervals];
        total_interval_queries = new double[num_intervals];
        incomplete_txn_histogram = new ObjectHistogram[num_intervals];
        exec_histogram = new ObjectHistogram[num_intervals];
        missing_txn_histogram = new ObjectHistogram[num_intervals];

        txn_skews = new double[num_intervals];
        exec_skews = new double[num_intervals];
        total_skews = new double[num_intervals];

        for (int i = 0; i < num_intervals; i++) {
            incomplete_txn_histogram[i] = new ObjectHistogram<Integer>();
            exec_histogram[i] = new ObjectHistogram<Integer>();
            missing_txn_histogram[i] = new ObjectHistogram<Integer>();
        } // FOR
    }

    // @Override
    // public AbstractCostModel clone(CatalogContext catalogContext) throws
    // CloneNotSupportedException {
    // TimeIntervalCostModel<T> clone = new TimeIntervalCostModel<T>(catalog_db,
    // this.inner_class, this.cost_models.length);
    // return (clone);
    // }

    @Override
    public void applyDesignerHints(DesignerHints hints) {
        super.applyDesignerHints(hints);
        for (T cm : this.cost_models) {
            cm.applyDesignerHints(hints);
        } // FOR
    }

    public double getLastSkewCost() {
        return last_skew_cost;
    }

    public double getLastExecutionCost() {
        return last_execution_cost;
    }

    public Double getLastFinalCost() {
        return last_final_cost;
    }

    @Override
    public void clear(boolean force) {
        super.clear(force);
        if (force || this.isCachingEnabled() == false) {
            if (debug.val)
                LOG.debug("Clearing out all interval cost models");
            for (int i = 0; i < this.num_intervals; i++) {
                this.cost_models[i].clear(force);
            } // FOR
        }
        tmp_penalties.clear();
        tmp_potential.clear();
        tmp_total.clear();
        tmp_touched.clear();
    }

    @Override
    public void setCachingEnabled(boolean useCaching) {
        super.setCachingEnabled(useCaching);
        for (int i = 0; i < this.num_intervals; i++) {
            this.cost_models[i].setCachingEnabled(useCaching);
        } // FOR
        assert (this.use_caching == useCaching);
    }

    /**
     * Return the inner cost model for the given time interval
     *
     * @param interval
     * @return
     */
    public T getCostModel(int interval) {
        return (this.cost_models[interval]);
    }

    /**
     * Return the number of intervals
     *
     * @return
     */
    public int getIntevalCount() {
        return (this.cost_models.length);
    }

    @Override
    public void prepareImpl(final CatalogContext catalogContext) {
        this.all_partitions = catalogContext.getAllPartitionIds();
        assert (this.all_partitions.isEmpty() == false) : "No partitions???";

        for (int i = 0; i < num_intervals; i++) {
            this.cost_models[i].prepare(catalogContext);
            if (!this.use_caching) {
                this.cost_models[i].clear(true);
                assert (this.cost_models[i].getTxnPartitionAccessHistogram().isEmpty());
                assert (this.cost_models[i].getQueryPartitionAccessHistogram().isEmpty());
            }
        } // FOR

        // Note that we want to clear our counters but not our internal cost
        // model data structures
        this.clear();
    }

    /*
     * (non-Javadoc)
     * @see
     * edu.brown.costmodel.AbstractCostModel#estimateCost(org.voltdb.catalog
     * .Database, edu.brown.workload.TransactionTrace,
     * edu.brown.workload.AbstractWorkload.Filter)
     */
    @Override
    public double estimateTransactionCost(CatalogContext catalogContext, Workload workload, Filter filter, TransactionTrace xact) throws Exception {
        assert (workload != null) : "The workload handle is null";
        // First figure out which time interval this txn belongs to
        int interval = workload.getTimeInterval(xact, this.cost_models.length);
        return (this.cost_models[interval].estimateTransactionCost(catalogContext, workload, filter, xact));
    }

    /**
     *
     */
    @Override
    protected double estimateWorkloadCostImpl(final CatalogContext catalogContext, final Workload workload, final Filter filter, final Double upper_bound) throws Exception {

        if (debug.val)
            LOG.debug("Calculating workload execution cost across " + num_intervals + " intervals for " + num_partitions + " partitions");

        // (1) Grab the costs at the different time intervals
        //     Also create the ratios that we will use to weight the interval costs
        final AtomicLong total_txns = new AtomicLong(0);

        // final HashSet<Long> trace_ids[] = new HashSet[num_intervals];
        for (int i = 0; i < num_intervals; i++) {
            total_interval_txns[i] = 0;
            total_interval_queries[i] = 0;
            singlepartition_ctrs[i] = 0;
            singlepartition_with_partitions_ctrs[i] = 0;
            multipartition_ctrs[i] = 0;
            partitions_touched[i] = 0;
            incomplete_txn_ctrs[i] = 0;
            exec_mismatch_ctrs[i] = 0;
            incomplete_txn_histogram[i].clear();
            missing_txn_histogram[i].clear();
            exec_histogram[i].clear();
        } // FOR

        // (2) Now go through the workload and estimate the partitions that each txn
        //     will touch for the given catalog setups
        if (trace.val) {
            LOG.trace("Total # of Txns in Workload: " + workload.getTransactionCount());
            if (filter != null)
                LOG.trace("Workload Filter Chain:       " + StringUtil.join("   ", "\n", filter.getFilters()));
        }

        // QUEUING THREAD
        tmp_consumers.clear();
        Producer<TransactionTrace, Pair<TransactionTrace, Integer>> producer = new Producer<TransactionTrace, Pair<TransactionTrace, Integer>>(CollectionUtil.iterable(workload.iterator(filter))) {
            @Override
            public Pair<Consumer<Pair<TransactionTrace, Integer>>, Pair<TransactionTrace, Integer>> transform(TransactionTrace txn_trace) {
                int i = workload.getTimeInterval(txn_trace, num_intervals);
                assert (i >= 0) : "Invalid time interval '" + i + "'\n" + txn_trace.debug(catalogContext.database);
                assert (i < num_intervals) : "Invalid interval: " + i + "\n" + txn_trace.debug(catalogContext.database);
                total_txns.incrementAndGet();
                Pair<TransactionTrace, Integer> p = Pair.of(txn_trace, i);
                return (Pair.of(tmp_consumers.get(i), p));
            }
        };

        // PROCESSING THREADS
        final int num_threads = ThreadUtil.getMaxGlobalThreads();
        int interval_ctr = 0;
        for (int thread = 0; thread < num_threads; thread++) {
            // First create a new IntervalProcessor/Consumer
            IntervalProcessor ip = new IntervalProcessor(catalogContext, workload, filter);

            // Then assign it to some number of intervals
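            // Illustrative sketch (hypothetical numbers): with num_intervals = 10 and
            // num_threads = 4, cnt = ceil(10 / 4.0) = 3, so the first three IntervalProcessors
            // each handle 3 intervals and the last one handles the remaining interval.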
            for (int i = 0, cnt = (int) Math.ceil(num_intervals / (double) num_threads); i < cnt; i++) {
                if (interval_ctr >= num_intervals)
                    break;
                tmp_consumers.put(interval_ctr++, ip);
                if (trace.val)
                    LOG.trace(String.format("Interval #%02d => IntervalProcessor #%02d", interval_ctr - 1, thread));
            } // FOR

            // And make sure that we queue it up too
            producer.addConsumer(ip);
        } // FOR (threads)

        ThreadUtil.runGlobalPool(producer.getRunnablesList()); // BLOCKING
        if (debug.val) {
            int processed = 0;
            for (Consumer<?> c : producer.getConsumers()) {
                processed += c.getProcessedCounter();
            } // FOR
            assert (total_txns.get() == processed) : String.format("Expected[%d] != Processed[%d]", total_txns.get(), processed);
        }

        // We have to convert all of the costs into the range of [0.0, 1.0]
        // For each interval, divide the number of partitions touched by the total number
        // of partitions that the interval could have touched (worst case scenario)
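        // Worked example (hypothetical numbers, not from any actual run): on an 8-partition
        // cluster, an interval with 4 txns could touch at most 4 * 8 = 32 partitions; if those
        // txns actually touched 12 partitions in total, the raw execution cost for the interval
        // is 12 / 32 = 0.375 before any multi-partition penalty is applied.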
        final double execution_costs[] = new double[num_intervals];
        StringBuilder sb = (this.isDebugEnabled() || debug.val ? new StringBuilder() : null);
        Map<String, Object> debug_m = null;
        if (sb != null) {
            debug_m = new LinkedHashMap<String, Object>();
        }

        if (debug.val)
            LOG.debug("Calculating execution cost for " + this.num_intervals + " intervals...");
        long total_multipartition_txns = 0;
        for (int i = 0; i < this.num_intervals; i++) {
            interval_weights[i] = total_interval_txns[i] / (double) total_txns.get();
            long total_txns_in_interval = (long) total_interval_txns[i];
            long total_queries_in_interval = (long) total_interval_queries[i];
            long num_txns = this.cost_models[i].txn_ctr.get();
            long potential_txn_touches = (total_txns_in_interval * num_partitions); // TXNS
            double penalty = 0.0d;
            total_multipartition_txns += multipartition_ctrs[i];

            // Divide the total number of partitions that the txns in this interval
            // actually touched by the total number of partitions that they could
            // have touched (the worst-case scenario)
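            // Penalty sketch (hypothetical values): with multipartition_penalty = 2.0 and
            // 5 multi-partition txns out of 20 txns in the interval, the penalty below is
            // 2.0 * (1.0 + 5/20.0) = 2.5, which is then multiplied into the cost.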
            if (multipartition_ctrs[i] > 0) {
                assert (partitions_touched[i] > 0) : "No touched partitions for interval " + i;
                double cost = (partitions_touched[i] / (double) potential_txn_touches);

                if (this.use_multitpartition_penalty) {
                    penalty = this.multipartition_penalty * (1.0d + (multipartition_ctrs[i] / (double) total_txns_in_interval));
                    assert (penalty >= 1.0) : "The multipartition penalty is less than one: " + penalty;
                    cost *= penalty;
                }
                execution_costs[i] = Math.min(cost, (double) potential_txn_touches);
            }

            // For each txn that wasn't even evaluated, add all of the
            // partitions to the incomplete histogram
            if (num_txns < total_txns_in_interval) {
                if (trace.val)
                    LOG.trace("Adding " + (total_txns_in_interval - num_txns) + " entries to the incomplete histogram for interval #" + i);
                for (long ii = num_txns; ii < total_txns_in_interval; ii++) {
                    missing_txn_histogram[i].put(all_partitions);
                } // FOR
            }

            if (sb != null) {
                tmp_penalties.add(penalty);
                tmp_total.add(total_txns_in_interval);
                tmp_touched.add(partitions_touched[i]);
                tmp_potential.add(potential_txn_touches);

                Map<String, Object> inner = new LinkedHashMap<String, Object>();
                inner.put("Partitions Touched", partitions_touched[i]);
                inner.put("Potential Touched", potential_txn_touches);
                inner.put("Multi-Partition Txns", multipartition_ctrs[i]);
                inner.put("Total Txns", total_txns_in_interval);
                inner.put("Total Queries", total_queries_in_interval);
                inner.put("Missing Txns", (total_txns_in_interval - num_txns));
                inner.put("Cost", String.format("%.05f", execution_costs[i]));
                inner.put("Exec Txns", exec_histogram[i].getSampleCount());
                debug_m.put("Interval #" + i, inner);
            }
        } // FOR

        if (sb != null) {
            Map<String, Object> m0 = new LinkedHashMap<String, Object>();
            m0.put("SinglePartition Txns", (total_txns.get() - total_multipartition_txns));
            m0.put("MultiPartition Txns", total_multipartition_txns);
            m0.put("Total Txns", String.format("%d [%.06f]", total_txns.get(), (1.0d - (total_multipartition_txns / (double) total_txns.get()))));

            Map<String, Object> m1 = new LinkedHashMap<String, Object>();
            m1.put("Touched Partitions", tmp_touched);
            m1.put("Potential Partitions", tmp_potential);
            m1.put("Total Partitions", tmp_total);
            m1.put("Penalties", tmp_penalties);

            sb.append(StringUtil.formatMaps(debug_m, m0, m1));
            if (debug.val)
                LOG.debug("**** Execution Cost ****\n" + sb);
            this.appendDebugMessage(sb);
        }

        // LOG.debug("Execution By Intervals:\n" + sb.toString());

        // (3) We then need to go through and grab the histograms of which partitions were accessed
        if (sb != null) {
            if (debug.val)
                LOG.debug("Calculating skew factor for " + this.num_intervals + " intervals...");
            debug_histograms.clear();
            sb = new StringBuilder();
        }
        for (int i = 0; i < this.num_intervals; i++) {
            Histogram<Integer> histogram_txn = this.cost_models[i].getTxnPartitionAccessHistogram();
            Histogram<Integer> histogram_query = this.cost_models[i].getQueryPartitionAccessHistogram();
            this.histogram_query_partitions.put(histogram_query);
            long num_queries = this.cost_models[i].query_ctr.get();
            this.query_ctr.addAndGet(num_queries);

            // DEBUG
            SingleSitedCostModel inner_costModel = (SingleSitedCostModel) this.cost_models[i];
            boolean is_valid = (partitions_touched[i] + singlepartition_with_partitions_ctrs[i]) == (this.cost_models[i].getTxnPartitionAccessHistogram().getSampleCount() + exec_mismatch_ctrs[i]);
            if (!is_valid) {
                LOG.error("Transaction Entries: " + inner_costModel.getTransactionCacheEntries().size());
                ObjectHistogram<Integer> check = new ObjectHistogram<Integer>();
                for (TransactionCacheEntry tce : inner_costModel.getTransactionCacheEntries()) {
                    check.put(tce.getTouchedPartitions());
                    // LOG.error(tce.debug() + "\n");
                }
                LOG.error("Check Touched Partitions: sample=" + check.getSampleCount() + ", values=" + check.getValueCount());
                LOG.error("Cache Touched Partitions: sample=" + this.cost_models[i].getTxnPartitionAccessHistogram().getSampleCount() + ", values="
                        + this.cost_models[i].getTxnPartitionAccessHistogram().getValueCount());

                int qtotal = inner_costModel.getAllQueryCacheEntries().size();
                int ctr = 0;
                int multip = 0;
                for (QueryCacheEntry qce : inner_costModel.getAllQueryCacheEntries()) {
                    ctr += (qce.getAllPartitions().isEmpty() ? 0 : 1);
                    multip += (qce.getAllPartitions().size() > 1 ? 1 : 0);
                } // FOR
                LOG.error("# of QueryCacheEntries with Touched Partitions: " + ctr + " / " + qtotal);
                LOG.error("# of MultiP QueryCacheEntries: " + multip);
            }
            assert (is_valid) : String.format("Partitions Touched by Txns Mismatch in Interval #%d\n" + "(partitions_touched[%d] + singlepartition_with_partitions_ctrs[%d]) != "
                    + "(histogram_txn[%d] + exec_mismatch_ctrs[%d])", i, partitions_touched[i], singlepartition_with_partitions_ctrs[i], this.cost_models[i].getTxnPartitionAccessHistogram()
                    .getSampleCount(), exec_mismatch_ctrs[i]);

            this.histogram_java_partitions.put(this.cost_models[i].getJavaExecutionHistogram());
            this.histogram_txn_partitions.put(histogram_txn);
            long num_txns = this.cost_models[i].txn_ctr.get();
            assert (num_txns >= 0) : "The transaction counter at interval #" + i + " is " + num_txns;
            this.txn_ctr.addAndGet(num_txns);

            // Calculate the skew factor at this time interval
            // XXX: Should the number of txns be the total number of unique txns
            //      that were executed or the total number of times a txn touched the partitions?
            // XXX: What do we do when the number of elements that we are examining is zero?
            //      I guess the cost just needs to be zero?
            // XXX: What histogram do we want to use?
            target_histogram.clear();
            target_histogram.put(histogram_txn);

            // For each txn that we haven't gotten an estimate for at this interval,
            // we're going to mark it as being broadcast to all partitions, so that the access
            // histogram looks uniform. Then, as more estimates become available, the histogram
            // reflects the actual accesses. This is an attempt to make sure that the skew cost
            // never decreases, only increases.
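            // Sketch of the merge below (hypothetical): on a 4-partition cluster, a txn whose
            // estimate only covered partition 0 will have contributed {1,2,3} to
            // incomplete_txn_histogram[i], and a txn that was never estimated at all contributes
            // all four partitions via missing_txn_histogram[i]; both are folded into
            // target_histogram so the skew calculation sees a more uniform (pessimistic) spread.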
            long total_txns_in_interval = (long) total_interval_txns[i];
            if (sb != null) {
                debug_histograms.put("Incomplete Txns", incomplete_txn_histogram[i]);
                debug_histograms.put("Missing Txns", missing_txn_histogram[i]);
                debug_histograms.put("Target Partitions (BEFORE)", new ObjectHistogram<Integer>(target_histogram));
                debug_histograms.put("Target Partitions (AFTER)", target_histogram);
            }

            // Merge the values from incomplete histogram into the target
            // histogram
            target_histogram.put(incomplete_txn_histogram[i]);
            target_histogram.put(missing_txn_histogram[i]);
            exec_histogram[i].put(missing_txn_histogram[i]);

            long num_elements = target_histogram.getSampleCount();

            // The number of partition touches should never be greater than our
            // potential touches
            assert(num_elements <= (total_txns_in_interval * num_partitions)) :
                "New Partitions Touched Sample Count [" + num_elements + "] < " +
                "Maximum Potential Touched Count [" + (total_txns_in_interval * num_partitions) + "]";

            if (sb != null) {
                Map<String, Object> m = new LinkedHashMap<String, Object>();
                for (String key : debug_histograms.keySet()) {
                    ObjectHistogram<?> h = debug_histograms.get(key);
                    m.put(key, String.format("[Sample=%d, Value=%d]\n%s", h.getSampleCount(), h.getValueCount(), h));
                } // FOR
                sb.append(String.format("INTERVAL #%d [total_txns_in_interval=%d, num_txns=%d, incomplete_txns=%d]\n%s", i, total_txns_in_interval, num_txns, incomplete_txn_ctrs[i],
                        StringUtil.formatMaps(m)));
            }

            // Txn Skew
            if (num_elements == 0) {
                txn_skews[i] = 0.0d;
            } else {
                txn_skews[i] = SkewFactorUtil.calculateSkew(num_partitions, num_elements, target_histogram);
            }

            // Exec Skew
            if (exec_histogram[i].getSampleCount() == 0) {
                exec_skews[i] = 0.0d;
            } else {
                exec_skews[i] = SkewFactorUtil.calculateSkew(num_partitions, exec_histogram[i].getSampleCount(), exec_histogram[i]);
            }
            total_skews[i] = (0.5 * exec_skews[i]) + (0.5 * txn_skews[i]);
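            // Hypothetical example: txn_skews[i] = 0.6 and exec_skews[i] = 0.2 would give
            // total_skews[i] = (0.5 * 0.2) + (0.5 * 0.6) = 0.4 for this interval.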

            if (sb != null) {
                sb.append("Txn Skew   = " + MathUtil.roundToDecimals(txn_skews[i], 6) + "\n");
                sb.append("Exec Skew  = " + MathUtil.roundToDecimals(exec_skews[i], 6) + "\n");
                sb.append("Total Skew = " + MathUtil.roundToDecimals(total_skews[i], 6) + "\n");
                sb.append(StringUtil.DOUBLE_LINE);
            }
        } // FOR
        if (sb != null && sb.length() > 0) {
            if (debug.val)
                LOG.debug("**** Skew Factor ****\n" + sb);
            this.appendDebugMessage(sb);
        }
        if (trace.val) {
            for (int i = 0; i < num_intervals; i++) {
                LOG.trace("Time Interval #" + i + "\n" + "Total # of Txns: " + this.cost_models[i].txn_ctr.get() + "\n" + "Multi-Partition Txns: " + multipartition_ctrs[i] + "\n" + "Execution Cost: "
                        + execution_costs[i] + "\n" + "ProcHistogram:\n" + this.cost_models[i].getProcedureHistogram().toString() + "\n" +
                        // "TransactionsPerPartitionHistogram:\n" +
                        // this.cost_models[i].getTxnPartitionAccessHistogram()
                        // + "\n" +
                        StringUtil.SINGLE_LINE);
            }
        }

        // (4) We can now calculate the final total estimated cost of this workload as the
        //     weighted mean of the per-interval execution costs
        this.last_execution_cost = MathUtil.weightedMean(execution_costs, total_interval_txns); // MathUtil.roundToDecimals(MathUtil.geometricMean(execution_costs,
                                                                                                // MathUtil.GEOMETRIC_MEAN_ZERO),
                                                                                                // 10);
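        // Assuming MathUtil.weightedMean is the standard weighted average sum(v[i]*w[i]) / sum(w[i]),
        // a hypothetical example: weightedMean({0.2, 0.5}, {10, 30}) = (0.2*10 + 0.5*30) / 40 = 0.425.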

        // The final skew cost needs to be weighted by the percentage of txns running in that
        // interval, so that intervals with only a few txns don't dominate the overall skew cost
        this.last_skew_cost = MathUtil.weightedMean(total_skews, total_interval_txns); // roundToDecimals(MathUtil.geometricMean(entropies,
                                                                                       // MathUtil.GEOMETRIC_MEAN_ZERO),
                                                                                       // 10);
        double new_final_cost = (this.use_execution ? (this.execution_weight * this.last_execution_cost) : 0) + (this.use_skew ? (this.skew_weight * this.last_skew_cost) : 0);
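        // Hypothetical example (assuming use_execution and use_skew are both enabled and both
        // weights are 1.0): last_execution_cost = 0.37 and last_skew_cost = 0.42 would give
        // new_final_cost = (1.0 * 0.37) + (1.0 * 0.42) = 0.79.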

        if (sb != null) {
            Map<String, Object> m = new LinkedHashMap<String, Object>();
            m.put("Total Txns", total_txns.get());
            m.put("Interval Txns", Arrays.toString(total_interval_txns));
            m.put("Execution Costs", Arrays.toString(execution_costs));
            m.put("Skew Factors", Arrays.toString(total_skews));
            m.put("Txn Skew", Arrays.toString(txn_skews));
            m.put("Exec Skew", Arrays.toString(exec_skews));
            m.put("Interval Weights", Arrays.toString(interval_weights));
            m.put("Final Cost", String.format("%f = %f + %f", new_final_cost, this.last_execution_cost, this.last_skew_cost));
            if (debug.val)
                LOG.debug(StringUtil.formatMaps(m));
            this.appendDebugMessage(StringUtil.formatMaps(m));
        }

        this.last_final_cost = new_final_cost;
        return (MathUtil.roundToDecimals(this.last_final_cost, 5));
    }

    /**
     *
     */
    private class IntervalProcessor extends Consumer<Pair<TransactionTrace, Integer>> {

        final Set<Integer> tmp_missingPartitions = new HashSet<Integer>();
        final CatalogContext catalogContext;
        final Workload workload;
        final Filter filter;

        public IntervalProcessor(CatalogContext catalogContext, final Workload workload, final Filter filter) {
            this.catalogContext = catalogContext;
            this.workload = workload;
            this.filter = filter;
        }

        @Override
        public void process(Pair<TransactionTrace, Integer> p) {
            assert (p != null);
            final TransactionTrace txn_trace = p.getFirst();
            final int i = p.getSecond(); // Interval
            final int txn_weight = (use_txn_weights ? txn_trace.getWeight() : 1);
            final String proc_key = CatalogKey.createKey(CatalogUtil.DEFAULT_DATABASE_NAME, txn_trace.getCatalogItemName());

            // Terrible Hack: Assume that we are using the SingleSitedCostModel and that it
            // will return fixed values based on whether the txn is single-partitioned or not
            SingleSitedCostModel singlesited_cost_model = (SingleSitedCostModel) cost_models[i];

            total_interval_txns[i] += txn_weight;
            total_interval_queries[i] += (txn_trace.getQueryCount() * txn_weight);
            histogram_procs.put(proc_key, txn_weight);

            try {
                singlesited_cost_model.estimateTransactionCost(catalogContext, workload, filter, txn_trace);
                TransactionCacheEntry txn_entry = singlesited_cost_model.getTransactionCacheEntry(txn_trace);
                assert (txn_entry != null) : "No txn entry for " + txn_trace;
                Collection<Integer> partitions = txn_entry.getTouchedPartitions();

                // If the txn runs on only one partition, then the cost is
                // nothing
                if (txn_entry.isSinglePartitioned()) {
                    singlepartition_ctrs[i] += txn_weight;
                    if (!partitions.isEmpty()) {
                        assert (txn_entry.getAllTouchedPartitionsHistogram().getValueCount() == 1) : txn_entry + " says it was single-partitioned but the partition count says otherwise:\n"
                                + txn_entry.debug();
                        singlepartition_with_partitions_ctrs[i] += txn_weight;
                    }
                    histogram_sp_procs.put(proc_key, txn_weight);

                    // If the txn runs on multiple partitions, then the cost is...
                    // XXX 2010-06-28: The number of partitions that the txn touches
                    //     divided by the total number of partitions
                    // XXX 2010-07-02: The histogram of the total number of partitions
                    //     touched by all of the queries in the transaction. This ensures
                    //     that a txn with just one multi-partition query isn't weighted
                    //     the same as a txn with many multi-partition queries
                } else {
                    assert (!partitions.isEmpty()) : "No touched partitions for " + txn_trace;
                    if (partitions.size() == 1 && txn_entry.getExecutionPartition() != HStoreConstants.NULL_PARTITION_ID) {
                        assert (CollectionUtil.first(partitions) != txn_entry.getExecutionPartition()) : txn_entry.debug();
                        exec_mismatch_ctrs[i] += txn_weight;
                        partitions_touched[i] += txn_weight;
                    } else {
                        assert (partitions.size() > 1) : String.format("%s is not marked as single-partition but it only touches one partition\n%s", txn_trace, txn_entry.debug());
                    }
                    partitions_touched[i] += (partitions.size() * txn_weight); // Txns
                    multipartition_ctrs[i] += txn_weight;
                    histogram_mp_procs.put(proc_key, txn_weight);
                }
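                // Accounting sketch (hypothetical): a multi-partition txn with weight 1 that
                // touched partitions {0, 1, 2} adds 3 to partitions_touched[i] and 1 to
                // multipartition_ctrs[i]; a single-partition txn only bumps the
                // single-partition counters above and never touches partitions_touched[i].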
                Integer base_partition = txn_entry.getExecutionPartition();
                if (base_partition != null) {
                    exec_histogram[i].put(base_partition, txn_weight);
                } else {
                    exec_histogram[i].put(all_partitions, txn_weight);
                }
                if (debug.val) { // &&
                                   // txn_trace.getCatalogItemName().equalsIgnoreCase("DeleteCallForwarding"))
                                   // {
                    Procedure catalog_proc = txn_trace.getCatalogItem(catalogContext.database);
                    Map<String, Object> inner = new LinkedHashMap<String, Object>();
                    for (Statement catalog_stmt : catalog_proc.getStatements()) {
                        inner.put(catalog_stmt.fullName(), CatalogUtil.getReferencedTables(catalog_stmt));
                    }

                    Map<String, Object> m = new LinkedHashMap<String, Object>();
                    m.put(txn_trace.toString(), null);
                    m.put("Interval", i);
                    m.put("Single-Partition", txn_entry.isSinglePartitioned());
                    m.put("Base Partition", base_partition);
                    m.put("Touched Partitions", partitions);
                    m.put(catalog_proc.fullName(), inner);
                    LOG.debug(StringUtil.formatMaps(m));
                }

                // We need to keep a count of the number of txns that didn't have all of
                // their queries estimated completely, so that we can update the access
                // histograms below for the entropy calculations.
                // Note that this is at the txn level, not the query level.
                if (!txn_entry.isComplete()) {
                    incomplete_txn_ctrs[i] += txn_weight;
                    tmp_missingPartitions.clear();
                    tmp_missingPartitions.addAll(all_partitions);
                    tmp_missingPartitions.removeAll(txn_entry.getTouchedPartitions());
                    // Update the histogram for this interval to keep track of
                    // how many times we need to
                    // increase the partition access histogram
                    incomplete_txn_histogram[i].put(tmp_missingPartitions, txn_weight);
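                    // Hypothetical example: with all_partitions = {0,1,2,3} and a txn whose
                    // estimate only covered partition 2, tmp_missingPartitions = {0,1,3} and each
                    // of those partitions is incremented by txn_weight in this interval's histogram.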
                    if (trace.val) {
                        Map<String, Object> m = new LinkedHashMap<String, Object>();
                        m.put(String.format("Marking %s as incomplete in interval #%d", txn_trace, i), null);
                        m.put("Examined Queries", txn_entry.getExaminedQueryCount());
                        m.put("Total Queries", txn_entry.getTotalQueryCount());
                        m.put("Touched Partitions", txn_entry.getTouchedPartitions());
                        m.put("Missing Partitions", tmp_missingPartitions);
                        LOG.trace(StringUtil.formatMaps(m));
                    }
                }
            } catch (Exception ex) {
                CatalogUtil.saveCatalog(catalogContext.catalog, CatalogUtil.CATALOG_FILENAME);
                throw new RuntimeException("Failed to estimate cost for " + txn_trace.getCatalogItemName() + " at interval " + i, ex);
            }
        }
    }

    /*
     * (non-Javadoc)
     * @see
     * edu.brown.costmodel.AbstractCostModel#invalidateCache(java.lang.String)
     */
    @Override
    public void invalidateCache(String catalog_key) {
        for (T cm : this.cost_models) {
            cm.invalidateCache(catalog_key);
        } // FOR
    }

    /**
     * MAIN!
     *
     * @param vargs
     * @throws Exception
     */
    public static void main(String[] vargs) throws Exception {
        ArgumentsParser args = ArgumentsParser.load(vargs);
        args.require(ArgumentsParser.PARAM_CATALOG, ArgumentsParser.PARAM_WORKLOAD, ArgumentsParser.PARAM_PARTITION_PLAN, ArgumentsParser.PARAM_DESIGNER_INTERVALS
        // ArgumentsParser.PARAM_DESIGNER_HINTS
        );
        assert (args.workload.getTransactionCount() > 0) : "No transactions were loaded from " + args.workload;

        if (args.hasParam(ArgumentsParser.PARAM_CATALOG_HOSTS)) {
            ClusterConfiguration cc = new ClusterConfiguration(args.getParam(ArgumentsParser.PARAM_CATALOG_HOSTS));
            args.updateCatalog(FixCatalog.cloneCatalog(args.catalog, cc), null);
        }

        // If given a PartitionPlan, then update the catalog
        File pplan_path = new File(args.getParam(ArgumentsParser.PARAM_PARTITION_PLAN));
        if (pplan_path.exists()) {
            PartitionPlan pplan = new PartitionPlan();
            pplan.load(pplan_path, args.catalog_db);
            if (args.getBooleanParam(ArgumentsParser.PARAM_PARTITION_PLAN_REMOVE_PROCS, false)) {
                for (Procedure catalog_proc : pplan.proc_entries.keySet()) {
                    pplan.setNullProcParameter(catalog_proc);
                } // FOR
            }
            if (args.getBooleanParam(ArgumentsParser.PARAM_PARTITION_PLAN_RANDOM_PROCS, false)) {
                for (Procedure catalog_proc : pplan.proc_entries.keySet()) {
                    pplan.setRandomProcParameter(catalog_proc);
                } // FOR
            }
            pplan.apply(args.catalog_db);
            System.out.println("Applied PartitionPlan '" + pplan_path + "' to catalog\n" + pplan);
            System.out.print(StringUtil.DOUBLE_LINE);

            if (args.hasParam(ArgumentsParser.PARAM_PARTITION_PLAN_OUTPUT)) {
                String output = args.getParam(ArgumentsParser.PARAM_PARTITION_PLAN_OUTPUT);
                if (output.equals("-"))
                    output = pplan_path.getAbsolutePath();
                pplan.save(new File(output));
                System.out.println("Saved PartitionPlan to '" + output + "'");
            }
        } else {
            System.err.println("PartitionPlan file '" + pplan_path + "' does not exist. Ignoring...");
        }
        System.out.flush();

        int num_intervals = args.num_intervals; // getIntParam(ArgumentsParser.PARAM_DESIGNER_INTERVALS);
        TimeIntervalCostModel<SingleSitedCostModel> costmodel = new TimeIntervalCostModel<SingleSitedCostModel>(args.catalogContext, SingleSitedCostModel.class, num_intervals);
        if (args.hasParam(ArgumentsParser.PARAM_DESIGNER_HINTS))
            costmodel.applyDesignerHints(args.designer_hints);
        double cost = costmodel.estimateWorkloadCost(args.catalogContext, args.workload);

        Map<String, Object> m = new LinkedHashMap<String, Object>();
        m.put("PARTITIONS", args.catalogContext.numberOfPartitions);
        m.put("INTERVALS", args.num_intervals);
        m.put("EXEC COST", costmodel.last_execution_cost);
        m.put("SKEW COST", costmodel.last_skew_cost);
        m.put("TOTAL COST", cost);
        m.put("PARTITIONS TOUCHED", costmodel.getTxnPartitionAccessHistogram().getSampleCount());
        System.out.println(StringUtil.formatMaps(m));

        // long total = 0;
        m.clear();
        // for (int i = 0; i < num_intervals; i++) {
        // SingleSitedCostModel cm = costmodel.getCostModel(i);
        // Histogram<Integer> h = cm.getTxnPartitionAccessHistogram();
        // m.put(String.format("Interval %02d", i),
        // cm.getTxnPartitionAccessHistogram());
        // total += h.getSampleCount();
        // h.setKeepZeroEntries(true);
        // for (Integer partition :
        // CatalogUtil.getAllPartitionIds(args.catalog_db)) {
        // if (h.contains(partition) == false) h.put(partition, 0);
        // }
        // System.out.println(StringUtil.box("Interval #" + i, "+", 100) + "\n"
        // + h);
        // System.out.println();
        // } // FOR
        // System.out.println(StringUtil.formatMaps(m));
        // System.err.println("TOTAL: " + total);

    }
}