/**
*
*/
package edu.brown.designer.partitioners;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import org.apache.commons.collections15.map.ListOrderedMap;
import org.apache.log4j.Logger;
import org.voltdb.catalog.Column;
import org.voltdb.catalog.ProcParameter;
import org.voltdb.catalog.Procedure;
import org.voltdb.catalog.Table;
import org.voltdb.types.PartitionMethodType;
import edu.brown.catalog.CatalogKey;
import edu.brown.catalog.CatalogUtil;
import edu.brown.catalog.special.ReplicatedColumn;
import edu.brown.designer.AccessGraph;
import edu.brown.designer.Designer;
import edu.brown.designer.DesignerEdge;
import edu.brown.designer.DesignerHints;
import edu.brown.designer.DesignerInfo;
import edu.brown.designer.DesignerVertex;
import edu.brown.designer.partitioners.plan.PartitionPlan;
import edu.brown.designer.partitioners.plan.ProcedureEntry;
import edu.brown.designer.partitioners.plan.TableEntry;
import edu.brown.gui.common.GraphVisualizationPanel;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.statistics.Histogram;
import edu.brown.statistics.ObjectHistogram;
import edu.brown.statistics.TableStatistics;
import edu.brown.utils.CollectionUtil;
import edu.brown.utils.EventObservable;
import edu.brown.utils.EventObserver;
import edu.brown.utils.PredicatePairs;
import edu.brown.utils.StringUtil;
/**
 * Partitioner that walks every table vertex of the AccessGraph and picks one
 * of three strategies for it:
 * <ol>
 * <li>replicate the table (forced through the hints, or read-only and small
 * enough relative to {@code hints.force_replication_size_limit}),</li>
 * <li>hash on the single column forced through the hints, or</li>
 * <li>hash on the "most popular" column, i.e. the non-nullable column of the
 * table with the largest weighted count over the table's incident edges.</li>
 * </ol>
 * Tables that end up without an entry are hashed on a randomly chosen column.
 *
 * @author pavlo
 */
public class MostPopularPartitioner extends AbstractPartitioner {
    private static final Logger LOG = Logger.getLogger(MostPopularPartitioner.class);
    private static final LoggerBoolean debug = new LoggerBoolean();
    private static final LoggerBoolean trace = new LoggerBoolean();
    static {
        LoggerUtil.attachObserver(LOG, debug, trace);
    }

    /**
     * Total partition size accumulated by the most recent call to
     * {@link #generate(DesignerHints)}; null until generate() has completed once.
     */
    private Long last_memory = null;

    /**
     * @param designer
     * @param info
     */
    public MostPopularPartitioner(Designer designer, DesignerInfo info) {
        super(designer, info);
    }

    /**
     * @return the total partition size computed by the last generate() call,
     *         or null if generate() has not completed yet
     */
    public Long getLastMemory() {
        return last_memory;
    }

    /**
     * Build a PartitionPlan by choosing a partitioning method/column for every
     * table and, if {@code hints.enable_procparameter_search} is set, a
     * partitioning ProcParameter for every procedure that is not ignored.
     *
     * @param hints designer hints controlling replication, forced columns and
     *            the memory limit; {@code enable_multi_partitioning} is
     *            temporarily cleared while ProcParameters are selected and is
     *            always restored before this method returns or throws
     * @return the generated plan
     * @throws Exception if any of the underlying designer utilities fail
     * @see edu.brown.designer.partitioners.AbstractPartitioner#generate(edu.brown.designer.DesignerHints)
     */
    @Override
    public PartitionPlan generate(DesignerHints hints) throws Exception {
        final PartitionPlan pplan = new PartitionPlan();

        // Generate an AccessGraph and select the column with the greatest
        // weight for each table
        final AccessGraph agraph = this.generateAccessGraph();
        final boolean calculate_memory = (hints.force_replication_size_limit != null && hints.max_memory_per_partition != 0);

        double total_partitionRatio = 0.0;
        long total_partitionSize = 0L;

        for (DesignerVertex v : agraph.getVertices()) {
            Table catalog_tbl = v.getCatalogItem();
            String table_key = CatalogKey.createKey(catalog_tbl);

            Collection<Column> forced_columns = hints.getForcedTablePartitionCandidates(catalog_tbl);
            TableStatistics ts = info.stats.getTableStatistics(catalog_tbl);
            assert (ts != null) : "Null TableStatistics for " + catalog_tbl;

            // partition_size: share of the table held by one partition when hashed
            // partition_ratio: fraction of the per-partition limit used when the
            // whole table is stored at a single partition (i.e., replicated)
            double partition_size = (calculate_memory ? (ts.tuple_size_total / (double) info.getNumPartitions()) : 0);
            double partition_ratio = (calculate_memory ? (ts.tuple_size_total / (double) hints.max_memory_per_partition) : 0);
            TableEntry pentry = null;

            if (debug.val) {
                Map<String, Object> m = new ListOrderedMap<String, Object>();
                m.put("Read Only", ts.readonly);
                m.put("Table Size", StringUtil.formatSize(ts.tuple_size_total));
                m.put("Table Partition Size", StringUtil.formatSize((long) partition_size));
                m.put("Table Partition Ratio", String.format("%.02f", partition_ratio));
                m.put("Total Partition Size", String.format("%s / %s", StringUtil.formatSize(total_partitionSize), StringUtil.formatSize(hints.max_memory_per_partition)));
                m.put("Total Partition Ratio", String.format("%.02f", total_partitionRatio));
                LOG.debug(String.format("%s\n%s", catalog_tbl.getName(), StringUtil.formatMaps(m)));
            }

            // -------------------------------
            // Replication
            // -------------------------------
            if (hints.force_replication.contains(table_key) || (calculate_memory && ts.readonly && hints.enable_replication_readonly && partition_ratio <= hints.force_replication_size_limit)) {
                total_partitionRatio += partition_ratio;
                // NOTE(review): the full table size is added here even when
                // calculate_memory is false, unlike the hashed branches below
                // (where partition_size is 0 in that case) -- confirm intended.
                total_partitionSize += ts.tuple_size_total;
                Column catalog_col = ReplicatedColumn.get(catalog_tbl);
                pentry = new TableEntry(PartitionMethodType.REPLICATION, catalog_col);
                if (debug.val) {
                    LOG.debug(String.format("Replicating %s at all partitions [%s]", catalog_tbl.getName(), catalog_col.fullName()));
                }

            // -------------------------------
            // Forced Selection
            // -------------------------------
            } else if (forced_columns.isEmpty() == false) {
                // Assume there is only one candidate
                assert (forced_columns.size() == 1) : "Unexpected number of forced columns: " + forced_columns;
                Column catalog_col = CollectionUtil.first(forced_columns);
                pentry = new TableEntry(PartitionMethodType.HASH, catalog_col);
                // Only divide by the memory limit when it is known to be non-zero;
                // otherwise 0.0 / 0.0 yields NaN and trips the ratio assertion below
                if (calculate_memory) {
                    total_partitionRatio += partition_size / (double) hints.max_memory_per_partition;
                }
                total_partitionSize += partition_size;
                if (debug.val) {
                    LOG.debug(String.format("Forcing %s to be partitioned by specific column [%s]", catalog_tbl.getName(), catalog_col.fullName()));
                }

            // -------------------------------
            // Select Most Popular
            // -------------------------------
            } else {
                // If there are no edges, skip the table here. It gets no entry in
                // the plan, so the fallback loop after this one will pick a
                // random column for it (it doesn't matter which one)
                final Collection<DesignerEdge> edges = agraph.getIncidentEdges(v);
                if (edges.isEmpty()) {
                    continue;
                }
                if (trace.val) {
                    LOG.trace(catalog_tbl + " has " + edges.size() + " edges in AccessGraph");
                }

                ObjectHistogram<Column> column_histogram = this.buildColumnHistogram(agraph, v, catalog_tbl, edges);
                if (column_histogram.isEmpty()) {
                    this.showEmptyHistogramDiagnostics(agraph, edges);
                }

                // We might not find anything if we are calculating the lower
                // bounds using only one transaction
                assert (!column_histogram.isEmpty()) : "Failed to find any ColumnSets for " + catalog_tbl;
                if (trace.val) {
                    LOG.trace("Column Histogram:\n" + column_histogram);
                }

                Column catalog_col = CollectionUtil.first(column_histogram.getMaxCountValues());
                pentry = new TableEntry(PartitionMethodType.HASH, catalog_col, null, null);
                // Same NaN guard as in the forced-selection branch
                if (calculate_memory) {
                    total_partitionRatio += partition_size / (double) hints.max_memory_per_partition;
                }
                total_partitionSize += partition_size;

                if (debug.val) {
                    LOG.debug(String.format("Selected %s's most popular column for partitioning [%s]", catalog_tbl.getName(), catalog_col.fullName()));
                }
            }
            pplan.table_entries.put(catalog_tbl, pentry);

            if (debug.val) {
                LOG.debug(String.format("Current Partition Size: %s / %s", StringUtil.formatSize(total_partitionSize), StringUtil.formatSize(hints.max_memory_per_partition)));
            }
            assert (total_partitionRatio <= 1) : String.format("Too much memory per partition: %s / %s", StringUtil.formatSize(total_partitionSize),
                    StringUtil.formatSize(hints.max_memory_per_partition));
        } // FOR

        // Any table that did not get an entry above is hashed on a random column
        for (Table catalog_tbl : info.catalogContext.database.getTables()) {
            if (pplan.getTableEntry(catalog_tbl) == null) {
                Column catalog_col = CollectionUtil.random(catalog_tbl.getColumns());
                assert (catalog_col != null) : "Failed to randomly pick column for " + catalog_tbl;
                pplan.table_entries.put(catalog_tbl, new TableEntry(PartitionMethodType.HASH, catalog_col, null, null));
                if (debug.val) {
                    LOG.debug(String.format("No partitioning column selected for %s. Choosing a random attribute [%s]", catalog_tbl, catalog_col.fullName()));
                }
            }
        } // FOR

        if (hints.enable_procparameter_search) {
            this.selectProcParameters(pplan, hints);
        }
        this.setProcedureSinglePartitionFlags(pplan, hints);

        this.last_memory = total_partitionSize;

        return (pplan);
    }

    /**
     * Build the weighted column histogram for one table from its incident edges.
     * Edges whose two endpoints are both the table's own vertex feed a
     * "self" histogram; all other edges feed a "join" histogram. Only
     * non-nullable columns that belong to the table are counted.
     *
     * @param agraph the AccessGraph the vertex and edges belong to
     * @param v the vertex of the table being examined
     * @param catalog_tbl the table stored at that vertex
     * @param edges the edges incident to the vertex
     * @return the join histogram, or the self-reference histogram if there were
     *         no join columns (either may be empty)
     */
    private ObjectHistogram<Column> buildColumnHistogram(AccessGraph agraph, DesignerVertex v, Table catalog_tbl, Collection<DesignerEdge> edges) {
        ObjectHistogram<Column> join_column_histogram = new ObjectHistogram<Column>();
        ObjectHistogram<Column> self_column_histogram = new ObjectHistogram<Column>();

        for (DesignerEdge e : edges) {
            Collection<DesignerVertex> vertices = agraph.getIncidentVertices(e);
            DesignerVertex v0 = CollectionUtil.get(vertices, 0);
            DesignerVertex v1 = CollectionUtil.get(vertices, 1);
            boolean self = (v0.equals(v) && v1.equals(v));
            ObjectHistogram<Column> column_histogram = (self ? self_column_histogram : join_column_histogram);

            double edge_weight = e.getTotalWeight();
            PredicatePairs cset = e.getAttribute(AccessGraph.EdgeAttributes.COLUMNSET);
            if (trace.val) {
                LOG.trace("Examining ColumnSet for " + e.toString(true));
            }

            Histogram<Column> cset_histogram = cset.buildHistogramForType(Column.class);
            Collection<Column> columns = cset_histogram.values();
            if (trace.val) {
                LOG.trace("Constructed Histogram for " + catalog_tbl + " from ColumnSet:\n"
                        + cset_histogram.setDebugLabels(CatalogUtil.getHistogramLabels(cset_histogram.values())).toString(100, 50));
            }
            for (Column catalog_col : columns) {
                // Skip columns of other tables and columns that may be NULL
                if (!catalog_col.getParent().equals(catalog_tbl)) {
                    continue;
                }
                if (catalog_col.getNullable()) {
                    continue;
                }
                long cnt = cset_histogram.get(catalog_col);
                if (trace.val) {
                    LOG.trace("Found Match: " + catalog_col.fullName() + " [cnt=" + cnt + "]");
                }
                // Scale the column's count by the weight of the edge it came from
                column_histogram.put(catalog_col, Math.round(cnt * edge_weight));
            } // FOR
        } // FOR

        // If there were no join columns, then use the self-reference histogram
        return (join_column_histogram.isEmpty() ? self_column_histogram : join_column_histogram);
    }

    /**
     * Debugging aid used when no candidate column was found for a table: logs
     * the table's edges and opens a visualization frame of the AccessGraph
     * whose observer logs the COLUMNSET attribute of every edge incident to
     * the vertex it is notified about.
     *
     * @param agraph the AccessGraph to display
     * @param edges the incident edges of the table that had no candidates
     */
    private void showEmptyHistogramDiagnostics(final AccessGraph agraph, Collection<DesignerEdge> edges) {
        EventObserver<DesignerVertex> observer = new EventObserver<DesignerVertex>() {
            @Override
            public void update(EventObservable<DesignerVertex> o, DesignerVertex v) {
                for (DesignerEdge e : agraph.getIncidentEdges(v)) {
                    LOG.info(e.getAttribute(AccessGraph.EdgeAttributes.COLUMNSET));
                }
                LOG.info(StringUtil.repeat("-", 100));
            }
        };
        LOG.info("Edges: " + edges);
        GraphVisualizationPanel.createFrame(agraph, observer).setVisible(true);
    }

    /**
     * Apply the table entries of the plan to the catalog database and then
     * select a partitioning ProcParameter for every procedure that is not
     * ignored: the first key of the generated parameter order is used, and the
     * procedure is initially marked as always single-partitioned.
     *
     * @param pplan the plan to apply and to add ProcedureEntries to
     * @param hints designer hints; {@code enable_multi_partitioning} is
     *            disabled for the duration of the search and restored afterwards,
     *            even if the search throws
     * @throws Exception if the parameter-order generation fails
     */
    private void selectProcParameters(PartitionPlan pplan, DesignerHints hints) throws Exception {
        if (debug.val) {
            LOG.debug("Selecting partitioning ProcParameter for " + this.info.catalogContext.database.getProcedures().size() + " Procedures");
        }
        pplan.apply(info.catalogContext.database);

        // Temporarily disable multi-attribute parameters
        boolean multiproc_orig = hints.enable_multi_partitioning;
        hints.enable_multi_partitioning = false;
        try {
            for (Procedure catalog_proc : this.info.catalogContext.database.getProcedures()) {
                if (PartitionerUtil.shouldIgnoreProcedure(hints, catalog_proc)) {
                    continue;
                }

                Set<String> param_order = PartitionerUtil.generateProcParameterOrder(info, info.catalogContext.database, catalog_proc, hints);
                if (param_order.isEmpty() == false) {
                    ProcParameter catalog_proc_param = CatalogKey.getFromKey(info.catalogContext.database, CollectionUtil.first(param_order), ProcParameter.class);
                    if (debug.val) {
                        LOG.debug(String.format("PARTITION %-25s%s", catalog_proc.getName(), CatalogUtil.getDisplayName(catalog_proc_param)));
                    }

                    // Create a new PartitionEntry for this procedure and set it
                    // to be always single-partitioned. The caller checks
                    // afterwards whether that's always true or not
                    ProcedureEntry pentry = new ProcedureEntry(PartitionMethodType.HASH, catalog_proc_param, true);
                    pplan.getProcedureEntries().put(catalog_proc, pentry);
                }
            } // FOR
        } finally {
            // Always hand the caller's hints back in their original state
            hints.enable_multi_partitioning = multiproc_orig;
        }
    }
}