package org.voltdb.planner;
import java.io.File;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.collections15.CollectionUtils;
import org.apache.log4j.Logger;
import org.voltdb.catalog.Catalog;
import org.voltdb.catalog.Column;
import org.voltdb.catalog.Database;
import org.voltdb.catalog.MaterializedViewInfo;
import org.voltdb.catalog.Procedure;
import org.voltdb.catalog.Statement;
import org.voltdb.catalog.Table;
import org.voltdb.compiler.StatementCompiler;
import org.voltdb.types.QueryType;
import edu.brown.benchmark.AbstractProjectBuilder;
import edu.brown.catalog.CatalogUtil;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.plannodes.PlanNodeUtil;
import edu.brown.utils.CollectionUtil;
import edu.brown.utils.FileUtil;
import edu.brown.utils.StringUtil;
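/**
* Discovers Statements that can be serviced entirely by a replicated vertical
* partition (secondary index) and generates new query plans for them.
* <p>
* A minimal usage sketch (assumes the Database catalog has already been
* loaded and contains vertical partition views):
* <pre>{@code
* VerticalPartitionPlanner planner = new VerticalPartitionPlanner(catalog_db, true);
* Collection<Statement> updated = planner.optimizeDatabase();
* }</pre>
*/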
public class VerticalPartitionPlanner {
private static final Logger LOG = Logger.getLogger(VerticalPartitionPlanner.class);
private static final LoggerBoolean debug = new LoggerBoolean();
private static final LoggerBoolean trace = new LoggerBoolean();
static {
LoggerUtil.attachObserver(LOG, debug, trace);
}
private static final Pattern SELECT_REGEX = Pattern.compile("SELECT[\\s]+(.*?)[\\s]+FROM[\\s]+(.*?)[\\s]+WHERE[\\s]+(.*)", Pattern.CASE_INSENSITIVE);
private static final Pattern FROM_SPLIT = Pattern.compile(",");
private static final Pattern FROM_REGEX = Pattern.compile("[\\s]+AS[\\s]+", Pattern.CASE_INSENSITIVE);
// ======================================================================================
// INTERNAL DATA MEMBERS
// ======================================================================================
/** convenience pointer to the database object in the catalog */
private final Database catalog_db;
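/** The source tables that have a vertical partition view defined for them */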
private final Map<Table, MaterializedViewInfo> vp_views = new HashMap<Table, MaterializedViewInfo>();
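/** The columns included in each table's vertical partition view */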
private final Map<Table, Collection<Column>> vp_columns = new HashMap<Table, Collection<Column>>();
/** New schema DDL that contains all of the vertical partitions */
private final String ddl;
/**
* For each Statement, maintain a mapping of the tables to rewrite, as well
* as the new rewritten SQL
*/
private class StatementRewrite extends HashMap<Table, Table> {
private static final long serialVersionUID = 1L;
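/** The rewritten SQL text that references the vertical partition tables */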
private String sql;
}
private final Map<Statement, StatementRewrite> stmt_rewrites = new HashMap<Statement, StatementRewrite>();
/**
* Internal ProjectBuilder that we'll use to get new query plans that use
* the Vertical Partitions
*/
private final VPPlannerProjectBuilder projectBuilder;
// ======================================================================================
// CONSTRUCTOR
// ======================================================================================
/**
* @param catalog_db
* Catalog info about schema, metadata and procedures
* @param catalog_views
* Optional vertical partition views to register with this planner
*/
public VerticalPartitionPlanner(Database catalog_db, MaterializedViewInfo...catalog_views) {
this.catalog_db = catalog_db;
// Construct a DDL that includes the vertical partitions
this.ddl = CatalogUtil.toSchema(catalog_db, false);
// We will use this ProjectBuilder to generate new query plans
this.projectBuilder = new VPPlannerProjectBuilder();
for (MaterializedViewInfo catalog_view : catalog_views) {
this.addVerticalPartition(catalog_view);
} // FOR
}
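/**
* @param catalog_db
* Catalog info about schema, metadata and procedures
* @param addAll
* If true, immediately register every vertical partition defined in the catalog
*/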
public VerticalPartitionPlanner(Database catalog_db, boolean addAll) {
this(catalog_db);
if (addAll) this.addAllVerticalPartitions();
}
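/**
* Register every vertical partition view defined in the catalog with this planner
* @return this planner, for chaining
*/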
public VerticalPartitionPlanner addAllVerticalPartitions() {
for (MaterializedViewInfo catalog_view : CatalogUtil.getVerticallyPartitionedTables(catalog_db).values()) {
this.addVerticalPartition(catalog_view);
}
return (this);
}
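/**
* Register a single vertical partition view so that queries on its parent
* table can be considered for rewriting
* @return this planner, for chaining
*/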
public VerticalPartitionPlanner addVerticalPartition(MaterializedViewInfo catalog_view) {
if (debug.val)
LOG.debug("Adding " + catalog_view + " for discovering query optimizations");
Table catalog_tbl = catalog_view.getParent();
this.vp_views.put(catalog_tbl, catalog_view);
Collection<Column> columns = CatalogUtil.getColumns(catalog_view.getGroupbycols());
assert(columns.isEmpty() == false) : "No vertical partition columns for " + catalog_tbl;
this.vp_columns.put(catalog_tbl, columns);
return (this);
}
// ======================================================================================
// MAIN ENTRY POINTS
// ======================================================================================
/**
* Generate all the optimized query plans for all Statements in the database
* and apply them to the catalog immediately
* @return the set of Statements that were updated in the catalog
* @throws Exception
*/
public Collection<Statement> optimizeDatabase() throws Exception {
Set<Statement> updated = new HashSet<Statement>();
Map<Statement, Statement> optimized = this.generateOptimizedStatements();
if (optimized != null) {
for (Entry<Statement, Statement> e : optimized.entrySet()) {
applyOptimization(e.getValue(), e.getKey());
} // FOR
updated.addAll(optimized.keySet());
}
return (updated);
}
/**
* Generate an optimized query plan for just one Statement and apply
* it to the catalog immediately
* @param catalog_stmt
* @return true if the Statement was rewritten to use a vertical partition
* @throws Exception
*/
public boolean optimizeStatement(Statement catalog_stmt) throws Exception {
this.projectBuilder.clear();
if (this.process(catalog_stmt)) {
StatementRewrite rewrite = this.stmt_rewrites.get(catalog_stmt);
this.projectBuilder.queueRewrittenStatement(catalog_stmt, rewrite.sql);
Map<Statement, Statement> optimized = this.projectBuilder.getRewrittenQueryPlans();
assert (optimized != null);
assert (optimized.size() == 1);
applyOptimization(CollectionUtil.first(optimized.values()), catalog_stmt);
return (true);
}
return (false);
}
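/**
* Examine every Statement in the catalog and generate new query plans for
* those that can be serviced by a vertical partition
* @return a mapping from each original Statement to its rewritten Statement,
* or null if no Statements could be rewritten
* @throws Exception
*/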
public Map<Statement, Statement> generateOptimizedStatements() throws Exception {
for (Procedure catalog_proc : catalog_db.getProcedures()) {
for (Statement catalog_stmt : catalog_proc.getStatements()) {
if (this.process(catalog_stmt)) {
StatementRewrite rewrite = this.stmt_rewrites.get(catalog_stmt);
this.projectBuilder.queueRewrittenStatement(catalog_stmt, rewrite.sql);
}
} // FOR (stmts)
} // FOR (procs)
if (this.stmt_rewrites.size() > 0) {
if (trace.val) {
Map<String, Object> m = new LinkedHashMap<String, Object>();
for (Statement catalog_stmt : this.stmt_rewrites.keySet()) {
m.put(catalog_stmt.fullName(), this.stmt_rewrites.get(catalog_stmt));
}
LOG.trace(String.format("Rewritten Queries [%d]\n%s", this.stmt_rewrites.size(), StringUtil.formatMaps(m)));
}
return (this.projectBuilder.getRewrittenQueryPlans());
}
return (null);
}
// ======================================================================================
// INTERNAL UTILITY METHODS
// ======================================================================================
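/**
* Copy the rewritten query plans from src into dest, mark dest as using the
* replicated secondary index, and flag it as replicated-only if it now
* references a single table
*/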
public static void applyOptimization(Statement src, Statement dest) {
CatalogUtil.copyQueryPlans(src, dest);
dest.setSecondaryindex(true);
// Check whether the only table the query references is our replicated index
Collection<Table> tables = CatalogUtil.getReferencedTables(dest);
if (debug.val) LOG.debug(dest + " => " + tables);
dest.setReplicatedonly(tables.size() == 1);
}
/**
* Process the given Statement and rewrite its query plan if it can take
* advantage of a vertical partitioning column
*
* @param catalog_stmt
* @return true if the Statement can be rewritten to use a vertical partition
* @throws Exception
*/
protected boolean process(Statement catalog_stmt) throws Exception {
// Always skip if there are no vertically partitioned tables
if (this.vp_views.isEmpty()) {
if (debug.val)
LOG.warn("Skipping " + catalog_stmt.fullName() + ": There are no vertically partitioned tables.");
return (false);
}
// We can only work our magic on SELECTs
QueryType qtype = QueryType.get(catalog_stmt.getQuerytype());
if (qtype != QueryType.SELECT) {
if (debug.val)
LOG.warn("Skipping " + catalog_stmt.fullName() + ": QueryType is " + qtype + ".");
return (false);
}
// Check whether this query references a table that has a vertical partition
Collection<Table> tables = CollectionUtils.intersection(this.vp_views.keySet(), CatalogUtil.getReferencedTables(catalog_stmt));
if (tables.isEmpty()) {
if (debug.val)
LOG.warn("Skipping " + catalog_stmt.fullName() + ": It does not reference a vertical partitioning table.");
return (false);
}
// Now check that the columns referenced in the query do not include
// the table's horizontal partitioning column but do include the columns
// that we have in our vertical partition
Collection<Column> stmt_cols = CollectionUtils.union(CatalogUtil.getReferencedColumns(catalog_stmt),
CatalogUtil.getOrderByColumns(catalog_stmt));
if (stmt_cols.isEmpty()) {
if (debug.val)
LOG.warn("Skipping " + catalog_stmt.fullName() + ": Query does not reference any columns in its predicate or order-by clause.");
return (false);
}
Collection<Column> output_cols = PlanNodeUtil.getOutputColumnsForStatement(catalog_stmt);
assert (output_cols.isEmpty() == false);
for (Table catalog_tbl : tables) {
MaterializedViewInfo catalog_view = this.vp_views.get(catalog_tbl);
assert(catalog_view != null);
assert(catalog_view.getGroupbycols().isEmpty() == false) :
String.format("Missing vertical partitioning columns in %s when trying to process %s\n%s\nCACHED: %s",
catalog_view.fullName(), catalog_stmt.fullName(), CatalogUtil.debug(catalog_view), CatalogUtil.debug(this.vp_columns.get(catalog_tbl)));
Collection<Column> view_cols = CatalogUtil.getColumns(catalog_view.getGroupbycols());
assert(view_cols.isEmpty() == false) : "Missing vertical partitioning columns in " + catalog_view.fullName() + " when trying to process " + catalog_stmt.fullName();
Column partitioning_col = catalog_tbl.getPartitioncolumn();
assert(partitioning_col != null);
// The current vertical partition is valid for this query if all the
// following are true:
// (1) The partitioning_col is in output_cols
// (2) The partitioning_col is *not* in the predicate_cols
// (3) At least one of the vertical partition's columns is in
// predicate_cols
if (debug.val) {
Map<String, Object> m = new LinkedHashMap<String, Object>();
m.put("VerticalP", catalog_view.getName());
m.put("Partitioning Col", partitioning_col.fullName());
m.put("Output Cols", output_cols);
m.put("Statement Cols", stmt_cols);
m.put("VerticalP Cols", view_cols);
LOG.debug(String.format("Checking whether %s can use vertical partition for %s\n%s", catalog_stmt.fullName(), catalog_tbl.getName(), StringUtil.formatMaps(m)));
}
// if (output_cols.contains(partitioning_col) == false) {
// if (debug.val)
// LOG.warn("Output Columns do not contain horizontal partitioning column");
// }
// else
if (view_cols.containsAll(output_cols) == false) {
if (debug.val)
LOG.warn("Vertical Partition columns do not contain output columns");
}
/** Is this needed?
else if (stmt_cols.contains(partitioning_col) == true) {
if (debug.val)
LOG.warn("Statement Columns contains horizontal partition column");
}
**/
else if (view_cols.containsAll(stmt_cols) == false) {
if (debug.val)
LOG.warn("The Vertical Partition Columns does not contain all of the Statement Columns " + CollectionUtils.subtract(view_cols, stmt_cols));
}
else {
if (debug.val)
LOG.debug("Valid VP Candidate: " + catalog_tbl);
StatementRewrite rewrite = this.stmt_rewrites.get(catalog_stmt);
if (rewrite == null) {
rewrite = new StatementRewrite();
this.stmt_rewrites.put(catalog_stmt, rewrite);
}
rewrite.put(catalog_tbl, catalog_view.getDest());
}
} // FOR
// Check to make sure that we were able to generate a StatementRewrite candidate for this one
StatementRewrite rewrite = this.stmt_rewrites.get(catalog_stmt);
if (rewrite == null) {
if (debug.val)
LOG.warn("Skipping " + catalog_stmt.fullName() + ": Query does not have any valid vertical partitioning references.");
return (false);
}
try {
rewrite.sql = this.rewriteSQL(catalog_stmt, rewrite);
} catch (Throwable ex) {
String msg = String.format("Failed to rewrite SQL for %s to use replicated secondary index on %s",
catalog_stmt.fullName(), CatalogUtil.debug(rewrite.keySet()));
LOG.warn(msg, (debug.val ? ex : null));
return (false);
}
return (true);
}
/**
* Hackishly rewrite the SQL statement to change all table references to
* vertical partitions based on the provided table mapping
*
* @param catalog_stmt
* @param tbl_mapping
* @return the rewritten SQL string
* @throws Exception
*/
protected String rewriteSQL(Statement catalog_stmt, Map<Table, Table> tbl_mapping) throws Exception {
if (debug.val)
LOG.debug(String.format("Rewriting %s's SQL using %d mappings: %s", catalog_stmt.fullName(), tbl_mapping.size(), tbl_mapping));
// This isn't perfect but it's good enough for our experiments
Matcher m = SELECT_REGEX.matcher(catalog_stmt.getSqltext());
if (m.matches() == false) {
throw new Exception(String.format("Failed to match %s's SQL: %s", catalog_stmt.fullName(), catalog_stmt.getSqltext()));
}
String select_clause = m.group(1);
String where_clause = m.group(3);
String from_clause[] = FROM_SPLIT.split(m.group(2));
if (from_clause.length == 0) {
throw new Exception(String.format("Failed to extract %s's FROM clause: %s", catalog_stmt.fullName(), catalog_stmt.getSqltext()));
}
Map<String, String> from_xref = new LinkedHashMap<String, String>();
Map<String, String> new_from_xref = new HashMap<String, String>();
for (String from : from_clause) {
String split[] = FROM_REGEX.split(from.trim());
from_xref.put(split[0], (split.length > 1 ? split[1] : null));
} // FROM
for (Entry<Table, Table> e : tbl_mapping.entrySet()) {
String fromTable = e.getKey().getName();
String toTable = e.getValue().getName();
select_clause = select_clause.replace(fromTable + ".", toTable + ".");
where_clause = where_clause.replace(fromTable + ".", toTable + ".");
new_from_xref.put(fromTable, toTable);
} // FOR
String new_from[] = new String[from_xref.size()];
int i = 0;
for (String tableName : from_xref.keySet()) {
String new_tableName = new_from_xref.get(tableName);
if (new_tableName == null)
new_tableName = tableName;
new_from[i] = new_tableName;
if (from_xref.get(tableName) != null)
new_from[i] += " AS " + from_xref.get(tableName);
i++;
} // FOR
return (String.format("SELECT %s FROM %s WHERE %s", select_clause, StringUtil.join(",", new_from), where_clause));
}
/**
* Internal ProjectBuilder that queues up a bunch of single-query
* Procedures and then uses the VoltDB planner to generate new query plans
* that use the vertical partitions
*/
protected class VPPlannerProjectBuilder extends AbstractProjectBuilder {
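/** Maps each original Statement to the name of the temporary single-statement Procedure that holds its rewritten SQL */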
private final Map<Statement, String> rewritten_queries = new HashMap<Statement, String>();
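/** Temporary DDL file (including the vertical partitions) used to compile the planner catalog */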
private final File tempDDL;
public VPPlannerProjectBuilder() {
super("vpplanner", VPPlannerProjectBuilder.class, null, null);
this.tempDDL = this.setDDLContents(VerticalPartitionPlanner.this.ddl);
// Add in all the table partitioning info
for (Table catalog_tbl : CatalogUtil.getDataTables(catalog_db)) {
if (catalog_tbl.getIsreplicated()) continue;
this.addTablePartitionInfo(catalog_tbl, catalog_tbl.getPartitioncolumn());
} // FOR
// Make sure that we disable VP optimizations otherwise we will get stuck
// in an infinite loop
this.enableReplicatedSecondaryIndexes(false);
// Make sure we initialize the StatementCompiler's PlanFragment counter
// so that we don't get overlapping PlanFragment ids
StatementCompiler.getNextFragmentId(catalog_db);
}
@Override
public String getJarDirectory() {
return FileUtil.getTempDirectory().getAbsolutePath();
}
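/** Discard any previously queued rewritten Statements */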
public void clear() {
this.rewritten_queries.clear();
}
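/**
* Compile a temporary catalog that includes the vertical partitions and the
* queued single-statement Procedures, then map each original Statement to
* its newly planned counterpart
* @throws Exception if the temporary catalog cannot be compiled
*/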
protected Map<Statement, Statement> getRewrittenQueryPlans() throws Exception {
if (debug.val) {
String temp = "";
for (String procName : this.rewritten_queries.values())
temp += String.format("\n%s: %s", procName, this.getStmtProcedureSQL(procName));
LOG.debug(String.format("Generating new query plans for %d rewritten Statements%s", this.rewritten_queries.size(), temp));
}
// Build the catalog
Catalog catalog = null;
try {
catalog = this.createCatalog();
} catch (Throwable ex) {
LOG.error("Failed to build compile vertical partitioning catalog");
LOG.error("Busted DDL:\n" + VerticalPartitionPlanner.this.ddl);
throw new Exception(ex);
} finally {
this.tempDDL.delete();
}
assert (catalog != null);
Database catalog_db = CatalogUtil.getDatabase(catalog);
Map<Statement, Statement> ret = new HashMap<Statement, Statement>();
for (Statement catalog_stmt : this.rewritten_queries.keySet()) {
String procName = this.rewritten_queries.get(catalog_stmt);
assert (procName != null);
Procedure new_catalog_proc = catalog_db.getProcedures().get(procName);
assert (new_catalog_proc != null);
Statement new_catalog_stmt = CollectionUtil.first(new_catalog_proc.getStatements());
assert (new_catalog_stmt != null);
ret.put(catalog_stmt, new_catalog_stmt);
} // FOR
// CatalogUtil.saveCatalog(catalog,
// FileUtil.getTempFile("catalog").getAbsolutePath());
return (ret);
}
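/**
* Queue a rewritten SQL string as a single-statement Procedure so that a new
* query plan is generated for it when the temporary catalog is compiled
*/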
protected void queueRewrittenStatement(Statement catalog_stmt, String sql) {
String procName = String.format("%s_%s", catalog_stmt.getParent().getName(), catalog_stmt.getName());
this.addStmtProcedure(procName, sql);
this.rewritten_queries.put(catalog_stmt, procName);
if (debug.val)
LOG.debug(String.format("Queued %s to generate new query plan\n%s", catalog_stmt.fullName(), sql));
}
}
}