/* This file is part of VoltDB.
* Copyright (C) 2008-2009 VoltDB L.L.C.
*
* VoltDB is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* VoltDB is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
*/
package org.voltdb.sysprocs;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.voltdb.DependencySet;
import org.voltdb.ParameterSet;
import org.voltdb.ProcInfo;
import org.voltdb.VoltSystemProcedure;
import org.voltdb.VoltTable;
import org.voltdb.VoltType;
import org.voltdb.catalog.Column;
import org.voltdb.catalog.MaterializedViewInfo;
import org.voltdb.catalog.Table;
import org.voltdb.exceptions.MispredictionException;
import edu.brown.catalog.CatalogUtil;
import edu.brown.hstore.HStoreConstants;
import edu.brown.hstore.PartitionExecutor.SystemProcedureExecutionContext;
import edu.brown.hstore.txns.AbstractTransaction;
import edu.brown.hstore.txns.LocalTransaction;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.statistics.ObjectHistogram;
/**
* Given a VoltTable with a schema corresponding to a persistent table, load all
* of the rows applicable to the current partitioning at each node in the
* cluster.
*/
@ProcInfo(singlePartition = false)
public class LoadMultipartitionTable extends VoltSystemProcedure {
private static final Logger LOG = Logger.getLogger(LoadMultipartitionTable.class);
private static final LoggerBoolean debug = new LoggerBoolean();
private static final LoggerBoolean trace = new LoggerBoolean();
static {
LoggerUtil.attachObserver(LOG, debug, trace);
}
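// Dependency ids for the two fragments. OR-ing in MULTIPARTITION_DEPENDENCY marks
// DEP_distribute as a dependency produced by every partition, so the aggregate
// fragment waits on all of the per-partition results.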
private static final long DEP_distribute = SysProcFragmentId.PF_loadDistribute | HStoreConstants.MULTIPARTITION_DEPENDENCY;
private static final long DEP_aggregate = SysProcFragmentId.PF_loadAggregate;
private ObjectHistogram<Integer> allPartitionsHistogram = new ObjectHistogram<Integer>();
@Override
public void initImpl() {
executor.registerPlanFragment(SysProcFragmentId.PF_loadDistribute, this);
executor.registerPlanFragment(SysProcFragmentId.PF_loadAggregate, this);
this.allPartitionsHistogram.put(catalogContext.getAllPartitionIds());
}
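/**
 * Dispatches the two plan fragments used by this sysproc: PF_loadDistribute invokes
 * loadTable() on this partition's portion of the data, while PF_loadAggregate simply
 * collects the per-partition acknowledgements so the coordinator can return.
 */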
@Override
public DependencySet executePlanFragment(Long txn_id,
Map<Integer, List<VoltTable>> dependencies,
int fragmentId,
ParameterSet params,
SystemProcedureExecutionContext context) {
// Every fragment must return a dependency, so build a dummy one-row result carrying the txn id.
VoltTable[] result = new VoltTable[1];
result[0] = new VoltTable(new VoltTable.ColumnInfo("TxnId", VoltType.BIGINT));
result[0].addRow(txn_id);
if (fragmentId == SysProcFragmentId.PF_loadDistribute) {
assert context.getCluster().getName() != null;
assert context.getDatabase().getName() != null;
assert params != null;
assert params.toArray() != null;
assert params.toArray()[0] != null;
assert params.toArray()[1] != null;
String table_name = (String) (params.toArray()[0]);
VoltTable table = (VoltTable)(params.toArray()[1]);
if (debug.val) LOG.debug(String.format("Loading %d tuples for table '%s' in txn #%d",
table.getRowCount(), table_name, txn_id));
assert(this.isInitialized()) : " The sysproc " + this.getClass().getSimpleName() + " was not initialized properly";
try {
AbstractTransaction ts = this.hstore_site.getTransaction(txn_id);
this.executor.loadTable(ts,
context.getCluster().getName(),
context.getDatabase().getName(),
table_name, table, 0);
} catch (VoltAbortException e) {
// Log the failure but fall through so that we still reply with our dependency.
LOG.error("Failed to load data for table '" + table_name + "' in txn #" + txn_id, e);
}
if (debug.val) LOG.debug("Finished loading table. Things look good...");
return new DependencySet(new int[] { (int)DEP_distribute }, result);
} else if (fragmentId == SysProcFragmentId.PF_loadAggregate) {
if (debug.val) LOG.debug("Aggregating results from loading fragments in txn #" + txn_id);
return new DependencySet(new int[] { (int)DEP_aggregate }, result);
}
// Every fragment id must be handled above; falling through to here is a bug.
assert (false);
return null;
}
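/**
 * Builds the plan for a replicated table: a single multi-partition PF_loadDistribute
 * fragment that ships the entire input table to every partition, followed by a
 * PF_loadAggregate fragment at this base partition that waits on the distribute
 * dependency.
 */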
private SynthesizedPlanFragment[] createReplicatedPlan(LocalTransaction ts, Table catalog_tbl, VoltTable table) {
if (debug.val)
LOG.debug(String.format("%s - %s is replicated. Creating %d fragments to send to all partitions",
ts, catalog_tbl.getName(), catalogContext.numberOfPartitions));
ParameterSet params = new ParameterSet(catalog_tbl.getName(), table);
final SynthesizedPlanFragment pfs[] = new SynthesizedPlanFragment[2];
int idx = 0;
// Create a work unit to invoke super.loadTable() on each partition
pfs[idx] = new SynthesizedPlanFragment();
pfs[idx].fragmentId = SysProcFragmentId.PF_loadDistribute;
pfs[idx].outputDependencyIds = new int[] { (int)DEP_distribute };
pfs[idx].inputDependencyIds = new int[] { };
pfs[idx].multipartition = true;
pfs[idx].nonExecSites = false;
pfs[idx].parameters = params;
// Create a work unit to aggregate the results.
idx += 1;
pfs[idx] = new SynthesizedPlanFragment();
pfs[idx].fragmentId = SysProcFragmentId.PF_loadAggregate;
pfs[idx].outputDependencyIds = new int[] { (int)DEP_aggregate };
pfs[idx].inputDependencyIds = new int[] { (int)DEP_distribute };
pfs[idx].multipartition = false;
pfs[idx].nonExecSites = false;
pfs[idx].parameters = new ParameterSet();
pfs[idx].destPartitionId = this.partitionId;
return (pfs);
}
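/**
 * Builds the plan for a partitioned table: each input row is routed to a partition by
 * the PartitionEstimator, appended to a per-partition clone of the input VoltTable, and
 * one PF_loadDistribute fragment is created for every partition that received rows,
 * followed by a single PF_loadAggregate fragment at this base partition. If a row maps
 * to a partition that the transaction did not predict it would touch, a
 * MispredictionException forces the transaction to restart with the correct locks.
 */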
private SynthesizedPlanFragment[] createNonReplicatedPlan(LocalTransaction ts, Table catalog_tbl, VoltTable table) {
if (debug.val)
LOG.debug(catalog_tbl + " is not replicated. Splitting table data into separate pieces for partitions");
// Create a table for each partition
VoltTable partitionedTables[] = new VoltTable[catalogContext.numberOfPartitions];
// Split the input table into per-partition units
if (debug.val)
LOG.debug(String.format("Splitting original %d %s rows into partitioned tables",
table.getRowCount(), catalog_tbl));
boolean mispredict = false;
table.resetRowPosition();
while (table.advanceRow()) {
int p = -1;
try {
p = this.p_estimator.getTableRowPartition(catalog_tbl, table);
} catch (Exception e) {
LOG.fatal("Failed to split input table into partitions", e);
throw new RuntimeException("Failed to split input table into partitions", e);
}
assert(p >= 0);
if (partitionedTables[p] == null) {
partitionedTables[p] = table.clone(1024 * 1024);
ts.getTouchedPartitions().put(p);
if (ts.getPredictTouchedPartitions().contains(p) == false) {
mispredict = true;
}
if (trace.val) LOG.trace("Cloned VoltTable for Partition #" + p);
}
// Add the active row from the input table to this partition's clone.
// Skip the copy if we already know the transaction will be restarted for a misprediction.
if (mispredict == false) {
partitionedTables[p].add(table);
if (trace.val && table.getActiveRowIndex() > 0 && table.getActiveRowIndex() % 1000 == 0)
LOG.trace(String.format("Processed %s tuples for " + catalog_tbl, table.getActiveRowIndex()));
}
} // WHILE
// If we mispredicted, restart the transaction so that it locks exactly the partitions
// it needs to load data on. This helps speed up concurrent bulk loading.
if (mispredict) {
if (debug.val)
LOG.warn(String.format("%s - Restarting as a distributed transaction on partitions %s",
ts, ts.getTouchedPartitions().values()));
throw new MispredictionException(ts.getTransactionId(), ts.getTouchedPartitions());
}
StringBuilder sb = null;
if (trace.val) {
sb = new StringBuilder();
sb.append("LoadMultipartition Info for ").append(catalog_tbl.getName()).append(":");
}
// Generate a plan fragment for each partition using the sub-tables.
// Note that we only need a PlanFragment for a partition whose portion of the
// split table actually has rows.
List<SynthesizedPlanFragment> pfs = new ArrayList<SynthesizedPlanFragment>();
for (int i = 0; i < partitionedTables.length; ++i) {
int partition = i;
if (partitionedTables[partition] == null || partitionedTables[partition].getRowCount() == 0) continue;
ParameterSet params = new ParameterSet(catalog_tbl.getName(), partitionedTables[partition]);
SynthesizedPlanFragment pf = new SynthesizedPlanFragment();
pf.fragmentId = SysProcFragmentId.PF_loadDistribute;
pf.inputDependencyIds = new int[] { };
pf.outputDependencyIds = new int[] { (int)DEP_distribute };
pf.multipartition = false;
pf.nonExecSites = false;
pf.destPartitionId = partition;
pf.parameters = params;
pf.last_task = false;
pfs.add(pf);
if (trace.val)
sb.append(String.format("\n Partition #%d: %d tuples",
partition, partitionedTables[partition].getRowCount()));
} // FOR
if (trace.val) LOG.trace(sb.toString());
// a final plan fragment to aggregate the results
SynthesizedPlanFragment pf = new SynthesizedPlanFragment();
pf.destPartitionId = this.partitionId;
pf.fragmentId = SysProcFragmentId.PF_loadAggregate;
pf.inputDependencyIds = new int[] { (int)DEP_distribute };
pf.outputDependencyIds = new int[] { (int)DEP_aggregate };
pf.multipartition = false;
pf.nonExecSites = false;
pf.last_task = true;
pf.parameters = new ParameterSet();
pfs.add(pf);
return (pfs.toArray(new SynthesizedPlanFragment[0]));
}
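/**
 * Projects the input rows onto the columns of the table's vertical partition (a
 * materialized view over a subset of the source columns) and loads the result as a
 * replicated table. For example, with a hypothetical CUSTOMER(C_ID, C_NAME, C_BALANCE)
 * table whose vertical partition covers (C_ID, C_NAME), each input row contributes only
 * its C_ID and C_NAME values to the replicated view table.
 */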
private SynthesizedPlanFragment[] createVerticalPartitionPlan(LocalTransaction ts, MaterializedViewInfo catalog_view, VoltTable table) {
Table virtual_tbl = catalog_view.getDest();
VoltTable vt = CatalogUtil.getVoltTable(virtual_tbl);
Collection<Column> virtual_cols = CatalogUtil.getColumns(catalog_view.getGroupbycols());
table.resetRowPosition();
while (table.advanceRow()) {
int i = 0;
Object row[] = new Object[virtual_cols.size()];
for (Column catalog_col : CatalogUtil.getSortedCatalogItems(virtual_cols, "index")) {
if (trace.val)
LOG.trace(String.format("Adding %s [%d] to virtual column %d",
table.getColumnName(catalog_col.getIndex()), catalog_col.getIndex(), i));
// Fill the next view column from the corresponding source-table column
row[i++] = table.get(catalog_col.getIndex());
} // FOR
vt.addRow(row);
} // WHILE
if (debug.val)
LOG.debug(String.format("Vertical Partition %s -> %s\n",
catalog_view.getParent().getName(), virtual_tbl.getName()) + vt);
return (createReplicatedPlan(ts, virtual_tbl, vt));
}
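/**
 * Sysproc entry point: splits (or replicates) the given VoltTable and loads it into the
 * named table across the cluster.
 *
 * A minimal client-side invocation sketch (illustrative only; assumes a connected
 * org.voltdb.client.Client and a hypothetical target table MY_TABLE):
 * <pre>
 * VoltTable vt = new VoltTable(new VoltTable.ColumnInfo("ID", VoltType.BIGINT),
 *                              new VoltTable.ColumnInfo("NAME", VoltType.STRING));
 * vt.addRow(1L, "alice");
 * client.callProcedure("@LoadMultipartitionTable", "MY_TABLE", vt);
 * </pre>
 *
 * @param tableName name of the target persistent table
 * @param table rows to load, with a schema matching the target table
 * @return the dependency results from the final aggregation fragment
 */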
public VoltTable[] run(String tableName, VoltTable table) throws VoltAbortException {
assert(table != null) :
"VoltTable to be loaded into " + tableName + " is null in txn #" + this.getTransactionId();
if (debug.val)
LOG.debug(String.format("Executing multi-partition loader for %s with %d tuples in txn #%d [bytes=%d]",
tableName, table.getRowCount(), this.getTransactionId(), table.getUnderlyingBufferSize()));
VoltTable[] results;
SynthesizedPlanFragment pfs[];
Table catalog_tbl = catalogContext.database.getTables().getIgnoreCase(tableName);
if (catalog_tbl == null) {
throw new VoltAbortException("Table '" + tableName + "' does not exist");
}
else if (table.getRowCount() == 0) {
throw new VoltAbortException("The VoltTable for table '" + tableName + "' is empty");
}
LocalTransaction ts = this.getTransactionState();
// If the table is replicated, just send the whole input table to every partition.
if (catalog_tbl.getIsreplicated()) {
// If the transaction hasn't locked every partition in the cluster, stop it here
// and force it to restart with locks on all partitions.
if (ts.getPredictTouchedPartitions().size() != this.allPartitionsHistogram.getValueCount()) {
throw new MispredictionException(this.getTransactionId(), this.allPartitionsHistogram);
}
pfs = this.createReplicatedPlan(ts, catalog_tbl, table);
}
// Otherwise, create a VoltTable for each partition and split up the incoming table
// then send those partial tables to the appropriate sites.
else {
pfs = this.createNonReplicatedPlan(ts, catalog_tbl, table);
}
// Distribute and execute the fragments, passing the plan fragments and the id
// of the aggregator's output dependency.
if (debug.val) LOG.debug("Passing " + pfs.length + " sysproc fragments to executeSysProcPlanFragments()");
results = executeSysProcPlanFragments(pfs, (int)DEP_aggregate);
// Check whether this table has a vertical partition
// If so, then we'll automatically blast out the data that it needs
MaterializedViewInfo catalog_view = CatalogUtil.getVerticalPartition(catalog_tbl);
if (debug.val)
LOG.debug(String.format("%s - %s Vertical Partition: %s",
ts, catalog_tbl.getName(), catalog_view));
if (catalog_view != null) {
if (debug.val)
LOG.debug(String.format("%s - Updating %s's vertical partition %s",
ts, catalog_tbl.getName(), catalog_view.getDest().getName()));
executeSysProcPlanFragments(createVerticalPartitionPlan(ts, catalog_view, table), (int)DEP_aggregate);
}
return (results);
}
}