/* This file is part of VoltDB.
* Copyright (C) 2008-2014 VoltDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
*/
package org.voltdb.planner;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.json_voltpatches.JSONException;
import org.voltdb.catalog.CatalogMap;
import org.voltdb.catalog.Column;
import org.voltdb.catalog.ColumnRef;
import org.voltdb.catalog.MaterializedViewInfo;
import org.voltdb.catalog.Table;
import org.voltdb.expressions.AbstractExpression;
import org.voltdb.expressions.AggregateExpression;
import org.voltdb.expressions.ExpressionUtil;
import org.voltdb.expressions.TupleValueExpression;
import org.voltdb.planner.ParsedSelectStmt.ParsedColInfo;
import org.voltdb.planner.parseinfo.BranchNode;
import org.voltdb.planner.parseinfo.JoinNode;
import org.voltdb.planner.parseinfo.StmtTableScan;
import org.voltdb.planner.parseinfo.StmtTargetTableScan;
import org.voltdb.plannodes.AbstractPlanNode;
import org.voltdb.plannodes.AbstractScanPlanNode;
import org.voltdb.plannodes.HashAggregatePlanNode;
import org.voltdb.plannodes.NodeSchema;
import org.voltdb.plannodes.ProjectionPlanNode;
import org.voltdb.plannodes.SchemaColumn;
import org.voltdb.types.ExpressionType;
import org.voltdb.utils.CatalogUtil;
/**
 * Collects everything the planner needs to "fix" a query that reads a
 * materialized view built over a partitioned source table whose partition
 * column is NOT among the view's GROUP BY columns.  Such a view can hold one
 * row per partition for the same group-by key, so the coordinator must
 * re-aggregate the partial per-partition rows to eliminate duplicates.
 */
public class MaterializedViewFixInfo {
    // New inlined projection node for the scan node; carries any extra group-by
    // columns the re-aggregation needs that the query did not itself select.
    private ProjectionPlanNode m_scanInlinedProjectionNode = null;
    // Re-aggregation plan node placed on the coordinator to eliminate the duplicated rows.
    private HashAggregatePlanNode m_reAggNode = null;
    // Does this MV-partitioned-based query need to be fixed?
    private boolean m_needed = false;
    // Table scan of the materialized view being fixed.
    private StmtTableScan m_mvTableScan = null;
    // Scan node for the MV table when it participates in a join query.
    AbstractScanPlanNode m_scanNode = null;
    // ENG-5386: tracks whether the query still qualifies for the edge-case
    // optimization that allows skipping the fix entirely.
    private boolean m_edgeCaseQueryNoFixNeeded = true;

    /** @return true if this MV-based query needs the duplicate-row fix. */
    public boolean needed() {
        return m_needed;
    }

    public void setNeeded(boolean need) {
        m_needed = need;
    }

    public String getMVTableName () {
        assert(m_mvTableScan != null);
        return m_mvTableScan.getTableName();
    }

    public String getMVTableAlias() {
        assert(m_mvTableScan != null);
        return m_mvTableScan.getTableAlias();
    }

    public HashAggregatePlanNode getReAggregationPlanNode () {
        return m_reAggNode;
    }

    public void setEdgeCaseQueryNoFixNeeded (boolean edgeCase) {
        m_edgeCaseQueryNoFixNeeded = edgeCase;
    }

    /**
     * Check whether the table needs to be fixed or not, and if so build the
     * inline projection node and the coordinator re-aggregation node.
     * Sets the needed flag to true only if it needs to be fixed.
     *
     * @param mvTableScan     scan of the (possible) materialized view table
     * @param scanColumns     columns read by the scan; extra MV group-by columns
     *                        are added to this set as a side effect when fixing
     * @param joinTree        the statement's join tree; its filters may be moved
     *                        into the re-aggregation node's post-predicate
     * @param displayColumns  the statement's display list (for ENG-5386 check)
     * @param groupByColumns  the statement's group-by list (for ENG-5386 check)
     * @return true if the fix was constructed and is needed
     */
    public boolean processMVBasedQueryFix(StmtTableScan mvTableScan, Set<SchemaColumn> scanColumns, JoinNode joinTree,
            List<ParsedColInfo> displayColumns, List<ParsedColInfo> groupByColumns) {
        // Check valid cases first: only a real target-table scan can be an MV scan.
        //@TODO
        if ( ! (mvTableScan instanceof StmtTargetTableScan)) {
            return false;
        }
        Table table = ((StmtTargetTableScan)mvTableScan).getTargetTable();
        assert (table != null);
        String mvTableName = table.getTypeName();
        // A table with no materializer is not a materialized view at all.
        Table srcTable = table.getMaterializer();
        if (srcTable == null) {
            return false;
        }
        // A view over a replicated source table cannot produce duplicates.
        Column partitionCol = srcTable.getPartitioncolumn();
        if (partitionCol == null) {
            return false;
        }
        int partitionColIndex = partitionCol.getIndex();
        MaterializedViewInfo mvInfo = srcTable.getViews().get(mvTableName);

        int numOfGroupByColumns;
        // Determine whether the partition column is in the view's group-by list.
        // If it is, each group lives on exactly one partition and no fix is needed.
        String complexGroupbyJson = mvInfo.getGroupbyexpressionsjson();
        if (complexGroupbyJson.length() > 0) {
            List<AbstractExpression> mvComplexGroupbyCols = null;
            try {
                mvComplexGroupbyCols = AbstractExpression.fromJSONArrayString(complexGroupbyJson, null);
            } catch (JSONException e) {
                // The group-by JSON is generated by the planner itself when the view
                // is created, so a parse failure indicates a corrupt catalog.  Do not
                // swallow it (the original code continued and died with a bare NPE on
                // the next line); fail with a descriptive, cause-preserving exception.
                throw new RuntimeException(
                        "Failed to deserialize group-by expressions of materialized view " +
                        mvTableName, e);
            }
            numOfGroupByColumns = mvComplexGroupbyCols.size();

            for (AbstractExpression expr: mvComplexGroupbyCols) {
                if (expr instanceof TupleValueExpression) {
                    TupleValueExpression tve = (TupleValueExpression) expr;
                    if (tve.getColumnIndex() == partitionColIndex) {
                        // The group by columns contain the partition column of the
                        // source table, so a query on the MV table will not see
                        // cross-partition duplicates.  No fix needed.
                        return false;
                    }
                }
            }
        } else {
            CatalogMap<ColumnRef> mvSimpleGroupbyCols = mvInfo.getGroupbycols();
            numOfGroupByColumns = mvSimpleGroupbyCols.size();

            for (ColumnRef colRef: mvSimpleGroupbyCols) {
                if (colRef.getColumn().getIndex() == partitionColIndex) {
                    // Same reasoning as the complex group-by case above: the
                    // partition column is a group-by column, so no duplicates.
                    return false;
                }
            }
        }
        assert(numOfGroupByColumns > 0);
        m_mvTableScan = mvTableScan;

        Set<String> mvDDLGroupbyColumnNames = new HashSet<String>();
        List<Column> mvColumnArray =
                CatalogUtil.getSortedCatalogItems(table.getColumns(), "index");

        // Start the real materialized-view processing to fix the duplicates problem.
        // (1) Construct new projection columns for the scan plan node.
        Set<SchemaColumn> mvDDLGroupbyColumns = new HashSet<SchemaColumn>();
        NodeSchema inlineProjSchema = new NodeSchema();
        for (SchemaColumn scol: scanColumns) {
            inlineProjSchema.addColumn(scol);
        }

        String mvTableAlias = getMVTableAlias();

        // The view's group-by columns occupy the first numOfGroupByColumns slots
        // of the view's column array; make sure each is scanned and projected.
        for (int i = 0; i < numOfGroupByColumns; i++) {
            Column mvCol = mvColumnArray.get(i);
            String colName = mvCol.getName();

            TupleValueExpression tve = new TupleValueExpression(mvTableName, mvTableAlias, colName, colName, i);
            tve.setTypeSizeBytes(mvCol.getType(), mvCol.getSize(), mvCol.getInbytes());

            mvDDLGroupbyColumnNames.add(colName);
            SchemaColumn scol = new SchemaColumn(mvTableName, mvTableAlias, colName, colName, tve);

            mvDDLGroupbyColumns.add(scol);
            if (!scanColumns.contains(scol)) {
                scanColumns.add(scol);
                // Construct new projection columns for the scan plan node.
                inlineProjSchema.addColumn(scol);
            }
        }

        // Record the re-aggregation type for each aggregate (non-group-by) MV column.
        Map<String, ExpressionType> mvColumnReAggType = new HashMap<String, ExpressionType>();
        for (int i = numOfGroupByColumns; i < mvColumnArray.size(); i++) {
            Column mvCol = mvColumnArray.get(i);
            ExpressionType reAggType = ExpressionType.get(mvCol.getAggregatetype());
            // Partial COUNT results are combined by summing them.
            if (reAggType == ExpressionType.AGGREGATE_COUNT_STAR ||
                    reAggType == ExpressionType.AGGREGATE_COUNT) {
                reAggType = ExpressionType.AGGREGATE_SUM;
            }
            mvColumnReAggType.put(mvCol.getName(), reAggType);
        }

        m_scanInlinedProjectionNode = new ProjectionPlanNode();
        m_scanInlinedProjectionNode.setOutputSchema(inlineProjSchema);

        // (2) Construct the re-aggregation node and its output schema.
        m_reAggNode = new HashAggregatePlanNode();
        int outputColumnIndex = 0;
        // inlineProjSchema contains the group-by columns, while aggSchema may not.
        NodeSchema aggSchema = new NodeSchema();

        // Construct the re-aggregation node's aggregate and group-by lists.
        for (SchemaColumn scol: scanColumns) {
            if (mvDDLGroupbyColumns.contains(scol)) {
                // Add group by expression.
                m_reAggNode.addGroupByExpression(scol.getExpression());
            } else {
                ExpressionType reAggType = mvColumnReAggType.get(scol.getColumnName());
                assert(reAggType != null);
                AbstractExpression agg_input_expr = scol.getExpression();
                assert(agg_input_expr instanceof TupleValueExpression);
                // Add aggregation information.
                m_reAggNode.addAggregate(reAggType, false, outputColumnIndex, agg_input_expr);
            }
            aggSchema.addColumn(scol);
            outputColumnIndex++;
        }
        m_reAggNode.setOutputSchema(aggSchema);

        // Collect all TVEs that need re-aggregation on the coordinator.
        List<TupleValueExpression> needReAggTVEs = new ArrayList<TupleValueExpression>();
        List<AbstractExpression> aggPostExprs = new ArrayList<AbstractExpression>();

        for (int i=numOfGroupByColumns; i < mvColumnArray.size(); i++) {
            Column mvCol = mvColumnArray.get(i);
            String colName = mvCol.getName();

            TupleValueExpression tve = new TupleValueExpression(mvTableName, mvTableAlias, colName, colName);
            tve.setTypeSizeBytes(mvCol.getType(), mvCol.getSize(), mvCol.getInbytes());

            needReAggTVEs.add(tve);
        }

        collectReAggNodePostExpressions(joinTree, needReAggTVEs, aggPostExprs);

        AbstractExpression aggPostExpr = ExpressionUtil.combine(aggPostExprs);
        // Add post filters for the re-aggregation node.
        m_reAggNode.setPostPredicate(aggPostExpr);

        // ENG-5386: some queries get correct answers without the fix; skip it for them.
        if (m_edgeCaseQueryNoFixNeeded &&
                edgeCaseQueryNoFixNeeded(mvDDLGroupbyColumnNames, mvColumnReAggType, displayColumns, groupByColumns)) {
            return false;
        }

        m_needed = true;
        return true;
    }

    /**
     * ENG-5386: detect queries that can skip the fix for better performance.
     * The query qualifies when (1) its group-by columns on the MV table are a
     * subset of the view DDL's group-by columns, and (2) every non-group-by
     * display column is a SUM/MIN/MAX aggregate compatible with re-aggregation.
     */
    private boolean edgeCaseQueryNoFixNeeded(Set<String> mvDDLGroupbyColumnNames,
            Map<String, ExpressionType> mvColumnAggType, List<ParsedColInfo> displayColumns, List<ParsedColInfo> groupByColumns) {

        // Condition (1): group-by columns must all come from the MV DDL group-by TVEs.
        for (ParsedColInfo gcol: groupByColumns) {
            assert(gcol.expression instanceof TupleValueExpression);
            TupleValueExpression tve = (TupleValueExpression) gcol.expression;
            if (tve.getTableName().equals(getMVTableName()) && !mvDDLGroupbyColumnNames.contains(tve.getColumnName())) {
                return false;
            }
        }

        // Condition (2): each non-group-by display column must be a qualifying aggregate.
        for (ParsedColInfo dcol: displayColumns) {
            if (groupByColumns.contains(dcol)) {
                continue;
            }
            if (!(dcol.expression instanceof AggregateExpression)) {
                return false;
            }
            AggregateExpression aggExpr = (AggregateExpression) dcol.expression;
            if (!(aggExpr.getLeft() instanceof TupleValueExpression)) {
                return false;
            }
            ExpressionType type = aggExpr.getExpressionType();
            TupleValueExpression tve = (TupleValueExpression) aggExpr.getLeft();
            String columnName = tve.getColumnName();

            // Only SUM/MIN/MAX compose correctly when applied a second time.
            if (type != ExpressionType.AGGREGATE_SUM && type != ExpressionType.AGGREGATE_MIN
                    && type != ExpressionType.AGGREGATE_MAX) {
                return false;
            }

            if (tve.getTableName().equals(getMVTableName())) {
                // Must match the view's own aggregate type for that column.
                if (mvColumnAggType.get(columnName) != type ) {
                    return false;
                }
            } else {
                // Aggregate over the other join table: SUM would be inflated by
                // the duplicated MV rows, so it disqualifies the edge case.
                if (type == ExpressionType.AGGREGATE_SUM) {
                    return false;
                }
            }
        }

        // Edge case query can be optimized with a correct answer without the MV
        // re-aggregation fix.
        return true;
    }

    /**
     * Find the scan node on the MV table and, for join queries, splice the
     * re-aggregation node in above it.  The MV scan node cannot be in-lined,
     * so it must appear as a child of some join node.
     *
     * @param node      subtree root to search
     * @param reAggNode node to splice in place of the scan (null for single-table queries)
     * @return true once the MV scan node was found and processed
     */
    public boolean processScanNodeWithReAggNode(AbstractPlanNode node, AbstractPlanNode reAggNode) {
        // The MV table scan node cannot be among the in-lined nodes.
        for (int i = 0; i < node.getChildCount(); i++) {
            AbstractPlanNode child = node.getChild(i);

            if (child instanceof AbstractScanPlanNode) {
                AbstractScanPlanNode scanNode = (AbstractScanPlanNode) child;
                if (!scanNode.getTargetTableName().equals(getMVTableName())) {
                    continue;
                }
                if (reAggNode != null) {
                    // Join query case: re-aggregation node replaces the scan as the
                    // join's child (the scan becomes the re-agg node's input).
                    node.setAndLinkChild(i, reAggNode);
                }
                // Process the scan node: set up its scan columns by attaching the
                // in-line projection node built earlier.
                scanNode.addInlinePlanNode(m_scanInlinedProjectionNode);
                m_scanNode = scanNode;
                return true;
            } else {
                boolean replaced = processScanNodeWithReAggNode(child, reAggNode);
                if (replaced) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Walk the join tree, pushing down filters that can stay on the partitions
     * and collecting those that must run after coordinator re-aggregation.
     */
    private void collectReAggNodePostExpressions(JoinNode joinTree,
            List<TupleValueExpression> needReAggTVEs, List<AbstractExpression> aggPostExprs) {
        if (joinTree instanceof BranchNode) {
            collectReAggNodePostExpressions(((BranchNode)joinTree).getLeftNode(), needReAggTVEs, aggPostExprs);
            collectReAggNodePostExpressions(((BranchNode)joinTree).getRightNode(), needReAggTVEs, aggPostExprs);
            return;
        }

        joinTree.setJoinExpression(processFilters(joinTree.getJoinExpression(),
                                                  needReAggTVEs, aggPostExprs));

        // For outer-join filters; inner-join or single-table queries will have a
        // null where-expression here.
        joinTree.setWhereExpression(processFilters(joinTree.getWhereExpression(),
                                                   needReAggTVEs, aggPostExprs));
    }

    /** @return true if every TVE in the list references the MV table. */
    private boolean fromMVTableOnly(List<AbstractExpression> tves) {
        String mvTableName = getMVTableName();
        for (AbstractExpression tve: tves) {
            assert(tve instanceof TupleValueExpression);
            String tveTableName = ((TupleValueExpression)tve).getTableName();
            if (!mvTableName.equals(tveTableName)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Split a conjunctive filter: terms that reference only MV aggregate columns
     * cannot be evaluated before re-aggregation and are moved to aggPostExprs;
     * the rest are recombined and returned to remain at the scan.
     *
     * @return the recombined pushed-down filters, or null when filters is null
     */
    private AbstractExpression processFilters (AbstractExpression filters,
            List<TupleValueExpression> needReAggTVEs, List<AbstractExpression> aggPostExprs) {
        if (filters == null) {
            return null;
        }

        // Collect the filter terms that may remain on the partitions.
        List<AbstractExpression> remainingExprs = new ArrayList<AbstractExpression>();
        // Check each conjunct of the filter clause.
        List<AbstractExpression> exprs = ExpressionUtil.uncombine(filters);
        for (AbstractExpression expr: exprs) {
            ArrayList<AbstractExpression> tves = expr.findBaseTVEs();

            boolean canPushdown = true;
            for (TupleValueExpression needReAggTVE: needReAggTVEs) {
                if (tves.contains(needReAggTVE)) {
                    // Filtering on a pre-aggregated column disables the ENG-5386
                    // edge-case optimization.
                    m_edgeCaseQueryNoFixNeeded = false;

                    if (fromMVTableOnly(tves)) {
                        canPushdown = false;
                    }

                    break;
                }
            }
            if (canPushdown) {
                remainingExprs.add(expr);
            } else {
                aggPostExprs.add(expr);
            }
        }
        AbstractExpression remainingFilters = ExpressionUtil.combine(remainingExprs);
        // Return the updated filters for the scan node.
        return remainingFilters;
    }
}