Package org.apache.pig.newplan

Source Code of org.apache.pig.newplan.FilterExtractor$KeyState

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.pig.newplan;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.Expression;
import org.apache.pig.Expression.OpType;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.newplan.logical.expression.AddExpression;
import org.apache.pig.newplan.logical.expression.AndExpression;
import org.apache.pig.newplan.logical.expression.BinaryExpression;
import org.apache.pig.newplan.logical.expression.ConstantExpression;
import org.apache.pig.newplan.logical.expression.DivideExpression;
import org.apache.pig.newplan.logical.expression.EqualExpression;
import org.apache.pig.newplan.logical.expression.GreaterThanEqualExpression;
import org.apache.pig.newplan.logical.expression.GreaterThanExpression;
import org.apache.pig.newplan.logical.expression.LessThanEqualExpression;
import org.apache.pig.newplan.logical.expression.LessThanExpression;
import org.apache.pig.newplan.logical.expression.LogicalExpression;
import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
import org.apache.pig.newplan.logical.expression.ModExpression;
import org.apache.pig.newplan.logical.expression.MultiplyExpression;
import org.apache.pig.newplan.logical.expression.NotEqualExpression;
import org.apache.pig.newplan.logical.expression.OrExpression;
import org.apache.pig.newplan.logical.expression.ProjectExpression;
import org.apache.pig.newplan.logical.expression.RegexExpression;
import org.apache.pig.newplan.logical.expression.SubtractExpression;

/**
* This is a rewrite of {@code PColFilterExtractor}
*
* We traverse the expression plan bottom up and separate it into two plans
* - pushdownExprPlan, plan that can be pushed down to the loader and
* - filterExprPlan, remaining plan that needs to be evaluated by pig
*
*/
public class FilterExtractor {

    private static final Log LOG = LogFactory.getLog(FilterExtractor.class);

    /**
     * partition columns associated with the table
     * present in the load on which the filter whose
     * inner plan is being visited is applied
     */
    private List<String> partitionCols;

    /**
     * We visit this plan to create the filteredPlan
     */
    protected LogicalExpressionPlan originalPlan;

    /**
     * Plan that is created after all pushable filters are removed
     */
    protected LogicalExpressionPlan filteredPlan;

    /**
     * Plan that can be pushed down
     */
    protected LogicalExpressionPlan pushdownExprPlan;

    /**
     * Final filterExpr after we are done
     */
    private LogicalExpression filterExpr = null;

    /**
     * @{code Expression} to pushdown
     */
    private Expression pushdownExpr = null;

    /**
     *
     * @param plan logical plan corresponding the filter's comparison condition
     * @param partitionCols list of partition columns of the table which is
     * being loaded in the LOAD statement which is input to the filter
     */
    public FilterExtractor(LogicalExpressionPlan plan,
            List<String> partitionCols) {
        this.originalPlan = plan;
        this.partitionCols = new ArrayList<String>(partitionCols);
        this.filteredPlan = new LogicalExpressionPlan();
        this.pushdownExprPlan = new LogicalExpressionPlan();
    }

    public void visit() throws FrontendException {
        // we will visit the leaf and it will recursively walk the plan
        LogicalExpression leaf = (LogicalExpression)originalPlan.getSources().get( 0 );
        // if the leaf is a unary operator it should be a FilterFunc in
        // which case we don't try to extract partition filter conditions
        if(leaf instanceof BinaryExpression) {
            // recursively traverse the tree bottom up
            // checkPushdown returns KeyState which is pair of LogicalExpression
            BinaryExpression binExpr = (BinaryExpression)leaf;
            KeyState finale = checkPushDown(binExpr);
            this.filterExpr = finale.filterExpr;
            this.pushdownExpr = getExpression(finale.pushdownExpr);
        }
    }

    /**
     * @return new filtered plan after pushdownable filters are removed
     */
    public LogicalExpressionPlan getFilteredPlan() {
        return filteredPlan;
    }

    /**
     * @return true if pushdown is possible
     */
    public boolean canPushDown() {
        return pushdownExpr != null;
    }

    /**
     * @return the filterRemovable
     */
    public boolean isFilterRemovable() {
        return filterExpr == null;
    }

    /**
     * @return the condition on partition columns extracted from filter
     */
    public  Expression getPColCondition(){
        return pushdownExpr;
    }

    private class KeyState {
        LogicalExpression pushdownExpr;
        LogicalExpression filterExpr;
    }

    private KeyState checkPushDown(LogicalExpression op) throws FrontendException {
        // Note: Currently, Expression interface only understands 3 Expression Types
        // (Look at getExpression below) BinaryExpression, ProjectExpression and ConstantExpression
        if(op instanceof ProjectExpression) {
            return checkPushDown((ProjectExpression)op);
        } else if (op instanceof BinaryExpression) {
            return checkPushDown((BinaryExpression)op);
        } else if (op instanceof ConstantExpression) {
            // Constants can be pushdown
            KeyState state = new KeyState();
            state.pushdownExpr = op;
            state.filterExpr = null;
            return state;
        } else {
            KeyState state = new KeyState();
            state.pushdownExpr = null;
            state.filterExpr = addToFilterPlan(op);
            return state;
        }
    }

    private LogicalExpression addToFilterPlan(LogicalExpression op) throws FrontendException {
        // This copies the whole tree underneath op
        LogicalExpression newOp = op.deepCopy(filteredPlan);
        return newOp;
    }

    private LogicalExpression andLogicalExpressions(
            LogicalExpressionPlan plan, LogicalExpression a, LogicalExpression b) {
        if (a == null) {
            return b;
        }
        if (b == null) {
            return a;
        }
        LogicalExpression andOp = new AndExpression(plan, a, b);
        return andOp;
    }

    private LogicalExpression orLogicalExpressions(
            LogicalExpressionPlan plan, LogicalExpression a, LogicalExpression b) {
        // Or 2 operators if they are not null
        if (a == null || b == null) {
            return null;
        }
        LogicalExpression orOp = new OrExpression(plan, a, b);
        return orOp;
    }

    private KeyState checkPushDown(BinaryExpression binExpr) throws FrontendException {
        KeyState state = new KeyState();
        KeyState leftState = checkPushDown(binExpr.getLhs());
        KeyState rightState = checkPushDown(binExpr.getRhs());

        if (binExpr instanceof AndExpression) {
            // AND is commutative
            // Expression =
            // (leftState.pushdownExpr AND leftState.filterExpr)
            // AND (rightState.pushdownExpr AND leftState.filterExpr)
            //
            // pushDownExpr = (leftState.pushdownExpr AND rightState.pushdownExpr)
            // filterExpr = (leftState.filterExpr AND rightState.filterExpr)
            state.pushdownExpr = andLogicalExpressions(pushdownExprPlan, leftState.pushdownExpr, rightState.pushdownExpr);
            state.filterExpr = andLogicalExpressions(filteredPlan, leftState.filterExpr, rightState.filterExpr);
        } else if (binExpr instanceof OrExpression) {
            // Expression =
            // (leftState.pushdownExpr AND leftState.filterExpr)
            // OR (rightState.pushdownExpr AND leftState.filterExpr)
            //
            // This could be rewritten with distributive property as
            // (leftState.pushdownExpr OR rightState.pushdownExpr)
            // AND
            // ( (leftState.pushdownExpr OR rightState.filterExpr)
            // AND (leftState.filterExpr OR rightState.pushdownExpr)
            // AND (leftState.filterExpr OR rightState.filterExpr)
            // )
            // In other words,
            // pushdownExpr = leftState.pushdownExpr OR rightState.pushdownExpr
            // filterExpr = (leftState.pushdownExpr OR rightState.filterExpr)
            //              AND (leftState.filterExpr OR rightState.pushdownExpr)
            //              AND (leftState.filterExpr OR rightState.filterExpr)
            state.pushdownExpr = orLogicalExpressions(pushdownExprPlan, leftState.pushdownExpr, rightState.pushdownExpr);
            if(state.pushdownExpr == null) {
                // Whatever we did so far on the right tree is all wasted :(
                // Undo all the mutation (AND OR distributions) until now
                removeFromFilteredPlan(leftState.filterExpr);
                removeFromFilteredPlan(rightState.filterExpr);
                state.filterExpr = addToFilterPlan(binExpr);
            } else {
                LogicalExpression f1 = orLogicalExpressions(filteredPlan, leftState.pushdownExpr, rightState.filterExpr);
                LogicalExpression f2 = orLogicalExpressions(filteredPlan, leftState.filterExpr, rightState.pushdownExpr);
                LogicalExpression f3 = orLogicalExpressions(filteredPlan, leftState.filterExpr, rightState.filterExpr);
                state.filterExpr = andLogicalExpressions(filteredPlan, f1, andLogicalExpressions(filteredPlan, f2, f3));
            }
        } else {
            // leftState OP rightState
            if (leftState.filterExpr == null && rightState.filterExpr == null) {
                state.pushdownExpr = binExpr;
                state.filterExpr = null;
            } else {
                state.pushdownExpr = null;
                removeFromFilteredPlan(leftState.filterExpr);
                removeFromFilteredPlan(rightState.filterExpr);
                state.filterExpr = addToFilterPlan(binExpr);
            }
        }
        return state;
    }

    private KeyState checkPushDown(ProjectExpression project) throws FrontendException {
        String fieldName = project.getFieldSchema().alias;
        KeyState state = new KeyState();
        if(partitionCols.contains(fieldName)) {
            state.filterExpr = null;
            state.pushdownExpr = project;
        } else {
            state.filterExpr = addToFilterPlan(project);
            state.pushdownExpr = null;
        }
        return state;
    }

    /**
     * Assume that the given operator is already disconnected from its predecessors.
     * @param op
     * @throws FrontendException
     */
    private void removeFromFilteredPlan(Operator op) throws FrontendException {
        List<Operator> succs = filteredPlan.getSuccessors( op );
        if( succs == null ) {
            filteredPlan.remove( op );
            return;
        }

        Operator[] children = new Operator[succs.size()];
        for( int i = 0; i < succs.size(); i++ ) {
            children[i] = succs.get(i);
        }

        for( Operator succ : children ) {
            filteredPlan.disconnect( op, succ );
            removeFromFilteredPlan( succ );
        }

        filteredPlan.remove( op );
    }

    public static Expression getExpression(LogicalExpression op) throws FrontendException
    {
        if(op == null) {
            return null;
        }
        if(op instanceof ConstantExpression) {
            ConstantExpression constExpr =(ConstantExpression)op ;
            return new Expression.Const( constExpr.getValue() );
        } else if (op instanceof ProjectExpression) {
            ProjectExpression projExpr = (ProjectExpression)op;
            String fieldName = projExpr.getFieldSchema().alias;
            return new Expression.Column(fieldName);
        } else {
            if( !( op instanceof BinaryExpression ) ) {
                LOG.error("Unsupported conversion of LogicalExpression to Expression: " + op.getName());
                throw new FrontendException("Unsupported conversion of LogicalExpression to Expression: " + op.getName());
            }
            BinaryExpression binOp = (BinaryExpression)op;
            if(binOp instanceof AddExpression) {
                return getExpression( binOp, OpType.OP_PLUS );
            } else if(binOp instanceof SubtractExpression) {
                return getExpression(binOp, OpType.OP_MINUS);
            } else if(binOp instanceof MultiplyExpression) {
                return getExpression(binOp, OpType.OP_TIMES);
            } else if(binOp instanceof DivideExpression) {
                return getExpression(binOp, OpType.OP_DIV);
            } else if(binOp instanceof ModExpression) {
                return getExpression(binOp, OpType.OP_MOD);
            } else if(binOp instanceof AndExpression) {
                return getExpression(binOp, OpType.OP_AND);
            } else if(binOp instanceof OrExpression) {
                return getExpression(binOp, OpType.OP_OR);
            } else if(binOp instanceof EqualExpression) {
                return getExpression(binOp, OpType.OP_EQ);
            } else if(binOp instanceof NotEqualExpression) {
                return getExpression(binOp, OpType.OP_NE);
            } else if(binOp instanceof GreaterThanExpression) {
                return getExpression(binOp, OpType.OP_GT);
            } else if(binOp instanceof GreaterThanEqualExpression) {
                return getExpression(binOp, OpType.OP_GE);
            } else if(binOp instanceof LessThanExpression) {
                return getExpression(binOp, OpType.OP_LT);
            } else if(binOp instanceof LessThanEqualExpression) {
                return getExpression(binOp, OpType.OP_LE);
            } else if(binOp instanceof RegexExpression) {
                return getExpression(binOp, OpType.OP_MATCH);
            } else {
                LOG.error("Unsupported conversion of LogicalExpression to Expression: " + op.getName());
                throw new FrontendException("Unsupported conversion of LogicalExpression to Expression: " + op.getName());
            }
        }
    }

    private static Expression getExpression(BinaryExpression binOp, OpType
            opType) throws FrontendException {
        return new Expression.BinaryExpression(getExpression(binOp.getLhs())
                , getExpression(binOp.getRhs()), opType);
    }
}
TOP

Related Classes of org.apache.pig.newplan.FilterExtractor$KeyState

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.