// Source Code of sql.optimizers.index.IndexRuleOptimizer

package sql.optimizers.index;


import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;


import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.operators.conditional.OrExpression;
import net.sf.jsqlparser.schema.Table;
import net.sf.jsqlparser.statement.select.SelectItem;


import org.apache.log4j.Logger;


import plan_runner.components.Component;
import plan_runner.components.DataSourceComponent;
import plan_runner.components.OperatorComponent;
import plan_runner.expressions.ValueExpression;
import plan_runner.operators.AggregateOperator;
import plan_runner.operators.ProjectOperator;
import plan_runner.operators.SelectOperator;
import plan_runner.query_plans.QueryPlan;
import plan_runner.utilities.DeepCopy;
import sql.optimizers.Optimizer;
import sql.schema.Schema;
import sql.util.HierarchyExtractor;
import sql.util.JoinTablesExprs;
import sql.util.ParserUtil;
import sql.visitors.jsql.AndVisitor;
import sql.visitors.jsql.SQLVisitor;
import sql.visitors.squall.IndexSelectItemsVisitor;
import sql.visitors.squall.IndexWhereVisitor;


/*
 * Generates a single query plan, adds a final aggregation,
 *   adds selections (WHERE clause) and performs early projections (all unused columns are projected away).
 *
 * Does not take relation cardinalities into account.
 * Assumes there are no projections before the aggregation, so that EarlyProjection may impose some projections.
 * Aggregation is performed only on the last level.
 */
public class IndexRuleOptimizer implements Optimizer {
  private static Logger LOG = Logger.getLogger(IndexRuleOptimizer.class);


  private final Schema _schema;
  private final SQLVisitor _pq;
  private IndexCompGen _cg;
  private final IndexTranslator _it;
  private final Map _map; // map is updates in place


  public IndexRuleOptimizer(Map map) {
    _map = map;
    _pq = ParserUtil.parseQuery(map);


    _schema = new Schema(map);
    _it = new IndexTranslator(_schema, _pq.getTan());
  }


  private void attachSelectClause(Component lastComponent, List<AggregateOperator> aggOps,
      List<ValueExpression> groupByVEs) {
    if (aggOps.isEmpty()) {
      final ProjectOperator project = new ProjectOperator(groupByVEs);
      lastComponent.addOperator(project);
    } else if (aggOps.size() == 1) {
      // all the others are group by
      final AggregateOperator firstAgg = aggOps.get(0);


      if (ParserUtil.isAllColumnRefs(groupByVEs)) {
        // plain fields in select
        final List<Integer> groupByColumns = ParserUtil.extractColumnIndexes(groupByVEs);
        firstAgg.setGroupByColumns(groupByColumns);


        // Setting new level of components is necessary for correctness
        // only for distinct in aggregates
        // but it's certainly pleasant to have the final result grouped
        // on nodes by group by columns.
        final boolean newLevel = !(_it.isHashedBy(lastComponent, groupByColumns));
        if (newLevel) {
          lastComponent.setHashIndexes(groupByColumns);
          new OperatorComponent(lastComponent, ParserUtil.generateUniqueName("OPERATOR"),
              _cg.getQueryPlan()).addOperator(firstAgg);


        } else
          lastComponent.addOperator(firstAgg);
      } else {
        // Sometimes groupByVEs contains other functions, so we have to
        // use projections instead of simple groupBy
        // always new level


        // WARNING: groupByVEs cannot be used on two places: that's why
        // we do deep copy
        final ProjectOperator groupByProj = new ProjectOperator(
            (List<ValueExpression>) DeepCopy.copy(groupByVEs));
        if (!(groupByProj.getExpressions() == null || groupByProj.getExpressions()
            .isEmpty()))
          firstAgg.setGroupByProjection(groupByProj);


        // current component
        lastComponent.setHashExpressions((List<ValueExpression>) DeepCopy.copy(groupByVEs));


        new OperatorComponent(lastComponent, ParserUtil.generateUniqueName("OPERATOR"),
            _cg.getQueryPlan()).addOperator(firstAgg);
      }
    } else
      throw new RuntimeException("For now only one aggregate function supported!");
  }


  /*
   * Adds the given selection operator to affectedComponent.
   */
  private void attachWhereClause(Component affectedComponent, SelectOperator select) {
    affectedComponent.addOperator(select);
  }


  private void earlyProjection(QueryPlan queryPlan) {
    final EarlyProjection early = new EarlyProjection(_schema, _pq.getTan());
    early.operate(queryPlan);
  }


  @Override
  public QueryPlan generate() {
    _cg = generateTableJoins();


    LOG.info("Before WHERE, SELECT and EarlyProjection: ");
    LOG.info(ParserUtil.toString(_cg.getQueryPlan()));


    // selectItems might add OperatorComponent, this is why it goes first
    final int queryType = processSelectClause(_pq.getSelectItems());
    processWhereClause(_pq.getWhereExpr());
    if (queryType == IndexSelectItemsVisitor.NON_AGG)
      LOG.info("Early projection will not be performed since the query is NON_AGG type (contains projections)!");
    else
      earlyProjection(_cg.getQueryPlan());


    ParserUtil.orderOperators(_cg.getQueryPlan());


    final RuleParallelismAssigner parAssign = new RuleParallelismAssigner(_cg.getQueryPlan(),
        _pq.getTan(), _schema, _map);
    parAssign.assignPar();


    return _cg.getQueryPlan();
  }


  private IndexCompGen generateTableJoins() {
    final List<Table> tableList = _pq.getTableList();
    final TableSelector ts = new TableSelector(tableList, _schema, _pq.getTan());
    final JoinTablesExprs jte = _pq.getJte();


    final IndexCompGen cg = new IndexCompGen(_schema, _pq, _map);


    // first phase
    // make high level pairs
    final List<String> skippedBestTableNames = new ArrayList<String>();
    final int numTables = tableList.size();
    if (numTables == 1) {
      cg.generateDataSource(ParserUtil.getComponentName(tableList.get(0)));
      return cg;
    } else {
      final int highLevelPairs = getNumHighLevelPairs(numTables);


      for (int i = 0; i < highLevelPairs; i++) {
        final String bestTableName = ts.removeBestTableName();


        // enumerates all the tables it has joinCondition to join with
        final List<String> joinedWith = jte.getJoinedWith(bestTableName);
        // dependent on previously used tables, so might return null
        final String bestPairedTable = ts.removeBestPairedTableName(joinedWith);
        if (bestPairedTable != null) {
          // we found a pair
          final DataSourceComponent bestSource = cg.generateDataSource(bestTableName);
          final DataSourceComponent bestPairedSource = cg
              .generateDataSource(bestPairedTable);
          cg.generateEquiJoin(bestSource, bestPairedSource);
        } else
          // we have to keep this table for latter processing
          skippedBestTableNames.add(bestTableName);
      }
    }


    // second phase
    // join (2-way join components) with unused tables, until there is no
    // more tables
    List<Component> subPlans = cg.getSubPlans();


    /*
     * Why outer loop is unpairedTables, and inner is subPlans: 1) We first
     * take care of small tables 2) In general, there is smaller number of
     * unpaired tables than tables 3) Number of ancestors always grow, while
     * number of joinedTables is a constant Bad side is updating of
     * subPlanAncestors, but than has to be done anyway LinkedHashMap
     * guarantees in order iterator
     */
    List<String> unpairedTableNames = ts.removeAll();
    unpairedTableNames.addAll(skippedBestTableNames);
    while (!unpairedTableNames.isEmpty()) {
      final List<String> stillUnprocessed = new ArrayList<String>();
      // we will try to join all the tables, but some of them cannot be
      // joined before some other tables
      // that's why we have while outer loop
      for (final String unpaired : unpairedTableNames) {
        boolean processed = false;
        for (final Component currentComp : subPlans)
          if (_pq.getJte().joinExistsBetween(unpaired,
              ParserUtil.getSourceNameList(currentComp))) {
            final DataSourceComponent unpairedSource = cg.generateDataSource(unpaired);
            cg.generateEquiJoin(currentComp, unpairedSource);


            processed = true;
            break;
          }
        if (!processed)
          stillUnprocessed.add(unpaired);
      }
      unpairedTableNames = stillUnprocessed;
    }


    // third phase: joining Components until there is a single component
    subPlans = cg.getSubPlans();
    while (subPlans.size() > 1) {
      // this is joining of components having approximately the same
      // number of ancestors - the same level
      final Component firstComp = subPlans.get(0);
      final List<String> firstAncestors = ParserUtil.getSourceNameList(firstComp);
      for (int i = 1; i < subPlans.size(); i++) {
        final Component otherComp = subPlans.get(i);
        final List<String> otherAncestors = ParserUtil.getSourceNameList(otherComp);
        if (_pq.getJte().joinExistsBetween(firstAncestors, otherAncestors)) {
          cg.generateEquiJoin(firstComp, otherComp);
          break;
        }
      }
      // until this point, we change subPlans by locally remove operations
      // when going to the next level, whesh look over subPlans is taken
      subPlans = cg.getSubPlans();
    }
    return cg;
  }


  private int getNumHighLevelPairs(int numTables) {
    int highLevelPairs = 0;
    if (numTables == 2)
      highLevelPairs = 1;
    else if (numTables > 2)
      highLevelPairs = (numTables % 2 == 0 ? numTables / 2 - 1 : numTables / 2);
    return highLevelPairs;
  }


  /*
   * this method returns a list of <ComponentName, whereCompExpression>
   * @whereCompExpression part of JSQL expression which relates to the
   * corresponding Component
   */
  private Map<String, Expression> getWhereForComponents(Expression whereExpr) {
    final AndVisitor andVisitor = new AndVisitor();
    whereExpr.accept(andVisitor);
    final List<Expression> atomicExprs = andVisitor.getAtomicExprs();
    final List<OrExpression> orExprs = andVisitor.getOrExprs();


    /*
     * we have to group atomicExpr (conjuctive terms) by ComponentName there
     * might be mutliple columns from a single DataSourceComponent, and we
     * want to group them
     * conditions such as R.A + R.B = 10 are possible not possible to have
     * ColumnReference from multiple tables, because than it would be join
     * condition
     */
    final Map<String, Expression> collocatedExprs = new HashMap<String, Expression>();
    ParserUtil.addAndExprsToComps(collocatedExprs, atomicExprs);


    final Map<Set<String>, Expression> collocatedOrs = new HashMap<Set<String>, Expression>();
    ParserUtil.addOrExprsToComps(collocatedOrs, orExprs);


    for (final Map.Entry<Set<String>, Expression> orEntry : collocatedOrs.entrySet()) {
      final List<String> compNames = new ArrayList<String>(orEntry.getKey());
      final List<Component> compList = ParserUtil.getComponents(compNames, _cg);
      final Component affectedComponent = HierarchyExtractor.getLCM(compList);


      final Expression orExpr = orEntry.getValue();
      ParserUtil.addAndExprToComp(collocatedExprs, orExpr, affectedComponent.getName());
    }


    return collocatedExprs;
  }


  /*************************************************************************************
   * SELECT clause - Final Aggregation
   *************************************************************************************/


  private int processSelectClause(List<SelectItem> selectItems) {
    final IndexSelectItemsVisitor selectVisitor = new IndexSelectItemsVisitor(
        _cg.getQueryPlan(), _schema, _pq.getTan(), _map);
    for (final SelectItem elem : selectItems)
      elem.accept(selectVisitor);
    final List<AggregateOperator> aggOps = selectVisitor.getAggOps();
    final List<ValueExpression> groupByVEs = selectVisitor.getGroupByVEs();


    final Component affectedComponent = _cg.getQueryPlan().getLastComponent();
    attachSelectClause(affectedComponent, aggOps, groupByVEs);
    return (aggOps.isEmpty() ? IndexSelectItemsVisitor.NON_AGG : IndexSelectItemsVisitor.AGG);
  }


  /*************************************************************************************
   * WHERE clause - SelectOperator
   *************************************************************************************/


  private void processWhereClause(Expression whereExpr) {
    if (whereExpr == null)
      return;


    // assinging JSQL expressions to Components
    final Map<String, Expression> whereCompExprPairs = getWhereForComponents(whereExpr);


    // Each component process its own part of JSQL whereExpression
    for (final Map.Entry<String, Expression> whereCompExprPair : whereCompExprPairs.entrySet()) {
      final Component affectedComponent = _cg.getQueryPlan().getComponent(
          whereCompExprPair.getKey());
      final Expression whereCompExpr = whereCompExprPair.getValue();
      processWhereForComponent(affectedComponent, whereCompExpr);
    }


  }


  /*
   * whereCompExpression is the part of WHERE clause which refers to
   * affectedComponent This is the only method in this class where
   * IndexWhereVisitor is actually instantiated and invoked
   */
  private void processWhereForComponent(Component affectedComponent,
      Expression whereCompExpression) {
    final IndexWhereVisitor whereVisitor = new IndexWhereVisitor(affectedComponent, _schema,
        _pq.getTan());
    whereCompExpression.accept(whereVisitor);
    attachWhereClause(affectedComponent, whereVisitor.getSelectOperator());
  }


}
// Source Code of sql.optimizers.index.IndexRuleOptimizer

// Related Classes of sql.optimizers.index.IndexRuleOptimizer