// Source Code of sql.optimizers.name.ProjSchemaCreator

package sql.optimizers.name;


import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;


import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;
import net.sf.jsqlparser.expression.operators.conditional.OrExpression;
import net.sf.jsqlparser.schema.Column;
import plan_runner.components.Component;
import plan_runner.conversion.IntegerConversion;
import plan_runner.conversion.TypeConversion;
import plan_runner.expressions.ValueExpression;
import plan_runner.operators.ProjectOperator;
import sql.schema.ColumnNameType;
import sql.schema.Schema;
import sql.util.JoinTablesExprs;
import sql.util.ParserUtil;
import sql.util.TableAliasName;
import sql.util.TupleSchema;
import sql.visitors.jsql.MaxSubExpressionsVisitor;
import sql.visitors.jsql.SQLVisitor;
import sql.visitors.squall.NameProjectVisitor;


/*
 * This class takes the expressions from GlobalProjExpr,
 *   adds to them those from Hashes,
 *   and creates an output schema based on which expressions are required down the topology.
 * Subexpressions are correctly recognized as well.
 */
public class ProjSchemaCreator {
  private final ProjGlobalCollect _globalProject; // this is shared by all the
  // ProjSchemaCreator objects
  private final TupleSchema _inputTupleSchema;


  private final NameTranslator _nt;
  private final TableAliasName _tan; // used for getting a list of all the
  // tableCompNames
  private final Schema _schema;
  private final JoinTablesExprs _jte; // used for getting joinCondition
  private final Component _component;
  private final SQLVisitor _pq;
  private final NameProjectVisitor _npv;


  // output of this class
  private TupleSchema _outputTupleSchema;
  private List<ValueExpression> _veList;


  private static final IntegerConversion _ic = new IntegerConversion();


  public ProjSchemaCreator(ProjGlobalCollect globalProject, TupleSchema inputTupleSchema,
      Component component, SQLVisitor pq, Schema schema) {


    _globalProject = globalProject;
    _inputTupleSchema = inputTupleSchema;
    _tan = pq.getTan();
    _schema = schema;
    _jte = pq.getJte();
    _component = component;
    _pq = pq;


    _nt = new NameTranslator(component.getName());
    _npv = new NameProjectVisitor(_inputTupleSchema, component);
  }


  /*
   * Expressions from exprList are all appeapring somewhere in the query plan
   * This method never can raise an exception, it can only cause suboptimality
   * There is no mandatory projections because SelectOperator,
   * AggregateOperator and Hashes are able to deal with ValueExpressions (and
   * not only with ColumnReferences). For example, if I have inputTupleSchema
   * "R.A, R.A + R.B" in R and exprList "R.A, R.A + R.B" and decide to go with
   * outputTupleSchema "R.A, R.B", there is no "R.B" in inputTupleSchema
   * However, this is not possible, because parent will never send something
   * like this "R.A, R.A + R.B".
   */
  private List<Expression> chooseProjections(List<Expression> exprList) {
    // colect all the columnNames from JSQL Column Expressions
    final List<String> aloneColumnNames = getAloneColumnNames(exprList);


    List<Expression> resultExpr = new ArrayList<Expression>();
    for (final Expression expr : exprList)
      if (expr instanceof Column)
        resultExpr.add(expr); // all the Column Expressions should be
      // added
      else {
        // all the columns used in expression expr
        final List<Column> exprColumns = ParserUtil.getJSQLColumns(expr);
        boolean existsAlone = false; // does at least one column from
        // expr already appears in
        // aloneColumnNames?
        for (final Column column : exprColumns) {
          final String columnStr = ParserUtil.getStringExpr(column);
          if (aloneColumnNames != null && aloneColumnNames.contains(columnStr)) {
            existsAlone = true;
            break;
          }
        }
        if (existsAlone)
          // all the columns should be added to result
          resultExpr.addAll(exprColumns);
        else
          // add whole expr
          resultExpr.add(expr);
      }


    // now take care of duplicates
    resultExpr = eliminateDuplicates(resultExpr);


    return resultExpr;
  }


  /*
   * can be invoked multiple times with no harm
   */
  public void create() {
    final List<Expression> exprList = new ArrayList<Expression>();


    // these methods adds to exprList
    // each added expression is either present in inputTupleSchema, or can
    // be built out of it
    processGlobalExprs(exprList);
    processGlobalOrs(exprList);
    if (!ParserUtil.isFinalComponent(_component, _pq))
      // last component does not have hashes, because it's joined with
      // noone
      processHashes(exprList);


    // choose for which expressions we do projection, and create a schema
    // out of that
    final List<Expression> chosenExprs = chooseProjections(exprList);
    _outputTupleSchema = createSchema(chosenExprs);


    // convert JSQL to Squall expressions
    _npv.visit(chosenExprs);
    _veList = _npv.getExprs();


  }


  /*
   * Create new schema, but preserve all the synonims from _inputTupleSchema
   */
  private TupleSchema createSchema(List<Expression> choosenExprs) {
    final List<ColumnNameType> cnts = new ArrayList<ColumnNameType>();


    for (final Expression expr : choosenExprs) {
      // first to determine the type, we use the first column for that


      final TypeConversion tc = getTC(expr);


      // attach the TypeConversion
      final String exprStr = ParserUtil.getStringExpr(expr);
      final ColumnNameType cnt = new ColumnNameType(exprStr, tc);
      cnts.add(cnt);
    }


    // copying all the synonims from inputTupleSchema
    final TupleSchema result = new TupleSchema(cnts);
    final Map<String, String> inputSynonims = _inputTupleSchema.getSynonims();
    if (inputSynonims != null)
      result.setSynonims(inputSynonims);


    return result;
  }


  /*
   * We have to convert it to String, because that's the way we implemented
   * equals operator (we don't want to change JSQL classes to add equals
   * operator)
   */
  private List<Expression> eliminateDuplicates(List<Expression> exprList) {
    final List<Expression> result = new ArrayList<Expression>();
    final List<String> exprStrList = new ArrayList<String>();
    for (final Expression expr : exprList) {
      final String exprStr = ParserUtil.getStringExpr(expr);
      if (!exprStrList.contains(exprStr))
        // if it is not already there, add it
        result.add(expr);
      // anyway we update a list of strings
      exprStrList.add(exprStr);
    }
    return result;
  }


  /*
   * Return all the columns which appears alone in its String form e.g. for
   * R(A), R(A) + 5, R(B), R(C) + 2 this methods return R(A), R(B)
   */
  private List<String> getAloneColumnNames(List<Expression> exprList) {
    final List<String> result = new ArrayList<String>();
    for (final Expression expr : exprList)
      if (expr instanceof Column) {
        final Column column = (Column) expr;
        result.add(ParserUtil.getStringExpr(column));
      }
    return result;
  }


  public TupleSchema getOutputSchema() {
    return _outputTupleSchema;
  }


  /*
   * will be used for a creation of a ProjectOperator
   */
  public ProjectOperator getProjectOperator() {
    return new ProjectOperator(_veList);
  }


  /*
   * Have to distinguish special cases from normal ones
   */
  private TypeConversion getTC(Expression expr) {
    if (expr instanceof Function) {
      final Function fun = (Function) expr;
      if (fun.getName().equalsIgnoreCase("EXTRACT_YEAR"))
        return _ic;
    }


    // non special cases
    final List<Column> columns = ParserUtil.getJSQLColumns(expr);
    final Column column = columns.get(0);
    return _schema.getType(ParserUtil.getFullSchemaColumnName(column, _tan));
  }


  /*
   * For each expression from _globalProject (for now these are only from
   * SELECT clause), add the appropriate subexpressions to _exprList
   */
  private void processGlobalExprs(List<Expression> exprList) {
    final MaxSubExpressionsVisitor sev = new MaxSubExpressionsVisitor(_nt, _inputTupleSchema);
    sev.visit(_globalProject.getExprList());
    exprList.addAll(sev.getExprs());
  }


  /*
   * OrExpressions are from WHERE clause We need to project for it if they are
   * not already executed This could work without subexpressions, because
   * SelectOperator can work with ValueExpressions (and not only with
   * ColumnReferences)
   */
  private void processGlobalOrs(List<Expression> exprList) {
    final List<OrExpression> orList = _globalProject.getOrExprs();
    if (orList != null)
      for (final OrExpression orExpr : _globalProject.getOrExprs()) {
        final MaxSubExpressionsVisitor sev = new MaxSubExpressionsVisitor(_nt,
            _inputTupleSchema);
        sev.visit(orExpr);
        if (!sev.isAllSubsMine(orExpr)) {
          // if all of them are available, SELECT operator is already
          // done
          // (either in this component because SELECT goes before
          // PROJECT
          // or in some of ancestor components)


          // we get all the subexpressions correlated to me
          final List<Expression> mineSubExprs = sev.getExprs();
          exprList.addAll(mineSubExprs);
        }
      }
  }


  /*
   * All the HashExpressions for joinining between ancestor of component and
   * all other tables are collected
   */
  private void processHashes(List<Expression> exprList) {
    final List<String> ancestorNames = ParserUtil.getSourceNameList(_component);


    // it has to be done like this, because queryPlan is not finished
    // and does not contain all the tables yet
    final List<String> allCompNames = _tan.getComponentNames();
    final List<String> otherCompNames = ParserUtil.getDifference(allCompNames, ancestorNames);


    // now we find joinCondition between ancestorNames and otherCompNames
    // joinExprs is a list of EqualsTo
    final List<Expression> joinExprs = _jte.getExpressions(ancestorNames, otherCompNames);


    final MaxSubExpressionsVisitor sev = new MaxSubExpressionsVisitor(_nt, _inputTupleSchema);
    sev.visit(joinExprs);
    // we get all the subexpressions correlated to me
    final List<Expression> mineSubExprs = sev.getExprs();
    exprList.addAll(mineSubExprs);


  }


}
// Source Code of sql.optimizers.name.ProjSchemaCreator

// Related Classes of sql.optimizers.name.ProjSchemaCreator