Source Code of com.odiago.flumebase.parser.SelectStmt

/**
* Licensed to Odiago, Inc. under one or more contributor license
* agreements.  See the NOTICE.txt file distributed with this work for
* additional information regarding copyright ownership.  Odiago, Inc.
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
* License for the specific language governing permissions and limitations
* under the License.
*/

package com.odiago.flumebase.parser;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.avro.Schema;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.odiago.flumebase.exec.HashSymbolTable;
import com.odiago.flumebase.exec.SymbolTable;

import com.odiago.flumebase.lang.Type;

import com.odiago.flumebase.plan.AggregateNode;
import com.odiago.flumebase.plan.OutputNode;
import com.odiago.flumebase.plan.EvaluateExprsNode;
import com.odiago.flumebase.plan.FilterNode;
import com.odiago.flumebase.plan.FlowSpecification;
import com.odiago.flumebase.plan.MemoryOutputNode;
import com.odiago.flumebase.plan.PlanContext;
import com.odiago.flumebase.plan.PlanNode;
import com.odiago.flumebase.plan.ProjectionNode;

import com.odiago.flumebase.util.StringUtils;

/**
* SELECT statement.
*/
public class SelectStmt extends RecordSource {

  private static final Logger LOG = LoggerFactory.getLogger(SelectStmt.class.getName());

  /**
   * Configuration key that specifies how we should deliver output records of a
   * top-level rtsql statement to the client. If this is set to "$console", we
   * print to the screen. Any other string causes us to allocate a list buffer
   * that can be retrieved later by the client.
   */
  public static final String CLIENT_SELECT_TARGET_KEY = "flumebase.client.select.target";
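
  // Illustrative usage (a sketch, not part of the original source; assumes the
  // client-side Configuration exposes a Hadoop-style set(String, String), as
  // suggested by the get(key, default) call in createReturnedContext()):
  //   conf.set(SelectStmt.CLIENT_SELECT_TARGET_KEY, "myResultBuffer");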

  /** Special value for flumebase.client.select.target that prints to stdout. */
  public static final String CONSOLE_SELECT_TARGET = "$console";

  /** The default for flumebase.client.select.target is to use the console. */
  public static final String DEFAULT_CLIENT_SELECT_TARGET = CONSOLE_SELECT_TARGET;

  /** Set of fields or other expressions to select */
  private List<AliasedExpr> mSelectExprs;

  // Source stream for the FROM clause. Must be a LiteralSource or a SelectStmt.
  // (That fact is proven by a TypeChecker visitor.)
  private SQLStatement mSource;

  // Expression that must evaluate to true in the WHERE clause to accept records.
  // (may be null)
  private Expr mWhereExpr;

  // GROUP BY clause (may be null).
  private GroupBy mGroupBy;

  // OVER clause (may be null); expr specifies the window we aggregate on.
  private Expr mAggregateOver;

  // Expressions in the SELECT statement that are produced by aggregate functions.
  // (Provided by the IdentifyAggregates visitor pass.)
  private List<AliasedExpr> mAggregateExprs;

  // Expression that must evaluate to true in the HAVING clause to accept records.
  // (may be null)
  private Expr mHaving;

  // List of window definitions; bindings from identifiers to WindowSpecs
  // in the scope of this SELECT statement.
  private List<WindowDef> mWindowDefs;

  /** User-specified alias for the ephemeral stream containing the results of this
   * SELECT statement inside another SELECT statement.
   */
  private String mAlias;

  /**
   * Name associated with an output logical node emitting the select's results
   * into Flume. (May be null.)
   */
  private String mOutputName;

  /**
   * All symbols representing fields available as output of this select stmt.
   */
  private SymbolTable mFieldSymbols;

  public SelectStmt(List<AliasedExpr> selExprs, SQLStatement source, Expr where,
      GroupBy groupBy, Expr aggregateOver, Expr having, List<WindowDef> windowDefs) {
    mSelectExprs = selExprs;
    mSource = source;
    mWhereExpr = where;
    mGroupBy = groupBy;
    mAggregateOver = aggregateOver;
    mHaving = having;
    mWindowDefs = windowDefs;
  }

  public List<AliasedExpr> getSelectExprs() {
    return mSelectExprs;
  }

  public SQLStatement getSource() {
    return mSource;
  }

  public void setSource(SQLStatement src) {
    mSource = src;
  }

  public Expr getWhereConditions() {
    return mWhereExpr;
  }

  public void setWhereConditions(Expr where) {
    mWhereExpr = where;
  }

  public Expr getHaving() {
    return mHaving;
  }

  public void setHaving(Expr having) {
    mHaving = having;
  }

  public GroupBy getGroupBy() {
    return mGroupBy;
  }

  public void setGroupBy(GroupBy groupBy) {
    mGroupBy = groupBy;
  }

  public List<WindowDef> getWindowDefs() {
    return mWindowDefs;
  }

  public String getAlias() {
    return mAlias;
  }

  public void setAlias(String alias) {
    mAlias = alias;
  }

  public String getOutputName() {
    return mOutputName;
  }

  public void setOutputName(String outputName) {
    mOutputName = outputName;
  }

  public List<AliasedExpr> getAggregateExprs() {
    return mAggregateExprs;
  }

  public Expr getWindowOver() {
    return mAggregateOver;
  }

  public void setWindowOver(Expr windowOver) {
    mAggregateOver = windowOver;
  }

  public void setAggregateExprs(List<AliasedExpr> aggregateExprs) {
    mAggregateExprs = aggregateExprs;
  }

  /** {@inheritDoc} */
  @Override
  public List<String> getSourceNames() {
    if (null != mAlias) {
      return Collections.singletonList(mAlias);
    } else {
      return Collections.emptyList();
    }
  }

  /** {@inheritDoc} */
  @Override
  public String getSourceName() {
    return mAlias;
  }

  /**
   * After calculating a SymbolTable containing all the fields
   * of this select statement, attach it to the statement for future use.
   */
  public void setFieldSymbols(SymbolTable fieldSymbols) {
    mFieldSymbols = fieldSymbols;
  }

  /** {@inheritDoc} */
  @Override
  public SymbolTable getFieldSymbols() {
    return mFieldSymbols;
  }

  @Override
  public void format(StringBuilder sb, int depth) {
    pad(sb, depth);
    sb.append("SELECT");
    sb.append("\n");
    pad(sb, depth + 1);
    sb.append("expressions:\n");
    for (AliasedExpr ae : mSelectExprs) {
      ae.format(sb, depth + 2);
    }
    pad(sb, depth + 1);
    sb.append("FROM:\n");
    mSource.format(sb, depth + 2);

    if (null != mWhereExpr) {
      pad(sb, depth + 1);
      sb.append("WHERE\n");
      mWhereExpr.format(sb, depth + 2);
    }

    if (null != mGroupBy) {
      mGroupBy.format(sb, depth + 1);
    }

    if (null != mAggregateOver) {
      pad(sb, depth + 1);
      sb.append("OVER\n");
      mAggregateOver.format(sb, depth + 2);
    }

    if (null != mHaving) {
      pad(sb, depth + 1);
      sb.append("HAVING\n");
      mHaving.format(sb, depth + 2);
    }

    if (mWindowDefs.size() > 0) {
      pad(sb, depth + 1);
      sb.append("Windows:\n");
      for (WindowDef def : mWindowDefs) {
        def.format(sb, depth + 2);
      }
    }

    if (null != mAlias) {
      pad(sb, depth + 1);
      sb.append("AS: alias=");
      sb.append(mAlias);
      sb.append("\n");
    }

    if (null != mOutputName) {
      pad(sb, depth + 1);
      sb.append("OUTPUT AS: outputName=");
      sb.append(mOutputName);
      sb.append("\n");
    }
  }

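  /**
   * Assemble the execution plan for this SELECT. The layers attached to the
   * flow, in order, are: the source's sub-plan, an optional WHERE filter, an
   * optional aggregation layer, an optional expression-evaluation layer, a
   * projection, an optional HAVING filter, and finally the output stage added
   * by createReturnedContext().
   */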
  @Override
  public PlanContext createExecPlan(PlanContext planContext) {
    SQLStatement source = getSource();
    Expr where = getWhereConditions();

    // Create an execution plan for the source(s) of this SELECT stream.
    PlanContext sourceOutCtxt = getSubPlan(source, planContext);
    SymbolTable srcOutSymbolTable = sourceOutCtxt.getSymbolTable();

    // Now incorporate that entire plan into our plan.
    FlowSpecification flowSpec = planContext.getFlowSpec();
    flowSpec.addNodesFromDAG(sourceOutCtxt.getFlowSpec());

    // List of all fields required as output from the source node.
    List<TypedField> allRequiredFields = new ArrayList<TypedField>();

    // All fields carried forward by the aggregation layer from the source layer.
    List<TypedField> groupByPropagateFields = new ArrayList<TypedField>();

    // Another list holds all the fields which the EvaluateExprsNode will need to
    // propagate from the initial source layer forward.
    List<TypedField> exprPropagateFields = new ArrayList<TypedField>();

    // List of all fields with their input names that should be read by the ProjectionNode.
    // This is exprPropagateFields + fields emitted by the expr layer.
    List<TypedField> projectionInputs = new ArrayList<TypedField>();

    // List of all fields returned from the ProjectionNode; this layer
    // uses the translated names from the "x AS y" clauses.
    List<TypedField> projectionOutputs = new ArrayList<TypedField>();

    // Create a list containing the (ordered) set of fields we want emitted to the console.
    List<TypedField> consoleFields = new ArrayList<TypedField>();

    // Populate the field lists defined above
    calculateRequiredFields(srcOutSymbolTable, sourceOutCtxt.getOutFields(),
        allRequiredFields, groupByPropagateFields, exprPropagateFields,
        projectionInputs, projectionOutputs, consoleFields);

    if (where != null) {
      // Non-null filter conditions; apply the filter to all of our sources.
      PlanNode filterNode = new FilterNode(where);
      flowSpec.attachToLastLayer(filterNode);
    }

    // Add an aggregation layer, if required.
    addAggregationToPlan(srcOutSymbolTable, flowSpec, groupByPropagateFields);

    // Evaluate calculated-expression fields.
    addExpressionsToPlan(flowSpec, exprPropagateFields, projectionInputs);

    // Create the projected schema based on the symbol table returned by our source.
    Schema projectedSchema = createFieldSchema(distinctFields(projectionOutputs));
    ProjectionNode projectionNode = new ProjectionNode(projectionInputs, projectionOutputs);
    projectionNode.setAttr(PlanNode.OUTPUT_SCHEMA_ATTR, projectedSchema);
    flowSpec.attachToLastLayer(projectionNode);

    if (mHaving != null) {
      // Non-null HAVING conditions; apply another filter to our output.
      PlanNode havingNode = new FilterNode(mHaving);
      flowSpec.attachToLastLayer(havingNode);
    }

    return createReturnedContext(planContext, consoleFields);
  }

  /**
   * Analyze the expressions in the SELECT field projection list, the WHERE
   * clause, etc. and determine which fields of the underlying stream
   * need to be pulled out into the intermediate and result records.
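   *
   * Worked example (illustrative; not part of the original source): for
   * "SELECT x, x + y AS z FROM s WHERE w > 0" with no aggregate functions,
   * the plain field 'x' needs no evaluation, so it is added to
   * groupByPropagateFields, exprPropagateFields, projectionInputs,
   * projectionOutputs, and consoleFields; the computed expression 'x + y'
   * contributes its dependencies ('x', 'y') to allRequiredFields and
   * groupByPropagateFields, while its aliased result 'z' is added to
   * projectionInputs, projectionOutputs, and consoleFields; the WHERE clause
   * adds 'w' to allRequiredFields.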
   * @param fieldSymbols the SymbolTable returned by the source which defines
   * the types of all the fields of the source stream(s).
   * @param srcOutFields the list of all fields available as output from the
   * source.
   * @param allRequiredFields (output) - all fields required as output from
   * the source (e.g., because they are consoleFields, or used in other
   * expressions in the WHERE clause).
   * @param groupByPropagateFields (output) - the fields the aggregate eval
   * layer carries forward and passes through to its output.
   * @param exprPropagateFields (output) - the fields which the expression
   * evaluation layer carries forward and passes through to its output.
   * @param projectionInputs (output) - the fields that should be read by the
   * ProjectionNode and carried through to its output.
   * @param projectionOutputs (output) - the same set of fields as
   * projectionInputs, after being transformed by the projection layer.
   * @param consoleFields (output) - the list of fields that should be
   * presented to the console (or other sink for this SELECT statement).
   */
  private void calculateRequiredFields(SymbolTable fieldSymbols,
      List<TypedField> srcOutFields,
      List<TypedField> allRequiredFields,
      List<TypedField> groupByPropagateFields,
      List<TypedField> exprPropagateFields,
      List<TypedField> projectionInputs,
      List<TypedField> projectionOutputs,
      List<TypedField> consoleFields) {

    // Start with all the fields the user explicitly selected.
    List<AliasedExpr> exprList = getSelectExprs();
    for (AliasedExpr aliasExpr : exprList) {
      Expr e = aliasExpr.getExpr();
      if (e instanceof AllFieldsExpr) {
        // Use all field names listed as outputs from the source's output context.
        for (TypedField outField : srcOutFields) {
          allRequiredFields.add(outField);
          groupByPropagateFields.add(outField);
          exprPropagateFields.add(outField);
          projectionInputs.add(outField);
          projectionOutputs.add(outField);
          consoleFields.add(outField);
        }
      } else {
        // Get the type within the expression, and add the appropriate labels.
        // These have been already assigned by a visitor pass.

        Type t = e.getType(fieldSymbols);
        TypedField projectionField = new TypedField(
          aliasExpr.getUserAlias(), t,
          aliasExpr.getAvroLabel(), aliasExpr.getDisplayLabel());

        // Make sure our dependencies are pulled out of the source layer.
        List<TypedField> fieldsForExpr = e.getRequiredFields(fieldSymbols);

        if (!e.requiresEval()) {
          // The aggregation and expression evaluation nodes need to
          // carry this field forward into the output.
          // Make sure to use the aliased name as the output of the
          // projection/expr-propagate layers, but use the original name as
          // the output of the source layer (projection input list).
          groupByPropagateFields.add(projectionField);
          exprPropagateFields.add(projectionField);
        } else if (mAggregateExprs.contains(aliasExpr)) {
          // Calculated in the aggregation layer.
          // Carry result forward through expr eval.
          exprPropagateFields.add(projectionField);
          // Pull dependencies from source layer.
          allRequiredFields.addAll(fieldsForExpr);
        } else {
          // This is calculated in the expression evaluation layer.
          allRequiredFields.addAll(fieldsForExpr);
          groupByPropagateFields.addAll(fieldsForExpr); // Propagate dependencies forward.
        }

        // Regardless of which calculation stage generated the field, this
        // result is carried through to the end of the query.
        projectionInputs.add(projectionField);
        projectionOutputs.add(projectionField);
        consoleFields.add(projectionField);
      }
    }

    Expr where = getWhereConditions();
    if (null != where) {
      // Add to this all the fields required by the where clause.
      List<TypedField> whereReqs = where.getRequiredFields(fieldSymbols);
      allRequiredFields.addAll(whereReqs);
    }

    if (null != mGroupBy) {
      // Add to this all the fields required for grouping in the GROUP BY clause.
      allRequiredFields.addAll(mGroupBy.getFieldTypes());
    }

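    // Note: distinctFields() returns new lists; the assignments below rebind
    // only the local parameter references (the callers' output lists are not
    // deduplicated) and feed the arity assertion that follows.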
    allRequiredFields = distinctFields(allRequiredFields);
    exprPropagateFields = distinctFields(exprPropagateFields);
    // Important: the ProjectionElement requires these to have the same arity
    // and order.
    projectionInputs = distinctFields(projectionInputs);
    projectionOutputs = distinctFields(projectionOutputs);
    assert projectionInputs.size() == projectionOutputs.size();
  }

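  /**
   * If this SELECT contains aggregate expressions, attach an AggregateNode to
   * the plan. The node groups over the GROUP BY fields (if any) within the
   * OVER window and emits the propagated upstream fields together with the
   * newly computed aggregate values.
   */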
  private void addAggregationToPlan(SymbolTable fieldSymbols, FlowSpecification flowSpec,
      List<TypedField> groupByPropagateFields) {

    if (null != mAggregateExprs && mAggregateExprs.size() > 0) {
      // Non-null aggregate expression list; add an aggregation step to our plan.

      List<TypedField> aggregateOverFields = Collections.emptyList();
      if (null != mGroupBy) {
        aggregateOverFields = mGroupBy.getFieldTypes();
      }

      LOG.debug("Aggregate exprs: " + StringUtils.listToStr(mAggregateExprs));
      assert flowSpec.getConf() != null;
      PlanNode aggregateNode = new AggregateNode(aggregateOverFields,
          mAggregateOver, mAggregateExprs, groupByPropagateFields, flowSpec.getConf());
      flowSpec.attachToLastLayer(aggregateNode);

      // Output schema for this layer contains everything we need to forward
      // from our upstream layers...
      List<TypedField> aggOutputFields = new ArrayList<TypedField>();
      aggOutputFields.addAll(groupByPropagateFields);
      // As well as the names of everything we calculate in this layer.
      for (AliasedExpr aliasExpr : mAggregateExprs) {
        Expr e = aliasExpr.getExpr();
        Type t = e.getType(fieldSymbols);
        TypedField aggregateField = new TypedField(
          aliasExpr.getUserAlias(), t,
          aliasExpr.getAvroLabel(), aliasExpr.getDisplayLabel());
        aggOutputFields.add(aggregateField);
      }
      Schema aggregateOutSchema = createFieldSchema(aggOutputFields);
      aggregateNode.setAttr(PlanNode.OUTPUT_SCHEMA_ATTR, aggregateOutSchema);
    }
  }

  /**
   * If we output columns which are based on computed (non-aggregate)
   * expressions, add an expression computation node to the flow
   * specification.
   */
  private void addExpressionsToPlan(FlowSpecification flowSpec,
      List<TypedField> exprPropagateFields, List<TypedField> projectionInputs) {
    List<AliasedExpr> calculatedExprs = new ArrayList<AliasedExpr>();
    for (AliasedExpr expr : getSelectExprs()) {
      Expr subExpr = expr.getExpr();
      if (subExpr.requiresEval() && !mAggregateExprs.contains(expr)) {
        calculatedExprs.add(expr);
      }
    }

    if (calculatedExprs.size() > 0) {
      PlanNode exprNode = new EvaluateExprsNode(calculatedExprs, exprPropagateFields);
      // TODO(aaron): assert that calculatedExprs UNION exprPropagateFields gives
      // us the projectionInputs list.
      Schema exprOutSchema = createFieldSchema(projectionInputs);
      exprNode.setAttr(PlanNode.OUTPUT_SCHEMA_ATTR, exprOutSchema);
      flowSpec.attachToLastLayer(exprNode);
    }
  }

  /**
   * Create the output PlanContext that should be returned by createExecPlan().
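   *
   * For root queries, the output goes either to an OutputNode (the "$console"
   * target, which may also emit Avro records into Flume) or to a named
   * MemoryOutputNode buffer; for sub-queries, a cleanup ProjectionNode
   * restricts the output to the explicitly selected fields, and the returned
   * context carries the sub-query's field symbols, schema, and output fields.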
   */
  private PlanContext createReturnedContext(PlanContext planContext,
      List<TypedField> outputFields) {
    PlanContext outContext = planContext;
    FlowSpecification flowSpec = planContext.getFlowSpec();
    if (planContext.isRoot()) {
      String selectTarget = planContext.getConf().get(CLIENT_SELECT_TARGET_KEY,
          DEFAULT_CLIENT_SELECT_TARGET);
      if (CONSOLE_SELECT_TARGET.equals(selectTarget)) {
        // SELECT statements that are root queries go to the output node.

        // This output node may emit Avro records to a Flume node. These records
        // should use more user-friendly names for the fields than the anonymized
        // field names we use internally. Create a final schema for the output
        // plan node.
        String outputName = getOutputName();
        List<TypedField> outSchemaFields = new ArrayList<TypedField>();
        List<TypedField> distinctOutFields = distinctFields(outputFields);
        for (TypedField outField : distinctOutFields) {
          String safeName = avroSafeName(outField.getDisplayName());
          outSchemaFields.add(new TypedField(safeName, outField.getType()));
        }
        Schema finalSchema = createFieldSchema(outSchemaFields, outputName);
        OutputNode outputNode = new OutputNode(outputFields, outSchemaFields, outputName);
        outputNode.setAttr(PlanNode.OUTPUT_SCHEMA_ATTR, finalSchema);
        flowSpec.attachToLastLayer(outputNode);
      } else {
        // Client has specified that outputs of this root query go to a named memory buffer.
        flowSpec.attachToLastLayer(new MemoryOutputNode(selectTarget,
            distinctFields(outputFields)));
      }
    } else {
      // If the initial projection contained both explicitly selected fields as
      // well as implicitly selected fields (e.g., for the WHERE clause), attach another
      // projection layer that extracts only the explicitly selected fields.

      // SELECT as a sub-query needs to create an output context with a
      // symbol table that contains the fields we expose through projection.
      // We also need to set the output field names and output schema in our
      // returned context.
      outContext = new PlanContext(planContext);
      SymbolTable inTable = planContext.getSymbolTable();
      SymbolTable outTable = new HashSymbolTable(inTable);
      outputFields = distinctFields(outputFields);
      outTable.addAll(mFieldSymbols);
      Schema outputSchema = createFieldSchema(outputFields);
      ProjectionNode cleanupProjection = new ProjectionNode(outputFields, outputFields);
      cleanupProjection.setAttr(PlanNode.OUTPUT_SCHEMA_ATTR, outputSchema);
      flowSpec.attachToLastLayer(cleanupProjection);

      outContext.setSymbolTable(outTable);
      outContext.setSchema(outputSchema);
      outContext.setOutFields(outputFields);
    }

    return outContext;
  }
}