Package org.apache.derby.iapi.sql.compile

Examples of org.apache.derby.iapi.sql.compile.CostEstimate


    // The total cost is the sum of all the costs, but the total
    // number of rows is the number of rows returned by the innermost
    // optimizable.
    finalCostEstimate = getNewCostEstimate(0.0d, 0.0d, 0.0d);
    CostEstimate ce = null;
    for (int i = 0; i < bestJoinOrder.length; i++)
    {
      ce = optimizableList.getOptimizable(bestJoinOrder[i])
          .getTrulyTheBestAccessPath().getCostEstimate();

      finalCostEstimate.setCost(
        finalCostEstimate.getEstimatedCost() + ce.getEstimatedCost(),
        ce.rowCount(),
        ce.singleScanRowCount());
    }

    return finalCostEstimate;
  }
View Full Code Here


                      CostEstimate outerCost,
                      Optimizable optimizable)
    throws StandardException
  {
    /* Get the cost of a single scan */
    CostEstimate resultCost =
      optimizable.estimateCost(predList,
                  cd,
                  outerCost,
                  this,
                  currentRowOrdering);
View Full Code Here

                  OptimizablePredicateList predList,
                  CostEstimate outerCost,
                  RowOrdering rowOrdering)
      throws StandardException
  {
    CostEstimate childCost =
      ((Optimizable) childResult).optimizeIt(optimizer,
                  predList,
                  outerCost,
                  rowOrdering);
View Full Code Here

                  RowOrdering rowOrdering)
      throws StandardException
  {
    // RESOLVE: WE NEED TO ADD IN THE COST OF SORTING HERE, AND FIGURE
    // OUT HOW MANY ROWS WILL BE ELIMINATED.
    CostEstimate childCost =
      ((Optimizable) childResult).estimateCost(predList,
                  cd,
                  outerCost,
                  optimizer,
                  rowOrdering);

    costEstimate = getCostEstimate(optimizer);
    costEstimate.setCost(childCost.getEstimatedCost(),
               childCost.rowCount(),
               childCost.singleScanRowCount());


    /*
    ** No need to use estimateCost on join strategy - that has already
    ** been done on the child.
View Full Code Here

     * won't affect the results.
     */

    // Get the cost estimate for this node so that we can put it in
    // the new ProjectRestrictNode, if one is needed.
    CostEstimate ce = getFinalCostEstimate();

    // Modify this node's access paths.
    ResultSetNode topNode = (ResultSetNode)modifyAccessPath(outerTables);

    /* Now see if there are any left over predicates; if so, then we
     * have to generate a ProjectRestrictNode.  Note: we want to check
     * all SetOpNodes that exist in the subtree rooted at this SetOpNode.
     * Since we just modified access paths on this node, it's possible
     * that the SetOperatorNode chain (if there was one) is now "broken"
     * as a result of the insertion of new nodes.  For example, prior
     * to modification of access paths we may have a chain such as:
     *
     *                          UnionNode (0)
     *                          /       \
     *                 UnionNode (1)    SelectNode (2)
     *                 /        \
     *      SelectNode (3)     SelectNode (4)
     *
     * Now if UnionNode(1) did not specify "ALL" then as part of the
     * above call to modifyAccessPaths() we will have inserted a
     * DistinctNode above it, thus giving:
     *
     *                          UnionNode (0)
     *                          /       \
     *                 DistinctNode (5)  SelectNode (2)
     *                      |
     *                 UnionNode (1)
     *                 /        \
     *      SelectNode (3)     SelectNode (4)
     *
     * So our chain of UnionNodes has now been "broken" by an intervening
     * DistinctNode.  For this reason we can't just walk the chain of
     * SetOperatorNodes looking for unpushed predicates (because the
     * chain might be broken and then we could miss some nodes). Instead,
     * we have to get a collection of all relevant nodes that exist beneath
     * this SetOpNode and call hasUnPushedPredicates() on each one.  For
     * now we only consider UnionNodes to be "relevant" because those are
     * the only ones that might actually have unpushed predicates.
     *
     * If we find any UnionNodes that *do* have unpushed predicates then
     * we have to use a PRN to enforce the predicate at the level of
     * this, the top-most, SetOperatorNode.
     */

    // Find all UnionNodes in the subtree.
    CollectNodesVisitor cnv = new CollectNodesVisitor(UnionNode.class);
    this.accept(cnv);
    java.util.Vector unions = cnv.getList();

    // Now see if any of them have unpushed predicates.
    boolean genPRN = false;
    for (int i = unions.size() - 1; i >= 0; i--)
    {
      if (((UnionNode)unions.get(i)).hasUnPushedPredicates())
      {
        genPRN = true;
        break;
      }
    }

    if (genPRN)
    {
      // When we generate the project restrict node, we pass in the
      // "pushedPredicates" list because that has the predicates in
      // _unscoped_ form, which means they are intended for _this_
      // node instead of this node's children.  That's exactly what
      // we want.
      ResultSetNode prnRSN = (ResultSetNode) getNodeFactory().getNode(
        C_NodeTypes.PROJECT_RESTRICT_NODE,
        topNode,          // Child ResultSet
        topNode.getResultColumns()// Projection
        null,            // Restriction
        pushedPredicates,      // Restriction as PredicateList
        null,            // Subquerys in Projection
        null,            // Subquerys in Restriction
        null,            // Table properties
        getContextManager());
      prnRSN.costEstimate = ce.cloneMe();
      prnRSN.setReferencedTableMap(topNode.getReferencedTableMap());
      topNode = prnRSN;
    }

    return (Optimizable)topNode;
View Full Code Here

    ExpressionClassBuilder  acb,
    MethodBuilder mb
  ) throws StandardException
  {
    ConglomerateDescriptor cd = getTrulyTheBestAccessPath().getConglomerateDescriptor();
    CostEstimate costEstimate = getFinalCostEstimate();
    int colRefItem = (referencedCols == null) ?
            -1 :
            acb.addItem(referencedCols);
    boolean tableLockGranularity = tableDescriptor.getLockGranularity() == TableDescriptor.TABLE_LOCK_GRANULARITY;
 
    /*
    ** getLastIndexKeyResultSet
    ** (
    **    activation,
    **    resultSetNumber,
    **    resultRowAllocator,
    **    conglomerateNumber,
    **    tableName,
    **    optimizeroverride,
    **    indexName,
    **    colRefItem,
    **    lockMode,
    **    tableLocked,
    **    isolationLevel,
    **    optimizerEstimatedRowCount,
    **    optimizerEstimatedRowCost
    **  );
    */

    acb.pushGetResultSetFactoryExpression(mb);

    acb.pushThisAsActivation(mb);
    mb.push(getResultSetNumber());
    resultColumns.generateHolder(acb, mb, referencedCols, (FormatableBitSet) null);
    mb.push(cd.getConglomerateNumber());
    mb.push(tableDescriptor.getName());
    //User may have supplied optimizer overrides in the sql
    //Pass them onto execute phase so it can be shown in
    //run time statistics.
    if (tableProperties != null)
      mb.push(org.apache.derby.iapi.util.PropertyUtil.sortProperties(tableProperties));
    else
      mb.pushNull("java.lang.String");
                pushIndexName(cd, mb);
    mb.push(colRefItem);
    mb.push(getTrulyTheBestAccessPath().getLockMode());
    mb.push(tableLockGranularity);
    mb.push(getCompilerContext().getScanIsolationLevel());
    mb.push(costEstimate.singleScanRowCount());
    mb.push(costEstimate.getEstimatedCost());

    mb.callMethod(VMOpcode.INVOKEINTERFACE, (String) null, "getLastIndexKeyResultSet",
          ClassName.NoPutResultSet, 13);


View Full Code Here

    ExpressionClassBuilder  acb,
    MethodBuilder mb
  ) throws StandardException
  {
    ConglomerateDescriptor cd = getTrulyTheBestAccessPath().getConglomerateDescriptor();
    CostEstimate costEstimate = getFinalCostEstimate();
    int colRefItem = (referencedCols == null) ?
            -1 :
            acb.addItem(referencedCols);
    boolean tableLockGranularity = tableDescriptor.getLockGranularity() == TableDescriptor.TABLE_LOCK_GRANULARITY;
 
    /*
    ** getDistinctScanResultSet
    ** (
    **    activation,
    **    conglomerateNumber,
    **    staticCompiledConglomInfoItem,
    **    resultRowAllocator,
    **    resultSetNumber,
    **    hashKeyItem,
    **    tableName,
    **    optimizeroverride,
    **    indexName,
    **    isConstraint,
    **    colRefItem,
    **    lockMode,
    **    tableLocked,
    **    isolationLevel,
    **    optimizerEstimatedRowCount,
    **    optimizerEstimatedRowCost
    **  );
    */

    /* Get the hash key columns and wrap them in a formattable */
    int[] hashKeyColumns;

    hashKeyColumns = new int[resultColumns.size()];
    if (referencedCols == null)
    {
      for (int index = 0; index < hashKeyColumns.length; index++)
      {
        hashKeyColumns[index] = index;
      }
    }
    else
    {
      int index = 0;
      for (int colNum = referencedCols.anySetBit();
          colNum != -1;
          colNum = referencedCols.anySetBit(colNum))
      {
        hashKeyColumns[index++] = colNum;
      }
    }

    FormatableIntHolder[] fihArray =
        FormatableIntHolder.getFormatableIntHolders(hashKeyColumns);
    FormatableArrayHolder hashKeyHolder = new FormatableArrayHolder(fihArray);
    int hashKeyItem = acb.addItem(hashKeyHolder);
    long conglomNumber = cd.getConglomerateNumber();
    StaticCompiledOpenConglomInfo scoci = getLanguageConnectionContext().
                        getTransactionCompile().
                          getStaticCompiledConglomInfo(conglomNumber);

    acb.pushGetResultSetFactoryExpression(mb);

       acb.pushThisAsActivation(mb);
    mb.push(conglomNumber);
    mb.push(acb.addItem(scoci));
     resultColumns.generateHolder(acb, mb, referencedCols, (FormatableBitSet) null);
    mb.push(getResultSetNumber());
    mb.push(hashKeyItem);
    mb.push(tableDescriptor.getName());
    //User may have supplied optimizer overrides in the sql
    //Pass them onto execute phase so it can be shown in
    //run time statistics.
    if (tableProperties != null)
      mb.push(org.apache.derby.iapi.util.PropertyUtil.sortProperties(tableProperties));
    else
      mb.pushNull("java.lang.String");
    pushIndexName(cd, mb);
    mb.push(cd.isConstraint());
    mb.push(colRefItem);
    mb.push(getTrulyTheBestAccessPath().getLockMode());
    mb.push(tableLockGranularity);
    mb.push(getCompilerContext().getScanIsolationLevel());
    mb.push(costEstimate.singleScanRowCount());
    mb.push(costEstimate.getEstimatedCost());
   
    mb.callMethod(VMOpcode.INVOKEINTERFACE, (String) null, "getDistinctScanResultSet",
              ClassName.NoPutResultSet, 16);
  }
View Full Code Here

    /*
    ** Only need to do this for current access path, because the
    ** costEstimate will be copied to the best access paths as
    ** necessary.
    */
    CostEstimate costEstimate = getCostEstimate(optimizer);
    ap.setCostEstimate(costEstimate);

    /*
    ** This is the initial cost of this optimizable.  Initialize it
    ** to the maximum cost so that the optimizer will think that
    ** any access path is better than none.
    */
    costEstimate.setCost(Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE);

    super.startOptimizing(optimizer, rowOrdering);
  }
View Full Code Here

                     this);
                 
    /* RESOLVE: Need to figure out how to cache the StoreCostController */
    StoreCostController scc = getStoreCostController(cd);

    CostEstimate costEstimate = getScratchCostEstimate(optimizer);

    /* First, get the cost for one scan */

    /* Does the conglomerate match at most one row? */
    if (isOneRowResultSet(cd, baseTableRestrictionList))
    {
      /*
      ** Tell the RowOrdering that this optimizable is always ordered.
      ** It will figure out whether it is really always ordered in the
      ** context of the outer tables and their orderings.
      */
      rowOrdering.optimizableAlwaysOrdered(this);

      singleScanRowCount = 1.0;

      /* Yes, the cost is to fetch exactly one row */
      // RESOLVE: NEED TO FIGURE OUT HOW TO GET REFERENCED COLUMN LIST,
      // FIELD STATES, AND ACCESS TYPE
      cost = scc.getFetchFromFullKeyCost(
                    (FormatableBitSet) null,
                    0);

      optimizer.trace(Optimizer.MATCH_SINGLE_ROW_COST,
              tableNumber, 0, cost, null);

      costEstimate.setCost(cost, 1.0d, 1.0d);

      /*
      ** Let the join strategy decide whether the cost of the base
      ** scan is a single scan, or a scan per outer row.
      ** NOTE: The multiplication should only be done against the
      ** total row count, not the singleScanRowCount.
      */
      double newCost = costEstimate.getEstimatedCost();

      if (currentJoinStrategy.multiplyBaseCostByOuterRows())
      {
        newCost *= outerCost.rowCount();
      }

      costEstimate.setCost(
        newCost,
        costEstimate.rowCount() * outerCost.rowCount(),
        costEstimate.singleScanRowCount());

      /*
      ** Choose the lock mode.  If the start/stop conditions are
      ** constant, choose row locking, because we will always match
      ** the same row.  If they are not constant (i.e. they include
      ** a join), we decide whether to do row locking based on
      ** the total number of rows for the life of the query.
      */
      boolean constantStartStop = true;
      for (int i = 0; i < predList.size(); i++)
      {
        OptimizablePredicate pred = predList.getOptPredicate(i);

        /*
        ** The predicates are in index order, so the start and
        ** stop keys should be first.
        */
        if ( ! (pred.isStartKey() || pred.isStopKey()))
        {
          break;
        }

        /* Stop when we've found a join */
        if ( ! pred.getReferencedMap().hasSingleBitSet())
        {
          constantStartStop = false;
          break;
        }
      }

      if (constantStartStop)
      {
        currentAccessPath.setLockMode(
                      TransactionController.MODE_RECORD);

        optimizer.trace(Optimizer.ROW_LOCK_ALL_CONSTANT_START_STOP,
                0, 0, 0.0, null);
      }
      else
      {
        setLockingBasedOnThreshold(optimizer, costEstimate.rowCount());
      }

      optimizer.trace(Optimizer.COST_OF_N_SCANS,
              tableNumber, 0, outerCost.rowCount(), costEstimate);

      /* Add in cost of fetching base row for non-covering index */
      if (cd.isIndex() && ( ! isCoveringIndex(cd) ) )
      {
        double singleFetchCost =
            getBaseCostController().getFetchFromRowLocationCost(
                                (FormatableBitSet) null,
                                0);
        cost = singleFetchCost * costEstimate.rowCount();

        costEstimate.setEstimatedCost(
                costEstimate.getEstimatedCost() + cost);

        optimizer.trace(Optimizer.NON_COVERING_INDEX_COST,
                tableNumber, 0, cost, null);
      }
    }
    else
    {
      /* Conglomerate might match more than one row */

      /*
      ** Some predicates are good for start/stop, but we don't know
      ** the values they are being compared to at this time, so we
      ** estimate their selectivity in language rather than ask the
      ** store about them.  The predicates on the first column of
      ** the conglomerate reduce the number of pages and rows scanned.
      ** The predicates on columns after the first reduce the number
      ** of rows scanned, but have a much smaller effect on the number
      ** of pages scanned, so we keep track of these selectivities in
      ** two separate variables: extraFirstColumnSelectivity and
      ** extraStartStopSelectivity. (Theoretically, we could try to
      ** figure out the effect of predicates after the first column
      ** on the number of pages scanned, but it's too hard, so we
      ** use these predicates only to reduce the estimated number of
      ** rows.  For comparisons with known values, though, the store
      ** can figure out exactly how many rows and pages are scanned.)
      **
      ** Other predicates are not good for start/stop.  We keep track
      ** of their selectivities separately, because these limit the
      ** number of rows, but not the number of pages, and so need to
      ** be factored into the row count but not into the cost.
      ** These selectivities are factored into extraQualifierSelectivity
      ** (for qualifiers) and extraNonQualifierSelectivity (for the rest).
      **
      ** statStartStopSelectivity (using statistics) represents the
      ** selectivity of start/stop predicates that can be used to scan
      ** the index. If no statistics exist for the conglomerate then
      ** the value of this variable remains at 1.0
      **
      ** statCompositeSelectivity (using statistics) represents the
      ** selectivity of all the predicates (including NonBaseTable
      ** predicates). This represents the most educated guess [among
      ** all the wild surmises in this routine] as to the number
      ** of rows that will be returned from this joinNode.
      ** If no statistics exist on the table or no statistics at all
      ** can be found to satisfy the predicates at this join operator,
      ** then statCompositeSelectivity is left initialized at 1.0
      */
      double extraFirstColumnSelectivity = 1.0d;
      double extraStartStopSelectivity = 1.0d;
      double extraQualifierSelectivity = 1.0d;
      double extraNonQualifierSelectivity = 1.0d;
      double statStartStopSelectivity = 1.0d;
      double statCompositeSelectivity = 1.0d;

      int     numExtraFirstColumnPreds = 0;
      int     numExtraStartStopPreds = 0;
      int     numExtraQualifiers = 0;
      int     numExtraNonQualifiers = 0;

      /*
      ** It is possible for something to be a start or stop predicate
      ** without it being possible to use it as a key for cost estimation.
      ** For example, with an index on (c1, c2), and the predicate
      ** c1 = othertable.c3 and c2 = 1, the comparison on c1 is with
      ** an unknown value, so we can't pass it to the store.  This means
      ** we can't pass the comparison on c2 to the store, either.
      **
      ** The following booleans keep track of whether we have seen
      ** gaps in the keys we can pass to the store.
      */
      boolean startGap = false;
      boolean stopGap = false;
      boolean seenFirstColumn = false;

      /*
      ** We need to figure out the number of rows touched to decide
      ** whether to use row locking or table locking.  If the start/stop
      ** conditions are constant (i.e. no joins), the number of rows
      ** touched is the number of rows per scan.  But if the start/stop
      ** conditions contain a join, the number of rows touched must
      ** take the number of outer rows into account.
      */
      boolean constantStartStop = true;
      boolean startStopFound = false;

      /* Count the number of start and stop keys */
      int startKeyNum = 0;
      int stopKeyNum = 0;
      OptimizablePredicate pred;
      int predListSize;

      if (predList != null)
        predListSize = baseTableRestrictionList.size();
      else
        predListSize = 0;

      int startStopPredCount = 0;
      ColumnReference firstColumn = null;
      for (int i = 0; i < predListSize; i++)
      {
        pred = baseTableRestrictionList.getOptPredicate(i);
        boolean startKey = pred.isStartKey();
        boolean stopKey = pred.isStopKey();
        if (startKey || stopKey)
        {
          startStopFound = true;

          if ( ! pred.getReferencedMap().hasSingleBitSet())
          {
            constantStartStop = false;
          }

          boolean knownConstant =
            pred.compareWithKnownConstant(this, true);
          if (startKey)
          {
            if (knownConstant && ( ! startGap ) )
            {
              startKeyNum++;
                if (unknownPredicateList != null)
                  unknownPredicateList.removeOptPredicate(pred);
            }
            else
            {
              startGap = true;
            }
          }

          if (stopKey)
          {
            if (knownConstant && ( ! stopGap ) )
            {
              stopKeyNum++;
                if (unknownPredicateList != null)
                  unknownPredicateList.removeOptPredicate(pred);
            }
            else
            {
              stopGap = true;
            }
          }

          /* If we are seeing a startGap or stopGap because a start/stop key
           * is a comparison with a non-constant, we should multiply its
           * selectivity into extraFirstColumnSelectivity (or, for columns
           * after the first, into extraStartStopSelectivity).  Beetle 4787.
           */
          if (startGap || stopGap)
          {
            // Don't include redundant join predicates in selectivity calculations
            if (baseTableRestrictionList.isRedundantPredicate(i))
              continue;

            if (startKey && stopKey)
              startStopPredCount++;

            if (pred.getIndexPosition() == 0)
            {
              extraFirstColumnSelectivity *=
                            pred.selectivity(this);
              if (! seenFirstColumn)
              {
                ValueNode relNode = ((Predicate) pred).getAndNode().getLeftOperand();
                if (relNode instanceof BinaryRelationalOperatorNode)
                  firstColumn = ((BinaryRelationalOperatorNode) relNode).getColumnOperand(this);
                seenFirstColumn = true;
              }
            }
            else
            {
              extraStartStopSelectivity *= pred.selectivity(this);
              numExtraStartStopPreds++;
            }
          }
        }
        else
        {
          // Don't include redundant join predicates in selectivity calculations
          if (baseTableRestrictionList.isRedundantPredicate(i))
          {
            continue;
          }

          /* If we have "like" predicate on the first index column, it is more likely
           * to have a smaller range than "between", so we apply extra selectivity 0.2
           * here.  beetle 4387, 4787.
           */
          if (pred instanceof Predicate)
          {
            ValueNode leftOpnd = ((Predicate) pred).getAndNode().getLeftOperand();
            if (firstColumn != null && leftOpnd instanceof LikeEscapeOperatorNode)
            {
              LikeEscapeOperatorNode likeNode = (LikeEscapeOperatorNode) leftOpnd;
              if (likeNode.getLeftOperand().requiresTypeFromContext())
              {
                ValueNode receiver = ((TernaryOperatorNode) likeNode).getReceiver();
                if (receiver instanceof ColumnReference)
                {
                  ColumnReference cr = (ColumnReference) receiver;
                  if (cr.getTableNumber() == firstColumn.getTableNumber() &&
                    cr.getColumnNumber() == firstColumn.getColumnNumber())
                    extraFirstColumnSelectivity *= 0.2;
                }
              }
            }
          }

          if (pred.isQualifier())
          {
            extraQualifierSelectivity *= pred.selectivity(this);
            numExtraQualifiers++;
          }
          else
          {
            extraNonQualifierSelectivity *= pred.selectivity(this);
            numExtraNonQualifiers++;
          }

          /*
          ** Strictly speaking, it shouldn't be necessary to
          ** indicate a gap here, since there should be no more
          ** start/stop predicates, but let's do it, anyway.
          */
          startGap = true;
          stopGap = true;
        }
      }

      if (unknownPredicateList != null)
      {
        statCompositeSelectivity = unknownPredicateList.selectivity(this);
        if (statCompositeSelectivity == -1.0d)
          statCompositeSelectivity = 1.0d;
      }

      if (seenFirstColumn && statisticsForConglomerate &&
        (startStopPredCount > 0))
      {
        statStartStopSelectivity =
          tableDescriptor.selectivityForConglomerate(cd, startStopPredCount);
      }

      /*
      ** Factor the non-base-table predicates into the extra
      ** non-qualifier selectivity, since these will restrict the
      ** number of rows, but not the cost.
      */
      extraNonQualifierSelectivity *=
        currentJoinStrategy.nonBasePredicateSelectivity(this, predList);

      /* Create the start and stop key arrays, and fill them in */
      DataValueDescriptor[] startKeys;
      DataValueDescriptor[] stopKeys;

      if (startKeyNum > 0)
        startKeys = new DataValueDescriptor[startKeyNum];
      else
        startKeys = null;

      if (stopKeyNum > 0)
        stopKeys = new DataValueDescriptor[stopKeyNum];
      else
        stopKeys = null;

      startKeyNum = 0;
      stopKeyNum = 0;
      startGap = false;
      stopGap = false;

      /* If we have a probe predicate that is being used as a start/stop
       * key then ssKeySourceInList will hold the InListOperatorNode
       * from which the probe predicate was built.
       */
      InListOperatorNode ssKeySourceInList = null;
      for (int i = 0; i < predListSize; i++)
      {
        pred = baseTableRestrictionList.getOptPredicate(i);
        boolean startKey = pred.isStartKey();
        boolean stopKey = pred.isStopKey();

        if (startKey || stopKey)
        {
          /* A probe predicate is only useful if it can be used as
           * a start/stop key for the _first_ column in an index
           * (i.e. if the column position is 0).  That said, we only
           * allow a single start/stop key per column position in
           * the index (see PredicateList.orderUsefulPredicates()).
           * Those two facts combined mean that we should never have
           * more than one probe predicate start/stop key for a given
           * conglomerate.
           */
          if (SanityManager.DEBUG)
          {
            if ((ssKeySourceInList != null) &&
              ((Predicate)pred).isInListProbePredicate())
            {
              SanityManager.THROWASSERT(
              "Found multiple probe predicate start/stop keys" +
              " for conglomerate '" + cd.getConglomerateName() +
              "' when at most one was expected.");
            }
          }

          /* By passing "true" in the next line we indicate that we
           * should only retrieve the underlying InListOpNode *if*
           * the predicate is a "probe predicate".
           */
          ssKeySourceInList = ((Predicate)pred).getSourceInList(true);
          boolean knownConstant = pred.compareWithKnownConstant(this, true);

          if (startKey)
          {
            if (knownConstant && ( ! startGap ) )
            {
              startKeys[startKeyNum] = pred.getCompareValue(this);
              startKeyNum++;
            }
            else
            {
              startGap = true;
            }
          }

          if (stopKey)
          {
            if (knownConstant && ( ! stopGap ) )
            {
              stopKeys[stopKeyNum] = pred.getCompareValue(this);
              stopKeyNum++;
            }
            else
            {
              stopGap = true;
            }
          }
        }
        else
        {
          startGap = true;
          stopGap = true;
        }
      }

      int startOperator;
      int stopOperator;

      if (baseTableRestrictionList != null)
      {
        startOperator = baseTableRestrictionList.startOperator(this);
        stopOperator = baseTableRestrictionList.stopOperator(this);
      }
      else
      {
        /*
        ** If we're doing a full scan, it doesn't matter what the
        ** start and stop operators are.
        */
        startOperator = ScanController.NA;
        stopOperator = ScanController.NA;
      }

      /*
      ** Get a row template for this conglomerate.  For now, just tell
      ** it we are using all the columns in the row.
      */
      DataValueDescriptor[] rowTemplate =
                getRowTemplate(cd, getBaseCostController());

      /* We prefer an index scan over a table scan for concurrency reasons,
       * so we make a small adjustment to the estimated row count.  This
       * affects the optimizer's decision especially when there are few rows
       * in the table. beetle 5006. This makes sense since the plan may stay
       * around for a long time before we actually check and invalidate it,
       * and new rows may be inserted before then.  Here we only prefer an
       * index that has a start/stop key from predicates. The non-constant
       * start/stop key case is taken care of by selectivity later.
       */
      long baseRC = (startKeys != null || stopKeys != null) ? baseRowCount() : baseRowCount() + 5;

      scc.getScanCost(
          currentJoinStrategy.scanCostType(),
          baseRC,
                    1,
          forUpdate(),
          (FormatableBitSet) null,
          rowTemplate,
          startKeys,
          startOperator,
          stopKeys,
          stopOperator,
          false,
          0,
          costEstimate);

      /* initialPositionCost is the first part of the index scan cost we get above.
       * It's the cost of initial positioning/fetch of key.  So it's unrelated to
       * row count of how many rows we fetch from index.  We extract it here so that
       * we only multiply selectivity to the other part of index scan cost, which is
       * nearly linear, to make cost calculation more accurate and fair, especially
       * compared to the plan of "one row result set" (unique index). beetle 4787.
       */
      double initialPositionCost = 0.0;
      if (cd.isIndex())
      {
        initialPositionCost = scc.getFetchFromFullKeyCost((FormatableBitSet) null, 0);
        /* oneRowResultSetForSomeConglom means there's a unique index, but certainly
         * not this one since we are here.  If the store knows this non-unique index
         * won't return any rows or returns just one row (e.g., the predicate is a
         * comparison with a constant, or the table is almost empty), we make a minor
         * adjustment to the cost (affecting the decision for a covering index) and
         * to the row count (affecting the decision for a non-covering index). The
         * purpose is to favor the unique index. beetle 5006.
         */
        if (oneRowResultSetForSomeConglom && costEstimate.rowCount() <= 1)
        {
          costEstimate.setCost(costEstimate.getEstimatedCost() * 2,
                     costEstimate.rowCount() + 2,
                     costEstimate.singleScanRowCount() + 2);
        }
      }

      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN1,
              tableNumber, 0, 0.0, cd);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN2,
              tableNumber, 0, 0.0, costEstimate);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN3,
              numExtraFirstColumnPreds, 0,
              extraFirstColumnSelectivity, null);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN4,
              numExtraStartStopPreds, 0,
              extraStartStopSelectivity, null);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN7,
              startStopPredCount, 0,
              statStartStopSelectivity, null);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN5,
              numExtraQualifiers, 0,
              extraQualifierSelectivity, null);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN6,
              numExtraNonQualifiers, 0,
              extraNonQualifierSelectivity, null);

      /* initial row count is the row count without applying
         any predicates-- we use this at the end of the routine
         when we use statistics to recompute the row count.
      */
      double initialRowCount = costEstimate.rowCount();

      if (statStartStopSelectivity != 1.0d)
      {
        /*
        ** If statistics exist use the selectivity computed
        ** from the statistics to calculate the cost.
        ** NOTE: we apply this selectivity to the cost as well
        ** as both the row counts. In the absence of statistics
        ** we only applied the FirstColumnSelectivity to the
        ** cost.
        */
        costEstimate.setCost(
               scanCostAfterSelectivity(costEstimate.getEstimatedCost(),
                            initialPositionCost,
                            statStartStopSelectivity,
                            oneRowResultSetForSomeConglom),
               costEstimate.rowCount() * statStartStopSelectivity,
               costEstimate.singleScanRowCount() *
               statStartStopSelectivity);
        optimizer.trace(Optimizer.COST_INCLUDING_STATS_FOR_INDEX,
                tableNumber, 0, 0.0, costEstimate);

      }
      else
      {
        /*
        ** Factor in the extra selectivity on the first column
        ** of the conglomerate (see comment above).
        ** NOTE: In this case we want to apply the selectivity to both
        ** the total row count and singleScanRowCount.
        */
        if (extraFirstColumnSelectivity != 1.0d)
        {
          costEstimate.setCost(
             scanCostAfterSelectivity(costEstimate.getEstimatedCost(),
                          initialPositionCost,
                          extraFirstColumnSelectivity,
                          oneRowResultSetForSomeConglom),
             costEstimate.rowCount() * extraFirstColumnSelectivity,
             costEstimate.singleScanRowCount() * extraFirstColumnSelectivity);
         
          optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_1ST_COL_SELECTIVITY,
                  tableNumber, 0, 0.0, costEstimate);
        }

        /* Factor in the extra start/stop selectivity (see comment above).
         * NOTE: In this case we want to apply the selectivity to both
         * the row count and singleScanRowCount.
         */
        if (extraStartStopSelectivity != 1.0d)
        {
          costEstimate.setCost(
            costEstimate.getEstimatedCost(),
            costEstimate.rowCount() * extraStartStopSelectivity,
            costEstimate.singleScanRowCount() * extraStartStopSelectivity);

          optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_START_STOP,
                  tableNumber, 0, 0.0, costEstimate);
        }
      }

      /* If the start and stop key came from an IN-list "probe predicate"
       * then we need to adjust the cost estimate.  The probe predicate
       * is of the form "col = ?" and we currently have the estimated
       * cost of probing the index a single time for "?".  But with an
       * IN-list we don't just probe the index once; we're going to
       * probe it once for every value in the IN-list.  And we are going
       * to potentially return an additional row (or set of rows) for
       * each probe.  To account for this "multi-probing" we take the
       * costEstimate and multiply each of its fields by the size of
       * the IN-list.
       *
       * Note: If the IN-list has duplicate values then this simple
       * multiplication could give us an elevated cost (because we
       * only probe the index for each *non-duplicate* value in the
       * IN-list).  But for now, we're saying that's okay.
       */
      if (ssKeySourceInList != null)
      {
        int listSize = ssKeySourceInList.getRightOperandList().size();
        double rc = costEstimate.rowCount() * listSize;
        double ssrc = costEstimate.singleScanRowCount() * listSize;

        /* If multiplication by listSize returns more rows than are
         * in the scan then just use the number of rows in the scan.
         */
        costEstimate.setCost(
          costEstimate.getEstimatedCost() * listSize,
          rc > initialRowCount ? initialRowCount : rc,
          ssrc > initialRowCount ? initialRowCount : ssrc);
      }

      /*
      ** Figure out whether to do row locking or table locking.
      **
      ** If there are no start/stop predicates, we're doing full
      ** conglomerate scans, so do table locking.
      */
      if (! startStopFound)
      {
        currentAccessPath.setLockMode(
                      TransactionController.MODE_TABLE);

        optimizer.trace(Optimizer.TABLE_LOCK_NO_START_STOP,
                  0, 0, 0.0, null);
      }
      else
      {
        /*
        ** Figure out the number of rows touched.  If all the
        ** start/stop predicates are constant, the number of
        ** rows touched is the number of rows per scan.
        ** This is also true for join strategies that scan the
        ** inner table only once (like hash join) - we can
        ** tell if we have one of those, because
        ** multiplyBaseCostByOuterRows() will return false.
        */
        double rowsTouched = costEstimate.rowCount();

        if ( (! constantStartStop) &&
           currentJoinStrategy.multiplyBaseCostByOuterRows())
        {
          /*
          ** This is a join where the inner table is scanned
          ** more than once, so we have to take the number
          ** of outer rows into account.  The formula for this
          ** works out as follows:
          **
          **  total rows in table = r
          **  number of rows touched per scan = s
          **  number of outer rows = o
          **  proportion of rows touched per scan = s / r
          **  proportion of rows not touched per scan =
          **                    1 - (s / r)
          **  proportion of rows not touched for all scans =
          **                  (1 - (s / r)) ** o
          **  proportion of rows touched for all scans =
          **                  1 - ((1 - (s / r)) ** o)
          **  total rows touched for all scans =
          **              r * (1 - ((1 - (s / r)) ** o))
          **
          ** In doing these calculations, we must be careful not
          ** to divide by zero.  This could happen if there are
          ** no rows in the table.  In this case, let's do table
          ** locking.
          */
          double r = baseRowCount();
          if (r > 0.0)
          {
            double s = costEstimate.rowCount();
            double o = outerCost.rowCount();
            double pRowsNotTouchedPerScan = 1.0 - (s / r);
            double pRowsNotTouchedAllScans =
                    Math.pow(pRowsNotTouchedPerScan, o);
            double pRowsTouchedAllScans =
                    1.0 - pRowsNotTouchedAllScans;
            double rowsTouchedAllScans =
                    r * pRowsTouchedAllScans;

            rowsTouched = rowsTouchedAllScans;
          }
          else
          {
            /* See comments in setLockingBasedOnThreshold */
            rowsTouched = optimizer.tableLockThreshold() + 1;
          }
        }

        setLockingBasedOnThreshold(optimizer, rowsTouched);
      }

      /*
      ** If the index isn't covering, add the cost of getting the
      ** base row.  Only apply extraFirstColumnSelectivity and extraStartStopSelectivity
      ** before we do this, don't apply extraQualifierSelectivity etc.  The
      ** reason is that the row count here should be the number of index rows
      ** (and hence heap rows) we get, and we need to fetch all those rows, even
      ** though later on some of them may be filtered out by other predicates.
      ** beetle 4787.
      */
      if (cd.isIndex() && ( ! isCoveringIndex(cd) ) )
      {
        double singleFetchCost =
            getBaseCostController().getFetchFromRowLocationCost(
                                (FormatableBitSet) null,
                                0);

        cost = singleFetchCost * costEstimate.rowCount();

        costEstimate.setEstimatedCost(
                costEstimate.getEstimatedCost() + cost);

        optimizer.trace(Optimizer.COST_OF_NONCOVERING_INDEX,
                tableNumber, 0, 0.0, costEstimate);
      }

      /* Factor in the extra qualifier selectivity (see comment above).
       * NOTE: In this case we want to apply the selectivity to both
       * the row count and singleScanRowCount.
       */
      if (extraQualifierSelectivity != 1.0d)
      {
        costEstimate.setCost(
            costEstimate.getEstimatedCost(),
            costEstimate.rowCount() * extraQualifierSelectivity,
            costEstimate.singleScanRowCount() * extraQualifierSelectivity);

        optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_QUALIFIER_SELECTIVITY,
                tableNumber, 0, 0.0, costEstimate);
      }

      singleScanRowCount = costEstimate.singleScanRowCount();

      /*
      ** Let the join strategy decide whether the cost of the base
      ** scan is a single scan, or a scan per outer row.
      ** NOTE: In this case we only want to multiply against the
      ** total row count, not the singleScanRowCount.
      ** NOTE: Do not multiply row count if we determined that
      ** conglomerate is a 1 row result set when costing nested
      ** loop.  (eg, we will find at most 1 match when probing
      ** the hash table.)
      */
      double newCost = costEstimate.getEstimatedCost();
      double rowCount = costEstimate.rowCount();

      /*
      ** RESOLVE - If there is a unique index on the joining
      ** columns, the number of matching rows will equal the
      ** number of outer rows, even if we're not considering the
      ** unique index for this access path. To figure that out,
      ** however, would require an analysis phase at the beginning
      ** of optimization. So, we'll always multiply the number
      ** of outer rows by the number of rows per scan. This will
      ** give us a higher than actual row count when there is
      ** such a unique index, which will bias the optimizer toward
      ** using the unique index. This is probably OK most of the
      ** time, since the optimizer would probably choose the
      ** unique index, anyway. But it would be better if the
      ** optimizer set the row count properly in this case.
      */
      if (currentJoinStrategy.multiplyBaseCostByOuterRows())
      {
        newCost *= outerCost.rowCount();
      }

      rowCount *= outerCost.rowCount();
      initialRowCount *= outerCost.rowCount();


      /*
      ** If this table can generate at most one row per scan,
      ** the maximum row count is the number of outer rows.
      ** NOTE: This does not completely take care of the RESOLVE
      ** in the above comment, since it will only notice
      ** one-row result sets for the current join order.
      */
      if (oneRowResultSetForSomeConglom)
      {
        if (outerCost.rowCount() < rowCount)
        {
          rowCount = outerCost.rowCount();
        }
      }

      /*
      ** The estimated cost may be too high for indexes, if the
      ** estimated row count exceeds the maximum. Only do this
      ** if we're not doing a full scan, and the start/stop position
      ** is not constant (i.e. we're doing a join on the first column
      ** of the index) - the reason being that this is when the
      ** cost may be inaccurate.
      */
      if (cd.isIndex() && startStopFound && ( ! constantStartStop ) )
      {
        /*
        ** Does any table outer to this one have a unique key on
        ** a subset of the joining columns? If so, the maximum number
        ** of rows that this table can return is the number of rows
        ** in this table times the maximum number of times each key
        ** can be repeated.
        */
        double scanUniquenessFactor =
          optimizer.uniqueJoinWithOuterTable(baseTableRestrictionList);
        if (scanUniquenessFactor > 0.0)
        {
          /*
          ** A positive uniqueness factor means there is a unique
          ** outer join key. The value is the reciprocal of the
          ** maximum number of duplicates for each unique key
          ** (the duplicates can be caused by other joining tables).
          */
          double maxRows =
              ((double) baseRowCount()) / scanUniquenessFactor;
          if (rowCount > maxRows)
          {
            /*
            ** The estimated row count is too high. Adjust the
            ** estimated cost downwards proportionately to
            ** match the maximum number of rows.
            */
            newCost *= (maxRows / rowCount);
          }
        }
      }

      /* The estimated total row count may be too high */
      if (tableUniquenessFactor > 0.0)
      {
        /*
        ** A positive uniqueness factor means there is a unique outer
        ** join key. The value is the reciprocal of the maximum number
        ** of duplicates for each unique key (the duplicates can be
        ** caused by other joining tables).
        */
        double maxRows =
              ((double) baseRowCount()) / tableUniquenessFactor;
        if (rowCount > maxRows)
        {
          /*
          ** The estimated row count is too high. Set it to the
          ** maximum row count.
          */
          rowCount = maxRows;
        }
      }

      costEstimate.setCost(
        newCost,
        rowCount,
        costEstimate.singleScanRowCount());


      optimizer.trace(Optimizer.COST_OF_N_SCANS,
              tableNumber, 0, outerCost.rowCount(), costEstimate);

      /*
      ** Now figure in the cost of the non-qualifier predicates.
      ** existsBaseTables have a row count of 1
      */
      double rc = -1, src = -1;
      if (existsBaseTable)
        rc = src = 1;
      // don't factor in extraNonQualifierSelectivity in case of oneRowResultSetForSomeConglom
      // because "1" is the final result and the effect of other predicates already considered
      // beetle 4787
      else if (extraNonQualifierSelectivity != 1.0d)
      {
        rc = oneRowResultSetForSomeConglom ? costEstimate.rowCount() :
                      costEstimate.rowCount() * extraNonQualifierSelectivity;
        src = costEstimate.singleScanRowCount() * extraNonQualifierSelectivity;
      }
      if (rc != -1) // changed
      {
        costEstimate.setCost(costEstimate.getEstimatedCost(), rc, src);
        optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_NONQUALIFIER_SELECTIVITY,
                tableNumber, 0, 0.0, costEstimate);
      }
     
    recomputeRowCount:
      if (statisticsForTable && !oneRowResultSetForSomeConglom &&
        (statCompositeSelectivity != 1.0d))
      {
        /* if we have statistics we should use statistics to calculate
           row  count-- if it has been determined that this table
           returns one row for some conglomerate then there is no need
           to do this recalculation
        */

        double compositeStatRC = initialRowCount * statCompositeSelectivity;
        optimizer.trace(Optimizer.COMPOSITE_SEL_FROM_STATS,
                0, 0, statCompositeSelectivity, null);


        if (tableUniquenessFactor > 0.0)
        {
          /* If the row count from the composite statistics
             comes up more than what the table uniqueness
             factor indicates then lets stick with the current
             row count.
          */
          if (compositeStatRC > (baseRowCount() *
                       tableUniquenessFactor))
           
          {
           
            break recomputeRowCount;
          }
        }
       
        /* set the row count and the single scan row count
           based on initialRowCount. initialRowCount is the product
           of the RC from store * RC of the outerCost.
           Thus RC = initialRowCount * the selectivity from stats.
           SingleRC = RC / outerCost.rowCount().
        */
        costEstimate.setCost(costEstimate.getEstimatedCost(),
                   compositeStatRC,
                   (existsBaseTable) ?
                   1 :
                   compositeStatRC / outerCost.rowCount());
       
View Full Code Here

    /*
    ** Only need to do this for current access path, because the
    ** costEstimate will be copied to the best access paths as
    ** necessary.
    */
    CostEstimate costEstimate = getCostEstimate(optimizer);
    ap.setCostEstimate(costEstimate);

    /*
    ** This is the initial cost of this optimizable.  Initialize it
    ** to the maximum cost so that the optimizer will think that
    ** any access path is better than none.
    */
    costEstimate.setCost(Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE);

    super.startOptimizing(optimizer, rowOrdering);
  }
View Full Code Here

TOP

Related Classes of org.apache.derby.iapi.sql.compile.CostEstimate

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.