Package org.apache.derby.iapi.sql.compile

Examples of org.apache.derby.iapi.sql.compile.JoinStrategy


      predList.copyPredicatesToOtherList(unknownPredicateList);

    }

    AccessPath currentAccessPath = getCurrentAccessPath();
    JoinStrategy currentJoinStrategy =
      currentAccessPath.getJoinStrategy();

    optimizer.trace(Optimizer.ESTIMATING_COST_OF_CONGLOMERATE,
            tableNumber, 0, 0.0, cd);

    /* Get the uniqueness factory for later use (see below) */
    double tableUniquenessFactor =
        optimizer.uniqueJoinWithOuterTable(predList);

    boolean oneRowResultSetForSomeConglom = isOneRowResultSet(predList);

    /* Get the predicates that can be used for scanning the base table */
    baseTableRestrictionList.removeAllElements();

    currentJoinStrategy.getBasePredicates(predList, 
                     baseTableRestrictionList,
                     this);
                 
    /* RESOLVE: Need to figure out how to cache the StoreCostController */
    StoreCostController scc = getStoreCostController(cd);

    CostEstimate costEstimate = getScratchCostEstimate(optimizer);

    /* First, get the cost for one scan */

    /* Does the conglomerate match at most one row? */
    if (isOneRowResultSet(cd, baseTableRestrictionList))
    {
      /*
      ** Tell the RowOrdering that this optimizable is always ordered.
      ** It will figure out whether it is really always ordered in the
      ** context of the outer tables and their orderings.
      */
      rowOrdering.optimizableAlwaysOrdered(this);

      singleScanRowCount = 1.0;

      /* Yes, the cost is to fetch exactly one row */
      // RESOLVE: NEED TO FIGURE OUT HOW TO GET REFERENCED COLUMN LIST,
      // FIELD STATES, AND ACCESS TYPE
      cost = scc.getFetchFromFullKeyCost(
                    (FormatableBitSet) null,
                    0);

      optimizer.trace(Optimizer.MATCH_SINGLE_ROW_COST,
              tableNumber, 0, cost, null);

      costEstimate.setCost(cost, 1.0d, 1.0d);

      /*
      ** Let the join strategy decide whether the cost of the base
      ** scan is a single scan, or a scan per outer row.
      ** NOTE: The multiplication should only be done against the
      ** total row count, not the singleScanRowCount.
      */
      double newCost = costEstimate.getEstimatedCost();

      if (currentJoinStrategy.multiplyBaseCostByOuterRows())
      {
        newCost *= outerCost.rowCount();
      }

      costEstimate.setCost(
        newCost,
        costEstimate.rowCount() * outerCost.rowCount(),
        costEstimate.singleScanRowCount());

      /*
      ** Choose the lock mode.  If the start/stop conditions are
      ** constant, choose row locking, because we will always match
      ** the same row.  If they are not constant (i.e. they include
      ** a join), we decide whether to do row locking based on
      ** the total number of rows for the life of the query.
      */
      boolean constantStartStop = true;
      for (int i = 0; i < predList.size(); i++)
      {
        OptimizablePredicate pred = predList.getOptPredicate(i);

        /*
        ** The predicates are in index order, so the start and
        ** stop keys should be first.
        */
        if ( ! (pred.isStartKey() || pred.isStopKey()))
        {
          break;
        }

        /* Stop when we've found a join */
        if ( ! pred.getReferencedMap().hasSingleBitSet())
        {
          constantStartStop = false;
          break;
        }
      }

      if (constantStartStop)
      {
        currentAccessPath.setLockMode(
                      TransactionController.MODE_RECORD);

        optimizer.trace(Optimizer.ROW_LOCK_ALL_CONSTANT_START_STOP,
                0, 0, 0.0, null);
      }
      else
      {
        setLockingBasedOnThreshold(optimizer, costEstimate.rowCount());
      }

      optimizer.trace(Optimizer.COST_OF_N_SCANS,
              tableNumber, 0, outerCost.rowCount(), costEstimate);

      /* Add in cost of fetching base row for non-covering index */
      if (cd.isIndex() && ( ! isCoveringIndex(cd) ) )
      {
        double singleFetchCost =
            getBaseCostController().getFetchFromRowLocationCost(
                                (FormatableBitSet) null,
                                0);
        cost = singleFetchCost * costEstimate.rowCount();

        costEstimate.setEstimatedCost(
                costEstimate.getEstimatedCost() + cost);

        optimizer.trace(Optimizer.NON_COVERING_INDEX_COST,
                tableNumber, 0, cost, null);
      }
    }
    else
    {
      /* Conglomerate might match more than one row */

      /*
      ** Some predicates are good for start/stop, but we don't know
      ** the values they are being compared to at this time, so we
      ** estimate their selectivity in language rather than ask the
      ** store about them .  The predicates on the first column of
      ** the conglomerate reduce the number of pages and rows scanned.
      ** The predicates on columns after the first reduce the number
      ** of rows scanned, but have a much smaller effect on the number
      ** of pages scanned, so we keep track of these selectivities in
      ** two separate variables: extraFirstColumnSelectivity and
      ** extraStartStopSelectivity. (Theoretically, we could try to
      ** figure out the effect of predicates after the first column
      ** on the number of pages scanned, but it's too hard, so we
      ** use these predicates only to reduce the estimated number of
      ** rows.  For comparisons with known values, though, the store
      ** can figure out exactly how many rows and pages are scanned.)
      **
      ** Other predicates are not good for start/stop.  We keep track
      ** of their selectvities separately, because these limit the
      ** number of rows, but not the number of pages, and so need to
      ** be factored into the row count but not into the cost.
      ** These selectivities are factored into extraQualifierSelectivity.
      **
      ** statStartStopSelectivity (using statistics) represents the
      ** selectivity of start/stop predicates that can be used to scan
      ** the index. If no statistics exist for the conglomerate then
      ** the value of this variable remains at 1.0
      **
      ** statCompositeSelectivity (using statistics) represents the
      ** selectivity of all the predicates (including NonBaseTable
      ** predicates). This represents the most educated guess [among
      ** all the wild surmises in this routine] as to the number
      ** of rows that will be returned from this joinNode.
      ** If no statistics exist on the table or no statistics at all
      ** can be found to satisfy the predicates at this join opertor,
      ** then statCompositeSelectivity is left initialized at 1.0
      */
      double extraFirstColumnSelectivity = 1.0d;
      double extraStartStopSelectivity = 1.0d;
      double extraQualifierSelectivity = 1.0d;
      double extraNonQualifierSelectivity = 1.0d;
      double statStartStopSelectivity = 1.0d;
      double statCompositeSelectivity = 1.0d;

      int     numExtraFirstColumnPreds = 0;
      int     numExtraStartStopPreds = 0;
      int     numExtraQualifiers = 0;
      int     numExtraNonQualifiers = 0;

      /*
      ** It is possible for something to be a start or stop predicate
      ** without it being possible to use it as a key for cost estimation.
      ** For example, with an index on (c1, c2), and the predicate
      ** c1 = othertable.c3 and c2 = 1, the comparison on c1 is with
      ** an unknown value, so we can't pass it to the store.  This means
      ** we can't pass the comparison on c2 to the store, either.
      **
      ** The following booleans keep track of whether we have seen
      ** gaps in the keys we can pass to the store.
      */
      boolean startGap = false;
      boolean stopGap = false;
      boolean seenFirstColumn = false;

      /*
      ** We need to figure out the number of rows touched to decide
      ** whether to use row locking or table locking.  If the start/stop
      ** conditions are constant (i.e. no joins), the number of rows
      ** touched is the number of rows per scan.  But if the start/stop
      ** conditions contain a join, the number of rows touched must
      ** take the number of outer rows into account.
      */
      boolean constantStartStop = true;
      boolean startStopFound = false;

      /* Count the number of start and stop keys */
      int startKeyNum = 0;
      int stopKeyNum = 0;
      OptimizablePredicate pred;
      int predListSize;

      if (predList != null)
        predListSize = baseTableRestrictionList.size();
      else
        predListSize = 0;

      int startStopPredCount = 0;
      ColumnReference firstColumn = null;
      for (int i = 0; i < predListSize; i++)
      {
        pred = baseTableRestrictionList.getOptPredicate(i);
        boolean startKey = pred.isStartKey();
        boolean stopKey = pred.isStopKey();
        if (startKey || stopKey)
        {
          startStopFound = true;

          if ( ! pred.getReferencedMap().hasSingleBitSet())
          {
            constantStartStop = false;
          }

          boolean knownConstant =
            pred.compareWithKnownConstant(this, true);
          if (startKey)
          {
            if (knownConstant && ( ! startGap ) )
            {
              startKeyNum++;
                if (unknownPredicateList != null)
                  unknownPredicateList.removeOptPredicate(pred);
            }
            else
            {
              startGap = true;
            }
          }

          if (stopKey)
          {
            if (knownConstant && ( ! stopGap ) )
            {
              stopKeyNum++;
                if (unknownPredicateList != null)
                  unknownPredicateList.removeOptPredicate(pred);
            }
            else
            {
              stopGap = true;
            }
          }

          /* If either we are seeing startGap or stopGap because start/stop key is
           * comparison with non-constant, we should multiply the selectivity to
           * extraFirstColumnSelectivity.  Beetle 4787.
           */
          if (startGap || stopGap)
          {
            // Don't include redundant join predicates in selectivity calculations
            if (baseTableRestrictionList.isRedundantPredicate(i))
              continue;

            if (startKey && stopKey)
              startStopPredCount++;

            if (pred.getIndexPosition() == 0)
            {
              extraFirstColumnSelectivity *=
                            pred.selectivity(this);
              if (! seenFirstColumn)
              {
                ValueNode relNode = ((Predicate) pred).getAndNode().getLeftOperand();
                if (relNode instanceof BinaryRelationalOperatorNode)
                  firstColumn = ((BinaryRelationalOperatorNode) relNode).getColumnOperand(this);
                seenFirstColumn = true;
              }
            }
            else
            {
              extraStartStopSelectivity *= pred.selectivity(this);
              numExtraStartStopPreds++;
            }
          }
        }
        else
        {
          // Don't include redundant join predicates in selectivity calculations
          if (baseTableRestrictionList.isRedundantPredicate(i))
          {
            continue;
          }

          /* If we have "like" predicate on the first index column, it is more likely
           * to have a smaller range than "between", so we apply extra selectivity 0.2
           * here.  beetle 4387, 4787.
           */
          if (pred instanceof Predicate)
          {
            ValueNode leftOpnd = ((Predicate) pred).getAndNode().getLeftOperand();
            if (firstColumn != null && leftOpnd instanceof LikeEscapeOperatorNode)
            {
              LikeEscapeOperatorNode likeNode = (LikeEscapeOperatorNode) leftOpnd;
              if (likeNode.getLeftOperand().isParameterNode())
              {
                ValueNode receiver = ((TernaryOperatorNode) likeNode).getReceiver();
                if (receiver instanceof ColumnReference)
                {
                  ColumnReference cr = (ColumnReference) receiver;
                  if (cr.getTableNumber() == firstColumn.getTableNumber() &&
                    cr.getColumnNumber() == firstColumn.getColumnNumber())
                    extraFirstColumnSelectivity *= 0.2;
                }
              }
            }
          }

          if (pred.isQualifier())
          {
            extraQualifierSelectivity *= pred.selectivity(this);
            numExtraQualifiers++;
          }
          else
          {
            extraNonQualifierSelectivity *= pred.selectivity(this);
            numExtraNonQualifiers++;
          }

          /*
          ** Strictly speaking, it shouldn't be necessary to
          ** indicate a gap here, since there should be no more
          ** start/stop predicates, but let's do it, anyway.
          */
          startGap = true;
          stopGap = true;
        }
      }

      if (unknownPredicateList != null)
      {
        statCompositeSelectivity = unknownPredicateList.selectivity(this);
        if (statCompositeSelectivity == -1.0d)
          statCompositeSelectivity = 1.0d;
      }

      if (seenFirstColumn && statisticsForConglomerate &&
        (startStopPredCount > 0))
      {
        statStartStopSelectivity =
          tableDescriptor.selectivityForConglomerate(cd, startStopPredCount);
      }

      /*
      ** Factor the non-base-table predicates into the extra
      ** non-qualifier selectivity, since these will restrict the
      ** number of rows, but not the cost.
      */
      extraNonQualifierSelectivity *=
        currentJoinStrategy.nonBasePredicateSelectivity(this, predList);

      /* Create the start and stop key arrays, and fill them in */
      DataValueDescriptor[] startKeys;
      DataValueDescriptor[] stopKeys;

      if (startKeyNum > 0)
        startKeys = new DataValueDescriptor[startKeyNum];
      else
        startKeys = null;

      if (stopKeyNum > 0)
        stopKeys = new DataValueDescriptor[stopKeyNum];
      else
        stopKeys = null;

      startKeyNum = 0;
      stopKeyNum = 0;
      startGap = false;
      stopGap = false;

      for (int i = 0; i < predListSize; i++)
      {
        pred = baseTableRestrictionList.getOptPredicate(i);
        boolean startKey = pred.isStartKey();
        boolean stopKey = pred.isStopKey();

        if (startKey || stopKey)
        {
          boolean knownConstant = pred.compareWithKnownConstant(this, true);

          if (startKey)
          {
            if (knownConstant && ( ! startGap ) )
            {
              startKeys[startKeyNum] = pred.getCompareValue(this);
              startKeyNum++;
            }
            else
            {
              startGap = true;
            }
          }

          if (stopKey)
          {
            if (knownConstant && ( ! stopGap ) )
            {
              stopKeys[stopKeyNum] = pred.getCompareValue(this);
              stopKeyNum++;
            }
            else
            {
              stopGap = true;
            }
          }
        }
        else
        {
          startGap = true;
          stopGap = true;
        }
      }

      int startOperator;
      int stopOperator;

      if (baseTableRestrictionList != null)
      {
        startOperator = baseTableRestrictionList.startOperator(this);
        stopOperator = baseTableRestrictionList.stopOperator(this);
      }
      else
      {
        /*
        ** If we're doing a full scan, it doesn't matter what the
        ** start and stop operators are.
        */
        startOperator = ScanController.NA;
        stopOperator = ScanController.NA;
      }

      /*
      ** Get a row template for this conglomerate.  For now, just tell
      ** it we are using all the columns in the row.
      */
      DataValueDescriptor[] rowTemplate =
                getRowTemplate(cd, getBaseCostController());

      /* we prefer index than table scan for concurrency reason, by a small
       * adjustment on estimated row count.  This affects optimizer's decision
       * especially when few rows are in table. beetle 5006. This makes sense
       * since the plan may stay long before we actually check and invalidate it.
       * And new rows may be inserted before we check and invalidate the plan.
       * Here we only prefer index that has start/stop key from predicates. Non-
       * constant start/stop key case is taken care of by selectivity later.
       */
      long baseRC = (startKeys != null || stopKeys != null) ? baseRowCount() : baseRowCount() + 5;

      scc.getScanCost(
          currentJoinStrategy.scanCostType(),
          baseRC,
                    1,
          forUpdate(),
          (FormatableBitSet) null,
          rowTemplate,
          startKeys,
          startOperator,
          stopKeys,
          stopOperator,
          false,
          0,
          costEstimate);

      /* initialPositionCost is the first part of the index scan cost we get above.
       * It's the cost of initial positioning/fetch of key.  So it's unrelated to
       * row count of how many rows we fetch from index.  We extract it here so that
       * we only multiply selectivity to the other part of index scan cost, which is
       * nearly linear, to make cost calculation more accurate and fair, especially
       * compared to the plan of "one row result set" (unique index). beetle 4787.
       */
      double initialPositionCost = 0.0;
      if (cd.isIndex())
      {
        initialPositionCost = scc.getFetchFromFullKeyCost((FormatableBitSet) null, 0);
        /* oneRowResultSetForSomeConglom means there's a unique index, but certainly
         * not this one since we are here.  If store knows this non-unique index
         * won't return any row or just returns one row (eg., the predicate is a
         * comparison with constant or almost empty table), we do minor adjustment
         * on cost (affecting decision for covering index) and rc (decision for
         * non-covering). The purpose is favoring unique index. beetle 5006.
         */
        if (oneRowResultSetForSomeConglom && costEstimate.rowCount() <= 1)
        {
          costEstimate.setCost(costEstimate.getEstimatedCost() * 2,
                     costEstimate.rowCount() + 2,
                     costEstimate.singleScanRowCount() + 2);
        }
      }

      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN1,
              tableNumber, 0, 0.0, cd);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN2,
              tableNumber, 0, 0.0, costEstimate);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN3,
              numExtraFirstColumnPreds, 0,
              extraFirstColumnSelectivity, null);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN4,
              numExtraStartStopPreds, 0,
              extraStartStopSelectivity, null);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN7,
              startStopPredCount, 0,
              statStartStopSelectivity, null);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN5,
              numExtraQualifiers, 0,
              extraQualifierSelectivity, null);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN6,
              numExtraNonQualifiers, 0,
              extraNonQualifierSelectivity, null);

      /* initial row count is the row count without applying
         any predicates-- we use this at the end of the routine
         when we use statistics to recompute the row count.
      */
      double initialRowCount = costEstimate.rowCount();

      if (statStartStopSelectivity != 1.0d)
      {
        /*
        ** If statistics exist use the selectivity computed
        ** from the statistics to calculate the cost.
        ** NOTE: we apply this selectivity to the cost as well
        ** as both the row counts. In the absence of statistics
        ** we only applied the FirstColumnSelectivity to the
        ** cost.
        */
        costEstimate.setCost(
               scanCostAfterSelectivity(costEstimate.getEstimatedCost(),
                            initialPositionCost,
                            statStartStopSelectivity,
                            oneRowResultSetForSomeConglom),
               costEstimate.rowCount() * statStartStopSelectivity,
               costEstimate.singleScanRowCount() *
               statStartStopSelectivity);
        optimizer.trace(Optimizer.COST_INCLUDING_STATS_FOR_INDEX,
                tableNumber, 0, 0.0, costEstimate);

      }
      else
      {
        /*
        ** Factor in the extra selectivity on the first column
        ** of the conglomerate (see comment above).
        ** NOTE: In this case we want to apply the selectivity to both
        ** the total row count and singleScanRowCount.
        */
        if (extraFirstColumnSelectivity != 1.0d)
        {
          costEstimate.setCost(
             scanCostAfterSelectivity(costEstimate.getEstimatedCost(),
                          initialPositionCost,
                          extraFirstColumnSelectivity,
                          oneRowResultSetForSomeConglom),
             costEstimate.rowCount() * extraFirstColumnSelectivity,
             costEstimate.singleScanRowCount() * extraFirstColumnSelectivity);
         
          optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_1ST_COL_SELECTIVITY,
                  tableNumber, 0, 0.0, costEstimate);
        }

        /* Factor in the extra start/stop selectivity (see comment above).
         * NOTE: In this case we want to apply the selectivity to both
         * the row count and singleScanRowCount.
         */
        if (extraStartStopSelectivity != 1.0d)
        {
          costEstimate.setCost(
            costEstimate.getEstimatedCost(),
            costEstimate.rowCount() * extraStartStopSelectivity,
            costEstimate.singleScanRowCount() * extraStartStopSelectivity);

          optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_START_STOP,
                  tableNumber, 0, 0.0, costEstimate);
        }
      }

      /*
      ** Figure out whether to do row locking or table locking.
      **
      ** If there are no start/stop predicates, we're doing full
      ** conglomerate scans, so do table locking.
      */
      if (! startStopFound)
      {
        currentAccessPath.setLockMode(
                      TransactionController.MODE_TABLE);

        optimizer.trace(Optimizer.TABLE_LOCK_NO_START_STOP,
                  0, 0, 0.0, null);
      }
      else
      {
        /*
        ** Figure out the number of rows touched.  If all the
        ** start/stop predicates are constant, the number of
        ** rows touched is the number of rows per scan.
        ** This is also true for join strategies that scan the
        ** inner table only once (like hash join) - we can
        ** tell if we have one of those, because
        ** multiplyBaseCostByOuterRows() will return false.
        */
        double rowsTouched = costEstimate.rowCount();

        if ( (! constantStartStop) &&
           currentJoinStrategy.multiplyBaseCostByOuterRows())
        {
          /*
          ** This is a join where the inner table is scanned
          ** more than once, so we have to take the number
          ** of outer rows into account.  The formula for this
          ** works out as follows:
          **
          **  total rows in table = r
          **  number of rows touched per scan = s
          **  number of outer rows = o
          **  proportion of rows touched per scan = s / r
          **  proportion of rows not touched per scan =
          **                    1 - (s / r)
          **  proportion of rows not touched for all scans =
          **                  (1 - (s / r)) ** o
          **  proportion of rows touched for all scans =
          **                  1 - ((1 - (s / r)) ** o)
          **  total rows touched for all scans =
          **              r * (1 - ((1 - (s / r)) ** o))
          **
          ** In doing these calculations, we must be careful not
          ** to divide by zero.  This could happen if there are
          ** no rows in the table.  In this case, let's do table
          ** locking.
          */
          double r = baseRowCount();
          if (r > 0.0)
          {
            double s = costEstimate.rowCount();
            double o = outerCost.rowCount();
            double pRowsNotTouchedPerScan = 1.0 - (s / r);
            double pRowsNotTouchedAllScans =
                    Math.pow(pRowsNotTouchedPerScan, o);
            double pRowsTouchedAllScans =
                    1.0 - pRowsNotTouchedAllScans;
            double rowsTouchedAllScans =
                    r * pRowsTouchedAllScans;

            rowsTouched = rowsTouchedAllScans;
          }
          else
          {
            /* See comments in setLockingBasedOnThreshold */
            rowsTouched = optimizer.tableLockThreshold() + 1;
          }
        }

        setLockingBasedOnThreshold(optimizer, rowsTouched);
      }

      /*
      ** If the index isn't covering, add the cost of getting the
      ** base row.  Only apply extraFirstColumnSelectivity and extraStartStopSelectivity
      ** before we do this, don't apply extraQualifierSelectivity etc.  The
      ** reason is that the row count here should be the number of index rows
      ** (and hence heap rows) we get, and we need to fetch all those rows, even
      ** though later on some of them may be filtered out by other predicates.
      ** beetle 4787.
      */
      if (cd.isIndex() && ( ! isCoveringIndex(cd) ) )
      {
        double singleFetchCost =
            getBaseCostController().getFetchFromRowLocationCost(
                                (FormatableBitSet) null,
                                0);

        cost = singleFetchCost * costEstimate.rowCount();

        costEstimate.setEstimatedCost(
                costEstimate.getEstimatedCost() + cost);

        optimizer.trace(Optimizer.COST_OF_NONCOVERING_INDEX,
                tableNumber, 0, 0.0, costEstimate);
      }

      /* Factor in the extra qualifier selectivity (see comment above).
       * NOTE: In this case we want to apply the selectivity to both
       * the row count and singleScanRowCount.
       */
      if (extraQualifierSelectivity != 1.0d)
      {
        costEstimate.setCost(
            costEstimate.getEstimatedCost(),
            costEstimate.rowCount() * extraQualifierSelectivity,
            costEstimate.singleScanRowCount() * extraQualifierSelectivity);

        optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_QUALIFIER_SELECTIVITY,
                tableNumber, 0, 0.0, costEstimate);
      }

      singleScanRowCount = costEstimate.singleScanRowCount();

      /*
      ** Let the join strategy decide whether the cost of the base
      ** scan is a single scan, or a scan per outer row.
      ** NOTE: In this case we only want to multiply against the
      ** total row count, not the singleScanRowCount.
      ** NOTE: Do not multiply row count if we determined that
      ** conglomerate is a 1 row result set when costing nested
      ** loop.  (eg, we will find at most 1 match when probing
      ** the hash table.)
      */
      double newCost = costEstimate.getEstimatedCost();
      double rowCount = costEstimate.rowCount();

      /*
      ** RESOLVE - If there is a unique index on the joining
      ** columns, the number of matching rows will equal the
      ** number of outer rows, even if we're not considering the
      ** unique index for this access path. To figure that out,
      ** however, would require an analysis phase at the beginning
      ** of optimization. So, we'll always multiply the number
      ** of outer rows by the number of rows per scan. This will
      ** give us a higher than actual row count when there is
      ** such a unique index, which will bias the optimizer toward
      ** using the unique index. This is probably OK most of the
      ** time, since the optimizer would probably choose the
      ** unique index, anyway. But it would be better if the
      ** optimizer set the row count properly in this case.
      */
      if (currentJoinStrategy.multiplyBaseCostByOuterRows())
      {
        newCost *= outerCost.rowCount();
      }

      rowCount *= outerCost.rowCount();
      initialRowCount *= outerCost.rowCount();


      /*
      ** If this table can generate at most one row per scan,
      ** the maximum row count is the number of outer rows.
      ** NOTE: This does not completely take care of the RESOLVE
      ** in the above comment, since it will only notice
      ** one-row result sets for the current join order.
      */
      if (oneRowResultSetForSomeConglom)
      {
        if (outerCost.rowCount() < rowCount)
        {
          rowCount = outerCost.rowCount();
        }
      }

      /*
      ** The estimated cost may be too high for indexes, if the
      ** estimated row count exceeds the maximum. Only do this
      ** if we're not doing a full scan, and the start/stop position
      ** is not constant (i.e. we're doing a join on the first column
      ** of the index) - the reason being that this is when the
      ** cost may be inaccurate.
      */
      if (cd.isIndex() && startStopFound && ( ! constantStartStop ) )
      {
        /*
        ** Does any table outer to this one have a unique key on
        ** a subset of the joining columns? If so, the maximum number
        ** of rows that this table can return is the number of rows
        ** in this table times the number of times the maximum number
        ** of times each key can be repeated.
        */
        double scanUniquenessFactor =
          optimizer.uniqueJoinWithOuterTable(baseTableRestrictionList);
        if (scanUniquenessFactor > 0.0)
        {
          /*
          ** A positive uniqueness factor means there is a unique
          ** outer join key. The value is the reciprocal of the
          ** maximum number of duplicates for each unique key
          ** (the duplicates can be caused by other joining tables).
          */
          double maxRows =
              ((double) baseRowCount()) / scanUniquenessFactor;
          if (rowCount > maxRows)
          {
            /*
            ** The estimated row count is too high. Adjust the
            ** estimated cost downwards proportionately to
            ** match the maximum number of rows.
            */
            newCost *= (maxRows / rowCount);
          }
        }
      }

      /* The estimated total row count may be too high */
      if (tableUniquenessFactor > 0.0)
      {
        /*
        ** A positive uniqueness factor means there is a unique outer
        ** join key. The value is the reciprocal of the maximum number
        ** of duplicates for each unique key (the duplicates can be
        ** caused by other joining tables).
        */
        double maxRows =
              ((double) baseRowCount()) / tableUniquenessFactor;
        if (rowCount > maxRows)
        {
          /*
          ** The estimated row count is too high. Set it to the
          ** maximum row count.
          */
          rowCount = maxRows;
        }
      }

      costEstimate.setCost(
        newCost,
        rowCount,
        costEstimate.singleScanRowCount());


      optimizer.trace(Optimizer.COST_OF_N_SCANS,
              tableNumber, 0, outerCost.rowCount(), costEstimate);

      /*
      ** Now figure in the cost of the non-qualifier predicates.
      ** existsBaseTables have a row count of 1
      */
      double rc = -1, src = -1;
      if (existsBaseTable)
        rc = src = 1;
      // don't factor in extraNonQualifierSelectivity in case of oneRowResultSetForSomeConglom
      // because "1" is the final result and the effect of other predicates already considered
      // beetle 4787
      else if (extraNonQualifierSelectivity != 1.0d)
      {
        rc = oneRowResultSetForSomeConglom ? costEstimate.rowCount() :
                      costEstimate.rowCount() * extraNonQualifierSelectivity;
        src = costEstimate.singleScanRowCount() * extraNonQualifierSelectivity;
      }
      if (rc != -1) // changed
      {
        costEstimate.setCost(costEstimate.getEstimatedCost(), rc, src);
        optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_NONQUALIFIER_SELECTIVITY,
                tableNumber, 0, 0.0, costEstimate);
      }
     
    recomputeRowCount:
      if (statisticsForTable && !oneRowResultSetForSomeConglom &&
        (statCompositeSelectivity != 1.0d))
      {
        /* if we have statistics we should use statistics to calculate
           row  count-- if it has been determined that this table
           returns one row for some conglomerate then there is no need
           to do this recalculation
        */

        double compositeStatRC = initialRowCount * statCompositeSelectivity;
        optimizer.trace(Optimizer.COMPOSITE_SEL_FROM_STATS,
                0, 0, statCompositeSelectivity, null);


        if (tableUniquenessFactor > 0.0)
        {
          /* If the row count from the composite statistics
             comes up more than what the table uniqueness
             factor indicates then lets stick with the current
             row count.
          */
          if (compositeStatRC > (baseRowCount() *
                       tableUniquenessFactor))
           
          {
           
            break recomputeRowCount;
          }
        }
       
        /* set the row count and the single scan row count
           to the initialRowCount. initialRowCount is the product
           of the RC from store * RC of the outerCost.
           Thus RC = initialRowCount * the selectivity from stats.
           SingleRC = RC / outerCost.rowCount().
        */
        costEstimate.setCost(costEstimate.getEstimatedCost(),
                   compositeStatRC,
                   (existsBaseTable) ?
                   1 :
                   compositeStatRC / outerCost.rowCount());
       
        optimizer.trace(Optimizer.COST_INCLUDING_COMPOSITE_SEL_FROM_STATS,
                tableNumber, 0, 0.0, costEstimate);
      }
    }

    /* Put the base predicates back in the predicate list */
    currentJoinStrategy.putBasePredicates(predList,
                     baseTableRestrictionList);
    return costEstimate;
  }
View Full Code Here


  {
    ResultSetNode  retval;
    AccessPath ap = getTrulyTheBestAccessPath();
    ConglomerateDescriptor trulyTheBestConglomerateDescriptor =
                         ap.getConglomerateDescriptor();
    JoinStrategy trulyTheBestJoinStrategy = ap.getJoinStrategy();
    Optimizer optimizer = ap.getOptimizer();

    optimizer.trace(Optimizer.CHANGING_ACCESS_PATH_FOR_TABLE,
            tableNumber, 0, 0.0, null);

    if (SanityManager.DEBUG)
    {
      SanityManager.ASSERT(
        trulyTheBestConglomerateDescriptor != null,
        "Should only modify access path after conglomerate has been chosen.");
    }

    /*
    ** Make sure user-specified bulk fetch is OK with the chosen join
    ** strategy.
    */
    if (bulkFetch != UNSET)
    {
      if ( ! trulyTheBestJoinStrategy.bulkFetchOK())
      {
        throw StandardException.newException(SQLState.LANG_INVALID_BULK_FETCH_WITH_JOIN_TYPE,
                      trulyTheBestJoinStrategy.getName());
      }
      // bulkFetch has no meaning for hash join, just ignore it
      else if (trulyTheBestJoinStrategy.ignoreBulkFetch())
      {
        disableBulkFetch();
      }
      // bug 4431 - ignore bulkfetch property if it's 1 row resultset
      else if (isOneRowResultSet())
      {
        disableBulkFetch();
      }
    }

    // bulkFetch = 1 is the same as no bulk fetch
    if (bulkFetch == 1)
    {
      disableBulkFetch();
    }

    /* Remove any redundant join clauses.  A redundant join clause is one
     * where there are other join clauses in the same equivalence class
     * after it in the PredicateList.
     */
    restrictionList.removeRedundantPredicates();

    /*
    ** Divide up the predicates for different processing phases of the
    ** best join strategy.
    */
    storeRestrictionList = (PredicateList) getNodeFactory().getNode(
                          C_NodeTypes.PREDICATE_LIST,
                          getContextManager());
    nonStoreRestrictionList = (PredicateList) getNodeFactory().getNode(
                          C_NodeTypes.PREDICATE_LIST,
                          getContextManager());
    requalificationRestrictionList =
                  (PredicateList) getNodeFactory().getNode(
                          C_NodeTypes.PREDICATE_LIST,
                          getContextManager());
    trulyTheBestJoinStrategy.divideUpPredicateLists(
                      this,
                      restrictionList,
                      storeRestrictionList,
                      nonStoreRestrictionList,
                      requalificationRestrictionList,
                      getDataDictionary());

    /*
    ** Consider turning on bulkFetch if it is turned
    ** off.  Only turn it on if it is a not an updatable
    ** scan and if it isn't a oneRowResultSet, and
    ** not a subquery, and it is OK to use bulk fetch
    ** with the chosen join strategy.  NOTE: the subquery logic
    ** could be more sophisticated -- we are taking
    ** the safe route in avoiding reading extra
    ** data for something like:
    **
    **  select x from t where x in (select y from t)
     **
    ** In this case we want to stop the subquery
    ** evaluation as soon as something matches.
    */
    if (trulyTheBestJoinStrategy.bulkFetchOK() &&
      !(trulyTheBestJoinStrategy.ignoreBulkFetch()) &&
      ! bulkFetchTurnedOff &&
      (bulkFetch == UNSET) &&
      !forUpdate() &&
      !isOneRowResultSet() &&
      getLevel() == 0)
View Full Code Here

      generateRefActionDependentTableScan(acb, mb);
      return;

    }
 
    JoinStrategy trulyTheBestJoinStrategy =
      getTrulyTheBestAccessPath().getJoinStrategy();

    // the table scan generator is what we return
    acb.pushGetResultSetFactoryExpression(mb);

    int nargs = getScanArguments(acb, mb);

    mb.callMethod(VMOpcode.INVOKEINTERFACE, (String) null,
      trulyTheBestJoinStrategy.resultSetMethodName(bulkFetch != UNSET),
      ClassName.NoPutResultSet, nargs);

    /* If this table is the target of an update or a delete, then we must
     * wrap the Expression up in an assignment expression before
     * returning.
 
View Full Code Here

          indexCols[i] = isAscending[i] ? baseColPos[i] : -baseColPos[i];
        indexColItem = acb.addItem(indexCols);
      }
    }

    JoinStrategy trulyTheBestJoinStrategy =
      getTrulyTheBestAccessPath().getJoinStrategy();

    /*
    ** We can only do bulkFetch on NESTEDLOOP
    */
    if (SanityManager.DEBUG)
    {
      if ( ( ! trulyTheBestJoinStrategy.bulkFetchOK()) &&
        (bulkFetch != UNSET))
      {
        SanityManager.THROWASSERT("bulkFetch should not be set "+
                "for the join strategy " +
                trulyTheBestJoinStrategy.getName());
      }
    }

    int nargs = trulyTheBestJoinStrategy.getScanArgs(
                      getLanguageConnectionContext().getTransactionCompile(),
                      mb,
                      this,
                      storeRestrictionList,
                      nonStoreRestrictionList,
View Full Code Here

     * otherwise we just consider the predicates in the
     * restriction list and the conglomerate being scanned.

     */
    AccessPath ap = getTrulyTheBestAccessPath();
    JoinStrategy trulyTheBestJoinStrategy = ap.getJoinStrategy();
    PredicateList pl;

    if (trulyTheBestJoinStrategy.isHashJoin())
    {
      pl = (PredicateList) getNodeFactory().getNode(
                      C_NodeTypes.PREDICATE_LIST,
                      getContextManager());
      if (storeRestrictionList != null)
View Full Code Here

    return joinStrategies[whichStrategy];
  }

  /** @see Optimizer#getJoinStrategy */
  public JoinStrategy getJoinStrategy(String whichStrategy) {
    JoinStrategy retval = null;
    String upperValue = StringUtil.SQLToUpperCase(whichStrategy);

    for (int i = 0; i < joinStrategies.length; i++) {
      if (upperValue.equals(joinStrategies[i].getName())) {
        retval = joinStrategies[i];
View Full Code Here

      predList.copyPredicatesToOtherList(unknownPredicateList);

    }

    AccessPath currentAccessPath = getCurrentAccessPath();
    JoinStrategy currentJoinStrategy =
      currentAccessPath.getJoinStrategy();

    optimizer.trace(Optimizer.ESTIMATING_COST_OF_CONGLOMERATE,
            tableNumber, 0, 0.0, cd);

    /* Get the uniqueness factory for later use (see below) */
    double tableUniquenessFactor =
        optimizer.uniqueJoinWithOuterTable(predList);

    boolean oneRowResultSetForSomeConglom = isOneRowResultSet(predList);

    /* Get the predicates that can be used for scanning the base table */
    baseTableRestrictionList.removeAllElements();

    currentJoinStrategy.getBasePredicates(predList, 
                     baseTableRestrictionList,
                     this);
                 
    /* RESOLVE: Need to figure out how to cache the StoreCostController */
    StoreCostController scc = getStoreCostController(cd);

    CostEstimate costEstimate = getScratchCostEstimate(optimizer);

    /* First, get the cost for one scan */

    /* Does the conglomerate match at most one row? */
    if (isOneRowResultSet(cd, baseTableRestrictionList))
    {
      /*
      ** Tell the RowOrdering that this optimizable is always ordered.
      ** It will figure out whether it is really always ordered in the
      ** context of the outer tables and their orderings.
      */
      rowOrdering.optimizableAlwaysOrdered(this);

      singleScanRowCount = 1.0;

      /* Yes, the cost is to fetch exactly one row */
      // RESOLVE: NEED TO FIGURE OUT HOW TO GET REFERENCED COLUMN LIST,
      // FIELD STATES, AND ACCESS TYPE
      cost = scc.getFetchFromFullKeyCost(
                    (FormatableBitSet) null,
                    0);

      optimizer.trace(Optimizer.MATCH_SINGLE_ROW_COST,
              tableNumber, 0, cost, null);

      costEstimate.setCost(cost, 1.0d, 1.0d);

      /*
      ** Let the join strategy decide whether the cost of the base
      ** scan is a single scan, or a scan per outer row.
      ** NOTE: The multiplication should only be done against the
      ** total row count, not the singleScanRowCount.
      */
      double newCost = costEstimate.getEstimatedCost();

      if (currentJoinStrategy.multiplyBaseCostByOuterRows())
      {
        newCost *= outerCost.rowCount();
      }

      costEstimate.setCost(
        newCost,
        costEstimate.rowCount() * outerCost.rowCount(),
        costEstimate.singleScanRowCount());

      /*
      ** Choose the lock mode.  If the start/stop conditions are
      ** constant, choose row locking, because we will always match
      ** the same row.  If they are not constant (i.e. they include
      ** a join), we decide whether to do row locking based on
      ** the total number of rows for the life of the query.
      */
      boolean constantStartStop = true;
      for (int i = 0; i < predList.size(); i++)
      {
        OptimizablePredicate pred = predList.getOptPredicate(i);

        /*
        ** The predicates are in index order, so the start and
        ** stop keys should be first.
        */
        if ( ! (pred.isStartKey() || pred.isStopKey()))
        {
          break;
        }

        /* Stop when we've found a join */
        if ( ! pred.getReferencedMap().hasSingleBitSet())
        {
          constantStartStop = false;
          break;
        }
      }

      if (constantStartStop)
      {
        currentAccessPath.setLockMode(
                      TransactionController.MODE_RECORD);

        optimizer.trace(Optimizer.ROW_LOCK_ALL_CONSTANT_START_STOP,
                0, 0, 0.0, null);
      }
      else
      {
        setLockingBasedOnThreshold(optimizer, costEstimate.rowCount());
      }

      optimizer.trace(Optimizer.COST_OF_N_SCANS,
              tableNumber, 0, outerCost.rowCount(), costEstimate);

      /* Add in cost of fetching base row for non-covering index */
      if (cd.isIndex() && ( ! isCoveringIndex(cd) ) )
      {
        double singleFetchCost =
            getBaseCostController().getFetchFromRowLocationCost(
                                (FormatableBitSet) null,
                                0);
        cost = singleFetchCost * costEstimate.rowCount();

        costEstimate.setEstimatedCost(
                costEstimate.getEstimatedCost() + cost);

        optimizer.trace(Optimizer.NON_COVERING_INDEX_COST,
                tableNumber, 0, cost, null);
      }
    }
    else
    {
      /* Conglomerate might match more than one row */

      /*
      ** Some predicates are good for start/stop, but we don't know
      ** the values they are being compared to at this time, so we
      ** estimate their selectivity in language rather than ask the
      ** store about them .  The predicates on the first column of
      ** the conglomerate reduce the number of pages and rows scanned.
      ** The predicates on columns after the first reduce the number
      ** of rows scanned, but have a much smaller effect on the number
      ** of pages scanned, so we keep track of these selectivities in
      ** two separate variables: extraFirstColumnSelectivity and
      ** extraStartStopSelectivity. (Theoretically, we could try to
      ** figure out the effect of predicates after the first column
      ** on the number of pages scanned, but it's too hard, so we
      ** use these predicates only to reduce the estimated number of
      ** rows.  For comparisons with known values, though, the store
      ** can figure out exactly how many rows and pages are scanned.)
      **
      ** Other predicates are not good for start/stop.  We keep track
      ** of their selectvities separately, because these limit the
      ** number of rows, but not the number of pages, and so need to
      ** be factored into the row count but not into the cost.
      ** These selectivities are factored into extraQualifierSelectivity.
      **
      ** statStartStopSelectivity (using statistics) represents the
      ** selectivity of start/stop predicates that can be used to scan
      ** the index. If no statistics exist for the conglomerate then
      ** the value of this variable remains at 1.0
      **
      ** statCompositeSelectivity (using statistics) represents the
      ** selectivity of all the predicates (including NonBaseTable
      ** predicates). This represents the most educated guess [among
      ** all the wild surmises in this routine] as to the number
      ** of rows that will be returned from this joinNode.
      ** If no statistics exist on the table or no statistics at all
      ** can be found to satisfy the predicates at this join opertor,
      ** then statCompositeSelectivity is left initialized at 1.0
      */
      double extraFirstColumnSelectivity = 1.0d;
      double extraStartStopSelectivity = 1.0d;
      double extraQualifierSelectivity = 1.0d;
      double extraNonQualifierSelectivity = 1.0d;
      double statStartStopSelectivity = 1.0d;
      double statCompositeSelectivity = 1.0d;

      int     numExtraFirstColumnPreds = 0;
      int     numExtraStartStopPreds = 0;
      int     numExtraQualifiers = 0;
      int     numExtraNonQualifiers = 0;

      /*
      ** It is possible for something to be a start or stop predicate
      ** without it being possible to use it as a key for cost estimation.
      ** For example, with an index on (c1, c2), and the predicate
      ** c1 = othertable.c3 and c2 = 1, the comparison on c1 is with
      ** an unknown value, so we can't pass it to the store.  This means
      ** we can't pass the comparison on c2 to the store, either.
      **
      ** The following booleans keep track of whether we have seen
      ** gaps in the keys we can pass to the store.
      */
      boolean startGap = false;
      boolean stopGap = false;
      boolean seenFirstColumn = false;

      /*
      ** We need to figure out the number of rows touched to decide
      ** whether to use row locking or table locking.  If the start/stop
      ** conditions are constant (i.e. no joins), the number of rows
      ** touched is the number of rows per scan.  But if the start/stop
      ** conditions contain a join, the number of rows touched must
      ** take the number of outer rows into account.
      */
      boolean constantStartStop = true;
      boolean startStopFound = false;

      /* Count the number of start and stop keys */
      int startKeyNum = 0;
      int stopKeyNum = 0;
      OptimizablePredicate pred;
      int predListSize;

      if (predList != null)
        predListSize = baseTableRestrictionList.size();
      else
        predListSize = 0;

      int startStopPredCount = 0;
      ColumnReference firstColumn = null;
      for (int i = 0; i < predListSize; i++)
      {
        pred = baseTableRestrictionList.getOptPredicate(i);
        boolean startKey = pred.isStartKey();
        boolean stopKey = pred.isStopKey();
        if (startKey || stopKey)
        {
          startStopFound = true;

          if ( ! pred.getReferencedMap().hasSingleBitSet())
          {
            constantStartStop = false;
          }

          boolean knownConstant =
            pred.compareWithKnownConstant(this, true);
          if (startKey)
          {
            if (knownConstant && ( ! startGap ) )
            {
              startKeyNum++;
                if (unknownPredicateList != null)
                  unknownPredicateList.removeOptPredicate(pred);
            }
            else
            {
              startGap = true;
            }
          }

          if (stopKey)
          {
            if (knownConstant && ( ! stopGap ) )
            {
              stopKeyNum++;
                if (unknownPredicateList != null)
                  unknownPredicateList.removeOptPredicate(pred);
            }
            else
            {
              stopGap = true;
            }
          }

          /* If either we are seeing startGap or stopGap because start/stop key is
           * comparison with non-constant, we should multiply the selectivity to
           * extraFirstColumnSelectivity.  Beetle 4787.
           */
          if (startGap || stopGap)
          {
            // Don't include redundant join predicates in selectivity calculations
            if (baseTableRestrictionList.isRedundantPredicate(i))
              continue;

            if (startKey && stopKey)
              startStopPredCount++;

            if (pred.getIndexPosition() == 0)
            {
              extraFirstColumnSelectivity *=
                            pred.selectivity(this);
              if (! seenFirstColumn)
              {
                ValueNode relNode = ((Predicate) pred).getAndNode().getLeftOperand();
                if (relNode instanceof BinaryRelationalOperatorNode)
                  firstColumn = ((BinaryRelationalOperatorNode) relNode).getColumnOperand(this);
                seenFirstColumn = true;
              }
            }
            else
            {
              extraStartStopSelectivity *= pred.selectivity(this);
              numExtraStartStopPreds++;
            }
          }
        }
        else
        {
          // Don't include redundant join predicates in selectivity calculations
          if (baseTableRestrictionList.isRedundantPredicate(i))
          {
            continue;
          }

          /* If we have "like" predicate on the first index column, it is more likely
           * to have a smaller range than "between", so we apply extra selectivity 0.2
           * here.  beetle 4387, 4787.
           */
          if (pred instanceof Predicate)
          {
            ValueNode leftOpnd = ((Predicate) pred).getAndNode().getLeftOperand();
            if (firstColumn != null && leftOpnd instanceof LikeEscapeOperatorNode)
            {
              LikeEscapeOperatorNode likeNode = (LikeEscapeOperatorNode) leftOpnd;
              if (likeNode.getLeftOperand().requiresTypeFromContext())
              {
                ValueNode receiver = ((TernaryOperatorNode) likeNode).getReceiver();
                if (receiver instanceof ColumnReference)
                {
                  ColumnReference cr = (ColumnReference) receiver;
                  if (cr.getTableNumber() == firstColumn.getTableNumber() &&
                    cr.getColumnNumber() == firstColumn.getColumnNumber())
                    extraFirstColumnSelectivity *= 0.2;
                }
              }
            }
          }

          if (pred.isQualifier())
          {
            extraQualifierSelectivity *= pred.selectivity(this);
            numExtraQualifiers++;
          }
          else
          {
            extraNonQualifierSelectivity *= pred.selectivity(this);
            numExtraNonQualifiers++;
          }

          /*
          ** Strictly speaking, it shouldn't be necessary to
          ** indicate a gap here, since there should be no more
          ** start/stop predicates, but let's do it, anyway.
          */
          startGap = true;
          stopGap = true;
        }
      }

      if (unknownPredicateList != null)
      {
        statCompositeSelectivity = unknownPredicateList.selectivity(this);
        if (statCompositeSelectivity == -1.0d)
          statCompositeSelectivity = 1.0d;
      }

      if (seenFirstColumn && statisticsForConglomerate &&
        (startStopPredCount > 0))
      {
        statStartStopSelectivity =
          tableDescriptor.selectivityForConglomerate(cd, startStopPredCount);
      }

      /*
      ** Factor the non-base-table predicates into the extra
      ** non-qualifier selectivity, since these will restrict the
      ** number of rows, but not the cost.
      */
      extraNonQualifierSelectivity *=
        currentJoinStrategy.nonBasePredicateSelectivity(this, predList);

      /* Create the start and stop key arrays, and fill them in */
      DataValueDescriptor[] startKeys;
      DataValueDescriptor[] stopKeys;

      if (startKeyNum > 0)
        startKeys = new DataValueDescriptor[startKeyNum];
      else
        startKeys = null;

      if (stopKeyNum > 0)
        stopKeys = new DataValueDescriptor[stopKeyNum];
      else
        stopKeys = null;

      startKeyNum = 0;
      stopKeyNum = 0;
      startGap = false;
      stopGap = false;

      for (int i = 0; i < predListSize; i++)
      {
        pred = baseTableRestrictionList.getOptPredicate(i);
        boolean startKey = pred.isStartKey();
        boolean stopKey = pred.isStopKey();

        if (startKey || stopKey)
        {
          boolean knownConstant = pred.compareWithKnownConstant(this, true);

          if (startKey)
          {
            if (knownConstant && ( ! startGap ) )
            {
              startKeys[startKeyNum] = pred.getCompareValue(this);
              startKeyNum++;
            }
            else
            {
              startGap = true;
            }
          }

          if (stopKey)
          {
            if (knownConstant && ( ! stopGap ) )
            {
              stopKeys[stopKeyNum] = pred.getCompareValue(this);
              stopKeyNum++;
            }
            else
            {
              stopGap = true;
            }
          }
        }
        else
        {
          startGap = true;
          stopGap = true;
        }
      }

      int startOperator;
      int stopOperator;

      if (baseTableRestrictionList != null)
      {
        startOperator = baseTableRestrictionList.startOperator(this);
        stopOperator = baseTableRestrictionList.stopOperator(this);
      }
      else
      {
        /*
        ** If we're doing a full scan, it doesn't matter what the
        ** start and stop operators are.
        */
        startOperator = ScanController.NA;
        stopOperator = ScanController.NA;
      }

      /*
      ** Get a row template for this conglomerate.  For now, just tell
      ** it we are using all the columns in the row.
      */
      DataValueDescriptor[] rowTemplate =
                getRowTemplate(cd, getBaseCostController());

      /* we prefer index than table scan for concurrency reason, by a small
       * adjustment on estimated row count.  This affects optimizer's decision
       * especially when few rows are in table. beetle 5006. This makes sense
       * since the plan may stay long before we actually check and invalidate it.
       * And new rows may be inserted before we check and invalidate the plan.
       * Here we only prefer index that has start/stop key from predicates. Non-
       * constant start/stop key case is taken care of by selectivity later.
       */
      long baseRC = (startKeys != null || stopKeys != null) ? baseRowCount() : baseRowCount() + 5;

      scc.getScanCost(
          currentJoinStrategy.scanCostType(),
          baseRC,
                    1,
          forUpdate(),
          (FormatableBitSet) null,
          rowTemplate,
          startKeys,
          startOperator,
          stopKeys,
          stopOperator,
          false,
          0,
          costEstimate);

      /* initialPositionCost is the first part of the index scan cost we get above.
       * It's the cost of initial positioning/fetch of key.  So it's unrelated to
       * row count of how many rows we fetch from index.  We extract it here so that
       * we only multiply selectivity to the other part of index scan cost, which is
       * nearly linear, to make cost calculation more accurate and fair, especially
       * compared to the plan of "one row result set" (unique index). beetle 4787.
       */
      double initialPositionCost = 0.0;
      if (cd.isIndex())
      {
        initialPositionCost = scc.getFetchFromFullKeyCost((FormatableBitSet) null, 0);
        /* oneRowResultSetForSomeConglom means there's a unique index, but certainly
         * not this one since we are here.  If store knows this non-unique index
         * won't return any row or just returns one row (eg., the predicate is a
         * comparison with constant or almost empty table), we do minor adjustment
         * on cost (affecting decision for covering index) and rc (decision for
         * non-covering). The purpose is favoring unique index. beetle 5006.
         */
        if (oneRowResultSetForSomeConglom && costEstimate.rowCount() <= 1)
        {
          costEstimate.setCost(costEstimate.getEstimatedCost() * 2,
                     costEstimate.rowCount() + 2,
                     costEstimate.singleScanRowCount() + 2);
        }
      }

      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN1,
              tableNumber, 0, 0.0, cd);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN2,
              tableNumber, 0, 0.0, costEstimate);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN3,
              numExtraFirstColumnPreds, 0,
              extraFirstColumnSelectivity, null);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN4,
              numExtraStartStopPreds, 0,
              extraStartStopSelectivity, null);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN7,
              startStopPredCount, 0,
              statStartStopSelectivity, null);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN5,
              numExtraQualifiers, 0,
              extraQualifierSelectivity, null);
      optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN6,
              numExtraNonQualifiers, 0,
              extraNonQualifierSelectivity, null);

      /* initial row count is the row count without applying
         any predicates-- we use this at the end of the routine
         when we use statistics to recompute the row count.
      */
      double initialRowCount = costEstimate.rowCount();

      if (statStartStopSelectivity != 1.0d)
      {
        /*
        ** If statistics exist use the selectivity computed
        ** from the statistics to calculate the cost.
        ** NOTE: we apply this selectivity to the cost as well
        ** as both the row counts. In the absence of statistics
        ** we only applied the FirstColumnSelectivity to the
        ** cost.
        */
        costEstimate.setCost(
               scanCostAfterSelectivity(costEstimate.getEstimatedCost(),
                            initialPositionCost,
                            statStartStopSelectivity,
                            oneRowResultSetForSomeConglom),
               costEstimate.rowCount() * statStartStopSelectivity,
               costEstimate.singleScanRowCount() *
               statStartStopSelectivity);
        optimizer.trace(Optimizer.COST_INCLUDING_STATS_FOR_INDEX,
                tableNumber, 0, 0.0, costEstimate);

      }
      else
      {
        /*
        ** Factor in the extra selectivity on the first column
        ** of the conglomerate (see comment above).
        ** NOTE: In this case we want to apply the selectivity to both
        ** the total row count and singleScanRowCount.
        */
        if (extraFirstColumnSelectivity != 1.0d)
        {
          costEstimate.setCost(
             scanCostAfterSelectivity(costEstimate.getEstimatedCost(),
                          initialPositionCost,
                          extraFirstColumnSelectivity,
                          oneRowResultSetForSomeConglom),
             costEstimate.rowCount() * extraFirstColumnSelectivity,
             costEstimate.singleScanRowCount() * extraFirstColumnSelectivity);
         
          optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_1ST_COL_SELECTIVITY,
                  tableNumber, 0, 0.0, costEstimate);
        }

        /* Factor in the extra start/stop selectivity (see comment above).
         * NOTE: In this case we want to apply the selectivity to both
         * the row count and singleScanRowCount.
         */
        if (extraStartStopSelectivity != 1.0d)
        {
          costEstimate.setCost(
            costEstimate.getEstimatedCost(),
            costEstimate.rowCount() * extraStartStopSelectivity,
            costEstimate.singleScanRowCount() * extraStartStopSelectivity);

          optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_START_STOP,
                  tableNumber, 0, 0.0, costEstimate);
        }
      }

      /*
      ** Figure out whether to do row locking or table locking.
      **
      ** If there are no start/stop predicates, we're doing full
      ** conglomerate scans, so do table locking.
      */
      if (! startStopFound)
      {
        currentAccessPath.setLockMode(
                      TransactionController.MODE_TABLE);

        optimizer.trace(Optimizer.TABLE_LOCK_NO_START_STOP,
                  0, 0, 0.0, null);
      }
      else
      {
        /*
        ** Figure out the number of rows touched.  If all the
        ** start/stop predicates are constant, the number of
        ** rows touched is the number of rows per scan.
        ** This is also true for join strategies that scan the
        ** inner table only once (like hash join) - we can
        ** tell if we have one of those, because
        ** multiplyBaseCostByOuterRows() will return false.
        */
        double rowsTouched = costEstimate.rowCount();

        if ( (! constantStartStop) &&
           currentJoinStrategy.multiplyBaseCostByOuterRows())
        {
          /*
          ** This is a join where the inner table is scanned
          ** more than once, so we have to take the number
          ** of outer rows into account.  The formula for this
          ** works out as follows:
          **
          **  total rows in table = r
          **  number of rows touched per scan = s
          **  number of outer rows = o
          **  proportion of rows touched per scan = s / r
          **  proportion of rows not touched per scan =
          **                    1 - (s / r)
          **  proportion of rows not touched for all scans =
          **                  (1 - (s / r)) ** o
          **  proportion of rows touched for all scans =
          **                  1 - ((1 - (s / r)) ** o)
          **  total rows touched for all scans =
          **              r * (1 - ((1 - (s / r)) ** o))
          **
          ** In doing these calculations, we must be careful not
          ** to divide by zero.  This could happen if there are
          ** no rows in the table.  In this case, let's do table
          ** locking.
          */
          double r = baseRowCount();
          if (r > 0.0)
          {
            double s = costEstimate.rowCount();
            double o = outerCost.rowCount();
            double pRowsNotTouchedPerScan = 1.0 - (s / r);
            double pRowsNotTouchedAllScans =
                    Math.pow(pRowsNotTouchedPerScan, o);
            double pRowsTouchedAllScans =
                    1.0 - pRowsNotTouchedAllScans;
            double rowsTouchedAllScans =
                    r * pRowsTouchedAllScans;

            rowsTouched = rowsTouchedAllScans;
          }
          else
          {
            /* See comments in setLockingBasedOnThreshold */
            rowsTouched = optimizer.tableLockThreshold() + 1;
          }
        }

        setLockingBasedOnThreshold(optimizer, rowsTouched);
      }

      /*
      ** If the index isn't covering, add the cost of getting the
      ** base row.  Only apply extraFirstColumnSelectivity and extraStartStopSelectivity
      ** before we do this, don't apply extraQualifierSelectivity etc.  The
      ** reason is that the row count here should be the number of index rows
      ** (and hence heap rows) we get, and we need to fetch all those rows, even
      ** though later on some of them may be filtered out by other predicates.
      ** beetle 4787.
      */
      if (cd.isIndex() && ( ! isCoveringIndex(cd) ) )
      {
        double singleFetchCost =
            getBaseCostController().getFetchFromRowLocationCost(
                                (FormatableBitSet) null,
                                0);

        cost = singleFetchCost * costEstimate.rowCount();

        costEstimate.setEstimatedCost(
                costEstimate.getEstimatedCost() + cost);

        optimizer.trace(Optimizer.COST_OF_NONCOVERING_INDEX,
                tableNumber, 0, 0.0, costEstimate);
      }

      /* Factor in the extra qualifier selectivity (see comment above).
       * NOTE: In this case we want to apply the selectivity to both
       * the row count and singleScanRowCount.
       */
      if (extraQualifierSelectivity != 1.0d)
      {
        costEstimate.setCost(
            costEstimate.getEstimatedCost(),
            costEstimate.rowCount() * extraQualifierSelectivity,
            costEstimate.singleScanRowCount() * extraQualifierSelectivity);

        optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_QUALIFIER_SELECTIVITY,
                tableNumber, 0, 0.0, costEstimate);
      }

      singleScanRowCount = costEstimate.singleScanRowCount();

      /*
      ** Let the join strategy decide whether the cost of the base
      ** scan is a single scan, or a scan per outer row.
      ** NOTE: In this case we only want to multiply against the
      ** total row count, not the singleScanRowCount.
      ** NOTE: Do not multiply row count if we determined that
      ** conglomerate is a 1 row result set when costing nested
      ** loop.  (eg, we will find at most 1 match when probing
      ** the hash table.)
      */
      double newCost = costEstimate.getEstimatedCost();
      double rowCount = costEstimate.rowCount();

      /*
      ** RESOLVE - If there is a unique index on the joining
      ** columns, the number of matching rows will equal the
      ** number of outer rows, even if we're not considering the
      ** unique index for this access path. To figure that out,
      ** however, would require an analysis phase at the beginning
      ** of optimization. So, we'll always multiply the number
      ** of outer rows by the number of rows per scan. This will
      ** give us a higher than actual row count when there is
      ** such a unique index, which will bias the optimizer toward
      ** using the unique index. This is probably OK most of the
      ** time, since the optimizer would probably choose the
      ** unique index, anyway. But it would be better if the
      ** optimizer set the row count properly in this case.
      */
      if (currentJoinStrategy.multiplyBaseCostByOuterRows())
      {
        newCost *= outerCost.rowCount();
      }

      rowCount *= outerCost.rowCount();
      initialRowCount *= outerCost.rowCount();


      /*
      ** If this table can generate at most one row per scan,
      ** the maximum row count is the number of outer rows.
      ** NOTE: This does not completely take care of the RESOLVE
      ** in the above comment, since it will only notice
      ** one-row result sets for the current join order.
      */
      if (oneRowResultSetForSomeConglom)
      {
        if (outerCost.rowCount() < rowCount)
        {
          rowCount = outerCost.rowCount();
        }
      }

      /*
      ** The estimated cost may be too high for indexes, if the
      ** estimated row count exceeds the maximum. Only do this
      ** if we're not doing a full scan, and the start/stop position
      ** is not constant (i.e. we're doing a join on the first column
      ** of the index) - the reason being that this is when the
      ** cost may be inaccurate.
      */
      if (cd.isIndex() && startStopFound && ( ! constantStartStop ) )
      {
        /*
        ** Does any table outer to this one have a unique key on
        ** a subset of the joining columns? If so, the maximum number
        ** of rows that this table can return is the number of rows
        ** in this table times the number of times the maximum number
        ** of times each key can be repeated.
        */
        double scanUniquenessFactor =
          optimizer.uniqueJoinWithOuterTable(baseTableRestrictionList);
        if (scanUniquenessFactor > 0.0)
        {
          /*
          ** A positive uniqueness factor means there is a unique
          ** outer join key. The value is the reciprocal of the
          ** maximum number of duplicates for each unique key
          ** (the duplicates can be caused by other joining tables).
          */
          double maxRows =
              ((double) baseRowCount()) / scanUniquenessFactor;
          if (rowCount > maxRows)
          {
            /*
            ** The estimated row count is too high. Adjust the
            ** estimated cost downwards proportionately to
            ** match the maximum number of rows.
            */
            newCost *= (maxRows / rowCount);
          }
        }
      }

      /* The estimated total row count may be too high */
      if (tableUniquenessFactor > 0.0)
      {
        /*
        ** A positive uniqueness factor means there is a unique outer
        ** join key. The value is the reciprocal of the maximum number
        ** of duplicates for each unique key (the duplicates can be
        ** caused by other joining tables).
        */
        double maxRows =
              ((double) baseRowCount()) / tableUniquenessFactor;
        if (rowCount > maxRows)
        {
          /*
          ** The estimated row count is too high. Set it to the
          ** maximum row count.
          */
          rowCount = maxRows;
        }
      }

      costEstimate.setCost(
        newCost,
        rowCount,
        costEstimate.singleScanRowCount());


      optimizer.trace(Optimizer.COST_OF_N_SCANS,
              tableNumber, 0, outerCost.rowCount(), costEstimate);

      /*
      ** Now figure in the cost of the non-qualifier predicates.
      ** existsBaseTables have a row count of 1
      */
      double rc = -1, src = -1;
      if (existsBaseTable)
        rc = src = 1;
      // don't factor in extraNonQualifierSelectivity in case of oneRowResultSetForSomeConglom
      // because "1" is the final result and the effect of other predicates already considered
      // beetle 4787
      else if (extraNonQualifierSelectivity != 1.0d)
      {
        rc = oneRowResultSetForSomeConglom ? costEstimate.rowCount() :
                      costEstimate.rowCount() * extraNonQualifierSelectivity;
        src = costEstimate.singleScanRowCount() * extraNonQualifierSelectivity;
      }
      if (rc != -1) // changed
      {
        costEstimate.setCost(costEstimate.getEstimatedCost(), rc, src);
        optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_NONQUALIFIER_SELECTIVITY,
                tableNumber, 0, 0.0, costEstimate);
      }
     
    recomputeRowCount:
      if (statisticsForTable && !oneRowResultSetForSomeConglom &&
        (statCompositeSelectivity != 1.0d))
      {
        /* if we have statistics we should use statistics to calculate
           row  count-- if it has been determined that this table
           returns one row for some conglomerate then there is no need
           to do this recalculation
        */

        double compositeStatRC = initialRowCount * statCompositeSelectivity;
        optimizer.trace(Optimizer.COMPOSITE_SEL_FROM_STATS,
                0, 0, statCompositeSelectivity, null);


        if (tableUniquenessFactor > 0.0)
        {
          /* If the row count from the composite statistics
             comes up more than what the table uniqueness
             factor indicates then lets stick with the current
             row count.
          */
          if (compositeStatRC > (baseRowCount() *
                       tableUniquenessFactor))
           
          {
           
            break recomputeRowCount;
          }
        }
       
        /* set the row count and the single scan row count
           to the initialRowCount. initialRowCount is the product
           of the RC from store * RC of the outerCost.
           Thus RC = initialRowCount * the selectivity from stats.
           SingleRC = RC / outerCost.rowCount().
        */
        costEstimate.setCost(costEstimate.getEstimatedCost(),
                   compositeStatRC,
                   (existsBaseTable) ?
                   1 :
                   compositeStatRC / outerCost.rowCount());
       
        optimizer.trace(Optimizer.COST_INCLUDING_COMPOSITE_SEL_FROM_STATS,
                tableNumber, 0, 0.0, costEstimate);
      }
    }

    /* Put the base predicates back in the predicate list */
    currentJoinStrategy.putBasePredicates(predList,
                     baseTableRestrictionList);
    return costEstimate;
  }
View Full Code Here

  {
    ResultSetNode  retval;
    AccessPath ap = getTrulyTheBestAccessPath();
    ConglomerateDescriptor trulyTheBestConglomerateDescriptor =
                         ap.getConglomerateDescriptor();
    JoinStrategy trulyTheBestJoinStrategy = ap.getJoinStrategy();
    Optimizer optimizer = ap.getOptimizer();

    optimizer.trace(Optimizer.CHANGING_ACCESS_PATH_FOR_TABLE,
            tableNumber, 0, 0.0, null);

    if (SanityManager.DEBUG)
    {
      SanityManager.ASSERT(
        trulyTheBestConglomerateDescriptor != null,
        "Should only modify access path after conglomerate has been chosen.");
    }

    /*
    ** Make sure user-specified bulk fetch is OK with the chosen join
    ** strategy.
    */
    if (bulkFetch != UNSET)
    {
      if ( ! trulyTheBestJoinStrategy.bulkFetchOK())
      {
        throw StandardException.newException(SQLState.LANG_INVALID_BULK_FETCH_WITH_JOIN_TYPE,
                      trulyTheBestJoinStrategy.getName());
      }
      // bulkFetch has no meaning for hash join, just ignore it
      else if (trulyTheBestJoinStrategy.ignoreBulkFetch())
      {
        disableBulkFetch();
      }
      // bug 4431 - ignore bulkfetch property if it's 1 row resultset
      else if (isOneRowResultSet())
      {
        disableBulkFetch();
      }
    }

    // bulkFetch = 1 is the same as no bulk fetch
    if (bulkFetch == 1)
    {
      disableBulkFetch();
    }

    /* Remove any redundant join clauses.  A redundant join clause is one
     * where there are other join clauses in the same equivalence class
     * after it in the PredicateList.
     */
    restrictionList.removeRedundantPredicates();

    /*
    ** Divide up the predicates for different processing phases of the
    ** best join strategy.
    */
    storeRestrictionList = (PredicateList) getNodeFactory().getNode(
                          C_NodeTypes.PREDICATE_LIST,
                          getContextManager());
    nonStoreRestrictionList = (PredicateList) getNodeFactory().getNode(
                          C_NodeTypes.PREDICATE_LIST,
                          getContextManager());
    requalificationRestrictionList =
                  (PredicateList) getNodeFactory().getNode(
                          C_NodeTypes.PREDICATE_LIST,
                          getContextManager());
    trulyTheBestJoinStrategy.divideUpPredicateLists(
                      this,
                      restrictionList,
                      storeRestrictionList,
                      nonStoreRestrictionList,
                      requalificationRestrictionList,
                      getDataDictionary());

    /*
    ** Consider turning on bulkFetch if it is turned
    ** off.  Only turn it on if it is a not an updatable
    ** scan and if it isn't a oneRowResultSet, and
    ** not a subquery, and it is OK to use bulk fetch
    ** with the chosen join strategy.  NOTE: the subquery logic
    ** could be more sophisticated -- we are taking
    ** the safe route in avoiding reading extra
    ** data for something like:
    **
    **  select x from t where x in (select y from t)
     **
    ** In this case we want to stop the subquery
    ** evaluation as soon as something matches.
    */
    if (trulyTheBestJoinStrategy.bulkFetchOK() &&
      !(trulyTheBestJoinStrategy.ignoreBulkFetch()) &&
      ! bulkFetchTurnedOff &&
      (bulkFetch == UNSET) &&
      !forUpdate() &&
      !isOneRowResultSet() &&
      getLevel() == 0)
View Full Code Here

      generateRefActionDependentTableScan(acb, mb);
      return;

    }
 
    JoinStrategy trulyTheBestJoinStrategy =
      getTrulyTheBestAccessPath().getJoinStrategy();

    // the table scan generator is what we return
    acb.pushGetResultSetFactoryExpression(mb);

    int nargs = getScanArguments(acb, mb);

    mb.callMethod(VMOpcode.INVOKEINTERFACE, (String) null,
      trulyTheBestJoinStrategy.resultSetMethodName(bulkFetch != UNSET),
      ClassName.NoPutResultSet, nargs);

    /* If this table is the target of an update or a delete, then we must
     * wrap the Expression up in an assignment expression before
     * returning.
 
View Full Code Here

        indexColItem = acb.addItem(indexCols);
      }
    }

        AccessPath ap = getTrulyTheBestAccessPath();
    JoinStrategy trulyTheBestJoinStrategy =  ap.getJoinStrategy();

    /*
    ** We can only do bulkFetch on NESTEDLOOP
    */
    if (SanityManager.DEBUG)
    {
      if ( ( ! trulyTheBestJoinStrategy.bulkFetchOK()) &&
        (bulkFetch != UNSET))
      {
        SanityManager.THROWASSERT("bulkFetch should not be set "+
                "for the join strategy " +
                trulyTheBestJoinStrategy.getName());
      }
    }

    int nargs = trulyTheBestJoinStrategy.getScanArgs(
                      getLanguageConnectionContext().getTransactionCompile(),
                      mb,
                      this,
                      storeRestrictionList,
                      nonStoreRestrictionList,
View Full Code Here

TOP

Related Classes of org.apache.derby.iapi.sql.compile.JoinStrategy

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.