}
}
}
AccessPath currentAccessPath = getCurrentAccessPath();
JoinStrategy currentJoinStrategy =
currentAccessPath.getJoinStrategy();
optimizer.trace(Optimizer.ESTIMATING_COST_OF_CONGLOMERATE,
tableNumber, 0, 0.0, cd);
/* Get the uniqueness factor for later use (see below) */
double tableUniquenessFactor =
optimizer.uniqueJoinWithOuterTable(predList);
boolean oneRowResultSetForSomeConglom = isOneRowResultSet(predList);
/* Get the predicates that can be used for scanning the base table */
baseTableRestrictionList.removeAllElements();
currentJoinStrategy.getBasePredicates(predList,
baseTableRestrictionList,
this);
/* RESOLVE: Need to figure out how to cache the StoreCostController */
StoreCostController scc = getStoreCostController(cd);
CostEstimate costEstimate = getScratchCostEstimate(optimizer);
/* First, get the cost for one scan */
/* Does the conglomerate match at most one row? */
if (isOneRowResultSet(cd, baseTableRestrictionList))
{
/*
** Tell the RowOrdering that this optimizable is always ordered.
** It will figure out whether it is really always ordered in the
** context of the outer tables and their orderings.
*/
rowOrdering.optimizableAlwaysOrdered(this);
singleScanRowCount = 1.0;
/* Yes, the cost is to fetch exactly one row */
// RESOLVE: NEED TO FIGURE OUT HOW TO GET REFERENCED COLUMN LIST,
// FIELD STATES, AND ACCESS TYPE
cost = scc.getFetchFromFullKeyCost(
(FormatableBitSet) null,
0);
optimizer.trace(Optimizer.MATCH_SINGLE_ROW_COST,
tableNumber, 0, cost, null);
costEstimate.setCost(cost, 1.0d, 1.0d);
/*
** Let the join strategy decide whether the cost of the base
** scan is a single scan, or a scan per outer row.
** NOTE: The multiplication should only be done against the
** total row count, not the singleScanRowCount.
*/
double newCost = costEstimate.getEstimatedCost();
if (currentJoinStrategy.multiplyBaseCostByOuterRows())
{
newCost *= outerCost.rowCount();
}
costEstimate.setCost(
newCost,
costEstimate.rowCount() * outerCost.rowCount(),
costEstimate.singleScanRowCount());
/*
** Choose the lock mode. If the start/stop conditions are
** constant, choose row locking, because we will always match
** the same row. If they are not constant (i.e. they include
** a join), we decide whether to do row locking based on
** the total number of rows for the life of the query.
*/
boolean constantStartStop = true;
for (int i = 0; i < predList.size(); i++)
{
OptimizablePredicate pred = predList.getOptPredicate(i);
/*
** The predicates are in index order, so the start and
** stop keys should be first.
*/
if ( ! (pred.isStartKey() || pred.isStopKey()))
{
break;
}
/* Stop when we've found a join */
if ( ! pred.getReferencedMap().hasSingleBitSet())
{
constantStartStop = false;
break;
}
}
if (constantStartStop)
{
currentAccessPath.setLockMode(
TransactionController.MODE_RECORD);
optimizer.trace(Optimizer.ROW_LOCK_ALL_CONSTANT_START_STOP,
0, 0, 0.0, null);
}
else
{
setLockingBasedOnThreshold(optimizer, costEstimate.rowCount());
}
optimizer.trace(Optimizer.COST_OF_N_SCANS,
tableNumber, 0, outerCost.rowCount(), costEstimate);
/* Add in cost of fetching base row for non-covering index */
if (cd.isIndex() && ( ! isCoveringIndex(cd) ) )
{
double singleFetchCost =
getBaseCostController().getFetchFromRowLocationCost(
(FormatableBitSet) null,
0);
// The estimated row count is always 1 here, although the
// index scan may actually return 0 rows, depending on whether
// or not the predicates match a key. It is assumed that a
// match is more likely than a miss, hence the row count is 1.
// Note (DERBY-6011): Alternative (non-unique) indexes may come
// up with row counts lower than 1 because they multiply with
// the selectivity, especially if the table is almost empty.
// This makes the optimizer prefer non-unique indexes if there
// are not so many rows in the table. We still want to use the
// unique index in that case, as the performance difference
// between the different scans on a small table is small, and
// the unique index is likely to lock fewer rows and reduce
// the chance of deadlocks. Therefore, we compensate by
// making the row count at least 1 for the non-unique index.
// See reference to DERBY-6011 further down in this method.
cost = singleFetchCost * costEstimate.rowCount();
costEstimate.setEstimatedCost(
costEstimate.getEstimatedCost() + cost);
optimizer.trace(Optimizer.NON_COVERING_INDEX_COST,
tableNumber, 0, cost, null);
}
}
else
{
/* Conglomerate might match more than one row */
/*
** Some predicates are good for start/stop, but we don't know
** the values they are being compared to at this time, so we
** estimate their selectivity in language rather than ask the
** store about them. The predicates on the first column of
** the conglomerate reduce the number of pages and rows scanned.
** The predicates on columns after the first reduce the number
** of rows scanned, but have a much smaller effect on the number
** of pages scanned, so we keep track of these selectivities in
** two separate variables: extraFirstColumnSelectivity and
** extraStartStopSelectivity. (Theoretically, we could try to
** figure out the effect of predicates after the first column
** on the number of pages scanned, but it's too hard, so we
** use these predicates only to reduce the estimated number of
** rows. For comparisons with known values, though, the store
** can figure out exactly how many rows and pages are scanned.)
**
** Other predicates are not good for start/stop. We keep track
** of their selectivities separately, because these limit the
** number of rows, but not the number of pages, and so need to
** be factored into the row count but not into the cost.
** These selectivities are factored into extraQualifierSelectivity.
**
** statStartStopSelectivity (using statistics) represents the
** selectivity of start/stop predicates that can be used to scan
** the index. If no statistics exist for the conglomerate then
** the value of this variable remains at 1.0
**
** statCompositeSelectivity (using statistics) represents the
** selectivity of all the predicates (including NonBaseTable
** predicates). This represents the most educated guess [among
** all the wild surmises in this routine] as to the number
** of rows that will be returned from this joinNode.
** If no statistics exist on the table or no statistics at all
** can be found to satisfy the predicates at this join operator,
** then statCompositeSelectivity is left initialized at 1.0
*/
double extraFirstColumnSelectivity = 1.0d;
double extraStartStopSelectivity = 1.0d;
double extraQualifierSelectivity = 1.0d;
double extraNonQualifierSelectivity = 1.0d;
double statStartStopSelectivity = 1.0d;
double statCompositeSelectivity = 1.0d;
int numExtraFirstColumnPreds = 0;
int numExtraStartStopPreds = 0;
int numExtraQualifiers = 0;
int numExtraNonQualifiers = 0;
/*
** It is possible for something to be a start or stop predicate
** without it being possible to use it as a key for cost estimation.
** For example, with an index on (c1, c2), and the predicate
** c1 = othertable.c3 and c2 = 1, the comparison on c1 is with
** an unknown value, so we can't pass it to the store. This means
** we can't pass the comparison on c2 to the store, either.
**
** The following booleans keep track of whether we have seen
** gaps in the keys we can pass to the store.
*/
boolean startGap = false;
boolean stopGap = false;
boolean seenFirstColumn = false;
/*
** We need to figure out the number of rows touched to decide
** whether to use row locking or table locking. If the start/stop
** conditions are constant (i.e. no joins), the number of rows
** touched is the number of rows per scan. But if the start/stop
** conditions contain a join, the number of rows touched must
** take the number of outer rows into account.
*/
boolean constantStartStop = true;
boolean startStopFound = false;
/* Count the number of start and stop keys */
int startKeyNum = 0;
int stopKeyNum = 0;
OptimizablePredicate pred;
int predListSize;
if (predList != null)
predListSize = baseTableRestrictionList.size();
else
predListSize = 0;
int startStopPredCount = 0;
ColumnReference firstColumn = null;
for (int i = 0; i < predListSize; i++)
{
pred = baseTableRestrictionList.getOptPredicate(i);
boolean startKey = pred.isStartKey();
boolean stopKey = pred.isStopKey();
if (startKey || stopKey)
{
startStopFound = true;
if ( ! pred.getReferencedMap().hasSingleBitSet())
{
constantStartStop = false;
}
boolean knownConstant =
pred.compareWithKnownConstant(this, true);
if (startKey)
{
if (knownConstant && ( ! startGap ) )
{
startKeyNum++;
if (unknownPredicateList != null)
unknownPredicateList.removeOptPredicate(pred);
}
else
{
startGap = true;
}
}
if (stopKey)
{
if (knownConstant && ( ! stopGap ) )
{
stopKeyNum++;
if (unknownPredicateList != null)
unknownPredicateList.removeOptPredicate(pred);
}
else
{
stopGap = true;
}
}
/* If either we are seeing startGap or stopGap because start/stop key is
* comparison with non-constant, we should multiply the selectivity to
* extraFirstColumnSelectivity. Beetle 4787.
*/
if (startGap || stopGap)
{
// Don't include redundant join predicates in selectivity calculations
if (baseTableRestrictionList.isRedundantPredicate(i))
continue;
if (startKey && stopKey)
startStopPredCount++;
if (pred.getIndexPosition() == 0)
{
extraFirstColumnSelectivity *=
pred.selectivity(this);
if (! seenFirstColumn)
{
ValueNode relNode = ((Predicate) pred).getAndNode().getLeftOperand();
if (relNode instanceof BinaryRelationalOperatorNode)
firstColumn = ((BinaryRelationalOperatorNode) relNode).getColumnOperand(this);
seenFirstColumn = true;
}
}
else
{
extraStartStopSelectivity *= pred.selectivity(this);
numExtraStartStopPreds++;
}
}
}
else
{
// Don't include redundant join predicates in selectivity calculations
if (baseTableRestrictionList.isRedundantPredicate(i))
{
continue;
}
/* If we have "like" predicate on the first index column, it is more likely
* to have a smaller range than "between", so we apply extra selectivity 0.2
* here. beetle 4387, 4787.
*/
if (pred instanceof Predicate)
{
ValueNode leftOpnd = ((Predicate) pred).getAndNode().getLeftOperand();
if (firstColumn != null && leftOpnd instanceof LikeEscapeOperatorNode)
{
LikeEscapeOperatorNode likeNode = (LikeEscapeOperatorNode) leftOpnd;
if (likeNode.getLeftOperand().requiresTypeFromContext())
{
ValueNode receiver = ((TernaryOperatorNode) likeNode).getReceiver();
if (receiver instanceof ColumnReference)
{
ColumnReference cr = (ColumnReference) receiver;
if (cr.getTableNumber() == firstColumn.getTableNumber() &&
cr.getColumnNumber() == firstColumn.getColumnNumber())
extraFirstColumnSelectivity *= 0.2;
}
}
}
}
if (pred.isQualifier())
{
extraQualifierSelectivity *= pred.selectivity(this);
numExtraQualifiers++;
}
else
{
extraNonQualifierSelectivity *= pred.selectivity(this);
numExtraNonQualifiers++;
}
/*
** Strictly speaking, it shouldn't be necessary to
** indicate a gap here, since there should be no more
** start/stop predicates, but let's do it, anyway.
*/
startGap = true;
stopGap = true;
}
}
if (unknownPredicateList != null)
{
statCompositeSelectivity = unknownPredicateList.selectivity(this);
if (statCompositeSelectivity == -1.0d)
statCompositeSelectivity = 1.0d;
}
if (seenFirstColumn && (startStopPredCount > 0))
{
if (statisticsForConglomerate) {
statStartStopSelectivity =
tableDescriptor.selectivityForConglomerate(cd,
startStopPredCount);
} else if (cd.isIndex()) {
//DERBY-3790 (Investigate if request for update
// statistics can be skipped for certain kind of
// indexes, one instance may be unique indexes based
// on one column.) But as found in DERBY-6045 (in list
// multi-probe by primary key not chosen on tables with
// >256 rows), even though we do not keep the
// statistics for single-column unique indexes, we
// should improve the selectivity of such an index
// when the index is being considered by the optimizer.
IndexRowGenerator irg = cd.getIndexDescriptor();
if (irg.isUnique()
&& irg.numberOfOrderedColumns() == 1
&& startStopPredCount == 1) {
statStartStopSelectivity =
(double)(1/(double)baseRowCount());
}
}
}
/*
** Factor the non-base-table predicates into the extra
** non-qualifier selectivity, since these will restrict the
** number of rows, but not the cost.
*/
extraNonQualifierSelectivity *=
currentJoinStrategy.nonBasePredicateSelectivity(this, predList);
/* Create the start and stop key arrays, and fill them in */
DataValueDescriptor[] startKeys;
DataValueDescriptor[] stopKeys;
if (startKeyNum > 0)
startKeys = new DataValueDescriptor[startKeyNum];
else
startKeys = null;
if (stopKeyNum > 0)
stopKeys = new DataValueDescriptor[stopKeyNum];
else
stopKeys = null;
startKeyNum = 0;
stopKeyNum = 0;
startGap = false;
stopGap = false;
/* If we have a probe predicate that is being used as a start/stop
* key then ssKeySourceInList will hold the InListOperatorNode
* from which the probe predicate was built.
*/
InListOperatorNode ssKeySourceInList = null;
for (int i = 0; i < predListSize; i++)
{
pred = baseTableRestrictionList.getOptPredicate(i);
boolean startKey = pred.isStartKey();
boolean stopKey = pred.isStopKey();
if (startKey || stopKey)
{
/* A probe predicate is only useful if it can be used as
* a start/stop key for the _first_ column in an index
* (i.e. if the column position is 0). That said, we only
* allow a single start/stop key per column position in
* the index (see PredicateList.orderUsefulPredicates()).
* Those two facts combined mean that we should never have
* more than one probe predicate start/stop key for a given
* conglomerate.
*/
if (SanityManager.DEBUG)
{
if ((ssKeySourceInList != null) &&
((Predicate)pred).isInListProbePredicate())
{
SanityManager.THROWASSERT(
"Found multiple probe predicate start/stop keys" +
" for conglomerate '" + cd.getConglomerateName() +
"' when at most one was expected.");
}
}
/* By passing "true" in the next line we indicate that we
* should only retrieve the underlying InListOpNode *if*
* the predicate is a "probe predicate".
*/
ssKeySourceInList = ((Predicate)pred).getSourceInList(true);
boolean knownConstant = pred.compareWithKnownConstant(this, true);
if (startKey)
{
if (knownConstant && ( ! startGap ) )
{
startKeys[startKeyNum] = pred.getCompareValue(this);
startKeyNum++;
}
else
{
startGap = true;
}
}
if (stopKey)
{
if (knownConstant && ( ! stopGap ) )
{
stopKeys[stopKeyNum] = pred.getCompareValue(this);
stopKeyNum++;
}
else
{
stopGap = true;
}
}
}
else
{
startGap = true;
stopGap = true;
}
}
int startOperator;
int stopOperator;
if (baseTableRestrictionList != null)
{
startOperator = baseTableRestrictionList.startOperator(this);
stopOperator = baseTableRestrictionList.stopOperator(this);
}
else
{
/*
** If we're doing a full scan, it doesn't matter what the
** start and stop operators are.
*/
startOperator = ScanController.NA;
stopOperator = ScanController.NA;
}
/*
** Get a row template for this conglomerate. For now, just tell
** it we are using all the columns in the row.
*/
DataValueDescriptor[] rowTemplate =
getRowTemplate(cd, getBaseCostController());
/* We prefer an index scan over a table scan for concurrency reasons, and
* express that preference through a small adjustment to the estimated row
* count. This affects the optimizer's decision especially when the table
* has few rows (beetle 5006). It makes sense because a plan may live for a
* long time before we actually check and invalidate it, and new rows may be
* inserted before we check and invalidate the plan.
* Here we only prefer an index that has a start/stop key from predicates.
* The non-constant start/stop key case is taken care of by selectivity later.
*/
long baseRC = (startKeys != null || stopKeys != null) ? baseRowCount() : baseRowCount() + 5;
scc.getScanCost(
currentJoinStrategy.scanCostType(),
baseRC,
1,
forUpdate(),
(FormatableBitSet) null,
rowTemplate,
startKeys,
startOperator,
stopKeys,
stopOperator,
false,
0,
costEstimate);
/* initialPositionCost is the first part of the index scan cost we get above.
* It's the cost of initial positioning/fetch of key. So it's unrelated to
* row count of how many rows we fetch from index. We extract it here so that
* we only multiply selectivity to the other part of index scan cost, which is
* nearly linear, to make cost calculation more accurate and fair, especially
* compared to the plan of "one row result set" (unique index). beetle 4787.
*/
double initialPositionCost = 0.0;
if (cd.isIndex())
{
initialPositionCost = scc.getFetchFromFullKeyCost((FormatableBitSet) null, 0);
/* oneRowResultSetForSomeConglom means there's a unique index, but certainly
* not this one since we are here. If store knows this non-unique index
* won't return any row or just returns one row (eg., the predicate is a
* comparison with constant or almost empty table), we do minor adjustment
* on cost (affecting decision for covering index) and rc (decision for
* non-covering). The purpose is favoring unique index. beetle 5006.
*/
if (oneRowResultSetForSomeConglom && costEstimate.rowCount() <= 1)
{
costEstimate.setCost(costEstimate.getEstimatedCost() * 2,
costEstimate.rowCount() + 2,
costEstimate.singleScanRowCount() + 2);
}
}
optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN1,
tableNumber, 0, 0.0, cd);
optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN2,
tableNumber, 0, 0.0, costEstimate);
optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN3,
numExtraFirstColumnPreds, 0,
extraFirstColumnSelectivity, null);
optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN4,
numExtraStartStopPreds, 0,
extraStartStopSelectivity, null);
optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN7,
startStopPredCount, 0,
statStartStopSelectivity, null);
optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN5,
numExtraQualifiers, 0,
extraQualifierSelectivity, null);
optimizer.trace(Optimizer.COST_OF_CONGLOMERATE_SCAN6,
numExtraNonQualifiers, 0,
extraNonQualifierSelectivity, null);
/* initial row count is the row count without applying
any predicates-- we use this at the end of the routine
when we use statistics to recompute the row count.
*/
double initialRowCount = costEstimate.rowCount();
if (statStartStopSelectivity != 1.0d)
{
/*
** If statistics exist use the selectivity computed
** from the statistics to calculate the cost.
** NOTE: we apply this selectivity to the cost as well
** as both the row counts. In the absence of statistics
** we only applied the FirstColumnSelectivity to the
** cost.
*/
costEstimate.setCost(
scanCostAfterSelectivity(costEstimate.getEstimatedCost(),
initialPositionCost,
statStartStopSelectivity,
oneRowResultSetForSomeConglom),
costEstimate.rowCount() * statStartStopSelectivity,
costEstimate.singleScanRowCount() *
statStartStopSelectivity);
optimizer.trace(Optimizer.COST_INCLUDING_STATS_FOR_INDEX,
tableNumber, 0, 0.0, costEstimate);
}
else
{
/*
** Factor in the extra selectivity on the first column
** of the conglomerate (see comment above).
** NOTE: In this case we want to apply the selectivity to both
** the total row count and singleScanRowCount.
*/
if (extraFirstColumnSelectivity != 1.0d)
{
costEstimate.setCost(
scanCostAfterSelectivity(costEstimate.getEstimatedCost(),
initialPositionCost,
extraFirstColumnSelectivity,
oneRowResultSetForSomeConglom),
costEstimate.rowCount() * extraFirstColumnSelectivity,
costEstimate.singleScanRowCount() * extraFirstColumnSelectivity);
optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_1ST_COL_SELECTIVITY,
tableNumber, 0, 0.0, costEstimate);
}
/* Factor in the extra start/stop selectivity (see comment above).
* NOTE: In this case we want to apply the selectivity to both
* the row count and singleScanRowCount.
*/
if (extraStartStopSelectivity != 1.0d)
{
costEstimate.setCost(
costEstimate.getEstimatedCost(),
costEstimate.rowCount() * extraStartStopSelectivity,
costEstimate.singleScanRowCount() * extraStartStopSelectivity);
optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_START_STOP,
tableNumber, 0, 0.0, costEstimate);
}
}
/* If the start and stop key came from an IN-list "probe predicate"
* then we need to adjust the cost estimate. The probe predicate
* is of the form "col = ?" and we currently have the estimated
* cost of probing the index a single time for "?". But with an
* IN-list we don't just probe the index once; we're going to
* probe it once for every value in the IN-list. And we are going
* to potentially return an additional row (or set of rows) for
* each probe. To account for this "multi-probing" we take the
* costEstimate and multiply each of its fields by the size of
* the IN-list.
*
* Note: If the IN-list has duplicate values then this simple
* multiplication could give us an elevated cost (because we
* only probe the index for each *non-duplicate* value in the
* IN-list). But for now, we're saying that's okay.
*/
if (ssKeySourceInList != null)
{
int listSize = ssKeySourceInList.getRightOperandList().size();
double rc = costEstimate.rowCount() * listSize;
double ssrc = costEstimate.singleScanRowCount() * listSize;
/* If multiplication by listSize returns more rows than are
* in the scan then just use the number of rows in the scan.
*/
costEstimate.setCost(
costEstimate.getEstimatedCost() * listSize,
rc > initialRowCount ? initialRowCount : rc,
ssrc > initialRowCount ? initialRowCount : ssrc);
}
/*
** Figure out whether to do row locking or table locking.
**
** If there are no start/stop predicates, we're doing full
** conglomerate scans, so do table locking.
*/
if (! startStopFound)
{
currentAccessPath.setLockMode(
TransactionController.MODE_TABLE);
optimizer.trace(Optimizer.TABLE_LOCK_NO_START_STOP,
0, 0, 0.0, null);
}
else
{
/*
** Figure out the number of rows touched. If all the
** start/stop predicates are constant, the number of
** rows touched is the number of rows per scan.
** This is also true for join strategies that scan the
** inner table only once (like hash join) - we can
** tell if we have one of those, because
** multiplyBaseCostByOuterRows() will return false.
*/
double rowsTouched = costEstimate.rowCount();
if ( (! constantStartStop) &&
currentJoinStrategy.multiplyBaseCostByOuterRows())
{
/*
** This is a join where the inner table is scanned
** more than once, so we have to take the number
** of outer rows into account. The formula for this
** works out as follows:
**
** total rows in table = r
** number of rows touched per scan = s
** number of outer rows = o
** proportion of rows touched per scan = s / r
** proportion of rows not touched per scan =
** 1 - (s / r)
** proportion of rows not touched for all scans =
** (1 - (s / r)) ** o
** proportion of rows touched for all scans =
** 1 - ((1 - (s / r)) ** o)
** total rows touched for all scans =
** r * (1 - ((1 - (s / r)) ** o))
**
** In doing these calculations, we must be careful not
** to divide by zero. This could happen if there are
** no rows in the table. In this case, let's do table
** locking.
*/
double r = baseRowCount();
if (r > 0.0)
{
double s = costEstimate.rowCount();
double o = outerCost.rowCount();
double pRowsNotTouchedPerScan = 1.0 - (s / r);
double pRowsNotTouchedAllScans =
Math.pow(pRowsNotTouchedPerScan, o);
double pRowsTouchedAllScans =
1.0 - pRowsNotTouchedAllScans;
double rowsTouchedAllScans =
r * pRowsTouchedAllScans;
rowsTouched = rowsTouchedAllScans;
}
else
{
/* See comments in setLockingBasedOnThreshold */
rowsTouched = optimizer.tableLockThreshold() + 1;
}
}
setLockingBasedOnThreshold(optimizer, rowsTouched);
}
/*
** If the index isn't covering, add the cost of getting the
** base row. Only apply extraFirstColumnSelectivity and extraStartStopSelectivity
** before we do this, don't apply extraQualifierSelectivity etc. The
** reason is that the row count here should be the number of index rows
** (and hence heap rows) we get, and we need to fetch all those rows, even
** though later on some of them may be filtered out by other predicates.
** beetle 4787.
*/
if (cd.isIndex() && ( ! isCoveringIndex(cd) ) )
{
double singleFetchCost =
getBaseCostController().getFetchFromRowLocationCost(
(FormatableBitSet) null,
0);
// The number of rows we expect to fetch from the base table.
double rowsToFetch = costEstimate.rowCount();
if (oneRowResultSetForSomeConglom) {
// DERBY-6011: We know that there is a unique index, and
// that there are predicates that guarantee that at most
// one row will be fetched from the unique index. The
// unique alternative always has 1 as estimated row count
// (see reference to DERBY-6011 further up in this method),
// even though it could actually return 0 rows.
//
// If the alternative that's being considered here has
// expected row count less than 1, it is going to have
// lower estimated cost for fetching base rows. We prefer
// unique indexes, as they lock fewer rows and allow more
// concurrency. Therefore, make sure the cost estimate for
// this alternative includes at least fetching one row from
// the base table.
rowsToFetch = Math.max(1.0d, rowsToFetch);
}
cost = singleFetchCost * rowsToFetch;
costEstimate.setEstimatedCost(
costEstimate.getEstimatedCost() + cost);
optimizer.trace(Optimizer.COST_OF_NONCOVERING_INDEX,
tableNumber, 0, 0.0, costEstimate);
}
/* Factor in the extra qualifier selectivity (see comment above).
* NOTE: In this case we want to apply the selectivity to both
* the row count and singleScanRowCount.
*/
if (extraQualifierSelectivity != 1.0d)
{
costEstimate.setCost(
costEstimate.getEstimatedCost(),
costEstimate.rowCount() * extraQualifierSelectivity,
costEstimate.singleScanRowCount() * extraQualifierSelectivity);
optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_QUALIFIER_SELECTIVITY,
tableNumber, 0, 0.0, costEstimate);
}
singleScanRowCount = costEstimate.singleScanRowCount();
/*
** Let the join strategy decide whether the cost of the base
** scan is a single scan, or a scan per outer row.
** NOTE: In this case we only want to multiply against the
** total row count, not the singleScanRowCount.
** NOTE: Do not multiply row count if we determined that
** conglomerate is a 1 row result set when costing nested
** loop. (eg, we will find at most 1 match when probing
** the hash table.)
*/
double newCost = costEstimate.getEstimatedCost();
double rowCount = costEstimate.rowCount();
/*
** RESOLVE - If there is a unique index on the joining
** columns, the number of matching rows will equal the
** number of outer rows, even if we're not considering the
** unique index for this access path. To figure that out,
** however, would require an analysis phase at the beginning
** of optimization. So, we'll always multiply the number
** of outer rows by the number of rows per scan. This will
** give us a higher than actual row count when there is
** such a unique index, which will bias the optimizer toward
** using the unique index. This is probably OK most of the
** time, since the optimizer would probably choose the
** unique index, anyway. But it would be better if the
** optimizer set the row count properly in this case.
*/
if (currentJoinStrategy.multiplyBaseCostByOuterRows())
{
newCost *= outerCost.rowCount();
}
rowCount *= outerCost.rowCount();
initialRowCount *= outerCost.rowCount();
/*
** If this table can generate at most one row per scan,
** the maximum row count is the number of outer rows.
** NOTE: This does not completely take care of the RESOLVE
** in the above comment, since it will only notice
** one-row result sets for the current join order.
*/
if (oneRowResultSetForSomeConglom)
{
if (outerCost.rowCount() < rowCount)
{
rowCount = outerCost.rowCount();
}
}
/*
** The estimated cost may be too high for indexes, if the
** estimated row count exceeds the maximum. Only do this
** if we're not doing a full scan, and the start/stop position
** is not constant (i.e. we're doing a join on the first column
** of the index) - the reason being that this is when the
** cost may be inaccurate.
*/
if (cd.isIndex() && startStopFound && ( ! constantStartStop ) )
{
/*
** Does any table outer to this one have a unique key on
** a subset of the joining columns? If so, the maximum number
** of rows that this table can return is the number of rows
** in this table times the number of times the maximum number
** of times each key can be repeated.
*/
double scanUniquenessFactor =
optimizer.uniqueJoinWithOuterTable(baseTableRestrictionList);
if (scanUniquenessFactor > 0.0)
{
/*
** A positive uniqueness factor means there is a unique
** outer join key. The value is the reciprocal of the
** maximum number of duplicates for each unique key
** (the duplicates can be caused by other joining tables).
*/
double maxRows =
((double) baseRowCount()) / scanUniquenessFactor;
if (rowCount > maxRows)
{
/*
** The estimated row count is too high. Adjust the
** estimated cost downwards proportionately to
** match the maximum number of rows.
*/
newCost *= (maxRows / rowCount);
}
}
}
/* The estimated total row count may be too high */
if (tableUniquenessFactor > 0.0)
{
/*
** A positive uniqueness factor means there is a unique outer
** join key. The value is the reciprocal of the maximum number
** of duplicates for each unique key (the duplicates can be
** caused by other joining tables).
*/
double maxRows =
((double) baseRowCount()) / tableUniquenessFactor;
if (rowCount > maxRows)
{
/*
** The estimated row count is too high. Set it to the
** maximum row count.
*/
rowCount = maxRows;
}
}
costEstimate.setCost(
newCost,
rowCount,
costEstimate.singleScanRowCount());
optimizer.trace(Optimizer.COST_OF_N_SCANS,
tableNumber, 0, outerCost.rowCount(), costEstimate);
/*
** Now figure in the cost of the non-qualifier predicates.
** existsBaseTables have a row count of 1
*/
double rc = -1, src = -1;
if (existsBaseTable)
rc = src = 1;
// don't factor in extraNonQualifierSelectivity in case of oneRowResultSetForSomeConglom
// because "1" is the final result and the effect of other predicates already considered
// beetle 4787
else if (extraNonQualifierSelectivity != 1.0d)
{
rc = oneRowResultSetForSomeConglom ? costEstimate.rowCount() :
costEstimate.rowCount() * extraNonQualifierSelectivity;
src = costEstimate.singleScanRowCount() * extraNonQualifierSelectivity;
}
if (rc != -1) // changed
{
costEstimate.setCost(costEstimate.getEstimatedCost(), rc, src);
optimizer.trace(Optimizer.COST_INCLUDING_EXTRA_NONQUALIFIER_SELECTIVITY,
tableNumber, 0, 0.0, costEstimate);
}
recomputeRowCount:
if (statisticsForTable && !oneRowResultSetForSomeConglom &&
(statCompositeSelectivity != 1.0d))
{
/* if we have statistics we should use statistics to calculate
row count-- if it has been determined that this table
returns one row for some conglomerate then there is no need
to do this recalculation
*/
double compositeStatRC = initialRowCount * statCompositeSelectivity;
optimizer.trace(Optimizer.COMPOSITE_SEL_FROM_STATS,
0, 0, statCompositeSelectivity, null);
if (tableUniquenessFactor > 0.0)
{
/* If the row count from the composite statistics
comes up more than what the table uniqueness
factor indicates then lets stick with the current
row count.
*/
if (compositeStatRC > (baseRowCount() *
tableUniquenessFactor))
{
break recomputeRowCount;
}
}
/* set the row count and the single scan row count
to the initialRowCount. initialRowCount is the product
of the RC from store * RC of the outerCost.
Thus RC = initialRowCount * the selectivity from stats.
SingleRC = RC / outerCost.rowCount().
*/
costEstimate.setCost(costEstimate.getEstimatedCost(),
compositeStatRC,
(existsBaseTable) ?
1 :
compositeStatRC / outerCost.rowCount());
optimizer.trace(Optimizer.COST_INCLUDING_COMPOSITE_SEL_FROM_STATS,
tableNumber, 0, 0.0, costEstimate);
}
}
/* Put the base predicates back in the predicate list */
currentJoinStrategy.putBasePredicates(predList,
baseTableRestrictionList);
return costEstimate;
}