int[] szNew = new int[NB + 2];
ISqlJetMemoryPointer[] apCell = null; /* All cells begin balanced */
int[] szCell; /* Local size of all cells in apCell[] */
/* Space for holding data of apCopy[] */
ISqlJetMemoryPointer[] aCopy = new ISqlJetMemoryPointer[NB];
ISqlJetMemoryPointer aSpace1; /*
* Space for copies of divider cells
* before balance
*/
/* Space for overflow divider cells after balance */
ISqlJetMemoryPointer aSpace2 = null;
ISqlJetMemoryPointer aFrom = null;
pPage = pCur.apPage[pCur.iPage];
assert (pPage.pBt.mutex.held());
/* Find the parent page. */
assert (pCur.iPage > 0);
assert (pPage.isInit);
assert (pPage.pDbPage.isWriteable() || pPage.nOverflow == 1);
pBt = pPage.pBt;
pParent = pCur.apPage[pCur.iPage - 1];
assert (pParent != null);
boolean ignore_clean = false;
try {
pParent.pDbPage.write();
TRACE("BALANCE: begin page %d child of %d\n", pPage.pgno, pParent.pgno);
/*
* A special case: If a new entry has just been inserted into a
* table (that is, a btree with integer keys and all data at the
* leaves) and the new entry is the right-most entry in the tree (it
* has the largest key) then use the special balance_quick() routine
* for balancing. balance_quick() is much faster and results in a
* tighter packing of data in the common case.
*/
if (pPage.leaf && pPage.intKey && pPage.nOverflow == 1 && pPage.aOvfl[0].idx == pPage.nCell
&& pParent.pgno != 1 && get4byte(pParent.aData, pParent.hdrOffset + 8) == pPage.pgno) {
assert (pPage.intKey);
ignore_clean = true;
/*
* TODO: Check the siblings to the left of pPage. It may be
* that they are not full and no new page is required.
*/
pCur.balance_quick();
return;
}
pPage.pDbPage.write();
/*
* Find the cell in the parent page whose left child points back
* to pPage. The "idx" variable is the index of that cell. If pPage
* is the rightmost child of pParent then set idx to pParent.nCell
*/
idx = pCur.aiIdx[pCur.iPage - 1];
pParent.assertParentIndex(idx, pPage.pgno);
/*
* Find sibling pages to pPage and the cells in pParent that divide
* the siblings. An attempt is made to find NN siblings on either
* side of pPage. More siblings are taken from one side, however, if
* there are fewer than NN siblings on the other side. If
* pParent has NB or fewer children then all children of pParent are
* taken.
*/
nxDiv = idx - NN;
if (nxDiv + NB > pParent.nCell) {
nxDiv = pParent.nCell - NB + 1;
}
if (nxDiv < 0) {
nxDiv = 0;
}
nDiv = 0;
for (i = 0, k = nxDiv; i < NB; i++, k++) {
if (k < pParent.nCell) {
apDiv[i] = pParent.findCell(k);
nDiv++;
assert (!pParent.leaf);
pgnoOld[i] = get4byte(apDiv[i]);
} else if (k == pParent.nCell) {
pgnoOld[i] = get4byte(pParent.aData, pParent.hdrOffset + 8);
} else {
break;
}
apOld[i] = pBt.getAndInitPage(pgnoOld[i]);
/* apOld[i]->idxParent = k; */
apCopy[i] = null;
assert (i == nOld);
nOld++;
nMaxCells += 1 + apOld[i].nCell + apOld[i].nOverflow;
}
/*
* Make nMaxCells a multiple of 4 in order to preserve 8-byte
* alignment
*/
// nMaxCells = (nMaxCells + 3)&~3;
/*
* Allocate space for memory structures
*/
apCell = new ISqlJetMemoryPointer[nMaxCells];
szCell = new int[nMaxCells];
aSpace1 = SqlJetUtility.allocatePtr(pBt.pageSize);
if (pBt.autoVacuum) {
aFrom = SqlJetUtility.allocatePtr(nMaxCells);
}
aSpace2 = SqlJetUtility.allocatePtr(pBt.pageSize);
/*
* Make copies of the content of pPage and its siblings into
* apCopy[]. The rest of this function will use data from the copies
* rather than the original pages since the original pages will be
* in the process of being overwritten.
*/
for (i = 0; i < nOld; i++) {
SqlJetMemPage p = apCopy[i] = (SqlJetMemPage) memcpy(apOld[i]);
p.aData = aCopy[i] = SqlJetUtility.allocatePtr(pBt.pageSize);
memcpy(p.aData, apOld[i].aData, pBt.pageSize);
}
/*
* Load pointers to all cells on sibling pages and the divider cells
* into the local apCell[] array. Make copies of the divider cells
* into space obtained from aSpace1[] and remove the divider
* cells from pParent.
*
* If the siblings are on leaf pages, then the child pointers of the
* divider cells are stripped from the cells before they are copied
* into aSpace1[]. In this way, all cells in apCell[] are without
* child pointers. If siblings are not leaves, then all cells in
* apCell[] include child pointers. Either way, all cells in
* apCell[] are alike.
*
* leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf.
* leafData: 1 if pPage holds key+data and pParent holds only keys.
*/
nCell = 0;
leafCorrection = (pPage.leaf ? 1 : 0) * 4;
leafData = pPage.hasData;
for (i = 0; i < nOld; i++) {
SqlJetMemPage pOld = apCopy[i];
int limit = pOld.nCell + pOld.nOverflow;
for (j = 0; j < limit; j++) {
assert (nCell < nMaxCells);
apCell[nCell] = pOld.findOverflowCell(j);
szCell[nCell] = pOld.cellSizePtr(apCell[nCell]);
if (pBt.autoVacuum) {
int a;
SqlJetUtility.putUnsignedByte(aFrom, nCell, (byte) i);
assert (i >= 0 && i < 6);
for (a = 0; a < pOld.nOverflow; a++) {
if (pOld.aOvfl[a].pCell == apCell[nCell]) {
SqlJetUtility.putUnsignedByte(aFrom, nCell, (byte) 0xFF);
break;
}
}
}
nCell++;
}
if (i < nOld - 1) {
int sz = pParent.cellSizePtr(apDiv[i]);
if (leafData) {
/*
* With the LEAFDATA flag, pParent cells hold only
* INTKEYs that are duplicates of keys on the child
* pages. We need to remove the divider cells from
* pParent, but the divider cells are not added to
* apCell[] because they are duplicates of child cells.
*/
pParent.dropCell(nxDiv, sz);
} else {
ISqlJetMemoryPointer pTemp;
assert (nCell < nMaxCells);
szCell[nCell] = sz;
pTemp = pointer(aSpace1, iSpace1);
iSpace1 += sz;
assert (sz <= pBt.pageSize / 4);
assert (iSpace1 <= pBt.pageSize);
memcpy(pTemp, apDiv[i], sz);
apCell[nCell] = pointer(pTemp, leafCorrection);
if (pBt.autoVacuum) {
SqlJetUtility.putUnsignedByte(aFrom, nCell, (byte) 0xFF);
}
pParent.dropCell(nxDiv, sz);
assert (leafCorrection == 0 || leafCorrection == 4);
szCell[nCell] -= leafCorrection;
assert (get4byte(pTemp) == pgnoOld[i]);
if (!pOld.leaf) {
assert (leafCorrection == 0);
/*
* The right pointer of the child page pOld becomes
* the left pointer of the divider cell
*/
memcpy(apCell[nCell], 0, pOld.aData, pOld.hdrOffset + 8, 4);
} else {
assert (leafCorrection == 4);
if (szCell[nCell] < 4) {
/* Do not allow any cells smaller than 4 bytes. */
szCell[nCell] = 4;
}
}
nCell++;
}
}
}
/*
* Figure out the number of pages needed to hold all nCell cells.
* Store this number in "k". Also compute szNew[] which is the total
* size of all cells on the i-th page and cntNew[] which is the
* index in apCell[] of the cell that divides page i from page i+1.
* cntNew[k] should equal nCell.
*
* Values computed by this block: <p> k: The total number of sibling
* pages. </p> <p> szNew[i]: Space used on the i-th sibling page.
* </p> <p> cntNew[i]: Index in apCell[] and szCell[] for the first
* cell to the right of the i-th sibling page.</p> <p> usableSpace:
* Number of bytes of space available on each sibling.</p>
*/
usableSpace = pBt.usableSize - 12 + leafCorrection;
for (subtotal = k = i = 0; i < nCell; i++) {
assert (i < nMaxCells);
subtotal += szCell[i] + 2;
if (subtotal > usableSpace) {
szNew[k] = subtotal - szCell[i];
cntNew[k] = i;
if (leafData) {
i--;
}
subtotal = 0;
k++;
}
}
szNew[k] = subtotal;
cntNew[k] = nCell;
k++;
/*
* The packing computed by the previous block is biased toward the
* siblings on the left side. The left siblings are always nearly
* full, while the right-most sibling might be nearly empty. This
* block of code attempts to adjust the packing of siblings to get a
* better balance.
*
* This adjustment is more than an optimization. The packing above
* might be so out of balance as to be illegal. For example, the
* right-most sibling might be completely empty. This adjustment is
* not optional.
*/
for (i = k - 1; i > 0; i--) {
int szRight = szNew[i]; /* Size of sibling on the right */
int szLeft = szNew[i - 1]; /* Size of sibling on the left */
int r; /* Index of right-most cell in left sibling */
int d; /* Index of first cell to the left of right sibling */
r = cntNew[i - 1] - 1;
d = r + 1 - (leafData ? 1 : 0);
assert (d < nMaxCells);
assert (r < nMaxCells);
while (szRight == 0 || szRight + szCell[d] + 2 <= szLeft - (szCell[r] + 2)) {
szRight += szCell[d] + 2;
szLeft -= szCell[r] + 2;
cntNew[i - 1]--;
r = cntNew[i - 1] - 1;
d = r + 1 - (leafData ? 1 : 0);
}
szNew[i] = szRight;
szNew[i - 1] = szLeft;
}
/*
* Either we found one or more cells (cntNew[0] > 0) or we are a
* virtual root page. A virtual root page is when the real root
* page is page 1 and we are the only child of that page.
*/
assert (cntNew[0] > 0 || (pParent.pgno == 1 && pParent.nCell == 0));
/*
* Allocate k new pages. Reuse old pages where possible.
*/
assert (pPage.pgno > 1);
pageFlags = SqlJetUtility.getUnsignedByte(pPage.aData, 0);
int[] ipgnoNew = new int[1];
for (i = 0; i < k; i++) {
SqlJetMemPage pNew;
if (i < nOld) {
pNew = apNew[i] = apOld[i];
pgnoNew[i] = pgnoOld[i];
apOld[i] = null;
pNew.pDbPage.write();
nNew++;
} else {
assert (i > 0);
ipgnoNew[0] = pgnoNew[i];
pNew = pBt.allocatePage(ipgnoNew, pgnoNew[i - 1], false);
pgnoNew[i] = ipgnoNew[0];
apNew[i] = pNew;
nNew++;
}
}
/*
* Free any old pages that were not reused as new pages.
*/
while (i < nOld) {
apOld[i].freePage();
SqlJetMemPage.releasePage(apOld[i]);
apOld[i] = null;
i++;
}
/*
* Put the new pages in ascending order. This helps to keep entries
* in the disk file in order so that a scan of the table is a linear
* scan through the file. That in turn helps the operating system to
* deliver pages from the disk more rapidly.
*
* An O(n^2) insertion sort algorithm is used, but since n is never
* more than NB (a small constant), that should not be a problem.
*
* When NB==3, this one optimization makes the database about 25%
* faster for large insertions and deletions.
*/
for (i = 0; i < k - 1; i++) {
int minV = pgnoNew[i];
int minI = i;
for (j = i + 1; j < k; j++) {
if (pgnoNew[j] < minV) {
minI = j;
minV = pgnoNew[j];
}
}
if (minI > i) {
int t;
SqlJetMemPage pT;
t = pgnoNew[i];
pT = apNew[i];
pgnoNew[i] = pgnoNew[minI];
apNew[i] = apNew[minI];
pgnoNew[minI] = t;
apNew[minI] = pT;
}
}
TRACE("BALANCE: old: %d %d %d new: %d(%d) %d(%d) %d(%d) %d(%d) %d(%d)\n", pgnoOld[0],
nOld >= 2 ? pgnoOld[1] : 0, nOld >= 3 ? pgnoOld[2] : 0, pgnoNew[0], szNew[0],
nNew >= 2 ? pgnoNew[1] : 0, nNew >= 2 ? szNew[1] : 0, nNew >= 3 ? pgnoNew[2] : 0,
nNew >= 3 ? szNew[2] : 0, nNew >= 4 ? pgnoNew[3] : 0, nNew >= 4 ? szNew[3] : 0,
nNew >= 5 ? pgnoNew[4] : 0, nNew >= 5 ? szNew[4] : 0);
/*
* Evenly distribute the data in apCell[] across the new pages.
* Insert divider cells into pParent as necessary.
*/
j = 0;
for (i = 0; i < nNew; i++) {
/* Assemble the new sibling page. */
SqlJetMemPage pNew = apNew[i];
assert (j < nMaxCells);
assert (pNew.pgno == pgnoNew[i]);
pNew.zeroPage(pageFlags);
pNew.assemblePage(cntNew[i] - j, apCell, j, szCell, j);
assert (pNew.nCell > 0 || (nNew == 1 && cntNew[0] == 0));
assert (pNew.nOverflow == 0);
/*
* If this is an auto-vacuum database, update the pointer map
* entries that point to the siblings that were rearranged.
* These can be: left children of cells, the right-child of the
* page, or overflow pages pointed to by cells.
*/
if (pBt.autoVacuum) {
for (k = j; k < cntNew[i]; k++) {
assert (k < nMaxCells);
if (SqlJetUtility.getUnsignedByte(aFrom, k) == 0xFF
|| apCopy[SqlJetUtility.getUnsignedByte(aFrom, k)].pgno != pNew.pgno) {
pNew.ptrmapPutOvfl(k - j);
if (leafCorrection == 0) {
pBt.ptrmapPut(get4byte(apCell[k]), SqlJetBtreeShared.PTRMAP_BTREE, pNew.pgno);
}
}
}
}
j = cntNew[i];
/*
* If the sibling page assembled above was not the right-most
* sibling, insert a divider cell into the parent page.
*/
if (i < nNew - 1 && j < nCell) {
ISqlJetMemoryPointer pCell;
ISqlJetMemoryPointer pTemp;
int sz;
assert (j < nMaxCells);
pCell = apCell[j];
sz = szCell[j] + leafCorrection;
pTemp = pointer(aSpace2, iSpace2);
if (!pNew.leaf) {
memcpy(pNew.aData, 8, pCell, 0, 4);
if (pBt.autoVacuum
&& (SqlJetUtility.getUnsignedByte(aFrom, j) == 0xFF || apCopy[SqlJetUtility
.getUnsignedByte(aFrom, j)].pgno != pNew.pgno)) {
pBt.ptrmapPut(get4byte(pCell), SqlJetBtreeShared.PTRMAP_BTREE, pNew.pgno);
}
} else if (leafData) {
/*
* If the tree is a leaf-data tree, and the siblings are
* leaves, then there is no divider cell in apCell[].
* Instead, the divider cell consists of the integer key
* for the right-most cell of the sibling-page assembled
* above only.
*/
j--;
SqlJetBtreeCellInfo info = pNew.parseCellPtr(apCell[j]);
pCell = pTemp;
sz = pParent.fillInCell(pCell, null, info.nKey, null, 0, 0);
pTemp = null;
} else {
final int c = pCell.getBuffer().getSize() - pCell.getPointer();
ISqlJetMemoryPointer p = SqlJetUtility.allocatePtr(c + 4);
movePtr(p, 4);
SqlJetUtility.memcpy(p, pCell, c);
pCell = p;
movePtr(pCell, -4);
/*
* Obscure case for non-leaf-data trees: If the cell at
* pCell was previously stored on a leaf node, and its
* reported size was 4 bytes, then it may actually be
* smaller than this (see sqlite3BtreeParseCellPtr(), 4
* bytes is the minimum size of any cell). But it is
* important to pass the correct size to insertCell(),
* so reparse the cell now.
*
* Note that this can never happen in an SQLite data
* file, as all cells are at least 4 bytes. It only
* happens in b-trees used to evaluate "IN (SELECT ...)"
* and similar clauses.
*/
if (szCell[j] == 4) {
assert (leafCorrection == 4);
sz = pParent.cellSizePtr(pCell);
}
}
iSpace2 += sz;
assert (sz <= pBt.pageSize / 4);
assert (iSpace2 <= pBt.pageSize);
pParent.insertCell(nxDiv, pCell, sz, pTemp, (byte) 4);
assert (pParent.pDbPage.isWriteable());
put4byte(pParent.findOverflowCell(nxDiv), pNew.pgno);
/*
* If this is an auto-vacuum database, and not a leaf-data
* tree, then update the pointer map with an entry for the
* overflow page that the cell just inserted points to (if
* any).
*/
if (pBt.autoVacuum && !leafData) {
pParent.ptrmapPutOvfl(nxDiv);
}
j++;
nxDiv++;
}
/* Set the pointer-map entry for the new sibling page. */
if (pBt.autoVacuum) {
pBt.ptrmapPut(pNew.pgno, SqlJetBtreeShared.PTRMAP_BTREE, pParent.pgno);
}
}
assert (j == nCell);
assert (nOld > 0);
assert (nNew > 0);
if ((pageFlags & SqlJetMemPage.PTF_LEAF) == 0) {
final ISqlJetMemoryPointer zChild = pointer(apCopy[nOld - 1].aData, 8);
memcpy(apNew[nNew - 1].aData, 8, zChild, 0, 4);
if (pBt.autoVacuum) {
pBt.ptrmapPut(get4byte(zChild), SqlJetBtreeShared.PTRMAP_BTREE, apNew[nNew - 1].pgno);
}
}