/**
* Copyright © 2005-2012 Akiban Technologies, Inc. All rights reserved.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Public License v1.0 which
* accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* This program may also be available under different license terms.
* For more information, see www.akiban.com or contact licensing@akiban.com.
*
* Contributors:
* Akiban Technologies, Inc.
*/
package com.persistit;
import java.util.ArrayList;
import java.util.BitSet;
import com.persistit.Buffer.VerifyVisitor;
import com.persistit.CLI.Arg;
import com.persistit.CLI.Cmd;
import com.persistit.CleanupManager.CleanupIndexHole;
import com.persistit.exception.InUseException;
import com.persistit.exception.PersistitException;
import com.persistit.util.Debug;
import com.persistit.util.Util;
/**
* <p>
* A simple integrity checker that traverses all pages within one or more
* {@link Tree}s, verifies the internal structure of each page, and verifies the
* relationships between the pages. Any inconsistency is noted as a
* {@link Fault}.
* </p>
* <p>
* An application creates an <code>IntegrityCheck</code>, invokes its
* {@link #checkVolume} or {@link #checkTree} method to perform the integrity
* check, and then reviews the <code>Fault</code>s available through the
* {@link #getFaults} method.
* </p>
* <p>
* In this version of Persistit, <code>IntegrityCheck</code> operates reliably
* only on quiescent <code>Tree</code>s; if other threads are modifying a
* <code>Tree</code> while <code>IntegrityCheck</code> is reviewing its
* structure, spurious faults are likely to be detected.
* </p>
*
* @version 1.0
*/
public class IntegrityCheck extends Task {
/** Upper bound on the number of {@link Fault}s retained in the fault list. */
final static int MAX_FAULTS = 200;
/** Upper bound on the number of index holes remembered per tree for repair. */
final static int MAX_HOLES_TO_FIX = 1000;
/** Initial budget of right-sibling hops allowed in a single walkRight call. */
final static int MAX_WALK_RIGHT = 1000;
/** Pruning is no longer attempted once this many pruning errors have been counted. */
final static int MAX_PRUNING_ERRORS = 50;
// Volume and tree currently being checked; used to label faults and status.
private Volume _currentVolume;
private Tree _currentTree;
// Pages already visited, indexed by page address; replaced by reset().
private LongBitSet _usedPageBits = new LongBitSet();
// Progress figures reported by getProgress() and getStatus().
private long _totalPages = 0;
private long _pagesVisited = 0;
// Cumulative statistics for everything checked by this instance.
private final Counters _counters = new Counters();
// Per-level traversal state, indexed by tree level: the most recently
// visited buffer at that level, its page address, a position and a key.
private final Buffer[] _edgeBuffers = new Buffer[Exchange.MAX_TREE_DEPTH];
private final long[] _edgePages = new long[Exchange.MAX_TREE_DEPTH];
private final int[] _edgePositions = new int[Exchange.MAX_TREE_DEPTH];
private final Key[] _edgeKeys = new Key[Exchange.MAX_TREE_DEPTH];
// Depth of the tree currently being checked; -1 until initTree() runs.
private int _treeDepth = -1;
// Options, set either by icheck() or through the public setters.
private TreeSelector _treeSelector;
private boolean _suspendUpdates;
private boolean _fixHoles;
private boolean _prune;
private boolean _pruneAndClear;
private boolean _csv;
// Faults recorded so far (capped at MAX_FAULTS) and index holes found in
// the current tree (capped at MAX_HOLES_TO_FIX).
private final ArrayList<Fault> _faults = new ArrayList<Fault>();
private final ArrayList<CleanupIndexHole> _holes = new ArrayList<CleanupIndexHole>();
// Used in checking long values
private final Value _value = new Value((Persistit) null);
// Reused visitor that counts the non-primordial versions of one record.
private final MVVVisitor _versionVisitor = new MVVVisitor();
/**
 * Statistics gathered while pages are traversed. A snapshot can be taken
 * with the copy constructor and later turned into a delta with
 * {@link #difference(Counters)}.
 */
private static class Counters {
    private final static String CSV_HEADERS = "IndexPages,IndexBytes,"
            + "DataPages,DataBytes,LongRecordPages,LongRecordBytes,MvvPages,"
            + "MvvRecords,MvvOverhead,MvvAntiValues,IndexHoles,PrunedPages";

    private long _indexPageCount = 0;
    private long _dataPageCount = 0;
    private long _indexBytesInUse = 0;
    private long _dataBytesInUse = 0;
    private long _longRecordPageCount = 0;
    private long _longRecordBytesInUse = 0;
    private long _indexHoleCount = 0;
    private long _mvvPageCount = 0;
    private long _mvvCount = 0;
    private long _mvvOverhead = 0;
    private long _mvvAntiValues = 0;
    private long _pruningErrorCount = 0;
    private long _prunedPageCount = 0;
    private long _garbagePageCount = 0;

    /** Creates a set of counters that are all zero. */
    Counters() {
    }

    /** Creates a snapshot holding the current values of {@code source}. */
    Counters(final Counters source) {
        this._indexPageCount = source._indexPageCount;
        this._dataPageCount = source._dataPageCount;
        this._indexBytesInUse = source._indexBytesInUse;
        this._dataBytesInUse = source._dataBytesInUse;
        this._longRecordPageCount = source._longRecordPageCount;
        this._longRecordBytesInUse = source._longRecordBytesInUse;
        this._indexHoleCount = source._indexHoleCount;
        this._mvvPageCount = source._mvvPageCount;
        this._mvvCount = source._mvvCount;
        this._mvvOverhead = source._mvvOverhead;
        this._mvvAntiValues = source._mvvAntiValues;
        this._pruningErrorCount = source._pruningErrorCount;
        this._prunedPageCount = source._prunedPageCount;
        this._garbagePageCount = source._garbagePageCount;
    }

    /**
     * Replaces every value in this object with {@code later - this}, i.e.
     * turns an earlier snapshot into the amount accumulated since then.
     */
    void difference(final Counters later) {
        this._indexPageCount = later._indexPageCount - this._indexPageCount;
        this._dataPageCount = later._dataPageCount - this._dataPageCount;
        this._indexBytesInUse = later._indexBytesInUse - this._indexBytesInUse;
        this._dataBytesInUse = later._dataBytesInUse - this._dataBytesInUse;
        this._longRecordPageCount = later._longRecordPageCount - this._longRecordPageCount;
        this._longRecordBytesInUse = later._longRecordBytesInUse - this._longRecordBytesInUse;
        this._indexHoleCount = later._indexHoleCount - this._indexHoleCount;
        this._mvvPageCount = later._mvvPageCount - this._mvvPageCount;
        this._mvvCount = later._mvvCount - this._mvvCount;
        this._mvvOverhead = later._mvvOverhead - this._mvvOverhead;
        this._mvvAntiValues = later._mvvAntiValues - this._mvvAntiValues;
        this._pruningErrorCount = later._pruningErrorCount - this._pruningErrorCount;
        this._prunedPageCount = later._prunedPageCount - this._prunedPageCount;
        this._garbagePageCount = later._garbagePageCount - this._garbagePageCount;
    }

    /**
     * The twelve values that appear in reports, in the column order given
     * by {@link #CSV_HEADERS}.
     */
    private Object[] reportedValues() {
        return new Object[] { _indexPageCount, _indexBytesInUse, _dataPageCount, _dataBytesInUse,
                _longRecordPageCount, _longRecordBytesInUse, _mvvPageCount, _mvvCount, _mvvOverhead,
                _mvvAntiValues, _indexHoleCount, _prunedPageCount };
    }

    @Override
    public String toString() {
        final String layout = "Index pages/bytes: %,d / %,d Data pages/bytes: %,d / %,d"
                + " LongRec pages/bytes: %,d / %,d MVV pages/records/bytes/antivalues: "
                + "%,d / %,d / %,d / %,d Holes %,d Pages pruned %,d";
        return String.format(layout, reportedValues());
    }

    /** Formats the reported values as one comma-separated row. */
    private String toCSV() {
        return String.format("%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d", reportedValues());
    }
}
/**
 * Version visitor that counts the versions of a value other than the
 * primordial one and remembers the offset reported for the last of them.
 */
private static class MVVVisitor implements MVV.VersionVisitor {
    int _lastOffset;
    int _count;

    @Override
    public void init() throws PersistitException {
        _count = 0;
        _lastOffset = 0;
    }

    @Override
    public void sawVersion(final long version, final int offset, final int valueLength) throws PersistitException {
        if (version == MVV.PRIMORDIAL_VALUE_VERSION) {
            return;
        }
        _lastOffset = offset;
        _count++;
    }
}
/**
 * Visitor applied to every data record during page verification. Records
 * holding at least one non-primordial version are counted as MVVs; the
 * bytes preceding the last such version are counted as overhead.
 */
private final VerifyVisitor _visitor = new VerifyVisitor() {
    @Override
    protected void visitDataRecord(final Key key, final int foundAt, final int tail, final int klength,
            final int offset, final int length, final byte[] bytes) throws PersistitException {
        MVV.visitAllVersions(_versionVisitor, bytes, offset, length);
        if (_versionVisitor._count <= 0) {
            return;
        }
        _counters._mvvCount++;
        final int lastVersionOffset = _versionVisitor._lastOffset;
        // Everything between the start of the value and the last version.
        final int overhead = lastVersionOffset - offset;
        final int lastVersionLength = length - overhead;
        _counters._mvvOverhead += overhead;
        final boolean antiValue = lastVersionLength == 1 && bytes[lastVersionOffset] == MVV.TYPE_ANTIVALUE;
        if (antiValue) {
            _counters._mvvOverhead++;
            _counters._mvvAntiValues++;
        }
    }
};
/**
 * Command-line factory: builds an <code>IntegrityCheck</code> task
 * configured from the supplied arguments. The task is returned without
 * being run.
 */
@Cmd("icheck")
public static IntegrityCheck icheck(
        @Arg("trees|string|Tree selector: Volumes/Trees to check") final String treeSelectorString,
        @Arg("_flag|r|Use regex expression") final boolean regex,
        @Arg("_flag|u|Don't freeze updates (Default is to freeze updates)") final boolean dontSuspendUpdates,
        @Arg("_flag|h|Fix index holes") final boolean fixHoles,
        @Arg("_flag|p|Prune MVV values") final boolean prune,
        @Arg("_flag|P|Prune MVV values and clear TransactionIndex") final boolean pruneAndClear,
        @Arg("_flag|v|Verbose results") final boolean verbose, @Arg("_flag|c|Format as CSV") final boolean csv)
        throws Exception {
    final TreeSelector selector = TreeSelector.parseSelector(treeSelectorString, regex, '\\');
    final int verbosity = verbose ? LOG_VERBOSE : LOG_NORMAL;

    final IntegrityCheck check = new IntegrityCheck();
    check._treeSelector = selector;
    check._suspendUpdates = !dontSuspendUpdates;
    check._fixHoles = fixHoles;
    // -P implies -p
    check._prune = prune || pruneAndClear;
    check._pruneAndClear = pruneAndClear;
    check._csv = csv;
    check.setMessageLogVerbosity(verbosity);
    return check;
}
/**
 * Package-private no-argument constructor for use in a {@link Task}; this
 * is the constructor used by {@link #icheck}.
 */
IntegrityCheck() {
    super();
}
/**
 * Creates an <code>IntegrityCheck</code> bound to the supplied
 * {@link Persistit} instance.
 *
 * @param persistit
 *            the instance whose volumes and trees will be checked
 */
public IntegrityCheck(final Persistit persistit) {
    super(persistit);
    this._persistit = persistit;
}
/**
 * Runs the integrity check over every tree chosen by the tree selector and
 * posts a summary. When the first selected tree of a volume is that
 * volume's directory tree the whole volume is checked through
 * {@link #checkVolume}; otherwise each tree is checked individually.
 * Updates are suspended for the duration of the run if requested and not
 * already suspended, and are always resumed on exit in that case.
 */
@Override
protected void runTask() {
    if (_pruneAndClear && !_treeSelector.isSelectAll()) {
        postMessage("The pruneAndClear (-P) flag requires all trees (trees=*) to be selected", LOG_NORMAL);
        return;
    }
    final boolean freeze = !_persistit.isUpdateSuspended() && (_suspendUpdates);
    boolean needsToDrain = false;
    if (freeze) {
        _persistit.setUpdateSuspended(true);
        needsToDrain = true;
    }
    if (_csv) {
        postMessage("Volume,Tree,Faults," + Counters.CSV_HEADERS, LOG_NORMAL);
    }
    final long startTimestamp = _persistit.getTimestampAllocator().updateTimestamp();
    try {
        Volume previousVolume = null;
        for (final Tree tree : _persistit.getSelectedTrees(_treeSelector)) {
            final Volume volume = tree.getVolume();
            boolean checkWholeVolume = false;
            if (volume != previousVolume) {
                reset();
                if (tree == volume.getDirectoryTree()) {
                    checkWholeVolume = true;
                } else {
                    //
                    // checkVolume sets the progress total itself; for
                    // tree-by-tree checking it has to be set here, on the
                    // field, so that getProgress() and getStatus() are
                    // meaningful.
                    //
                    _totalPages = volume.getStorage().getNextAvailablePage();
                }
            }
            previousVolume = volume;
            try {
                if (needsToDrain) {
                    needsToDrain = false;
                    //
                    // Updates were suspended by this task just now. Delay
                    // for a few seconds, once, to allow completion of any
                    // update operations currently in progress.
                    //
                    Util.sleep(3000);
                }
                if (checkWholeVolume) {
                    checkVolume(volume);
                } else {
                    checkTree(tree);
                }
            } catch (final PersistitException pe) {
                // Report and carry on with the next tree.
                postMessage(pe.toString(), LOG_NORMAL);
            }
        }
        _currentVolume = null;
        _currentTree = null;
        final int faults = _faults.size();
        if (_csv) {
            postMessage(String.format("\"%s\",\"%s\",%d,%s", "*", "*", faults, _counters.toCSV()), LOG_NORMAL);
        } else {
            postMessage("Total " + toString(), LOG_NORMAL);
        }
        if (_pruneAndClear) {
            // Only clear the transaction index if every MVV page was
            // pruned without error and no faults were found.
            if (_faults.isEmpty() && _counters._mvvPageCount == _counters._prunedPageCount
                    && _counters._pruningErrorCount == 0) {
                final int count = _persistit.getTransactionIndex().resetMVVCounts(startTimestamp);
                postMessage(String.format("%,d aborted transactions were cleared by pruning", count), LOG_NORMAL);
            } else {
                postMessage("PruneAndClear failed to remove all aborted MVVs", LOG_NORMAL);
            }
        }
        endMessage(LOG_NORMAL);
    } catch (final PersistitException e) {
        postMessage(e.toString(), LOG_NORMAL);
        endMessage(LOG_NORMAL);
    } finally {
        if (freeze) {
            _persistit.setUpdateSuspended(false);
        }
    }
}
/**
 * Name of the resource currently being checked: the current volume's name,
 * followed by ":" and the tree name when a tree is current.
 */
private String resourceName() {
    final String volumeName = _currentVolume.getName();
    if (_currentTree == null) {
        return volumeName;
    }
    return volumeName + ":" + _currentTree.getName();
}
/** Display name for a volume: simply the volume's name. */
private String resourceName(final Volume vol) {
    final String volumeName = vol.getName();
    return volumeName;
}
/** Display name for a tree, in the form <i>volumeName</i>:<i>treeName</i>. */
private String resourceName(final Tree tree) {
    final String volumeName = tree.getVolume().getName();
    final String treeName = tree.getName();
    return volumeName + ":" + treeName;
}
/**
 * Formats a count with its noun, appending "s" to the noun unless the
 * count is exactly one.
 */
private String plural(final int n, final String m) {
    return n == 1 ? "1 " + m : String.format("%,d %ss", n, m);
}
/**
 * Records a fault found in the tree currently being checked. The fault is
 * always posted to the verbose log but is only retained while fewer than
 * {@link #MAX_FAULTS} faults are held.
 */
private void addFault(final String description, final long page, final int level, final int position) {
    final Fault found = new Fault(resourceName(), this, description, page, _treeDepth, level, position);
    final boolean roomLeft = _faults.size() < MAX_FAULTS;
    if (roomLeft) {
        _faults.add(found);
    }
    postMessage(found.toString(), LOG_VERBOSE);
}
/**
 * Records a fault found while checking a volume's garbage structure. Works
 * like {@link #addFault} except that the fault is created with a fixed
 * depth of 3 rather than the depth of the current tree.
 */
private void addGarbageFault(final String description, final long page, final int level, final int position) {
    final int garbageDepth = 3;
    final Fault found = new Fault(resourceName(), this, description, page, garbageDepth, level, position);
    final boolean roomLeft = _faults.size() < MAX_FAULTS;
    if (roomLeft) {
        _faults.add(found);
    }
    postMessage(found.toString(), LOG_VERBOSE);
}
/**
 * Prepares per-tree state: makes the tree and its volume current, forgets
 * holes found in the previous tree, records the tree depth and clears the
 * per-level edge arrays.
 */
private void initTree(final Tree tree) {
    _currentTree = tree;
    _currentVolume = tree.getVolume();
    _treeDepth = tree.getDepth();
    _holes.clear();
    for (int level = 0; level < Exchange.MAX_TREE_DEPTH; level++) {
        _edgeBuffers[level] = null;
        _edgeKeys[level] = null;
        _edgePages[level] = 0;
        _edgePositions[level] = 0;
    }
}
/**
 * Reports the mode of operation. When <code>true</code>, threads
 * attempting to perform updates are blocked while the integrity check is
 * in progress.
 *
 * @return <code>true</code> if updates are suspended during the check
 */
public boolean isSuspendUpdates() {
    return this._suspendUpdates;
}
/**
 * Sets the mode of operation. When <code>true</code>, threads attempting
 * to perform updates are blocked while the integrity check is in progress.
 *
 * @param suspendUpdates
 *            <code>true</code> to suspend updates during the check
 */
public void setSuspendUpdates(final boolean suspendUpdates) {
    this._suspendUpdates = suspendUpdates;
}
/**
 * Selects the output format. In CSV mode the output is written as
 * comma-separated values that can be imported into a spreadsheet.
 *
 * @param csvMode
 *            <code>true</code> to enable CSV output
 */
public void setCsvMode(final boolean csvMode) {
    this._csv = csvMode;
}
/**
 * Reports whether CSV mode is enabled. In CSV mode the output is written
 * as comma-separated values that can be imported into a spreadsheet.
 *
 * @return <code>true</code> if CSV mode is enabled.
 */
public boolean isCsvMode() {
    return this._csv;
}
/**
 * Reports whether missing index pointers should be repaired when an index
 * "hole" is discovered.
 *
 * @return <code>true</code> if IntegrityCheck will attempt to fix holes.
 */
public boolean isFixHolesEnabled() {
    return this._fixHoles;
}
/**
 * Reports whether pages containing MVV values should be pruned.
 *
 * @return <code>true</code> if IntegrityCheck will attempt to prune MVV
 *         values.
 */
public boolean isPruneEnabled() {
    return this._prune;
}
/**
 * Sets whether missing index pointers should be repaired when an index
 * "hole" is discovered.
 *
 * @param fixHoles
 *            <code>true</code> to attempt to fix holes
 */
public void setFixHolesEnabled(final boolean fixHoles) {
    this._fixHoles = fixHoles;
}
/**
 * Sets whether <code>IntegrityCheck</code> should attempt to prune pages
 * containing MVV values.
 *
 * @param prune
 *            <code>true</code> to attempt to prune MVV values
 */
public void setPruneEnabled(final boolean prune) {
    this._prune = prune;
}
/**
 * Indicates whether any faults have been recorded.
 *
 * @return <i>true</i> if at least one fault was detected
 */
public boolean hasFaults() {
    return !_faults.isEmpty();
}
/**
 * Returns the recorded faults as a newly allocated array.
 *
 * @return An array of detected Faults
 */
public Fault[] getFaults() {
    final Fault[] snapshot = new Fault[_faults.size()];
    return _faults.toArray(snapshot);
}
/**
 * Returns the number of index pages counted so far by the integrity
 * checking process.
 *
 * @return The count of pages
 */
public long getIndexPageCount() {
    return this._counters._indexPageCount;
}
/**
 * Returns the number of data pages counted so far by the integrity
 * checking process.
 *
 * @return The count of pages
 */
public long getDataPageCount() {
    return this._counters._dataPageCount;
}
/**
 * Returns the number of long record pages counted so far by the integrity
 * checking process. Long record pages contain segments of long record
 * values.
 *
 * @return The count of pages
 */
public long getLongRecordPageCount() {
    return this._counters._longRecordPageCount;
}
/**
 * Returns the number of bytes in use (excluding page structure overhead)
 * in the index pages counted so far.
 *
 * @return The count of allocated bytes
 */
public long getIndexByteCount() {
    return this._counters._indexBytesInUse;
}
/**
 * Returns the number of bytes in use (excluding page structure overhead)
 * in the data pages counted so far.
 *
 * @return The count of allocated bytes
 */
public long getDataByteCount() {
    return this._counters._dataBytesInUse;
}
/**
 * Returns the number of bytes in use (excluding page structure overhead)
 * in the long record pages counted so far.
 *
 * @return The count of allocated bytes
 */
public long getLongRecordByteCount() {
    return this._counters._longRecordBytesInUse;
}
/**
 * @return Count of records found to contain multiple versions. These will
 *         be condensed to primordial values by the CLEANUP_MANAGER.
 */
public long getMvvCount() {
    return this._counters._mvvCount;
}
/**
 * @return Approximate number of overhead bytes occupied by multi-version
 *         values. This space will be condensed by the CLEANUP_MANAGER.
 */
public long getMvvOverhead() {
    return this._counters._mvvOverhead;
}
/**
 * @return Count of records containing AntiValues. These records will be
 *         removed from the tree by the CLEANUP_MANAGER.
 */
public long getMvvAntiValues() {
    return this._counters._mvvAntiValues;
}
/**
 * @return Count of pages for which an expected index pointer is missing
 */
public long getIndexHoleCount() {
    return this._counters._indexHoleCount;
}
/**
 * @return Count of pages having MVV values that were pruned
 */
public long getPrunedPagesCount() {
    return this._counters._prunedPageCount;
}
/**
 * @return Count of errors encountered while pruning pages
 */
public long getPruningErrorCount() {
    return this._counters._pruningErrorCount;
}
/**
 * @return Count of garbage pages encountered while checking volumes.
 */
public long getGarbagePageCount() {
    return this._counters._garbagePageCount;
}
/**
 * Returns an approximate indication of progress: the ratio of pages
 * visited to the total page count. When no total page count is known
 * (it is zero) the result is 1.0.
 *
 * @return The progress indicator, nominally on a scale of 0.0 to 1.0.
 */
public double getProgress() {
    return _totalPages == 0 ? 1 : ((double) _pagesVisited) / ((double) _totalPages);
}
/**
 * Returns the current status preceded by one line per recorded fault, or
 * the status followed by " - no faults" when none have been recorded.
 */
@Override
public String getStatusDetail() {
    final String status = getStatus();
    if (_faults.isEmpty()) {
        return status + " - no faults";
    }
    final StringBuilder detail = new StringBuilder();
    for (final Fault fault : _faults) {
        detail.append(fault).append(Util.NEW_LINE);
    }
    return detail.append(status).toString();
}
/**
 * Returns a displayable String naming the resource currently being checked
 * together with the pages-visited and total-pages counts.
 *
 * @return The progress description, or <code>null</code> when no volume is
 *         currently being checked
 */
@Override
public String getStatus() {
    if (_currentVolume != null) {
        return _pagesVisited + "/" + _totalPages + " (" + resourceName() + ")";
    }
    return null;
}
/**
 * Returns a displayable summary of the state of the integrity check: the
 * number of recorded faults followed by the page and byte counts. Faults
 * themselves are not listed.
 *
 * @return The displayable string
 */
@Override
public String toString() {
    final boolean withDetails = false;
    return toString(withDetails);
}
/**
 * Creates a displayable representation of the state of the integrity
 * check: the number of recorded faults followed by the page and byte
 * counts, optionally followed by one line per recorded fault.
 *
 * @param details
 *            If <i>true</i> the returned representation will include a
 *            list of all recorded Faults.
 *
 * @return the String representation.
 */
public String toString(final boolean details) {
    final String summary = String.format("Faults:%,3d %s", _faults.size(), _counters);
    if (!details) {
        return summary;
    }
    final StringBuilder text = new StringBuilder(summary);
    for (final Fault fault : _faults) {
        text.append(Util.NEW_LINE).append(" ").append(fault);
    }
    return text.toString();
}
/**
 * A representation of an error or inconsistency within a {@link Tree}.
 */
public static class Fault {
    String _treeName;
    String _description;
    // Page addresses from the faulty page (element 0) up towards the root.
    long[] _path;
    int _level;
    int _depth;
    int _position;

    /**
     * @param treeName
     *            display name of the resource in which the fault was found
     * @param work
     *            the check in progress; its per-level edge page addresses
     *            supply the path above the faulty page
     * @param description
     *            description of the fault
     * @param page
     *            address of the faulty page
     * @param depth
     *            depth of the structure being traversed
     * @param level
     *            level at which the fault was found
     * @param position
     *            position within the page, or 0 if not applicable
     */
    Fault(final String treeName, final IntegrityCheck work, final String description, final long page,
            final int depth, final int level, final int position) {
        _treeName = treeName;
        _description = description;
        //
        // _depth must stay equal to the depth used to size _path because
        // toString() indexes _path from it. (It was formerly overwritten
        // with work._treeDepth, which is unrelated to the depth supplied
        // for garbage faults and could make toString() index out of
        // bounds.)
        //
        _depth = depth;
        _level = level;
        _position = position;
        // Always leave room for the faulty page itself.
        _path = new long[Math.max(1, depth - level)];
        for (int index = depth; --index > level;) {
            final boolean known = index >= 0 && index < work._edgePages.length;
            _path[index - level] = known ? work._edgePages[index] : 0;
        }
        _path[0] = page;
    }

    /**
     * Returns a displayable description of this Fault.
     *
     * @return The description
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append(" Tree ");
        sb.append(_treeName);
        sb.append(" ");
        sb.append(_description);
        sb.append(" (path ");
        if (_depth <= _level) {
            // Degenerate depth/level: only the faulty page is known.
            sb.append(String.format("%,d", _path[0]));
        }
        for (int index = _depth; --index >= _level;) {
            if (index < _depth - 1) {
                sb.append("->");
            }
            sb.append(String.format("%,d", _path[index - _level]));
        }
        if (_position != 0) {
            sb.append(":");
            sb.append(_position & Buffer.P_MASK);
        }
        sb.append(")");
        if (_depth >= 0) {
            sb.append(" depth=");
            sb.append(_depth);
        }
        return sb.toString();
    }
}
/**
 * A bit set addressed by <code>long</code> rather than <code>int</code>
 * indexes.
 */
private static class LongBitSet {
    //
    // Temporary implementation delegates to an int-indexed BitSet. This
    // limits its use to volumes with fewer than Integer.MAX_VALUE pages.
    //
    BitSet _bitSet = new BitSet();

    /** Sets or clears the bit at <code>index</code>. */
    public void set(final long index, final boolean value) {
        if (index > Integer.MAX_VALUE) {
            throw new RuntimeException("Large page addresses not implemented yet.");
        }
        final int bit = (int) index;
        if (value) {
            _bitSet.set(bit);
        } else {
            _bitSet.clear(bit);
        }
    }

    /** Returns the bit at <code>index</code>. */
    public boolean get(final long index) {
        if (index > Integer.MAX_VALUE) {
            throw new RuntimeException("Large page addresses not implemented yet.");
        }
        final int bit = (int) index;
        return _bitSet.get(bit);
    }
}
/**
 * Resets this <code>IntegrityCheck</code> to handle a new volume: no
 * volume or tree is current, no pages are marked as visited and the
 * progress figures are zero. Faults and statistics are retained.
 */
private void reset() {
    _currentTree = null;
    _currentVolume = null;
    _pagesVisited = 0;
    _totalPages = 0;
    _usedPageBits = new LongBitSet();
}
/**
 * Performs the integrity checking process on a {@link Volume}: its
 * directory tree, every tree it names, and finally its garbage structure.
 * Logs any detected Faults for subsequent review.
 *
 * @param volume
 *            The {@link Volume} to check.
 * @return <i>true</i> if the volume is clean (no Faults were added by this
 *         call).
 * @throws PersistitException
 */
public boolean checkVolume(final Volume volume) throws PersistitException {
    reset();
    final int faultsBefore = _faults.size();
    if (!_csv) {
        postMessage("Volume " + resourceName(volume) + " - checking", LOG_VERBOSE);
    }
    // Snapshot, so that only this volume's contribution is reported.
    final Counters volumeCounters = new Counters(_counters);
    _currentVolume = volume;
    final String[] treeNames = volume.getTreeNames();
    // This is just for the progress counter.
    _totalPages = volume.getStorage().getNextAvailablePage();

    final Tree directoryTree = volume.getDirectoryTree();
    if (directoryTree != null) {
        checkTree(directoryTree);
    }
    for (final String treeName : treeNames) {
        final Tree tree = volume.getTree(treeName, false);
        if (tree != null) {
            checkTree(tree);
        }
    }
    final long garbageRoot = volume.getStructure().getGarbageRoot();
    checkGarbage(garbageRoot);

    volumeCounters.difference(_counters);
    final int newFaults = _faults.size() - faultsBefore;
    if (_csv) {
        final String row = String.format("\"%s\",\"%s\",%d,%s", resourceName(volume), "*", newFaults,
                volumeCounters.toCSV());
        postMessage(row, LOG_NORMAL);
    } else {
        final String line = "Volume " + resourceName(volume) + String.format(" Faults:%,3d ", newFaults)
                + volumeCounters.toString();
        postMessage(line, LOG_VERBOSE);
    }
    return newFaults == 0;
}
/**
 * Performs the integrity checking process on a {@link Tree}. Logs any
 * detected Faults for subsequent review. If index holes are found in this
 * tree they are reported and, when hole fixing is enabled, offered to the
 * cleanup manager for repair.
 *
 * @param tree
 *            The <code>Tree</code> to check.
 * @return <i>true</i> if the tree is clean (no Faults were added by this
 *         call).
 * @throws PersistitException
 *             in particular {@link InUseException} if the tree cannot be
 *             claimed
 */
public boolean checkTree(final Tree tree) throws PersistitException {
    final String messageStart;
    if (_csv) {
        messageStart = String.format("\"%s\",\"%s\"", tree.getVolume().getName(), tree.getName());
    } else {
        messageStart = " Tree " + resourceName(tree);
    }
    // Snapshot, so that only this tree's contribution is reported.
    final Counters treeCounters = new Counters(_counters);
    int faults = _faults.size();
    if (!tree.claim(true)) {
        throw new InUseException("Unable to acquire claim on " + resourceName(tree));
    }
    try {
        try {
            initTree(tree);
            checkTree(new Key(_persistit), 0, tree.getRootPageAddr(), _treeDepth - 1, tree);
        } finally {
            //
            // Release all the buffers.
            //
            for (int index = 0; index < Exchange.MAX_TREE_DEPTH; index++) {
                final Buffer buffer = _edgeBuffers[index];
                if (buffer != null) {
                    buffer.release();
                    _edgeBuffers[index] = null;
                    _edgePages[index] = 0;
                }
            }
            _currentTree = null;
        }
    } finally {
        tree.release();
    }
    faults = _faults.size() - faults;
    treeCounters.difference(_counters);
    //
    // Use the per-tree delta, not the cumulative counter: otherwise every
    // tree checked after the first one with holes would be reported as
    // having holes, with an ever-growing count.
    //
    if (treeCounters._indexHoleCount > 0) {
        postMessage(
                " Tree " + resourceName(tree) + " has "
                        + plural((int) treeCounters._indexHoleCount, "unindexed page"), LOG_NORMAL);
        if (_fixHoles) {
            int offered = 0;
            for (final CleanupIndexHole hole : _holes) {
                if (_persistit.getCleanupManager().offer(hole)) {
                    offered++;
                }
            }
            postMessage(" - enqueued " + offered + " for repair", LOG_NORMAL);
        }
    }
    if (_csv) {
        postMessage(String.format("%s,%d,%s", messageStart, faults, treeCounters.toCSV()), LOG_NORMAL);
    } else {
        postMessage(String.format("%s - Faults:%,3d ", messageStart, faults) + treeCounters.toString(), LOG_VERBOSE);
    }
    return faults == 0;
}
/**
 * Verifies integrity of the subtree rooted in the supplied page. If this
 * page has already been visited, then we have some kind of cycle error. If
 * this page is a data page, then we are at the bottom of the tree. Else we
 * recursively checkTree on each of the subordinate pages.
 * <p>
 * On normal completion the page's buffer is left claimed in the edge
 * buffer array for its level so that the next page visited at that level
 * can be compared with it; the caller of the top-level traversal releases
 * whatever remains there.
 * </p>
 *
 * @param parentKey
 *            key associated with the pointer to this page in its parent
 * @param parent
 *            address of the parent page, or 0 for the root
 * @param page
 *            address of the page to check
 * @param level
 *            level of the page; 0 is the data level
 * @param tree
 *            the tree being checked
 * @throws PersistitException
 */
private void checkTree(final Key parentKey, final long parent, final long page, final int level, final Tree tree)
        throws PersistitException {
    if (level >= Exchange.MAX_TREE_DEPTH) {
        addFault("Tree is too deep", page, level, 0);
        //
        // The per-level edge arrays cannot be indexed at this level, so
        // stop here rather than claim a buffer and then fail.
        //
        return;
    }
    if (_usedPageBits.get(page)) {
        addFault("Page has more than one parent", page, level, 0);
    }
    if (page == 0) {
        addFault("Page 0 not allowed in tree structure", page, level, 0);
    }
    _usedPageBits.set(page, true);
    Buffer buffer = getPage(page);
    _pagesVisited++;
    try {
        if (parent == 0 && buffer.getRightSibling() != 0) {
            addFault("Tree root has a right sibling", page, 0, 0);
        }
    } catch (final Exception e) {
        // Best effort: report through the task log and continue.
        postMessage(e.toString(), LOG_NORMAL);
    }
    try {
        Buffer leftSibling = null;
        Key key;
        if (_edgeBuffers[level] != null) {
            key = _edgeKeys[level];
            leftSibling = walkRight(level, page, key, tree);
            final int compare = key.compareTo(parentKey);
            if (compare != 0) {
                addFault("left sibling final key is " + (compare < 0 ? "less than" : "greater than")
                        + " parent key", page, level, 0);
            }
        } else {
            key = new Key(parentKey);
            _edgeKeys[level] = key;
        }
        Debug.$assert0.t(leftSibling != buffer);
        _edgeBuffers[level] = buffer;
        _edgePages[level] = page;
        if (leftSibling != null) {
            leftSibling.release();
        }
        _edgeKeys[level] = key;
        if (checkPageType(buffer, level, tree) && verifyPage(buffer, page, level, key, tree)) {
            if (buffer.isDataPage()) {
                _counters._dataPageCount++;
                _counters._dataBytesInUse += (buffer.getBufferSize() - buffer.getAvailableSize() - Buffer.DATA_PAGE_OVERHEAD);
                for (int p = Buffer.KEY_BLOCK_START;; p += Buffer.KEYBLOCK_LENGTH) {
                    p = buffer.nextLongRecord(_value, p);
                    if (p == -1) {
                        break;
                    }
                    verifyLongRecord(_value, page, p);
                }
            } else if (buffer.isIndexPage()) {
                _counters._indexPageCount++;
                _counters._indexBytesInUse += (buffer.getBufferSize() - buffer.getAvailableSize() - Buffer.INDEX_PAGE_OVERHEAD);
                //
                // Resetting the key because we are going to re-traverse
                // the same page, now handling each downpointer.
                //
                key.clear();
                //
                // Here we work our way through the index page.
                //
                for (int p = Buffer.KEY_BLOCK_START;; p += Buffer.KEYBLOCK_LENGTH) {
                    final int foundAt = buffer.nextKey(key, p);
                    // If the exact bit is not set, then next() reached
                    // the end of keyblocks.
                    if ((foundAt & Buffer.EXACT_MASK) == 0)
                        break;
                    final long child = buffer.getPointer(foundAt);
                    if (child == -1 && buffer.isAfterRightEdge(foundAt)) {
                        break;
                    }
                    if (child <= 0 || child > Buffer.MAX_VALID_PAGE_ADDR) {
                        addFault("Invalid index pointer value " + child, page, level, foundAt);
                        //
                        // Do not descend: an out-of-range address cannot
                        // be looked up in the visited-page set or read.
                        //
                        continue;
                    }
                    // Recursively check the subtree.
                    checkTree(key, page, child, level - 1, tree);
                }
            } else {
                throw new RuntimeException("should never happen!");
            }
        }
        //
        // because the buffer should be left claimed in the
        // _edgeBuffers array in the normal case.
        //
        buffer = null;
    } finally {
        if (buffer != null) {
            //
            // Abnormal exit. Only forget the edge entry if it refers to
            // this buffer. If the failure happened before this buffer was
            // installed (e.g. inside walkRight), the entry still holds the
            // previous, still-claimed edge buffer, which must remain there
            // so that it gets released with the other edge buffers.
            //
            if (_edgeBuffers[level] == buffer) {
                _edgeBuffers[level] = null;
                _edgePages[level] = 0;
            }
            buffer.release();
        }
    }
}
/**
 * Checks the garbage structure of the current volume by following the
 * right-sibling links starting at the supplied root page and checking each
 * page reached. Traversal stops at a zero link, or after reaching a page
 * that had already been visited (which is reported as a fault by
 * {@link #checkGarbagePage} and would otherwise allow an endless loop).
 *
 * @param garbageRootPage
 *            address of the first garbage page, or 0 if there is none
 * @throws PersistitException
 */
private void checkGarbage(final long garbageRootPage) throws PersistitException {
    long garbagePageAddress = garbageRootPage;
    boolean first = true;
    while (garbagePageAddress != 0) {
        final Buffer garbageBuffer = getPage(garbagePageAddress);
        final boolean alreadyVisited;
        try {
            if (first) {
                _edgePages[0] = garbagePageAddress;
                first = false;
            }
            alreadyVisited = _usedPageBits.get(garbagePageAddress);
            checkGarbagePage(garbageBuffer);
            _pagesVisited++;
            garbagePageAddress = garbageBuffer.getRightSibling();
        } finally {
            // Release even if checking the page failed.
            garbageBuffer.release();
        }
        if (alreadyVisited) {
            break;
        }
    }
    _edgePages[0] = 0;
}
/**
 * Checks one garbage page: verifies its type, that it has not been visited
 * before and that its allocated area is a whole number of garbage blocks,
 * then checks the page chain described by each garbage block.
 *
 * @param garbageBuffer
 *            claimed buffer holding the page to check
 * @throws PersistitException
 */
private void checkGarbagePage(final Buffer garbageBuffer) throws PersistitException {
    final long page = garbageBuffer.getPageAddress();
    if (!garbageBuffer.isGarbagePage()) {
        addGarbageFault("Unexpected page type " + garbageBuffer.getPageType() + " expected a garbage page", page,
                1, 0);
        return;
    }
    if (_usedPageBits.get(page)) {
        addGarbageFault("Garbage page is referenced by multiple parents", page, 1, 0);
        return;
    }
    _counters._garbagePageCount++;
    final int next = garbageBuffer.getAlloc();
    final int size = garbageBuffer.getBufferSize();
    final int count = (size - next) / Buffer.GARBAGE_BLOCK_SIZE;
    if (count * Buffer.GARBAGE_BLOCK_SIZE != (size - next)) {
        addGarbageFault("Garbage page is malformed: _alloc=" + next + " is not at a multiple of "
                + Buffer.GARBAGE_BLOCK_SIZE + " bytes", page, 1, 0);
    }
    _usedPageBits.set(page, true);
    _edgePages[1] = page;
    //
    // Visit each complete garbage block. The chain end points must be read
    // at the block's own offset p; reading them at the allocation offset
    // would examine the first block over and over. Iterating by block
    // count also avoids reading a partial trailing block on a malformed
    // page.
    //
    for (int block = 0; block < count; block++) {
        final int p = next + block * Buffer.GARBAGE_BLOCK_SIZE;
        final long left = garbageBuffer.getGarbageChainLeftPage(p);
        final long right = garbageBuffer.getGarbageChainRightPage(p);
        _edgePositions[1] = p;
        checkGarbageChain(left, right);
    }
    _edgePages[1] = 0;
}
/**
 * Follows a chain of pages via right-sibling links, starting at
 * <code>left</code> and stopping at a zero link or on reaching
 * <code>right</code> (which is not itself visited). Each page is counted as
 * a garbage page; a page that was already visited ends the walk with a
 * fault, and a page that is not a data, index or long record page is
 * reported as a fault.
 *
 * @throws PersistitException
 */
private void checkGarbageChain(final long left, final long right) throws PersistitException {
    _edgePages[2] = left;
    for (long current = left; current != 0 && current != right;) {
        if (_usedPageBits.get(current)) {
            addGarbageFault("Page on garbage chain is referenced by multiple parents", current, 0, 0);
            return;
        }
        final Buffer chainBuffer = getPage(current);
        final boolean expectedType = chainBuffer.isDataPage() || chainBuffer.isIndexPage()
                || chainBuffer.isLongRecordPage();
        if (!expectedType) {
            addGarbageFault("Page of type " + chainBuffer.getPageTypeName() + " found on garbage page", current,
                    0, 0);
        }
        _counters._garbagePageCount++;
        _pagesVisited++;
        current = chainBuffer.getRightSibling();
        chainBuffer.release();
    }
    _edgePages[2] = 0;
}
/**
 * Checks that the page's type matches its level: the expected type is
 * <code>Buffer.PAGE_TYPE_DATA + level</code>. A mismatch is recorded as a
 * fault.
 *
 * @return <code>true</code> if the type is as expected
 */
private boolean checkPageType(final Buffer buffer, final int level, final Tree tree) {
    final int actualType = buffer.getPageType();
    final boolean matches = actualType == Buffer.PAGE_TYPE_DATA + level;
    if (!matches) {
        addFault("Unexpected page type " + actualType, buffer.getPageAddress(), level, 0);
    }
    return matches;
}
/**
 * Starting from the edge buffer recorded for <code>level</code>, follows
 * right-sibling links until reaching the page whose right sibling is
 * <code>toPage</code>. Every page passed on the way is a page that has no
 * pointer from the level above (an index "hole"); each is counted,
 * remembered for repair (up to {@link #MAX_HOLES_TO_FIX}) and verified.
 * <p>
 * Buffer ownership: the returned buffer is claimed and must be released by
 * the caller. On success this is the last page walked and the starting
 * buffer has been released if it is a different buffer. When a fault stops
 * the walk the starting buffer is returned, still claimed, and any other
 * buffer claimed here has been released.
 * </p>
 *
 * @param level
 *            tree level being walked
 * @param toPage
 *            address of the page being searched for
 * @param key
 *            key advanced by page verification; cleared when the walk is
 *            abandoned
 * @param tree
 *            the tree being checked
 * @return the claimed left sibling of <code>toPage</code>, the starting
 *         buffer if the walk was abandoned, or <code>null</code> if there
 *         is no edge buffer at this level
 * @throws PersistitException
 */
private Buffer walkRight(final int level, final long toPage, final Key key, final Tree tree)
        throws PersistitException {
    final Buffer startingBuffer = _edgeBuffers[level];
    if (startingBuffer == null)
        return null;
    Buffer buffer = startingBuffer;
    if (buffer.getPageAddress() == toPage) {
        addFault("Overlapping page", toPage, level, 0);
        return startingBuffer;
    }
    int walkCount = MAX_WALK_RIGHT;
    Buffer oldBuffer = null;
    try {
        while (buffer.getRightSibling() != toPage) {
            final long page = buffer.getRightSibling();
            if (startingBuffer.getPageAddress() == page) {
                addFault("Right pointer cycle", page, level, 0);
                oldBuffer = buffer;
                return startingBuffer;
            }
            //
            // Validate the address before treating the page as a hole so
            // that a broken link (including a zero link at the end of the
            // level) is neither counted nor enqueued for repair.
            //
            if (page <= 0 || page > Buffer.MAX_VALID_PAGE_ADDR) {
                addFault(
                        String.format("Invalid right sibling address in page %,d after walking right %,d",
                                buffer.getPageAddress(), MAX_WALK_RIGHT - walkCount),
                        startingBuffer.getPageAddress(), level, 0);
                key.clear();
                oldBuffer = buffer;
                return startingBuffer;
            }
            _counters._indexHoleCount++;
            final int treeHandle = _currentTree.getHandle();
            if (treeHandle != 0 && _holes.size() < MAX_HOLES_TO_FIX) {
                _holes.add(new CleanupIndexHole(treeHandle, page, level));
            }
            if (walkCount-- <= 0) {
                // Report the limit actually applied by this method.
                addFault("More than " + MAX_WALK_RIGHT + " unindexed siblings", startingBuffer.getPageAddress(),
                        level, 0);
                key.clear();
                oldBuffer = buffer;
                return startingBuffer;
            }
            oldBuffer = buffer;
            buffer = getPage(page);
            if (oldBuffer != startingBuffer) {
                oldBuffer.release();
                oldBuffer = null;
            }
            final boolean ok = verifyPage(buffer, page, level, key, tree);
            if (!ok) {
                key.clear();
                oldBuffer = buffer;
                return startingBuffer;
            }
            _pagesVisited++;
        }
        if (startingBuffer != buffer) {
            startingBuffer.release();
        }
        return buffer;
    } finally {
        // Releases whichever intermediate buffer is still claimed; the
        // starting buffer is never released here.
        if (oldBuffer != null && oldBuffer != startingBuffer) {
            oldBuffer.release();
            oldBuffer = null;
        }
    }
}
/**
 * Verifies the internal structure of one page. Data and index pages are
 * verified with the record visitor; if that finds records with multiple
 * versions the page is counted as an MVV page and, when pruning is enabled,
 * the volume is writable and the pruning error limit has not been reached,
 * the page is pruned. Pruning failures are counted, not reported as faults.
 *
 * @return <code>false</code> if the buffer holds a different page or page
 *         verification reported an error; <code>true</code> otherwise,
 *         including when an unexpected exception was recorded as a fault
 */
private boolean verifyPage(final Buffer buffer, final long page, final int level, final Key key, final Tree tree) {
    if (buffer.getPageAddress() != page) {
        addFault("Buffer contains wrong page " + buffer.getPageAddress(), page, level, 0);
        return false;
    }
    try {
        final boolean keyed = buffer.isDataPage() || buffer.isIndexPage();
        if (!keyed) {
            return true;
        }
        final long mvvCountBefore = _counters._mvvCount;
        final PersistitException verifyError = buffer.verify(key, _visitor);
        if (verifyError != null) {
            addFault(verifyError.getMessage(), page, level, 0);
            key.clear();
            return false;
        }
        if (_counters._mvvCount <= mvvCountBefore) {
            // No multi-version records on this page.
            return true;
        }
        _counters._mvvPageCount++;
        final boolean pruneThisPage = _prune && !_currentVolume.isReadOnly()
                && _counters._pruningErrorCount < MAX_PRUNING_ERRORS;
        if (pruneThisPage) {
            try {
                buffer.pruneMvvValues(tree, true);
                _counters._prunedPageCount++;
            } catch (final PersistitException e) {
                _counters._pruningErrorCount++;
            }
        }
    } catch (final Exception e) {
        addFault(e.toString(), page, level, 0);
    }
    return true;
}
/**
 * Verifies a long record: checks the descriptor held in the data page and
 * then follows the chain of long record pages, marking each as visited and
 * accumulating the long record page and byte counts.
 *
 * @param value
 *            holds the encoded long record descriptor
 * @param page
 *            address of the data page containing the descriptor
 * @param foundAt
 *            position of the record within the data page
 * @return <code>false</code> if the descriptor itself is invalid (in which
 *         case the chain is not followed); <code>true</code> otherwise,
 *         even if faults were recorded while following the chain
 * @throws PersistitException
 */
private boolean verifyLongRecord(final Value value, final long page, final int foundAt) throws PersistitException {
    final int size = value.getEncodedSize();
    final byte[] bytes = value.getEncodedBytes();
    if (size != Buffer.LONGREC_SIZE) {
        addFault("Invalid long record stub size (" + size + ")", page, 0, foundAt);
        return false;
    }
    int longSize = Buffer.decodeLongRecordDescriptorSize(bytes, 0);
    final long pointer = Buffer.decodeLongRecordDescriptorPointer(bytes, 0);
    boolean descriptorValid = true;
    if (longSize < Buffer.LONGREC_PREFIX_SIZE) {
        addFault("Invalid long record size (" + longSize + ")", page, 0, foundAt);
        descriptorValid = false;
    }
    if (pointer <= 0 || pointer > Buffer.MAX_VALID_PAGE_ADDR) {
        addFault("Invalid long record pointer (" + pointer + ")", page, 0, foundAt);
        descriptorValid = false;
    }
    if (!descriptorValid) {
        //
        // Both problems have been reported. Do not follow the chain: an
        // out-of-range pointer cannot be looked up in the visited-page set
        // or read, and a bad size makes the length accounting meaningless.
        //
        return false;
    }
    long fromPage = page;
    longSize -= Buffer.LONGREC_PREFIX_SIZE;
    for (long longPage = pointer; longPage != 0;) {
        if (longPage < 0 || longPage > Buffer.MAX_VALID_PAGE_ADDR) {
            // A sibling link read from a chain page is out of range.
            addFault("Invalid long record page address " + longPage + " pointed to by " + fromPage, page, 0,
                    foundAt);
            break;
        }
        if (_usedPageBits.get(longPage)) {
            addFault("Long record page " + longPage + " is multiply linked", page, 0, foundAt);
            break;
        }
        _usedPageBits.set(longPage, true);
        if (longSize <= 0) {
            addFault("Long record chain too long at page " + longPage + " pointed to by " + fromPage, page, 0,
                    foundAt);
            break;
        }
        Buffer longBuffer = null;
        try {
            longBuffer = getPage(longPage);
            if (!longBuffer.isLongRecordPage()) {
                addFault("Invalid long record page " + longPage + ": type=" + longBuffer.getPageTypeName(), page,
                        0, foundAt);
                break;
            }
            // A page contributes at most its payload capacity.
            int segmentSize = longBuffer.getBufferSize() - Buffer.HEADER_SIZE;
            if (segmentSize > longSize)
                segmentSize = longSize;
            longSize -= segmentSize;
            _counters._longRecordBytesInUse += segmentSize;
            _counters._longRecordPageCount++;
            fromPage = longPage;
            longPage = longBuffer.getRightSibling();
        } catch (final Exception e) {
            addFault(e.toString() + " while verifying long record page " + longPage, page, 0, foundAt);
            break;
        } finally {
            if (longBuffer != null)
                longBuffer.release();
        }
    }
    return true;
}
/**
 * Obtains a claimed buffer for the given page of the current volume, after
 * first calling {@code poll()}. The third argument passed to the buffer
 * pool is true when pruning is enabled and the volume is not read-only.
 *
 * @throws PersistitException
 *             if the buffer pool cannot supply the page
 */
private Buffer getPage(final long page) throws PersistitException {
    poll();
    final boolean mayModify = isPruneEnabled() && !_currentVolume.isReadOnly();
    return _currentVolume.getPool().get(_currentVolume, page, mayModify, true);
}
}