Package org.h2.store

Source Code of org.h2.store.PageStore

/*
* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.store;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.zip.CRC32;
import org.h2.command.CommandInterface;
import org.h2.command.ddl.CreateTableData;
import org.h2.constant.ErrorCode;
import org.h2.constant.SysProperties;
import org.h2.engine.Constants;
import org.h2.engine.Database;
import org.h2.engine.Session;
import org.h2.index.Cursor;
import org.h2.index.Index;
import org.h2.index.IndexType;
import org.h2.index.MultiVersionIndex;
import org.h2.index.PageBtreeIndex;
import org.h2.index.PageBtreeLeaf;
import org.h2.index.PageBtreeNode;
import org.h2.index.PageDataIndex;
import org.h2.index.PageDataLeaf;
import org.h2.index.PageDataNode;
import org.h2.index.PageDataOverflow;
import org.h2.index.PageDelegateIndex;
import org.h2.index.PageIndex;
import org.h2.message.DbException;
import org.h2.message.Trace;
import org.h2.result.Row;
import org.h2.schema.Schema;
import org.h2.store.fs.FileUtils;
import org.h2.table.Column;
import org.h2.table.IndexColumn;
import org.h2.table.RegularTable;
import org.h2.table.Table;
import org.h2.util.BitField;
import org.h2.util.Cache;
import org.h2.util.CacheLRU;
import org.h2.util.CacheObject;
import org.h2.util.CacheWriter;
import org.h2.util.IntArray;
import org.h2.util.IntIntHashMap;
import org.h2.util.New;
import org.h2.util.StatementBuilder;
import org.h2.util.StringUtils;
import org.h2.value.CompareMode;
import org.h2.value.Value;
import org.h2.value.ValueInt;
import org.h2.value.ValueString;

/**
* This class represents a file that is organized as a number of pages. Page 0
* contains a static file header, and pages 1 and 2 both contain the variable
* file header (page 2 is a copy of page 1 and is only read if the checksum of
* page 1 is invalid). The format of page 0 is:
* <ul>
* <li>0-47: file header (3 times "-- H2 0.5/B -- \n")</li>
* <li>48-51: page size in bytes (PAGE_SIZE_MIN - PAGE_SIZE_MAX, must be a power of 2)</li>
* <li>52: write version (read-only if larger than WRITE_VERSION)</li>
* <li>53: read version (opening fails if larger than READ_VERSION)</li>
* </ul>
* The format of page 1 and 2 is:
* <ul>
* <li>CRC32 of the remaining data: int (0-3)</li>
* <li>write counter (incremented on each write): long (4-11)</li>
* <li>log trunk key: int (12-15)</li>
* <li>log trunk page (0 for none): int (16-19)</li>
* <li>log data page (0 for none): int (20-23)</li>
* </ul>
* Page 3 contains the first free list page.
* Page 4 contains the meta table root page.
*/
public class PageStore implements CacheWriter {

    // TODO test running out of disk space (using a special file system)
    // TODO unused pages should be freed once in a while
    // TODO node row counts are incorrect (it's not splitting row counts)
    // TODO after opening the database, delay writing until required
    // TODO optimization: try to avoid allocating a byte array per page
    // TODO optimization: check if calling Data.getValueLen slows things down
    // TODO order pages so that searching for a key only seeks forward
    // TODO optimization: update: only log the key and changed values
    // TODO index creation: use less space (ordered, split at insertion point)
    // TODO detect circles in linked lists
    // (input stream, free list, extend pages...)
    // at runtime and recovery
    // TODO remove trace or use isDebugEnabled
    // TODO recover tool: support syntax to delete a row with a key
    // TODO don't store default values (store a special value)
    // TODO check for file size (exception if not exact size expected)
    // TODO online backup using bsdiff

    /**
     * The smallest possible page size.
     */
    public static final int PAGE_SIZE_MIN = 64;

    /**
     * The biggest possible page size.
     */
    public static final int PAGE_SIZE_MAX = 32768;

    /**
     * This log mode means the transaction log is not used.
     */
    public static final int LOG_MODE_OFF = 0;

    /**
     * This log mode means the transaction log is used and FileDescriptor.sync()
     * is called for each checkpoint. This is the default level.
     */
    public static final int LOG_MODE_SYNC = 2;
    // Page 3 holds the first free-list page; checkpoint() starts its scan here.
    private static final int PAGE_ID_FREE_LIST_ROOT = 3;
    // Page 4 holds the root of the meta table (registered in open()).
    private static final int PAGE_ID_META_ROOT = 4;
    // A file with fewer pages than this is treated as not fully created.
    private static final int MIN_PAGE_COUNT = 6;
    // NOTE(review): INCREMENT_KB and INCREMENT_PERCENT_MIN are not used in the
    // visible part of the file; presumably they control file growth - confirm.
    private static final int INCREMENT_KB = 1024;
    private static final int INCREMENT_PERCENT_MIN = 35;
    // Opening fails if the read version stored in the file is larger than this.
    private static final int READ_VERSION = 3;
    // The file is opened read-only if its stored write version is larger than this.
    private static final int WRITE_VERSION = 3;
    // NOTE(review): the META_TYPE_* values are not used in the visible part of
    // the file; presumably the index type stored in the meta table - confirm.
    private static final int META_TYPE_DATA_INDEX = 0;
    private static final int META_TYPE_BTREE_INDEX = 1;
    // Key under which the meta table root page is stored in metaRootPageId.
    private static final int META_TABLE_ID = -1;
    // Step size (in pages) of the outer loop in compact() when compacting fully.
    private static final int COMPACT_BLOCK_SIZE = 1536;
    private Database database;
    private final Trace trace;
    private String fileName;
    private FileStore file;
    private String accessMode;
    private int pageSize = Constants.DEFAULT_PAGE_SIZE;
    // log2(pageSize), set in setPageSize(); used to turn a page id into a file offset.
    private int pageSizeShift;
    // writeCountBase is loaded from the variable header; writeCount and
    // readCount are incremented on each write / read done by this object.
    private long writeCountBase, writeCount, readCount;
    // Transaction log position as stored in the variable header (pages 1 and 2).
    private int logKey, logFirstTrunkPage, logFirstDataPage;
    private Cache cache;
    // Result of PageFreeList.getPagesAddressed(pageSize).
    private int freeListPagesPerList;
    // Set to true around log (re)opening in openNew(), openForWriting() and
    // compact(); the code that reads this flag is outside the visible part.
    private boolean recoveryRunning;

    /**
     * The index to the first free-list page that potentially has free space.
     */
    private int firstFreeListIndex;

    /**
     * The file size in bytes.
     */
    private long fileLength;

    /**
     * Number of pages (including free pages).
     */
    private int pageCount;

    private PageLog log;
    private Schema metaSchema;
    private RegularTable metaTable;
    private PageDataIndex metaIndex;
    private IntIntHashMap metaRootPageId = new IntIntHashMap();
    private HashMap<Integer, PageIndex> metaObjects = New.hashMap();
    private HashMap<Integer, PageIndex> tempObjects;

    /**
     * The map of reserved pages, to ensure index head pages
     * are not used for regular data during recovery. The key is the page id,
     * and the value the latest transaction position where this page is used.
     */
    private HashMap<Integer, Integer> reservedPages;
    private boolean isNew;
    private long maxLogSize = Constants.DEFAULT_MAX_LOG_SIZE;
    private Session systemSession;
    private BitField freed = new BitField();
    private ArrayList<PageFreeList> freeLists = New.arrayList();

    private boolean recordPageReads;
    private ArrayList<Integer> recordedPagesList;
    private IntIntHashMap recordedPagesIndex;

    /**
     * The change count is something like a "micro-transaction-id".
     * It is used to ensure that changed pages are not written to the file
     * before the current operation is finished. This is only a problem
     * when using a very small cache size. The value starts at 1 so that
     * pages with change count 0 can be evicted from the cache.
     */
    private int changeCount = 1;

    private Data emptyPage;
    private long logSizeBase;
    private HashMap<String, Integer> statistics;
    private int logMode = LOG_MODE_SYNC;
    private boolean lockFile;
    private boolean readMode;
    private int backupLevel;

    /**
     * Create a new page store object.
     *
     * @param database the database
     * @param fileName the file name
     * @param accessMode the access mode
     * @param cacheSizeDefault the default cache size
     */
    public PageStore(Database database, String fileName, String accessMode, int cacheSizeDefault) {
        this.fileName = fileName;
        this.accessMode = accessMode;
        this.database = database;
        trace = database.getTrace(Trace.PAGE_STORE);
        // if (fileName.endsWith("X.h2.db"))
        // trace.setLevel(TraceSystem.DEBUG);
        String cacheType = database.getCacheType();
        this.cache = CacheLRU.getCache(this, cacheType, cacheSizeDefault);
        systemSession = new Session(database, null, 0);
    }

    /**
     * Start collecting statistics.
     */
    public void statisticsStart() {
        statistics = New.hashMap();
    }

    /**
     * Stop collecting statistics.
     *
     * @return the statistics
     */
    public HashMap<String, Integer> statisticsEnd() {
        HashMap<String, Integer> result = statistics;
        statistics = null;
        return result;
    }

    private void statisticsIncrement(String key) {
        if (statistics != null) {
            Integer old = statistics.get(key);
            statistics.put(key, old == null ? 1 : old + 1);
        }
    }

    /**
     * Copy the next page to the output stream.
     *
     * @param pageId the page to copy
     * @param out the output stream
     * @return the new position, or -1 if there is no more data to copy
     */
    public synchronized int copyDirect(int pageId, OutputStream out) throws IOException {
        byte[] buffer = new byte[pageSize];
        if (pageId >= pageCount) {
            return -1;
        }
        file.seek((long) pageId << pageSizeShift);
        file.readFullyDirect(buffer, 0, pageSize);
        readCount++;
        out.write(buffer, 0, pageSize);
        return pageId + 1;
    }

    /**
     * Open the file and read the header.
     */
    public synchronized void open() {
        try {
            metaRootPageId.put(META_TABLE_ID, PAGE_ID_META_ROOT);
            if (FileUtils.exists(fileName)) {
                long length = FileUtils.size(fileName);
                if (length < MIN_PAGE_COUNT * PAGE_SIZE_MIN) {
                    if (database.isReadOnly()) {
                        throw DbException.get(ErrorCode.FILE_CORRUPTED_1, fileName + " length: " + length);
                    }
                    // the database was not fully created
                    openNew();
                } else {
                    openExisting();
                }
            } else {
                openNew();
            }
        } catch (DbException e) {
            close();
            throw e;
        }
    }

    /**
     * Create a new store file: write both headers, grow the file to the
     * minimum page count, open the meta index and start a transaction log
     * on a freshly allocated page. The statement order is significant.
     */
    private void openNew() {
        // (re)apply the configured page size so the derived fields are set
        setPageSize(pageSize);
        freeListPagesPerList = PageFreeList.getPagesAddressed(pageSize);
        file = database.openFile(fileName, accessMode, false);
        lockFile();
        // recoveryRunning stays true until the log is open for writing
        recoveryRunning = true;
        writeStaticHeader();
        writeVariableHeader();
        log = new PageLog(this);
        increaseFileSize(MIN_PAGE_COUNT);
        openMetaIndex();
        // the first trunk page of the log is a newly allocated page
        logFirstTrunkPage = allocatePage();
        log.openForWriting(logFirstTrunkPage, false);
        isNew = true;
        recoveryRunning = false;
        increaseFileSize();
    }

    private void lockFile() {
        if (lockFile) {
            if (!file.tryLock()) {
                throw DbException.get(ErrorCode.DATABASE_ALREADY_OPEN_1, fileName);
            }
        }
    }

    private void openExisting() {
        try {
            file = database.openFile(fileName, accessMode, true);
        } catch (DbException e) {
            if (e.getErrorCode() == ErrorCode.IO_EXCEPTION_2) {
                if (e.getMessage().indexOf("locked") >= 0) {
                    // in Windows, you can't open a locked file
                    // (in other operating systems, you can)
                    // the exact error message is:
                    // "The process cannot access the file because
                    // another process has locked a portion of the file"
                    throw DbException.get(ErrorCode.DATABASE_ALREADY_OPEN_1, e, fileName);
                }
            }
            throw e;
        }
        lockFile();
        readStaticHeader();
        freeListPagesPerList = PageFreeList.getPagesAddressed(pageSize);
        fileLength = file.length();
        pageCount = (int) (fileLength / pageSize);
        if (pageCount < MIN_PAGE_COUNT) {
            if (database.isReadOnly()) {
                throw DbException.get(ErrorCode.FILE_CORRUPTED_1, fileName + " pageCount: " + pageCount);
            }
            file.releaseLock();
            file.close();
            FileUtils.delete(fileName);
            openNew();
            return;
        }
        readVariableHeader();
        log = new PageLog(this);
        log.openForReading(logKey, logFirstTrunkPage, logFirstDataPage);
        boolean old = database.isMultiVersion();
        // temporarily disabling multi-version concurrency, because
        // the multi-version index sometimes compares rows
        // and the LOB storage is not yet available.
        database.setMultiVersion(false);
        boolean isEmpty = recover();
        database.setMultiVersion(old);
        if (!database.isReadOnly()) {
            readMode = true;
            if (!isEmpty || !SysProperties.MODIFY_ON_WRITE || tempObjects != null) {
                openForWriting();
                removeOldTempIndexes();
            }
        }
    }

    /**
     * Switch the store from read mode to write mode: free the old log,
     * start a new one on a freshly allocated page, and run a checkpoint.
     * Does nothing if the store is not in read mode or the database is
     * read-only.
     */
    private void openForWriting() {
        if (!readMode || database.isReadOnly()) {
            return;
        }
        readMode = false;
        // recoveryRunning stays true while the log is replaced
        recoveryRunning = true;
        log.free();
        logFirstTrunkPage = allocatePage();
        log.openForWriting(logFirstTrunkPage, false);
        recoveryRunning = false;
        // mark every page as "freed" so that checkpoint() does not
        // overwrite unused pages with empty data
        freed.set(0, pageCount, true);
        checkpoint();
    }

    private void removeOldTempIndexes() {
        if (tempObjects != null) {
            metaObjects.putAll(tempObjects);
            for (PageIndex index: tempObjects.values()) {
                if (index.getTable().isTemporary()) {
                    index.truncate(systemSession);
                    index.remove(systemSession);
                }
            }
            systemSession.commit(true);
            tempObjects = null;
        }
        metaObjects.clear();
        metaObjects.put(-1, metaIndex);
    }

    private void writeIndexRowCounts() {
        for (PageIndex index: metaObjects.values()) {
            index.writeRowCount();
        }
    }

    private void writeBack() {
        ArrayList<CacheObject> list = cache.getAllChanged();
        Collections.sort(list);
        for (int i = 0, size = list.size(); i < size; i++) {
            writeBack(list.get(i));
        }
    }

    /**
     * Flush all pending changes to disk, and switch the new transaction log.
     */
    public synchronized void checkpoint() {
        trace.debug("checkpoint");
        if (log == null || readMode || database.isReadOnly() || backupLevel > 0) {
            // the file was never fully opened, or is read-only,
            // or checkpoint is currently disabled
            return;
        }
        database.checkPowerOff();
        writeIndexRowCounts();

        // checkpoint the log, then write all changed cached pages
        log.checkpoint();
        writeBack();

        // log sections before the oldest uncommitted one are no longer needed
        int firstUncommittedSection = getFirstUncommittedSection();

        log.removeUntil(firstUncommittedSection);

        // write back the free list
        writeBack();

        // ensure the free list is backed up again
        log.checkpoint();

        if (trace.isDebugEnabled()) {
            trace.debug("writeFree");
        }
        // overwrite unused pages that still contain data with an empty page;
        // the 'freed' bit field remembers which pages were already handled
        byte[] test = new byte[16];
        byte[] empty = new byte[pageSize];
        for (int i = PAGE_ID_FREE_LIST_ROOT; i < pageCount; i++) {
            if (isUsed(i)) {
                freed.clear(i);
            } else if (!freed.get(i)) {
                if (trace.isDebugEnabled()) {
                    trace.debug("free " + i);
                }
                file.seek((long) i << pageSizeShift);
                file.readFully(test, 0, 16);
                // only the first byte decides whether the page is rewritten
                if (test[0] != 0) {
                    file.seek((long) i << pageSizeShift);
                    file.write(empty, 0, pageSize);
                    writeCount++;
                }
                freed.set(i);
            }
        }
    }

    /**
     * Shrink the file so there are no empty pages at the end.
     *
     * @param compactMode 0 if no compacting should happen, otherwise
     * TransactionCommand.SHUTDOWN_COMPACT or TransactionCommand.SHUTDOWN_DEFRAG
     */
    public synchronized void compact(int compactMode) {
        // nothing to do if trimming is disabled in the database settings
        if (!database.getSettings().pageStoreTrim) {
            return;
        }
        // a store that is still in read mode is left alone
        // unless an explicit compact mode was requested
        if (SysProperties.MODIFY_ON_WRITE && readMode && compactMode == 0) {
            return;
        }
        openForWriting();
        // find the last used page
        int lastUsed = -1;
        for (int i = getFreeListId(pageCount); i >= 0; i--) {
            lastUsed = getFreeList(i).getLastUsed();
            if (lastUsed != -1) {
                break;
            }
        }
        // open a new log at the very end
        // (to be truncated later)
        writeBack();
        log.free();
        recoveryRunning = true;
        try {
            logFirstTrunkPage = lastUsed + 1;
            allocatePage(logFirstTrunkPage);
            log.openForWriting(logFirstTrunkPage, true);
            // ensure the free list is backed up again
            log.checkpoint();
        } finally {
            recoveryRunning = false;
        }
        long start = System.currentTimeMillis();
        boolean isCompactFully = compactMode == CommandInterface.SHUTDOWN_COMPACT;
        boolean isDefrag = compactMode == CommandInterface.SHUTDOWN_DEFRAG;

        // the defragAlways setting forces both modes
        if (database.getSettings().defragAlways) {
            isCompactFully = isDefrag = true;
        }

        int maxCompactTime = database.getSettings().maxCompactTime;
        int maxMove = database.getSettings().maxCompactCount;

        // a full compact or defrag is not limited in time or page moves
        if (isCompactFully || isDefrag) {
            maxCompactTime = Integer.MAX_VALUE;
            maxMove = Integer.MAX_VALUE;
        }
        int blockSize = isCompactFully ? COMPACT_BLOCK_SIZE : 1;
        int firstFree = MIN_PAGE_COUNT;
        // walk backwards from the last used page in steps of blockSize; within
        // each block, move used pages to the first free page further ahead.
        // j counts the moved pages; setting j = maxMove ends the outer loop.
        for (int x = lastUsed, j = 0; x > MIN_PAGE_COUNT && j < maxMove; x -= blockSize) {
            for (int full = x - blockSize + 1; full <= x; full++) {
                if (full > MIN_PAGE_COUNT && isUsed(full)) {
                    synchronized (this) {
                        firstFree = getFirstFree(firstFree);
                        if (firstFree == -1 || firstFree >= full) {
                            // no free page in front of this one: stop
                            j = maxMove;
                            break;
                        }
                        if (compact(full, firstFree)) {
                            j++;
                            long now = System.currentTimeMillis();
                            if (now > start + maxCompactTime) {
                                // time budget used up: stop
                                j = maxMove;
                                break;
                            }
                        }
                    }
                }
            }
        }
        if (isDefrag) {
            log.checkpoint();
            writeBack();
            cache.clear();
            ArrayList<Table> tables = database.getAllTablesAndViews(false);
            recordedPagesList = New.arrayList();
            recordedPagesIndex = new IntIntHashMap();
            // NOTE(review): while this flag is set, page reads are presumably
            // appended to recordedPagesList by code outside this view - confirm
            recordPageReads = true;
            Session s = database.getSystemSession();
            // scan every regular, non-temporary table and its scannable indexes
            for (Table table : tables) {
                if (!table.isTemporary() && Table.TABLE.equals(table.getTableType())) {
                    Index scanIndex = table.getScanIndex(s);
                    Cursor cursor = scanIndex.find(s, null, null);
                    while (cursor.next()) {
                        cursor.get();
                    }
                    for (Index index : table.getIndexes()) {
                        if (index != scanIndex && index.canScan()) {
                            cursor = index.find(s, null, null);
                            while (cursor.next()) {
                                // the data is already read
                            }
                        }
                    }
                }
            }
            recordPageReads = false;
            // place the recorded pages, in recorded order, onto consecutive
            // movable target positions after the fixed pages
            int target = MIN_PAGE_COUNT - 1;
            int temp = 0;
            for (int i = 0, size = recordedPagesList.size(); i < size; i++) {
                log.checkpoint();
                writeBack();
                int source = recordedPagesList.get(i);
                Page pageSource = getPage(source);
                if (!pageSource.canMove()) {
                    continue;
                }
                // advance to the next target that is empty or can be moved
                while (true) {
                    Page pageTarget = getPage(++target);
                    if (pageTarget == null || pageTarget.canMove()) {
                        break;
                    }
                }
                if (target == source) {
                    continue;
                }
                // a free page is needed as the intermediate location
                temp = getFirstFree(temp);
                if (temp == -1) {
                    DbException.throwInternalError("no free page for defrag");
                }
                cache.clear();
                swap(source, target, temp);
                // if the old content of 'target' was itself a recorded page,
                // it is now stored at 'source': update the bookkeeping
                int index = recordedPagesIndex.get(target);
                if (index != IntIntHashMap.NOT_FOUND) {
                    recordedPagesList.set(index, source);
                    recordedPagesIndex.put(source, index);
                }
                recordedPagesList.set(i, target);
                recordedPagesIndex.put(target, i);
            }
            recordedPagesList = null;
            recordedPagesIndex = null;
        }
        // TODO can most likely be simplified
        checkpoint();
        log.checkpoint();
        writeIndexRowCounts();
        log.checkpoint();
        writeBack();
        commit(systemSession);
        writeBack();
        log.checkpoint();

        log.free();
        // truncate the log
        recoveryRunning = true;
        try {
            setLogFirstPage(++logKey, 0, 0);
        } finally {
            recoveryRunning = false;
        }
        writeBack();
        // find the last used page again; pages behind it are cut off
        for (int i = getFreeListId(pageCount); i >= 0; i--) {
            lastUsed = getFreeList(i).getLastUsed();
            if (lastUsed != -1) {
                break;
            }
        }
        int newPageCount = lastUsed + 1;
        if (newPageCount < pageCount) {
            freed.set(newPageCount, pageCount, false);
        }
        pageCount = newPageCount;
        // the easiest way to remove superfluous entries
        freeLists.clear();
        trace.debug("pageCount: " + pageCount);
        long newLength = (long) pageCount << pageSizeShift;
        if (file.length() != newLength) {
            file.setLength(newLength);
            writeCount++;
        }
    }

    private int getFirstFree(int start) {
        int free = -1;
        for (int id = getFreeListId(start); start < pageCount; id++) {
            free = getFreeList(id).getFirstFree(start);
            if (free != -1) {
                break;
            }
        }
        return free;
    }

    private void swap(int a, int b, int free) {
        if (a < MIN_PAGE_COUNT || b < MIN_PAGE_COUNT) {
            System.out.println(isUsed(a) + " " + isUsed(b));
            DbException.throwInternalError("can't swap " + a + " and " + b);
        }
        Page f = (Page) cache.get(free);
        if (f != null) {
            DbException.throwInternalError("not free: " + f);
        }
        if (trace.isDebugEnabled()) {
            trace.debug("swap " + a + " and " + b + " via " + free);
        }
        Page pageA = null;
        if (isUsed(a)) {
            pageA = getPage(a);
            if (pageA != null) {
                pageA.moveTo(systemSession, free);
            }
            free(a);
        }
        if (free != b) {
            if (isUsed(b)) {
                Page pageB = getPage(b);
                if (pageB != null) {
                    pageB.moveTo(systemSession, a);
                }
                free(b);
            }
            if (pageA != null) {
                f = getPage(free);
                if (f != null) {
                    f.moveTo(systemSession, b);
                }
                free(free);
            }
        }
    }

    private boolean compact(int full, int free) {
        if (full < MIN_PAGE_COUNT || free == -1 || free >= full || !isUsed(full)) {
            return false;
        }
        Page f = (Page) cache.get(free);
        if (f != null) {
            DbException.throwInternalError("not free: " + f);
        }
        Page p = getPage(full);
        if (p == null) {
            freePage(full);
        } else if (p instanceof PageStreamData || p instanceof PageStreamTrunk) {
            if (p.getPos() < log.getMinPageId()) {
                // an old transaction log page
                // probably a leftover from a crash
                freePage(full);
            }
        } else {
            if (trace.isDebugEnabled()) {
                trace.debug("move " + p.getPos() + " to " + free);
            }
            try {
                p.moveTo(systemSession, free);
            } finally {
                changeCount++;
            }
        }
        return true;
    }

    /**
     * Read a page from the store.
     *
     * @param pageId the page id
     * @return the page
     */
    public synchronized Page getPage(int pageId) {
        Page p = (Page) cache.get(pageId);
        if (p != null) {
            return p;
        }

        Data data = createData();
        readPage(pageId, data);
        int type = data.readByte();
        if (type == Page.TYPE_EMPTY) {
            return null;
        }
        data.readShortInt();
        data.readInt();
        if (!checksumTest(data.getBytes(), pageId, pageSize)) {
            throw DbException.get(ErrorCode.FILE_CORRUPTED_1, "wrong checksum");
        }
        switch (type & ~Page.FLAG_LAST) {
        case Page.TYPE_FREE_LIST:
            p = PageFreeList.read(this, data, pageId);
            break;
        case Page.TYPE_DATA_LEAF: {
            int indexId = data.readVarInt();
            PageIndex idx = metaObjects.get(indexId);
            if (idx == null) {
                throw DbException.get(ErrorCode.FILE_CORRUPTED_1, "index not found " + indexId);
            }
            if (!(idx instanceof PageDataIndex)) {
                throw DbException.get(ErrorCode.FILE_CORRUPTED_1, "not a data index " + indexId + " " + idx);
            }
            PageDataIndex index = (PageDataIndex) idx;
            if (statistics != null) {
                statisticsIncrement(index.getTable().getName() + "." + index.getName() + " read");
            }
            p = PageDataLeaf.read(index, data, pageId);
            break;
        }
        case Page.TYPE_DATA_NODE: {
            int indexId = data.readVarInt();
            PageIndex idx = metaObjects.get(indexId);
            if (idx == null) {
                throw DbException.get(ErrorCode.FILE_CORRUPTED_1, "index not found " + indexId);
            }
            if (!(idx instanceof PageDataIndex)) {
                throw DbException.get(ErrorCode.FILE_CORRUPTED_1, "not a data index " + indexId + " " + idx);
            }
            PageDataIndex index = (PageDataIndex) idx;
            if (statistics != null) {
                statisticsIncrement(index.getTable().getName() + "." + index.getName() + " read");
            }
            p = PageDataNode.read(index, data, pageId);
            break;
        }
        case Page.TYPE_DATA_OVERFLOW: {
            p = PageDataOverflow.read(this, data, pageId);
            if (statistics != null) {
                statisticsIncrement("overflow read");
            }
            break;
        }
        case Page.TYPE_BTREE_LEAF: {
            int indexId = data.readVarInt();
            PageIndex idx = metaObjects.get(indexId);
            if (idx == null) {
                throw DbException.get(ErrorCode.FILE_CORRUPTED_1, "index not found " + indexId);
            }
            if (!(idx instanceof PageBtreeIndex)) {
                throw DbException.get(ErrorCode.FILE_CORRUPTED_1, "not a btree index " + indexId + " " + idx);
            }
            PageBtreeIndex index = (PageBtreeIndex) idx;
            if (statistics != null) {
                statisticsIncrement(index.getTable().getName() + "." + index.getName() + " read");
            }
            p = PageBtreeLeaf.read(index, data, pageId);
            break;
        }
        case Page.TYPE_BTREE_NODE: {
            int indexId = data.readVarInt();
            PageIndex idx = metaObjects.get(indexId);
            if (idx == null) {
                throw DbException.get(ErrorCode.FILE_CORRUPTED_1, "index not found " + indexId);
            }
            if (!(idx instanceof PageBtreeIndex)) {
                throw DbException.get(ErrorCode.FILE_CORRUPTED_1, "not a btree index " + indexId + " " + idx);
            }
            PageBtreeIndex index = (PageBtreeIndex) idx;
            if (statistics != null) {
                statisticsIncrement(index.getTable().getName() + "." + index.getName() + " read");
            }
            p = PageBtreeNode.read(index, data, pageId);
            break;
        }
        case Page.TYPE_STREAM_TRUNK:
            p = PageStreamTrunk.read(this, data, pageId);
            break;
        case Page.TYPE_STREAM_DATA:
            p = PageStreamData.read(this, data, pageId);
            break;
        default:
            throw DbException.get(ErrorCode.FILE_CORRUPTED_1, "page=" + pageId + " type=" + type);
        }
        cache.put(p);
        return p;
    }

    private int getFirstUncommittedSection() {
        trace.debug("getFirstUncommittedSection");
        Session[] sessions = database.getSessions(true);
        int firstUncommittedSection = log.getLogSectionId();
        for (Session session : sessions) {
            int firstUncommitted = session.getFirstUncommittedLog();
            if (firstUncommitted != Session.LOG_WRITTEN) {
                if (firstUncommitted < firstUncommittedSection) {
                    firstUncommittedSection = firstUncommitted;
                }
            }
        }
        return firstUncommittedSection;
    }

    private void readStaticHeader() {
        file.seek(FileStore.HEADER_LENGTH);
        Data page = Data.create(database, new byte[PAGE_SIZE_MIN - FileStore.HEADER_LENGTH]);
        file.readFully(page.getBytes(), 0, PAGE_SIZE_MIN - FileStore.HEADER_LENGTH);
        readCount++;
        setPageSize(page.readInt());
        int writeVersion = page.readByte();
        int readVersion = page.readByte();
        if (readVersion > READ_VERSION) {
            throw DbException.get(ErrorCode.FILE_VERSION_ERROR_1, fileName);
        }
        if (writeVersion > WRITE_VERSION) {
            close();
            database.setReadOnly(true);
            accessMode = "r";
            file = database.openFile(fileName, accessMode, true);
        }
    }

    private void readVariableHeader() {
        Data page = createData();
        for (int i = 1;; i++) {
            if (i == 3) {
                throw DbException.get(ErrorCode.FILE_CORRUPTED_1, fileName);
            }
            page.reset();
            readPage(i, page);
            CRC32 crc = new CRC32();
            crc.update(page.getBytes(), 4, pageSize - 4);
            int expected = (int) crc.getValue();
            int got = page.readInt();
            if (expected == got) {
                writeCountBase = page.readLong();
                logKey = page.readInt();
                logFirstTrunkPage = page.readInt();
                logFirstDataPage = page.readInt();
                break;
            }
        }
    }

    /**
     * Set the page size. The size must be a power of two. This method must be
     * called before opening.
     *
     * @param size the page size
     */
    public void setPageSize(int size) {
        if (size < PAGE_SIZE_MIN || size > PAGE_SIZE_MAX) {
            throw DbException.get(ErrorCode.FILE_CORRUPTED_1, fileName + " pageSize: " + size);
        }
        boolean good = false;
        int shift = 0;
        for (int i = 1; i <= size;) {
            if (size == i) {
                good = true;
                break;
            }
            shift++;
            i += i;
        }
        if (!good) {
            throw DbException.get(ErrorCode.FILE_CORRUPTED_1, fileName);
        }
        pageSize = size;
        emptyPage = createData();
        pageSizeShift = shift;
    }

    private void writeStaticHeader() {
        Data page = Data.create(database, new byte[pageSize - FileStore.HEADER_LENGTH]);
        page.writeInt(pageSize);
        page.writeByte((byte) WRITE_VERSION);
        page.writeByte((byte) READ_VERSION);
        file.seek(FileStore.HEADER_LENGTH);
        file.write(page.getBytes(), 0, pageSize - FileStore.HEADER_LENGTH);
        writeCount++;
    }

    /**
     * Set the trunk page and data page id of the log.
     *
     * @param logKey the log key of the trunk page
     * @param trunkPageId the trunk page id
     * @param dataPageId the data page id
     */
    void setLogFirstPage(int logKey, int trunkPageId, int dataPageId) {
        if (trace.isDebugEnabled()) {
            trace.debug("setLogFirstPage key: " + logKey + " trunk: "+ trunkPageId +" data: " + dataPageId);
        }
        this.logKey = logKey;
        this.logFirstTrunkPage = trunkPageId;
        this.logFirstDataPage = dataPageId;
        writeVariableHeader();
    }

    private void writeVariableHeader() {
        trace.debug("writeVariableHeader");
        if (logMode == LOG_MODE_SYNC) {
            file.sync();
        }
        Data page = createData();
        page.writeInt(0);
        page.writeLong(getWriteCountTotal());
        page.writeInt(logKey);
        page.writeInt(logFirstTrunkPage);
        page.writeInt(logFirstDataPage);
        CRC32 crc = new CRC32();
        crc.update(page.getBytes(), 4, pageSize - 4);
        page.setInt(0, (int) crc.getValue());
        file.seek(pageSize);
        file.write(page.getBytes(), 0, pageSize);
        file.seek(pageSize + pageSize);
        file.write(page.getBytes(), 0, pageSize);
        // don't increment the write counter, because it was just written
    }

    /**
     * Close the file without further writing.
     */
    public synchronized void close() {
        trace.debug("close");
        if (log != null) {
            log.close();
            log = null;
        }
        if (file != null) {
            try {
                file.releaseLock();
                file.close();
            } finally {
                file = null;
            }
        }
    }

    public synchronized void flushLog() {
        if (file != null) {
            log.flush();
        }
    }

    /**
     * Flush the transaction log and sync the file.
     */
    public synchronized void sync() {
        if (file != null) {
            log.flush();
            file.sync();
        }
    }

    public Trace getTrace() {
        return trace;
    }

    public synchronized void writeBack(CacheObject obj) {
        Page record = (Page) obj;
        if (trace.isDebugEnabled()) {
            trace.debug("writeBack " + record);
        }
        record.write();
        record.setChanged(false);
    }

    /**
     * Write an undo log entry if required.
     *
     * @param page the page
     * @param old the old data (if known) or null
     */
    public synchronized void logUndo(Page page, Data old) {
        if (logMode == LOG_MODE_OFF) {
            return;
        }
        checkOpen();
        database.checkWritingAllowed();
        if (!recoveryRunning) {
            int pos = page.getPos();
            if (!log.getUndo(pos)) {
                if (old == null) {
                    old = readPage(pos);
                }
                openForWriting();
                log.addUndo(pos, old);
            }
        }
    }

    /**
     * Update a page.
     *
     * @param page the page
     */
    public synchronized void update(Page page) {
        if (trace.isDebugEnabled()) {
            if (!page.isChanged()) {
                trace.debug("updateRecord " + page.toString());
            }
        }
        checkOpen();
        database.checkWritingAllowed();
        page.setChanged(true);
        int pos = page.getPos();
        if (SysProperties.CHECK && !recoveryRunning) {
            // ensure the undo entry is already written
            if (logMode != LOG_MODE_OFF) {
                log.addUndo(pos, null);
            }
        }
        allocatePage(pos);
        cache.update(pos, page);
    }

    private int getFreeListId(int pageId) {
        return (pageId - PAGE_ID_FREE_LIST_ROOT) / freeListPagesPerList;
    }

    private PageFreeList getFreeListForPage(int pageId) {
        return getFreeList(getFreeListId(pageId));
    }

    private PageFreeList getFreeList(int i) {
        PageFreeList list = null;
        if (i < freeLists.size()) {
            list = freeLists.get(i);
            if (list != null) {
                return list;
            }
        }
        int p = PAGE_ID_FREE_LIST_ROOT + i * freeListPagesPerList;
        while (p >= pageCount) {
            increaseFileSize();
        }
        if (p < pageCount) {
            list = (PageFreeList) getPage(p);
        }
        if (list == null) {
            list = PageFreeList.create(this, p);
            cache.put(list);
        }
        while (freeLists.size() <= i) {
            freeLists.add(null);
        }
        freeLists.set(i, list);
        return list;
    }

    private void freePage(int pageId) {
        int index = getFreeListId(pageId);
        PageFreeList list = getFreeList(index);
        firstFreeListIndex = Math.min(index, firstFreeListIndex);
        list.free(pageId);
    }

    /**
     * Set the bit of an already allocated page.
     *
     * @param pageId the page to allocate
     */
    void allocatePage(int pageId) {
        PageFreeList list = getFreeListForPage(pageId);
        list.allocate(pageId);
    }

    private boolean isUsed(int pageId) {
        return getFreeListForPage(pageId).isUsed(pageId);
    }

    /**
     * Allocate a number of pages.
     *
     * @param list the list where to add the allocated pages
     * @param pagesToAllocate the number of pages to allocate
     * @param exclude the exclude list
     * @param after all allocated pages are higher than this page
     */
    void allocatePages(IntArray list, int pagesToAllocate, BitField exclude, int after) {
        list.ensureCapacity(list.size() + pagesToAllocate);
        for (int i = 0; i < pagesToAllocate; i++) {
            int page = allocatePage(exclude, after);
            after = page;
            list.add(page);
        }
    }

    /**
     * Allocate a page.
     *
     * @return the page id
     */
    public synchronized int allocatePage() {
        openForWriting();
        int pos = allocatePage(null, 0);
        if (!recoveryRunning) {
            if (logMode != LOG_MODE_OFF) {
                log.addUndo(pos, emptyPage);
            }
        }
        return pos;
    }

    private int allocatePage(BitField exclude, int first) {
        int page;
        for (int i = firstFreeListIndex;; i++) {
            PageFreeList list = getFreeList(i);
            page = list.allocate(exclude, first);
            if (page >= 0) {
                firstFreeListIndex = i;
                break;
            }
        }
        while (page >= pageCount) {
            increaseFileSize();
        }
        if (trace.isDebugEnabled()) {
            // trace.debug("allocatePage " + pos);
        }
        return page;
    }

    private void increaseFileSize() {
        int increment = INCREMENT_KB * 1024 / pageSize;
        int percent = pageCount * INCREMENT_PERCENT_MIN / 100;
        if (increment < percent) {
            increment = (1 + (percent / increment)) * increment;
        }
        int max = database.getSettings().pageStoreMaxGrowth;
        if (max < increment) {
            increment = max;
        }
        increaseFileSize(increment);
    }

    private void increaseFileSize(int increment) {
        for (int i = pageCount; i < pageCount + increment; i++) {
            freed.set(i);
        }
        pageCount += increment;
        long newLength = (long) pageCount << pageSizeShift;
        file.setLength(newLength);
        writeCount++;
        fileLength = newLength;
    }

    /**
     * Add a page to the free list. The undo log entry must have been written.
     *
     * @param pageId the page id
     */
    public synchronized void free(int pageId) {
        free(pageId, true);
    }

    /**
     * Add a page to the free list.
     *
     * @param pageId the page id
     * @param undo if the undo record must have been written
     */
    void free(int pageId, boolean undo) {
        if (trace.isDebugEnabled()) {
            // trace.debug("free " + pageId + " " + undo);
        }
        cache.remove(pageId);
        if (SysProperties.CHECK && !recoveryRunning && undo) {
            // ensure the undo entry is already written
            if (logMode != LOG_MODE_OFF) {
                log.addUndo(pageId, null);
            }
        }
        freePage(pageId);
        if (recoveryRunning) {
            writePage(pageId, createData());
            if (reservedPages != null && reservedPages.containsKey(pageId)) {
                // re-allocate the page if it is used later on again
                int latestPos = reservedPages.get(pageId);
                if (latestPos > log.getLogPos()) {
                    allocatePage(pageId);
                }
            }
        }
    }

    /**
     * Add a page to the free list. The page is not used, therefore doesn't need
     * to be overwritten.
     *
     * @param pageId the page id
     */
    void freeUnused(int pageId) {
        if (trace.isDebugEnabled()) {
            trace.debug("freeUnused " + pageId);
        }
        cache.remove(pageId);
        freePage(pageId);
        freed.set(pageId);
    }

    /**
     * Create a data object.
     *
     * @return the data page.
     */
    public Data createData() {
        return Data.create(database, new byte[pageSize]);
    }

    /**
     * Read a page.
     *
     * @param pos the page id
     * @return the page
     */
    public synchronized Data readPage(int pos) {
        Data page = createData();
        readPage(pos, page);
        return page;
    }

    /**
     * Read a page.
     *
     * @param pos the page id
     * @param page the page
     */
    void readPage(int pos, Data page) {
        if (recordPageReads) {
            if (pos >= MIN_PAGE_COUNT && recordedPagesIndex.get(pos) == IntIntHashMap.NOT_FOUND) {
                recordedPagesIndex.put(pos, recordedPagesList.size());
                recordedPagesList.add(pos);
            }
        }
        if (pos < 0 || pos >= pageCount) {
            throw DbException.get(ErrorCode.FILE_CORRUPTED_1, pos + " of " + pageCount);
        }
        file.seek((long) pos << pageSizeShift);
        file.readFully(page.getBytes(), 0, pageSize);
        readCount++;
    }

    /**
     * Get the page size.
     *
     * @return the page size
     */
    public int getPageSize() {
        return pageSize;
    }

    /**
     * Get the number of pages (including free pages).
     *
     * @return the page count
     */
    public int getPageCount() {
        return pageCount;
    }

    /**
     * Write a page.
     *
     * @param pageId the page id
     * @param data the data
     */
    public synchronized void writePage(int pageId, Data data) {
        if (pageId <= 0) {
            DbException.throwInternalError("write to page " + pageId);
        }
        byte[] bytes = data.getBytes();
        if (SysProperties.CHECK) {
            boolean shouldBeFreeList = (pageId - PAGE_ID_FREE_LIST_ROOT) % freeListPagesPerList == 0;
            boolean isFreeList = bytes[0] == Page.TYPE_FREE_LIST;
            if (bytes[0] != 0 && shouldBeFreeList != isFreeList) {
                throw DbException.throwInternalError();
            }
        }
        checksumSet(bytes, pageId);
        file.seek((long) pageId << pageSizeShift);
        file.write(bytes, 0, pageSize);
        writeCount++;
    }

    /**
     * Remove a page from the cache.
     *
     * @param pageId the page id
     */
    public synchronized void removeRecord(int pageId) {
        cache.remove(pageId);
    }

    Database getDatabase() {
        return database;
    }

    /**
     * Run recovery: process the transaction log in its undo, allocate and
     * redo stages, then reset the in-memory state built up while doing so.
     * <p>
     * The {@code recoveryRunning} flag is set while the log is processed and
     * cleared before the changed pages are written back. It is not reset if
     * an exception is thrown on the way.
     *
     * @return whether the transaction log was empty
     */
    private boolean recover() {
        trace.debug("log recover");
        recoveryRunning = true;
        // stays true only if every stage reports true
        boolean isEmpty = true;
        isEmpty &= log.recover(PageLog.RECOVERY_STAGE_UNDO);
        // mark the reserved pages (if any were collected) as allocated
        // before the allocate stage runs
        if (reservedPages != null) {
            for (int r : reservedPages.keySet()) {
                if (trace.isDebugEnabled()) {
                    trace.debug("reserve " + r);
                }
                allocatePage(r);
            }
        }
        isEmpty &= log.recover(PageLog.RECOVERY_STAGE_ALLOCATE);
        // the meta index and the meta data are loaded before the redo stage
        openMetaIndex();
        readMetaData();
        isEmpty &= log.recover(PageLog.RECOVERY_STAGE_REDO);
        boolean setReadOnly = false;
        if (!database.isReadOnly()) {
            if (log.getInDoubtTransactions().size() == 0) {
                // no in-doubt transactions: finish recovery and drop the
                // log up to the first uncommitted section
                log.recoverEnd();
                int firstUncommittedSection = getFirstUncommittedSection();
                log.removeUntil(firstUncommittedSection);
            } else {
                // in-doubt transactions exist: the database is switched to
                // read-only at the end of this method
                setReadOnly = true;
            }
        }
        // the store counts as new if no object with id 0 was registered
        PageDataIndex systemTable = (PageDataIndex) metaObjects.get(0);
        isNew = systemTable == null;
        for (PageIndex index : metaObjects.values()) {
            if (index.getTable().isTemporary()) {
                // temporary indexes are removed after opening
                if (tempObjects == null) {
                    tempObjects = New.hashMap();
                }
                tempObjects.put(index.getId(), index);
            } else {
                index.close(systemSession);
            }
        }

        allocatePage(PAGE_ID_META_ROOT);
        writeIndexRowCounts();
        recoveryRunning = false;
        reservedPages = null;

        writeBack();
        // clear the cache because it contains pages with closed indexes
        cache.clear();
        freeLists.clear();

        // only the meta index stays registered, under the id -1
        metaObjects.clear();
        metaObjects.put(-1, metaIndex);

        if (setReadOnly) {
            database.setReadOnly(true);
        }
        trace.debug("log recover done");
        return isEmpty;
    }

    /**
     * A record is added to a table, or removed from a table.
     *
     * @param session the session
     * @param tableId the table id
     * @param row the row to add
     * @param add true if the row is added, false if it is removed
     */
    public synchronized void logAddOrRemoveRow(Session session, int tableId, Row row, boolean add) {
        if (logMode != LOG_MODE_OFF) {
            if (!recoveryRunning) {
                log.logAddOrRemoveRow(session, tableId, row, add);
            }
        }
    }

    /**
     * Mark a committed transaction.
     *
     * @param session the session
     */
    public synchronized void commit(Session session) {
        checkOpen();
        openForWriting();
        log.commit(session.getId());
        if (log.getSize() - logSizeBase > maxLogSize) {
            checkpoint();
            logSizeBase = log.getSize();
        }
    }

    /**
     * Prepare a transaction.
     *
     * @param session the session
     * @param transaction the name of the transaction
     */
    public synchronized void prepareCommit(Session session, String transaction) {
        log.prepareCommit(session, transaction);
    }

    /**
     * Check whether this is a new database.
     *
     * @return true if it is
     */
    public boolean isNew() {
        return isNew;
    }

    /**
     * Reserve the page if this is a index root page entry.
     *
     * @param logPos the redo log position
     * @param tableId the table id
     * @param row the row
     */
    void allocateIfIndexRoot(int logPos, int tableId, Row row) {
        if (tableId == META_TABLE_ID) {
            int rootPageId = row.getValue(3).getInt();
            if (reservedPages == null) {
                reservedPages = New.hashMap();
            }
            reservedPages.put(rootPageId, logPos);
        }
    }

    /**
     * Redo a delete in a table.
     *
     * @param logPos the redo log position
     * @param tableId the object id of the table
     * @param key the key of the row to delete
     */
    void redoDelete(int logPos, int tableId, long key) {
        Index index = metaObjects.get(tableId);
        PageDataIndex scan = (PageDataIndex) index;
        Row row = scan.getRowWithKey(key);
        redo(logPos, tableId, row, false);
    }

    /**
     * Redo a change in a table.
     *
     * @param logPos the redo log position
     * @param tableId the object id of the table
     * @param row the row
     * @param add true if the record is added, false if deleted
     */
    void redo(int logPos, int tableId, Row row, boolean add) {
        if (tableId == META_TABLE_ID) {
            if (add) {
                addMeta(row, systemSession, true);
            } else {
                removeMeta(row);
            }
        }
        Index index = metaObjects.get(tableId);
        if (index == null) {
            throw DbException.throwInternalError("Table not found: " + tableId + " " + row + " " + add);
        }
        Table table = index.getTable();
        if (add) {
            table.addRow(systemSession, row);
        } else {
            table.removeRow(systemSession, row);
        }
    }

    /**
     * Redo a truncate.
     *
     * @param tableId the object id of the table
     */
    void redoTruncate(int tableId) {
        Index index = metaObjects.get(tableId);
        Table table = index.getTable();
        table.truncate(systemSession);
    }

    private void openMetaIndex() {
        CreateTableData data = new CreateTableData();
        ArrayList<Column> cols = data.columns;
        cols.add(new Column("ID", Value.INT));
        cols.add(new Column("TYPE", Value.INT));
        cols.add(new Column("PARENT", Value.INT));
        cols.add(new Column("HEAD", Value.INT));
        cols.add(new Column("OPTIONS", Value.STRING));
        cols.add(new Column("COLUMNS", Value.STRING));
        metaSchema = new Schema(database, 0, "", null, true);
        data.schema = metaSchema;
        data.tableName = "PAGE_INDEX";
        data.id = META_TABLE_ID;
        data.temporary = false;
        data.persistData = true;
        data.persistIndexes = true;
        data.create = false;
        data.session = systemSession;
        metaTable = new RegularTable(data);
        metaIndex = (PageDataIndex) metaTable.getScanIndex(
                systemSession);
        metaObjects.clear();
        metaObjects.put(-1, metaIndex);
    }

    private void readMetaData() {
        Cursor cursor = metaIndex.find(systemSession, null, null);
        // first, create all tables
        while (cursor.next()) {
            Row row = cursor.get();
            int type = row.getValue(1).getInt();
            if (type == META_TYPE_DATA_INDEX) {
                addMeta(row, systemSession, false);
            }
        }
        // now create all secondary indexes
        // otherwise the table might not be created yet
        cursor = metaIndex.find(systemSession, null, null);
        while (cursor.next()) {
            Row row = cursor.get();
            int type = row.getValue(1).getInt();
            if (type != META_TYPE_DATA_INDEX) {
                addMeta(row, systemSession, false);
            }
        }
    }

    private void removeMeta(Row row) {
        int id = row.getValue(0).getInt();
        PageIndex index = metaObjects.get(id);
        index.getTable().removeIndex(index);
        if (index instanceof PageBtreeIndex || index instanceof PageDelegateIndex) {
            if (index.isTemporary()) {
                systemSession.removeLocalTempTableIndex(index);
            } else {
                index.getSchema().remove(index);
            }
        }
        index.remove(systemSession);
        metaObjects.remove(id);
    }

    private void addMeta(Row row, Session session, boolean redo) {
        int id = row.getValue(0).getInt();
        int type = row.getValue(1).getInt();
        int parent = row.getValue(2).getInt();
        int rootPageId = row.getValue(3).getInt();
        String options = row.getValue(4).getString();
        String columnList = row.getValue(5).getString();
        String[] columns = StringUtils.arraySplit(columnList, ',', false);
        String[] ops = StringUtils.arraySplit(options, ',', false);
        Index meta;
        if (trace.isDebugEnabled()) {
            trace.debug("addMeta id="+ id +" type=" + type +
                    " root=" + rootPageId + " parent=" + parent + " columns=" + columnList);
        }
        if (redo && rootPageId != 0) {
            // ensure the page is empty, but not used by regular data
            writePage(rootPageId, createData());
            allocatePage(rootPageId);
        }
        metaRootPageId.put(id, rootPageId);
        if (type == META_TYPE_DATA_INDEX) {
            CreateTableData data = new CreateTableData();
            if (SysProperties.CHECK) {
                if (columns == null) {
                    throw DbException.throwInternalError(row.toString());
                }
            }
            for (int i = 0, len = columns.length; i < len; i++) {
                Column col = new Column("C" + i, Value.INT);
                data.columns.add(col);
            }
            data.schema = metaSchema;
            data.tableName = "T" + id;
            data.id = id;
            data.temporary = ops[2].equals("temp");
            data.persistData = true;
            data.persistIndexes = true;
            data.create = false;
            data.session = session;
            RegularTable table = new RegularTable(data);
            CompareMode mode = CompareMode.getInstance(ops[0], Integer.parseInt(ops[1]));
            table.setCompareMode(mode);
            meta = table.getScanIndex(session);
        } else {
            Index p = metaObjects.get(parent);
            if (p == null) {
                throw DbException.get(ErrorCode.FILE_CORRUPTED_1, "Table not found:" + parent + " for " + row + " meta:" + metaObjects);
            }
            RegularTable table = (RegularTable) p.getTable();
            Column[] tableCols = table.getColumns();
            int len = columns.length;
            IndexColumn[] cols = new IndexColumn[len];
            for (int i = 0; i < len; i++) {
                String c = columns[i];
                IndexColumn ic = new IndexColumn();
                int idx = c.indexOf('/');
                if (idx >= 0) {
                    String s = c.substring(idx + 1);
                    ic.sortType = Integer.parseInt(s);
                    c = c.substring(0, idx);
                }
                Column column = tableCols[Integer.parseInt(c)];
                ic.column = column;
                cols[i] = ic;
            }
            IndexType indexType;
            if (ops[3].equals("d")) {
                indexType = IndexType.createPrimaryKey(true, false);
                Column[] tableColumns = table.getColumns();
                for (IndexColumn indexColumn : cols) {
                    tableColumns[indexColumn.column.getColumnId()].setNullable(false);
                }
            } else {
                indexType = IndexType.createNonUnique(true);
            }
            meta = table.addIndex(session, "I" + id, id, cols, indexType, false, null);
        }
        PageIndex index;
        if (meta instanceof MultiVersionIndex) {
            index = (PageIndex) ((MultiVersionIndex) meta).getBaseIndex();
        } else {
            index = (PageIndex) meta;
        }
        metaObjects.put(id, index);
    }

    /**
     * Add an index to the in-memory index map.
     *
     * @param index the index
     */
    public synchronized void addIndex(PageIndex index) {
        metaObjects.put(index.getId(), index);
    }

    /**
     * Add the meta data of an index.
     *
     * @param index the index to add
     * @param session the session
     */
    public synchronized void addMeta(PageIndex index, Session session) {
        int type = index instanceof PageDataIndex ? META_TYPE_DATA_INDEX : META_TYPE_BTREE_INDEX;
        IndexColumn[] columns = index.getIndexColumns();
        StatementBuilder buff = new StatementBuilder();
        for (IndexColumn col : columns) {
            buff.appendExceptFirst(",");
            int id = col.column.getColumnId();
            buff.append(id);
            int sortType = col.sortType;
            if (sortType != 0) {
                buff.append('/');
                buff.append(sortType);
            }
        }
        String columnList = buff.toString();
        Table table = index.getTable();
        CompareMode mode = table.getCompareMode();
        String options = mode.getName()+ "," + mode.getStrength() + ",";
        if (table.isTemporary()) {
            options += "temp";
        }
        if (SysProperties.CHECK) {
            if (!table.isTemporary()) {
                database.verifyMetaLocked(session);
            }
        }
        options += ",";
        if (index instanceof PageDelegateIndex) {
            options += "d";
        }
        Row row = metaTable.getTemplateRow();
        row.setValue(0, ValueInt.get(index.getId()));
        row.setValue(1, ValueInt.get(type));
        row.setValue(2, ValueInt.get(table.getId()));
        row.setValue(3, ValueInt.get(index.getRootPageId()));
        row.setValue(4, ValueString.get(options));
        row.setValue(5, ValueString.get(columnList));
        row.setKey(index.getId() + 1);
        metaIndex.add(session, row);
    }

    /**
     * Remove the meta data of an index.
     *
     * @param index the index to remove
     * @param session the session
     */
    public synchronized void removeMeta(Index index, Session session) {
        if (SysProperties.CHECK) {
            if (!index.getTable().isTemporary()) {
                database.verifyMetaLocked(session);
            }
        }
        if (!recoveryRunning) {
            removeMetaIndex(index, session);
            metaObjects.remove(index.getId());
        }
    }

    private void removeMetaIndex(Index index, Session session) {
        int key = index.getId() + 1;
        Row row = metaIndex.getRow(session, key);
        if (row.getKey() != key) {
            throw DbException.get(ErrorCode.FILE_CORRUPTED_1,
                    "key: " + key + " index: " + index +
                    " table: " + index.getTable() + " row: " + row);
        }
        metaIndex.remove(session, row);
    }

    /**
     * Set the maximum transaction log size in megabytes.
     *
     * @param maxSize the new maximum log size
     */
    public void setMaxLogSize(long maxSize) {
        this.maxLogSize = maxSize;
    }

    /**
     * Commit or rollback a prepared transaction after opening a database with
     * in-doubt transactions.
     *
     * @param sessionId the session id
     * @param pageId the page where the transaction was prepared
     * @param commit if the transaction should be committed
     */
    public synchronized void setInDoubtTransactionState(int sessionId, int pageId, boolean commit) {
        boolean old = database.isReadOnly();
        try {
            database.setReadOnly(false);
            log.setInDoubtTransactionState(sessionId, pageId, commit);
        } finally {
            database.setReadOnly(old);
        }
    }

    /**
     * Get the list of in-doubt transaction.
     *
     * @return the list
     */
    public ArrayList<InDoubtTransaction> getInDoubtTransactions() {
        return log.getInDoubtTransactions();
    }

    /**
     * Check whether the recovery process is currently running.
     *
     * @return true if it is
     */
    public boolean isRecoveryRunning() {
        return recoveryRunning;
    }

    private void checkOpen() {
        if (file == null) {
            throw DbException.get(ErrorCode.DATABASE_IS_CLOSED);
        }
    }

    /**
     * Get the file write count since the database was created.
     *
     * @return the write count
     */
    public long getWriteCountTotal() {
        return writeCount + writeCountBase;
    }

    /**
     * Get the file write count since the database was opened.
     *
     * @return the write count
     */
    public long getWriteCount() {
        return writeCount;
    }

    /**
     * Get the file read count since the database was opened.
     *
     * @return the read count
     */
    public long getReadCount() {
        return readCount;
    }

    /**
     * A table is truncated.
     *
     * @param session the session
     * @param tableId the table id
     */
    public synchronized void logTruncate(Session session, int tableId) {
        if (!recoveryRunning) {
            openForWriting();
            log.logTruncate(session, tableId);
        }
    }

    /**
     * Get the root page of an index.
     *
     * @param indexId the index id
     * @return the root page
     */
    public int getRootPageId(int indexId) {
        return metaRootPageId.get(indexId);
    }

    public Cache getCache() {
        return cache;
    }

    private void checksumSet(byte[] d, int pageId) {
        int ps = pageSize;
        int type = d[0];
        if (type == Page.TYPE_EMPTY) {
            return;
        }
        int s1 = 255 + (type & 255), s2 = 255 + s1;
        s2 += s1 += d[6] & 255;
        s2 += s1 += d[(ps >> 1) - 1] & 255;
        s2 += s1 += d[ps >> 1] & 255;
        s2 += s1 += d[ps - 2] & 255;
        s2 += s1 += d[ps - 1] & 255;
        d[1] = (byte) (((s1 & 255) + (s1 >> 8)) ^ pageId);
        d[2] = (byte) (((s2 & 255) + (s2 >> 8)) ^ (pageId >> 8));
    }

    /**
     * Check if the stored checksum is correct
     * @param d the data
     * @param pageId the page id
     * @param pageSize the page size
     * @return true if it is correct
     */
    public static boolean checksumTest(byte[] d, int pageId, int pageSize) {
        int ps = pageSize;
        int s1 = 255 + (d[0] & 255), s2 = 255 + s1;
        s2 += s1 += d[6] & 255;
        s2 += s1 += d[(ps >> 1) - 1] & 255;
        s2 += s1 += d[ps >> 1] & 255;
        s2 += s1 += d[ps - 2] & 255;
        s2 += s1 += d[ps - 1] & 255;
        if (d[1] != (byte) (((s1 & 255) + (s1 >> 8)) ^ pageId)
                || d[2] != (byte) (((s2 & 255) + (s2 >> 8)) ^ (pageId >> 8))) {
            return false;
        }
        return true;
    }

    /**
     * Increment the change count. To be done after the operation has finished.
     */
    public void incrementChangeCount() {
        changeCount++;
    }

    /**
     * Get the current change count. The first value is 1
     *
     * @return the change count
     */
    public int getChangeCount() {
        return changeCount;
    }

    public void setLogMode(int logMode) {
        this.logMode = logMode;
    }

    public int getLogMode() {
        return logMode;
    }

    public void setLockFile(boolean lockFile) {
        this.lockFile = lockFile;
    }

    public BitField getObjectIds() {
        BitField f = new BitField();
        Cursor cursor = metaIndex.find(systemSession, null, null);
        while (cursor.next()) {
            Row row = cursor.get();
            int id = row.getValue(0).getInt();
            if (id > 0) {
                f.set(id);
            }
        }
        return f;
    }

    public Session getSystemSession() {
        return systemSession;
    }

    public synchronized void setBackup(boolean start) {
        backupLevel += start ? 1 : -1;
    }

}
TOP

Related Classes of org.h2.store.PageStore

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.