Package org.archive.bdb

Source Code of org.archive.bdb.BdbModule

/*
*  This file is part of the Heritrix web crawler (crawler.archive.org).
*
*  Licensed to the Internet Archive (IA) by one or more individual
*  contributors.
*
*  The IA licenses this file to You under the Apache License, Version 2.0
*  (the "License"); you may not use this file except in compliance with
*  the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/
package org.archive.bdb;

import java.io.Closeable;
import java.io.File;
import java.io.FileFilter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.io.filefilter.IOFileFilter;
import org.archive.checkpointing.Checkpoint;
import org.archive.checkpointing.Checkpointable;
import org.archive.spring.ConfigPath;
import org.archive.util.FilesystemLinkMaker;
import org.archive.util.IdentityCacheable;
import org.archive.util.ObjectIdentityBdbManualCache;
import org.archive.util.ObjectIdentityCache;
import org.archive.util.TextUtils;
import org.archive.util.bdbje.EnhancedEnvironment;
import org.springframework.beans.factory.DisposableBean;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.Lifecycle;

import com.sleepycat.bind.EntryBinding;
import com.sleepycat.bind.serial.SerialBinding;
import com.sleepycat.bind.serial.StoredClassCatalog;
import com.sleepycat.bind.tuple.TupleBinding;
import com.sleepycat.je.CheckpointConfig;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.DatabaseNotFoundException;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.util.DbBackup;

/**
* Utility module for managing a shared BerkeleyDB-JE environment
*
* @contributor pjack
* @contributor gojomo
*/
public class BdbModule implements Lifecycle, Checkpointable, Closeable, DisposableBean {
    final private static Logger LOGGER =
        Logger.getLogger(BdbModule.class.getName());

   
    /**
     * Pairs an open {@link Database} handle with the {@link BdbConfig}
     * object it was opened with. The handle is declared transient, so only
     * the configuration takes part in default Java serialization.
     */
    private static class DatabasePlusConfig implements Serializable {
        private static final long serialVersionUID = 1L;
        /** Live database handle; transient, hence never serialized. */
        public transient Database database;
        /** Configuration object supplied when {@link #database} was opened. */
        public BdbConfig config;
    }
   
    /**
     * Configuration object for databases.  Needed because
     * {@link DatabaseConfig} is not serializable.  Also it prevents invalid
     * configurations.  (All databases opened through this module must be
     * deferred-write, because otherwise they can't sync(), and you can't
     * run a checkpoint without doing sync() first.)
     *
     * @author pjack
     *
     */
    public static class BdbConfig implements Serializable {
        private static final long serialVersionUID = 1L;

        protected boolean allowCreate;
        protected boolean sortedDuplicates;
        protected boolean transactional;
        protected boolean deferredWrite = true;

        public BdbConfig() {
        }

        public boolean isAllowCreate() {
            return allowCreate;
        }

        public void setAllowCreate(boolean allowCreate) {
            this.allowCreate = allowCreate;
        }

        public boolean getSortedDuplicates() {
            return sortedDuplicates;
        }

        public void setSortedDuplicates(boolean sortedDuplicates) {
            this.sortedDuplicates = sortedDuplicates;
        }

        public DatabaseConfig toDatabaseConfig() {
            DatabaseConfig result = new DatabaseConfig();
            result.setDeferredWrite(deferredWrite);
            result.setTransactional(transactional);
            result.setAllowCreate(allowCreate);
            result.setSortedDuplicates(sortedDuplicates);
            return result;
        }

        public boolean isTransactional() {
            return transactional;
        }

        public void setTransactional(boolean transactional) {
            this.transactional = transactional;
        }

        public void setDeferredWrite(boolean b) {
            this.deferredWrite = true;
        }
    }
   
    protected ConfigPath dir = new ConfigPath("bdbmodule subdirectory","state");
    public ConfigPath getDir() {
        return dir;
    }
    public void setDir(ConfigPath dir) {
        this.dir = dir;
    }
   
    protected int cachePercent = -1;
    public int getCachePercent() {
        return cachePercent;
    }
    public void setCachePercent(int cachePercent) {
        this.cachePercent = cachePercent;
    }
   
    protected int cacheSize = -1;
    public int getCacheSize() {
        return cacheSize;
    }
    public void setCacheSize(int cacheSize) {
        this.cacheSize = cacheSize;
    }

    protected boolean useSharedCache = true;
    public boolean getUseSharedCache() {
        return useSharedCache;
    }
    public void setUseSharedCache(boolean useSharedCache) {
        this.useSharedCache = useSharedCache;
    }
   
    /**
     * Expected number of concurrent threads; used to tune nLockTables
     * according to JE FAQ
     * http://www.oracle.com/technology/products/berkeley-db/faq/je_faq.html#33
     */
    protected int expectedConcurrency = 64;
    public int getExpectedConcurrency() {
        return expectedConcurrency;
    }
    public void setExpectedConcurrency(int expectedConcurrency) {
        this.expectedConcurrency = expectedConcurrency;
    }
   
    /**
     * Whether to use hard-links to log files to collect/retain
     * the BDB log files needed for a checkpoint. Default is true.
     * May not work on Windows (especially on pre-NTFS filesystems).
     * If false, the BDB 'je.cleaner.expunge' value will be set to
     * 'false', as well, meaning BDB will *not* delete obsolete JDB
     * files, but only rename the '.DEL'. They will have to be
     * manually deleted to free disk space, but .DEL files referenced
     * in any checkpoint's 'jdbfiles.manifest' should be retained to
     * keep the checkpoint valid.
     */
    protected boolean useHardLinkCheckpoints = true;
    public boolean getUseHardLinkCheckpoints() {
        return useHardLinkCheckpoints;
    }
    public void setUseHardLinkCheckpoints(boolean useHardLinkCheckpoints) {
        this.useHardLinkCheckpoints = useHardLinkCheckpoints;
    }
   
    /** The shared BDB environment; assigned in setup(), null before that. */
    private transient EnhancedEnvironment bdbEnvironment;
       
    /** Class catalog obtained from the environment in setup(). */
    private transient StoredClassCatalog classCatalog;
   
    /** Object-identity caches handed out by this module, keyed by database name. */
    @SuppressWarnings("rawtypes")
    private Map<String,ObjectIdentityCache> oiCaches =
        new ConcurrentHashMap<String,ObjectIdentityCache>();

    /** Databases opened through openDatabase(), keyed by database name. */
    private Map<String,DatabasePlusConfig> databases =
        new ConcurrentHashMap<String,DatabasePlusConfig>();

    /** Lifecycle flag: set by start(), cleared by stop(). */
    protected boolean isRunning = false;

    /**
     * Creates an unstarted module; the environment is only opened by
     * {@link #start()}.
     */
    public BdbModule() {
    }
   
    public synchronized void start() {
        if (isRunning()) {
            return;
        }
       
        isRunning = true;
       
        try {
            boolean isRecovery = false;
            if(recoveryCheckpoint!=null) {
                isRecovery = true;
                doRecover();
            }
  
            setup(getDir().getFile(), !isRecovery);
        } catch (DatabaseException e) {
            throw new IllegalStateException(e);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }
   
    public boolean isRunning() {
        return isRunning;
    }

    public void stop() {
        isRunning = false;
    }
   
    protected void setup(File f, boolean create)
    throws DatabaseException, IOException {
        EnvironmentConfig config = new EnvironmentConfig();
        config.setAllowCreate(create);
        config.setLockTimeout(75, TimeUnit.MINUTES); // set to max

        if (getCacheSize() > 0) {
            config.setCacheSize(getCacheSize());
            if (getCachePercent() > 0) {
                LOGGER.warning("cachePercent and cacheSize are both set. Only cacheSize will be used.");
            }
        } else if (getCachePercent() > 0) {
            config.setCachePercent(getCachePercent());
        }

        config.setSharedCache(getUseSharedCache());
       
        // we take the advice literally from...
        // http://www.oracle.com/technology/products/berkeley-db/faq/je_faq.html#33
        long nLockTables = getExpectedConcurrency()-1;
        while(!BigInteger.valueOf(nLockTables).isProbablePrime(Integer.MAX_VALUE)) {
            nLockTables--;
        }
        config.setConfigParam("je.lock.nLockTables", Long.toString(nLockTables));
       
        // triple this value to 6K because stats show many faults
        config.setConfigParam("je.log.faultReadSize", "6144");

        if(!getUseHardLinkCheckpoints()) {
            // to support checkpoints by textual manifest only,
            // prevent BDB's cleaner from deleting log files
            config.setConfigParam("je.cleaner.expunge", "false");
        } // else leave whatever other setting was already in place

        org.archive.util.FileUtils.ensureWriteableDirectory(f);
        this.bdbEnvironment = new EnhancedEnvironment(f, config);
        this.classCatalog = this.bdbEnvironment.getClassCatalog();
        if(!create) {
            // freeze last log file -- so that originating checkpoint isn't fouled
            DbBackup dbBackup = new DbBackup(bdbEnvironment);
            dbBackup.startBackup();
            dbBackup.endBackup();
        }
    }

    public void closeDatabase(Database db) {
        try {
            closeDatabase(db.getDatabaseName());
        } catch (DatabaseException e) {
            LOGGER.log(Level.SEVERE, "Error getting db name", e);           
        }
    }
   
    public void closeDatabase(String name) {
        DatabasePlusConfig dpc = databases.remove(name);
        if (dpc == null) {
            LOGGER.warning("No such database: " + name);
            return;
        }
        Database db = dpc.database;
        try {
            db.sync();
            db.close();
        } catch (DatabaseException e) {
            LOGGER.log(Level.WARNING, "Error closing db " + name, e);
        }
    }
   
    /**
     * Open a Database inside this BdbModule's environment, and
     * remember it for automatic close-at-module-stop.
     *
     * @param name
     * @param config
     * @param usePriorData
     * @return
     * @throws DatabaseException
     */
    public Database openDatabase(String name, BdbConfig config, boolean usePriorData)
    throws DatabaseException {
        if (bdbEnvironment == null) {
            // proper initialization hasn't occurred
            throw new IllegalStateException("BdbModule not started");
        }
        if (databases.containsKey(name)) {
            DatabasePlusConfig dpc = databases.get(name);
            if(dpc.config == config) {
                // object-identical configs: OK to share DB
                return dpc.database;
            }
            // unshared config object: might be name collision; error
            throw new IllegalStateException("Database already exists: " +name);
        }
       
        DatabasePlusConfig dpc = new DatabasePlusConfig();
        if (!usePriorData) {
            try {
                bdbEnvironment.truncateDatabase(null, name, false);
            } catch (DatabaseNotFoundException e) {
                // Ignored
            }
        }
        dpc.database = bdbEnvironment.openDatabase(null, name, config.toDatabaseConfig());
        dpc.config = config;
        databases.put(name, dpc);
        return dpc.database;
    }

    public StoredClassCatalog getClassCatalog() {
        return classCatalog;
    }

    public <K extends Serializable> StoredQueue<K> getStoredQueue(String dbname, Class<K> clazz, boolean usePriorData) {
        try {
            Database queueDb;
            queueDb = openDatabase(dbname,
                    StoredQueue.databaseConfig(), usePriorData);
            return new StoredQueue<K>(queueDb, clazz, getClassCatalog());
        } catch (DatabaseException e) {
            throw new RuntimeException(e);
        }
       
    }


    /**
     * Get an ObjectIdentityBdbCache, backed by a BDB Database of the
     * given name, with the given value class type. If 'recycle' is true,
     * reuse values already in the database; otherwise start with an
     * empty cache.
     * 
     * @param <V>
     * @param dbName
     * @param recycle
     * @param valueClass
     * @return
     * @throws DatabaseException
     */
    public <V extends IdentityCacheable> ObjectIdentityBdbManualCache<V> getOIBCCache(String dbName, boolean recycle,
            Class<? extends V> valueClass)
    throws DatabaseException {
        if (!recycle) {
            try {
                bdbEnvironment.truncateDatabase(null, dbName, false);
            } catch (DatabaseNotFoundException e) {
                // ignored
            }
        }
        ObjectIdentityBdbManualCache<V> oic = new ObjectIdentityBdbManualCache<V>();
        oic.initialize(bdbEnvironment, dbName, valueClass, classCatalog);
        oiCaches.put(dbName, oic);
        return oic;
    }
 
    public <V extends IdentityCacheable> ObjectIdentityCache<V> getObjectCache(String dbName, boolean recycle,
            Class<V> valueClass)
    throws DatabaseException {
        return getObjectCache(dbName, recycle, valueClass, valueClass);
    }
   
    /**
     * Get an ObjectIdentityCache, backed by a BDB Database of the given
     * name, with objects of the given valueClass type. If 'recycle' is
     * true, reuse values already in the database; otherwise start with
     * an empty cache.
     *
     * @param <V>
     * @param dbName
     * @param recycle
     * @param valueClass
     * @return
     * @throws DatabaseException
     */
    public <V extends IdentityCacheable> ObjectIdentityCache<V> getObjectCache(String dbName, boolean recycle,
            Class<V> declaredClass, Class<? extends V> valueClass)
    throws DatabaseException {
        @SuppressWarnings("unchecked")
        ObjectIdentityCache<V> oic = oiCaches.get(dbName);
        if(oic!=null) {
            return oic;
        }
        oic =  getOIBCCache(dbName, recycle, valueClass);
        return oic;
    }
   
    /**
     * Java serialization hook; performs only the default field
     * serialization.
     *
     * @param out stream being written to
     * @throws IOException if the default serialization fails
     */
    private void writeObject(ObjectOutputStream out) throws IOException {
        out.defaultWriteObject();
    }
   
    /**
     * Intentionally a no-op: this module does nothing at the start of a
     * checkpoint; the work happens in doCheckpoint().
     */
    public void startCheckpoint(Checkpoint checkpointInProgress) {}

    public void doCheckpoint(final Checkpoint checkpointInProgress) throws IOException {
        // First sync objectCaches
        for (@SuppressWarnings("rawtypes") ObjectIdentityCache oic : oiCaches.values()) {
            oic.sync();
        }

        try {
            // sync all databases
            for (DatabasePlusConfig dbc: databases.values()) {
                dbc.database.sync();
            }
       
            // Do a force checkpoint.  Thats what a sync does (i.e. doSync).
            CheckpointConfig chkptConfig = new CheckpointConfig();
            chkptConfig.setForce(true);
           
            // Mark Hayes of sleepycat says:
            // "The default for this property is false, which gives the current
            // behavior (allow deltas).  If this property is true, deltas are
            // prohibited -- full versions of internal nodes are always logged
            // during the checkpoint. When a full version of an internal node
            // is logged during a checkpoint, recovery does not need to process
            // it at all.  It is only fetched if needed by the application,
            // during normal DB operations after recovery. When a delta of an
            // internal node is logged during a checkpoint, recovery must
            // process it by fetching the full version of the node from earlier
            // in the log, and then applying the delta to it.  This can be
            // pretty slow, since it is potentially a large amount of
            // random I/O."
            // chkptConfig.setMinimizeRecoveryTime(true);
            bdbEnvironment.checkpoint(chkptConfig);
            LOGGER.fine("Finished bdb checkpoint.");
       
            DbBackup dbBackup = new DbBackup(bdbEnvironment);
            try {
                dbBackup.startBackup();
               
                File envCpDir = new File(dir.getFile(),checkpointInProgress.getName());
                org.archive.util.FileUtils.ensureWriteableDirectory(envCpDir);
                File logfilesList = new File(envCpDir,"jdbfiles.manifest");
                String[] filedata = dbBackup.getLogFilesInBackupSet();
                for (int i=0; i<filedata.length;i++) {
                    File f = new File(dir.getFile(),filedata[i]);
                    filedata[i] += ","+f.length();
                    if(getUseHardLinkCheckpoints()) {
                        File hardLink = new File(envCpDir,filedata[i]);
                        if (!FilesystemLinkMaker.makeHardLink(f.getAbsolutePath(), hardLink.getAbsolutePath())) {
                            LOGGER.log(Level.SEVERE, "unable to create required checkpoint link "+hardLink);
                        }
                    }
                }
                FileUtils.writeLines(logfilesList,Arrays.asList(filedata));
                LOGGER.fine("Finished processing bdb log files.");
            } finally {
                dbBackup.endBackup();
            }
        } catch (DatabaseException e) {
            throw new IOException(e);
        }
       
        if (checkpointInProgress.getForgetAllButLatest()) {
            File[] oldEnvCpDirs = dir.getFile().listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    return !name.equals(checkpointInProgress.getName())
                            && TextUtils.matches("cp\\d{5}-\\d{14}", name);
                }
            });
            for (File d: oldEnvCpDirs) {
                FileUtils.deleteDirectory(d);
            }
        }
    }
   
    @SuppressWarnings("unchecked")
    protected void doRecover() throws IOException {
        File cpDir = new File(dir.getFile(),recoveryCheckpoint.getName());
        File logfilesList = new File(cpDir,"jdbfiles.manifest");
        List<String> filesAndLengths = FileUtils.readLines(logfilesList);
        HashMap<String,Long> retainLogfiles = new HashMap<String,Long>();
        for(String line : filesAndLengths) {
            String[] fileAndLength = line.split(",");
            long expectedLength = Long.valueOf(fileAndLength[1]);
            retainLogfiles.put(fileAndLength[0],expectedLength);
           
            // check for files in checkpoint directory; relink to environment as necessary
            File cpFile = new File(cpDir, line);
            File destFile = new File(dir.getFile(), fileAndLength[0]);
            if(cpFile.exists()) {
                if(cpFile.length()!=expectedLength) {
                    LOGGER.warning(cpFile.getName()+" expected "+expectedLength+" actual "+cpFile.length());
                    // TODO: is truncation necessary?
                }
                if(destFile.exists()) {
                    if(!destFile.delete()) {
                        LOGGER.log(Level.SEVERE, "unable to delete obstructing file "+destFile)
                    }
                }
               
                boolean status = FilesystemLinkMaker.makeHardLink(cpFile.getAbsolutePath(), destFile.getAbsolutePath());
                if (!status) {
                    LOGGER.log(Level.SEVERE, "unable to create required restore link "+destFile);
                }
            }
           
        }
       
        IOFileFilter filter = FileFilterUtils.orFileFilter(
                FileFilterUtils.suffixFileFilter(".jdb"),
                FileFilterUtils.suffixFileFilter(".del"));
        filter = FileFilterUtils.makeFileOnly(filter);
       
        // reverify environment directory is as it was at checkpoint time,
        // deleting any extra files
        for(File f : dir.getFile().listFiles((FileFilter)filter)) {
            if(retainLogfiles.containsKey(f.getName())) {
                // named file still exists under original name
                long expectedLength = retainLogfiles.get(f.getName());
                if(f.length()!=expectedLength) {
                    LOGGER.warning(f.getName()+" expected "+expectedLength+" actual "+f.length());
                    // TODO: truncate? this unexpected length mismatch
                    // probably only happens if there was already a recovery
                    // where the affected file was the last of the set, in
                    // which case BDB appends a small amount of (harmless?) data
                    // to the previously-undersized file
                }
                retainLogfiles.remove(f.getName());
                continue;
            }
            // file as now-named not in restore set; check if un-".DEL" renaming needed
            String undelName = f.getName().replace(".del", ".jdb");
            if(retainLogfiles.containsKey(undelName)) {
                // file if renamed matches desired file name
                long expectedLength = retainLogfiles.get(undelName);
                if(f.length()!=expectedLength) {
                    LOGGER.warning(f.getName()+" expected "+expectedLength+" actual "+f.length());
                    // TODO: truncate to expected size?
                }
                if(!f.renameTo(new File(f.getParentFile(),undelName))) {
                    throw new IOException("Unable to rename " + f + " to " +
                            undelName);
                }
                retainLogfiles.remove(undelName);
            }
            // file not needed; delete/move-aside
            if(!f.delete()) {
                LOGGER.warning("unable to delete "+f);
                org.archive.util.FileUtils.moveAsideIfExists(f);
            }
            // TODO: log/warn of ruined later checkpoints?
        }
        if(retainLogfiles.size()>0) {
            // some needed files weren't present
            LOGGER.severe("Checkpoint corrupt, needed log files missing: "+retainLogfiles);
        }
       
    }

    /**
     * Intentionally a no-op: this module does nothing once a checkpoint
     * has finished.
     */
    public void finishCheckpoint(Checkpoint checkpointInProgress) {}
    
    protected Checkpoint recoveryCheckpoint;
    @Autowired(required=false)
    public void setRecoveryCheckpoint(Checkpoint checkpoint) {
        this.recoveryCheckpoint = checkpoint;
    }

    public void close() {
        if (classCatalog == null) {
            return;
        }
       
        for(@SuppressWarnings("rawtypes") ObjectIdentityCache cache : oiCaches.values()) {
            try {
                cache.close();
            } catch (Exception e) {
                LOGGER.log(Level.SEVERE, "Error closing oiCache " + cache, e);
            }
        }

        List<String> dbNames = new ArrayList<String>(databases.keySet());
        for (String dbName: dbNames) try {
            closeDatabase(dbName);
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Error closing db " + dbName, e);
        }

        try {
            this.bdbEnvironment.sync();
            this.bdbEnvironment.close();
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Error closing environment.", e);
        }
    }
   
    public Database getDatabase(String name) {
        DatabasePlusConfig dpc = databases.get(name);
        if (dpc == null) {
            return null;
        }
        return dpc.database;
    }

    /** uniqueness serial number for temp map databases */
    protected long sn = 0;
       
    /**
     * Creates a database-backed TempStoredSortedMap for transient
     * reporting requirements. Calling the returned map's destroy()
     * method when done discards the associated Database.
     *
     * @param <K>
     * @param <V>
     * @param dbName Database name to use; if null a name will be synthesized
     * @param keyClass Class of keys; should be a Java primitive type
     * @param valueClass Class of values; may be any serializable type
     * @param allowDuplicates whether duplicate keys allowed
     * @return
     */
    public <K,V> DisposableStoredSortedMap<K, V> getStoredMap(String dbName, Class<K> keyClass, Class<V> valueClass, boolean allowDuplicates, boolean usePriorData) {
        BdbConfig config = new BdbConfig();
        config.setSortedDuplicates(allowDuplicates);
        config.setAllowCreate(!usePriorData);
        Database mapDb;
        if(dbName==null) {
            dbName = "tempMap-"+System.identityHashCode(this)+"-"+sn;
            sn++;
        }
        final String openName = dbName;
        try {
            mapDb = openDatabase(openName,config,usePriorData);
        } catch (DatabaseException e) {
            throw new RuntimeException(e);
        }
        EntryBinding<V> valueBinding = TupleBinding.getPrimitiveBinding(valueClass);
        if(valueBinding == null) {
            valueBinding = new SerialBinding<V>(classCatalog, valueClass);
        }
        DisposableStoredSortedMap<K,V> storedMap = new DisposableStoredSortedMap<K, V>(
                mapDb,
                TupleBinding.getPrimitiveBinding(keyClass),
                valueBinding,
                true) {
                    @Override
                    public void dispose() {
                        super.dispose();
                        DatabasePlusConfig dpc = BdbModule.this.databases.remove(openName);
                        if (dpc == null) {
                            BdbModule.LOGGER.log(Level.WARNING,"No such database: " + openName);
                        }
                    }
        };
        return storedMap;
    }
   
    @Override
    public void destroy() throws Exception {
        close();
    }

}
TOP

Related Classes of org.archive.bdb.BdbModule

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.