/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.util;
import java.io.Closeable;
import java.io.Serializable;
import java.lang.ref.PhantomReference;
import java.lang.ref.Reference;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.SoftReference;
import java.lang.reflect.Field;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.archive.bdb.KryoBinding;
import com.sleepycat.bind.EntryBinding;
import com.sleepycat.bind.serial.StoredClassCatalog;
import com.sleepycat.bind.tuple.TupleBinding;
import com.sleepycat.collections.StoredSortedMap;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.Environment;
/**
* A BDB JE backed object cache.
*
* Soft references to previously-instantiated objects are held so that
* unless/until an object is garbage collected, subsequent get()s will
* return the exact same object. (If all outside references are lost,
* when the soft reference is broken, the object's state -- still
* accessible to this class via reflective access to a phantom
* referent -- is flushed to disk. The next get() will reconstitute a
* new object from the disk state.)
* <p/>
* The backing disk is only guaranteed to be up-to-date after a flush
* of all in-memory values to disk, as can be forced by sync().
* <p/>
* To ensure that changes/mutations to values in this map are coherent and
* consistent at the application level, it is assumed that the application
* only mutates values while they are in this map, and does not retain
* references to values longer than necessary. This allows mappings to be
* persisted during GC without explicit transactions or write operations.
* <p/>
* Based on the earlier CachedBdbMap.
* <p/>
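* A minimal usage sketch (hypothetical names: {@code env}, {@code classCatalog},
* {@code supplier}, and a value class {@code MyState} implementing
* IdentityCacheable; within Heritrix such caches are normally obtained via
* org.archive.bdb.BdbModule rather than constructed directly):
* <pre>{@code
* ObjectIdentityBdbCache<MyState> cache = new ObjectIdentityBdbCache<MyState>();
* cache.initialize(env, "myStateDb", MyState.class, classCatalog);
* MyState state = cache.getOrUse("someKey", supplier); // supplier builds a MyState if absent
* // ... mutate state; page-out persists it when the soft reference clears ...
* cache.sync();   // force all in-memory values to the backing database
* cache.close();
* }</pre>
* <p/>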
*
* @author John Erik Halse
* @author stack
* @author gojomo
* @author paul baclace (conversion to ConcurrentMap)
*
*/
public class ObjectIdentityBdbCache<V extends IdentityCacheable>
implements ObjectIdentityCache<V>, Closeable, Serializable {
private static final long serialVersionUID = 1L;
private static final Logger logger =
Logger.getLogger(ObjectIdentityBdbCache.class.getName());
/** The BDB JE database used for this instance. */
protected transient Database db;
/** in-memory map of new/recent/still-referenced-elsewhere instances */
protected transient ConcurrentHashMap<String,SoftEntry<V>> memMap;
protected transient ReferenceQueue<V> refQueue;
/** The Collection view of the BDB JE database used for this instance. */
protected transient StoredSortedMap<String, V> diskMap;
protected AtomicLong count;
//
// USAGE STATS
//
/** Count of times we got an object from in-memory cache */
private AtomicLong cacheHit = new AtomicLong(0);
/** Count of times the {@link ObjectIdentityBdbCache#get} method was called. */
private AtomicLong countOfGets = new AtomicLong(0);
/** Count of every time disk-based map provided non-null object */
private AtomicLong diskHit = new AtomicLong(0);
/** Count of times Supplier was used for new object */
private AtomicLong supplierUsed = new AtomicLong(0);
/** count of expunge put() to BDB (implies disk) */
private AtomicLong expungeStatsDiskPut = new AtomicLong(0);
/** count of {@link #sync()} use */
transient private AtomicLong useStatsSyncUsed = new AtomicLong(0);
/** Reference to the Reference#referent Field. */
protected static Field referentField;
static {
// We need access to the referent field in the PhantomReference.
// For more on this trick, see
//
// http://www.javaspecialists.co.za/archive/Issue098.html and for
// discussion:
// http://www.theserverside.com/tss?service=direct/0/NewsThread/threadViewer.markNoisy.link&sp=l29865&sp=l146901
try {
referentField = Reference.class.getDeclaredField("referent");
referentField.setAccessible(true);
} catch (SecurityException e) {
throw new RuntimeException(e);
} catch (NoSuchFieldException e) {
throw new RuntimeException(e);
}
}
/**
* Constructor. You must call
* {@link #initialize(Environment, String, Class, StoredClassCatalog)}
* to finish construction. Construction is two-stepped to support
* reconnecting a deserialized ObjectIdentityBdbCache with its backing
* bdbje database.
*/
public ObjectIdentityBdbCache() {
super();
}
/**
* Call this method when you have an instance created with the default
* constructor, or a deserialized instance that you want to reconnect
* with an extant bdbje environment.
* @param env
* @param dbName
* @param valueClass
* @param classCatalog
* @throws DatabaseException
*/
public void initialize(final Environment env, String dbName,
final Class valueClass, final StoredClassCatalog classCatalog)
throws DatabaseException {
// TODO: initial capacity should be related to number of seeds, max depth, max docs
this.memMap = new ConcurrentHashMap<String,SoftEntry<V>>(
8192, // initial capacity
0.9f, // acceptable load factor
64 // est. number of concurrent threads
);
this.refQueue = new ReferenceQueue<V>();
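// install low-memory 'canary': when GC clears it under memory
// pressure, its finalizer triggers pageOutStaleEntries() (see
// LowMemoryCanary below)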
canary = new SoftReference<LowMemoryCanary>(new LowMemoryCanary());
this.db = openDatabase(env, dbName);
this.diskMap = createDiskMap(this.db, classCatalog, valueClass);
this.count = new AtomicLong(diskMap.size());
}
@SuppressWarnings("unchecked")
protected StoredSortedMap<String, V> createDiskMap(Database database,
StoredClassCatalog classCatalog, Class valueClass) {
EntryBinding keyBinding = TupleBinding.getPrimitiveBinding(String.class);
EntryBinding valueBinding = TupleBinding.getPrimitiveBinding(valueClass);
if(valueBinding == null) {
valueBinding =
new KryoBinding<V>(valueClass);
// new SerialBinding(classCatalog, valueClass);
// new BenchmarkingBinding<V>(new EntryBinding[] {
// new KryoBinding<V>(valueClass),
// new RecyclingSerialBinding<V>(classCatalog, valueClass),
// }, valueClass);
}
return new StoredSortedMap<String,V>(database, keyBinding, valueBinding, true);
}
protected Database openDatabase(final Environment environment,
final String dbName) throws DatabaseException {
DatabaseConfig dbConfig = new DatabaseConfig();
dbConfig.setTransactional(false);
dbConfig.setAllowCreate(true);
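// deferred-write: JE buffers updates in memory; they reach stable
// storage only via explicit Database.sync() (see sync() and close())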
dbConfig.setDeferredWrite(true);
return environment.openDatabase(null, dbName, dbConfig);
}
/* (non-Javadoc)
* @see org.archive.util.ObjectIdentityCache#close()
*/
public synchronized void close() {
// Close out my bdb db.
if (this.db != null) {
try {
sync();
this.db.sync();
this.db.close();
} catch (DatabaseException e) {
logger.log(Level.WARNING,"problem closing ObjectIdentityBdbCache",e);
} finally {
this.db = null;
}
}
}
protected void finalize() throws Throwable {
close();
super.finalize();
}
/* (non-Javadoc)
* @see org.archive.util.ObjectIdentityCache#get(java.lang.String)
*/
public V get(final String key) {
return getOrUse(key,null);
}
/* (non-Javadoc)
* @see org.archive.util.ObjectIdentityCache#getOrUse(java.lang.String, org.archive.util.Supplier)
*/
public V getOrUse(final String key, Supplier<V> supplierOrNull) {
countOfGets.incrementAndGet();
if (countOfGets.get() % 10000 == 0) {
logCacheSummary();
}
// check mem cache
SoftEntry<V> entry = memMap.get(key);
if(entry != null) {
V val = entry.get();
if(val != null) {
// the concurrent garden path: in mem, valid
cacheHit.incrementAndGet();
val.setIdentityCache(this);
return val;
}
}
// all other, more difficult cases are handled inside this block
synchronized(this) {
// recheck mem cache -- if another thread beat us into sync
// block and already filled the key
entry = memMap.get(key);
if(entry != null) {
V val = entry.get();
if(val != null) {
cacheHit.incrementAndGet();
val.setIdentityCache(this);
return val;
}
}
// persist to disk all ref-enqueued stale (soft-ref-cleared) entries now
pageOutStaleEntries();
// and, if this exact entry was cleared but not yet ref-enqueued, page it out directly
if(memMap.get(key)!=null) {
pageOutStaleEntry(entry);
if(memMap.get(key)!=null) {
logger.log(Level.SEVERE,"nulled key "+key+" not paged-out", new Exception());
}
}
// check disk
V valDisk = (V) diskMap.get(key);
if(valDisk==null) {
// never yet created, consider creating
if(supplierOrNull==null) {
return null;
}
// create using provided Supplier
valDisk = supplierOrNull.get();
supplierUsed.incrementAndGet();
// putting initial value directly into diskMap
// (rather than just the memMap until page-out)
// ensures diskMap.keySet() provides complete view
V prevVal = diskMap.putIfAbsent(key, valDisk);
count.incrementAndGet();
if(prevVal!=null) {
// ERROR: diskMap modification since previous
// diskMap.get() should be impossible
logger.log(Level.SEVERE,"diskMap modified outside synchronized block?");
}
} else {
diskHit.incrementAndGet();
}
// keep new val in memMap
SoftEntry<V> newEntry = new SoftEntry<V>(key, valDisk, refQueue);
SoftEntry<V> prevVal = memMap.putIfAbsent(key, newEntry);
if(prevVal != null) {
// ERROR: memMap modification since previous
// memMap.get() should be impossible
logger.log(Level.SEVERE,"memMap modified outside synchronized block?", new Exception());
}
valDisk.setIdentityCache(this);
return valDisk;
}
}
/* (non-Javadoc)
* @see org.archive.util.ObjectIdentityCache#keySet()
*/
public Set<String> keySet() {
return diskMap.keySet();
}
/**
* Log a cache summary, if logging at FINE level.
*/
private void logCacheSummary() {
if (logger.isLoggable((Level.FINE))) {
logger.fine(composeCacheSummary());
}
}
protected String composeCacheSummary() {
long totalHits = cacheHit.get() + diskHit.get();
if (totalHits < 1) {
return "";
}
long cacheHitPercent
= (cacheHit.get() * 100) / totalHits;
StringBuilder sb = new StringBuilder(120);
sb.append("DB name:")
.append(getDatabaseName())
.append(", ")
.append(" hit%: ")
.append(cacheHitPercent)
.append("%, gets=")
.append(countOfGets.get())
.append(" memHits=")
.append(cacheHit.get())
.append(" diskHits=")
.append(diskHit.get())
.append(" supplieds=")
.append(supplierUsed.get())
.append(" expungePuts=")
.append(expungeStatsDiskPut.get())
.append(" syncs=")
.append(useStatsSyncUsed.get());
return sb.toString();
}
/* (non-Javadoc)
* @see org.archive.util.ObjectIdentityCache#size()
*/
public int size() {
if(db==null) {
return 0;
}
return (int) count.get();
}
protected String getDatabaseName() {
String name = "DbName-Lookup-Failed";
try {
if (this.db != null) {
name = this.db.getDatabaseName();
}
} catch (DatabaseException e) {
// Ignore.
}
return name;
}
/**
* Sync all in-memory map entries to the backing disk store, then
* flush the deferred-write database to stable storage.
*/
public synchronized void sync() {
String dbName = null;
// Sync. memory and disk.
useStatsSyncUsed.incrementAndGet();
long startTime = 0;
if (logger.isLoggable(Level.FINE)) {
dbName = getDatabaseName();
startTime = System.currentTimeMillis();
logger.fine(dbName + " start sizes: disk " + this.diskMap.size() +
", mem " + this.memMap.size());
}
for (String key : this.memMap.keySet()) {
SoftEntry<V> entry = memMap.get(key);
if (entry != null) {
// Get & hold so not cleared pre-return.
V value = entry.get();
if (value != null) {
expungeStatsDiskPut.incrementAndGet();
this.diskMap.put(key, value);
}
}
}
pageOutStaleEntries();
// force sync of deferred-writes
try {
this.db.sync();
} catch (DatabaseException e) {
throw new RuntimeException(e);
}
if (logger.isLoggable(Level.FINE)) {
logger.fine(dbName + " sync took " +
(System.currentTimeMillis() - startTime) + "ms. " +
"Finish sizes: disk " +
this.diskMap.size() + ", mem " + this.memMap.size());
}
}
@Override
public void dirtyKey(String key) {
// do nothing, because our weak/phantom trickery is supposed to
// ensure sync-to-persistence if/when dereferenced and collected
}
/** An incremental, poll-based expunger.
*
* Protected for unit-test visibility.
*/
@SuppressWarnings("unchecked")
protected synchronized void pageOutStaleEntries() {
int c = 0;
long startTime = System.currentTimeMillis();
for(SoftEntry<V> entry; (entry = (SoftEntry<V>)refQueue.poll()) != null;) {
pageOutStaleEntry(entry);
c++;
}
if (c > 0 && logger.isLoggable(Level.FINER)) {
long endTime = System.currentTimeMillis();
try {
logger.finer("DB: " + db.getDatabaseName() + ", Expunged: "
+ c + ", Diskmap size: " + diskMap.size()
+ ", Cache size: " + memMap.size()
+ ", in "+(endTime-startTime)+"ms");
} catch (DatabaseException e) {
logger.log(Level.FINER,"exception while logging",e);
}
}
}
/**
* Expunge an entry from memMap while updating diskMap.
*
* @param entry a SoftEntry<V> obtained from refQueuePoll()
*/
synchronized private void pageOutStaleEntry(SoftEntry<V> entry) {
PhantomEntry<V> phantom = entry.phantom;
// Still in memMap? if not, was paged-out by earlier direct access
// before placed into reference-queue; just return
if (memMap.get(phantom.key) != entry) { // NOTE: intentional identity compare
return;
}
// recover hidden value
V phantomValue = phantom.doctoredGet();
// Expected value present? (should be; only clear is at end of
// this method, after entry removal from memMap)
if(phantomValue == null) {
logger.log(Level.WARNING,"unexpected null phantomValue", new Exception());
return; // nothing to do
}
// given instance entry still in memMap;
// we have the key and phantom Value,
// the diskMap can be updated.
diskMap.put(phantom.key, phantomValue);
expungeStatsDiskPut.incrementAndGet();
// remove memMap entry
boolean removed = memMap.remove(phantom.key, entry);
if(!removed) {
logger.log(Level.WARNING,"expunge memMap.remove() ineffective",new Exception());
}
phantom.clear(); // truly allows GC of unreferenced V object
}
private static class PhantomEntry<V> extends PhantomReference<V> {
protected final String key;
public PhantomEntry(String key, V referent) {
super(referent, null);
this.key = key;
}
/**
* @return the actual referent. The {@link #get()} contract for a
* PhantomReference is to always return null; we've cheated and
* doctored this PhantomReference to return the actual referent value
* via reflection. See notes at
* {@link ObjectIdentityBdbCache#referentField}.
*/
@SuppressWarnings("unchecked")
final public V doctoredGet() {
try {
// Here we use the referentField saved off on static
// initialization of this class to get at this References'
// private referent field.
return (V) referentField.get(this);
} catch (IllegalAccessException e) {
throw new RuntimeException(e);
}
}
}
/**
* SoftReference cache entry.
*
* A PhantomReference is used to hold the key and value as a last
* chance, before GC, to effect the update of diskMap.
* <p/>
* Entries are not recycled.
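* <p/>
* Lifecycle sketch (descriptive of the code below): while the value is
* strongly referenced elsewhere, {@link #get()} returns it; once it is
* only softly reachable, the collector may clear this SoftReference and
* enqueue it on the cache's ReferenceQueue; pageOutStaleEntries() then
* recovers the still-phantom-reachable value via PhantomEntry's
* doctoredGet(), writes it to diskMap, and finally clears the phantom
* so the object can truly be collected.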
*/
private static class SoftEntry<V> extends SoftReference<V> {
PhantomEntry<V> phantom;
public SoftEntry(String key, V referent, ReferenceQueue<V> q) {
super(referent, q);
this.phantom = new PhantomEntry<V>(key, referent);
}
public V get() {
// ensure visibility
synchronized (this) {
return super.get();
}
}
public String toString() {
if (phantom != null) {
return "SoftEntry(key=" + phantom.key + ")";
} else {
return "SoftEntry()";
}
}
}
//
// Crude, probably unreliable/fragile but harmless mechanism to
// trigger expunge of cleared SoftReferences in low-memory
// conditions even without any of the other get/put triggers.
//
protected transient SoftReference<LowMemoryCanary> canary;
protected class LowMemoryCanary {
/** When collected/finalized -- as should be expected in
* low-memory conditions -- trigger an expunge and a
* new 'canary' insertion. */
public void finalize() {
ObjectIdentityBdbCache.this.pageOutStaleEntries();
// System.err.println("CANARY KILLED - "+ObjectIdentityBdbCache.this);
// only install new canary if map still 'open' with db reference
if(ObjectIdentityBdbCache.this.db !=null) {
ObjectIdentityBdbCache.this.canary =
new SoftReference<LowMemoryCanary>(new LowMemoryCanary());
} else {
ObjectIdentityBdbCache.this.canary = null;
}
}
}
}