/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.NavigableSet;
import java.util.SortedSet;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CompoundConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.exceptions.WrongRegionException;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.exceptions.InvalidHFileException;
import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionPolicy;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
import org.apache.hadoop.hbase.regionserver.compactions.Compactor;
import org.apache.hadoop.hbase.regionserver.compactions.OffPeakCompactions;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.CollectionBackedScanner;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.util.StringUtils;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableCollection;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
/**
 * A Store holds a column family in a Region. It's a memstore and a set of zero
 * or more StoreFiles, which stretch backwards over time.
*
* <p>There's no reason to consider append-logging at this level; all logging
* and locking is handled at the HRegion level. Store just provides
 * services to manage sets of StoreFiles. One of the most important of those
 * services is compaction, where store files are aggregated once they pass
 * a configurable threshold.
*
* <p>The only thing having to do with logs that Store needs to deal with is
* the reconstructionLog. This is a segment of an HRegion's log that might
* NOT be present upon startup. If the param is NULL, there's nothing to do.
* If the param is non-NULL, we need to process the log to reconstruct
* a TreeMap that might not have been written to disk before the process
* died.
*
* <p>It's assumed that after this constructor returns, the reconstructionLog
* file will be deleted (by whoever has instantiated the Store).
*
* <p>Locking and transactions are handled at a higher level. This API should
* not be called directly but by an HRegion manager.
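 *
 * <p>A minimal sketch of the flush lifecycle this class exposes, assuming a
 * caller (normally HRegion) that already holds the appropriate region-level
 * locks:
 * <pre>{@code
 * StoreFlusher flusher = store.getStoreFlusher(cacheFlushId);
 * flusher.prepare();                    // snapshot the memstore
 * flusher.flushCache(status);           // write the snapshot to a tmp file
 * boolean compactionNeeded = flusher.commit(status); // swap in the new file
 * }</pre>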
*/
@InterfaceAudience.Private
public class HStore implements Store {
public static final String BLOCKING_STOREFILES_KEY = "hbase.hstore.blockingStoreFiles";
public static final int DEFAULT_BLOCKING_STOREFILE_COUNT = 7;
static final Log LOG = LogFactory.getLog(HStore.class);
protected final MemStore memstore;
private final HRegion region;
private final HColumnDescriptor family;
private final HRegionFileSystem fs;
private final Configuration conf;
private final CacheConfig cacheConf;
private long lastCompactSize = 0;
volatile boolean forceMajor = false;
/* how many bytes to write between status checks */
static int closeCheckInterval = 0;
private volatile long storeSize = 0L;
private volatile long totalUncompressedBytes = 0L;
private final Object flushLock = new Object();
final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
private final boolean verifyBulkLoads;
private ScanInfo scanInfo;
final List<StoreFile> filesCompacting = Lists.newArrayList();
// All access must be synchronized.
private final CopyOnWriteArraySet<ChangedReadersObserver> changedReaderObservers =
new CopyOnWriteArraySet<ChangedReadersObserver>();
private final int blocksize;
private HFileDataBlockEncoder dataBlockEncoder;
/** Checksum configuration */
private ChecksumType checksumType;
private int bytesPerChecksum;
// Comparing KeyValues
private final KeyValue.KVComparator comparator;
final StoreEngine<?, ?, ?> storeEngine;
private OffPeakCompactions offPeakCompactions;
private static final int DEFAULT_FLUSH_RETRIES_NUMBER = 10;
private static int flush_retries_number;
private static int pauseTime;
private long blockingFileCount;
/**
* Constructor
* @param region
* @param family HColumnDescriptor for this column
   * @param confParam configuration object
* @throws IOException
*/
protected HStore(final HRegion region, final HColumnDescriptor family,
final Configuration confParam) throws IOException {
HRegionInfo info = region.getRegionInfo();
this.fs = region.getRegionFileSystem();
    // Assemble the store's home directory and ensure it exists.
fs.createStoreDir(family.getNameAsString());
this.region = region;
this.family = family;
// 'conf' renamed to 'confParam' b/c we use this.conf in the constructor
// CompoundConfiguration will look for keys in reverse order of addition, so we'd
// add global config first, then table and cf overrides, then cf metadata.
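    // For example, a key set both in the global configuration and in the CF
    // overrides resolves to the CF value, because later additions win on lookup.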
this.conf = new CompoundConfiguration()
.add(confParam)
.addStringMap(region.getTableDesc().getConfiguration())
.addStringMap(family.getConfiguration())
.addWritableMap(family.getValues());
this.blocksize = family.getBlocksize();
this.dataBlockEncoder =
new HFileDataBlockEncoderImpl(family.getDataBlockEncodingOnDisk(),
family.getDataBlockEncoding());
this.comparator = info.getComparator();
// used by ScanQueryMatcher
long timeToPurgeDeletes =
Math.max(conf.getLong("hbase.hstore.time.to.purge.deletes", 0), 0);
LOG.trace("Time to purge deletes set to " + timeToPurgeDeletes +
"ms in store " + this);
// Get TTL
long ttl = determineTTLFromFamily(family);
    // Why not just pass an HColumnDescriptor in here altogether? Even if we
    // have to clone it?
scanInfo = new ScanInfo(family, ttl, timeToPurgeDeletes, this.comparator);
this.memstore = new MemStore(conf, this.comparator);
this.offPeakCompactions = new OffPeakCompactions(conf);
// Setting up cache configuration for this family
this.cacheConf = new CacheConfig(conf, family);
this.verifyBulkLoads = conf.getBoolean("hbase.hstore.bulkload.verify", false);
this.blockingFileCount =
conf.getInt(BLOCKING_STOREFILES_KEY, DEFAULT_BLOCKING_STOREFILE_COUNT);
if (HStore.closeCheckInterval == 0) {
HStore.closeCheckInterval = conf.getInt(
"hbase.hstore.close.check.interval", 10*1000*1000 /* 10 MB */);
}
this.storeEngine = StoreEngine.create(this, this.conf, this.comparator);
this.storeEngine.getStoreFileManager().loadFiles(loadStoreFiles());
// Initialize checksum type from name. The names are CRC32, CRC32C, etc.
this.checksumType = getChecksumType(conf);
    // Initialize bytes per checksum
this.bytesPerChecksum = getBytesPerChecksum(conf);
// Create a compaction manager.
if (HStore.flush_retries_number == 0) {
HStore.flush_retries_number = conf.getInt(
"hbase.hstore.flush.retries.number", DEFAULT_FLUSH_RETRIES_NUMBER);
HStore.pauseTime = conf.getInt(HConstants.HBASE_SERVER_PAUSE,
HConstants.DEFAULT_HBASE_SERVER_PAUSE);
if (HStore.flush_retries_number <= 0) {
throw new IllegalArgumentException(
"hbase.hstore.flush.retries.number must be > 0, not "
+ HStore.flush_retries_number);
}
}
}
/**
* @param family
   * @return TTL in milliseconds of the specified family
*/
private static long determineTTLFromFamily(final HColumnDescriptor family) {
// HCD.getTimeToLive returns ttl in seconds. Convert to milliseconds.
long ttl = family.getTimeToLive();
if (ttl == HConstants.FOREVER) {
// Default is unlimited ttl.
ttl = Long.MAX_VALUE;
} else if (ttl == -1) {
ttl = Long.MAX_VALUE;
} else {
// Second -> ms adjust for user data
ttl *= 1000;
}
return ttl;
}
public String getColumnFamilyName() {
return this.family.getNameAsString();
}
@Override
public String getTableName() {
return this.getRegionInfo().getTableNameAsString();
}
@Override
public FileSystem getFileSystem() {
return this.fs.getFileSystem();
}
public HRegionFileSystem getRegionFileSystem() {
return this.fs;
}
/* Implementation of StoreConfigInformation */
@Override
public long getStoreFileTtl() {
// TTL only applies if there's no MIN_VERSIONs setting on the column.
return (this.scanInfo.getMinVersions() == 0) ? this.scanInfo.getTtl() : Long.MAX_VALUE;
}
@Override
public long getMemstoreFlushSize() {
return this.region.memstoreFlushSize;
}
/* End implementation of StoreConfigInformation */
/**
* Returns the configured bytesPerChecksum value.
* @param conf The configuration
* @return The bytesPerChecksum that is set in the configuration
*/
public static int getBytesPerChecksum(Configuration conf) {
return conf.getInt(HConstants.BYTES_PER_CHECKSUM,
HFile.DEFAULT_BYTES_PER_CHECKSUM);
}
/**
* Returns the configured checksum algorithm.
* @param conf The configuration
* @return The checksum algorithm that is set in the configuration
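   *
   * <p>A hedged example of configuring the checksum keys read here and in
   * {@link #getBytesPerChecksum(Configuration)} (hypothetical values):
   * <pre>{@code
   * conf.set(HConstants.CHECKSUM_TYPE_NAME, "CRC32C");
   * conf.setInt(HConstants.BYTES_PER_CHECKSUM, 16 * 1024);
   * }</pre>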
*/
public static ChecksumType getChecksumType(Configuration conf) {
String checksumName = conf.get(HConstants.CHECKSUM_TYPE_NAME);
if (checksumName == null) {
return HFile.DEFAULT_CHECKSUM_TYPE;
} else {
return ChecksumType.nameToType(checksumName);
}
}
/**
* @return how many bytes to write between status checks
*/
public static int getCloseCheckInterval() {
return closeCheckInterval;
}
public HColumnDescriptor getFamily() {
return this.family;
}
/**
* @return The maximum sequence id in all store files. Used for log replay.
*/
long getMaxSequenceId(boolean includeBulkFiles) {
return StoreFile.getMaxSequenceIdInList(this.getStorefiles(), includeBulkFiles);
}
@Override
public long getMaxMemstoreTS() {
return StoreFile.getMaxMemstoreTSInList(this.getStorefiles());
}
/**
* @param tabledir {@link Path} to where the table is being stored
* @param hri {@link HRegionInfo} for the region.
* @param family {@link HColumnDescriptor} describing the column family
* @return Path to family/Store home directory.
*/
@Deprecated
public static Path getStoreHomedir(final Path tabledir,
final HRegionInfo hri, final byte[] family) {
return getStoreHomedir(tabledir, hri.getEncodedName(), family);
}
/**
* @param tabledir {@link Path} to where the table is being stored
* @param encodedName Encoded region name.
* @param family {@link HColumnDescriptor} describing the column family
* @return Path to family/Store home directory.
*/
@Deprecated
public static Path getStoreHomedir(final Path tabledir,
final String encodedName, final byte[] family) {
return new Path(tabledir, new Path(encodedName, Bytes.toString(family)));
}
@Override
public HFileDataBlockEncoder getDataBlockEncoder() {
return dataBlockEncoder;
}
/**
* Should be used only in tests.
   * @param blockEncoder the data block encoder to use
*/
void setDataBlockEncoderInTest(HFileDataBlockEncoder blockEncoder) {
this.dataBlockEncoder = blockEncoder;
}
/**
   * Creates an unsorted list of StoreFiles loaded in parallel
* from the given directory.
* @throws IOException
*/
private List<StoreFile> loadStoreFiles() throws IOException {
Collection<StoreFileInfo> files = fs.getStoreFiles(getColumnFamilyName());
if (files == null || files.size() == 0) {
return new ArrayList<StoreFile>();
}
// initialize the thread pool for opening store files in parallel..
ThreadPoolExecutor storeFileOpenerThreadPool =
this.region.getStoreFileOpenAndCloseThreadPool("StoreFileOpenerThread-" +
this.getColumnFamilyName());
CompletionService<StoreFile> completionService =
new ExecutorCompletionService<StoreFile>(storeFileOpenerThreadPool);
int totalValidStoreFile = 0;
final FileSystem fs = this.getFileSystem();
for (final StoreFileInfo storeFileInfo: files) {
// open each store file in parallel
completionService.submit(new Callable<StoreFile>() {
public StoreFile call() throws IOException {
StoreFile storeFile = new StoreFile(fs, storeFileInfo.getPath(), conf, cacheConf,
family.getBloomFilterType(), dataBlockEncoder);
storeFile.createReader();
return storeFile;
}
});
totalValidStoreFile++;
}
ArrayList<StoreFile> results = new ArrayList<StoreFile>(files.size());
IOException ioe = null;
try {
for (int i = 0; i < totalValidStoreFile; i++) {
try {
Future<StoreFile> future = completionService.take();
StoreFile storeFile = future.get();
long length = storeFile.getReader().length();
this.storeSize += length;
this.totalUncompressedBytes +=
storeFile.getReader().getTotalUncompressedBytes();
if (LOG.isDebugEnabled()) {
LOG.debug("loaded " + storeFile.toStringDetailed());
}
results.add(storeFile);
} catch (InterruptedException e) {
if (ioe == null) ioe = new InterruptedIOException(e.getMessage());
} catch (ExecutionException e) {
if (ioe == null) ioe = new IOException(e.getCause());
}
}
} finally {
storeFileOpenerThreadPool.shutdownNow();
}
if (ioe != null) {
// close StoreFile readers
try {
for (StoreFile file : results) {
if (file != null) file.closeReader(true);
}
      } catch (IOException e) {
        // Ignore; we are already propagating an earlier exception.
      }
throw ioe;
}
return results;
}
@Override
public long add(final KeyValue kv) {
lock.readLock().lock();
try {
return this.memstore.add(kv);
} finally {
lock.readLock().unlock();
}
}
/**
   * Adds a delete marker to the memstore
*
* @param kv
* @return memstore size delta
*/
protected long delete(final KeyValue kv) {
lock.readLock().lock();
try {
return this.memstore.delete(kv);
} finally {
lock.readLock().unlock();
}
}
@Override
public void rollback(final KeyValue kv) {
lock.readLock().lock();
try {
this.memstore.rollback(kv);
} finally {
lock.readLock().unlock();
}
}
/**
* @return All store files.
*/
@Override
public Collection<StoreFile> getStorefiles() {
return this.storeEngine.getStoreFileManager().getStorefiles();
}
@Override
public void assertBulkLoadHFileOk(Path srcPath) throws IOException {
HFile.Reader reader = null;
try {
LOG.info("Validating hfile at " + srcPath + " for inclusion in "
+ "store " + this + " region " + this.getRegionInfo().getRegionNameAsString());
reader = HFile.createReader(srcPath.getFileSystem(conf),
srcPath, cacheConf);
reader.loadFileInfo();
byte[] firstKey = reader.getFirstRowKey();
Preconditions.checkState(firstKey != null, "First key can not be null");
byte[] lk = reader.getLastKey();
Preconditions.checkState(lk != null, "Last key can not be null");
byte[] lastKey = KeyValue.createKeyValueFromKey(lk).getRow();
LOG.debug("HFile bounds: first=" + Bytes.toStringBinary(firstKey) +
" last=" + Bytes.toStringBinary(lastKey));
LOG.debug("Region bounds: first=" +
Bytes.toStringBinary(getRegionInfo().getStartKey()) +
" last=" + Bytes.toStringBinary(getRegionInfo().getEndKey()));
if (!this.getRegionInfo().containsRange(firstKey, lastKey)) {
throw new WrongRegionException(
"Bulk load file " + srcPath.toString() + " does not fit inside region "
+ this.getRegionInfo().getRegionNameAsString());
}
if (verifyBulkLoads) {
KeyValue prevKV = null;
HFileScanner scanner = reader.getScanner(false, false, false);
scanner.seekTo();
do {
KeyValue kv = scanner.getKeyValue();
if (prevKV != null) {
if (Bytes.compareTo(prevKV.getBuffer(), prevKV.getRowOffset(),
prevKV.getRowLength(), kv.getBuffer(), kv.getRowOffset(),
kv.getRowLength()) > 0) {
throw new InvalidHFileException("Previous row is greater than"
+ " current row: path=" + srcPath + " previous="
+ Bytes.toStringBinary(prevKV.getKey()) + " current="
+ Bytes.toStringBinary(kv.getKey()));
}
if (Bytes.compareTo(prevKV.getBuffer(), prevKV.getFamilyOffset(),
prevKV.getFamilyLength(), kv.getBuffer(), kv.getFamilyOffset(),
kv.getFamilyLength()) != 0) {
throw new InvalidHFileException("Previous key had different"
+ " family compared to current key: path=" + srcPath
+ " previous=" + Bytes.toStringBinary(prevKV.getFamily())
+ " current=" + Bytes.toStringBinary(kv.getFamily()));
}
}
prevKV = kv;
} while (scanner.next());
}
} finally {
if (reader != null) reader.close();
}
}
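  /**
   * A hedged sketch of the expected bulk-load sequence, using the two methods
   * defined here (the caller validates before loading):
   * <pre>{@code
   * store.assertBulkLoadHFileOk(srcPath);
   * store.bulkLoadHFile(srcPath.toString(), seqNum);
   * }</pre>
   */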
@Override
public void bulkLoadHFile(String srcPathStr, long seqNum) throws IOException {
Path srcPath = new Path(srcPathStr);
Path dstPath = fs.bulkLoadStoreFile(getColumnFamilyName(), srcPath, seqNum);
StoreFile sf = new StoreFile(this.getFileSystem(), dstPath, this.conf, this.cacheConf,
this.family.getBloomFilterType(), this.dataBlockEncoder);
StoreFile.Reader r = sf.createReader();
this.storeSize += r.length();
this.totalUncompressedBytes += r.getTotalUncompressedBytes();
LOG.info("Loaded HFile " + srcPath + " into store '" + getColumnFamilyName() +
"' as " + dstPath + " - updating store file list.");
// Append the new storefile into the list
this.lock.writeLock().lock();
try {
this.storeEngine.getStoreFileManager().insertNewFile(sf);
} finally {
// We need the lock, as long as we are updating the storeFiles
// or changing the memstore. Let us release it before calling
// notifyChangeReadersObservers. See HBASE-4485 for a possible
// deadlock scenario that could have happened if continue to hold
// the lock.
this.lock.writeLock().unlock();
}
notifyChangedReadersObservers();
LOG.info("Successfully loaded store file " + srcPath
+ " into store " + this + " (new location: " + dstPath + ")");
}
@Override
public ImmutableCollection<StoreFile> close() throws IOException {
this.lock.writeLock().lock();
try {
// Clear so metrics doesn't find them.
ImmutableCollection<StoreFile> result = storeEngine.getStoreFileManager().clearFiles();
if (!result.isEmpty()) {
// initialize the thread pool for closing store files in parallel.
ThreadPoolExecutor storeFileCloserThreadPool = this.region
.getStoreFileOpenAndCloseThreadPool("StoreFileCloserThread-"
+ this.getColumnFamilyName());
// close each store file in parallel
CompletionService<Void> completionService =
new ExecutorCompletionService<Void>(storeFileCloserThreadPool);
for (final StoreFile f : result) {
completionService.submit(new Callable<Void>() {
public Void call() throws IOException {
f.closeReader(true);
return null;
}
});
}
IOException ioe = null;
try {
for (int i = 0; i < result.size(); i++) {
try {
Future<Void> future = completionService.take();
future.get();
} catch (InterruptedException e) {
if (ioe == null) {
ioe = new InterruptedIOException();
ioe.initCause(e);
}
} catch (ExecutionException e) {
if (ioe == null) ioe = new IOException(e.getCause());
}
}
} finally {
storeFileCloserThreadPool.shutdownNow();
}
if (ioe != null) throw ioe;
}
LOG.info("Closed " + this);
return result;
} finally {
this.lock.writeLock().unlock();
}
}
/**
   * Snapshot this store's memstore. Call before running
* {@link #flushCache(long, SortedSet, TimeRangeTracker, AtomicLong, MonitoredTask)}
* so it has some work to do.
*/
void snapshot() {
this.memstore.snapshot();
}
/**
* Write out current snapshot. Presumes {@link #snapshot()} has been called
* previously.
* @param logCacheFlushId flush sequence number
* @param snapshot
* @param snapshotTimeRangeTracker
* @param flushedSize The number of bytes flushed
* @param status
* @return Path The path name of the tmp file to which the store was flushed
* @throws IOException
*/
protected Path flushCache(final long logCacheFlushId,
SortedSet<KeyValue> snapshot,
TimeRangeTracker snapshotTimeRangeTracker,
AtomicLong flushedSize,
MonitoredTask status) throws IOException {
// If an exception happens flushing, we let it out without clearing
// the memstore snapshot. The old snapshot will be returned when we say
// 'snapshot', the next time flush comes around.
// Retry after catching exception when flushing, otherwise server will abort
// itself
IOException lastException = null;
for (int i = 0; i < HStore.flush_retries_number; i++) {
try {
Path pathName = internalFlushCache(snapshot, logCacheFlushId,
snapshotTimeRangeTracker, flushedSize, status);
try {
// Path name is null if there is no entry to flush
if (pathName != null) {
validateStoreFile(pathName);
}
return pathName;
} catch (Exception e) {
LOG.warn("Failed validating store file " + pathName
+ ", retring num=" + i, e);
if (e instanceof IOException) {
lastException = (IOException) e;
} else {
lastException = new IOException(e);
}
}
} catch (IOException e) {
LOG.warn("Failed flushing store file, retring num=" + i, e);
lastException = e;
}
if (lastException != null) {
try {
Thread.sleep(pauseTime);
} catch (InterruptedException e) {
IOException iie = new InterruptedIOException();
iie.initCause(e);
throw iie;
}
}
}
throw lastException;
}
/*
   * @param set The memstore snapshot set being flushed
* @param logCacheFlushId
* @param snapshotTimeRangeTracker
* @param flushedSize The number of bytes flushed
* @return Path The path name of the tmp file to which the store was flushed
* @throws IOException
*/
private Path internalFlushCache(final SortedSet<KeyValue> set,
final long logCacheFlushId,
TimeRangeTracker snapshotTimeRangeTracker,
AtomicLong flushedSize,
MonitoredTask status)
throws IOException {
StoreFile.Writer writer;
// Find the smallest read point across all the Scanners.
long smallestReadPoint = region.getSmallestReadPoint();
long flushed = 0;
Path pathName;
// Don't flush if there are no entries.
if (set.size() == 0) {
return null;
}
// Use a store scanner to find which rows to flush.
// Note that we need to retain deletes, hence
// treat this as a minor compaction.
InternalScanner scanner = null;
KeyValueScanner memstoreScanner = new CollectionBackedScanner(set, this.comparator);
if (this.getCoprocessorHost() != null) {
scanner = this.getCoprocessorHost().preFlushScannerOpen(this, memstoreScanner);
}
if (scanner == null) {
Scan scan = new Scan();
scan.setMaxVersions(scanInfo.getMaxVersions());
scanner = new StoreScanner(this, scanInfo, scan,
Collections.singletonList(memstoreScanner), ScanType.COMPACT_RETAIN_DELETES,
smallestReadPoint, HConstants.OLDEST_TIMESTAMP);
}
if (this.getCoprocessorHost() != null) {
InternalScanner cpScanner =
this.getCoprocessorHost().preFlush(this, scanner);
// NULL scanner returned from coprocessor hooks means skip normal processing
if (cpScanner == null) {
return null;
}
scanner = cpScanner;
}
try {
int compactionKVMax = conf.getInt(HConstants.COMPACTION_KV_MAX, 10);
// TODO: We can fail in the below block before we complete adding this
// flush to list of store files. Add cleanup of anything put on filesystem
// if we fail.
synchronized (flushLock) {
status.setStatus("Flushing " + this + ": creating writer");
// A. Write the map out to the disk
writer = createWriterInTmp(set.size());
writer.setTimeRangeTracker(snapshotTimeRangeTracker);
pathName = writer.getPath();
try {
List<KeyValue> kvs = new ArrayList<KeyValue>();
boolean hasMore;
do {
hasMore = scanner.next(kvs, compactionKVMax);
if (!kvs.isEmpty()) {
for (KeyValue kv : kvs) {
// If we know that this KV is going to be included always, then let us
// set its memstoreTS to 0. This will help us save space when writing to
// disk.
if (kv.getMemstoreTS() <= smallestReadPoint) {
// let us not change the original KV. It could be in the memstore
// changing its memstoreTS could affect other threads/scanners.
kv = kv.shallowCopy();
kv.setMemstoreTS(0);
}
writer.append(kv);
flushed += this.memstore.heapSizeChange(kv, true);
}
kvs.clear();
}
} while (hasMore);
} finally {
// Write out the log sequence number that corresponds to this output
// hfile. Also write current time in metadata as minFlushTime.
// The hfile is current up to and including logCacheFlushId.
status.setStatus("Flushing " + this + ": appending metadata");
writer.appendMetadata(logCacheFlushId, false);
status.setStatus("Flushing " + this + ": closing flushed file");
writer.close();
}
}
} finally {
flushedSize.set(flushed);
scanner.close();
}
if (LOG.isInfoEnabled()) {
LOG.info("Flushed " +
", sequenceid=" + logCacheFlushId +
", memsize=" + StringUtils.humanReadableInt(flushed) +
", into tmp file " + pathName);
}
return pathName;
}
/*
* @param path The pathname of the tmp file into which the store was flushed
* @param logCacheFlushId
* @return StoreFile created.
* @throws IOException
*/
private StoreFile commitFile(final Path path,
final long logCacheFlushId,
TimeRangeTracker snapshotTimeRangeTracker,
AtomicLong flushedSize,
MonitoredTask status)
throws IOException {
// Write-out finished successfully, move into the right spot
Path dstPath = fs.commitStoreFile(getColumnFamilyName(), path);
status.setStatus("Flushing " + this + ": reopening flushed file");
StoreFile sf = new StoreFile(this.getFileSystem(), dstPath, this.conf, this.cacheConf,
this.family.getBloomFilterType(), this.dataBlockEncoder);
StoreFile.Reader r = sf.createReader();
this.storeSize += r.length();
this.totalUncompressedBytes += r.getTotalUncompressedBytes();
if (LOG.isInfoEnabled()) {
LOG.info("Added " + sf + ", entries=" + r.getEntries() +
", sequenceid=" + logCacheFlushId +
", filesize=" + StringUtils.humanReadableInt(r.length()));
}
return sf;
}
/*
* @param maxKeyCount
* @return Writer for a new StoreFile in the tmp dir.
*/
private StoreFile.Writer createWriterInTmp(long maxKeyCount)
throws IOException {
return createWriterInTmp(maxKeyCount, this.family.getCompression(), false, true);
}
/*
* @param maxKeyCount
* @param compression Compression algorithm to use
* @param isCompaction whether we are creating a new file in a compaction
* @return Writer for a new StoreFile in the tmp dir.
*/
public StoreFile.Writer createWriterInTmp(long maxKeyCount,
Compression.Algorithm compression, boolean isCompaction, boolean includeMVCCReadpoint)
throws IOException {
final CacheConfig writerCacheConf;
if (isCompaction) {
// Don't cache data on write on compactions.
writerCacheConf = new CacheConfig(cacheConf);
writerCacheConf.setCacheDataOnWrite(false);
} else {
writerCacheConf = cacheConf;
}
StoreFile.Writer w = new StoreFile.WriterBuilder(conf, writerCacheConf,
this.getFileSystem(), blocksize)
.withFilePath(fs.createTempName())
.withDataBlockEncoder(dataBlockEncoder)
.withComparator(comparator)
.withBloomType(family.getBloomFilterType())
.withMaxKeyCount(maxKeyCount)
.withChecksumType(checksumType)
.withBytesPerChecksum(bytesPerChecksum)
.withCompression(compression)
.includeMVCCReadpoint(includeMVCCReadpoint)
.build();
return w;
}
/*
   * Updates the store file list, adding into place the file produced by this flush.
   * @param sf
   * @param set The snapshot set that was used to make the passed file <code>sf</code>.
* @throws IOException
* @return Whether compaction is required.
*/
private boolean updateStorefiles(final StoreFile sf,
final SortedSet<KeyValue> set)
throws IOException {
this.lock.writeLock().lock();
try {
this.storeEngine.getStoreFileManager().insertNewFile(sf);
this.memstore.clearSnapshot(set);
} finally {
// We need the lock, as long as we are updating the storeFiles
// or changing the memstore. Let us release it before calling
// notifyChangeReadersObservers. See HBASE-4485 for a possible
// deadlock scenario that could have happened if continue to hold
// the lock.
this.lock.writeLock().unlock();
}
// Tell listeners of the change in readers.
notifyChangedReadersObservers();
return needsCompaction();
}
/*
* Notify all observers that set of Readers has changed.
* @throws IOException
*/
private void notifyChangedReadersObservers() throws IOException {
for (ChangedReadersObserver o: this.changedReaderObservers) {
o.updateReaders();
}
}
/**
* Get all scanners with no filtering based on TTL (that happens further down
* the line).
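   *
   * <p>A hedged usage sketch (assumes a null ScanQueryMatcher is acceptable
   * when no per-file filtering is needed):
   * <pre>{@code
   * List<KeyValueScanner> scanners =
   *     store.getScanners(true, false, false, null, startRow, stopRow);
   * }</pre>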
* @return all scanners for this store
*/
@Override
public List<KeyValueScanner> getScanners(boolean cacheBlocks,
boolean isGet, boolean isCompaction, ScanQueryMatcher matcher, byte[] startRow,
byte[] stopRow) throws IOException {
Collection<StoreFile> storeFilesToScan;
List<KeyValueScanner> memStoreScanners;
this.lock.readLock().lock();
try {
storeFilesToScan =
this.storeEngine.getStoreFileManager().getFilesForScanOrGet(isGet, startRow, stopRow);
memStoreScanners = this.memstore.getScanners();
} finally {
this.lock.readLock().unlock();
}
// First the store file scanners
// TODO this used to get the store files in descending order,
// but now we get them in ascending order, which I think is
// actually more correct, since memstore get put at the end.
List<StoreFileScanner> sfScanners = StoreFileScanner
.getScannersForStoreFiles(storeFilesToScan, cacheBlocks, isGet, isCompaction, matcher);
List<KeyValueScanner> scanners =
new ArrayList<KeyValueScanner>(sfScanners.size()+1);
scanners.addAll(sfScanners);
// Then the memstore scanners
scanners.addAll(memStoreScanners);
return scanners;
}
@Override
public void addChangedReaderObserver(ChangedReadersObserver o) {
this.changedReaderObservers.add(o);
}
@Override
public void deleteChangedReaderObserver(ChangedReadersObserver o) {
// We don't check if observer present; it may not be (legitimately)
this.changedReaderObservers.remove(o);
}
//////////////////////////////////////////////////////////////////////////////
// Compaction
//////////////////////////////////////////////////////////////////////////////
/**
* Compact the StoreFiles. This method may take some time, so the calling
* thread must be able to block for long periods.
*
* <p>During this time, the Store can work as usual, getting values from
* StoreFiles and writing new StoreFiles from the memstore.
*
* Existing StoreFiles are not destroyed until the new compacted StoreFile is
* completely written-out to disk.
*
* <p>The compactLock prevents multiple simultaneous compactions.
* The structureLock prevents us from interfering with other write operations.
*
* <p>We don't want to hold the structureLock for the whole time, as a compact()
* can be lengthy and we want to allow cache-flushes during this period.
*
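   * <p>A hedged sketch of the expected call sequence, using methods defined in
   * this class (error handling omitted):
   * <pre>{@code
   * CompactionContext ctx = store.requestCompaction();
   * if (ctx != null) {
   *   List<StoreFile> newFiles = store.compact(ctx);
   *   // or, to abandon the selection: store.cancelRequestedCompaction(ctx);
   * }
   * }</pre>
   *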
* @param compaction compaction details obtained from requestCompaction()
* @throws IOException
   * @return The storefiles we compacted into, or null if we failed or opted out early.
*/
public List<StoreFile> compact(CompactionContext compaction) throws IOException {
assert compaction != null && compaction.hasSelection();
CompactionRequest cr = compaction.getRequest();
Collection<StoreFile> filesToCompact = cr.getFiles();
assert !filesToCompact.isEmpty();
synchronized (filesCompacting) {
// sanity check: we're compacting files that this store knows about
// TODO: change this to LOG.error() after more debugging
Preconditions.checkArgument(filesCompacting.containsAll(filesToCompact));
}
// Ready to go. Have list of files to compact.
LOG.info("Starting compaction of " + filesToCompact.size() + " file(s) in "
+ this + " of " + this.getRegionInfo().getRegionNameAsString()
+ " into tmpdir=" + fs.getTempDir() + ", totalSize="
+ StringUtils.humanReadableInt(cr.getSize()));
List<StoreFile> sfs = new ArrayList<StoreFile>();
long compactionStartTime = EnvironmentEdgeManager.currentTimeMillis();
try {
// Commence the compaction.
List<Path> newFiles = compaction.compact();
// Move the compaction into place.
if (this.conf.getBoolean("hbase.hstore.compaction.complete", true)) {
for (Path newFile: newFiles) {
assert newFile != null;
StoreFile sf = moveFileIntoPlace(newFile);
if (this.getCoprocessorHost() != null) {
this.getCoprocessorHost().postCompact(this, sf, cr);
}
assert sf != null;
sfs.add(sf);
}
completeCompaction(filesToCompact, sfs);
} else {
for (Path newFile: newFiles) {
// Create storefile around what we wrote with a reader on it.
StoreFile sf = new StoreFile(this.getFileSystem(), newFile, this.conf, this.cacheConf,
this.family.getBloomFilterType(), this.dataBlockEncoder);
sf.createReader();
sfs.add(sf);
}
}
} finally {
finishCompactionRequest(cr);
}
logCompactionEndMessage(cr, sfs, compactionStartTime);
return sfs;
}
/**
* Log a very elaborate compaction completion message.
* @param cr Request.
* @param sfs Resulting files.
* @param compactionStartTime Start time.
*/
private void logCompactionEndMessage(
CompactionRequest cr, List<StoreFile> sfs, long compactionStartTime) {
long now = EnvironmentEdgeManager.currentTimeMillis();
StringBuilder message = new StringBuilder(
"Completed" + (cr.isMajor() ? " major " : " ") + "compaction of "
+ cr.getFiles().size() + " file(s) in " + this + " of "
+ this.getRegionInfo().getRegionNameAsString()
+ " into ");
if (sfs.isEmpty()) {
message.append("none, ");
} else {
for (StoreFile sf: sfs) {
message.append(sf.getPath().getName());
message.append("(size=");
message.append(StringUtils.humanReadableInt(sf.getReader().length()));
message.append("), ");
}
}
message.append("total size for store is ")
.append(StringUtils.humanReadableInt(storeSize))
.append(". This selection was in queue for ")
.append(StringUtils.formatTimeDiff(compactionStartTime, cr.getSelectionTime()))
.append(", and took ").append(StringUtils.formatTimeDiff(now, compactionStartTime))
.append(" to execute.");
LOG.info(message.toString());
}
// Package-visible for tests
StoreFile moveFileIntoPlace(final Path newFile) throws IOException {
validateStoreFile(newFile);
// Move the file into the right spot
Path destPath = fs.commitStoreFile(getColumnFamilyName(), newFile);
StoreFile result = new StoreFile(this.getFileSystem(), destPath, this.conf, this.cacheConf,
this.family.getBloomFilterType(), this.dataBlockEncoder);
result.createReader();
return result;
}
/**
* This method tries to compact N recent files for testing.
   * Note that because compacting "recent" files only makes sense for some policies,
   * e.g. the default one, it assumes the default policy is used. It doesn't use the
   * policy, but instead builds the compaction candidate list by itself.
* @param N Number of files.
*/
public void compactRecentForTestingAssumingDefaultPolicy(int N) throws IOException {
List<StoreFile> filesToCompact;
boolean isMajor;
this.lock.readLock().lock();
try {
synchronized (filesCompacting) {
filesToCompact = Lists.newArrayList(storeEngine.getStoreFileManager().getStorefiles());
if (!filesCompacting.isEmpty()) {
// exclude all files older than the newest file we're currently
// compacting. this allows us to preserve contiguity (HBASE-2856)
StoreFile last = filesCompacting.get(filesCompacting.size() - 1);
int idx = filesToCompact.indexOf(last);
Preconditions.checkArgument(idx != -1);
filesToCompact.subList(0, idx + 1).clear();
}
int count = filesToCompact.size();
if (N > count) {
throw new RuntimeException("Not enough files");
}
filesToCompact = filesToCompact.subList(count - N, count);
isMajor = (filesToCompact.size() == storeEngine.getStoreFileManager().getStorefileCount());
filesCompacting.addAll(filesToCompact);
Collections.sort(filesCompacting, StoreFile.Comparators.SEQ_ID);
}
} finally {
this.lock.readLock().unlock();
}
try {
// Ready to go. Have list of files to compact.
List<Path> newFiles =
this.storeEngine.getCompactor().compactForTesting(filesToCompact, isMajor);
for (Path newFile: newFiles) {
// Move the compaction into place.
StoreFile sf = moveFileIntoPlace(newFile);
if (this.getCoprocessorHost() != null) {
this.getCoprocessorHost().postCompact(this, sf, null);
}
ArrayList<StoreFile> tmp = new ArrayList<StoreFile>();
tmp.add(sf);
completeCompaction(filesToCompact, tmp);
}
} finally {
synchronized (filesCompacting) {
filesCompacting.removeAll(filesToCompact);
}
}
}
@Override
public boolean hasReferences() {
return StoreUtils.hasReferences(this.storeEngine.getStoreFileManager().getStorefiles());
}
@Override
public CompactionProgress getCompactionProgress() {
return this.storeEngine.getCompactor().getProgress();
}
@Override
public boolean isMajorCompaction() throws IOException {
for (StoreFile sf : this.storeEngine.getStoreFileManager().getStorefiles()) {
// TODO: what are these reader checks all over the place?
if (sf.getReader() == null) {
LOG.debug("StoreFile " + sf + " has null Reader");
return false;
}
}
return storeEngine.getCompactionPolicy().isMajorCompaction(
this.storeEngine.getStoreFileManager().getStorefiles());
}
@Override
public CompactionContext requestCompaction() throws IOException {
return requestCompaction(Store.NO_PRIORITY, null);
}
@Override
public CompactionContext requestCompaction(int priority, CompactionRequest baseRequest)
throws IOException {
// don't even select for compaction if writes are disabled
if (!this.areWritesEnabled()) {
return null;
}
CompactionContext compaction = storeEngine.createCompaction();
this.lock.readLock().lock();
try {
synchronized (filesCompacting) {
// First, see if coprocessor would want to override selection.
if (this.getCoprocessorHost() != null) {
List<StoreFile> candidatesForCoproc = compaction.preSelect(this.filesCompacting);
boolean override = this.getCoprocessorHost().preCompactSelection(
this, candidatesForCoproc, baseRequest);
if (override) {
// Coprocessor is overriding normal file selection.
compaction.forceSelect(new CompactionRequest(candidatesForCoproc));
}
}
// Normal case - coprocessor is not overriding file selection.
if (!compaction.hasSelection()) {
boolean isUserCompaction = priority == Store.PRIORITY_USER;
boolean mayUseOffPeak = this.offPeakCompactions.tryStartOffPeakRequest();
compaction.select(this.filesCompacting, isUserCompaction,
mayUseOffPeak, forceMajor && filesCompacting.isEmpty());
assert compaction.hasSelection();
if (mayUseOffPeak && !compaction.getRequest().isOffPeak()) {
// Compaction policy doesn't want to take advantage of off-peak.
this.offPeakCompactions.endOffPeakRequest();
}
}
if (this.getCoprocessorHost() != null) {
this.getCoprocessorHost().postCompactSelection(
this, ImmutableList.copyOf(compaction.getRequest().getFiles()), baseRequest);
}
// Selected files; see if we have a compaction with some custom base request.
if (baseRequest != null) {
// Update the request with what the system thinks the request should be;
        // it's up to the request if it wants to listen.
compaction.forceSelect(
baseRequest.combineWith(compaction.getRequest()));
}
// Finally, we have the resulting files list. Check if we have any files at all.
final Collection<StoreFile> selectedFiles = compaction.getRequest().getFiles();
if (selectedFiles.isEmpty()) {
return null;
}
// Update filesCompacting (check that we do not try to compact the same StoreFile twice).
if (!Collections.disjoint(filesCompacting, selectedFiles)) {
// TODO: change this from an IAE to LOG.error after sufficient testing
Preconditions.checkArgument(false, "%s overlaps with %s",
selectedFiles, filesCompacting);
}
filesCompacting.addAll(selectedFiles);
Collections.sort(filesCompacting, StoreFile.Comparators.SEQ_ID);
// If we're enqueuing a major, clear the force flag.
boolean isMajor = selectedFiles.size() == this.getStorefilesCount();
this.forceMajor = this.forceMajor && !isMajor;
// Set common request properties.
// Set priority, either override value supplied by caller or from store.
compaction.getRequest().setPriority(
(priority != Store.NO_PRIORITY) ? priority : getCompactPriority());
compaction.getRequest().setIsMajor(isMajor);
compaction.getRequest().setDescription(
getRegionInfo().getRegionNameAsString(), getColumnFamilyName());
}
} finally {
this.lock.readLock().unlock();
}
LOG.debug(getRegionInfo().getEncodedName() + " - " + getColumnFamilyName() + ": Initiating "
+ (compaction.getRequest().isMajor() ? "major" : "minor") + " compaction");
this.region.reportCompactionRequestStart(compaction.getRequest().isMajor());
return compaction;
}
public void cancelRequestedCompaction(CompactionContext compaction) {
finishCompactionRequest(compaction.getRequest());
}
private void finishCompactionRequest(CompactionRequest cr) {
this.region.reportCompactionRequestEnd(cr.isMajor());
if (cr.isOffPeak()) {
this.offPeakCompactions.endOffPeakRequest();
cr.setOffPeak(false);
}
synchronized (filesCompacting) {
filesCompacting.removeAll(cr.getFiles());
}
}
/**
* Validates a store file by opening and closing it. In HFileV2 this should
* not be an expensive operation.
*
* @param path the path to the store file
*/
private void validateStoreFile(Path path)
throws IOException {
StoreFile storeFile = null;
try {
storeFile = new StoreFile(this.getFileSystem(), path, this.conf,
this.cacheConf, this.family.getBloomFilterType(),
NoOpDataBlockEncoder.INSTANCE);
storeFile.createReader();
} catch (IOException e) {
LOG.error("Failed to open store file : " + path
+ ", keeping it in tmp location", e);
throw e;
} finally {
if (storeFile != null) {
storeFile.closeReader(false);
}
}
}
  /*
   * It works by processing a compaction that's been written to disk.
   *
   * <p>It is usually invoked at the end of a compaction, but might also be
   * invoked at HStore startup, if the prior execution died midway through.
   *
   * <p>Moving the compacted TreeMap into place means:
   * <pre>
   * 1) Moving the new compacted StoreFiles into place
   * 2) Unloading all replaced StoreFiles, closing them and collecting a list to delete.
   * 3) Loading the new TreeMap.
   * 4) Computing the new store size.
   * </pre>
   *
   * @param compactedFiles list of files that were compacted
   * @param result StoreFiles that are the result of the compaction
   * @throws IOException
   */
private void completeCompaction(final Collection<StoreFile> compactedFiles,
final Collection<StoreFile> result) throws IOException {
try {
this.lock.writeLock().lock();
try {
// Change this.storeFiles so it reflects new state but do not
// delete old store files until we have sent out notification of
// change in case old files are still being accessed by outstanding
// scanners.
this.storeEngine.getStoreFileManager().addCompactionResults(compactedFiles, result);
filesCompacting.removeAll(compactedFiles); // safe bc: lock.writeLock()
} finally {
// We need the lock, as long as we are updating the storeFiles
// or changing the memstore. Let us release it before calling
// notifyChangeReadersObservers. See HBASE-4485 for a possible
// deadlock scenario that could have happened if continue to hold
// the lock.
this.lock.writeLock().unlock();
}
// Tell observers that list of StoreFiles has changed.
notifyChangedReadersObservers();
// let the archive util decide if we should archive or delete the files
LOG.debug("Removing store files after compaction...");
this.fs.removeStoreFiles(this.getColumnFamilyName(), compactedFiles);
} catch (IOException e) {
e = RemoteExceptionHandler.checkIOException(e);
LOG.error("Failed replacing compacted files in " + this +
". Compacted files are " + (result == null? "none": result.toString()) +
". Files replaced " + compactedFiles.toString() +
" some of which may have been already removed", e);
}
// 4. Compute new store size
this.storeSize = 0L;
this.totalUncompressedBytes = 0L;
for (StoreFile hsf : this.storeEngine.getStoreFileManager().getStorefiles()) {
StoreFile.Reader r = hsf.getReader();
if (r == null) {
LOG.warn("StoreFile " + hsf + " has a null Reader");
continue;
}
this.storeSize += r.length();
this.totalUncompressedBytes += r.getTotalUncompressedBytes();
}
}
/*
* @param wantedVersions How many versions were asked for.
   * @return wantedVersions or this family's {@link HConstants#VERSIONS}.
*/
int versionsToReturn(final int wantedVersions) {
if (wantedVersions <= 0) {
throw new IllegalArgumentException("Number of versions must be > 0");
}
// Make sure we do not return more than maximum versions for this store.
int maxVersions = this.family.getMaxVersions();
return wantedVersions > maxVersions ? maxVersions: wantedVersions;
}
static boolean isExpired(final KeyValue key, final long oldestTimestamp) {
return key.getTimestamp() < oldestTimestamp;
}
@Override
public KeyValue getRowKeyAtOrBefore(final byte[] row) throws IOException {
// If minVersions is set, we will not ignore expired KVs.
// As we're only looking for the latest matches, that should be OK.
// With minVersions > 0 we guarantee that any KV that has any version
// at all (expired or not) has at least one version that will not expire.
// Note that this method used to take a KeyValue as arguments. KeyValue
// can be back-dated, a row key cannot.
long ttlToUse = scanInfo.getMinVersions() > 0 ? Long.MAX_VALUE : this.scanInfo.getTtl();
KeyValue kv = new KeyValue(row, HConstants.LATEST_TIMESTAMP);
GetClosestRowBeforeTracker state = new GetClosestRowBeforeTracker(
this.comparator, kv, ttlToUse, this.getRegionInfo().isMetaRegion());
this.lock.readLock().lock();
try {
// First go to the memstore. Pick up deletes and candidates.
this.memstore.getRowKeyAtOrBefore(state);
// Check if match, if we got a candidate on the asked for 'kv' row.
// Process each relevant store file. Run through from newest to oldest.
Iterator<StoreFile> sfIterator = this.storeEngine.getStoreFileManager()
.getCandidateFilesForRowKeyBefore(state.getTargetKey());
while (sfIterator.hasNext()) {
StoreFile sf = sfIterator.next();
sfIterator.remove(); // Remove sf from iterator.
boolean haveNewCandidate = rowAtOrBeforeFromStoreFile(sf, state);
if (haveNewCandidate) {
// TODO: we may have an optimization here which stops the search if we find exact match.
sfIterator = this.storeEngine.getStoreFileManager().updateCandidateFilesForRowKeyBefore(
sfIterator, state.getTargetKey(), state.getCandidate());
}
}
return state.getCandidate();
} finally {
this.lock.readLock().unlock();
}
}
/*
* Check an individual MapFile for the row at or before a given row.
* @param f
* @param state
* @throws IOException
* @return True iff the candidate has been updated in the state.
*/
private boolean rowAtOrBeforeFromStoreFile(final StoreFile f,
final GetClosestRowBeforeTracker state)
throws IOException {
StoreFile.Reader r = f.getReader();
if (r == null) {
LOG.warn("StoreFile " + f + " has a null Reader");
return false;
}
if (r.getEntries() == 0) {
LOG.warn("StoreFile " + f + " is a empty store file");
return false;
}
// TODO: Cache these keys rather than make each time?
byte [] fk = r.getFirstKey();
if (fk == null) return false;
KeyValue firstKV = KeyValue.createKeyValueFromKey(fk, 0, fk.length);
byte [] lk = r.getLastKey();
KeyValue lastKV = KeyValue.createKeyValueFromKey(lk, 0, lk.length);
KeyValue firstOnRow = state.getTargetKey();
if (this.comparator.compareRows(lastKV, firstOnRow) < 0) {
// If last key in file is not of the target table, no candidates in this
// file. Return.
if (!state.isTargetTable(lastKV)) return false;
// If the row we're looking for is past the end of file, set search key to
// last key. TODO: Cache last and first key rather than make each time.
firstOnRow = new KeyValue(lastKV.getRow(), HConstants.LATEST_TIMESTAMP);
}
// Get a scanner that caches blocks and that uses pread.
HFileScanner scanner = r.getScanner(true, true, false);
// Seek scanner. If can't seek it, return.
if (!seekToScanner(scanner, firstOnRow, firstKV)) return false;
    // If we found a candidate on firstOnRow, just return. THIS WILL NEVER HAPPEN!
    // It's unlikely that there'll be an instance of the actual first row in the table.
if (walkForwardInSingleRow(scanner, firstOnRow, state)) return true;
// If here, need to start backing up.
while (scanner.seekBefore(firstOnRow.getBuffer(), firstOnRow.getKeyOffset(),
firstOnRow.getKeyLength())) {
KeyValue kv = scanner.getKeyValue();
if (!state.isTargetTable(kv)) break;
if (!state.isBetterCandidate(kv)) break;
// Make new first on row.
firstOnRow = new KeyValue(kv.getRow(), HConstants.LATEST_TIMESTAMP);
// Seek scanner. If can't seek it, break.
if (!seekToScanner(scanner, firstOnRow, firstKV)) return false;
      // If we find something, return.
if (walkForwardInSingleRow(scanner, firstOnRow, state)) return true;
}
return false;
}
/*
* Seek the file scanner to firstOnRow or first entry in file.
* @param scanner
* @param firstOnRow
* @param firstKV
* @return True if we successfully seeked scanner.
* @throws IOException
*/
private boolean seekToScanner(final HFileScanner scanner,
final KeyValue firstOnRow,
final KeyValue firstKV)
throws IOException {
KeyValue kv = firstOnRow;
// If firstOnRow < firstKV, set to firstKV
if (this.comparator.compareRows(firstKV, firstOnRow) == 0) kv = firstKV;
int result = scanner.seekTo(kv.getBuffer(), kv.getKeyOffset(),
kv.getKeyLength());
return result >= 0;
}
/*
* When we come in here, we are probably at the kv just before we break into
* the row that firstOnRow is on. Usually need to increment one time to get
* on to the row we are interested in.
* @param scanner
* @param firstOnRow
* @param state
* @return True we found a candidate.
* @throws IOException
*/
private boolean walkForwardInSingleRow(final HFileScanner scanner,
final KeyValue firstOnRow,
final GetClosestRowBeforeTracker state)
throws IOException {
boolean foundCandidate = false;
do {
KeyValue kv = scanner.getKeyValue();
// If we are not in the row, skip.
if (this.comparator.compareRows(kv, firstOnRow) < 0) continue;
// Did we go beyond the target row? If so break.
if (state.isTooFar(kv, firstOnRow)) break;
if (state.isExpired(kv)) {
continue;
}
// If we added something, this row is a contender. break.
if (state.handle(kv)) {
foundCandidate = true;
break;
}
} while(scanner.next());
return foundCandidate;
}
public boolean canSplit() {
this.lock.readLock().lock();
try {
      // Not splittable if we find a reference store file present in the store.
boolean result = !hasReferences();
if (!result && LOG.isDebugEnabled()) {
LOG.debug("Cannot split region due to reference files being there");
}
return result;
} finally {
this.lock.readLock().unlock();
}
}
@Override
public byte[] getSplitPoint() {
this.lock.readLock().lock();
try {
// Should already be enforced by the split policy!
assert !this.getRegionInfo().isMetaRegion();
      // Not splittable if we find a reference store file present in the store.
if (hasReferences()) {
assert false : "getSplitPoint() called on a region that can't split!";
return null;
}
return this.storeEngine.getStoreFileManager().getSplitPoint();
} catch(IOException e) {
LOG.warn("Failed getting store size for " + this, e);
} finally {
this.lock.readLock().unlock();
}
return null;
}
@Override
public long getLastCompactSize() {
return this.lastCompactSize;
}
@Override
public long getSize() {
return storeSize;
}
public void triggerMajorCompaction() {
this.forceMajor = true;
}
boolean getForceMajorCompaction() {
return this.forceMajor;
}
//////////////////////////////////////////////////////////////////////////////
// File administration
//////////////////////////////////////////////////////////////////////////////
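  /**
   * Returns a scanner over this store, letting any coprocessor override the
   * default StoreScanner. A hedged usage sketch (assumes the caller closes the
   * scanner, and that a null column set means all columns):
   * <pre>{@code
   * KeyValueScanner scanner = store.getScanner(new Scan(), null);
   * }</pre>
   */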
@Override
public KeyValueScanner getScanner(Scan scan,
final NavigableSet<byte []> targetCols) throws IOException {
lock.readLock().lock();
try {
KeyValueScanner scanner = null;
if (this.getCoprocessorHost() != null) {
scanner = this.getCoprocessorHost().preStoreScannerOpen(this, scan, targetCols);
}
if (scanner == null) {
scanner = new StoreScanner(this, getScanInfo(), scan, targetCols);
}
return scanner;
} finally {
lock.readLock().unlock();
}
}
@Override
public String toString() {
return this.getColumnFamilyName();
}
@Override
// TODO: why is there this and also getNumberOfStorefiles?! Remove one.
public int getStorefilesCount() {
return this.storeEngine.getStoreFileManager().getStorefileCount();
}
@Override
public long getStoreSizeUncompressed() {
return this.totalUncompressedBytes;
}
@Override
public long getStorefilesSize() {
long size = 0;
for (StoreFile s: this.storeEngine.getStoreFileManager().getStorefiles()) {
StoreFile.Reader r = s.getReader();
if (r == null) {
LOG.warn("StoreFile " + s + " has a null Reader");
continue;
}
size += r.length();
}
return size;
}
@Override
public long getStorefilesIndexSize() {
long size = 0;
for (StoreFile s: this.storeEngine.getStoreFileManager().getStorefiles()) {
StoreFile.Reader r = s.getReader();
if (r == null) {
LOG.warn("StoreFile " + s + " has a null Reader");
continue;
}
size += r.indexSize();
}
return size;
}
@Override
public long getTotalStaticIndexSize() {
long size = 0;
for (StoreFile s : this.storeEngine.getStoreFileManager().getStorefiles()) {
size += s.getReader().getUncompressedDataIndexSize();
}
return size;
}
@Override
public long getTotalStaticBloomSize() {
long size = 0;
for (StoreFile s : this.storeEngine.getStoreFileManager().getStorefiles()) {
StoreFile.Reader r = s.getReader();
size += r.getTotalBloomSize();
}
return size;
}
@Override
public long getMemStoreSize() {
return this.memstore.heapSize();
}
@Override
public int getCompactPriority() {
return this.storeEngine.getStoreFileManager().getStoreCompactionPriority();
}
@Override
public boolean throttleCompaction(long compactionSize) {
return storeEngine.getCompactionPolicy().throttleCompaction(compactionSize);
}
public HRegion getHRegion() {
return this.region;
}
@Override
public RegionCoprocessorHost getCoprocessorHost() {
return this.region.getCoprocessorHost();
}
@Override
public HRegionInfo getRegionInfo() {
return this.fs.getRegionInfo();
}
@Override
public boolean areWritesEnabled() {
return this.region.areWritesEnabled();
}
@Override
public long getSmallestReadPoint() {
return this.region.getSmallestReadPoint();
}
/**
* Used in tests. TODO: Remove
*
* Updates the value for the given row/family/qualifier. This function will always be seen as
* atomic by other readers because it only puts a single KV to memstore. Thus no read/write
* control necessary.
* @param row row to update
* @param f family to update
* @param qualifier qualifier to update
* @param newValue the new value to set into memstore
* @return memstore size delta
* @throws IOException
*/
public long updateColumnValue(byte [] row, byte [] f,
byte [] qualifier, long newValue)
throws IOException {
this.lock.readLock().lock();
try {
long now = EnvironmentEdgeManager.currentTimeMillis();
return this.memstore.updateColumnValue(row,
f,
qualifier,
newValue,
now);
} finally {
this.lock.readLock().unlock();
}
}
@Override
public long upsert(Iterable<? extends Cell> cells, long readpoint) throws IOException {
this.lock.readLock().lock();
try {
return this.memstore.upsert(cells, readpoint);
} finally {
this.lock.readLock().unlock();
}
}
public StoreFlusher getStoreFlusher(long cacheFlushId) {
return new StoreFlusherImpl(cacheFlushId);
}
private class StoreFlusherImpl implements StoreFlusher {
private long cacheFlushId;
private SortedSet<KeyValue> snapshot;
private StoreFile storeFile;
private Path storeFilePath;
private TimeRangeTracker snapshotTimeRangeTracker;
private AtomicLong flushedSize;
private StoreFlusherImpl(long cacheFlushId) {
this.cacheFlushId = cacheFlushId;
this.flushedSize = new AtomicLong();
}
@Override
public void prepare() {
memstore.snapshot();
this.snapshot = memstore.getSnapshot();
this.snapshotTimeRangeTracker = memstore.getSnapshotTimeRangeTracker();
}
@Override
public void flushCache(MonitoredTask status) throws IOException {
storeFilePath = HStore.this.flushCache(
cacheFlushId, snapshot, snapshotTimeRangeTracker, flushedSize, status);
}
@Override
public boolean commit(MonitoredTask status) throws IOException {
if (storeFilePath == null) {
return false;
}
storeFile = HStore.this.commitFile(storeFilePath, cacheFlushId,
snapshotTimeRangeTracker, flushedSize, status);
if (HStore.this.getCoprocessorHost() != null) {
HStore.this.getCoprocessorHost().postFlush(HStore.this, storeFile);
}
// Add new file to store files. Clear snapshot too while we have
// the Store write lock.
return HStore.this.updateStorefiles(storeFile, snapshot);
}
}
@Override
public boolean needsCompaction() {
return storeEngine.getCompactionPolicy().needsCompaction(
this.storeEngine.getStoreFileManager().getStorefiles(), filesCompacting);
}
@Override
public CacheConfig getCacheConfig() {
return this.cacheConf;
}
public static final long FIXED_OVERHEAD =
ClassSize.align((17 * ClassSize.REFERENCE) + (5 * Bytes.SIZEOF_LONG)
+ (2 * Bytes.SIZEOF_INT) + Bytes.SIZEOF_BOOLEAN);
public static final long DEEP_OVERHEAD = ClassSize.align(FIXED_OVERHEAD
+ ClassSize.OBJECT + ClassSize.REENTRANT_LOCK
+ ClassSize.CONCURRENT_SKIPLISTMAP
+ ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY + ClassSize.OBJECT
+ ScanInfo.FIXED_OVERHEAD);
@Override
public long heapSize() {
return DEEP_OVERHEAD + this.memstore.heapSize();
}
public KeyValue.KVComparator getComparator() {
return comparator;
}
@Override
public ScanInfo getScanInfo() {
return scanInfo;
}
/**
* Set scan info, used by test
* @param scanInfo new scan info to use for test
*/
void setScanInfo(ScanInfo scanInfo) {
this.scanInfo = scanInfo;
}
@Override
public boolean hasTooManyStoreFiles() {
return getStorefilesCount() > this.blockingFileCount;
}
}