/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.gateway.blobstore;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.blobstore.BlobContainer;
import org.elasticsearch.common.blobstore.BlobMetaData;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.BlobStore;
import org.elasticsearch.common.blobstore.ImmutableBlobContainer;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.collect.Iterables;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.io.FastByteArrayInputStream;
import org.elasticsearch.common.io.FastByteArrayOutputStream;
import org.elasticsearch.common.io.stream.BytesStreamInput;
import org.elasticsearch.common.lucene.store.InputStreamIndexInput;
import org.elasticsearch.common.lucene.store.ThreadSafeInputStreamIndexInput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.index.deletionpolicy.SnapshotIndexCommit;
import org.elasticsearch.index.gateway.CommitPoint;
import org.elasticsearch.index.gateway.CommitPoints;
import org.elasticsearch.index.gateway.IndexGateway;
import org.elasticsearch.index.gateway.IndexShardGateway;
import org.elasticsearch.index.gateway.IndexShardGatewayRecoveryException;
import org.elasticsearch.index.gateway.IndexShardGatewaySnapshotFailedException;
import org.elasticsearch.index.gateway.RecoveryStatus;
import org.elasticsearch.index.gateway.SnapshotStatus;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.index.shard.AbstractIndexShardComponent;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.shard.service.IndexShard;
import org.elasticsearch.index.shard.service.InternalIndexShard;
import org.elasticsearch.index.store.Store;
import org.elasticsearch.index.store.StoreFileMetaData;
import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.index.translog.TranslogStreams;
import org.elasticsearch.threadpool.ThreadPool;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
/**
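 * An index shard gateway that snapshots a shard into, and recovers it from, a blob store.
 * Index and translog files are stored as "__[generation]" blobs (split into ".part[N]" blobs
 * when a chunk size is configured), and each snapshot is tied together by a "commit-[version]"
 * metadata blob.
 *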
* @author kimchy (shay.banon)
*/
public abstract class BlobStoreIndexShardGateway extends AbstractIndexShardComponent implements IndexShardGateway {
protected final ThreadPool threadPool;
protected final InternalIndexShard indexShard;
protected final Store store;
protected final ByteSizeValue chunkSize;
protected final BlobStore blobStore;
protected final BlobPath shardPath;
protected final ImmutableBlobContainer blobContainer;
private volatile RecoveryStatus recoveryStatus;
private volatile SnapshotStatus lastSnapshotStatus;
private volatile SnapshotStatus currentSnapshotStatus;
protected BlobStoreIndexShardGateway(ShardId shardId, @IndexSettings Settings indexSettings, ThreadPool threadPool, IndexGateway indexGateway,
IndexShard indexShard, Store store) {
super(shardId, indexSettings);
this.threadPool = threadPool;
this.indexShard = (InternalIndexShard) indexShard;
this.store = store;
BlobStoreIndexGateway blobStoreIndexGateway = (BlobStoreIndexGateway) indexGateway;
this.chunkSize = blobStoreIndexGateway.chunkSize(); // can be null -> no chunking
this.blobStore = blobStoreIndexGateway.blobStore();
this.shardPath = blobStoreIndexGateway.shardPath(shardId.id());
this.blobContainer = blobStore.immutableBlobContainer(shardPath);
this.recoveryStatus = new RecoveryStatus();
}
@Override public RecoveryStatus recoveryStatus() {
return this.recoveryStatus;
}
@Override public String toString() {
return type() + "://" + blobStore + "/" + shardPath;
}
@Override public boolean requiresSnapshot() {
return true;
}
@Override public boolean requiresSnapshotScheduling() {
return true;
}
@Override public SnapshotLock obtainSnapshotLock() throws Exception {
return NO_SNAPSHOT_LOCK;
}
@Override public void close(boolean delete) throws ElasticSearchException {
if (delete) {
blobStore.delete(shardPath);
}
}
@Override public SnapshotStatus lastSnapshotStatus() {
return this.lastSnapshotStatus;
}
@Override public SnapshotStatus currentSnapshotStatus() {
SnapshotStatus snapshotStatus = this.currentSnapshotStatus;
if (snapshotStatus == null) {
return snapshotStatus;
}
        // only keep updating the elapsed time while the snapshot is still in progress
        if (snapshotStatus.stage() != SnapshotStatus.Stage.DONE && snapshotStatus.stage() != SnapshotStatus.Stage.FAILURE) {
snapshotStatus.time(System.currentTimeMillis() - snapshotStatus.startTime());
}
return snapshotStatus;
}
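    /**
     * Performs a snapshot into the gateway, tracking the timing and stage in the current
     * {@link SnapshotStatus}, and rethrowing any failure as an
     * {@link IndexShardGatewaySnapshotFailedException}.
     */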
@Override public SnapshotStatus snapshot(final Snapshot snapshot) throws IndexShardGatewaySnapshotFailedException {
currentSnapshotStatus = new SnapshotStatus();
currentSnapshotStatus.startTime(System.currentTimeMillis());
try {
doSnapshot(snapshot);
currentSnapshotStatus.time(System.currentTimeMillis() - currentSnapshotStatus.startTime());
currentSnapshotStatus.updateStage(SnapshotStatus.Stage.DONE);
} catch (Exception e) {
currentSnapshotStatus.time(System.currentTimeMillis() - currentSnapshotStatus.startTime());
currentSnapshotStatus.updateStage(SnapshotStatus.Stage.FAILURE);
currentSnapshotStatus.failed(e);
if (e instanceof IndexShardGatewaySnapshotFailedException) {
throw (IndexShardGatewaySnapshotFailedException) e;
} else {
throw new IndexShardGatewaySnapshotFailedException(shardId, e.getMessage(), e);
}
} finally {
this.lastSnapshotStatus = currentSnapshotStatus;
this.currentSnapshotStatus = null;
}
return this.lastSnapshotStatus;
}
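    /**
     * The actual snapshot logic: list the existing blobs, snapshot only the index files that
     * changed (reusing files already referenced by a commit point), snapshot the translog
     * (fully, or just the delta when possible), write a new commit point blob, and finally
     * delete blobs that are no longer referenced by any commit point.
     */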
private void doSnapshot(final Snapshot snapshot) throws IndexShardGatewaySnapshotFailedException {
ImmutableMap<String, BlobMetaData> blobs;
try {
blobs = blobContainer.listBlobs();
} catch (IOException e) {
throw new IndexShardGatewaySnapshotFailedException(shardId, "failed to list blobs", e);
}
long generation = findLatestFileNameGeneration(blobs);
CommitPoints commitPoints = buildCommitPoints(blobs);
currentSnapshotStatus.index().startTime(System.currentTimeMillis());
currentSnapshotStatus.updateStage(SnapshotStatus.Stage.INDEX);
final SnapshotIndexCommit snapshotIndexCommit = snapshot.indexCommit();
final Translog.Snapshot translogSnapshot = snapshot.translogSnapshot();
final CountDownLatch indexLatch = new CountDownLatch(snapshotIndexCommit.getFiles().length);
final CopyOnWriteArrayList<Throwable> failures = new CopyOnWriteArrayList<Throwable>();
final List<CommitPoint.FileInfo> indexCommitPointFiles = Lists.newArrayList();
int indexNumberOfFiles = 0;
long indexTotalFilesSize = 0;
for (final String fileName : snapshotIndexCommit.getFiles()) {
StoreFileMetaData md;
try {
md = store.metaData(fileName);
} catch (IOException e) {
throw new IndexShardGatewaySnapshotFailedException(shardId, "Failed to get store file metadata", e);
}
boolean snapshotRequired = false;
if (snapshot.indexChanged() && fileName.equals(snapshotIndexCommit.getSegmentsFileName())) {
                snapshotRequired = true; // we always want to snapshot the segments file if the index changed
}
CommitPoint.FileInfo fileInfo = commitPoints.findPhysicalIndexFile(fileName);
if (fileInfo == null || !fileInfo.isSame(md) || !commitPointFileExistsInBlobs(fileInfo, blobs)) {
                // the file is not referenced by any commit point, has a different length, or does not fully exist in the listed blobs
snapshotRequired = true;
}
if (snapshotRequired) {
indexNumberOfFiles++;
indexTotalFilesSize += md.length();
// create a new FileInfo
try {
CommitPoint.FileInfo snapshotFileInfo = new CommitPoint.FileInfo(fileNameFromGeneration(++generation), fileName, md.length(), md.checksum());
indexCommitPointFiles.add(snapshotFileInfo);
snapshotFile(snapshotIndexCommit.getDirectory(), snapshotFileInfo, indexLatch, failures);
} catch (IOException e) {
failures.add(e);
indexLatch.countDown();
}
} else {
indexCommitPointFiles.add(fileInfo);
indexLatch.countDown();
}
}
currentSnapshotStatus.index().files(indexNumberOfFiles, indexTotalFilesSize);
try {
indexLatch.await();
} catch (InterruptedException e) {
failures.add(e);
}
if (!failures.isEmpty()) {
throw new IndexShardGatewaySnapshotFailedException(shardId(), "Failed to perform snapshot (index files)", failures.get(failures.size() - 1));
}
currentSnapshotStatus.index().time(System.currentTimeMillis() - currentSnapshotStatus.index().startTime());
currentSnapshotStatus.updateStage(SnapshotStatus.Stage.TRANSLOG);
currentSnapshotStatus.translog().startTime(System.currentTimeMillis());
        // Note, we assume the snapshot always starts from "base 0" (the beginning of the translog);
        // to snapshot only the delta, we need to seek forward to the last translog position
List<CommitPoint.FileInfo> translogCommitPointFiles = Lists.newArrayList();
int expectedNumberOfOperations = 0;
boolean snapshotRequired = false;
if (snapshot.newTranslogCreated()) {
if (translogSnapshot.lengthInBytes() > 0) {
snapshotRequired = true;
expectedNumberOfOperations = translogSnapshot.estimatedTotalOperations();
}
} else {
// if we have a commit point, check that we have all the files listed in it in the blob store
if (!commitPoints.commits().isEmpty()) {
CommitPoint commitPoint = commitPoints.commits().get(0);
boolean allTranslogFilesExists = true;
for (CommitPoint.FileInfo fileInfo : commitPoint.translogFiles()) {
if (!commitPointFileExistsInBlobs(fileInfo, blobs)) {
allTranslogFilesExists = false;
break;
}
}
// if everything exists, we can seek forward in case there are new operations, otherwise, we copy over all again...
if (allTranslogFilesExists) {
translogCommitPointFiles.addAll(commitPoint.translogFiles());
if (snapshot.sameTranslogNewOperations()) {
translogSnapshot.seekForward(snapshot.lastTranslogLength());
if (translogSnapshot.lengthInBytes() > 0) {
snapshotRequired = true;
expectedNumberOfOperations = translogSnapshot.estimatedTotalOperations() - snapshot.lastTotalTranslogOperations();
}
} // else (no operations, nothing to snapshot)
} else {
// a full translog snapshot is required
if (translogSnapshot.lengthInBytes() > 0) {
expectedNumberOfOperations = translogSnapshot.estimatedTotalOperations();
snapshotRequired = true;
}
}
} else {
// no commit point, snapshot all the translog
if (translogSnapshot.lengthInBytes() > 0) {
expectedNumberOfOperations = translogSnapshot.estimatedTotalOperations();
snapshotRequired = true;
}
}
}
currentSnapshotStatus.translog().expectedNumberOfOperations(expectedNumberOfOperations);
if (snapshotRequired) {
CommitPoint.FileInfo addedTranslogFileInfo = new CommitPoint.FileInfo(fileNameFromGeneration(++generation), "translog-" + translogSnapshot.translogId(), translogSnapshot.lengthInBytes(), null /* no need for checksum in translog */);
translogCommitPointFiles.add(addedTranslogFileInfo);
try {
snapshotTranslog(translogSnapshot, addedTranslogFileInfo);
} catch (Exception e) {
throw new IndexShardGatewaySnapshotFailedException(shardId, "Failed to snapshot translog", e);
}
}
currentSnapshotStatus.translog().time(System.currentTimeMillis() - currentSnapshotStatus.translog().startTime());
// now create and write the commit point
currentSnapshotStatus.updateStage(SnapshotStatus.Stage.FINALIZE);
long version = 0;
if (!commitPoints.commits().isEmpty()) {
version = commitPoints.commits().iterator().next().version() + 1;
}
String commitPointName = "commit-" + Long.toString(version, Character.MAX_RADIX);
CommitPoint commitPoint = new CommitPoint(version, commitPointName, CommitPoint.Type.GENERATED, indexCommitPointFiles, translogCommitPointFiles);
try {
byte[] commitPointData = CommitPoints.toXContent(commitPoint);
blobContainer.writeBlob(commitPointName, new FastByteArrayInputStream(commitPointData), commitPointData.length);
} catch (Exception e) {
throw new IndexShardGatewaySnapshotFailedException(shardId, "Failed to write commit point", e);
}
        // delete all blobs that are not referenced by any commit point:
        // build a new CommitPoints that includes this commit point and all the saved ones
List<CommitPoint> newCommitPointsList = Lists.newArrayList();
newCommitPointsList.add(commitPoint);
for (CommitPoint point : commitPoints) {
if (point.type() == CommitPoint.Type.SAVED) {
newCommitPointsList.add(point);
}
}
CommitPoints newCommitPoints = new CommitPoints(newCommitPointsList);
        // first, go over the existing commit point blobs and delete those not in the new commit points
for (String blobName : blobs.keySet()) {
if (!blobName.startsWith("commit-")) {
continue;
}
long checkedVersion = Long.parseLong(blobName.substring("commit-".length()), Character.MAX_RADIX);
if (!newCommitPoints.hasVersion(checkedVersion)) {
try {
blobContainer.deleteBlob(blobName);
} catch (IOException e) {
// ignore
}
}
}
        // now go over all the blobs, and if they don't exist in a commit point, delete them
for (String blobName : blobs.keySet()) {
String name = blobName;
if (!name.startsWith("__")) {
continue;
}
if (blobName.contains(".part")) {
name = blobName.substring(0, blobName.indexOf(".part"));
}
if (newCommitPoints.findNameFile(name) == null) {
try {
blobContainer.deleteBlob(blobName);
} catch (IOException e) {
                    // ignore, will delete it later
}
}
}
}
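    /**
     * Recovers the shard from the gateway: loads all the commit point blobs, then restores the
     * index files and replays the translog of the first (most recent) commit point whose files
     * fully exist in the blob store.
     */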
@Override public void recover(boolean indexShouldExists, RecoveryStatus recoveryStatus) throws IndexShardGatewayRecoveryException {
this.recoveryStatus = recoveryStatus;
final ImmutableMap<String, BlobMetaData> blobs;
try {
blobs = blobContainer.listBlobs();
} catch (IOException e) {
throw new IndexShardGatewayRecoveryException(shardId, "Failed to list content of gateway", e);
}
List<CommitPoint> commitPointsList = Lists.newArrayList();
boolean atLeastOneCommitPointExists = false;
for (String name : blobs.keySet()) {
if (name.startsWith("commit-")) {
atLeastOneCommitPointExists = true;
try {
commitPointsList.add(CommitPoints.fromXContent(blobContainer.readBlobFully(name)));
} catch (Exception e) {
logger.warn("failed to read commit point [{}]", e, name);
}
}
}
if (atLeastOneCommitPointExists && commitPointsList.isEmpty()) {
            // commit points exist but none could be loaded; bail so we won't corrupt the index, manual intervention will be required
            throw new IndexShardGatewayRecoveryException(shardId, "Commit points exist but none could be loaded", null);
}
CommitPoints commitPoints = new CommitPoints(commitPointsList);
if (commitPoints.commits().isEmpty()) {
// no commit points, clean the store just so we won't recover wrong files
try {
indexShard.store().deleteContent();
} catch (IOException e) {
logger.warn("failed to clean store before starting shard", e);
}
recoveryStatus.index().startTime(System.currentTimeMillis());
recoveryStatus.index().time(System.currentTimeMillis() - recoveryStatus.index().startTime());
recoveryStatus.translog().startTime(System.currentTimeMillis());
            recoveryStatus.translog().time(System.currentTimeMillis() - recoveryStatus.translog().startTime());
return;
}
for (CommitPoint commitPoint : commitPoints) {
if (!commitPointExistsInBlobs(commitPoint, blobs)) {
logger.warn("listed commit_point [{}]/[{}], but not all files exists, ignoring", commitPoint.name(), commitPoint.version());
continue;
}
try {
recoveryStatus.index().startTime(System.currentTimeMillis());
recoveryStatus.updateStage(RecoveryStatus.Stage.INDEX);
recoverIndex(commitPoint, blobs);
recoveryStatus.index().time(System.currentTimeMillis() - recoveryStatus.index().startTime());
recoveryStatus.translog().startTime(System.currentTimeMillis());
recoveryStatus.updateStage(RecoveryStatus.Stage.TRANSLOG);
recoverTranslog(commitPoint, blobs);
                recoveryStatus.translog().time(System.currentTimeMillis() - recoveryStatus.translog().startTime());
return;
} catch (Exception e) {
throw new IndexShardGatewayRecoveryException(shardId, "failed to recover commit_point [" + commitPoint.name() + "]/[" + commitPoint.version() + "]", e);
}
}
throw new IndexShardGatewayRecoveryException(shardId, "No commit point data is available in gateway", null);
}
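    /**
     * Replays the translog files of the given commit point. The blobs are streamed through an
     * asynchronous read listener that buffers partial chunks until complete operations can be
     * decoded and applied to the shard.
     */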
private void recoverTranslog(CommitPoint commitPoint, ImmutableMap<String, BlobMetaData> blobs) throws IndexShardGatewayRecoveryException {
if (commitPoint.translogFiles().isEmpty()) {
// no translog files, bail
indexShard.start("post recovery from gateway, no translog");
return;
}
try {
indexShard.performRecoveryPrepareForTranslog();
final AtomicReference<Throwable> failure = new AtomicReference<Throwable>();
final CountDownLatch latch = new CountDownLatch(1);
final Iterator<CommitPoint.FileInfo> transIt = commitPoint.translogFiles().iterator();
blobContainer.readBlob(transIt.next().name(), new BlobContainer.ReadBlobListener() {
FastByteArrayOutputStream bos = new FastByteArrayOutputStream();
boolean ignore = false;
@Override public synchronized void onPartial(byte[] data, int offset, int size) throws IOException {
if (ignore) {
return;
}
bos.write(data, offset, size);
                    // if we don't yet have the 4-byte size header of the first operation, wait for the next partial chunk
if (bos.size() < 4) {
return;
}
BytesStreamInput si = new BytesStreamInput(bos.unsafeByteArray(), 0, bos.size());
int position;
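                    // the translog stream frames each operation as a 4-byte size header followed
                    // by the serialized operation; replay operations while a complete frame is buffered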
while (true) {
try {
position = si.position();
if (position + 4 > bos.size()) {
break;
}
int opSize = si.readInt();
int curPos = si.position();
if ((si.position() + opSize) > bos.size()) {
break;
}
Translog.Operation operation = TranslogStreams.readTranslogOperation(si);
if ((si.position() - curPos) != opSize) {
logger.warn("mismatch in size, expected [{}], got [{}]", opSize, si.position() - curPos);
}
recoveryStatus.translog().addTranslogOperations(1);
indexShard.performRecoveryOperation(operation);
if (si.position() >= bos.size()) {
position = si.position();
break;
}
} catch (Exception e) {
logger.warn("failed to retrieve translog after [{}] operations, ignoring the rest, considered corrupted", e, recoveryStatus.translog().currentTranslogOperations());
ignore = true;
latch.countDown();
return;
}
}
FastByteArrayOutputStream newBos = new FastByteArrayOutputStream();
int leftOver = bos.size() - position;
if (leftOver > 0) {
newBos.write(bos.unsafeByteArray(), position, leftOver);
}
bos = newBos;
}
@Override public synchronized void onCompleted() {
if (ignore) {
return;
}
if (!transIt.hasNext()) {
latch.countDown();
return;
}
blobContainer.readBlob(transIt.next().name(), this);
}
@Override public void onFailure(Throwable t) {
failure.set(t);
latch.countDown();
}
});
latch.await();
if (failure.get() != null) {
throw failure.get();
}
indexShard.performRecoveryFinalization(true);
} catch (Throwable e) {
throw new IndexShardGatewayRecoveryException(shardId, "Failed to recover translog", e);
}
}
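    /**
     * Recovers the index files of the given commit point, skipping files that already exist in the
     * local store with matching metadata, then reads back the index version and deletes local files
     * that are not part of the commit point.
     */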
private void recoverIndex(CommitPoint commitPoint, ImmutableMap<String, BlobMetaData> blobs) throws Exception {
int numberOfFiles = 0;
long totalSize = 0;
int numberOfReusedFiles = 0;
long reusedTotalSize = 0;
List<CommitPoint.FileInfo> filesToRecover = Lists.newArrayList();
for (CommitPoint.FileInfo fileInfo : commitPoint.indexFiles()) {
String fileName = fileInfo.physicalName();
StoreFileMetaData md = null;
try {
md = store.metaData(fileName);
} catch (Exception e) {
// no file
}
// we don't compute checksum for segments, so always recover them
if (!fileName.startsWith("segments") && md != null && fileInfo.isSame(md)) {
numberOfFiles++;
totalSize += md.length();
numberOfReusedFiles++;
reusedTotalSize += md.length();
if (logger.isTraceEnabled()) {
logger.trace("not_recovering [{}], exists in local store and is same", fileInfo.physicalName());
}
} else {
if (logger.isTraceEnabled()) {
if (md == null) {
logger.trace("recovering [{}], does not exists in local store", fileInfo.physicalName());
} else {
logger.trace("recovering [{}], exists in local store but is different", fileInfo.physicalName());
}
}
numberOfFiles++;
totalSize += fileInfo.length();
filesToRecover.add(fileInfo);
}
}
recoveryStatus.index().files(numberOfFiles, totalSize, numberOfReusedFiles, reusedTotalSize);
if (filesToRecover.isEmpty()) {
logger.trace("no files to recover, all exists within the local store");
}
if (logger.isTraceEnabled()) {
logger.trace("recovering_files [{}] with total_size [{}], reusing_files [{}] with reused_size [{}]", numberOfFiles, new ByteSizeValue(totalSize), numberOfReusedFiles, new ByteSizeValue(reusedTotalSize));
}
final CountDownLatch latch = new CountDownLatch(filesToRecover.size());
final CopyOnWriteArrayList<Throwable> failures = new CopyOnWriteArrayList<Throwable>();
for (final CommitPoint.FileInfo fileToRecover : filesToRecover) {
recoverFile(fileToRecover, blobs, latch, failures);
}
try {
latch.await();
} catch (InterruptedException e) {
throw new IndexShardGatewayRecoveryException(shardId, "Interrupted while recovering index", e);
}
if (!failures.isEmpty()) {
throw new IndexShardGatewayRecoveryException(shardId, "Failed to recover index", failures.get(0));
}
        // read back the version of the index data we just recovered
long version = -1;
try {
if (IndexReader.indexExists(store.directory())) {
version = IndexReader.getCurrentVersion(store.directory());
}
} catch (IOException e) {
throw new IndexShardGatewayRecoveryException(shardId(), "Failed to fetch index version after copying it over", e);
}
recoveryStatus.index().updateVersion(version);
        // now, go over and clean files that are in the store, but were not in the gateway
try {
for (String storeFile : store.directory().listAll()) {
if (!commitPoint.containPhysicalIndexFile(storeFile)) {
try {
store.directory().deleteFile(storeFile);
} catch (Exception e) {
// ignore
}
}
}
} catch (Exception e) {
// ignore
}
}
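    /**
     * Asynchronously copies a single file from the blob store into the local store, following the
     * ".part[N]" blobs when the file was chunked, and writing its checksum file once copying is done.
     */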
private void recoverFile(final CommitPoint.FileInfo fileInfo, final ImmutableMap<String, BlobMetaData> blobs, final CountDownLatch latch, final List<Throwable> failures) {
final IndexOutput indexOutput;
try {
            // we create an output with no checksum: the raw bytes of the file cannot be checksummed
            // on the fly (because of seeks), so the checksum file is written once copying is done
indexOutput = store.createOutputWithNoChecksum(fileInfo.physicalName());
} catch (IOException e) {
failures.add(e);
latch.countDown();
return;
}
String firstFileToRecover = fileInfo.name();
if (!blobs.containsKey(fileInfo.name())) {
// chunking, append part0 to it
firstFileToRecover = fileInfo.name() + ".part0";
}
if (!blobs.containsKey(firstFileToRecover)) {
            // neither the blob nor its first part exists in the gateway; warn and skip the file
logger.warn("no file [{}]/[{}] to recover, ignoring it", fileInfo.name(), fileInfo.physicalName());
latch.countDown();
return;
}
final AtomicInteger partIndex = new AtomicInteger();
blobContainer.readBlob(firstFileToRecover, new BlobContainer.ReadBlobListener() {
@Override public synchronized void onPartial(byte[] data, int offset, int size) throws IOException {
recoveryStatus.index().addCurrentFilesSize(size);
indexOutput.writeBytes(data, offset, size);
}
@Override public synchronized void onCompleted() {
int part = partIndex.incrementAndGet();
String partName = fileInfo.name() + ".part" + part;
if (blobs.containsKey(partName)) {
// continue with the new part
blobContainer.readBlob(partName, this);
return;
} else {
// we are done...
try {
indexOutput.close();
// write the checksum
if (fileInfo.checksum() != null) {
store.writeChecksum(fileInfo.physicalName(), fileInfo.checksum());
}
store.directory().sync(Collections.singleton(fileInfo.physicalName()));
} catch (IOException e) {
onFailure(e);
return;
}
}
latch.countDown();
}
@Override public void onFailure(Throwable t) {
failures.add(t);
latch.countDown();
}
});
}
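    /**
     * Writes the translog snapshot as a single blob. Note that, unlike index files in
     * {@link #snapshotFile}, the translog is not chunked.
     */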
private void snapshotTranslog(Translog.Snapshot snapshot, CommitPoint.FileInfo fileInfo) throws IOException {
blobContainer.writeBlob(fileInfo.name(), snapshot.stream(), snapshot.lengthInBytes());
}
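    /**
     * Asynchronously snapshots a single index file into the blob store. When a chunk size is
     * configured and the file spans more than one chunk, it is written as multiple ".part[N]"
     * blobs; the latch is counted down once all chunks have completed (or failed).
     */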
private void snapshotFile(Directory dir, final CommitPoint.FileInfo fileInfo, final CountDownLatch latch, final List<Throwable> failures) throws IOException {
long chunkBytes = Long.MAX_VALUE;
if (chunkSize != null) {
chunkBytes = chunkSize.bytes();
}
long totalLength = fileInfo.length();
long numberOfChunks = totalLength / chunkBytes;
if (totalLength % chunkBytes > 0) {
numberOfChunks++;
}
if (numberOfChunks == 0) {
numberOfChunks++;
}
final long fNumberOfChunks = numberOfChunks;
final AtomicLong counter = new AtomicLong(numberOfChunks);
for (long i = 0; i < fNumberOfChunks; i++) {
final long partNumber = i;
IndexInput indexInput = null;
try {
indexInput = dir.openInput(fileInfo.physicalName());
indexInput.seek(partNumber * chunkBytes);
InputStreamIndexInput is = new ThreadSafeInputStreamIndexInput(indexInput, chunkBytes);
String blobName = fileInfo.name();
if (fNumberOfChunks > 1) {
// if we do chunks, then all of them are in the form of "[xxx].part[N]".
blobName += ".part" + partNumber;
}
final IndexInput fIndexInput = indexInput;
blobContainer.writeBlob(blobName, is, is.actualSizeToRead(), new ImmutableBlobContainer.WriterListener() {
@Override public void onCompleted() {
try {
fIndexInput.close();
} catch (IOException e) {
// ignore
}
if (counter.decrementAndGet() == 0) {
latch.countDown();
}
}
@Override public void onFailure(Throwable t) {
try {
fIndexInput.close();
} catch (IOException e) {
// ignore
}
failures.add(t);
if (counter.decrementAndGet() == 0) {
latch.countDown();
}
}
});
} catch (Exception e) {
if (indexInput != null) {
try {
indexInput.close();
} catch (IOException e1) {
// ignore
}
}
failures.add(e);
latch.countDown();
}
}
}
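    /**
     * Returns true if every index and translog file referenced by the commit point fully exists
     * in the listed blobs.
     */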
private boolean commitPointExistsInBlobs(CommitPoint commitPoint, ImmutableMap<String, BlobMetaData> blobs) {
for (CommitPoint.FileInfo fileInfo : Iterables.concat(commitPoint.indexFiles(), commitPoint.translogFiles())) {
if (!commitPointFileExistsInBlobs(fileInfo, blobs)) {
return false;
}
}
return true;
}
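    /**
     * Checks that a commit point file exists in the listed blobs with its expected length, either
     * as a single blob or as the summed sizes of its ".part[N]" blobs.
     */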
private boolean commitPointFileExistsInBlobs(CommitPoint.FileInfo fileInfo, ImmutableMap<String, BlobMetaData> blobs) {
BlobMetaData blobMetaData = blobs.get(fileInfo.name());
if (blobMetaData != null) {
if (blobMetaData.length() != fileInfo.length()) {
return false;
}
} else if (blobs.containsKey(fileInfo.name() + ".part0")) {
            // multi-part file: sum up the part sizes and compare against the expected length
int part = 0;
long totalSize = 0;
while (true) {
blobMetaData = blobs.get(fileInfo.name() + ".part" + part++);
if (blobMetaData == null) {
break;
}
totalSize += blobMetaData.length();
}
if (totalSize != fileInfo.length()) {
return false;
}
} else {
            // no blob found, neither as a single file nor as a multi-part file
return false;
}
return true;
}
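    /**
     * Builds the {@link CommitPoints} from all the "commit-" prefixed blobs that parse correctly;
     * unreadable commit points are logged and skipped.
     */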
private CommitPoints buildCommitPoints(ImmutableMap<String, BlobMetaData> blobs) {
List<CommitPoint> commitPoints = Lists.newArrayList();
for (String name : blobs.keySet()) {
if (name.startsWith("commit-")) {
try {
commitPoints.add(CommitPoints.fromXContent(blobContainer.readBlobFully(name)));
} catch (Exception e) {
logger.warn("failed to read commit point [{}]", e, name);
}
}
}
return new CommitPoints(commitPoints);
}
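    /**
     * Snapshotted file blob names are "__" followed by the generation encoded in base 36.
     */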
private String fileNameFromGeneration(long generation) {
return "__" + Long.toString(generation, Character.MAX_RADIX);
}
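    /**
     * Finds the highest generation used by any "__" prefixed blob (ignoring ".part[N]" suffixes),
     * or -1 if there are none.
     */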
private long findLatestFileNameGeneration(ImmutableMap<String, BlobMetaData> blobs) {
long generation = -1;
for (String name : blobs.keySet()) {
if (!name.startsWith("__")) {
continue;
}
if (name.contains(".part")) {
name = name.substring(0, name.indexOf(".part"));
}
try {
long currentGen = Long.parseLong(name.substring(2) /*__*/, Character.MAX_RADIX);
if (currentGen > generation) {
generation = currentGen;
}
} catch (NumberFormatException e) {
logger.warn("file [{}] does not conform to the '__' schema");
}
}
return generation;
}
}