Package org.elasticsearch.indices.recovery

Source Code of org.elasticsearch.indices.recovery.BlobRecoverySource$Actions

/*
* Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
* license agreements.  See the NOTICE file distributed with this work for
* additional information regarding copyright ownership.  Crate licenses
* this file to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.  You may
* obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
* License for the specific language governing permissions and limitations
* under the License.
*
* However, if you have executed another commercial license agreement
* with Crate these terms will supersede the license and you may use the
* software solely pursuant to the terms of the relevant commercial agreement.
*/

package org.elasticsearch.indices.recovery;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import io.crate.blob.BlobTransferTarget;
import io.crate.blob.recovery.BlobRecoveryHandler;
import io.crate.blob.v2.BlobIndices;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.common.StopWatch;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.compress.CompressorFactory;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.deletionpolicy.SnapshotIndexCommit;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.shard.IllegalIndexShardStateException;
import org.elasticsearch.index.shard.IndexShardClosedException;
import org.elasticsearch.index.shard.IndexShardState;
import org.elasticsearch.index.shard.service.InternalIndexShard;
import org.elasticsearch.index.store.StoreFileMetaData;
import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.*;

import java.io.IOException;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicReference;

/**
* The source recovery accepts recovery requests from other peer shards and start the recovery process from this
* source shard to the target shard.
*/
public class BlobRecoverySource extends AbstractComponent {

    public static class Actions {
        public static final String START_RECOVERY = "index/shard/recovery/startRecovery";
    }

    private final TransportService transportService;
    private final IndicesService indicesService;
    private final RecoverySettings recoverySettings;

    private final ClusterService clusterService;

    private final TimeValue internalActionTimeout;
    private final TimeValue internalActionLongTimeout;
    private final BlobTransferTarget blobTransferTarget;
    private final BlobIndices blobIndices;


    @Inject
    public BlobRecoverySource(Settings settings, TransportService transportService, IndicesService indicesService,
                              RecoverySettings recoverySettings, ClusterService clusterService,
                              BlobTransferTarget blobTransferTarget, BlobIndices blobIndices) {
        super(settings);
        this.transportService = transportService;
        this.indicesService = indicesService;
        this.clusterService = clusterService;
        this.blobTransferTarget = blobTransferTarget;
        this.blobIndices = blobIndices;

        this.recoverySettings = recoverySettings;
        this.internalActionTimeout = componentSettings.getAsTime("internal_action_timeout", TimeValue.timeValueMinutes(15));
        this.internalActionLongTimeout = new TimeValue(internalActionTimeout.millis() * 2);
    }

    public void registerHandler(){
        transportService.registerHandler(Actions.START_RECOVERY, new StartRecoveryTransportRequestHandler());
    }

    private RecoveryResponse recover(final StartRecoveryRequest request) {
        final InternalIndexShard shard = (InternalIndexShard) indicesService.indexServiceSafe(request.shardId().index().name()).shardSafe(request.shardId().id());

        // verify that our (the source) shard state is marking the shard to be in recovery mode as well, otherwise
        // the index operations will not be routed to it properly
        RoutingNode node = clusterService.state().readOnlyRoutingNodes().node(request.targetNode().id());
        if (node == null) {
            throw new DelayRecoveryException("source node does not have the node [" + request.targetNode() + "] in its state yet..");
        }
        ShardRouting targetShardRouting = null;
        for (ShardRouting shardRouting : node) {
            if (shardRouting.shardId().equals(request.shardId())) {
                targetShardRouting = shardRouting;
                break;
            }
        }
        if (targetShardRouting == null) {
            throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node");
        }
        if (!targetShardRouting.initializing()) {
            throw new DelayRecoveryException("source node has the state of the target shard to be [" + targetShardRouting.state() + "], expecting to be [initializing]");
        }

        logger.trace("[{}][{}] starting recovery to {}, mark_as_relocated {}", request.shardId().index().name(), request.shardId().id(), request.targetNode(), request.markAsRelocated());
        final RecoveryResponse response = new RecoveryResponse();

        final BlobRecoveryHandler blobRecoveryHandler;

        if (BlobIndices.isBlobIndex(shard.shardId().getIndex())) {
            blobRecoveryHandler = new BlobRecoveryHandler(
                transportService, recoverySettings, blobTransferTarget, blobIndices, shard, request);
        } else {
            blobRecoveryHandler = null;
        }

        shard.recover(new Engine.RecoveryHandler() {
            @Override
            public void phase1(final SnapshotIndexCommit snapshot) throws ElasticsearchException {
                long totalSize = 0;
                long existingTotalSize = 0;
                try {
                    if (blobRecoveryHandler != null) {
                        blobRecoveryHandler.phase1();
                    }
                    StopWatch stopWatch = new StopWatch().start();

                    for (String name : snapshot.getFiles()) {
                        StoreFileMetaData md = shard.store().getMetadata().get(name);
                        boolean useExisting = false;
                        if (request.existingFiles().containsKey(name)) {
                            // we don't compute checksum for segments, so always recover them
                            if (!name.startsWith("segments") && md.isSame(request.existingFiles().get(name))) {
                                response.phase1ExistingFileNames.add(name);
                                response.phase1ExistingFileSizes.add(md.length());
                                existingTotalSize += md.length();
                                useExisting = true;
                                if (logger.isTraceEnabled()) {
                                    logger.trace("[{}][{}] recovery [phase1] to {}: not recovering [{}], exists in local store and has checksum [{}], size [{}]", request.shardId().index().name(), request.shardId().id(), request.targetNode(), name, md.checksum(), md.length());
                                }
                            }
                        }
                        if (!useExisting) {
                            if (request.existingFiles().containsKey(name)) {
                                logger.trace("[{}][{}] recovery [phase1] to {}: recovering [{}], exists in local store, but is different: remote [{}], local [{}]", request.shardId().index().name(), request.shardId().id(), request.targetNode(), name, request.existingFiles().get(name), md);
                            } else {
                                logger.trace("[{}][{}] recovery [phase1] to {}: recovering [{}], does not exists in remote", request.shardId().index().name(), request.shardId().id(), request.targetNode(), name);
                            }
                            response.phase1FileNames.add(name);
                            response.phase1FileSizes.add(md.length());
                        }
                        totalSize += md.length();
                    }
                    response.phase1TotalSize = totalSize;
                    response.phase1ExistingTotalSize = existingTotalSize;

                    logger.trace("[{}][{}] recovery [phase1] to {}: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]", request.shardId().index().name(), request.shardId().id(), request.targetNode(), response.phase1FileNames.size(), new ByteSizeValue(totalSize), response.phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSize));

                    RecoveryFilesInfoRequest recoveryInfoFilesRequest = new RecoveryFilesInfoRequest(request.recoveryId(), request.shardId(), response.phase1FileNames, response.phase1FileSizes,
                            response.phase1ExistingFileNames, response.phase1ExistingFileSizes, response.phase1TotalSize, response.phase1ExistingTotalSize);
                    transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.FILES_INFO, recoveryInfoFilesRequest, TransportRequestOptions.options().withTimeout(internalActionTimeout), EmptyTransportResponseHandler.INSTANCE_SAME).txGet();

                    final CountDownLatch latch = new CountDownLatch(response.phase1FileNames.size());
                    final AtomicReference<Exception> lastException = new AtomicReference<Exception>();
                    for (final String name : response.phase1FileNames) {
                        recoverySettings.concurrentStreamPool().execute(new Runnable() {
                            @Override
                            public void run() {
                                IndexInput indexInput = null;
                                try {
                                    final int BUFFER_SIZE = (int) recoverySettings.fileChunkSize().bytes();
                                    byte[] buf = new byte[BUFFER_SIZE];
                                    StoreFileMetaData md = shard.store().getMetadata().get(name);
                                    // TODO: maybe use IOContext.READONCE?
                                    indexInput = shard.store().directory().openInput(name, IOContext.READ);
                                    boolean shouldCompressRequest = recoverySettings.compress();
                                    if (CompressorFactory.isCompressed(indexInput)) {
                                        shouldCompressRequest = false;
                                    }

                                    long len = indexInput.length();
                                    long readCount = 0;
                                    while (readCount < len) {
                                        if (shard.state() == IndexShardState.CLOSED) { // check if the shard got closed on us
                                            throw new IndexShardClosedException(shard.shardId());
                                        }
                                        int toRead = readCount + BUFFER_SIZE > len ? (int) (len - readCount) : BUFFER_SIZE;
                                        long position = indexInput.getFilePointer();

                                        if (recoverySettings.rateLimiter() != null) {
                                            recoverySettings.rateLimiter().pause(toRead);
                                        }

                                        indexInput.readBytes(buf, 0, toRead, false);
                                        BytesArray content = new BytesArray(buf, 0, toRead);
                                        transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.FILE_CHUNK, new RecoveryFileChunkRequest(request.recoveryId(), request.shardId(), md, position, content),
                                                TransportRequestOptions.options().withCompress(shouldCompressRequest).withType(TransportRequestOptions.Type.RECOVERY).withTimeout(internalActionTimeout), EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
                                        readCount += toRead;
                                    }
                                } catch (Exception e) {
                                    lastException.set(e);
                                } finally {
                                    if (indexInput != null) {
                                        try {
                                            indexInput.close();
                                        } catch (IOException e) {
                                            // ignore
                                        }
                                    }
                                    latch.countDown();
                                }
                            }
                        });
                    }

                    latch.await();

                    if (lastException.get() != null) {
                        throw lastException.get();
                    }

                    // now, set the clean files request
                    Set<String> snapshotFiles = Sets.newHashSet(snapshot.getFiles());
                    transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.CLEAN_FILES, new RecoveryCleanFilesRequest(request.recoveryId(), shard.shardId(), snapshotFiles), TransportRequestOptions.options().withTimeout(internalActionTimeout), EmptyTransportResponseHandler.INSTANCE_SAME).txGet();

                    stopWatch.stop();
                    logger.trace("[{}][{}] recovery [phase1] to {}: took [{}]", request.shardId().index().name(), request.shardId().id(), request.targetNode(), stopWatch.totalTime());
                    response.phase1Time = stopWatch.totalTime().millis();
                } catch (Throwable e) {
                    throw new RecoverFilesRecoveryException(request.shardId(), response.phase1FileNames.size(), new ByteSizeValue(totalSize), e);
                }
            }

            @Override
            public void phase2(Translog.Snapshot snapshot) throws ElasticsearchException {
                if (shard.state() == IndexShardState.CLOSED) {
                    throw new IndexShardClosedException(request.shardId());
                }
                logger.trace("[{}][{}] recovery [phase2] to {}: start", request.shardId().index().name(), request.shardId().id(), request.targetNode());
                StopWatch stopWatch = new StopWatch().start();
                transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.PREPARE_TRANSLOG, new RecoveryPrepareForTranslogOperationsRequest(request.recoveryId(), request.shardId()), TransportRequestOptions.options().withTimeout(internalActionTimeout), EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
                stopWatch.stop();
                response.startTime = stopWatch.totalTime().millis();
                logger.trace("[{}][{}] recovery [phase2] to {}: start took [{}]", request.shardId().index().name(), request.shardId().id(), request.targetNode(), stopWatch.totalTime());

                logger.trace("[{}][{}] recovery [phase2] to {}: sending transaction log operations", request.shardId().index().name(), request.shardId().id(), request.targetNode());
                stopWatch = new StopWatch().start();
                int totalOperations = sendSnapshot(snapshot);
                stopWatch.stop();
                logger.trace("[{}][{}] recovery [phase2] to {}: took [{}]", request.shardId().index().name(), request.shardId().id(), request.targetNode(), stopWatch.totalTime());
                response.phase2Time = stopWatch.totalTime().millis();
                response.phase2Operations = totalOperations;
            }

            @Override
            public void phase3(Translog.Snapshot snapshot) throws ElasticsearchException {
                if (shard.state() == IndexShardState.CLOSED) {
                    throw new IndexShardClosedException(request.shardId());
                }
                logger.trace("[{}][{}] recovery [phase3] to {}: sending transaction log operations", request.shardId().index().name(), request.shardId().id(), request.targetNode());
                StopWatch stopWatch = new StopWatch().start();
                int totalOperations = sendSnapshot(snapshot);
                transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.FINALIZE, new RecoveryFinalizeRecoveryRequest(request.recoveryId(), request.shardId()), TransportRequestOptions.options().withTimeout(internalActionLongTimeout), EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
                if (request.markAsRelocated()) {
                    // TODO what happens if the recovery process fails afterwards, we need to mark this back to started
                    try {
                        shard.relocated("to " + request.targetNode());
                    } catch (IllegalIndexShardStateException e) {
                        // we can ignore this exception since, on the other node, when it moved to phase3
                        // it will also send shard started, which might cause the index shard we work against
                        // to move be closed by the time we get to the the relocated method
                    }
                }
                stopWatch.stop();
                logger.trace("[{}][{}] recovery [phase3] to {}: took [{}]", request.shardId().index().name(), request.shardId().id(), request.targetNode(), stopWatch.totalTime());
                response.phase3Time = stopWatch.totalTime().millis();
                response.phase3Operations = totalOperations;
            }

            private int sendSnapshot(Translog.Snapshot snapshot) throws ElasticsearchException {
                int ops = 0;
                long size = 0;
                int totalOperations = 0;
                List<Translog.Operation> operations = Lists.newArrayList();
                while (snapshot.hasNext()) {
                    if (shard.state() == IndexShardState.CLOSED) {
                        throw new IndexShardClosedException(request.shardId());
                    }
                    Translog.Operation operation = snapshot.next();
                    operations.add(operation);
                    ops += 1;
                    size += operation.estimateSize();
                    totalOperations++;
                    if (ops >= recoverySettings.translogOps() || size >= recoverySettings.translogSize().bytes()) {

                        if (recoverySettings.rateLimiter() != null) {
                            recoverySettings.rateLimiter().pause(size);
                        }

                        RecoveryTranslogOperationsRequest translogOperationsRequest = new RecoveryTranslogOperationsRequest(request.recoveryId(), request.shardId(), operations);
                        transportService.submitRequest(request.targetNode(),
                            RecoveryTarget.Actions.TRANSLOG_OPS,
                            translogOperationsRequest,
                            TransportRequestOptions.options().withCompress(recoverySettings.compress()).withType(TransportRequestOptions.Type.RECOVERY).withTimeout(internalActionLongTimeout),
                            EmptyTransportResponseHandler.INSTANCE_SAME
                        ).txGet();
                        ops = 0;
                        size = 0;
                        operations.clear();
                    }
                }
                // send the leftover
                if (!operations.isEmpty()) {
                    RecoveryTranslogOperationsRequest translogOperationsRequest = new RecoveryTranslogOperationsRequest(request.recoveryId(), request.shardId(), operations);
                    transportService.submitRequest(request.targetNode(),
                        RecoveryTarget.Actions.TRANSLOG_OPS,
                        translogOperationsRequest,
                        TransportRequestOptions.options().withCompress(recoverySettings.compress()).withType(TransportRequestOptions.Type.RECOVERY).withTimeout(internalActionLongTimeout),
                        EmptyTransportResponseHandler.INSTANCE_SAME
                    ).txGet();
                }
                return totalOperations;
            }
        });
        return response;
    }

    class StartRecoveryTransportRequestHandler extends BaseTransportRequestHandler<StartRecoveryRequest> {

        @Override
        public StartRecoveryRequest newInstance() {
            return new StartRecoveryRequest();
        }

        @Override
        public String executor() {
            return ThreadPool.Names.GENERIC;
        }

        @Override
        public void messageReceived(final StartRecoveryRequest request, final TransportChannel channel) throws Exception {
            RecoveryResponse response = recover(request);
            channel.sendResponse(response);
        }
    }

}
TOP

Related Classes of org.elasticsearch.indices.recovery.BlobRecoverySource$Actions

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.