Package org.apache.lucene.replicator

Source Code of org.apache.lucene.replicator.IndexReplicationHandler

package org.apache.lucene.replicator;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.regex.Matcher;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexNotFoundException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.replicator.ReplicationClient.ReplicationHandler;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.InfoStream;

/**
* A {@link ReplicationHandler} for replication of an index. Implements
* {@link #revisionReady} by copying the files pointed by the client resolver to
* the index {@link Directory} and then touches the index with
* {@link IndexWriter} to make sure any unused files are deleted.
* <p>
* <b>NOTE:</b> this handler assumes that {@link IndexWriter} is not opened by
* another process on the index directory. In fact, opening an
* {@link IndexWriter} on the same directory to which files are copied can lead
* to undefined behavior, where some or all the files will be deleted, override
* other files or simply create a mess. When you replicate an index, it is best
* if the index is never modified by {@link IndexWriter}, except the one that is
* open on the source index, from which you replicate.
* <p>
* This handler notifies the application via a provided {@link Callable} when an
* updated index commit was made available for it.
*
* @lucene.experimental
*/
public class IndexReplicationHandler implements ReplicationHandler {
 
  /**
   * The component used to log messages to the {@link InfoStream#getDefault()
   * default} {@link InfoStream}.
   */
  public static final String INFO_STREAM_COMPONENT = "IndexReplicationHandler";
 
  private final Directory indexDir;
  private final Callable<Boolean> callback;
 
  private volatile Map<String,List<RevisionFile>> currentRevisionFiles;
  private volatile String currentVersion;
  private volatile InfoStream infoStream = InfoStream.getDefault();
 
  /**
   * Returns the last {@link IndexCommit} found in the {@link Directory}, or
   * {@code null} if there are no commits.
   */
  public static IndexCommit getLastCommit(Directory dir) throws IOException {
    try {
      if (DirectoryReader.indexExists(dir)) {
        List<IndexCommit> commits = DirectoryReader.listCommits(dir);
        // listCommits guarantees that we get at least one commit back, or
        // IndexNotFoundException which we handle below
        return commits.get(commits.size() - 1);
      }
    } catch (IndexNotFoundException e) {
      // ignore the exception and return null
    }
    return null;
  }
 
  /**
   * Verifies that the last file is segments_N and fails otherwise. It also
   * removes and returns the file from the list, because it needs to be handled
   * last, after all files. This is important in order to guarantee that if a
   * reader sees the new segments_N, all other segment files are already on
   * stable storage.
   * <p>
   * The reason why the code fails instead of putting segments_N file last is
   * that this indicates an error in the Revision implementation.
   */
  public static String getSegmentsFile(List<String> files, boolean allowEmpty) {
    if (files.isEmpty()) {
      if (allowEmpty) {
        return null;
      } else {
        throw new IllegalStateException("empty list of files not allowed");
      }
    }
   
    String segmentsFile = files.remove(files.size() - 1);
    if (!segmentsFile.startsWith(IndexFileNames.SEGMENTS) || segmentsFile.equals(IndexFileNames.SEGMENTS_GEN)) {
      throw new IllegalStateException("last file to copy+sync must be segments_N but got " + segmentsFile
          + "; check your Revision implementation!");
    }
    return segmentsFile;
  }

  /**
   * Cleanup the index directory by deleting all given files. Called when file
   * copy or sync failed.
   */
  public static void cleanupFilesOnFailure(Directory dir, List<String> files) {
    for (String file : files) {
      try {
        if (dir.fileExists(file)) {
          dir.deleteFile(file);
        }
      } catch (Throwable t) {
        // suppress any exception because if we're here, it means copy
        // failed, and we must cleanup after ourselves.
      }
    }
  }
 
  /**
   * Cleans up the index directory from old index files. This method uses the
   * last commit found by {@link #getLastCommit(Directory)}. If it matches the
   * expected segmentsFile, then all files not referenced by this commit point
   * are deleted.
   * <p>
   * <b>NOTE:</b> this method does a best effort attempt to clean the index
   * directory. It suppresses any exceptions that occur, as this can be retried
   * the next time.
   */
  public static void cleanupOldIndexFiles(Directory dir, String segmentsFile) {
    try {
      IndexCommit commit = getLastCommit(dir);
      // commit == null means weird IO errors occurred, ignore them
      // if there were any IO errors reading the expected commit point (i.e.
      // segments files mismatch), then ignore that commit either.
      if (commit != null && commit.getSegmentsFileName().equals(segmentsFile)) {
        Set<String> commitFiles = new HashSet<String>();
        commitFiles.addAll(commit.getFileNames());
        commitFiles.add(IndexFileNames.SEGMENTS_GEN);
        Matcher matcher = IndexFileNames.CODEC_FILE_PATTERN.matcher("");
        for (String file : dir.listAll()) {
          if (!commitFiles.contains(file)
              && (matcher.reset(file).matches() || file.startsWith(IndexFileNames.SEGMENTS))) {
            try {
              dir.deleteFile(file);
            } catch (Throwable t) {
              // suppress, it's just a best effort
            }
          }
        }
      }
    } catch (Throwable t) {
      // ignore any errors that happens during this state and only log it. this
      // cleanup will have a chance to succeed the next time we get a new
      // revision.
    }
  }
 
  /**
   * Copies the files from the source directory to the target one, if they are
   * not the same.
   */
  public static void copyFiles(Directory source, Directory target, List<String> files) throws IOException {
    if (!source.equals(target)) {
      for (String file : files) {
        source.copy(target, file, file, IOContext.READONCE);
      }
    }
  }

  /**
   * Writes {@link IndexFileNames#SEGMENTS_GEN} file to the directory, reading
   * the generation from the given {@code segmentsFile}. If it is {@code null},
   * this method deletes segments.gen from the directory.
   */
  public static void writeSegmentsGen(String segmentsFile, Directory dir) {
    if (segmentsFile != null) {
      SegmentInfos.writeSegmentsGen(dir, SegmentInfos.generationFromSegmentsFileName(segmentsFile));
    } else {
      try {
        if (dir.fileExists(IndexFileNames.SEGMENTS_GEN)) {
          dir.deleteFile(IndexFileNames.SEGMENTS_GEN);
        }
      } catch (Throwable t) {
        // suppress any errors while deleting this file.
      }
    }
  }

  /**
   * Constructor with the given index directory and callback to notify when the
   * indexes were updated.
   */
  public IndexReplicationHandler(Directory indexDir, Callable<Boolean> callback) throws IOException {
    this.callback = callback;
    this.indexDir = indexDir;
    currentRevisionFiles = null;
    currentVersion = null;
    if (DirectoryReader.indexExists(indexDir)) {
      final List<IndexCommit> commits = DirectoryReader.listCommits(indexDir);
      final IndexCommit commit = commits.get(commits.size() - 1);
      currentRevisionFiles = IndexRevision.revisionFiles(commit);
      currentVersion = IndexRevision.revisionVersion(commit);
      final InfoStream infoStream = InfoStream.getDefault();
      if (infoStream.isEnabled(INFO_STREAM_COMPONENT)) {
        infoStream.message(INFO_STREAM_COMPONENT, "constructor(): currentVersion=" + currentVersion
            + " currentRevisionFiles=" + currentRevisionFiles);
        infoStream.message(INFO_STREAM_COMPONENT, "constructor(): commit=" + commit);
      }
    }
  }
 
  @Override
  public String currentVersion() {
    return currentVersion;
  }
 
  @Override
  public Map<String,List<RevisionFile>> currentRevisionFiles() {
    return currentRevisionFiles;
  }
 
  @Override
  public void revisionReady(String version, Map<String,List<RevisionFile>> revisionFiles,
      Map<String,List<String>> copiedFiles, Map<String,Directory> sourceDirectory) throws IOException {
    if (revisionFiles.size() > 1) {
      throw new IllegalArgumentException("this handler handles only a single source; got " + revisionFiles.keySet());
    }
   
    Directory clientDir = sourceDirectory.values().iterator().next();
    List<String> files = copiedFiles.values().iterator().next();
    String segmentsFile = getSegmentsFile(files, false);
   
    boolean success = false;
    try {
      // copy files from the client to index directory
      copyFiles(clientDir, indexDir, files);
     
      // fsync all copied files (except segmentsFile)
      indexDir.sync(files);
     
      // now copy and fsync segmentsFile
      clientDir.copy(indexDir, segmentsFile, segmentsFile, IOContext.READONCE);
      indexDir.sync(Collections.singletonList(segmentsFile));
     
      success = true;
    } finally {
      if (!success) {
        files.add(segmentsFile); // add it back so it gets deleted too
        cleanupFilesOnFailure(indexDir, files);
      }
    }

    // all files have been successfully copied + sync'd. update the handler's state
    currentRevisionFiles = revisionFiles;
    currentVersion = version;
   
    if (infoStream.isEnabled(INFO_STREAM_COMPONENT)) {
      infoStream.message(INFO_STREAM_COMPONENT, "revisionReady(): currentVersion=" + currentVersion
          + " currentRevisionFiles=" + currentRevisionFiles);
    }

    // update the segments.gen file
    writeSegmentsGen(segmentsFile, indexDir);
   
    // Cleanup the index directory from old and unused index files.
    // NOTE: we don't use IndexWriter.deleteUnusedFiles here since it may have
    // side-effects, e.g. if it hits sudden IO errors while opening the index
    // (and can end up deleting the entire index). It is not our job to protect
    // against those errors, app will probably hit them elsewhere.
    cleanupOldIndexFiles(indexDir, segmentsFile);

    // successfully updated the index, notify the callback that the index is
    // ready.
    if (callback != null) {
      try {
        callback.call();
      } catch (Exception e) {
        throw new IOException(e);
      }
    }
  }

  /** Sets the {@link InfoStream} to use for logging messages. */
  public void setInfoStream(InfoStream infoStream) {
    if (infoStream == null) {
      infoStream = InfoStream.NO_OUTPUT;
    }
    this.infoStream = infoStream;
  }
 
}
TOP

Related Classes of org.apache.lucene.replicator.IndexReplicationHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle, Inc. Contact coftware#gmail.com.