Package org.apache.hadoop.hdfs.server.namenode

Source Code of org.apache.hadoop.hdfs.server.namenode.Checkpointer

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;

import java.io.IOException;
import java.io.File;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.util.Collection;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.NamenodeRole;
import org.apache.hadoop.hdfs.server.namenode.FSImage.CheckpointStates;
import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeFile;
import org.apache.hadoop.hdfs.server.protocol.CheckpointCommand;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.http.HttpServer;
import org.apache.hadoop.util.Daemon;

/**
* The Checkpointer is responsible for supporting periodic checkpoints
* of the HDFS metadata.
*
* The Checkpointer is a daemon that periodically wakes up
* (determined by the schedule specified in the configuration),
* triggers a periodic checkpoint and then goes back to sleep.
*
* The start of a checkpoint is triggered by one of the two factors:
* (1) time or (2) the size of the edits file.
*/
class Checkpointer extends Daemon {
  public static final Log LOG =
    LogFactory.getLog(Checkpointer.class.getName());

  private BackupNode backupNode;
  volatile boolean shouldRun;
  private long checkpointPeriod;  // in seconds
  private long checkpointSize;    // size (in MB) of current Edit Log

  private BackupStorage getFSImage() {
    return (BackupStorage)backupNode.getFSImage();
  }

  private NamenodeProtocol getNamenode(){
    return backupNode.namenode;
  }

  /**
   * Create a connection to the primary namenode.
   */
  Checkpointer(Configuration conf, BackupNode bnNodethrows IOException {
    this.backupNode = bnNode;
    try {
      initialize(conf);
    } catch(IOException e) {
      shutdown();
      throw e;
    }
  }

  /**
   * Initialize checkpoint.
   */
  private void initialize(Configuration conf) throws IOException {
    // Create connection to the namenode.
    shouldRun = true;

    // Initialize other scheduling parameters from the configuration
    checkpointPeriod = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY,
                                    DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT);
    checkpointSize = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_SIZE_KEY,
                                  DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_SIZE_DEFAULT);

    HttpServer httpServer = backupNode.httpServer;
    httpServer.setAttribute("name.system.image", getFSImage());
    httpServer.setAttribute("name.conf", conf);
    httpServer.addInternalServlet("getimage", "/getimage", GetImageServlet.class);

    LOG.info("Checkpoint Period : " + checkpointPeriod + " secs " +
             "(" + checkpointPeriod/60 + " min)");
    LOG.info("Log Size Trigger  : " + checkpointSize + " bytes " +
             "(" + checkpointSize/1024 + " KB)");
  }

  /**
   * Shut down the checkpointer.
   */
  void shutdown() {
    shouldRun = false;
    backupNode.stop();
  }

  //
  // The main work loop
  //
  public void run() {
    // Check the size of the edit log once every 5 minutes.
    long periodMSec = 5 * 60;   // 5 minutes
    if(checkpointPeriod < periodMSec) {
      periodMSec = checkpointPeriod;
    }
    periodMSec *= 1000;

    long lastCheckpointTime = 0;
    if(!backupNode.shouldCheckpointAtStartup())
      lastCheckpointTime = FSNamesystem.now();
    while(shouldRun) {
      try {
        long now = FSNamesystem.now();
        boolean shouldCheckpoint = false;
        if(now >= lastCheckpointTime + periodMSec) {
          shouldCheckpoint = true;
        } else {
          long size = getJournalSize();
          if(size >= checkpointSize)
            shouldCheckpoint = true;
        }
        if(shouldCheckpoint) {
          doCheckpoint();
          lastCheckpointTime = now;
        }
      } catch(IOException e) {
        LOG.error("Exception in doCheckpoint: ", e);
      } catch(Throwable e) {
        LOG.error("Throwable Exception in doCheckpoint: ", e);
        shutdown();
        break;
      }
      try {
        Thread.sleep(periodMSec);
      } catch(InterruptedException ie) {
        // do nothing
      }
    }
  }

  private long getJournalSize() throws IOException {
    // If BACKUP node has been loaded
    // get edits size from the local file. ACTIVE has the same.
    if(backupNode.isRole(NamenodeRole.BACKUP)
        && getFSImage().getEditLog().isOpen())
      return backupNode.journalSize();
    // Go to the ACTIVE node for its size
    return getNamenode().journalSize(backupNode.getRegistration());
  }

  /**
   * Download <code>fsimage</code> and <code>edits</code>
   * files from the remote name-node.
   */
  private void downloadCheckpoint(CheckpointSignature sig) throws IOException {
    // Retrieve image file
    String fileid = "getimage=1";
    Collection<File> list = getFSImage().getFiles(NameNodeFile.IMAGE,
        NameNodeDirType.IMAGE);
    File[] files = list.toArray(new File[list.size()]);
    assert files.length > 0 : "No checkpoint targets.";
    String nnHttpAddr = backupNode.nnHttpAddress;
    TransferFsImage.getFileClient(nnHttpAddr, fileid, files);
    LOG.info("Downloaded file " + files[0].getName() + " size " +
             files[0].length() + " bytes.");

    // Retrieve edits file
    fileid = "getedit=1";
    list = getFSImage().getFiles(NameNodeFile.EDITS, NameNodeDirType.EDITS);
    files = list.toArray(new File[list.size()]);
    assert files.length > 0 : "No checkpoint targets.";
    TransferFsImage.getFileClient(nnHttpAddr, fileid, files);
    LOG.info("Downloaded file " + files[0].getName() + " size " +
        files[0].length() + " bytes.");
  }

  /**
   * Copy the new image into remote name-node.
   */
  private void uploadCheckpoint(CheckpointSignature sig) throws IOException {
    InetSocketAddress httpSocAddr = backupNode.getHttpAddress();
    int httpPort = httpSocAddr.getPort();
    String fileid = "putimage=1&port=" + httpPort +
      "&machine=" +
      InetAddress.getLocalHost().getHostAddress() +
      "&token=" + sig.toString();
    LOG.info("Posted URL " + backupNode.nnHttpAddress + fileid);
    TransferFsImage.getFileClient(backupNode.nnHttpAddress, fileid, (File[])null);
  }

  /**
   * Create a new checkpoint
   */
  void doCheckpoint() throws IOException {
    long startTime = FSNamesystem.now();
    NamenodeCommand cmd =
      getNamenode().startCheckpoint(backupNode.getRegistration());
    CheckpointCommand cpCmd = null;
    switch(cmd.getAction()) {
      case NamenodeProtocol.ACT_SHUTDOWN:
        shutdown();
        throw new IOException("Name-node " + backupNode.nnRpcAddress
                                           + " requested shutdown.");
      case NamenodeProtocol.ACT_CHECKPOINT:
        cpCmd = (CheckpointCommand)cmd;
        break;
      default:
        throw new IOException("Unsupported NamenodeCommand: "+cmd.getAction());
    }

    CheckpointSignature sig = cpCmd.getSignature();
    assert FSConstants.LAYOUT_VERSION == sig.getLayoutVersion() :
      "Signature should have current layout version. Expected: "
      + FSConstants.LAYOUT_VERSION + " actual "+ sig.getLayoutVersion();
    assert !backupNode.isRole(NamenodeRole.CHECKPOINT) ||
      cpCmd.isImageObsolete() : "checkpoint node should always download image.";
    backupNode.setCheckpointState(CheckpointStates.UPLOAD_START);
    if(cpCmd.isImageObsolete()) {
      // First reset storage on disk and memory state
      backupNode.resetNamespace();
      downloadCheckpoint(sig);
    }

    BackupStorage bnImage = getFSImage();
    bnImage.loadCheckpoint(sig);
    sig.validateStorageInfo(bnImage);
    bnImage.saveCheckpoint();

    if(cpCmd.needToReturnImage())
      uploadCheckpoint(sig);

    getNamenode().endCheckpoint(backupNode.getRegistration(), sig);

    bnImage.convergeJournalSpool();
    backupNode.setRegistration(); // keep registration up to date
    if(backupNode.isRole(NamenodeRole.CHECKPOINT))
        getFSImage().getEditLog().close();
    LOG.info("Checkpoint completed in "
        + (FSNamesystem.now() - startTime)/1000 + " seconds."
        " New Image Size: " + bnImage.getFsImageName().length());
  }
}
TOP

Related Classes of org.apache.hadoop.hdfs.server.namenode.Checkpointer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.