/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;
import java.io.File;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.hadoop.fs.HardLink;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.NodeType;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.datanode.DataStorage.RollbackThread;
import org.apache.hadoop.hdfs.server.datanode.DataStorage.UpgradeThread;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.util.Daemon;
/**
* Manages storage for the set of BlockPoolSlices which share a particular
* namespace id, on this DataNode.
*
* This class supports the following functionality:
* <ol>
* <li> Formatting a new namespace storage</li>
* <li> Recovering a storage state to a consistent state (if possible></li>
* <li> Taking a snapshot of the namespace during upgrade</li>
* <li> Rolling back a namespace to a previous snapshot</li>
* <li> Finalizing block storage by deletion of a snapshot</li>
* </ul>
*
* @see Storage
*/
public class NameSpaceSliceStorage extends Storage {
final static String NS_DIR_PREFIX = "NS-";
private Map<File, Integer> layoutMap = new HashMap<File, Integer>();
public NameSpaceSliceStorage() {
super(NodeType.DATA_NODE);
}
public NameSpaceSliceStorage(int namespaceID, long cTime) {
super(NodeType.DATA_NODE, namespaceID, cTime);
}
public NameSpaceSliceStorage(int namespaceID, long cTime,
Map<File, Integer> layoutMap) {
super(NodeType.DATA_NODE, namespaceID, cTime);
this.layoutMap = layoutMap;
}
/**
* Analyze storage directories. Recover from previous transitions if required.
*
* @param datanode Datanode to which this storage belongs to
* @param nsInfo namespace information
* @param dataDirs storage directories of namespace
* @param startOpt startup option
* @throws IOException on error
*/
void recoverTransitionRead(DataNode datanode, NamespaceInfo nsInfo,
Collection<File> dataDirs, StartupOption startOpt) throws IOException {
assert FSConstants.LAYOUT_VERSION == nsInfo.getLayoutVersion()
: "Block-pool and name-node layout versions must be the same.";
// 1. For each Namespace data directory analyze the state and
// check whether all is consistent before transitioning.
this.storageDirs = new ArrayList<StorageDirectory>(dataDirs.size());
ArrayList<StorageState> dataDirStates = new ArrayList<StorageState>(
dataDirs.size());
for (Iterator<File> it = dataDirs.iterator(); it.hasNext();) {
File dataDir = it.next();
StorageDirectory sd = new StorageDirectory(dataDir, null, false);
StorageState curState;
try {
curState = sd.analyzeStorage(startOpt);
// sd is locked but not opened
switch (curState) {
case NORMAL:
break;
case NON_EXISTENT:
// ignore this storage
LOG.info("Storage directory " + dataDir + " does not exist.");
it.remove();
continue;
case NOT_FORMATTED: // format
LOG.info("Storage directory " + dataDir + " is not formatted.");
if (!sd.isEmpty()) {
LOG.error("Storage directory " + dataDir
+ " is not empty, and will not be formatted! Exiting.");
throw new IOException(
"Storage directory " + dataDir + " is not empty!");
}
LOG.info("Formatting ...");
format(sd, nsInfo);
break;
default: // recovery part is common
sd.doRecover(curState);
}
} catch (IOException ioe) {
sd.unlock();
throw ioe;
}
// add to the storage list. This is inherited from parent class, Storage.
addStorageDir(sd);
dataDirStates.add(curState);
}
if (dataDirs.size() == 0) // none of the data dirs exist
throw new IOException(
"All specified directories are not accessible or do not exist.");
// 2. Do transitions
// Each storage directory is treated individually.
// During startup some of them can upgrade or roll back
// while others could be up-to-date for the regular startup.
doTransition(datanode, nsInfo, startOpt);
// 3. Update all storages. Some of them might have just been formatted.
this.writeAll();
}
/**
* Format a namespace slice storage.
* @param dnCurDir DataStorage current directory
* @param nsInfo the name space info
* @throws IOException Signals that an I/O exception has occurred.
*/
void format(File dnCurDir, NamespaceInfo nsInfo) throws IOException {
File curNsDir = getNsRoot(namespaceID, dnCurDir);
StorageDirectory nsSdir = new StorageDirectory(curNsDir);
format(nsSdir, nsInfo);
}
/**
* Format a namespace slice storage.
* @param sd the namespace storage
* @param nsInfo the name space info
* @throws IOException Signals that an I/O exception has occurred.
*/
private void format(StorageDirectory nsSdir, NamespaceInfo nsInfo) throws IOException {
LOG.info("Formatting namespace " + namespaceID + " directory "
+ nsSdir.getCurrentDir());
nsSdir.clearDirectory(); // create directory
File rbwDir = new File(nsSdir.getCurrentDir(), STORAGE_DIR_RBW);
File finalizedDir = new File(nsSdir.getCurrentDir(), STORAGE_DIR_FINALIZED);
LOG.info("Creating Directories : " + rbwDir + ", " + finalizedDir);
if (!rbwDir.mkdirs() || !finalizedDir.mkdirs()) {
throw new IOException("Cannot create directories : " + rbwDir + ", "
+ finalizedDir);
}
this.layoutVersion = FSConstants.LAYOUT_VERSION;
this.cTime = nsInfo.getCTime();
this.namespaceID = nsInfo.getNamespaceID();
this.storageType = NodeType.DATA_NODE;
nsSdir.write();
}
/**
* Set layoutVersion, namespaceID and blockpoolID into namespace storage
* VERSION file
*/
@Override
protected void setFields(Properties props, StorageDirectory sd)
throws IOException {
props.setProperty(NAMESPACE_ID, String.valueOf(namespaceID));
props.setProperty(CHECK_TIME, String.valueOf(cTime));
props.setProperty(LAYOUT_VERSION, String.valueOf(layoutVersion));
}
/** Validate and set namespace ID */
private void setNameSpaceID(File storage, String nsid)
throws InconsistentFSStateException {
if (nsid == null || nsid.equals("")) {
throw new InconsistentFSStateException(storage, "file "
+ STORAGE_FILE_VERSION + " is invalid.");
}
int newNsId = Integer.parseInt(nsid);
if (namespaceID > 0 && namespaceID != newNsId) {
throw new InconsistentFSStateException(storage,
"Unexepcted namespaceID " + nsid + " . Expected " + namespaceID);
}
namespaceID = newNsId;
}
@Override
protected void getFields(Properties props, StorageDirectory sd)
throws IOException {
setNamespaceID(props, sd);
setcTime(props, sd);
String snsid = props.getProperty(NAMESPACE_ID);
setNameSpaceID(sd.getRoot(), snsid);
String property = props.getProperty(LAYOUT_VERSION);
int lv;
if (property == null) {
Integer topLayout = getTopLevelLayout(sd);
if (topLayout == null) {
throw new InconsistentFSStateException(sd.getRoot(),
"Top level layout and NS level layout do not exist");
}
lv = topLayout;
} else {
lv = Integer.parseInt(property);
}
if (lv < FSConstants.LAYOUT_VERSION) { // future version
throw new InconsistentFSStateException(sd.getRoot(),
"has future layout version : " + lv);
}
layoutVersion = lv;
}
private Integer getTopLevelLayout(StorageDirectory sd)
throws IOException {
File topDir = sd.getRoot().getParentFile().getParentFile();
Integer layoutVersion = layoutMap.get(topDir);
if (layoutVersion != null && topDir.exists() && topDir.isDirectory()) {
return layoutVersion;
}
return null;
}
private boolean isTopLevelUpgraded(StorageDirectory sd) {
File topDir = sd.getRoot().getParentFile().getParentFile();
return new File(topDir, STORAGE_DIR_PREVIOUS).exists();
}
/**
* Analyze whether a transition of the NS state is required and perform it if
* necessary. <br>
* Rollback if (previousLV >= LAYOUT_VERSION &&
* LAYOUT_VERSION<=FEDERATION_VERSION) || prevCTime <= namenode.cTime.
* Upgrade if this.LV > LAYOUT_VERSION || this.cTime < namenode.cTime
* Regular startup if this.LV = LAYOUT_VERSION && this.cTime = namenode.cTime
*
* @param dn
* DataNode to which this storage belongs to
* @param nsInfo
* namespace info
* @param startOpt
* startup option
* @throws IOException
*/
private void doTransition(DataNode datanode,
NamespaceInfo nsInfo, StartupOption startOpt) throws IOException {
if (startOpt == StartupOption.ROLLBACK)
doRollback(nsInfo); // rollback if applicable
int numOfDirs = getNumStorageDirs();
List<StorageDirectory> dirsToUpgrade = new ArrayList<StorageDirectory>(numOfDirs);
List<StorageInfo> dirsInfo = new ArrayList<StorageInfo>(numOfDirs);
for(int idx = 0; idx < numOfDirs; idx++) {
StorageDirectory sd = this.getStorageDir(idx);
sd.read();
checkVersionUpgradable(this.layoutVersion);
assert this.layoutVersion >= FSConstants.LAYOUT_VERSION
: "Future version is not allowed";
if (getNamespaceID() != nsInfo.getNamespaceID()) {
throw new IOException("Incompatible namespaceIDs in "
+ sd.getRoot().getCanonicalPath() + ": namenode namespaceID = "
+ nsInfo.getNamespaceID() + "; datanode namespaceID = "
+ getNamespaceID());
}
if (this.layoutVersion == FSConstants.LAYOUT_VERSION
&& this.cTime == nsInfo.getCTime())
continue; // regular startup
// verify necessity of a distributed upgrade
UpgradeManagerDatanode um =
datanode.getUpgradeManager(nsInfo.namespaceID);
verifyDistributedUpgradeProgress(um, nsInfo);
// upgrade if layout version has not changed and NN has a newer checkpoint
// if layout version gets updated, a global snapshot has already taken
// so no need to do a per namespace snapshot
if (this.layoutVersion > nsInfo.layoutVersion ||
this.cTime < nsInfo.getCTime()) {
if (isTopLevelUpgraded(sd)) {
throw new IOException("Top level directory already upgraded for : " +
sd.getRoot());
}
dirsToUpgrade.add(sd); // upgrade
dirsInfo.add(new StorageInfo(this));
continue;
}
// layoutVersion == LAYOUT_VERSION && this.cTime > nsInfo.cTime
// must shutdown
if (this.layoutVersion == nsInfo.layoutVersion &&
this.cTime > nsInfo.cTime) {
throw new IOException("Datanode state: LV = " + this.getLayoutVersion()
+ " CTime = " + this.getCTime()
+ " is newer than the namespace state: LV = "
+ nsInfo.getLayoutVersion() + " CTime = " + nsInfo.getCTime());
}
}
// Now do upgrade if dirsToUpgrade is not empty
if (!dirsToUpgrade.isEmpty()) {
doUpgrade(dirsToUpgrade, dirsInfo, nsInfo);
}
}
/**
* Move current storage into a backup directory,
* and hardlink all its blocks into the new current directory.
*/
private void doUpgrade(List<StorageDirectory> sds,
List<StorageInfo> sdsInfo,
final NamespaceInfo nsInfo
) throws IOException {
assert sds.size() == sdsInfo.size();
UpgradeThread[] upgradeThreads = new UpgradeThread[sds.size()];
// start to upgrade
for (int i=0; i<upgradeThreads.length; i++) {
final StorageDirectory sd = sds.get(i);
final StorageInfo si = sdsInfo.get(i);
UpgradeThread thread = new UpgradeThread(sd, si, nsInfo);
thread.start();
upgradeThreads[i] = thread;
}
// wait for upgrade to be done
for (UpgradeThread thread : upgradeThreads) {
try {
thread.join();
} catch (InterruptedException e) {
throw (InterruptedIOException)new InterruptedIOException().initCause(e);
}
}
// check for errors
for (UpgradeThread thread : upgradeThreads) {
if (thread.error != null)
throw new IOException(thread.error);
}
// write version file
this.layoutVersion = FSConstants.LAYOUT_VERSION;
assert this.namespaceID == nsInfo.getNamespaceID() :
"Data-node and name-node layout versions must be the same.";
this.cTime = nsInfo.getCTime();
for (StorageDirectory sd :sds) {
sd.write();
File prevDir = sd.getPreviousDir();
File tmpDir = sd.getPreviousTmp();
// rename tmp to previous
rename(tmpDir, prevDir);
LOG.info("Upgrade of " + sd.getRoot()+ " is complete.");
}
}
private boolean isGlobalUpgraded(StorageDirectory nsSd) {
return new File(nsSd.getRoot().getParentFile().getParentFile(), STORAGE_DIR_PREVIOUS).exists();
}
/**
* Cleanup the detachDir.
*
* If the directory is not empty report an error; Otherwise remove the
* directory.
*
* @param detachDir detach directory
* @throws IOException if the directory is not empty or it can not be removed
private void cleanupDetachDir(File detachDir) throws IOException {
if (!LayoutVersion.supports(Feature.APPEND_RBW_DIR, layoutVersion)
&& detachDir.exists() && detachDir.isDirectory()) {
if (detachDir.list().length != 0) {
throw new IOException("Detached directory " + detachDir
+ " is not empty. Please manually move each file under this "
+ "directory to the finalized directory if the finalized "
+ "directory tree does not have the file.");
} else if (!detachDir.delete()) {
throw new IOException("Cannot remove directory " + detachDir);
}
}
}
*/
/*
* Roll back to old snapshot at the namespace level
* If previous directory exists:
* <ol>
* <li>Rename <SD>/current/<nsid>/current to removed.tmp</li>
* <li>Rename * <SD>/current/<nsid>/previous to current</li>
* <li>Remove removed.tmp</li>
* </ol>
*
* Do nothing if previous directory does not exist.
* @param nsSd Block pool storage directory at <SD>/current/<nsid>
*/
void doRollback(NamespaceInfo nsInfo)
throws IOException {
int numDirs = getNumStorageDirs();
RollbackThread[] rollbackThreads = new RollbackThread[numDirs];
// start to rollback
for (int i=0; i<numDirs; i++) {
final StorageDirectory sd = this.getStorageDir(i);
RollbackThread thread = new RollbackThread(sd, nsInfo, new NameSpaceSliceStorage());
thread.start();
rollbackThreads[i] = thread;
}
// wait for rollback to be done
for (RollbackThread thread : rollbackThreads) {
try {
thread.join();
} catch (InterruptedException e) {
return;
}
}
// check for errors
for (RollbackThread thread : rollbackThreads) {
if (thread.error != null)
throw new IOException(thread.error);
}
}
/*
* Finalize the namespace storage by deleting <NS>/previous directory
* that holds the snapshot.
*/
void doFinalize(File dnCurDir) throws IOException {
File nsRoot = getNsRoot(namespaceID, dnCurDir);
StorageDirectory nsSd = new StorageDirectory(nsRoot);
// namespace level previous directory
File prevDir = nsSd.getPreviousDir();
if (!prevDir.exists()) {
return; // already finalized
}
final String dataDirPath = nsSd.getRoot().getCanonicalPath();
LOG.info("Finalizing upgrade for storage directory " + dataDirPath
+ ".\n cur LV = " + this.getLayoutVersion() + "; cur CTime = "
+ this.getCTime());
assert nsSd.getCurrentDir().exists() : "Current directory must exist.";
// rename previous to finalized.tmp
final File tmpDir = nsSd.getFinalizedTmp();
rename(prevDir, tmpDir);
// delete finalized.tmp dir in a separate thread
new Daemon(new Runnable() {
public void run() {
try {
deleteDir(tmpDir);
} catch (IOException ex) {
LOG.error("Finalize upgrade for " + dataDirPath + " failed.", ex);
}
LOG.info("Finalize upgrade for " + dataDirPath + " is complete.");
}
public String toString() {
return "Finalize " + dataDirPath;
}
}).start();
}
private void verifyDistributedUpgradeProgress(UpgradeManagerDatanode um,
NamespaceInfo nsInfo) throws IOException {
assert um != null : "DataNode.upgradeManager is null.";
um.setUpgradeState(false, getLayoutVersion());
um.initializeUpgrade(nsInfo);
}
@Override
public String toString() {
return super.toString() + ";nsid=" + namespaceID;
}
/**
* Get a namespace storage root based on data node storage root
* @param nsID namespace ID
* @param dnCurDir data node storage root directory
* @return root directory for namespace storage
*/
public static File getNsRoot(int namespaceId, File dnCurDir) {
return new File(dnCurDir, getNamespaceDataDirName(namespaceId));
}
public File getNsRoot(File dnCurDir) {
return new File(dnCurDir, getNamespaceDataDirName(namespaceID));
}
public static String getNamespaceDataDirName(int namespaceId) {
return NS_DIR_PREFIX+String.valueOf(namespaceId);
}
public boolean isPreUpgradableLayout(StorageDirectory sd) throws IOException {
return false;
}
@Override
protected void corruptPreUpgradeStorage(File rootDir) throws IOException {
// TODO Auto-generated method stub
}
@Override
public boolean isConversionNeeded(StorageDirectory sd) throws IOException {
// TODO Auto-generated method stub
return false;
}
}