/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.namenode.BlocksMap;
import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.apache.hadoop.hdfs.server.namenode.INodeRaidStorage.RaidBlockInfo;
import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;
import org.apache.hadoop.util.ReflectionUtils;
import com.google.common.base.Joiner;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
/**
 * A class holding the information of a RAID codec.
 * A raid codec specifies:
 * 1. Which erasure code class to use
 * 2. The number of data blocks and parity blocks per stripe
 * 3. The stripe chunk size
 * 4. The parity replication and minimum source replication
 */
public class RaidCodec implements Serializable {
private static final long serialVersionUID = 1L;
public static final String HDFS_RAID_CODEC_JSON = "hdfs.raid.codec.json";
public static final Log LOG = LogFactory.getLog(RaidCodec.class);
  // Used by offline raiding; indicates that one stripe chunk equals the block size
public static final long FULL_BLOCK = -1L;
public static final Joiner joiner = Joiner.on(",");
  /**
   * Used by ErasureCode.init() to get code-specific extra parameters.
   */
public final String jsonStr;
/**
* id of the codec
*/
public final String id;
/**
* Number of data blocks in one stripe
*/
public final int numDataBlocks;
/**
* Number of parity blocks in one stripe
*/
public final int numParityBlocks;
/**
* Number of blocks in one stripe = numDataBlocks + numParityBlocks;
*/
public final int numStripeBlocks;
  /**
   * Size of the data stored on one datanode for one stripe:
   * stripeChunkSize = stripe size / numDataBlocks
   */
public final long stripeChunkSize;
/**
   * Defines the number of replicas for parity blocks
*/
public final short parityReplication;
/**
   * Defines the minimum number of replicas for source blocks
*/
public final short minSourceReplication;
/**
* The full class name of the ErasureCode used
*/
public final String erasureCodeClass;
/**
* Human readable description of the codec
*/
public final String description;
private static List<RaidCodec> codecs;
private static Map<String, RaidCodec> idToCodec;
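  /**
   * Serialize this codec as a JSON object string. The trailing comma in the
   * output suggests it is meant to be concatenated with other codecs to form
   * a JSON array.
   */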
public String getCodecJson() {
return joiner.join(
" { " +
"\"id\":\"" + id + "\"",
"\"num_data_blocks\":" + numDataBlocks,
"\"num_parity_blocks\":" + numParityBlocks,
"\"stripe_chunk_size\":" + stripeChunkSize,
"\"parity_repl\":" + parityReplication,
"\"min_source_repl\":" + minSourceReplication,
"\"erasure_code\":\"" + erasureCodeClass + "\"",
"\"description\":\"" + description + "\"",
" }, ");
}
  /**
   * Get the singleton list of codecs, ordered by priority.
   */
public static List<RaidCodec> getCodecs() {
return RaidCodec.codecs;
}
/**
* Get the instance of the codec by id
*/
public static RaidCodec getCodec(String id) {
return idToCodec.get(id);
}
static {
try {
Configuration.addDefaultResource("hdfs-default.xml");
Configuration.addDefaultResource("hdfs-site.xml");
initializeCodecs(new Configuration());
} catch (Exception e) {
      LOG.fatal("Failed to initialize RAID codecs", e);
System.exit(-1);
}
}
public static void initializeCodecs(Configuration conf) throws IOException {
try {
String source = conf.get(HDFS_RAID_CODEC_JSON);
if (source == null) {
codecs = Collections.emptyList();
idToCodec = Collections.emptyMap();
if (LOG.isDebugEnabled()) {
          LOG.debug("No codec is specified");
}
return;
}
JSONArray jsonArray = new JSONArray(source);
List<RaidCodec> localCodecs = new ArrayList<RaidCodec>();
Map<String, RaidCodec> localIdToCodec = new HashMap<String, RaidCodec>();
for (int i = 0; i < jsonArray.length(); ++i) {
RaidCodec codec = new RaidCodec(jsonArray.getJSONObject(i));
localIdToCodec.put(codec.id, codec);
localCodecs.add(codec);
}
codecs = Collections.unmodifiableList(localCodecs);
idToCodec = Collections.unmodifiableMap(localIdToCodec);
} catch (JSONException e) {
throw new IOException(e);
}
}
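  // Illustrative only: a plausible value for hdfs.raid.codec.json, assuming a
  // Reed-Solomon implementation. The field names match the JSON constructor
  // below; the id, class name, and numbers here are hypothetical.
  //
  // [ { "id"                : "rs",
  //     "num_data_blocks"   : 10,
  //     "num_parity_blocks" : 4,
  //     "stripe_chunk_size" : -1,
  //     "parity_repl"       : 1,
  //     "min_source_repl"   : 1,
  //     "erasure_code"      : "org.apache.hadoop.raid.ReedSolomonCode",
  //     "description"       : "ReedSolomon code"
  //   } ]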
private RaidCodec(JSONObject json) throws JSONException {
this.jsonStr = json.toString();
this.id = json.getString("id");
this.numParityBlocks = json.getInt("num_parity_blocks");
this.numDataBlocks = json.getInt("num_data_blocks");
this.numStripeBlocks = this.numParityBlocks + this.numDataBlocks;
this.stripeChunkSize = json.getLong("stripe_chunk_size");
this.parityReplication = (short)json.getInt("parity_repl");
this.minSourceReplication = (short)json.getInt("min_source_repl");
this.erasureCodeClass = json.getString("erasure_code");
this.description = getJSONString(json, "description", "");
}
  private static String getJSONString(
      JSONObject json, String key, String defaultResult) {
    String result = defaultResult;
    try {
      result = json.getString(key);
    } catch (JSONException e) {
      // Key is absent or not a string; fall back to the default value.
    }
    return result;
  }
public ErasureCode createErasureCode(Configuration conf) {
    // Instantiate the configured erasure code implementation
Class<?> erasureCode = null;
try {
erasureCode = conf.getClassByName(this.erasureCodeClass);
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
}
ErasureCode code = (ErasureCode) ReflectionUtils.newInstance(erasureCode,
conf);
code.init(this.numDataBlocks, this.numParityBlocks);
return code;
}
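  // Illustrative usage, assuming a codec with id "rs" has been configured:
  //   RaidCodec codec = RaidCodec.getCodec("rs");
  //   ErasureCode code = codec.createErasureCode(conf);
  // The returned code is initialized with this codec's numDataBlocks and
  // numParityBlocks.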
@Override
public String toString() {
if (jsonStr == null) {
return "Test codec " + id;
} else {
return jsonStr;
}
}
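  /**
   * Number of stripes needed to hold numBlocks source blocks,
   * i.e. ceil(numBlocks / numDataBlocks).
   */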
public int getNumStripes(int numBlocks) {
if (numBlocks <= 0) {
return 0;
}
return (numBlocks - 1) / this.numDataBlocks + 1;
}
public int getNumParityBlocks(int numBlocks) {
return getNumStripes(numBlocks) * this.numParityBlocks;
}
/**
   * Used by unit tests only.
*/
static void addCodec(RaidCodec codec) {
List<RaidCodec> newCodecs = new ArrayList<RaidCodec>();
newCodecs.addAll(codecs);
newCodecs.add(codec);
codecs = Collections.unmodifiableList(newCodecs);
Map<String, RaidCodec> newIdToCodec = new HashMap<String, RaidCodec>();
newIdToCodec.putAll(idToCodec);
newIdToCodec.put(codec.id, codec);
idToCodec = Collections.unmodifiableMap(newIdToCodec);
}
/**
   * Used by unit tests only.
*/
static void clearCodecs() {
codecs = Collections.emptyList();
idToCodec = Collections.emptyMap();
}
/**
   * Used by unit tests only.
*/
RaidCodec(String id,
int numParityBlocks,
int numDataBlocks,
long stripeChunkSize,
short parityReplication,
short minSourceReplication,
String erasureCodeClass,
String description) {
this.jsonStr = null;
this.id = id;
this.numParityBlocks = numParityBlocks;
this.numDataBlocks = numDataBlocks;
this.numStripeBlocks = this.numDataBlocks + this.numParityBlocks;
this.stripeChunkSize = stripeChunkSize;
this.parityReplication = parityReplication;
this.minSourceReplication = minSourceReplication;
this.erasureCodeClass = erasureCodeClass;
this.description = description;
}
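  // Block layout of a raided file: each stripe stores its parity blocks
  // first, followed by its data blocks. For example (illustrative), with
  // numParityBlocks = 2 and numDataBlocks = 4, a file with 10 source blocks
  // is laid out as:
  //   [ P0 P1 D0 D1 D2 D3 | P2 P3 D4 D5 D6 D7 | P4 P5 D8 D9 ]
  // The two helpers below extract the source and parity subsets of such an
  // array.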
// Return only the source blocks of the raided file
public BlockInfo[] getSourceBlocks(BlockInfo[] blocks) {
int numSourceBlocks = blocks.length -
(blocks.length / numStripeBlocks) * numParityBlocks -
((blocks.length % numStripeBlocks == 0) ? 0 : numParityBlocks);
BlockInfo[] sourceBlocks = new BlockInfo[numSourceBlocks];
int pos = numParityBlocks;
int stripeEnd = numStripeBlocks;
for (int i = 0; i < numSourceBlocks; i++) {
sourceBlocks[i] = blocks[pos];
pos++;
if (pos == stripeEnd) {
pos += numParityBlocks;
stripeEnd += numStripeBlocks;
}
}
return sourceBlocks;
}
  // Used only by tests
  // Return only the parity blocks of the raided file
public BlockInfo[] getParityBlocks(BlockInfo[] blocks) {
int numBlocks = (blocks.length / numStripeBlocks) * numParityBlocks
+ ((blocks.length % numStripeBlocks == 0) ? 0 : numParityBlocks);
BlockInfo[] parityBlocks = new BlockInfo[numBlocks];
int pos = 0;
int parityEnd = numParityBlocks;
for (int i = 0; i < numBlocks; i++) {
parityBlocks[i] = blocks[pos];
pos++;
if (pos == parityEnd) {
pos += numDataBlocks;
parityEnd += numStripeBlocks;
}
}
return parityBlocks;
}
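  /**
   * Return the last block of the raided file, which is expected to be a
   * source block; log an error and return null if it falls in the parity
   * section of its stripe.
   */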
  public Block getLastBlock(BlockInfo[] blocks) {
    if (blocks == null || blocks.length == 0) {
      return null;
    }
int mod = (blocks.length - 1) % numStripeBlocks;
Block lastBlock = blocks[blocks.length - 1];
if (mod < numParityBlocks) {
LOG.error("Last block is not source block " + lastBlock +
" numBlocks: " + blocks.length + " codec: " + this);
return null;
}
return lastBlock;
}
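  /**
   * Compute the logical file size by summing the sizes of the source blocks
   * only; parity blocks do not contribute.
   */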
public long getFileSize(BlockInfo[] blocks) {
if (blocks == null) {
return 0L;
}
long fileSize = 0L;
    for (int i = 0; i < blocks.length; i += numStripeBlocks) {
for (int dataBlockId = numParityBlocks;
i + dataBlockId < blocks.length && dataBlockId < numStripeBlocks;
dataBlockId++) {
fileSize += blocks[i + dataBlockId].getNumBytes();
}
}
return fileSize;
}
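  /**
   * Disk space consumed by the raided file:
   * dataSize * replication + paritySize * parityReplication.
   */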
public long diskspaceConsumed(Block[] blocks, boolean isUnderConstruction,
long preferredBlockSize, short replication) {
long dataSize = 0;
long paritySize = 0;
    if (blocks == null || blocks.length == 0) {
return 0;
}
for (int i = 0, stripeIdx = 0; i < blocks.length; i++) {
if (blocks[i] != null) {
if (stripeIdx < numParityBlocks) {
paritySize += blocks[i].getNumBytes();
} else {
dataSize += blocks[i].getNumBytes();
}
}
stripeIdx = (stripeIdx + 1) % numStripeBlocks;
}
return dataSize * replication + paritySize * parityReplication;
}
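  /**
   * Build the raided block list by interleaving parity and source blocks
   * stripe by stripe (parity blocks first in each stripe), verifying and
   * recording source block checksums when provided, and re-registering
   * every block in the blocksMap as a RaidBlockInfo.
   */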
public BlockInfo[] convertToRaidStorage(BlockInfo[] parityBlocks,
BlockInfo[] blocks, int[] checksums, BlocksMap blocksMap,
short replication, INodeFile inode) throws IOException {
BlockInfo[] newList = new BlockInfo[parityBlocks.length + blocks.length];
int pPos = 0;
int sPos = 0;
int pos = 0;
int numStripes = getNumStripes(blocks.length);
for (int i = 0; i < numStripes; i++) {
System.arraycopy(parityBlocks, pPos, newList, pos, numParityBlocks);
for (int j = pos; j < pos + numParityBlocks; j++) {
blocksMap.updateINode(newList[j],
new RaidBlockInfo(newList[j], parityReplication, j), inode,
parityReplication, true);
}
pPos += numParityBlocks;
pos += numParityBlocks;
for (int j = 0; j < numDataBlocks && sPos < blocks.length;
j++, pos++, sPos++) {
newList[pos] = blocks[sPos];
if (checksums != null) {
if (blocks[sPos].getChecksum() != BlockInfo.NO_BLOCK_CHECKSUM
&& blocks[sPos].getChecksum() != checksums[sPos]) {
throw new IOException("Checksum mismatch for the " + sPos +
"th source blocks. New=" + checksums[sPos] +
", Existing=" + blocks[sPos].getChecksum());
}
blocks[sPos].setChecksum(checksums[sPos]);
}
blocksMap.updateINode(newList[pos], new RaidBlockInfo(newList[pos],
replication, pos), inode, replication, true);
}
}
return newList;
}
  /**
   * Count the number of live replicas of each parity block in the raided
   * file. If any stripe does not have enough parity block replicas, add the
   * stripe to raidEncodingTasks to schedule encoding. If forceAdd is true,
   * always add the stripe to raidEncodingTasks without checking.
   * @param sourceINode the inode of the raided file
   * @param raidEncodingTasks the set that under-replicated stripes are added to
   * @param fs the namesystem used to count live replicas
   * @param forceAdd if true, add every stripe without checking replication
   * @return true if all parity blocks of the file have enough replicas
   * @throws IOException
   */
public boolean checkRaidProgress(INodeFile sourceINode,
LightWeightLinkedSet<RaidBlockInfo> raidEncodingTasks, FSNamesystem fs,
boolean forceAdd) throws IOException {
boolean result = true;
BlockInfo[] blocks = sourceINode.getBlocks();
    for (int i = 0; i < blocks.length; i += numStripeBlocks) {
boolean hasParity = true;
if (!forceAdd) {
for (int j = 0; j < numParityBlocks; j++) {
if (fs.countLiveNodes(blocks[i + j]) < this.parityReplication) {
hasParity = false;
break;
}
}
}
if (!hasParity || forceAdd) {
raidEncodingTasks.add(new RaidBlockInfo(blocks[i], parityReplication, i));
result = false;
}
}
return result;
}
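  /**
   * Return the blocks of the stripe starting at rbi's index (parity blocks
   * first, then source blocks); the trailing stripe may contain fewer than
   * numStripeBlocks blocks.
   */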
public BlockInfo[] getBlocksInOneStripe(BlockInfo[] blocks,
RaidBlockInfo rbi) {
int size = Math.min(this.numStripeBlocks, blocks.length - rbi.getIndex());
BlockInfo[] stripeBlocks = new BlockInfo[size];
System.arraycopy(blocks, rbi.getIndex(), stripeBlocks, 0, size);
return stripeBlocks;
}
}