Source Code of org.apache.hadoop.raid.StripeReader$LocationPair

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.raid;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockMissingException;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlockWithFileName;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.raid.StripeStore.StripeInfo;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
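
/**
 * Iterates over the stripes of a RAIDed file (or of a directory, for
 * directory RAID) and exposes each stripe as an array of InputStreams,
 * one per stripe location. Locations that are erased, or that the
 * erasure code does not need for decoding, are backed by zero-filled
 * streams. Concrete subclasses (FileStripeReader, DirectoryStripeReader)
 * decide how stripe locations map to blocks on disk.
 */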

public abstract class StripeReader {
  public static final Log LOG = LogFactory.getLog(StripeReader.class);
  Codec codec;
  Configuration conf;
  FileSystem fs;
  long stripeStartIdx;
  long stripeEndIdx;
  int bufferSize;
 
  protected long currentStripeIdx;
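
  /**
   * Describes where a block falls within a RAID stripe: the index of the
   * stripe it belongs to, the block's position inside that stripe and,
   * for directory RAID, the directory listing used to compute them (may
   * be null).
   */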
 
  public static class LocationPair {
    private int stripeIdx;
    private int blockIdxInStripe;
    private List<FileStatus> lfs;
    public LocationPair(int stripeIdx, int blockIdxInStripe,
        List<FileStatus> lfs) {
      this.stripeIdx = stripeIdx;
      this.blockIdxInStripe = blockIdxInStripe;
      this.lfs = lfs;
    }
   
    int getStripeIdx() {
      return stripeIdx;
    }
   
    int getBlockIdxInStripe() {
      return blockIdxInStripe;
    }
   
    List<FileStatus> getListFileStatus() {
      return lfs;
    }
  }
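
  /**
   * The per-stripe result returned by getStripeInputs(): one input stream
   * per stripe location, together with the source path and block offset
   * each stream was opened at.
   */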
 
  public static class StripeInputInfo {
    private final InputStream[] inputs;
    private final Path[] srcPaths;
    private final long[] offSets;
   
    public StripeInputInfo(InputStream[] inputs,
                           Path[] srcPaths,
                           long[] offSets) {
      this.inputs = inputs;
      this.srcPaths = srcPaths;
      this.offSets = offSets;
    }
   
    InputStream[] getInputs() {
      return inputs;
    }
   
    Path[] getSrcPaths() {
      return srcPaths;
    }
   
    long[] getBlockOffsets() {
      return offSets;
    }
  }
 
  public StripeReader(Configuration conf, Codec codec,
      FileSystem fs, long stripeStartIdx) {
    this.codec = codec;
    this.conf = conf;
    this.fs = fs;
    this.stripeStartIdx = stripeStartIdx;
    this.currentStripeIdx = stripeStartIdx;
    this.bufferSize = conf.getInt("io.file.buffer.size", 64 * 1024);
  }
 
  /**
   * Get the current stripe index.
   */
  public long getCurrentStripeIdx() {
    return this.currentStripeIdx;
  }

  /**
   * Has next stripe to read?
   */
  public boolean hasNext() {
    return currentStripeIdx < stripeEndIdx;
  }
 
  /**
   * Get the input streams for the next stripe
   */
  public StripeInputInfo getNextStripeInputs() throws IOException {
    StripeInputInfo stripeInputInfo = getStripeInputs(currentStripeIdx);
    currentStripeIdx ++;
    return stripeInputInfo;
  }
 
  public abstract StripeInputInfo getStripeInputs(long stripeIdx)
      throws IOException;
 
  /**
   * Get the index of a block within a file, given its block id.
   * Returns -1 if no block with that id is found.
   *
   * Any better ways to do this?
   */
  private static int getBlockIdInFile(DistributedFileSystem srcFs,
      Path srcPath, long blockId) throws IOException {
    FileStatus srcStat = srcFs.getFileStatus(srcPath);
    LocatedBlocks lbs =
        srcFs.getClient().getLocatedBlocks(srcPath.toUri().getPath(), 0,
            srcStat.getLen());
   
    int i = 0;
    LOG.info("Look for block " + blockId + " in file " + srcPath);
    for (LocatedBlock lb: lbs.getLocatedBlocks()) {
      if (lb.getBlock().getBlockId() == blockId) {
        return i;
      }
      i++;
    }
   
    return -1;
  }
 
  /**
   * Build the array of input streams for a stripe from its StripeInfo.
   *
   * @return the input streams, one per stripe location (parity blocks
   *         first, followed by source blocks)
   * @throws IOException
   */
  public static InputStream[] buildInputsFromStripeInfo(
      DistributedFileSystem srcFs,
      FileStatus srcStat, Codec codec,
      StripeInfo si, long offsetInBlock, long limit,
      List<Integer> erasedLocations,
      Set<Integer> locationsToNotRead, ErasureCode code
      ) throws IOException{
   
    InputStream[] inputs = new InputStream[codec.stripeLength +
                                           codec.parityLength];
   
    boolean redo = false;
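    // Retry loop: if any input fails to open with a missing-block or
    // checksum error, the location is added to erasedLocations, all
    // streams are closed and the whole set of inputs is rebuilt.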
    do {
      redo = false;
      locationsToNotRead.clear();
      List<Integer> locationsToRead =
          code.locationsToReadForDecode(erasedLocations);
     
      for (int i = 0; i < inputs.length; i++) {
        boolean isErased = (erasedLocations.indexOf(i) != -1);
        boolean shouldRead = (locationsToRead.indexOf(i) != -1);
        try {
          InputStream stm = null;
          if (isErased || !shouldRead) {
            if (isErased) {
              LOG.info("Location " + i + " is erased, using zeros");
            } else {
              LOG.info("Location " + i + " need not be read, using zeros");
            }
            locationsToNotRead.add(i);
            stm = new RaidUtils.ZeroInputStream(limit);
          } else {
            long blockId;
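            // Stripe locations [0, parityLength) map to parity blocks;
            // the remaining locations map to the stripe's source blocks.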
            if (i < codec.parityLength) {
              blockId = si.parityBlocks.get(i).getBlockId();
            } else if ((i - codec.parityLength) < si.srcBlocks.size()) {
              blockId = si.srcBlocks.get(i - codec.parityLength).getBlockId();
            } else {
              LOG.info("Using zeros for location " + i);
              inputs[i] = new RaidUtils.ZeroInputStream(limit);
              continue;
            }
            LocatedBlockWithFileName lb =
                srcFs.getClient().getBlockInfo(blockId);
            if (lb == null) {
              throw new BlockMissingException(String.valueOf(blockId),
                "Location " + i + " can not be found. Block id: " + blockId, 0);
            } else {
              Path filePath = new Path(lb.getFileName());
              FileStatus stat = srcFs.getFileStatus(filePath);
              long blockSize = stat.getBlockSize();
              if (offsetInBlock > blockSize) {
                stm = new RaidUtils.ZeroInputStream(limit);
              } else {
                if (srcFs.exists(filePath)) {
                  long startOffset =
                      getBlockIdInFile(srcFs, filePath, blockId) * blockSize;
                  long offset = startOffset + offsetInBlock;
                  LOG.info("Opening " + lb.getFileName() + ":" + offset +
                      " for location " + i);
                  FSDataInputStream is = srcFs.open(filePath);
                  is.seek(offset);
                  stm = is;
                } else {
                  LOG.info("Location " + i + ", File " + lb.getFileName() +
                      " does not exist, using zeros");
                  locationsToNotRead.add(i);
                  stm = new RaidUtils.ZeroInputStream(limit);
                }
              }
            }
          }
          inputs[i] = stm;
        } catch (IOException e) {
          if (e instanceof BlockMissingException ||
              e instanceof ChecksumException) {
            erasedLocations.add(i);
            redo = true;
            RaidUtils.closeStreams(inputs);
            break;
          } else {
            throw e;
          }
        }
      }
    } while (redo);
    assert(locationsToNotRead.size() == codec.parityLength);
    return inputs;
  }
 
  /**
   * Builds (codec.stripeLength + codec.parityLength) inputs given some
   * erased locations.
   * Outputs:
   *  - the array of input streams (the return value)
   *  - the list of erased locations @param erasedLocations, which may grow
   *    if more streams turn out to be unreadable
   *  - the set of locations that are not read @param locationsToNotRead
   */
  public InputStream[] buildInputs(
    FileSystem srcFs, Path srcFile, FileStatus srcStat,
    FileSystem parityFs, Path parityFile, FileStatus parityStat,
    int stripeIdx, long offsetInBlock, List<Integer> erasedLocations,
    Set<Integer> locationsToNotRead, ErasureCode code)
      throws IOException {
    InputStream[] inputs = new InputStream[codec.stripeLength +
                                           codec.parityLength];
    boolean redo = false;
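    // Same retry strategy as buildInputsFromStripeInfo: a failed open
    // marks the location as erased and all inputs are rebuilt.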
    do {
      redo = false;
      locationsToNotRead.clear();
      List<Integer> locationsToRead =
        code.locationsToReadForDecode(erasedLocations);
      for (int i = 0; i < inputs.length; i++) {
        boolean isErased = (erasedLocations.indexOf(i) != -1);
        boolean shouldRead = (locationsToRead.indexOf(i) != -1);
        try {
          InputStream stm = null;
          if (isErased || !shouldRead) {
            if (isErased) {
              LOG.info("Location " + i + " is erased, using zeros");
            } else {
              LOG.info("Location " + i + " need not be read, using zeros");
            }
            locationsToNotRead.add(i);
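            // Feed zeros for this location; the stream length is the
            // block's index (parity blocks first, then source blocks)
            // multiplied by the source block size.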
            stm = new RaidUtils.ZeroInputStream(srcStat.getBlockSize() * (
              (i < codec.parityLength) ?
              stripeIdx * codec.parityLength + i :
              stripeIdx * codec.stripeLength + i - codec.parityLength));
          } else {
            stm = buildOneInput(i, offsetInBlock,
                                srcFs, srcFile, srcStat,
                                parityFs, parityFile, parityStat);
          }
          inputs[i] = stm;
        } catch (IOException e) {
          if (e instanceof BlockMissingException || e instanceof ChecksumException) {
            erasedLocations.add(i);
            redo = true;
            RaidUtils.closeStreams(inputs);
            break;
          } else {
            throw e;
          }
        }
      }
    } while (redo);
    assert(locationsToNotRead.size() == codec.parityLength);
    return inputs;
  }
 
  public static LocationPair getBlockLocation(Codec codec, FileSystem srcFs,
      Path srcFile, int blockIdxInFile, Configuration conf)
          throws IOException {
    return getBlockLocation(codec, srcFs, srcFile, blockIdxInFile, conf, null);
  }
 
  /**
   * Given a block index in a file and a codec, return a LocationPair
   * containing the index of the stripe the block belongs to and the
   * block's position within that stripe.
   */
  public static LocationPair getBlockLocation(Codec codec, FileSystem srcFs,
      Path srcFile, int blockIdxInFile, Configuration conf,
      List<FileStatus> lfs) throws IOException {
    int stripeIdx = 0;
    int blockIdxInStripe = 0;
    int blockIdx = blockIdxInFile;
    if (codec.isDirRaid) {
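      // For directory RAID a stripe spans all files in the parent
      // directory, so convert the per-file block index into a
      // directory-wide index by counting the blocks of the files that
      // precede srcFile in the listing.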
      Path parentPath = srcFile.getParent();
      if (lfs == null) {
        lfs = RaidNode.listDirectoryRaidFileStatus(conf, srcFs, parentPath);
      }
      if (lfs == null) {
        throw new IOException("Couldn't list files under " + parentPath);
      }
      int blockNum = 0;
      Path qSrcFile = srcFs.makeQualified(srcFile);
      for (FileStatus fsStat: lfs) {
        if (!fsStat.getPath().equals(qSrcFile)) {
          blockNum += RaidNode.getNumBlocks(fsStat);
        } else {
          blockNum += blockIdxInFile;
          break;
        }
      }
      blockIdx = blockNum;
    }
    stripeIdx = blockIdx / codec.stripeLength;
    blockIdxInStripe = blockIdx % codec.stripeLength;
    return new LocationPair(stripeIdx, blockIdxInStripe, lfs);
  }
 
  public static LocationPair getParityBlockLocation(Codec codec,
      final int blockIdxInFile) {
   
    int stripeIdx = blockIdxInFile / codec.parityLength;
    int blockIdxInStripe = blockIdxInFile % codec.parityLength;
   
    return new LocationPair(stripeIdx, blockIdxInStripe, null);
  }
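
  /**
   * Factory method: returns a DirectoryStripeReader for directory-RAID
   * codecs and a FileStripeReader otherwise.
   */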

  public static StripeReader getStripeReader(Codec codec, Configuration conf,
      long blockSize, FileSystem fs, long stripeIdx, FileStatus srcStat)
          throws IOException {
    if (codec.isDirRaid) {
      Path srcDir = srcStat.isDir() ? srcStat.getPath() :
          srcStat.getPath().getParent();
      return new DirectoryStripeReader(conf, codec, fs, stripeIdx, -1L, srcDir,
          RaidNode.listDirectoryRaidFileStatus(conf, fs, srcDir));
    } else {
      return new FileStripeReader(conf, blockSize, codec, fs, stripeIdx, -1L,
          srcStat.getPath(), srcStat.getLen());
    }
  }
 
  protected abstract InputStream buildOneInput(
      int locationIndex, long offsetInBlock,
      FileSystem srcFs, Path srcFile, FileStatus srcStat,
      FileSystem parityFs, Path parityFile, FileStatus parityStat
      ) throws IOException;
 
  protected InputStream getParityFileInput(int locationIndex, Path parityFile,
      FileSystem parityFs, FileStatus parityStat, long offsetInBlock,
      long parityBlockSize)
          throws IOException {
    // Dealing with a parity file here.
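    // Parity blocks are stored back to back, codec.parityLength per
    // stripe, so the parity block index is
    // codec.parityLength * stripeStartIdx + locationIndex and the byte
    // offset follows from the parity block size.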
    int parityBlockIdx = (int)(codec.parityLength * stripeStartIdx + locationIndex);
    long offset = parityBlockSize * parityBlockIdx + offsetInBlock;
    assert(offset < parityStat.getLen());
    LOG.info("Opening " + parityFile + ":" + offset +
      " for location " + locationIndex);
    FSDataInputStream s = parityFs.open(
      parityFile, conf.getInt("io.file.buffer.size", 64 * 1024));
    s.seek(offset);
    return s;
  }
}
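
A minimal sketch of how code in the same package might drive a StripeReader
(the StripeInputInfo accessors are package-private). The codec, configuration
and file system are assumed to be set up elsewhere; the class and variable
names below are illustrative only and are not part of the Hadoop RAID source.

package org.apache.hadoop.raid;

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class StripeReaderUsageSketch {
  void readAllStripes(Configuration conf, Codec codec, FileSystem fs,
      Path srcFile) throws IOException {
    FileStatus srcStat = fs.getFileStatus(srcFile);
    // Start at stripe 0; the factory picks a file- or directory-based
    // reader depending on the codec.
    StripeReader reader = StripeReader.getStripeReader(
        codec, conf, srcStat.getBlockSize(), fs, 0L, srcStat);
    while (reader.hasNext()) {
      StripeReader.StripeInputInfo info = reader.getNextStripeInputs();
      InputStream[] inputs = info.getInputs();
      try {
        // Decode or verify the stripe from the streams here.
      } finally {
        RaidUtils.closeStreams(inputs);
      }
    }
  }
}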