Package com.asakusafw.runtime.directio.hadoop

Source Code of com.asakusafw.runtime.directio.hadoop.BlockMap

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.runtime.directio.hadoop;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;

import com.asakusafw.runtime.directio.DirectInputFragment;

/**
* Utilities for file blocks.
* @see BlockInfo
* @since 0.7.0
*/
public final class BlockMap {

    static final double MIN_LOCALITY = 0.125;

    static final double PRUNE_REL_LOCALITY = 0.75;

    private final String path;

    private final BlockInfo[] blocks;

    private final long size;

    private BlockMap(String path, BlockInfo[] blocks) {
        assert path != null;
        assert blocks != null;
        assert blocks.length >= 1;
        this.path = path;
        this.blocks = blocks;
        this.size = blocks[blocks.length - 1].end - blocks[0].start;
    }

    /**
     * Returns the file size.
     * @return the size
     */
    public long getFileSize() {
        return size;
    }

    /**
     * Returns the file blocks in this map.
     * @return the file blocks
     */
    public List<BlockInfo> getBlocks() {
        return Arrays.asList(blocks);
    }

    /**
     * Returns a list of {@link BlockInfo} for the target file.
     * @param fs the target file
     * @param status the target file status
     * @return the computed information
     * @throws IOException if failed to compute information
     */
    public static List<BlockInfo> computeBlocks(FileSystem fs, FileStatus status) throws IOException {
        BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
        List<BlockInfo> results = new ArrayList<BlockInfo>();
        for (BlockLocation location : locations) {
            long length = location.getLength();
            long start = location.getOffset();
            results.add(new BlockInfo(start, start + length, location.getHosts()));
        }
        return results;
    }

    /**
     * Create {@link BlockMap}.
     * @param path the target file path
     * @param fileSize the target file size
     * @param blockList the original block list
     * @param combineBlocks {@code true} to combine consecutive blocks with same owners
     * @return the built object
     */
    public static BlockMap create(
            String path, long fileSize,
            Collection<BlockInfo> blockList,
            boolean combineBlocks) {
        assert path != null;
        assert blockList != null;
        BlockInfo[] blocks = blockList.toArray(new BlockInfo[blockList.size()]);
        Arrays.sort(blocks, new Comparator<BlockInfo>() {
            @Override
            public int compare(BlockInfo o1, BlockInfo o2) {
                int startDiff = compareLong(o1.start, o2.start);
                if (startDiff != 0) {
                    return startDiff;
                }
                return -compareLong(o1.hosts.length, o2.hosts.length);
            }
        });
        long lastOffset = 0;
        List<BlockInfo> results = new ArrayList<BlockInfo>();
        for (BlockInfo block : blocks) {
            // if block is out of bounds, skip it
            if (block.start >= fileSize) {
                continue;
            }
            // if block is gapped, add a padding block
            if (lastOffset < block.start) {
                results.add(new BlockInfo(lastOffset, block.start, null));
            }
            long start = Math.max(lastOffset, block.start);
            long end = Math.min(fileSize, block.end);
            // if block is empty, skip it
            if (start >= end) {
                continue;
            }

            results.add(new BlockInfo(start, end, block.hosts));
            lastOffset = end;
        }
        assert lastOffset <= fileSize;
        if (lastOffset < fileSize) {
            results.add(new BlockInfo(lastOffset, fileSize, null));
        }
        if (results.isEmpty()) {
            results.add(new BlockInfo(0, fileSize, null));
        }
        if (combineBlocks) {
            results = combine(results);
        }
        return new BlockMap(path, results.toArray(new BlockInfo[results.size()]));
    }

    private static List<BlockInfo> combine(List<BlockInfo> blocks) {
        assert blocks != null;
        List<BlockInfo> results = new ArrayList<BlockInfo>(blocks.size());
        Iterator<BlockInfo> iter = blocks.iterator();
        assert iter.hasNext();
        BlockInfo last = iter.next();
        while (iter.hasNext()) {
            BlockInfo next = iter.next();
            if (last.isSameOwner(next)) {
                last = new BlockInfo(last.start, next.end, last.hosts);
            } else {
                results.add(last);
                last = next;
            }
        }
        results.add(last);
        return results;
    }

    /**
     * Returns {@link DirectInputFragment} for the range.
     * @param start the start offset (inclusive)
     * @param end the end offset (exclusive)
     * @return the computed fragment
     */
    public DirectInputFragment get(long start, long end) {
        List<String> hosts = computeHosts(start, end);
        return new DirectInputFragment(path, start, end - start, hosts);
    }

    private List<String> computeHosts(long start, long end) {
        assert start <= end;
        if (start == end) {
            return Collections.emptyList();
        }
        List<Map.Entry<String, Long>> rank = computeLocalityRank(start, end);
        if (rank.isEmpty()) {
            return Collections.emptyList();
        }
        long max = rank.get(0).getValue();
        if (max < (end - start) * MIN_LOCALITY) {
            return Collections.emptyList();
        }
        long threshold = (long) (max * PRUNE_REL_LOCALITY);
        List<String> results = new ArrayList<String>();
        for (int i = 0, n = rank.size(); i < n; i++) {
            Map.Entry<String, Long> block = rank.get(i);
            if (block.getValue() < threshold) {
                break;
            }
            results.add(block.getKey());
        }
        return results;
    }

    private List<Map.Entry<String, Long>> computeLocalityRank(long start, long end) {
        Map<String, Long> ownBytes = new HashMap<String, Long>();
        for (BlockInfo block : blocks) {
            if (block.end < start) {
                continue;
            }
            if (block.start >= end) {
                break;
            }
            long s = Math.max(start, block.start);
            long e = Math.min(end, block.end);
            long length = e - s;
            for (String node : block.getHosts()) {
                Long bytes = ownBytes.get(node);
                if (bytes == null) {
                    ownBytes.put(node, length);
                } else {
                    ownBytes.put(node, bytes + length);
                }
            }
        }
        if (ownBytes.isEmpty()) {
            return Collections.emptyList();
        }
        List<Map.Entry<String, Long>> entries = new ArrayList<Map.Entry<String, Long>>(ownBytes.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, Long>>() {
            @Override
            public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) {
                return -compareLong(o1.getValue(), o2.getValue());
            }
        });
        return entries;
    }

    static int compareLong(long offset1, long offset2) {
        if (offset1 < offset2) {
            return -1;
        } else if (offset1 > offset2) {
            return +1;
        }
        return 0;
    }
}
TOP

Related Classes of com.asakusafw.runtime.directio.hadoop.BlockMap

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.