Package com.asakusafw.runtime.util.cache

Source Code of com.asakusafw.runtime.util.cache.HadoopFileCacheRepository

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.runtime.util.cache;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.asakusafw.runtime.util.lock.LockObject;
import com.asakusafw.runtime.util.lock.LockProvider;
import com.asakusafw.runtime.util.lock.RetryObject;
import com.asakusafw.runtime.util.lock.RetryStrategy;

/**
* Manages cache files on Hadoop file system.
* @since 0.7.0
*/
public class HadoopFileCacheRepository implements FileCacheRepository {

    static final Log LOG = LogFactory.getLog(HadoopFileCacheRepository.class);

    static final String KEY_CHECK_BEFORE_DELETE = "com.asakusafw.cache.hadoop.deleteOnlyIfExists";

    static final boolean DEFAULT_CHECK_BEFORE_DELETE = true;

    private final Configuration configuration;

    private final Path repository;

    private final LockProvider<? super Path> lockProvider;

    private final RetryStrategy retryStrategy;

    private final boolean checkBeforeDelete;

    private final ThreadLocal<byte[]> byteBuffers = new ThreadLocal<byte[]>() {
        @Override
        protected byte[] initialValue() {
            return new byte[1024];
        }
    };

    /**
     * Creates a new instance.
     * @param configuration the current configuration
     * @param repository the cache root path (must be absolute)
     * @param lockProvider the cache lock provider
     * @param retryStrategy the retry strategy
     */
    public HadoopFileCacheRepository(
            Configuration configuration,
            Path repository,
            LockProvider<? super Path> lockProvider,
            RetryStrategy retryStrategy) {
        if (repository.toUri().getScheme() == null) {
            throw new IllegalArgumentException(MessageFormat.format(
                    "Cache repository location must contan the scheme: {0}",
                    repository));
        }
        this.configuration = configuration;
        this.repository = repository;
        this.lockProvider = lockProvider;
        this.retryStrategy = retryStrategy;
        this.checkBeforeDelete = configuration.getBoolean(KEY_CHECK_BEFORE_DELETE, DEFAULT_CHECK_BEFORE_DELETE);
    }

    @Override
    public Path resolve(Path file) throws IOException, InterruptedException {
        FileSystem fs = file.getFileSystem(configuration);
        Path qualified = fs.makeQualified(file);
        return doResolve(qualified);
    }

    private Path doResolve(Path sourcePath) throws IOException, InterruptedException {
        assert sourcePath.isAbsolute();
        FileSystem fs = sourcePath.getFileSystem(configuration);
        if (fs.exists(sourcePath) == false) {
            throw new FileNotFoundException(sourcePath.toString());
        }
        long sourceChecksum = computeChecksum(fs, sourcePath);
        Path cachePath = computeCachePath(sourcePath);
        Path cacheChecksumPath = computeCacheChecksumPath(cachePath);

        IOException firstException = null;
        RetryObject retry = retryStrategy.newInstance(MessageFormat.format(
                "preparing cache ({0} -> {1})",
                sourcePath,
                cachePath));
        do {
            try {
                // TODO reduce lock scope?
                LockObject<? super Path> lock = lockProvider.tryLock(cachePath);
                if (lock == null) {
                    continue;
                }
                try {
                    if (isCached(cachePath, cacheChecksumPath, sourceChecksum)) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug(MessageFormat.format(
                                    "cache hit: {0} -> {1}",
                                    sourcePath,
                                    cachePath));
                        }
                        // just returns cached file
                    } else {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug(MessageFormat.format(
                                    "cache miss: {0} -> {1}",
                                    sourcePath,
                                    cachePath));
                        }
                        updateCache(sourcePath, sourceChecksum, cachePath, cacheChecksumPath);
                    }
                    return cachePath;
                } finally {
                    lock.close();
                }
            } catch (IOException e) {
                LOG.warn(MessageFormat.format(
                        "Failed to prepare cache: {0} -> {1}",
                        sourcePath,
                        cachePath), e);
                if (firstException == null) {
                    firstException = e;
                }
            }
        } while (retry.waitForNextAttempt());
        if (firstException == null) {
            throw new IOException(MessageFormat.format(
                    "Failed to acquire a lock for remote cache file: {0} ({1})",
                    sourcePath,
                    cachePath));
        }
        throw firstException;
    }

    private long computeChecksum(FileSystem fs, Path file) throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "Computing checksum: {0}",
                    file));
        }
        Checksum checksum = new CRC32();
        byte[] buf = byteBuffers.get();
        FSDataInputStream input = fs.open(file);
        try {
            while (true) {
                int read = input.read(buf);
                if (read < 0) {
                    break;
                }
                checksum.update(buf, 0, read);
            }
        } finally {
            input.close();
        }
        return checksum.getValue();
    }

    private Path computeCachePath(Path file) {
        assert repository != null;
        String directoryName;
        Path parent = file.getParent();
        if (parent == null) {
            directoryName = String.format("%08x", 0);
        } else {
            directoryName = String.format("%08x", parent.toString().hashCode());
        }
        Path directory = new Path(repository, directoryName);
        Path target = new Path(directory, file.getName());
        return target;
    }

    private Path computeCacheChecksumPath(Path cachePath) {
        Path parent = cachePath.getParent();
        String name = String.format("%s.acrc", cachePath.getName());
        return new Path(parent, name);
    }

    private boolean isCached(Path cacheFilePath, Path cacheChecksumPath, long checksum) throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format(
                    "checking remote cache: {0}",
                    cacheFilePath));
        }
        FileSystem fs = cacheChecksumPath.getFileSystem(configuration);
        if (fs.exists(cacheChecksumPath) == false || fs.exists(cacheFilePath) == false) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format(
                        "remote cache is not found: {0}",
                        cacheFilePath));
            }
            return false;
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format(
                        "reading remote cache checksum: {0}",
                        cacheFilePath));
            }
            long other;
            FSDataInputStream input = fs.open(cacheChecksumPath);
            try {
                other = input.readLong();
            } finally {
                input.close();
            }
            return checksum == other;
        }
    }

    private void updateCache(Path file, long checksum, Path cachePath, Path cacheChecksumPath) throws IOException {
        if (LOG.isInfoEnabled()) {
            LOG.info(MessageFormat.format(
                    "updating library cache: {0} -> {1}",
                    file,
                    cachePath));
        }

        FileSystem sourceFs = file.getFileSystem(configuration);
        FileSystem cacheFs = cachePath.getFileSystem(configuration);

        // remove checksum file -> cachePath
        delete(cacheFs, cacheChecksumPath);
        delete(cacheFs, cachePath);

        // sync source file to cache file
        FSDataOutputStream checksumOutput = cacheFs.create(cacheChecksumPath, false);
        try {
            checksumOutput.writeLong(checksum);
            syncFile(sourceFs, file, cacheFs, cachePath);
        } finally {
            checksumOutput.close();
        }
    }

    private void delete(FileSystem fs, Path path) throws IOException {
        if (checkBeforeDelete && fs.exists(path) == false) {
            return;
        }
        fs.delete(path, false);
    }

    private void syncFile(
            FileSystem sourceFs, Path sourceFile,
            FileSystem targetFs, Path targetFile) throws IOException {
        byte[] buf = byteBuffers.get();
        FSDataOutputStream output = targetFs.create(targetFile, false);
        try {
            FSDataInputStream input = sourceFs.open(sourceFile);
            try {
                while (true) {
                    int read = input.read(buf);
                    if (read < 0) {
                        break;
                    }
                    output.write(buf, 0, read);
                }
            } finally {
                input.close();
            }
        } finally {
            output.close();
        }
    }
}
TOP

Related Classes of com.asakusafw.runtime.util.cache.HadoopFileCacheRepository

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.