Package eu.stratosphere.nephele.execution.librarycache

Source Code of eu.stratosphere.nephele.execution.librarycache.LibraryCacheManager$LibraryTranslationKey

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.nephele.execution.librarycache;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLClassLoader;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;

import eu.stratosphere.core.fs.FSDataInputStream;
import eu.stratosphere.core.fs.FSDataOutputStream;
import eu.stratosphere.core.fs.FileStatus;
import eu.stratosphere.core.fs.FileSystem;
import eu.stratosphere.core.fs.Path;
import eu.stratosphere.core.io.StringRecord;
import eu.stratosphere.nephele.jobgraph.JobID;
import eu.stratosphere.util.StringUtils;

/**
* For each job graph that is submitted to the system the library cache manager maintains
* a set of libraries (typically JAR files) which the job requires to run. The library cache manager
* caches library files in order to avoid unnecessary retransmission of data. It is based on a singleton
* programming pattern, so there exists at most one library manager at a time.
* <p>
* This class is thread-safe.
*
*/
public final class LibraryCacheManager {

  /**
   * The instance of the library cache manager accessible through a singleton pattern. It stays
   * <code>null</code> until {@link #get()} creates it.
   */
  private static LibraryCacheManager libraryManager = null;

  /**
   * Map to translate client paths of libraries to the file name used by the cache manager. Entries are added by
   * {@link #addLibraryInternal(JobID, Path, long, DataInput)} and looked up when a job is registered by client paths.
   */
  private final ConcurrentMap<LibraryTranslationKey, String> clientPathToCacheName = new ConcurrentHashMap<LibraryTranslationKey, String>();

  /**
   * Name prefix of the directory to put cached libraries in. The constructor appends "-" and the user name and
   * places the directory below <code>java.io.tmpdir</code>.
   */
  private static final String LIBRARYCACHENAME = "libraryCache";

  /**
   * Algorithm to be used for calculating the checksum of the libraries.
   */
  private static final String HASHING_ALGORITHM = "SHA-1";

  /**
   * Dummy object used as the value in the lock map; only the presence of a key matters there.
   */
  private static final Object LOCK_OBJECT = new Object();

  /**
   * File system object used to access the local file system. It also serves as the monitor that guards
   * the exists/create/open sequences on the cache directory.
   */
  private final FileSystem fs;

  /**
   * The message digest object used to calculate the checksums of the libraries.
   */
  private final MessageDigest md;

  /**
   * Path pointing to the library cache directory.
   */
  private final Path libraryCachePath;

  /**
   * Map to translate a job ID to the responsible library cache manager entry.
   */
  private final ConcurrentMap<JobID, LibraryManagerEntry> libraryManagerEntries = new ConcurrentHashMap<JobID, LibraryManagerEntry>();

  /**
   * Map to store the number of references to a specific library manager entry.
   */
  private final ConcurrentMap<JobID, AtomicInteger> libraryReferenceCounter = new ConcurrentHashMap<JobID, AtomicInteger>();

  /**
   * Map to guarantee atomicity of register/unregister operations. A job ID is present as a key while a
   * register or unregister operation for that job is in progress.
   */
  private final ConcurrentMap<JobID, Object> lockMap = new ConcurrentHashMap<JobID, Object>();

  /**
   * Returns the singleton instance of the library cache manager.
   *
   * @return the singleton instance of the library cache manager.
   * @throws IOException
   *         thrown if access to the file system can not be obtained or the requested hashing algorithm does not exist
   */
  private static synchronized LibraryCacheManager get() throws IOException {

    // Lazy initialization
    if (libraryManager == null) {
      libraryManager = new LibraryCacheManager();
    }

    return libraryManager;
  }

  /**
   * Constructs a new instance of the library cache manager.
   *
   * @throws IOException
   *         thrown if access to the file system can not be obtained or the requested hashing algorithm does not exist
   */
  private LibraryCacheManager() throws IOException {

    // Check if the library cache directory exists, otherwise create it
    final String tmp = System.getProperty("java.io.tmpdir");
    if (tmp == null) {
      throw new IOException("Cannot find directory for temporary files");
    }

    this.fs = FileSystem.getLocalFileSystem();

    // Add user name of library cache path to avoid conflicts with regard to file permissions
    String userName = System.getProperty("user.name");
    if (userName == null) {
      userName = "default";
    }

    // Use the File object to the convert the path to a proper URI
    final File path = new File(tmp + File.separator + LIBRARYCACHENAME + "-" + userName);
    final URI uri = path.toURI();

    this.libraryCachePath = new Path(uri);

    this.fs.mkdirs(this.libraryCachePath);

    // Create an MD5 message digest object we can use
    try {
      this.md = MessageDigest.getInstance(HASHING_ALGORITHM);
    } catch (NoSuchAlgorithmException e) {
      throw new IOException("Cannot find algorithm " + HASHING_ALGORITHM + ": "
        + StringUtils.stringifyException(e));
    }
  }

  /**
   * Increments the reference counter for the library manager entry with the given job ID.
   *
   * @param jobID
   *        the job ID identifying the library manager entry
   * @return the increased reference counter
   */
  private int incrementReferenceCounter(final JobID jobID) {

    while (true) {

      AtomicInteger ai = this.libraryReferenceCounter.get(jobID);
      if (ai == null) {

        ai = new AtomicInteger(1);
        if (this.libraryReferenceCounter.putIfAbsent(jobID, ai) == null) {
          return 1;
        }

        // We had a race, try again
      } else {
        return ai.incrementAndGet();
      }
    }
  }

  /**
   * Decrements the reference counter for the library manager entry with the given job ID.
   *
   * @param jobID
   *        the job ID identifying the library manager entry
   * @return the decremented reference counter
   */
  private int decrementReferenceCounter(final JobID jobID) {

    final AtomicInteger ai = this.libraryReferenceCounter.get(jobID);

    if (ai == null) {
      throw new IllegalStateException("Cannot find reference counter entry for job " + jobID);
    }

    int retVal = ai.decrementAndGet();

    if (retVal == 0) {
      this.libraryReferenceCounter.remove(jobID);
    }

    return retVal;
  }

  /**
   * Registers a job ID with a set of library paths that are required to run the job. The library paths are given in
   * terms
   * of client paths, so the method first translates the client paths into the corresponding internal cache names. For
   * every registered
   * job the library cache manager creates a class loader that is used to instantiate the job's environment later on.
   *
   * @param id
   *        the ID of the job to be registered
   * @param clientPaths
   *        the client path's of the required libraries
   * @throws IOException
   *         thrown if the library cache manager could not be instantiated, no mapping between the job ID and a job ID
   *         exists or the requested library is not in the cache.
   */
  public static void register(final JobID id, final Path[] clientPaths) throws IOException {

    final LibraryCacheManager lib = get();
    lib.registerInternal(id, clientPaths);
  }

  /**
   * Registers a job ID with a set of library paths that are required to run the job. The library paths are given in
   * terms
   * of client paths, so the method first translates the client paths into the corresponding internal cache names. For
   * every registered
   * job the library cache manager creates a class loader that is used to instantiate the job's environment later on.
   *
   * @param id
   *        the ID of the job to be registered.
   * @param clientPaths
   *        the client path's of the required libraries
   * @throws IOException
   *         thrown if no mapping between the job ID and a job ID exists or the requested library is not in the cache.
   */
  private void registerInternal(final JobID id, final Path[] clientPaths) throws IOException {

    final String[] cacheNames = new String[clientPaths.length];

    for (int i = 0; i < clientPaths.length; ++i) {
      final LibraryTranslationKey key = new LibraryTranslationKey(id, clientPaths[i]);
      cacheNames[i] = this.clientPathToCacheName.get(key);
      if (cacheNames[i] == null) {
        throw new IOException("Cannot map" + clientPaths[i].toString() + " to cache name");
      }
    }

    // Register as regular
    registerInternal(id, cacheNames);
  }

  /**
   * Registers a job ID with a set of library paths that are required to run the job. For every registered
   * job the library cache manager creates a class loader that is used to instantiate the job's environment later on.
   *
   * @param id
   *        the ID of the job to be registered.
   * @param clientPaths
   *        the client path's of the required libraries
   * @throws IOException
   *         thrown if the library cache manager could not be instantiated or one of the requested libraries is not in
   *         the cache
   */
  public static void register(final JobID id, final String[] requiredJarFiles) throws IOException {

    final LibraryCacheManager lib = get();
    lib.registerInternal(id, requiredJarFiles);
  }

  /**
   * Registers a job ID with a set of library paths that are required to run the job. For every registered
   * job the library cache manager creates a class loader that is used to instantiate the vertex's environment later
   * on.
   *
   * @param id
   *        the ID of the job to be registered.
   * @param clientPaths
   *        the client path's of the required libraries
   * @throws IOException
   *         thrown if one of the requested libraries is not in the cache
   */
  private void registerInternal(final JobID id, final String[] requiredJarFiles) throws IOException {

    // Use spin lock here
    while (this.lockMap.putIfAbsent(id, LOCK_OBJECT) != null);

    try {
      if (incrementReferenceCounter(id) > 1) {
        return;
      }

      // Check if library manager entry for this id already exists
      if (this.libraryManagerEntries.containsKey(id)) {
        throw new IllegalStateException("Library cache manager already contains entry for job ID " + id);
      }

      // Check if all the required jar files exist in the cache
      URL[] urls = null;
      if (requiredJarFiles != null) {

        urls = new URL[requiredJarFiles.length];

        for (int i = 0; i < requiredJarFiles.length; i++) {
          final Path p = contains(requiredJarFiles[i]);
          if (p == null) {
            throw new IOException(requiredJarFiles[i] + " does not exist in the library cache");
          }

          // Add file to the URL array
          try {
            urls[i] = p.toUri().toURL();
          } catch (MalformedURLException e) {
            throw new IOException(StringUtils.stringifyException(e));
          }
        }
      }

      final LibraryManagerEntry entry = new LibraryManagerEntry(id, requiredJarFiles, urls);

      this.libraryManagerEntries.put(id, entry);

    } finally {
      this.lockMap.remove(id);
    }
  }

  /**
   * Unregisters a job ID and releases the resources associated with it.
   *
   * @param id
   *        the job ID to unregister
   * @throws IOException
   *         thrown if the library cache manager could not be instantiated
   */
  public static void unregister(final JobID id) throws IOException {

    final LibraryCacheManager lib = get();
    lib.unregisterInternal(id);
  }

  /**
   * Unregisters a job ID and releases the resources associated with it.
   *
   * @param id
   *        the job ID to unregister
   */
  private void unregisterInternal(final JobID id) {

    // Use spin lock here
    while (this.lockMap.putIfAbsent(id, LOCK_OBJECT) != null);

    if (decrementReferenceCounter(id) == 0) {
      this.libraryManagerEntries.remove(id);
    }

    this.lockMap.remove(id);
  }

  /**
   * Checks if the given library is in the local cache.
   *
   * @param cacheName
   *        The name of the library to be checked for.
   * @return the path object of the library if it is cached, <code>null</code> otherwise
   * @throws IOException
   *         thrown if the library cache manager could not be instantiated or no access to the file system could be
   *         obtained
   */
  public static Path contains(final String cacheName) throws IOException {

    final LibraryCacheManager lib = get();
    return lib.containsInternal(cacheName);
  }

  /**
   * Checks if the given library is in the local cache.
   *
   * @param cacheName
   *        The name of the library to be checked for.
   * @return the path object of the library if it is cached, <code>null</code> otherwise
   * @throws IOException
   *         thrown if no access to the file system could be obtained
   */
  private Path containsInternal(final String cacheName) throws IOException {

    // Create a path object from the external name string
    final Path p = new Path(this.libraryCachePath + File.separator + cacheName);

    synchronized (this.fs) {
      if (fs.exists(p)) {
        return p;
      }
    }

    return null;
  }

  /**
   * Returns the class loader to the specified vertex.
   *
   * @param id
   *        the ID of the job to return the class loader for
   * @return the class loader of requested vertex or <code>null</code> if no class loader has been registered with the
   *         given ID.
   * @throws IOException
   *         thrown if the library cache manager could not be instantiated
   */
  public static ClassLoader getClassLoader(final JobID id) throws IOException {

    if (id == null) {
      return null;
    }

    final LibraryCacheManager lib = get();
    return lib.getClassLoaderInternal(id);
  }

  /**
   * Returns the class loader to the specified vertex.
   *
   * @param id
   *        the ID of the job to return the class loader for
   * @return the class loader of requested vertex or <code>null</code> if no class loader has been registered with the
   *         given ID.
   * @throws IOException
   *         thrown if the library cache manager could not be instantiated
   */
  private ClassLoader getClassLoaderInternal(final JobID id) {

    final LibraryManagerEntry entry = this.libraryManagerEntries.get(id);

    if (entry == null) {
      return null;
    }

    return entry.getClassLoader();
  }

  /**
   * Returns the names of the required libraries of the specified job.
   *
   * @param id
   *        the ID of the job to return the names of required libraries for.
   * @return the names of the required libraries or <code>null</code> if the specified job ID is unknown
   * @throws IOException
   *         thrown if the library cache manager could not be instantiated
   */
  public static String[] getRequiredJarFiles(final JobID id) throws IOException {

    if (id == null) {
      return new String[0];
    }

    final LibraryCacheManager lib = get();

    return lib.getRequiredJarFilesInternal(id);
  }

  /**
   * Returns the names of the required libraries of the specified job.
   *
   * @param id
   *        the ID of the job to return the names of required libraries for.
   * @return the names of the required libraries or <code>null</code> if the specified job ID is unknown
   */
  private String[] getRequiredJarFilesInternal(final JobID id) {

    final LibraryManagerEntry entry = this.libraryManagerEntries.get(id);

    if (entry == null) {
      return null;
    }

    return entry.getRequiredJarFiles();
  }

  /**
   * Writes data from the library with the given file name to the specified stream.
   *
   * @param libraryFileName
   *        the name of the library
   * @param out
   *        the stream to write the data to
   * @throws IOException
   *         thrown if an error occurs while writing the data
   */
  public static void writeLibraryToStream(final String libraryFileName, final DataOutput out) throws IOException {

    final LibraryCacheManager lib = get();
    lib.writeLibraryToStreamInternal(libraryFileName, out);

  }

  /**
   * Writes data from the library with the given file name to the specified stream.
   *
   * @param libraryFileName
   *        the name of the library
   * @param out
   *        the stream to write the data to
   * @throws IOException
   *         thrown if an error occurs while writing the data
   */
  private void writeLibraryToStreamInternal(final String libraryFileName, final DataOutput out) throws IOException {

    if (libraryFileName == null) {
      throw new IOException("libraryName is null!");
    }

    final Path storePath = new Path(this.libraryCachePath + "/" + libraryFileName);

    synchronized (this.fs) {

      if (!fs.exists(storePath)) {
        throw new IOException(storePath + " does not exist!");
      }

      final FileStatus status = fs.getFileStatus(storePath);

      StringRecord.writeString(out, libraryFileName);
      out.writeLong(status.getLen());

      final FSDataInputStream inStream = fs.open(storePath);
      final byte[] buf = new byte[8192]; // 8K Buffer*/
      int read = inStream.read(buf, 0, buf.length);
      while (read > 0) {
        out.write(buf, 0, read);
        read = inStream.read(buf, 0, buf.length);
      }

      inStream.close();
    }
  }

  /**
   * Reads library data from the given stream.
   *
   * @param in
   *        the stream to read the library data from
   * @throws IOException
   *         throws if an error occurs while reading from the stream
   */
  public static void readLibraryFromStream(final DataInput in) throws IOException {

    final LibraryCacheManager lib = get();
    lib.readLibraryFromStreamInternal(in);

  }

  /**
   * Reads library data from the given stream.
   *
   * @param in
   *        the stream to read the library data from
   * @throws IOException
   *         throws if an error occurs while reading from the stream
   */
  private void readLibraryFromStreamInternal(final DataInput in) throws IOException {

    final String libraryFileName = StringRecord.readString(in);

    if (libraryFileName == null) {
      throw new IOException("libraryFileName is null!");
    }

    final long length = in.readLong();

    if (length > (long) Integer.MAX_VALUE) {
      throw new IOException("Submitted jar file " + libraryFileName + " is too large");
    }

    final byte[] buf = new byte[(int) length];
    in.readFully(buf);

    final Path storePath = new Path(this.libraryCachePath + "/" + libraryFileName);

    synchronized (this.fs) {

      // Check if file already exists in our library cache, if not write it to the cache directory
      if (!fs.exists(storePath)) {
        final FSDataOutputStream fos = fs.create(storePath, false);
        fos.write(buf, 0, buf.length);
        fos.close();
      }
    }
  }

  /**
   * Reads a library from the given input stream and adds it to the local library cache. The cache name of
   * the library is determined by the checksum of the received data and cannot be specified manually.
   *
   * @param jobID
   *        the ID of the job the library data belongs to
   * @param name
   *        the name of the library at the clients host
   * @param size
   *        the size of the library to be read from the input stream
   * @param in
   *        the data input stream
   * @throws IOException
   *         thrown if the library cache manager could not be instantiated or an error occurred while reading the
   *         library data from the input stream
   */
  public static void addLibrary(final JobID jobID, final Path name, final long size, final DataInput in)
      throws IOException {

    final LibraryCacheManager lib = get();
    lib.addLibraryInternal(jobID, name, size, in);
  }

  /**
   * Reads a library from the given input stream and adds it to the local library cache. The cache name of
   * the library is determined by the checksum of the received data and cannot be specified manually.
   *
   * @param jobID
   *        the ID of the job the library data belongs to
   * @param name
   *        the name of the library at the clients host
   * @param size
   *        the size of the library to be read from the input stream
   * @param in
   *        the data input stream
   * @throws IOException
   *         thrown if an error occurred while reading the library data from the input stream
   */
  private void addLibraryInternal(final JobID jobID, final Path name, final long size, final DataInput in)
      throws IOException {

    if (size > (long) Integer.MAX_VALUE) {
      throw new IOException("Submitted jar file " + name + " is too large");
    }

    // Map the entire jar file to memory
    final byte[] buf = new byte[(int) size];
    in.readFully(buf);

    // Reset and calculate message digest from jar file
    this.md.reset();
    this.md.update(buf);

    // Construct internal jar name from digest
    final String cacheName = StringUtils.byteToHexString(md.digest()) + ".jar";
    final Path storePath = new Path(this.libraryCachePath + "/" + cacheName);

    synchronized (this.fs) {

      // Check if file already exists in our library cache, if not write it to the cache directory
      if (!this.fs.exists(storePath)) {
        final FSDataOutputStream fos = this.fs.create(storePath, false);
        fos.write(buf, 0, buf.length);
        fos.close();
      }
    }

    // Create mapping for client path and cache name
    final LibraryTranslationKey key = new LibraryTranslationKey(jobID, name);
    this.clientPathToCacheName.putIfAbsent(key, cacheName);
  }

  /**
   * Auxiliary class that stores the class loader object as well as the names of the required
   * libraries for a job vertex.
   *
   */
  private static class LibraryManagerEntry {

    /**
     * The class loader object for the Nephele job this object belongs to.
     */
    private final ClassLoader classLoader;

    /**
     * A list containing the names of the JAR files required by the Nephele job this object belongs to.
     */
    private final String[] requiredJarFiles;

    /**
     * Constructs a <code>LibraryManagerEntry</code> object from the given job ID and array of required library
     * files.
     *
     * @param id
     *        the ID of the job to create a <code>LibraryManagerEntry</code> for.
     * @param requiredJarFiles
     *        an array with the names of required libraries by the corresponding job (plain names)
     * @param urls
     *        an array with the names of required libraries by the corresponding job (URL objects required by the
     *        class loader)
     */
    public LibraryManagerEntry(final JobID id, final String[] requiredJarFiles, URL[] urls) {

      String[] temp = requiredJarFiles;
      if (temp == null) {
        temp = new String[0];
      }

      this.requiredJarFiles = temp;

      if (urls == null) {
        urls = new URL[0];
      }

      this.classLoader = new URLClassLoader(urls, ClassLoader.getSystemClassLoader());
    }

    /**
     * Returns the class loader associated with this library manager entry.
     *
     * @return the class loader associated with this library manager entry
     */
    public ClassLoader getClassLoader() {
      return this.classLoader;
    }

    /**
     * Returns a (possibly empty) array of library names required by the associated job vertex to run.
     *
     * @return a (possibly empty) array of library names required by the associated job vertex to run
     */
    public String[] getRequiredJarFiles() {
      return this.requiredJarFiles;
    }
  }

  /**
   * Auxiliary class that acts as a key for the translation of the names a client uses to refer to required libraries
   * for a vertex
   * and the internal names used by the library cache manager.
   *
   */
  private static class LibraryTranslationKey {

    /**
     * The ID of the job this object belongs to.
     */
    private final JobID jobID;

    /**
     * The path at which the library has been stored at the client.
     */
    private final Path clientPath;

    /**
     * Construct a <code>LibraryTranslationKey</code> object from a fiven job ID and a client path that specifies
     * the name of
     * required library at the job client.
     *
     * @param jobID
     *        the job ID
     * @param clientPath
     *        the client path
     */
    public LibraryTranslationKey(final JobID jobID, final Path clientPath) {

      this.jobID = jobID;
      this.clientPath = clientPath;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int hashCode() {

      final long temp = (this.jobID.hashCode() + this.clientPath.hashCode()) % Integer.MAX_VALUE;

      return (int) temp;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean equals(final Object obj) {

      if (obj == null) {
        return false;
      }

      if (this.jobID == null) {
        return false;
      }

      if (this.clientPath == null) {
        return false;
      }

      if (obj instanceof LibraryTranslationKey) {

        final LibraryTranslationKey key = (LibraryTranslationKey) obj;
        if (this.jobID.equals(key.getJobID()) && this.clientPath.equals(key.getClientPath())) {
          return true;
        }
      }

      return false;
    }

    /**
     * Returns the client path associated with this object.
     *
     * @return the client path associated with this object
     */
    public Path getClientPath() {
      return this.clientPath;
    }

    /**
     * Returns the job ID associated with this object.
     *
     * @return the job ID associated with this object
     */
    public JobID getJobID() {
      return this.jobID;
    }
  }
}
TOP

Related Classes of eu.stratosphere.nephele.execution.librarycache.LibraryCacheManager$LibraryTranslationKey

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.