Package co.cask.cdap.explore.service

Source Code of co.cask.cdap.explore.service.ExploreServiceUtils

/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package co.cask.cdap.explore.service;

import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.data2.datafabric.dataset.service.DatasetService;
import co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory;
import co.cask.cdap.explore.guice.ExploreRuntimeModule;
import co.cask.cdap.explore.service.hive.Hive12ExploreService;
import co.cask.cdap.explore.service.hive.Hive13ExploreService;
import co.cask.cdap.explore.service.hive.HiveCDH4ExploreService;
import co.cask.cdap.explore.service.hive.HiveCDH5ExploreService;
import com.google.common.base.Function;
import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.VersionInfo;
import org.apache.twill.internal.utils.Dependencies;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Pattern;

/**
* Utility class for the explore service.
*/
public class ExploreServiceUtils {
  private static final Logger LOG = LoggerFactory.getLogger(ExploreServiceUtils.class);

  /**
   * Hive support enum.
   */
  public enum HiveSupport {
    HIVE_CDH4(Pattern.compile("^.*cdh4\\..*$"), HiveCDH4ExploreService.class),
    HIVE_CDH5(Pattern.compile("^.*cdh5\\..*$"), HiveCDH5ExploreService.class),

    HIVE_12(null, Hive12ExploreService.class),
    HIVE_13(null, Hive13ExploreService.class);

    private final Pattern hadoopVersionPattern;
    private final Class<? extends ExploreService> hiveExploreServiceClass;

    private HiveSupport(Pattern hadoopVersionPattern, Class<? extends ExploreService> hiveExploreServiceClass) {
      this.hadoopVersionPattern = hadoopVersionPattern;
      this.hiveExploreServiceClass = hiveExploreServiceClass;
    }

    public Pattern getHadoopVersionPattern() {
      return hadoopVersionPattern;
    }

    public Class<? extends ExploreService> getHiveExploreServiceClass() {
      return hiveExploreServiceClass;
    }
  }

  // Cache of the traced dependency jars, so that they are not traced twice.
  // Stays null until traceExploreDependencies(ClassLoader) has completed once.
  private static Set<File> exploreDependencies = null;
  // Cache of the class loader built by getExploreClassLoader(); null until it has been built once.
  private static ClassLoader exploreClassLoader = null;

  // Matches any path ending with the segment "/hive-site.xml"; used by hijackHiveConfFile(File).
  private static final Pattern HIVE_SITE_FILE_PATTERN = Pattern.compile("^.*/hive-site\\.xml$");

  /**
   * Get all the files contained in a class path.
   */
  public static Iterable<File> getClassPathJarsFiles(String hiveClassPath) {
    if (hiveClassPath == null) {
      return null;
    }
    return Iterables.transform(Splitter.on(':').split(hiveClassPath), STRING_FILE_FUNCTION);
  }

  private static final Function<String, File> STRING_FILE_FUNCTION =
    new Function<String, File>() {
      @Override
      public File apply(String input) {
        return new File(input).getAbsoluteFile();
      }
    };

  /**
   * Builds a class loader with the class path provided.
   */
  public static ClassLoader getExploreClassLoader() {
    if (exploreClassLoader != null) {
      return exploreClassLoader;
    }

    // EXPLORE_CLASSPATH will be defined in startup scripts if Hive is installed.
    String exploreClassPathStr = System.getProperty(Constants.Explore.EXPLORE_CLASSPATH);
    LOG.debug("Explore classpath = {}", exploreClassPathStr);
    if (exploreClassPathStr == null) {
      throw new RuntimeException("System property " + Constants.Explore.EXPLORE_CLASSPATH + " is not set.");
    }

    Iterable<File> hiveClassPath = getClassPathJarsFiles(exploreClassPathStr);
    ImmutableList.Builder<URL> builder = ImmutableList.builder();
    for (File jar : hiveClassPath) {
      try {
        builder.add(jar.toURI().toURL());
      } catch (MalformedURLException e) {
        LOG.error("Jar URL is malformed", e);
        Throwables.propagate(e);
      }
    }
    exploreClassLoader = new URLClassLoader(Iterables.toArray(builder.build(), URL.class),
                                            ClassLoader.getSystemClassLoader());
    return exploreClassLoader;
  }

  public static Class<? extends ExploreService> getHiveService(Configuration hConf) {
    HiveSupport hiveVersion = checkHiveSupportWithSecurity(hConf, null);
    Class<? extends ExploreService> hiveServiceCl = hiveVersion.getHiveExploreServiceClass();
    return hiveServiceCl;
  }

  /**
   * Check that Hive is in the class path - with a right version. Use a separate class loader to load Hive classes,
   * built using the explore classpath passed as a system property to master.
   */
  public static HiveSupport checkHiveSupportWithoutSecurity() {
    ClassLoader classLoader = getExploreClassLoader();
    return checkHiveSupportWithoutSecurity(classLoader);
  }

  /**
   * Check that Hive is in the class path - with a right version.
   *
   * @param hiveClassLoader class loader to use to load hive classes.
   *                        If null, the class loader of this class is used.
   */
  public static HiveSupport checkHiveSupportWithoutSecurity(ClassLoader hiveClassLoader) {
    try {
      ClassLoader usingCL = hiveClassLoader;
      if (usingCL == null) {
        usingCL = ExploreServiceUtils.class.getClassLoader();
      }

      // First try to figure which hive support is relevant based on Hadoop distribution name
      String hadoopVersion = VersionInfo.getVersion();
      LOG.info("Hadoop version is: {}", hadoopVersion);
      for (HiveSupport hiveSupport : HiveSupport.values()) {
        if (hiveSupport.getHadoopVersionPattern() != null &&
          hiveSupport.getHadoopVersionPattern().matcher(hadoopVersion).matches()) {
          return hiveSupport;
        }
      }

      // In Hive 12, CLIService.getOperationStatus returns OperationState.
      // In Hive 13, CLIService.getOperationStatus returns OperationStatus.
      Class cliServiceClass = usingCL.loadClass("org.apache.hive.service.cli.CLIService");
      Class operationHandleCl = usingCL.loadClass("org.apache.hive.service.cli.OperationHandle");
      Method getStatusMethod = cliServiceClass.getDeclaredMethod("getOperationStatus", operationHandleCl);

      // Rowset is an interface in Hive 13, but a class in Hive 12
      Class rowSetClass = usingCL.loadClass("org.apache.hive.service.cli.RowSet");

      if (rowSetClass.isInterface()
        && getStatusMethod.getReturnType() == usingCL.loadClass("org.apache.hive.service.cli.OperationStatus")) {
        return HiveSupport.HIVE_13;
      } else if (!rowSetClass.isInterface()
        && getStatusMethod.getReturnType() == usingCL.loadClass("org.apache.hive.service.cli.OperationState")) {
        return HiveSupport.HIVE_12;
      }
      throw new RuntimeException("Hive distribution not supported. Set the configuration '" +
                                 Constants.Explore.EXPLORE_ENABLED +
                                 "' to false to start up without Explore.");
    } catch (RuntimeException e) {
      throw e;
    } catch (Throwable e) {
      throw new RuntimeException("Hive jars not present in classpath. Set the configuration '" +
                                 Constants.Explore.EXPLORE_ENABLED +
                                 "' to false to start up without Explore.", e);
    }
  }

  /**
   * Check that Hive is in the class path - with a right version. Use a separate class loader to load Hive classes,
   * built using the explore classpath passed as a system property to master. Also check that Hadoop cluster is
   * not secure, as it is not supported by Explore yet.
   *
   * @param hConf HBase configuration used to check if Hadoop cluster is secure.
   */
  public static HiveSupport checkHiveSupportWithSecurity(Configuration hConf) {
    ClassLoader classLoader = getExploreClassLoader();
    return checkHiveSupportWithSecurity(hConf, classLoader);
  }

  /**
   * Check that Hive is in the class path - with a right version. Also check that Hadoop
   * cluster is not secure, as it is not supported by Explore yet.
   *
   * @param hConf HBase configuration used to check if Hadoop cluster is secure.
   * @param hiveClassLoader class loader to use to load hive classes.
   *                        If null, the class loader of this class is used.
   */
  public static HiveSupport checkHiveSupportWithSecurity(Configuration hConf, ClassLoader hiveClassLoader) {
    if (User.isHBaseSecurityEnabled(hConf)) {
      throw new RuntimeException("Explore is not supported on secure Hadoop clusters. Set the configuration '" +
                                 Constants.Explore.EXPLORE_ENABLED +
                                 "' to false to start without Explore.");
    }
    return checkHiveSupportWithoutSecurity(hiveClassLoader);
  }

  /**
   * Return the list of absolute paths of the bootstrap classes.
   */
  public static Set<String> getBoostrapClasses() {
    ImmutableSet.Builder<String> builder = ImmutableSet.builder();
    for (String classpath : Splitter.on(File.pathSeparatorChar).split(System.getProperty("sun.boot.class.path"))) {
      File file = new File(classpath);
      builder.add(file.getAbsolutePath());
      try {
        builder.add(file.getCanonicalPath());
      } catch (IOException e) {
        LOG.warn("Could not add canonical path to aux class path for file {}", file.toString(), e);
      }
    }
    return builder.build();
  }

  /**
   * Trace the jar dependencies needed by the Explore container. Uses a separate class loader to load Hive classes,
   * built using the explore classpath passed as a system property to master.
   *
   * @return an ordered set of jar files.
   */
  public static Set<File> traceExploreDependencies() throws IOException {
    if (exploreDependencies != null) {
      return exploreDependencies;
    }

    ClassLoader classLoader = getExploreClassLoader();
    return traceExploreDependencies(classLoader);
  }

  /**
   * Trace the jar dependencies needed by the Explore container.
   *
   * @param classLoader class loader to use to trace the dependencies.
   *                    If it is null, use the class loader of this class.
   * @return an ordered set of jar files.
   */
  public static Set<File> traceExploreDependencies(ClassLoader classLoader)
    throws IOException {
    if (exploreDependencies != null) {
      return exploreDependencies;
    }

    ClassLoader usingCL = classLoader;
    if (classLoader == null) {
      usingCL = ExploreRuntimeModule.class.getClassLoader();
    }
    Set<String> bootstrapClassPaths = getBoostrapClasses();

    Set<File> hBaseTableDeps = traceDependencies(new HBaseTableUtilFactory().get().getClass().getCanonicalName(),
                                                 bootstrapClassPaths, usingCL);

    // Note the order of dependency jars is important so that HBase jars come first in the classpath order
    // LinkedHashSet maintains insertion order while removing duplicate entries.
    Set<File> orderedDependencies = new LinkedHashSet<File>();
    orderedDependencies.addAll(hBaseTableDeps);
    orderedDependencies.addAll(traceDependencies(DatasetService.class.getCanonicalName(),
                                                 bootstrapClassPaths, usingCL));
    orderedDependencies.addAll(traceDependencies("co.cask.cdap.hive.datasets.DatasetStorageHandler",
                                                 bootstrapClassPaths, usingCL));
    orderedDependencies.addAll(traceDependencies("org.apache.hadoop.hive.ql.exec.mr.ExecDriver",
                                                 bootstrapClassPaths, usingCL));
    orderedDependencies.addAll(traceDependencies("org.apache.hive.service.cli.CLIService",
                                                 bootstrapClassPaths, usingCL));
    orderedDependencies.addAll(traceDependencies("org.apache.hadoop.mapred.YarnClientProtocolProvider",
                                                 bootstrapClassPaths, usingCL));

    // Needed for - at least - CDH 4.4 integration
    orderedDependencies.addAll(traceDependencies("org.apache.hive.builtins.BuiltinUtils",
                                                 bootstrapClassPaths, usingCL));

    // Needed for - at least - CDH 5 integration
    orderedDependencies.addAll(traceDependencies("org.apache.hadoop.hive.shims.Hadoop23Shims",
                                                 bootstrapClassPaths, usingCL));

    exploreDependencies = orderedDependencies;
    return orderedDependencies;
  }

  /**
   * Trace the dependencies files of the given className, using the classLoader,
   * and excluding any class contained in the bootstrapClassPaths and Kryo classes.
   * We need to remove Kryo dependency in the Explore container. Spark introduced version 2.21 version of Kryo,
   * which would be normally shipped to the Explore container. Yet, Hive requires Kryo 2.22,
   * and gets it from the Hive jars - hive-exec.jar to be precise.
   *
   * Nothing is returned if the classLoader does not contain the className.
   */
  public static Set<File> traceDependencies(String className, final Set<String> bootstrapClassPaths,
                                            ClassLoader classLoader)
    throws IOException {
    ClassLoader usingCL = classLoader;
    if (usingCL == null) {
      usingCL = ExploreRuntimeModule.class.getClassLoader();
    }
    final Set<File> jarFiles = Sets.newHashSet();

    Dependencies.findClassDependencies(
      usingCL,
      new Dependencies.ClassAcceptor() {
        @Override
        public boolean accept(String className, URL classUrl, URL classPathUrl) {
          if (bootstrapClassPaths.contains(classPathUrl.getFile())) {
            return false;
          }

          if (className.startsWith("com.esotericsoftware.kryo")) {
            return false;
          }

          jarFiles.add(new File(classPathUrl.getFile()));
          return true;
        }
      },
      className
    );

    return jarFiles;
  }

  /**
   * Check that the file is a hive-site.xml file, and return a temp copy of it to which are added
   * necessary options. If it is not a hive-site.xml file, return it as is.
   */
  public static File hijackHiveConfFile(File confFile) {
    if (!HIVE_SITE_FILE_PATTERN.matcher(confFile.getAbsolutePath()).matches()) {
      return confFile;
    }

    Configuration conf = new Configuration(false);
    try {
      conf.addResource(confFile.toURI().toURL());
    } catch (MalformedURLException e) {
      LOG.error("File {} is malformed.", confFile, e);
      throw Throwables.propagate(e);
    }

    // Prefer our job jar in the classpath
    // Set both old and new keys
    // Those settings will be in hive-site.xml in the classpath of the Explore Service. Therefore,
    // all HiveConf objects created there will have those settings, and they will be passed to
    // the map reduces jobs launched by Hive.
    conf.setBoolean("mapreduce.user.classpath.first", true);
    conf.setBoolean(Job.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);

    File newHiveConfFile = new File(Files.createTempDir(), "hive-site.xml");
    FileOutputStream fos;
    try {
      fos = new FileOutputStream(newHiveConfFile);
    } catch (FileNotFoundException e) {
      LOG.error("Problem creating temporary hive-site.xml conf file at {}", newHiveConfFile, e);
      throw Throwables.propagate(e);
    }

    try {
      conf.writeXml(fos);
    } catch (IOException e) {
      LOG.error("Could not write modified configuration to temporary hive-site.xml at {}", newHiveConfFile, e);
      throw Throwables.propagate(e);
    } finally {
      Closeables.closeQuietly(fos);
    }

    return newHiveConfFile;
  }
}
TOP

Related Classes of co.cask.cdap.explore.service.ExploreServiceUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.