Source Code of org.apache.sqoop.submission.mapreduce.MapreduceSubmissionEngine

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.submission.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.Logger;
import org.apache.sqoop.common.MapContext;
import org.apache.sqoop.common.SqoopException;
import org.apache.sqoop.execution.mapreduce.MRSubmissionRequest;
import org.apache.sqoop.execution.mapreduce.MapreduceExecutionEngine;
import org.apache.sqoop.framework.SubmissionRequest;
import org.apache.sqoop.framework.SubmissionEngine;
import org.apache.sqoop.job.JobConstants;
import org.apache.sqoop.job.mr.ConfigurationUtils;
import org.apache.sqoop.submission.counter.Counter;
import org.apache.sqoop.submission.counter.CounterGroup;
import org.apache.sqoop.submission.counter.Counters;
import org.apache.sqoop.submission.SubmissionStatus;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Map;


/**
* This is a very simple and straightforward implementation of a map-reduce
* based submission engine.
*/
public class MapreduceSubmissionEngine extends SubmissionEngine {

  private static final Logger LOG = Logger.getLogger(MapreduceSubmissionEngine.class);

  /**
   * Global configuration object that is built from Hadoop configuration files
   * on engine initialization and cloned for each new submission.
   */
  private Configuration globalConfiguration;

  /**
   * Job client that is configured to talk to one specific job tracker.
   */
  private JobClient jobClient;


  /**
   * {@inheritDoc}
   */
  @Override
  public void initialize(MapContext context, String prefix) {
    LOG.info("Initializing Map-reduce Submission Engine");

    // Build the global configuration, starting with an empty configuration object
    globalConfiguration = new Configuration();
    globalConfiguration.clear();

    // Load the configured Hadoop configuration directory
    String configDirectory = context.getString(prefix + Constants.CONF_CONFIG_DIR);

    // Get the list of files ending with "-site.xml" (configuration files)
    File dir = new File(configDirectory);
    String [] files = dir.list(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String name) {
        return name.endsWith("-site.xml");
      }
    });

    if(files == null) {
      throw new SqoopException(MapreduceSubmissionError.MAPREDUCE_0002,
        "Invalid Hadoop configuration directory (not a directory or permission issues): " + configDirectory);
    }

    // Add each such file to our global configuration object
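    // (Configuration.addResource merges the XML properties into this object;
    // when the same key appears in several resources, the one added last wins
    // unless the property is marked final.)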
    for (String file : files) {
      LOG.info("Found hadoop configuration file " + file);
      try {
        globalConfiguration.addResource(new File(configDirectory, file).toURI().toURL());
      } catch (MalformedURLException e) {
        LOG.error("Can't load configuration file: " + file, e);
      }
    }

    // Save our own property inside the job to easily identify Sqoop jobs
    globalConfiguration.setBoolean(Constants.SQOOP_JOB, true);

    // Create job client
    try {
      jobClient = new JobClient(new JobConf(globalConfiguration));
    } catch (IOException e) {
      throw new SqoopException(MapreduceSubmissionError.MAPREDUCE_0002, e);
    }

    if(isLocal()) {
      LOG.info("Detected MapReduce local mode, some methods might not work correctly.");
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void destroy() {
    LOG.info("Destroying Mapreduce Submission Engine");

    // Closing job client
    try {
      jobClient.close();
    } catch (IOException e) {
      throw new SqoopException(MapreduceSubmissionError.MAPREDUCE_0005, e);
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public boolean isExecutionEngineSupported(Class executionEngineClass) {
    return executionEngineClass == MapreduceExecutionEngine.class;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public boolean submit(SubmissionRequest generalRequest) {
    // We support only map-reduce jobs
    MRSubmissionRequest request = (MRSubmissionRequest) generalRequest;

    // Clone global configuration
    Configuration configuration = new Configuration(globalConfiguration);
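    // (The copy constructor clones the property set, so the per-job mutations
    // below do not leak back into globalConfiguration.)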

    // Serialize job type as it will be needed by underlying execution engine
    ConfigurationUtils.setJobType(configuration, request.getJobType());

    // Serialize framework context into job configuration
    for(Map.Entry<String, String> entry: request.getFrameworkContext()) {
      if (entry.getValue() == null) {
        LOG.warn("Ignoring null framework context value for key " + entry.getKey());
        continue;
      }
      configuration.set(entry.getKey(), entry.getValue());
    }

    // Serialize connector context as a sub namespace
    for(Map.Entry<String, String> entry : request.getConnectorContext()) {
      if (entry.getValue() == null) {
        LOG.warn("Ignoring null connector context value for key " + entry.getKey());
        continue;
      }
      configuration.set(
        JobConstants.PREFIX_CONNECTOR_CONTEXT + entry.getKey(),
        entry.getValue());
    }

    // Set up notification URL if it's available
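    // (Hadoop's job-end notifier calls back to this URL once the job
    // finishes, letting the Sqoop server learn of completion without
    // polling.)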
    if(request.getNotificationUrl() != null) {
      configuration.set("job.end.notification.url", request.getNotificationUrl());
    }

    // Turn off speculative execution
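    // (These are the pre-YARN property names; recent Hadoop versions treat
    // them as deprecated aliases of mapreduce.map.speculative and
    // mapreduce.reduce.speculative, so the setting still takes effect.)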
    configuration.setBoolean("mapred.map.tasks.speculative.execution", false);
    configuration.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    // Promote all required jars to the job
    StringBuilder sb = new StringBuilder();
    boolean first = true;
    for(String jar : request.getJars()) {
      if(first) {
        first = false;
      } else {
        sb.append(",");
      }
      LOG.debug("Adding jar to the job: " + jar);
      sb.append(jar);
    }
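    // The resulting comma-separated list goes into "tmpjars", the property
    // behind -libjars: each listed jar is shipped to the cluster through the
    // distributed cache and added to the task classpath.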
    configuration.set("tmpjars", sb.toString());

    try {
      Job job = new Job(configuration);

      // And finally put all configuration objects to credentials cache
      ConfigurationUtils.setConfigConnectorConnection(job, request.getConfigConnectorConnection());
      ConfigurationUtils.setConfigConnectorJob(job, request.getConfigConnectorJob());
      ConfigurationUtils.setConfigFrameworkConnection(job, request.getConfigFrameworkConnection());
      ConfigurationUtils.setConfigFrameworkJob(job, request.getConfigFrameworkJob());

      if(request.getJobName() != null) {
        job.setJobName("Sqoop: " + request.getJobName());
      } else {
        job.setJobName("Sqoop job with id: " + request.getJobId());
      }

      job.setInputFormatClass(request.getInputFormatClass());

      job.setMapperClass(request.getMapperClass());
      job.setMapOutputKeyClass(request.getMapOutputKeyClass());
      job.setMapOutputValueClass(request.getMapOutputValueClass());

      String outputDirectory = request.getOutputDirectory();
      if(outputDirectory != null) {
        FileOutputFormat.setOutputPath(job, new Path(outputDirectory));
      }

      // Set the number of reducers to the number of configured loaders, or
      // suppress the reduce phase entirely if loaders are not set at all.
      if(request.getLoaders() != null) {
        job.setNumReduceTasks(request.getLoaders());
      } else {
        job.setNumReduceTasks(0);
      }
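      // (With zero reduce tasks the job is map-only: mapper output goes
      // straight to the OutputFormat and the shuffle phase is skipped.)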

      job.setOutputFormatClass(request.getOutputFormatClass());
      job.setOutputKeyClass(request.getOutputKeyClass());
      job.setOutputValueClass(request.getOutputValueClass());

      // If we're in local mode then wait for completion. The local job runner
      // does not seem to expose an API for retrieving a previously submitted
      // job, which makes the other methods of this submission engine quite
      // useless.
      if(isLocal()) {
        job.waitForCompletion(true);
      } else {
        job.submit();
      }

      String jobId = job.getJobID().toString();
      request.getSummary().setExternalId(jobId);
      request.getSummary().setExternalLink(job.getTrackingURL());

      LOG.debug("Executed new map-reduce job with id " + jobId);
    } catch (Exception e) {
      request.getSummary().setException(e);
      LOG.error("Error in submitting job", e);
      return false;
    }
    return true;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void stop(String submissionId) {
    try {
      RunningJob runningJob = jobClient.getJob(JobID.forName(submissionId));
      if(runningJob == null) {
        return;
      }

      runningJob.killJob();
    } catch (IOException e) {
      throw new SqoopException(MapreduceSubmissionError.MAPREDUCE_0003, e);
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public SubmissionStatus status(String submissionId) {
    try {
      RunningJob runningJob = jobClient.getJob(JobID.forName(submissionId));
      if(runningJob == null) {
        return SubmissionStatus.UNKNOWN;
      }

      int status = runningJob.getJobState();
      return convertMapreduceState(status);

    } catch (IOException e) {
      throw new SqoopException(MapreduceSubmissionError.MAPREDUCE_0003, e);
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public double progress(String submissionId) {
    try {
      // Get some reasonable approximation of map-reduce job progress
      // TODO(jarcec): What if we're running without reducers?
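      // (For a map-only job reduceProgress() typically reports 1.0f, so the
      // average below can overstate early progress; a map-only aware variant
      // could return mapProgress() alone in that case.)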
      RunningJob runningJob = jobClient.getJob(JobID.forName(submissionId));
      if(runningJob == null) {
        // Return default value
        return super.progress(submissionId);
      }

      return (runningJob.mapProgress() + runningJob.reduceProgress()) / 2;
    } catch (IOException e) {
      throw new SqoopException(MapreduceSubmissionError.MAPREDUCE_0003, e);
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public Counters counters(String submissionId) {
    try {
      RunningJob runningJob = jobClient.getJob(JobID.forName(submissionId));
      if(runningJob == null) {
        // Return default value
        return super.counters(submissionId);
      }

      return convertMapreduceCounters(runningJob.getCounters());
    } catch (IOException e) {
      throw new SqoopException(MapreduceSubmissionError.MAPREDUCE_0003, e);
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public String externalLink(String submissionId) {
    try {
      RunningJob runningJob = jobClient.getJob(JobID.forName(submissionId));
      if(runningJob == null) {
        return null;
      }

      return runningJob.getTrackingURL();
    } catch (IOException e) {
      throw new SqoopException(MapreduceSubmissionError.MAPREDUCE_0003, e);
    }
  }

  /**
   * Convert map-reduce specific job status constants to Sqoop job status
   * constants.
   *
   * @param status Map-reduce job constant
   * @return Equivalent submission status
   */
  protected SubmissionStatus convertMapreduceState(int status) {
    if(status == JobStatus.PREP) {
      return SubmissionStatus.BOOTING;
    } else if (status == JobStatus.RUNNING) {
      return SubmissionStatus.RUNNING;
    } else if (status == JobStatus.FAILED) {
      return SubmissionStatus.FAILED;
    } else if (status == JobStatus.KILLED) {
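      // Killed jobs are reported as FAILED as well; the SubmissionStatus enum
      // is assumed not to carry a separate KILLED state.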
      return SubmissionStatus.FAILED;
    } else if (status == JobStatus.SUCCEEDED) {
      return SubmissionStatus.SUCCEEDED;
    }

    throw new SqoopException(MapreduceSubmissionError.MAPREDUCE_0004,
      "Unknown status " + status);
  }

  /**
   * Convert Hadoop counters to Sqoop counters.
   *
   * @param hadoopCounters Hadoop counters
   * @return Appropriate Sqoop counters
   */
  private Counters convertMapreduceCounters(org.apache.hadoop.mapred.Counters hadoopCounters) {
    Counters sqoopCounters = new Counters();

    if(hadoopCounters == null) {
      return sqoopCounters;
    }

    for(org.apache.hadoop.mapred.Counters.Group hadoopGroup : hadoopCounters) {
      CounterGroup sqoopGroup = new CounterGroup(hadoopGroup.getName());
      for(org.apache.hadoop.mapred.Counters.Counter hadoopCounter : hadoopGroup) {
        Counter sqoopCounter = new Counter(hadoopCounter.getName(), hadoopCounter.getValue());
        sqoopGroup.addCounter(sqoopCounter);
      }
      sqoopCounters.addCounterGroup(sqoopGroup);
    }

    return sqoopCounters;
  }

  /**
   * Detect MapReduce local mode.
   *
   * @return True if we're running in local mode
   */
  private boolean isLocal() {
    // If framework is set to YARN, then we can't be running in local mode
    if("yarn".equals(globalConfiguration.get("mapreduce.framework.name"))) {
      return false;
    }

    // If job tracker address is "local" then we're running in local mode
    return "local".equals(globalConfiguration.get("mapreduce.jobtracker.address"))
        || "local".equals(globalConfiguration.get("mapred.job.tracker"));
  }
}
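
For illustration, here is a minimal driver sketch (not part of the original source) showing how the engine's lifecycle might be exercised. The property prefix and the "configuration.directory" key are assumptions modeled on how initialize() reads prefix + Constants.CONF_CONFIG_DIR; the Hadoop configuration path and the job id are placeholders.

import java.util.HashMap;
import java.util.Map;

import org.apache.sqoop.common.MapContext;
import org.apache.sqoop.submission.mapreduce.MapreduceSubmissionEngine;

public class MapreduceSubmissionEngineDemo {
  public static void main(String[] args) {
    // Assumed prefix and key name; the authoritative values live in the
    // Sqoop server configuration and this package's Constants class.
    String prefix = "org.apache.sqoop.submission.engine.mapreduce.";
    Map<String, String> properties = new HashMap<String, String>();
    properties.put(prefix + "configuration.directory", "/etc/hadoop/conf");

    MapreduceSubmissionEngine engine = new MapreduceSubmissionEngine();
    engine.initialize(new MapContext(properties), prefix);
    try {
      // Query a previously submitted job by its external (Hadoop) id.
      String jobId = "job_201301010001_0001"; // placeholder id
      System.out.println("Status:   " + engine.status(jobId));
      System.out.println("Progress: " + engine.progress(jobId));
      System.out.println("Tracking: " + engine.externalLink(jobId));
    } finally {
      engine.destroy();
    }
  }
}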