Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.JobClient


      // mask the metastore password so it does not leak into logs
      String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
      if (pwd != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
      }
      JobClient jc = new JobClient(job);

      String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
      if (!addedJars.isEmpty()) {
        job.set("tmpjars", addedJars);
      }

      // make this client wait if the job tracker is not behaving well.
      Throttle.checkJobTracker(job, LOG);

      if (work.isGatheringStats()) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        String statsImplementationClass = HiveConf.getVar(job, HiveConf.ConfVars.HIVESTATSDBCLASS);
        if (StatsFactory.setImplementation(statsImplementationClass, job)) {
          statsPublisher = StatsFactory.getStatsPublisher();
          if (!statsPublisher.init(job)) { // creating stats table if not exists
            if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw
                new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }
        }
      }

      // Finally SUBMIT the JOB!
      rj = jc.submitJob(job);

      returnVal = jobExecHelper.progress(rj, jc);
      success = (returnVal == 0);

    } catch (Exception e) {
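
Stripped of the Hive-specific plumbing, the core JobClient flow in the snippet above is: build a JobConf, construct the client, submit, poll. A minimal sketch, assuming the conf is otherwise fully configured for a real job (the class and job names here are placeholders):

    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RunningJob;

    public class SubmitSketch {
      public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        conf.setJobName("submit-sketch");   // placeholder name

        JobClient jc = new JobClient(conf);
        RunningJob rj = jc.submitJob(conf); // returns immediately; the job runs asynchronously

        while (!rj.isComplete()) {          // poll until the job finishes
          Thread.sleep(1000);
        }
        System.out.println("succeeded: " + rj.isSuccessful());
        jc.close();
      }
    }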


    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);

    JobClient jc = new JobClient(new JobConf(job.getConfiguration()));

    Token<DelegationTokenIdentifier> mrdt = jc.getDelegationToken(new Text("mr token"));
    job.getCredentials().addToken(new Text("mr token"), mrdt);
    job.submit();

    submittedJobId = job.getJobID();
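
The snippet above pairs the new-API Job with an old-API JobClient just to obtain a MapReduce delegation token before submitting on a secure cluster. On the Hadoop versions these examples target, the token lifecycle helpers also live on JobClient; a sketch, assuming a Kerberos-secured cluster ("renewer" is a placeholder principal):

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
    import org.apache.hadoop.security.token.Token;

    public class TokenSketch {
      public static void main(String[] args) throws Exception {
        JobClient jc = new JobClient(new JobConf());
        Token<DelegationTokenIdentifier> token =
            jc.getDelegationToken(new Text("renewer")); // placeholder renewer principal
        long expiry = jc.renewDelegationToken(token);   // push the expiry time forward
        System.out.println("token now expires at " + expiry);
        jc.cancelDelegationToken(token);                // invalidate the token when done
        jc.close();
      }
    }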

      // mask the metastore password so it does not leak into logs
      String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
      if (pwd != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
      }
      JobClient jc = new JobClient(job);

      String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
      if (!addedJars.isEmpty()) {
        job.set("tmpjars", addedJars);
      }

      // make this client wait if the job tracker is not behaving well.
      Throttle.checkJobTracker(job, LOG);

      // Finally SUBMIT the JOB!
      rj = jc.submitJob(job);

      returnVal = jobExecHelper.progress(rj, jc);
      success = (returnVal == 0);

    } catch (Exception e) {
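
This variant of the Hive submission path differs from the first mainly in dropping the stats-publisher setup. One detail worth calling out is the "tmpjars" property: it is the same key that GenericOptionsParser fills in for -libjars, and jars listed there are shipped with the job. A minimal sketch with placeholder paths:

    import org.apache.hadoop.mapred.JobConf;

    public class TmpJarsSketch {
      public static void main(String[] args) {
        JobConf conf = new JobConf();
        // comma-separated jar URIs; these paths are placeholders
        conf.set("tmpjars", "file:///tmp/udfs.jar,file:///tmp/serde.jar");
        System.out.println(conf.get("tmpjars"));
      }
    }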

    }
    return this.callBackObj.checkFatalErrors(ctrs, errMsg);
  }

  private MapRedStats progress(ExecDriverTaskHandle th) throws IOException {
    JobClient jc = th.getJobClient();
    RunningJob rj = th.getRunningJob();
    String lastReport = "";
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
    long reportTime = System.currentTimeMillis();
    long maxReportInterval =
        HiveConf.getLongVar(job, HiveConf.ConfVars.HIVE_LOG_INCREMENTAL_PLAN_PROGRESS_INTERVAL);
    boolean fatal = false;
    StringBuilder errMsg = new StringBuilder();
    long pullInterval = HiveConf.getLongVar(job, HiveConf.ConfVars.HIVECOUNTERSPULLINTERVAL);
    boolean initializing = true;
    boolean initOutputPrinted = false;
    long cpuMsec = -1;
    int numMap = -1;
    int numReduce = -1;
    List<ClientStatsPublisher> clientStatPublishers = getClientStatPublishers();

    while (!rj.isComplete()) {
      try {
        Thread.sleep(pullInterval);
      } catch (InterruptedException e) {
        // ignore the interrupt and poll again
      }

      if (initializing && ShimLoader.getHadoopShims().isJobPreparing(rj)) {
        // No reason to poll until the job is initialized
        continue;
      } else {
        // By now the job is initialized, so there is no reason to call
        // rj.getJobState() again; we do not want to make an extra RPC call
        initializing = false;
      }

      if (!initOutputPrinted) {
        SessionState ss = SessionState.get();

        String logMapper;
        String logReducer;

        TaskReport[] mappers = jc.getMapTaskReports(rj.getJobID());
        if (mappers == null) {
          logMapper = "no information for number of mappers; ";
        } else {
          numMap = mappers.length;
          if (ss != null) {
            ss.getHiveHistory().setTaskProperty(SessionState.get().getQueryId(), getId(),
              Keys.TASK_NUM_MAPPERS, Integer.toString(numMap));
          }
          logMapper = "number of mappers: " + numMap + "; ";
        }

        TaskReport[] reducers = jc.getReduceTaskReports(rj.getJobID());
        if (reducers == null) {
          logReducer = "no information for number of reducers. ";
        } else {
          numReduce = reducers.length;
          if (ss != null) {
            ss.getHiveHistory().setTaskProperty(SessionState.get().getQueryId(), getId(),
              Keys.TASK_NUM_REDUCERS, Integer.toString(numReduce));
          }
          logReducer = "number of reducers: " + numReduce;
        }

        console
            .printInfo("Hadoop job information for " + getId() + ": " + logMapper + logReducer);
        initOutputPrinted = true;
      }

      RunningJob newRj = jc.getJob(rj.getJobID());
      if (newRj == null) {
        // Under exceptional load, hadoop may not be able to look up the status
        // of finished jobs (because it has purged them from memory). From
        // Hive's perspective this is equivalent to the job having failed,
        // so raise a meaningful exception
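
Underneath the Hive history bookkeeping, the loop above is a plain poll over RunningJob. The essential calls in isolation (a sketch, again assuming a fully configured conf):

    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.RunningJob;

    public class ProgressSketch {
      public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        JobClient jc = new JobClient(conf);
        RunningJob rj = jc.submitJob(conf);
        while (!rj.isComplete()) {
          // mapProgress()/reduceProgress() return fractions in [0, 1]
          System.out.printf("map %.0f%% reduce %.0f%%%n",
              rj.mapProgress() * 100, rj.reduceProgress() * 100);
          Thread.sleep(5000);
        }
        System.out.println("succeeded: " + rj.isSuccessful());
        jc.close();
      }
    }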

      // mask the metastore password so it does not leak into logs
      String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
      if (pwd != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
      }
      JobClient jc = new JobClient(job);
      // make this client wait if the job tracker is not behaving well.
      Throttle.checkJobTracker(job, LOG);

      if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) {
        // initialize stats publishing table
        StatsPublisher statsPublisher;
        String statsImplementationClass = HiveConf.getVar(job, HiveConf.ConfVars.HIVESTATSDBCLASS);
        if (StatsFactory.setImplementation(statsImplementationClass, job)) {
          statsPublisher = StatsFactory.getStatsPublisher();
          if (!statsPublisher.init(job)) { // creating stats table if not exists
            if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
              throw
                new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
            }
          }
        }
      }

      Utilities.createTmpDirs(job, mWork);
      Utilities.createTmpDirs(job, rWork);

      // Finally SUBMIT the JOB!
      rj = jc.submitJob(job);
      // replace it back
      if (pwd != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, pwd);
      }
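
What this variant adds over the earlier ones is the restore step: the metastore password is masked before submitJob serializes the configuration (so the shipped conf and the logs carry a dummy value) and put back right after submission. The same mask-and-restore pattern in isolation, with a hypothetical key:

    import org.apache.hadoop.mapred.JobConf;

    public class MaskSketch {
      public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.set("example.secret", "s3cr3t");  // hypothetical sensitive key

        String saved = conf.get("example.secret");
        conf.set("example.secret", "****");    // masked while the conf is shipped/logged
        try {
          // ... submit the job here; the serialized conf carries the masked value ...
        } finally {
          conf.set("example.secret", saved);   // restore for the local process
        }
      }
    }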

        try {
            JobConf jobConf = new JobConf();
            jobConf.set("mapred.job.tracker", executeUrl);
            jobConf.set("yarn.resourcemanager.address", executeUrl);
            JobClient jobClient = new JobClient(jobConf);
            jobClient.getClusterStatus().getMapTasks();
        } catch (IOException e) {
            throw new ValidationException("Invalid Execute server or port: " + executeUrl, e);
        }
    }
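
Here getClusterStatus() serves purely as a connectivity probe: constructing the JobClient and making one status RPC either succeeds or throws IOException, which the caller turns into a validation error. The returned ClusterStatus also carries capacity figures; a sketch (the tracker address is a placeholder):

    import org.apache.hadoop.mapred.ClusterStatus;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;

    public class ClusterProbe {
      public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        conf.set("mapred.job.tracker", "jt.example.com:8021"); // placeholder address
        JobClient jc = new JobClient(conf);
        ClusterStatus status = jc.getClusterStatus(); // throws IOException if unreachable
        System.out.println("trackers: " + status.getTaskTrackers()
            + ", running maps: " + status.getMapTasks()
            + ", map slots: " + status.getMaxMapTasks());
        jc.close();
      }
    }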

    }

    jobConf.setInt("distcp.file.count", cnfiles);
    jobConf.setLong("distcp.total.size", cbsize);

    JobClient client = new JobClient(jobConf);
    jobConf.setNumMapTasks(getMapCount(cbsize,
          client.getClusterStatus().getTaskTrackers()));
  }
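
The idea in this DistCp fragment is to scale the map count with the data volume while capping it by cluster size, which getMapCount presumably computes from the byte total and the tracker count. A plausible shape for such a helper (the constants are assumptions for illustration, not DistCp's actual defaults):

    public class MapCountSketch {
      private static final long BYTES_PER_MAP = 256L * 1024 * 1024; // assumed: 256 MB per map
      private static final int MAX_MAPS_PER_NODE = 20;              // assumed cap per tracker

      static int getMapCount(long totalBytes, int numNodes) {
        int byBytes = (int) (totalBytes / BYTES_PER_MAP);
        int cap = MAX_MAPS_PER_NODE * numNodes;
        return Math.max(1, Math.min(byBytes, cap)); // always at least one map
      }

      public static void main(String[] args) {
        // 10 GB across 4 trackers: 40 maps by size, cap is 80, so 40
        System.out.println(getMapCount(10L * 1024 * 1024 * 1024, 4));
      }
    }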

      File wd = new File(".").getAbsoluteFile();
      StreamUtil.unJar(new File(jar_), wd);
    }

    // if jobConf_ changes must recreate a JobClient
    jc_ = new JobClient(jobConf_);
    boolean error = true;
    running_ = null;
    String lastReport = null;
    try {
      running_ = jc_.submitJob(jobConf_);
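
The comment in that snippet is worth underlining: a JobClient binds to the cluster using the configuration it is handed at construction time, so cluster-level changes to the JobConf (such as the tracker address) are not visible through an existing client and require a new one. A minimal sketch with placeholder addresses:

    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;

    public class RecreateSketch {
      public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        conf.set("mapred.job.tracker", "jt-a.example.com:8021");
        JobClient jc = new JobClient(conf);   // connects using the conf as of now

        conf.set("mapred.job.tracker", "jt-b.example.com:8021");
        jc.close();
        jc = new JobClient(conf);             // recreate to pick up the change
        jc.close();
      }
    }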

   * @param job the job configuration to submit
   * @return true if the job completed successfully
   * @throws IOException
   */
  public static boolean runJob(JobConf job) throws IOException {
    JobClient jc = new JobClient(job);
    boolean success = true;
    RunningJob running = null;
    try {
      running = jc.submitJob(job);
      String jobId = running.getJobID();
      System.out.println("Job " + jobId + " is submitted");
      while (!running.isComplete()) {
        System.out.println("Job " + jobId + " is still running.");
        try {
          Thread.sleep(60000);
        } catch (InterruptedException e) {
          // ignore the interrupt and check the job state again
        }
        running = jc.getJob(jobId);
      }
      success = running.isSuccessful();
    } finally {
      if (!success && (running != null)) {
        running.killJob();
      }
      jc.close();
    }
    return success;
  }
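
A caller hands runJob a fully configured JobConf. A hedged usage sketch (the paths are placeholders and the job leans on the old-API defaults: identity mapper, text input and output):

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class RunJobExample {
      public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        conf.setJobName("runjob-example");                          // placeholder name
        FileInputFormat.setInputPaths(conf, new Path("/tmp/in"));   // placeholder input
        FileOutputFormat.setOutputPath(conf, new Path("/tmp/out")); // placeholder output
        conf.setNumReduceTasks(0);                                  // map-only pass-through
        boolean ok = runJob(conf); // the utility defined in the snippet above
        System.exit(ok ? 0 : 1);
      }
    }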

      throws IOException {
    int numMaps =
      (int)(totalBytes / job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP));
    numMaps = Math.min(numMaps,
        job.getInt(MAX_MAPS_LABEL, MAX_MAPS_PER_NODE *
          new JobClient(job).getClusterStatus().getTaskTrackers()));
    job.setNumMapTasks(Math.max(numMaps, 1));
  }
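
For concreteness, suppose BYTES_PER_MAP resolves to 256 MB and MAX_MAPS_PER_NODE to 20 (assumed values for illustration; the real defaults live in the tool's configuration): copying 10 GB gives 10 GB / 256 MB = 40 maps, the cap on a 4-tracker cluster is 20 * 4 = 80, so the Math.min leaves 40, and the final Math.max(numMaps, 1) only matters when the copy is smaller than one map's worth of bytes.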
