Package org.apache.hadoop.mapred.jobcontrol

Examples of org.apache.hadoop.mapred.jobcontrol.JobControl


        conf.set("mapred.job.tracker", "local");
        JobClient jobClient = new JobClient(new JobConf(conf));

        JobControlCompiler jcc = new JobControlCompiler();
       
        JobControl jc = jcc.compile(mrp, grpName, conf, pc);
       
       
        int numMRJobs = jc.getWaitingJobs().size();
       
        new Thread(jc).start();

        double lastProg = -1;
        int perCom = 0;
        while(!jc.allFinished()){
            try {
                Thread.sleep(sleepTime);
            } catch (InterruptedException e) {}
            double prog = calculateProgress(jc, jobClient)/numMRJobs;
            if(prog>=(lastProg+0.01)){
                perCom = (int)(prog * 100);
                if(perCom!=100)
                    log.info( perCom + "% complete");
            }
            lastProg = prog;
        }
        // Look to see if any jobs failed.  If so, we need to report that.
        List<Job> failedJobs = jc.getFailedJobs();
        if (failedJobs != null && failedJobs.size() > 0) {
            log.error("Map reduce job failed");
            for (Job fj : failedJobs) {
                log.error(fj.getMessage());
                getStats(fj, jobClient, true, pc);
            }
            jc.stop();
            return false;
        }

        List<Job> succJobs = jc.getSuccessfulJobs();
        if(succJobs!=null)
            for(Job job : succJobs){
                getStats(job,jobClient, false, pc);
            }

        jc.stop();
        log.info( "100% complete");
        log.info("Success!");
        return true;
    }
View Full Code Here


        MapReduceOper mro = mrp.getLeaves().get(0);
        mro.combinePlan = combinePlan;
       
        JobControlCompiler jcc = new JobControlCompiler();

        JobControl jc = jcc.compile(mrp, grpName, conf, pc);

        int numMRJobs = jc.getWaitingJobs().size();

        new Thread(jc).start();

        double lastProg = -1;
        while (!jc.allFinished()) {
            try {
                Thread.sleep(sleepTime);
            } catch (InterruptedException e) {
            }
            double prog = calculateProgress(jc, jobClient) / numMRJobs;
            if (prog > lastProg)
                log.info((int)(prog * 100) + "% complete");
            lastProg = prog;
        }
        lastProg = calculateProgress(jc, jobClient) / numMRJobs;
        if (isComplete(lastProg))
            log.info("Completed Successfully");
        else {
            log.info("Unsuccessful attempt. Completed " + lastProg * 100
                    + "% of the job");
            List<Job> failedJobs = jc.getFailedJobs();
            if (failedJobs == null)
                throw new ExecException(
                        "Something terribly wrong with Job Control.");
            for (Job job : failedJobs) {
                getStats(job, jobClient, true, pc);
            }
        }
        List<Job> succJobs = jc.getSuccessfulJobs();
        if (succJobs != null)
            for (Job job : succJobs) {
                getStats(job, jobClient, false, pc);
            }

        jc.stop();

        return isComplete(lastProg);
    }
View Full Code Here

public class ValueAggregatorJob {

  public static JobControl createValueAggregatorJobs(String args[]
    , Class<? extends ValueAggregatorDescriptor>[] descriptors) throws IOException {
   
    JobControl theControl = new JobControl("ValueAggregatorJobs");
    ArrayList<Job> dependingJobs = new ArrayList<Job>();
    JobConf aJobConf = createValueAggregatorJob(args, (Class<?>) null);
    if(descriptors != null)
      setAggregatorDescriptors(aJobConf, descriptors);
    Job aJob = new Job(aJobConf, dependingJobs);
    theControl.addJob(aJob);
    return theControl;
  }
View Full Code Here

*/
public class ValueAggregatorJob {

  public static JobControl createValueAggregatorJobs(String args[])
      throws IOException {
    JobControl theControl = new JobControl("ValueAggregatorJobs");
    ArrayList dependingJobs = new ArrayList();
    JobConf aJobConf = createValueAggregatorJob(args);
    Job aJob = new Job(aJobConf, dependingJobs);
    theControl.addJob(aJob);
    return theControl;
  }
View Full Code Here

        ConfigurationValidator.validatePigProperties(pc.getProperties());
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        JobControl jobControl = jcc.compile(mrPlan, "Test");
        Job job = jobControl.getWaitingJobs().get(0);
        int parallel = job.getJobConf().getNumReduceTasks();

        assertTrue(parallel==100);
       
        pc.defaultParallel = -1;       
View Full Code Here

        pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
        pc.getConf().setProperty("pig.exec.reducers.max", "10");
        ConfigurationValidator.validatePigProperties(pc.getProperties());
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
        JobControl jc=jcc.compile(mrPlan, "Test");
        Job job = jc.getWaitingJobs().get(0);
        long reducer=Math.min((long)Math.ceil(new File("test/org/apache/pig/test/data/passwd").length()/100.0), 10);
        assertEquals(job.getJobConf().getLong("mapred.reduce.tasks",10), reducer);
       
        // use the PARALLEL key word, it will override the estimated reducer number
        query = "a = load '/passwd';" +
                "b = group a by $0 PARALLEL 2;" +
                "store b into 'output';";
        pp = Util.buildPp(ps, query);
        mrPlan = Util.buildMRPlan(pp, pc);
              
        pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
        pc.getConf().setProperty("pig.exec.reducers.max", "10");
        ConfigurationValidator.validatePigProperties(pc.getProperties());
        conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        jcc = new JobControlCompiler(pc, conf);
        jc=jcc.compile(mrPlan, "Test");
        job = jc.getWaitingJobs().get(0);
        assertEquals(job.getJobConf().getLong("mapred.reduce.tasks",10), 2);
       
        final byte[] COLUMNFAMILY = Bytes.toBytes("pig");
        HTable table = util.createTable(Bytes.toBytesBinary("passwd"),
                COLUMNFAMILY);
       
        // the estimation won't take effect when it is applied to non-DFS inputs or when the files don't exist, such as HBase
        query = "a = load 'hbase://passwd' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');" +
                "b = group a by $0 ;" +
                "store b into 'output';";
        pp = Util.buildPp(ps, query);
        mrPlan = Util.buildMRPlan(pp, pc);
               
        pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
        pc.getConf().setProperty("pig.exec.reducers.max", "10");

        ConfigurationValidator.validatePigProperties(pc.getProperties());
        conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        jcc = new JobControlCompiler(pc, conf);
        jc=jcc.compile(mrPlan, "Test");
        job = jc.getWaitingJobs().get(0);
        assertEquals(job.getJobConf().getLong("mapred.reduce.tasks",10), 1);
        util.deleteTable(Bytes.toBytesBinary("passwd"));
    }
View Full Code Here

       
        List<Job> failedJobs = new LinkedList<Job>();
        List<NativeMapReduceOper> failedNativeMR = new LinkedList<NativeMapReduceOper>();
        List<Job> completeFailedJobsInThisRun = new LinkedList<Job>();
        List<Job> succJobs = new LinkedList<Job>();
        JobControl jc;
        int totalMRJobs = mrp.size();
        int numMRJobsCompl = 0;
        double lastProg = -1;
       
        //create the exception handler for the job control thread
        //and register the handler with the job control thread
        JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();
       
        boolean stop_on_failure =
            pc.getProperties().getProperty("stop.on.failure", "false").equals("true");
       
        // jc is null only when mrp.size == 0
        while(mrp.size() != 0) {
            jc = jcc.compile(mrp, grpName);
            if(jc == null) {
                List<MapReduceOper> roots = new LinkedList<MapReduceOper>();
                roots.addAll(mrp.getRoots());
               
                // run the native mapreduce roots first then run the rest of the roots
                for(MapReduceOper mro: roots) {
                    if(mro instanceof NativeMapReduceOper) {
                        NativeMapReduceOper natOp = (NativeMapReduceOper)mro;
                        try {
                            ScriptState.get().emitJobsSubmittedNotification(1);
                            natOp.runJob();
                            numMRJobsCompl++;
                        } catch (IOException e) {
                           
                            mrp.trimBelow(natOp);
                            failedNativeMR.add(natOp);
                           
                            String msg = "Error running native mapreduce" +
                            " operator job :" + natOp.getJobId() + e.getMessage();
                           
                            String stackTrace = getStackStraceStr(e);
                            LogUtils.writeLog(msg,
                                    stackTrace,
                                    pc.getProperties().getProperty("pig.logfile"),
                                    log
                            );    
                            log.info(msg);
                           
                            if (stop_on_failure) {
                                int errCode = 6017;
                              
                                throw new ExecException(msg, errCode,
                                        PigException.REMOTE_ENVIRONMENT);
                            }
                           
                        }
                        double prog = ((double)numMRJobsCompl)/totalMRJobs;
                        notifyProgress(prog, lastProg);
                        lastProg = prog;
                        mrp.remove(natOp);
                    }
                }
                continue;
            }
          // Initially, all jobs are in wait state.
            List<Job> jobsWithoutIds = jc.getWaitingJobs();
            log.info(jobsWithoutIds.size() +" map-reduce job(s) waiting for submission.");
            //notify listeners about jobs submitted
            ScriptState.get().emitJobsSubmittedNotification(jobsWithoutIds.size());
           
            // update Pig stats' job DAG with just compiled jobs
            PigStatsUtil.updateJobMroMap(jcc.getJobMroMap());
           
            // determine job tracker url
            String jobTrackerLoc;
            JobConf jobConf = jobsWithoutIds.get(0).getJobConf();
            try {
                String port = jobConf.get("mapred.job.tracker.http.address");
                String jobTrackerAdd = jobConf.get(HExecutionEngine.JOB_TRACKER_LOCATION);
               
                jobTrackerLoc = jobTrackerAdd.substring(0,jobTrackerAdd.indexOf(":"))
                + port.substring(port.indexOf(":"));
            }
            catch(Exception e){
                // Could not get the job tracker location, most probably we are running in local mode.
                // If it is the case, we don't print out job tracker location,
                // because it is meaningless for local mode.
              jobTrackerLoc = null;
                log.debug("Failed to get job tracker location.");
            }
           
            completeFailedJobsInThisRun.clear();
           
            Thread jcThread = new Thread(jc);
            jcThread.setUncaughtExceptionHandler(jctExceptionHandler);
           
            jcThread.setContextClassLoader(PigContext.getClassLoader());
           
            //All the setup done, now lets launch the jobs.
            jcThread.start();
           
            // Now wait, till we are finished.
            while(!jc.allFinished()){

              try { Thread.sleep(sleepTime); }
              catch (InterruptedException e) {}
             
              List<Job> jobsAssignedIdInThisRun = new ArrayList<Job>();

              for(Job job : jobsWithoutIds){
                if (job.getAssignedJobID() != null){

                  jobsAssignedIdInThisRun.add(job);
                  log.info("HadoopJobId: "+job.getAssignedJobID());
                  if(jobTrackerLoc != null){
                    log.info("More information at: http://"+ jobTrackerLoc+
                        "/jobdetails.jsp?jobid="+job.getAssignedJobID());
                 
                 
                  ScriptState.get().emitJobStartedNotification(
                                job.getAssignedJobID().toString());                       
                }
                else{
                  // This job is not assigned an id yet.
                }
              }
              jobsWithoutIds.removeAll(jobsAssignedIdInThisRun);

              double prog = (numMRJobsCompl+calculateProgress(jc, jobClient))/totalMRJobs;
              notifyProgress(prog, lastProg);
              lastProg = prog;
             
              // collect job stats by frequently polling of completed jobs (PIG-1829)
              PigStatsUtil.accumulateStats(jc);
                      
            }
           
            //check for the jobControlException first
            //if the job controller fails before launching the jobs then there are
            //no jobs to check for failure
            if (jobControlException != null) {
                if (jobControlException instanceof PigException) {
                    if (jobControlExceptionStackTrace != null) {
                        LogUtils.writeLog("Error message from job controller",
                                jobControlExceptionStackTrace, pc
                                        .getProperties().getProperty(
                                                "pig.logfile"), log);
                    }
                    throw jobControlException;
                } else {
                    int errCode = 2117;
                    String msg = "Unexpected error when launching map reduce job.";
                    throw new ExecException(msg, errCode, PigException.BUG,
                            jobControlException);
                }
            }
           
            if (!jc.getFailedJobs().isEmpty() ) {
                if (stop_on_failure){
                    int errCode = 6017;
                    StringBuilder msg = new StringBuilder();
                   
                    for (int i=0; i<jc.getFailedJobs().size(); i++) {
                        Job j = jc.getFailedJobs().get(i);
                        msg.append(getFirstLineFromMessage(j.getMessage()));
                        if (i!=jc.getFailedJobs().size()-1) {
                            msg.append("\n");
                        }
                    }
                   
                    throw new ExecException(msg.toString(), errCode,
                            PigException.REMOTE_ENVIRONMENT);
                }
               
                // If we only have one store and that job fails, then we are sure
                // that the job has completely failed, and we shall stop dependent jobs
                for (Job job : jc.getFailedJobs()) {
                    completeFailedJobsInThisRun.add(job);
                    log.info("job " + job.getAssignedJobID() + " has failed! Stop running all dependent jobs");
                }
                failedJobs.addAll(jc.getFailedJobs());
            }
           
            int removedMROp = jcc.updateMROpPlan(completeFailedJobsInThisRun);
           
            numMRJobsCompl += removedMROp;

            List<Job> jobs = jc.getSuccessfulJobs();
            jcc.moveResults(jobs);
            succJobs.addAll(jobs);
                       
            // collecting final statistics
            PigStatsUtil.accumulateStats(jc);

            jc.stop();
        }

        ScriptState.get().emitProgressUpdatedNotification(100);
       
        log.info( "100% complete");
View Full Code Here

     */
    public JobControl compile(MROperPlan plan, String grpName) throws JobCreationException{
        // Assert plan.size() != 0
        this.plan = plan;

        JobControl jobCtrl = new JobControl(grpName);

        try {
            List<MapReduceOper> roots = new LinkedList<MapReduceOper>();
            roots.addAll(plan.getRoots());
            for (MapReduceOper mro: roots) {
                if(mro instanceof NativeMapReduceOper) {
                    return null;
                }
                Job job = getJob(mro, conf, pigContext);
                jobMroMap.put(job, mro);
                jobCtrl.addJob(job);
            }
        } catch (JobCreationException jce) {
          throw jce;
        } catch(Exception e) {
            int errCode = 2017;
View Full Code Here

        ConfigurationValidator.validatePigProperties(pc.getProperties());
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
       
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        JobControl jc;
        int numMRJobsCompl = 0;
        DataBag input;
        List<Pair<PigNullableWritable, Writable>> intermediateData = new ArrayList<Pair<PigNullableWritable, Writable>>();

        Map<Job, MapReduceOper> jobToMroMap = jcc.getJobMroMap();
        HashMap<String, DataBag> output = new HashMap<String, DataBag>();
        Configuration jobConf;
        // jc is null only when mrp.size == 0
        boolean needFileInput;
        final ArrayList<OperatorKey> emptyInpTargets = new ArrayList<OperatorKey>();
        while(mrp.size() != 0) {
            jc = jcc.compile(mrp, "Illustrator");
            if(jc == null) {
                throw new ExecException("Native execution is not supported");
            }
            List<Job> jobs = jc.getWaitingJobs();
            for (Job job : jobs) {
                jobConf = job.getJobConf();
                FileLocalizer.setInitialized(false);
                ArrayList<ArrayList<OperatorKey>> inpTargets =
                    (ArrayList<ArrayList<OperatorKey>>)
View Full Code Here

        ConfigurationValidator.validatePigProperties(pc.getProperties());
        Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        JobControl jobControl = jcc.compile(mrPlan, "Test");
        Job job = jobControl.getWaitingJobs().get(0);
        int parallel = job.getJobConf().getNumReduceTasks();

        assertEquals(100, parallel);
        Util.assertParallelValues(100, -1, -1, 100, job.getJobConf());
View Full Code Here

TOP

Related Classes of org.apache.hadoop.mapred.jobcontrol.JobControl

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.