Package org.apache.hadoop.mapred.jobcontrol

Examples of org.apache.hadoop.mapred.jobcontrol.JobControl
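JobControl manages a group of map-reduce jobs and the dependencies between them: it moves each Job through waiting, ready, running, and completed states, submits a job only after all of its depending jobs succeed, and implements Runnable so it can be driven from a separate monitor thread. The examples below come from Apache Pig's JobControlCompiler tests and launcher and from Hadoop's ValueAggregatorJob. As a primer, here is a minimal sketch of using the class directly; the two JobConf parameters stand in for fully configured jobs and are assumptions, not part of the excerpts.

    import java.util.ArrayList;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.jobcontrol.Job;
    import org.apache.hadoop.mapred.jobcontrol.JobControl;

    public class JobChainSketch {
        public static void runChain(JobConf firstConf, JobConf secondConf)
                throws Exception {
            // the first job has no dependencies
            Job first = new Job(firstConf, new ArrayList<Job>());

            // the second job is submitted only after the first succeeds
            ArrayList<Job> deps = new ArrayList<Job>();
            deps.add(first);
            Job second = new Job(secondConf, deps);

            JobControl jc = new JobControl("chained-jobs");
            jc.addJob(first);
            jc.addJob(second);

            // JobControl implements Runnable; run it in its own thread
            Thread t = new Thread(jc);
            t.start();
            while (!jc.allFinished()) {
                Thread.sleep(500);
            }
            jc.stop();
        }
    }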


        HExecutionEngine exe = pc.getExecutionEngine();
        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
        Configuration conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        JobControl jobControl = jcc.compile(mrPlan, "Test");
        Job job = jobControl.getWaitingJobs().get(0);
        int parallel = job.getJobConf().getNumReduceTasks();

        assertEquals(100, parallel);
       
        pc.defaultParallel = -1;       
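This excerpt begins after the test has built an MR plan and set pc.defaultParallel to 100, which is what the assertion checks. A hedged reconstruction of that elided setup, reusing the Pig test helpers that appear in the fuller excerpts below (the declared types are guesses):

        // hypothetical setup the excerpt assumes; helper names are taken
        // from the other excerpts on this page, declared types are guesses
        pc.defaultParallel = 100;
        LogicalPlanTester planTester = new LogicalPlanTester(pc);
        planTester.buildPlan("a = load '/passwd';");
        LogicalPlan lp = planTester.buildPlan("b = group a by $0;");
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        POStore store = GenPhyOp.dummyPigStorageOp();
        pp.addAsLeaf(store);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);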


        pc.getConf().setProperty("pig.exec.reducers.max", "10");
        HExecutionEngine exe = pc.getExecutionEngine();
        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
        Configuration conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
        JobControl jc = jcc.compile(mrPlan, "Test");
        Job job = jc.getWaitingJobs().get(0);
        long reducer = Math.min((long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0), 10);
        assertEquals(reducer, job.getJobConf().getLong("mapred.reduce.tasks", 10));
       
        // an explicit PARALLEL keyword overrides the estimated reducer number
        planTester = new LogicalPlanTester(pc) ;
        planTester.buildPlan("a = load '/passwd';");
        lp = planTester.buildPlan("b = group a by $0 PARALLEL 2;");
        pp = Util.buildPhysicalPlan(lp, pc);
        store = GenPhyOp.dummyPigStorageOp();
        pp.addAsLeaf(store);
        mrPlan = Util.buildMRPlan(pp, pc);
              
        pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
        pc.getConf().setProperty("pig.exec.reducers.max", "10");
        exe = pc.getExecutionEngine();
        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
        conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
        jcc = new JobControlCompiler(pc, conf);
        jc = jcc.compile(mrPlan, "Test");
        job = jc.getWaitingJobs().get(0);
        assertEquals(2, job.getJobConf().getLong("mapred.reduce.tasks", 10));
       
        // the estimation does not take effect for non-DFS inputs or when the files do not exist, e.g. HBase
        planTester = new LogicalPlanTester(pc) ;
        planTester.buildPlan("a = load 'hbase://passwd' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');");
        lp = planTester.buildPlan("b = group a by $0 ;");
        pp = Util.buildPhysicalPlan(lp, pc);
        store = GenPhyOp.dummyPigStorageOp();
        pp.addAsLeaf(store);
        mrPlan = Util.buildMRPlan(pp, pc);
               
        pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
        pc.getConf().setProperty("pig.exec.reducers.max", "10");
        exe = pc.getExecutionEngine();
        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
        conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
        jcc = new JobControlCompiler(pc, conf);
        jc = jcc.compile(mrPlan, "Test");
        job = jc.getWaitingJobs().get(0);
        assertEquals(1, job.getJobConf().getLong("mapred.reduce.tasks", 10));
    }
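Taken together, the three assertions pin down the reducer estimation: the reducer count is the input size divided by pig.exec.reducers.bytes.per.reducer, rounded up and capped at pig.exec.reducers.max; an explicit PARALLEL clause overrides the estimate; and when the input size cannot be determined (the HBase load), the compiler falls back to a single reducer. A one-method restatement of that arithmetic, as a sketch of what the test asserts rather than Pig's actual estimator code:

        // reducers = min(ceil(inputBytes / bytesPerReducer), maxReducers),
        // with a floor of one reducer when the input size is unknown or tiny
        static long estimateReducers(long inputBytes, long bytesPerReducer,
                                     long maxReducers) {
            long estimate = (long) Math.ceil((double) inputBytes / bytesPerReducer);
            return Math.max(1, Math.min(estimate, maxReducers));
        }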

public class ValueAggregatorJob {

  public static JobControl createValueAggregatorJobs(String args[])
    throws IOException {
    JobControl theControl = new JobControl("ValueAggregatorJobs");
    ArrayList<Job> dependingJobs = new ArrayList<Job>();
    JobConf aJobConf = createValueAggregatorJob(args);
    Job aJob = new Job(aJobConf, dependingJobs);
    theControl.addJob(aJob);
    return theControl;
  }
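createValueAggregatorJobs only assembles the JobControl; nothing runs until a thread drives it. A hedged driver sketch (the class name and polling interval are mine):

    import org.apache.hadoop.mapred.jobcontrol.JobControl;

    public class AggregatorDriverSketch {
        public static void main(String[] args) throws Exception {
            JobControl control = ValueAggregatorJob.createValueAggregatorJobs(args);
            Thread runner = new Thread(control);
            runner.start();
            while (!control.allFinished()) {
                Thread.sleep(1000);
            }
            System.out.println("failed jobs: " + control.getFailedJobs().size());
            control.stop();
        }
    }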

        HExecutionEngine exe = pc.getExecutionEngine();
        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
        Configuration conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        JobControl jobControl = jcc.compile(mrPlan, "Test");
        Job job = jobControl.getWaitingJobs().get(0);
        int parallel = job.getJobConf().getNumReduceTasks();

        assertEquals("parallism", 1, parallel);
    }

        HExecutionEngine exe = pc.getExecutionEngine();
        ConfigurationValidator.validatePigProperties(exe.getConfiguration());
        Configuration conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        JobControl jobControl = jcc.compile(mrPlan, "Test");
        Job job = jobControl.getWaitingJobs().get(0);
        int parallel = job.getJobConf().getNumReduceTasks();
       
        assertEquals("parallism", 100, parallel);
    }

    public JobControl compile(MROperPlan plan, String grpName) throws JobCreationException{
        // Assert plan.size() != 0
        this.plan = plan;

        JobControl jobCtrl = new JobControl(grpName);

        try {
            List<MapReduceOper> roots = new LinkedList<MapReduceOper>();
            roots.addAll(plan.getRoots());
            for (MapReduceOper mro: roots) {
                if(mro instanceof NativeMapReduceOper) {
                    return null;
                }
                Job job = getJob(mro, conf, pigContext);
                jobMroMap.put(job, mro);
                jobCtrl.addJob(job);
            }
        } catch (JobCreationException jce) {
          throw jce;
        } catch(Exception e) {
            int errCode = 2017;
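Note the contract this version of compile() establishes: a null return does not mean the plan was empty, it means a root operator is a NativeMapReduceOper that must be run outside Hadoop's JobControl. The launcher excerpt further down handles exactly that case; in outline:

        JobControl jc = jcc.compile(mrPlan, grpName);
        if (jc == null) {
            // a root is a NativeMapReduceOper: run it directly, remove it
            // from the plan, then compile the remaining operators again
        } else {
            // hand jc to a monitor thread, as in the launcher below
        }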


       
        List<Job> failedJobs = new LinkedList<Job>();
        List<NativeMapReduceOper> failedNativeMR = new LinkedList<NativeMapReduceOper>();
        List<Job> completeFailedJobsInThisRun = new LinkedList<Job>();
        List<Job> succJobs = new LinkedList<Job>();
        JobControl jc;
        int totalMRJobs = mrp.size();
        int numMRJobsCompl = 0;
        double lastProg = -1;
       
        //create the exception handler for the job control thread
        //and register the handler with the job control thread
        JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();
       
        boolean stop_on_failure =
            pc.getProperties().getProperty("stop.on.failure", "false").equals("true");
       
        // loop until the MR plan is drained; jcc.compile() returns null
        // when a root is a NativeMapReduceOper, which is run directly below
        while(mrp.size() != 0) {
            jc = jcc.compile(mrp, grpName);
            if(jc == null) {
                List<MapReduceOper> roots = new LinkedList<MapReduceOper>();
                roots.addAll(mrp.getRoots());
               
                // run the native mapreduce roots first then run the rest of the roots
                for(MapReduceOper mro: roots) {
                    if(mro instanceof NativeMapReduceOper) {
                        NativeMapReduceOper natOp = (NativeMapReduceOper)mro;
                        try {
                            ScriptState.get().emitJobsSubmittedNotification(1);
                            natOp.runJob();
                            numMRJobsCompl++;
                        } catch (IOException e) {
                           
                            mrp.trimBelow(natOp);
                            failedNativeMR.add(natOp);
                           
                            String msg = "Error running native mapreduce" +
                            " operator job :" + natOp.getJobId() + e.getMessage();
                           
                            String stackTrace = getStackStraceStr(e);
                            LogUtils.writeLog(msg,
                                    stackTrace,
                                    pc.getProperties().getProperty("pig.logfile"),
                                    log
                            );    
                            log.info(msg);
                           
                            if (stop_on_failure) {
                                int errCode = 6017;
                              
                                throw new ExecException(msg, errCode,
                                        PigException.REMOTE_ENVIRONMENT);
                            }
                           
                        }
                        double prog = ((double)numMRJobsCompl)/totalMRJobs;
                        notifyProgress(prog, lastProg);
                        lastProg = prog;
                        mrp.remove(natOp);
                    }
                }
                continue;
            }
          // Initially, all jobs are in wait state.
            List<Job> jobsWithoutIds = jc.getWaitingJobs();
            log.info(jobsWithoutIds.size() +" map-reduce job(s) waiting for submission.");
            //notify listeners about jobs submitted
            ScriptState.get().emitJobsSubmittedNotification(jobsWithoutIds.size());
           
            // determine job tracker url
            String jobTrackerLoc;
            JobConf jobConf = jobsWithoutIds.get(0).getJobConf();
            try {
                String port = jobConf.get("mapred.job.tracker.http.address");
                String jobTrackerAdd = jobConf.get(HExecutionEngine.JOB_TRACKER_LOCATION);
               
                jobTrackerLoc = jobTrackerAdd.substring(0,jobTrackerAdd.indexOf(":"))
                + port.substring(port.indexOf(":"));
            }
            catch(Exception e){
                // Could not get the job tracker location, most probably we are running in local mode.
                // If it is the case, we don't print out job tracker location,
                // because it is meaningless for local mode.
              jobTrackerLoc = null;
                log.debug("Failed to get job tracker location.");
            }
           
            completeFailedJobsInThisRun.clear();
           
            Thread jcThread = new Thread(jc);
            jcThread.setUncaughtExceptionHandler(jctExceptionHandler);
           
            jcThread.setContextClassLoader(LogicalPlanBuilder.classloader);
           
            //All the setup done, now lets launch the jobs.
            jcThread.start();
           
            // Now wait, till we are finished.
            while(!jc.allFinished()){

              try { Thread.sleep(sleepTime); }
              catch (InterruptedException e) {}
             
              List<Job> jobsAssignedIdInThisRun = new ArrayList<Job>();

              for(Job job : jobsWithoutIds){
                if (job.getAssignedJobID() != null){

                  jobsAssignedIdInThisRun.add(job);
                  log.info("HadoopJobId: "+job.getAssignedJobID());
                  if(jobTrackerLoc != null){
                    log.info("More information at: http://"+ jobTrackerLoc+
                        "/jobdetails.jsp?jobid="+job.getAssignedJobID());
                  }

                  ScriptState.get().emitJobStartedNotification(
                                job.getAssignedJobID().toString());
                }
                else{
                  // This job has not been assigned an id yet.
                }
              }
              jobsWithoutIds.removeAll(jobsAssignedIdInThisRun);

              double prog = (numMRJobsCompl+calculateProgress(jc, jobClient))/totalMRJobs;
              notifyProgress(prog, lastProg);
              lastProg = prog;
            }
           
            //check for the jobControlException first
            //if the job controller fails before launching the jobs then there are
            //no jobs to check for failure
            if (jobControlException != null) {
                if (jobControlException instanceof PigException) {
                    if (jobControlExceptionStackTrace != null) {
                        LogUtils.writeLog("Error message from job controller",
                                jobControlExceptionStackTrace, pc
                                        .getProperties().getProperty(
                                                "pig.logfile"), log);
                    }
                    throw jobControlException;
                } else {
                    int errCode = 2117;
                    String msg = "Unexpected error when launching map reduce job.";
                    throw new ExecException(msg, errCode, PigException.BUG,
                            jobControlException);
                }
            }
           
            if (!jc.getFailedJobs().isEmpty() ) {
                if (stop_on_failure){
                    int errCode = 6017;
                    StringBuilder msg = new StringBuilder();
                   
                    for (int i=0; i<jc.getFailedJobs().size(); i++) {
                        Job j = jc.getFailedJobs().get(i);
                        msg.append(getFirstLineFromMessage(j.getMessage()));
                        if (i!=jc.getFailedJobs().size()-1) {
                            msg.append("\n");
                        }
                    }
                   
                    throw new ExecException(msg.toString(), errCode,
                            PigException.REMOTE_ENVIRONMENT);
                }
               
                // If a job has only one store and that job fails, the job has
                // failed completely, so stop its dependent jobs
                for (Job job : jc.getFailedJobs()) {
                    completeFailedJobsInThisRun.add(job);
                    log.info("job " + job.getAssignedJobID() + " has failed! Stop running all dependent jobs");
                }
                failedJobs.addAll(jc.getFailedJobs());
            }

            // update Pig stats' job DAG with job ids of just completed jobs
            PigStatsUtil.updateJobMroMap(jcc.getJobMroMap());
           
            int removedMROp = jcc.updateMROpPlan(completeFailedJobsInThisRun);
           
            numMRJobsCompl += removedMROp;

            List<Job> jobs = jc.getSuccessfulJobs();
            jcc.moveResults(jobs);
            succJobs.addAll(jobs);
                       
            // collecting statistics
            PigStatsUtil.accumulateStats(jc);

            jc.stop();
        }

        ScriptState.get().emitProgressUpdatedNotification(100);
       
        log.info( "100% complete");
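The launcher never joins the JobControl thread directly; it installs an UncaughtExceptionHandler and inspects jobControlException once allFinished() returns. A minimal sketch of that handler pattern (the field name mirrors the excerpt; the body is an assumption about what the handler does):

    // capture an uncaught throwable from the JobControl thread so the
    // launcher thread can rethrow it after the polling loop ends
    class JobControlThreadExceptionHandler
            implements Thread.UncaughtExceptionHandler {
        volatile Throwable jobControlException;

        public void uncaughtException(Thread thread, Throwable throwable) {
            jobControlException = throwable; // checked after jc.allFinished()
        }
    }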

        if (plan.size() == 0) {
            return null;
        }

        JobControl jobCtrl = new JobControl(grpName);

        try {
            List<MapReduceOper> roots = new LinkedList<MapReduceOper>();
            roots.addAll(plan.getRoots());
            for (MapReduceOper mro: roots) {
                Job job = getJob(mro, conf, pigContext);
                jobMroMap.put(job, mro);
                jobCtrl.addJob(job);
            }
        } catch (JobCreationException jce) {
          throw jce;
        } catch(Exception e) {
            int errCode = 2017;

        JobControlCompiler jcc = new JobControlCompiler(pc, conf);
       
        List<Job> failedJobs = new LinkedList<Job>();
        List<Job> completeFailedJobsInThisRun = new LinkedList<Job>();
        List<Job> succJobs = new LinkedList<Job>();
        JobControl jc;
        int totalMRJobs = mrp.size();
        int numMRJobsCompl = 0;
        double lastProg = -1;
       
        //create the exception handler for the job control thread
        //and register the handler with the job control thread
        JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();

        while((jc = jcc.compile(mrp, grpName)) != null) {
           
          // Initially, all jobs are in wait state.
            List<Job> jobsWithoutIds = jc.getWaitingJobs();
            log.info(jobsWithoutIds.size() +" map-reduce job(s) waiting for submission.");
           
            String jobTrackerAdd;
            String port;
            String jobTrackerLoc;
            JobConf jobConf = jobsWithoutIds.get(0).getJobConf();
            try {
                port = jobConf.get("mapred.job.tracker.http.address");
                jobTrackerAdd = jobConf.get(HExecutionEngine.JOB_TRACKER_LOCATION);
                jobTrackerLoc = jobTrackerAdd.substring(0,jobTrackerAdd.indexOf(":")) + port.substring(port.indexOf(":"));
            }
            catch(Exception e){
                // Could not get the job tracker location, most probably we are running in local mode.
                // If it is the case, we don't print out job tracker location,
                // because it is meaningless for local mode.
              jobTrackerLoc = null;
                log.debug("Failed to get job tracker location.");
            }
           
            completeFailedJobsInThisRun.clear();
           
            Thread jcThread = new Thread(jc);
            jcThread.setUncaughtExceptionHandler(jctExceptionHandler);
           
            //All the setup done, now lets launch the jobs.
            jcThread.start();
           
            // Now wait, till we are finished.
            while(!jc.allFinished()){

              try { Thread.sleep(sleepTime); }
              catch (InterruptedException e) {}
             
              List<Job> jobsAssignedIdInThisRun = new ArrayList<Job>();

              for(Job job : jobsWithoutIds){
                if (job.getAssignedJobID() != null){

                  jobsAssignedIdInThisRun.add(job);
                  log.info("HadoopJobId: "+job.getAssignedJobID());
                  if(jobTrackerLoc != null){
                    log.info("More information at: http://"+ jobTrackerLoc+
                        "/jobdetails.jsp?jobid="+job.getAssignedJobID());
                  }
                }
                else{
                  // This job has not been assigned an id yet.
                }
              }
              jobsWithoutIds.removeAll(jobsAssignedIdInThisRun);

              double prog = (numMRJobsCompl+calculateProgress(jc, jobClient))/totalMRJobs;
              if(prog>=(lastProg+0.01)){
                int perCom = (int)(prog * 100);
                if(perCom!=100)
                  log.info( perCom + "% complete");
              }
              lastProg = prog;
            }

            //check for the jobControlException first
            //if the job controller fails before launching the jobs then there are
            //no jobs to check for failure
            if(jobControlException != null) {
                if(jobControlException instanceof PigException) {
                    if(jobControlExceptionStackTrace != null) {
                        LogUtils.writeLog("Error message from job controller", jobControlExceptionStackTrace,
                                pc.getProperties().getProperty("pig.logfile"),
                                log);
                    }
                    throw jobControlException;
                } else {
                    int errCode = 2117;
                    String msg = "Unexpected error when launching map reduce job.";
                    throw new ExecException(msg, errCode, PigException.BUG, jobControlException);
                }
            }

            if (!jc.getFailedJobs().isEmpty() )
            {
                if ("true".equalsIgnoreCase(
                  pc.getProperties().getProperty("stop.on.failure","false"))) {
                    int errCode = 6017;
                    StringBuilder msg = new StringBuilder();
                   
                    for (int i=0;i<jc.getFailedJobs().size();i++) {
                        Job j = jc.getFailedJobs().get(i);
                        msg.append(getFirstLineFromMessage(j.getMessage()));
                        if (i!=jc.getFailedJobs().size()-1)
                            msg.append("\n");
                    }
                   
                    throw new ExecException(msg.toString(),
                                            errCode, PigException.REMOTE_ENVIRONMENT);
                }
                // If a job has only one store and that job fails, the job has failed completely, so stop its dependent jobs
                for (Job job : jc.getFailedJobs())
                {
                    List<POStore> sts = jcc.getStores(job);
                    if (sts.size()==1)
                        completeFailedJobsInThisRun.add(job);
                }
                failedJobs.addAll(jc.getFailedJobs());
            }
           
            int removedMROp = jcc.updateMROpPlan(completeFailedJobsInThisRun);
           
            numMRJobsCompl += removedMROp;

            List<Job> jobs = jc.getSuccessfulJobs();
            jcc.moveResults(jobs);
            succJobs.addAll(jobs);
           
           
            stats.setJobClient(jobClient);
            stats.setJobControl(jc);
            stats.accumulateStats();
           
            jc.stop();
        }

        log.info( "100% complete");

        boolean failed = false;
