Examples of MRIntermediateDataVisitor

org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MRIntermediateDataVisitor
An {@link MROpPlanVisitor} that gathers the paths for allintermediate data from a {@link MROperPlan}

Examples of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MRIntermediateDataVisitor

        // start collecting statistics
        PigStatsUtil.startCollection(pc, jobClient, jcc, mrp); 
        
        // Find all the intermediate data stores. The plan will be destroyed during compile/execution
        // so this needs to be done before.
        MRIntermediateDataVisitor intermediateVisitor = new MRIntermediateDataVisitor(mrp);
        intermediateVisitor.visit();
        
        List<Job> failedJobs = new LinkedList<Job>();
        List<NativeMapReduceOper> failedNativeMR = new LinkedList<NativeMapReduceOper>();
        List<Job> completeFailedJobsInThisRun = new LinkedList<Job>();
        List<Job> succJobs = new LinkedList<Job>();
        int totalMRJobs = mrp.size();
        int numMRJobsCompl = 0;
        double lastProg = -1;
        long scriptSubmittedTimestamp = System.currentTimeMillis();
        
        //create the exception handler for the job control thread
        //and register the handler with the job control thread
        JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();
        
        boolean stop_on_failure = 
            pc.getProperties().getProperty("stop.on.failure", "false").equals("true");
        
        // jc is null only when mrp.size == 0
        while(mrp.size() != 0) {
            jc = jcc.compile(mrp, grpName);
            if(jc == null) {
                List<MapReduceOper> roots = new LinkedList<MapReduceOper>();
                roots.addAll(mrp.getRoots());
                
                // run the native mapreduce roots first then run the rest of the roots
                for(MapReduceOper mro: roots) {
                    if(mro instanceof NativeMapReduceOper) {
                        NativeMapReduceOper natOp = (NativeMapReduceOper)mro;
                        try {
                            ScriptState.get().emitJobsSubmittedNotification(1);
                            natOp.runJob();
                            numMRJobsCompl++;
                        } catch (IOException e) {
                            
                            mrp.trimBelow(natOp);
                            failedNativeMR.add(natOp);
                            
                            String msg = "Error running native mapreduce" +
                            " operator job :" + natOp.getJobId() + e.getMessage();
                            
                            String stackTrace = getStackStraceStr(e);
                            LogUtils.writeLog(msg,
                                    stackTrace,
                                    pc.getProperties().getProperty("pig.logfile"),
                                    log
                            );     
                            log.info(msg);
                            
                            if (stop_on_failure) {
                                int errCode = 6017;
                               
                                throw new ExecException(msg, errCode,
                                        PigException.REMOTE_ENVIRONMENT);
                            }
                            
                        }
                        double prog = ((double)numMRJobsCompl)/totalMRJobs;
                        notifyProgress(prog, lastProg);
                        lastProg = prog;
                        mrp.remove(natOp);
                    }
                }
                continue;
            }
          // Initially, all jobs are in wait state.
            List<Job> jobsWithoutIds = jc.getWaitingJobs();
            log.info(jobsWithoutIds.size() +" map-reduce job(s) waiting for submission.");
            //notify listeners about jobs submitted
            ScriptState.get().emitJobsSubmittedNotification(jobsWithoutIds.size());
            
            // update Pig stats' job DAG with just compiled jobs
            PigStatsUtil.updateJobMroMap(jcc.getJobMroMap());
            
            // determine job tracker url 
            String jobTrackerLoc;
            JobConf jobConf = jobsWithoutIds.get(0).getJobConf();
            try {
                String port = jobConf.get("mapred.job.tracker.http.address");
                String jobTrackerAdd = jobConf.get(HExecutionEngine.JOB_TRACKER_LOCATION);
                
                jobTrackerLoc = jobTrackerAdd.substring(0,jobTrackerAdd.indexOf(":")) 
                + port.substring(port.indexOf(":"));
            }
            catch(Exception e){
                // Could not get the job tracker location, most probably we are running in local mode.
                // If it is the case, we don't print out job tracker location,
                // because it is meaningless for local mode.
              jobTrackerLoc = null;
                log.debug("Failed to get job tracker location.");
            }
            
            completeFailedJobsInThisRun.clear();


            // Set the thread UDFContext so registered classes are available.
            final UDFContext udfContext = UDFContext.getUDFContext();
            Thread jcThread = new Thread(jc, "JobControl") {
                @Override
                public void run() {
                    UDFContext.setUdfContext(udfContext.clone()); //PIG-2576
                    super.run();
                }
            };


            jcThread.setUncaughtExceptionHandler(jctExceptionHandler);
            
            jcThread.setContextClassLoader(PigContext.getClassLoader());


            // mark the times that the jobs were submitted so it's reflected in job history props
            for (Job job : jc.getWaitingJobs()) {
                JobConf jobConfCopy = job.getJobConf();
                jobConfCopy.set("pig.script.submitted.timestamp",
                        Long.toString(scriptSubmittedTimestamp));
                jobConfCopy.set("pig.job.submitted.timestamp",
                        Long.toString(System.currentTimeMillis()));
                job.setJobConf(jobConfCopy);
            }


            //All the setup done, now lets launch the jobs.
            jcThread.start();
            
            try {
                // a flag whether to warn failure during the loop below, so users can notice failure earlier.
                boolean warn_failure = true;
                
                // Now wait, till we are finished.
                while(!jc.allFinished()){
    
                  try { jcThread.join(sleepTime); }
                  catch (InterruptedException e) {}
    
                  List<Job> jobsAssignedIdInThisRun = new ArrayList<Job>();
    
                  for(Job job : jobsWithoutIds){
                    if (job.getAssignedJobID() != null){
    
                      jobsAssignedIdInThisRun.add(job);
                      log.info("HadoopJobId: "+job.getAssignedJobID());
                      
                            // display the aliases being processed
                            MapReduceOper mro = jcc.getJobMroMap().get(job);
                            if (mro != null) {
                                String alias = ScriptState.get().getAlias(mro);
                                log.info("Processing aliases " + alias);
                                String aliasLocation = ScriptState.get().getAliasLocation(mro);
                                log.info("detailed locations: " + aliasLocation);
                            }
    
                            
                      if(jobTrackerLoc != null){
                        log.info("More information at: http://"+ jobTrackerLoc+
                            "/jobdetails.jsp?jobid="+job.getAssignedJobID());
                      }  
    
                            // update statistics for this job so jobId is set
                            PigStatsUtil.addJobStats(job);
                      ScriptState.get().emitJobStartedNotification(
                                    job.getAssignedJobID().toString());                        
                    }
                    else{
                      // This job is not assigned an id yet.
                    }
                  }
                  jobsWithoutIds.removeAll(jobsAssignedIdInThisRun);
    
                  double prog = (numMRJobsCompl+calculateProgress(jc, jobClient))/totalMRJobs;
                  if (notifyProgress(prog, lastProg)) {
                        lastProg = prog;
                    }
    
                  // collect job stats by frequently polling of completed jobs (PIG-1829)
                  PigStatsUtil.accumulateStats(jc);
                  
                    // if stop_on_failure is enabled, we need to stop immediately when any job has failed
                    checkStopOnFailure(stop_on_failure);
                    // otherwise, we just display a warning message if there's any failure
                    if (warn_failure && !jc.getFailedJobs().isEmpty()) {
                        // we don't warn again for this group of jobs
                        warn_failure = false;
                        log.warn("Ooops! Some job has failed! Specify -stop_on_failure if you "
                                + "want Pig to stop immediately on failure.");
                    }
                }
                
                //check for the jobControlException first
                //if the job controller fails before launching the jobs then there are
                //no jobs to check for failure
                if (jobControlException != null) {
                    if (jobControlException instanceof PigException) {
                        if (jobControlExceptionStackTrace != null) {
                            LogUtils.writeLog("Error message from job controller",
                                    jobControlExceptionStackTrace, pc
                                            .getProperties().getProperty(
                                                    "pig.logfile"), log);
                        }
                        throw jobControlException;
                    } else {
                        int errCode = 2117;
                        String msg = "Unexpected error when launching map reduce job.";
                        throw new ExecException(msg, errCode, PigException.BUG,
                                jobControlException);
                    }
                }
                
                if (!jc.getFailedJobs().isEmpty() ) {
                    // stop if stop_on_failure is enabled
                    checkStopOnFailure(stop_on_failure);
                    
                    // If we only have one store and that job fail, then we sure 
                    // that the job completely fail, and we shall stop dependent jobs
                    for (Job job : jc.getFailedJobs()) {
                        completeFailedJobsInThisRun.add(job);
                        log.info("job " + job.getAssignedJobID() + " has failed! Stop running all dependent jobs"); 
                    }
                    failedJobs.addAll(jc.getFailedJobs());
                }
                
                int removedMROp = jcc.updateMROpPlan(completeFailedJobsInThisRun);
                
                numMRJobsCompl += removedMROp;
    
                List<Job> jobs = jc.getSuccessfulJobs();
                jcc.moveResults(jobs);
                succJobs.addAll(jobs);
                            
                // collecting final statistics
                PigStatsUtil.accumulateStats(jc);


          }
          catch (Exception e) {
            throw e;
          }
          finally {
            jc.stop();
          }
        }


        ScriptState.get().emitProgressUpdatedNotification(100);
        
        log.info( "100% complete");
             
        boolean failed = false;
        
        if(failedNativeMR.size() > 0){
            failed = true;
        }


        // Clean up all the intermediate data
        for (String path : intermediateVisitor.getIntermediate()) {
            FileLocalizer.delete(path, pc);
        }


        // Look to see if any jobs failed.  If so, we need to report that.
        if (failedJobs != null && failedJobs.size() > 0) {

View Full Code Here

Examples of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MRIntermediateDataVisitor

        PigStats.start(pc.getExecutionEngine().instantiatePigStats());
        MRPigStatsUtil.startCollection(pc, statsJobClient, jcc, mrp);


        // Find all the intermediate data stores. The plan will be destroyed during compile/execution
        // so this needs to be done before.
        MRIntermediateDataVisitor intermediateVisitor = new MRIntermediateDataVisitor(mrp);
        intermediateVisitor.visit();


        List<Job> failedJobs = new LinkedList<Job>();
        List<NativeMapReduceOper> failedNativeMR = new LinkedList<NativeMapReduceOper>();
        List<Job> completeFailedJobsInThisRun = new LinkedList<Job>();
        List<Job> succJobs = new LinkedList<Job>();
        int totalMRJobs = mrp.size();
        int numMRJobsCompl = 0;
        double lastProg = -1;
        long scriptSubmittedTimestamp = System.currentTimeMillis();


        //create the exception handler for the job control thread
        //and register the handler with the job control thread
        JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();


        boolean stop_on_failure =
            Boolean.valueOf(pc.getProperties().getProperty("stop.on.failure", "false"));


        // jc is null only when mrp.size == 0
        while(mrp.size() != 0) {
            jc = jcc.compile(mrp, grpName);
            if(jc == null) {
                List<MapReduceOper> roots = new LinkedList<MapReduceOper>();
                roots.addAll(mrp.getRoots());


                // run the native mapreduce roots first then run the rest of the roots
                for(MapReduceOper mro: roots) {
                    if(mro instanceof NativeMapReduceOper) {
                        NativeMapReduceOper natOp = (NativeMapReduceOper)mro;
                        try {
                            MRScriptState.get().emitJobsSubmittedNotification(1);
                            natOp.runJob();
                            numMRJobsCompl++;
                        } catch (IOException e) {


                            mrp.trimBelow(natOp);
                            failedNativeMR.add(natOp);


                            String msg = "Error running native mapreduce" +
                                    " operator job :" + natOp.getJobId() + e.getMessage();


                            String stackTrace = Utils.getStackStraceStr(e);
                            LogUtils.writeLog(msg,
                                    stackTrace,
                                    pc.getProperties().getProperty("pig.logfile"),
                                    log
                                    );
                            log.info(msg);


                            if (stop_on_failure) {
                                int errCode = 6017;


                                throw new ExecException(msg, errCode,
                                        PigException.REMOTE_ENVIRONMENT);
                            }


                        }
                        double prog = ((double)numMRJobsCompl)/totalMRJobs;
                        notifyProgress(prog, lastProg);
                        lastProg = prog;
                        mrp.remove(natOp);
                    }
                }
                continue;
            }
            // Initially, all jobs are in wait state.
            List<Job> jobsWithoutIds = jc.getWaitingJobs();
            log.info(jobsWithoutIds.size() +" map-reduce job(s) waiting for submission.");
            //notify listeners about jobs submitted
            MRScriptState.get().emitJobsSubmittedNotification(jobsWithoutIds.size());


            // update Pig stats' job DAG with just compiled jobs
            MRPigStatsUtil.updateJobMroMap(jcc.getJobMroMap());


            // determine job tracker url
            String jobTrackerLoc;
            JobConf jobConf = jobsWithoutIds.get(0).getJobConf();
            try {
                String port = jobConf.get(MRConfiguration.JOB_TRACKER_HTTP_ADDRESS);
                String jobTrackerAdd = jobConf.get(MRConfiguration.JOB_TRACKER);


                jobTrackerLoc = jobTrackerAdd.substring(0,jobTrackerAdd.indexOf(":"))
                        + port.substring(port.indexOf(":"));
            }
            catch(Exception e){
                // Could not get the job tracker location, most probably we are running in local mode.
                // If it is the case, we don't print out job tracker location,
                // because it is meaningless for local mode.
                jobTrackerLoc = null;
                log.debug("Failed to get job tracker location.");
            }


            completeFailedJobsInThisRun.clear();


            // Set the thread UDFContext so registered classes are available.
            final UDFContext udfContext = UDFContext.getUDFContext();
            Thread jcThread = new Thread(jc, "JobControl") {
                @Override
                public void run() {
                    UDFContext.setUdfContext(udfContext.clone()); //PIG-2576
                    super.run();
                }
            };


            jcThread.setUncaughtExceptionHandler(jctExceptionHandler);


            jcThread.setContextClassLoader(PigContext.getClassLoader());


            // mark the times that the jobs were submitted so it's reflected in job history props
            for (Job job : jc.getWaitingJobs()) {
                JobConf jobConfCopy = job.getJobConf();
                jobConfCopy.set("pig.script.submitted.timestamp",
                        Long.toString(scriptSubmittedTimestamp));
                jobConfCopy.set("pig.job.submitted.timestamp",
                        Long.toString(System.currentTimeMillis()));
                job.setJobConf(jobConfCopy);
            }


            //All the setup done, now lets launch the jobs.
            jcThread.start();


            try {
                // a flag whether to warn failure during the loop below, so users can notice failure earlier.
                boolean warn_failure = true;


                // Now wait, till we are finished.
                while(!jc.allFinished()){


                    try { jcThread.join(sleepTime); }
                    catch (InterruptedException e) {}


                    List<Job> jobsAssignedIdInThisRun = new ArrayList<Job>();


                    for(Job job : jobsWithoutIds){
                        if (job.getAssignedJobID() != null){


                            jobsAssignedIdInThisRun.add(job);
                            log.info("HadoopJobId: "+job.getAssignedJobID());


                            // display the aliases being processed
                            MapReduceOper mro = jcc.getJobMroMap().get(job);
                            if (mro != null) {
                                String alias = MRScriptState.get().getAlias(mro);
                                log.info("Processing aliases " + alias);
                                String aliasLocation = MRScriptState.get().getAliasLocation(mro);
                                log.info("detailed locations: " + aliasLocation);
                            }


                            if (!HadoopShims.isHadoopYARN() && jobTrackerLoc != null) {
                                log.info("More information at: http://" + jobTrackerLoc
                                        + "/jobdetails.jsp?jobid=" + job.getAssignedJobID());
                            }


                            // update statistics for this job so jobId is set
                            MRPigStatsUtil.addJobStats(job);
                            MRScriptState.get().emitJobStartedNotification(
                                    job.getAssignedJobID().toString());
                        }
                        else{
                            // This job is not assigned an id yet.
                        }
                    }
                    jobsWithoutIds.removeAll(jobsAssignedIdInThisRun);


                    double prog = (numMRJobsCompl+calculateProgress(jc))/totalMRJobs;
                    if (notifyProgress(prog, lastProg)) {
                        List<Job> runnJobs = jc.getRunningJobs();
                        if (runnJobs != null) {
                            StringBuilder msg = new StringBuilder();
                            for (Object object : runnJobs) {
                                Job j = (Job) object;
                                if (j != null) {
                                    msg.append(j.getAssignedJobID()).append(",");
                                }
                            }
                            if (msg.length() > 0) {
                                msg.setCharAt(msg.length() - 1, ']');
                                log.info("Running jobs are [" + msg);
                            }
                        }
                        lastProg = prog;
                    }


                    // collect job stats by frequently polling of completed jobs (PIG-1829)
                    MRPigStatsUtil.accumulateStats(jc);


                    // if stop_on_failure is enabled, we need to stop immediately when any job has failed
                    checkStopOnFailure(stop_on_failure);
                    // otherwise, we just display a warning message if there's any failure
                    if (warn_failure && !jc.getFailedJobs().isEmpty()) {
                        // we don't warn again for this group of jobs
                        warn_failure = false;
                        log.warn("Ooops! Some job has failed! Specify -stop_on_failure if you "
                                + "want Pig to stop immediately on failure.");
                    }
                }


                //check for the jobControlException first
                //if the job controller fails before launching the jobs then there are
                //no jobs to check for failure
                if (jobControlException != null) {
                    if (jobControlException instanceof PigException) {
                        if (jobControlExceptionStackTrace != null) {
                            LogUtils.writeLog("Error message from job controller",
                                    jobControlExceptionStackTrace, pc
                                    .getProperties().getProperty(
                                            "pig.logfile"), log);
                        }
                        throw jobControlException;
                    } else {
                        int errCode = 2117;
                        String msg = "Unexpected error when launching map reduce job.";
                        throw new ExecException(msg, errCode, PigException.BUG,
                                jobControlException);
                    }
                }


                if (!jc.getFailedJobs().isEmpty() ) {
                    // stop if stop_on_failure is enabled
                    checkStopOnFailure(stop_on_failure);


                    // If we only have one store and that job fail, then we sure
                    // that the job completely fail, and we shall stop dependent jobs
                    for (Job job : jc.getFailedJobs()) {
                        completeFailedJobsInThisRun.add(job);
                        log.info("job " + job.getAssignedJobID() + " has failed! Stop running all dependent jobs");
                    }
                    failedJobs.addAll(jc.getFailedJobs());
                }


                int removedMROp = jcc.updateMROpPlan(completeFailedJobsInThisRun);


                numMRJobsCompl += removedMROp;


                List<Job> jobs = jc.getSuccessfulJobs();
                jcc.moveResults(jobs);
                succJobs.addAll(jobs);


                // collecting final statistics
                MRPigStatsUtil.accumulateStats(jc);


            }
            catch (Exception e) {
                throw e;
            }
            finally {
                jc.stop();
            }
        }


        MRScriptState.get().emitProgressUpdatedNotification(100);


        log.info( "100% complete");


        boolean failed = false;


        if(failedNativeMR.size() > 0){
            failed = true;
        }


        if (Boolean.valueOf(pc.getProperties().getProperty(PigConfiguration.PIG_DELETE_TEMP_FILE, "true"))) {
            // Clean up all the intermediate data
            for (String path : intermediateVisitor.getIntermediate()) {
                // Skip non-file system paths such as hbase, see PIG-3617
                if (HadoopShims.hasFileSystemImpl(new Path(path), conf)) {
                    FileLocalizer.delete(path, pc);
                }
            }

View Full Code Here

Examples of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MRIntermediateDataVisitor

        PigStats.start(pc.getExecutionEngine().instantiatePigStats());
        MRPigStatsUtil.startCollection(pc, statsJobClient, jcc, mrp);


        // Find all the intermediate data stores. The plan will be destroyed during compile/execution
        // so this needs to be done before.
        MRIntermediateDataVisitor intermediateVisitor = new MRIntermediateDataVisitor(mrp);
        intermediateVisitor.visit();


        List<Job> failedJobs = new LinkedList<Job>();
        List<NativeMapReduceOper> failedNativeMR = new LinkedList<NativeMapReduceOper>();
        List<Job> completeFailedJobsInThisRun = new LinkedList<Job>();
        List<Job> succJobs = new LinkedList<Job>();
        int totalMRJobs = mrp.size();
        int numMRJobsCompl = 0;
        double lastProg = -1;
        long scriptSubmittedTimestamp = System.currentTimeMillis();


        //create the exception handler for the job control thread
        //and register the handler with the job control thread
        JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();


        boolean stop_on_failure =
                pc.getProperties().getProperty("stop.on.failure", "false").equals("true");


        // jc is null only when mrp.size == 0
        while(mrp.size() != 0) {
            jc = jcc.compile(mrp, grpName);
            if(jc == null) {
                List<MapReduceOper> roots = new LinkedList<MapReduceOper>();
                roots.addAll(mrp.getRoots());


                // run the native mapreduce roots first then run the rest of the roots
                for(MapReduceOper mro: roots) {
                    if(mro instanceof NativeMapReduceOper) {
                        NativeMapReduceOper natOp = (NativeMapReduceOper)mro;
                        try {
                            MRScriptState.get().emitJobsSubmittedNotification(1);
                            natOp.runJob();
                            numMRJobsCompl++;
                        } catch (IOException e) {


                            mrp.trimBelow(natOp);
                            failedNativeMR.add(natOp);


                            String msg = "Error running native mapreduce" +
                                    " operator job :" + natOp.getJobId() + e.getMessage();


                            String stackTrace = Utils.getStackStraceStr(e);
                            LogUtils.writeLog(msg,
                                    stackTrace,
                                    pc.getProperties().getProperty("pig.logfile"),
                                    log
                                    );
                            log.info(msg);


                            if (stop_on_failure) {
                                int errCode = 6017;


                                throw new ExecException(msg, errCode,
                                        PigException.REMOTE_ENVIRONMENT);
                            }


                        }
                        double prog = ((double)numMRJobsCompl)/totalMRJobs;
                        notifyProgress(prog, lastProg);
                        lastProg = prog;
                        mrp.remove(natOp);
                    }
                }
                continue;
            }
            // Initially, all jobs are in wait state.
            List<Job> jobsWithoutIds = jc.getWaitingJobs();
            log.info(jobsWithoutIds.size() +" map-reduce job(s) waiting for submission.");
            //notify listeners about jobs submitted
            MRScriptState.get().emitJobsSubmittedNotification(jobsWithoutIds.size());


            // update Pig stats' job DAG with just compiled jobs
            MRPigStatsUtil.updateJobMroMap(jcc.getJobMroMap());


            // determine job tracker url
            String jobTrackerLoc;
            JobConf jobConf = jobsWithoutIds.get(0).getJobConf();
            try {
                String port = jobConf.get("mapred.job.tracker.http.address");
                String jobTrackerAdd = jobConf.get(HExecutionEngine.JOB_TRACKER_LOCATION);


                jobTrackerLoc = jobTrackerAdd.substring(0,jobTrackerAdd.indexOf(":"))
                        + port.substring(port.indexOf(":"));
            }
            catch(Exception e){
                // Could not get the job tracker location, most probably we are running in local mode.
                // If it is the case, we don't print out job tracker location,
                // because it is meaningless for local mode.
                jobTrackerLoc = null;
                log.debug("Failed to get job tracker location.");
            }


            completeFailedJobsInThisRun.clear();


            // Set the thread UDFContext so registered classes are available.
            final UDFContext udfContext = UDFContext.getUDFContext();
            Thread jcThread = new Thread(jc, "JobControl") {
                @Override
                public void run() {
                    UDFContext.setUdfContext(udfContext.clone()); //PIG-2576
                    super.run();
                }
            };


            jcThread.setUncaughtExceptionHandler(jctExceptionHandler);


            jcThread.setContextClassLoader(PigContext.getClassLoader());


            // mark the times that the jobs were submitted so it's reflected in job history props
            for (Job job : jc.getWaitingJobs()) {
                JobConf jobConfCopy = job.getJobConf();
                jobConfCopy.set("pig.script.submitted.timestamp",
                        Long.toString(scriptSubmittedTimestamp));
                jobConfCopy.set("pig.job.submitted.timestamp",
                        Long.toString(System.currentTimeMillis()));
                job.setJobConf(jobConfCopy);
            }


            //All the setup done, now lets launch the jobs.
            jcThread.start();


            try {
                // a flag whether to warn failure during the loop below, so users can notice failure earlier.
                boolean warn_failure = true;


                // Now wait, till we are finished.
                while(!jc.allFinished()){


                    try { jcThread.join(sleepTime); }
                    catch (InterruptedException e) {}


                    List<Job> jobsAssignedIdInThisRun = new ArrayList<Job>();


                    for(Job job : jobsWithoutIds){
                        if (job.getAssignedJobID() != null){


                            jobsAssignedIdInThisRun.add(job);
                            log.info("HadoopJobId: "+job.getAssignedJobID());


                            // display the aliases being processed
                            MapReduceOper mro = jcc.getJobMroMap().get(job);
                            if (mro != null) {
                                String alias = MRScriptState.get().getAlias(mro);
                                log.info("Processing aliases " + alias);
                                String aliasLocation = MRScriptState.get().getAliasLocation(mro);
                                log.info("detailed locations: " + aliasLocation);
                            }




                            if(jobTrackerLoc != null){
                                log.info("More information at: http://"+ jobTrackerLoc+
                                        "/jobdetails.jsp?jobid="+job.getAssignedJobID());
                            }


                            // update statistics for this job so jobId is set
                            MRPigStatsUtil.addJobStats(job);
                            MRScriptState.get().emitJobStartedNotification(
                                    job.getAssignedJobID().toString());
                        }
                        else{
                            // This job is not assigned an id yet.
                        }
                    }
                    jobsWithoutIds.removeAll(jobsAssignedIdInThisRun);


                    double prog = (numMRJobsCompl+calculateProgress(jc))/totalMRJobs;
                    if (notifyProgress(prog, lastProg)) {
                        List<Job> runnJobs = jc.getRunningJobs();
                        if (runnJobs != null) {
                            StringBuilder msg = new StringBuilder();
                            for (Object object : runnJobs) {
                                Job j = (Job) object;
                                if (j != null) {
                                    msg.append(j.getAssignedJobID()).append(",");
                                }
                            }
                            if (msg.length() > 0) {
                                msg.setCharAt(msg.length() - 1, ']');
                                log.info("Running jobs are [" + msg);
                            }
                        }
                        lastProg = prog;
                    }


                    // collect job stats by frequently polling of completed jobs (PIG-1829)
                    MRPigStatsUtil.accumulateStats(jc);


                    // if stop_on_failure is enabled, we need to stop immediately when any job has failed
                    checkStopOnFailure(stop_on_failure);
                    // otherwise, we just display a warning message if there's any failure
                    if (warn_failure && !jc.getFailedJobs().isEmpty()) {
                        // we don't warn again for this group of jobs
                        warn_failure = false;
                        log.warn("Ooops! Some job has failed! Specify -stop_on_failure if you "
                                + "want Pig to stop immediately on failure.");
                    }
                }


                //check for the jobControlException first
                //if the job controller fails before launching the jobs then there are
                //no jobs to check for failure
                if (jobControlException != null) {
                    if (jobControlException instanceof PigException) {
                        if (jobControlExceptionStackTrace != null) {
                            LogUtils.writeLog("Error message from job controller",
                                    jobControlExceptionStackTrace, pc
                                    .getProperties().getProperty(
                                            "pig.logfile"), log);
                        }
                        throw jobControlException;
                    } else {
                        int errCode = 2117;
                        String msg = "Unexpected error when launching map reduce job.";
                        throw new ExecException(msg, errCode, PigException.BUG,
                                jobControlException);
                    }
                }


                if (!jc.getFailedJobs().isEmpty() ) {
                    // stop if stop_on_failure is enabled
                    checkStopOnFailure(stop_on_failure);


                    // If we only have one store and that job fail, then we sure
                    // that the job completely fail, and we shall stop dependent jobs
                    for (Job job : jc.getFailedJobs()) {
                        completeFailedJobsInThisRun.add(job);
                        log.info("job " + job.getAssignedJobID() + " has failed! Stop running all dependent jobs");
                    }
                    failedJobs.addAll(jc.getFailedJobs());
                }


                int removedMROp = jcc.updateMROpPlan(completeFailedJobsInThisRun);


                numMRJobsCompl += removedMROp;


                List<Job> jobs = jc.getSuccessfulJobs();
                jcc.moveResults(jobs);
                succJobs.addAll(jobs);


                // collecting final statistics
                MRPigStatsUtil.accumulateStats(jc);


            }
            catch (Exception e) {
                throw e;
            }
            finally {
                jc.stop();
            }
        }


        MRScriptState.get().emitProgressUpdatedNotification(100);


        log.info( "100% complete");


        boolean failed = false;


        if(failedNativeMR.size() > 0){
            failed = true;
        }


        if (!"false".equalsIgnoreCase(pc.getProperties().getProperty(PigConfiguration.PIG_DELETE_TEMP_FILE))) {
            // Clean up all the intermediate data
            for (String path : intermediateVisitor.getIntermediate()) {
                // Skip non-file system paths such as hbase, see PIG-3617
                if (HadoopShims.hasFileSystemImpl(new Path(path), conf)) {
                    FileLocalizer.delete(path, pc);
                }
            }

View Full Code Here

Examples of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MRIntermediateDataVisitor

        // start collecting statistics
        PigStatsUtil.startCollection(pc, jobClient, jcc, mrp);


        // Find all the intermediate data stores. The plan will be destroyed during compile/execution
        // so this needs to be done before.
        MRIntermediateDataVisitor intermediateVisitor = new MRIntermediateDataVisitor(mrp);
        intermediateVisitor.visit();


        List<Job> failedJobs = new LinkedList<Job>();
        List<NativeMapReduceOper> failedNativeMR = new LinkedList<NativeMapReduceOper>();
        List<Job> completeFailedJobsInThisRun = new LinkedList<Job>();
        List<Job> succJobs = new LinkedList<Job>();
        int totalMRJobs = mrp.size();
        int numMRJobsCompl = 0;
        double lastProg = -1;
        long scriptSubmittedTimestamp = System.currentTimeMillis();


        //create the exception handler for the job control thread
        //and register the handler with the job control thread
        JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();


        boolean stop_on_failure =
            pc.getProperties().getProperty("stop.on.failure", "false").equals("true");


        // jc is null only when mrp.size == 0
        while(mrp.size() != 0) {
            jc = jcc.compile(mrp, grpName);
            if(jc == null) {
                List<MapReduceOper> roots = new LinkedList<MapReduceOper>();
                roots.addAll(mrp.getRoots());


                // run the native mapreduce roots first then run the rest of the roots
                for(MapReduceOper mro: roots) {
                    if(mro instanceof NativeMapReduceOper) {
                        NativeMapReduceOper natOp = (NativeMapReduceOper)mro;
                        try {
                            ScriptState.get().emitJobsSubmittedNotification(1);
                            natOp.runJob();
                            numMRJobsCompl++;
                        } catch (IOException e) {


                            mrp.trimBelow(natOp);
                            failedNativeMR.add(natOp);


                            String msg = "Error running native mapreduce" +
                            " operator job :" + natOp.getJobId() + e.getMessage();


                            String stackTrace = getStackStraceStr(e);
                            LogUtils.writeLog(msg,
                                    stackTrace,
                                    pc.getProperties().getProperty("pig.logfile"),
                                    log
                            );
                            log.info(msg);


                            if (stop_on_failure) {
                                int errCode = 6017;


                                throw new ExecException(msg, errCode,
                                        PigException.REMOTE_ENVIRONMENT);
                            }


                        }
                        double prog = ((double)numMRJobsCompl)/totalMRJobs;
                        notifyProgress(prog, lastProg);
                        lastProg = prog;
                        mrp.remove(natOp);
                    }
                }
                continue;
            }
          // Initially, all jobs are in wait state.
            List<Job> jobsWithoutIds = jc.getWaitingJobs();
            log.info(jobsWithoutIds.size() +" map-reduce job(s) waiting for submission.");
            //notify listeners about jobs submitted
            ScriptState.get().emitJobsSubmittedNotification(jobsWithoutIds.size());


            // update Pig stats' job DAG with just compiled jobs
            PigStatsUtil.updateJobMroMap(jcc.getJobMroMap());


            // determine job tracker url
            String jobTrackerLoc;
            JobConf jobConf = jobsWithoutIds.get(0).getJobConf();
            try {
                String port = jobConf.get("mapred.job.tracker.http.address");
                String jobTrackerAdd = jobConf.get(HExecutionEngine.JOB_TRACKER_LOCATION);


                jobTrackerLoc = jobTrackerAdd.substring(0,jobTrackerAdd.indexOf(":"))
                + port.substring(port.indexOf(":"));
            }
            catch(Exception e){
                // Could not get the job tracker location, most probably we are running in local mode.
                // If it is the case, we don't print out job tracker location,
                // because it is meaningless for local mode.
              jobTrackerLoc = null;
                log.debug("Failed to get job tracker location.");
            }


            completeFailedJobsInThisRun.clear();


            // Set the thread UDFContext so registered classes are available.
            final UDFContext udfContext = UDFContext.getUDFContext();
            Thread jcThread = new Thread(jc, "JobControl") {
                @Override
                public void run() {
                    UDFContext.setUdfContext(udfContext.clone()); //PIG-2576
                    super.run();
                }
            };


            jcThread.setUncaughtExceptionHandler(jctExceptionHandler);


            jcThread.setContextClassLoader(PigContext.getClassLoader());


            // mark the times that the jobs were submitted so it's reflected in job history props
            for (Job job : jc.getWaitingJobs()) {
                JobConf jobConfCopy = job.getJobConf();
                jobConfCopy.set("pig.script.submitted.timestamp",
                        Long.toString(scriptSubmittedTimestamp));
                jobConfCopy.set("pig.job.submitted.timestamp",
                        Long.toString(System.currentTimeMillis()));
                job.setJobConf(jobConfCopy);
            }


            //All the setup done, now lets launch the jobs.
            jcThread.start();


            try {
                // a flag whether to warn failure during the loop below, so users can notice failure earlier.
                boolean warn_failure = true;


                // Now wait, till we are finished.
                while(!jc.allFinished()){


                  try { jcThread.join(sleepTime); }
                  catch (InterruptedException e) {}


                  List<Job> jobsAssignedIdInThisRun = new ArrayList<Job>();


                  for(Job job : jobsWithoutIds){
                    if (job.getAssignedJobID() != null){


                      jobsAssignedIdInThisRun.add(job);
                      log.info("HadoopJobId: "+job.getAssignedJobID());
                      
                            // display the aliases being processed
                            MapReduceOper mro = jcc.getJobMroMap().get(job);
                            if (mro != null) {
                                String alias = ScriptState.get().getAlias(mro);
                                log.info("Processing aliases " + alias);
                                String aliasLocation = ScriptState.get().getAliasLocation(mro);
                                log.info("detailed locations: " + aliasLocation);
                            }




                      if(jobTrackerLoc != null){
                        log.info("More information at: http://"+ jobTrackerLoc+
                            "/jobdetails.jsp?jobid="+job.getAssignedJobID());
                      }


                            // update statistics for this job so jobId is set
                            PigStatsUtil.addJobStats(job);
                      ScriptState.get().emitJobStartedNotification(
                                    job.getAssignedJobID().toString());
                    }
                    else{
                      // This job is not assigned an id yet.
                    }
                  }
                  jobsWithoutIds.removeAll(jobsAssignedIdInThisRun);


                  double prog = (numMRJobsCompl+calculateProgress(jc, jobClient))/totalMRJobs;
                  if (notifyProgress(prog, lastProg)) {
                        lastProg = prog;
                    }


                  // collect job stats by frequently polling of completed jobs (PIG-1829)
                  PigStatsUtil.accumulateStats(jc);
                  
                    // if stop_on_failure is enabled, we need to stop immediately when any job has failed
                    checkStopOnFailure(stop_on_failure);
                    // otherwise, we just display a warning message if there's any failure
                    if (warn_failure && !jc.getFailedJobs().isEmpty()) {
                        // we don't warn again for this group of jobs
                        warn_failure = false;
                        log.warn("Ooops! Some job has failed! Specify -stop_on_failure if you "
                                + "want Pig to stop immediately on failure.");
                    }
                }


                //check for the jobControlException first
                //if the job controller fails before launching the jobs then there are
                //no jobs to check for failure
                if (jobControlException != null) {
                    if (jobControlException instanceof PigException) {
                        if (jobControlExceptionStackTrace != null) {
                            LogUtils.writeLog("Error message from job controller",
                                    jobControlExceptionStackTrace, pc
                                            .getProperties().getProperty(
                                                    "pig.logfile"), log);
                        }
                        throw jobControlException;
                    } else {
                        int errCode = 2117;
                        String msg = "Unexpected error when launching map reduce job.";
                        throw new ExecException(msg, errCode, PigException.BUG,
                                jobControlException);
                    }
                }


                if (!jc.getFailedJobs().isEmpty() ) {
                    // stop if stop_on_failure is enabled
                    checkStopOnFailure(stop_on_failure);


                    // If we only have one store and that job fail, then we sure
                    // that the job completely fail, and we shall stop dependent jobs
                    for (Job job : jc.getFailedJobs()) {
                        completeFailedJobsInThisRun.add(job);
                        log.info("job " + job.getAssignedJobID() + " has failed! Stop running all dependent jobs");
                    }
                    failedJobs.addAll(jc.getFailedJobs());
                }


                int removedMROp = jcc.updateMROpPlan(completeFailedJobsInThisRun);


                numMRJobsCompl += removedMROp;


                List<Job> jobs = jc.getSuccessfulJobs();
                jcc.moveResults(jobs);
                succJobs.addAll(jobs);


                // collecting final statistics
                PigStatsUtil.accumulateStats(jc);


          }
          catch (Exception e) {
            throw e;
          }
          finally {
            jc.stop();
          }
        }


        ScriptState.get().emitProgressUpdatedNotification(100);


        log.info( "100% complete");


        boolean failed = false;


        if(failedNativeMR.size() > 0){
            failed = true;
        }


        // Clean up all the intermediate data
        for (String path : intermediateVisitor.getIntermediate()) {
            FileLocalizer.delete(path, pc);
        }


        // Look to see if any jobs failed.  If so, we need to report that.
        if (failedJobs != null && failedJobs.size() > 0) {

View Full Code Here

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.