/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.List;
import java.util.LinkedList;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.hadoop.mapred.jobcontrol.JobControl;
import org.apache.pig.PigException;
import org.apache.pig.PigWarning;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.executionengine.ExecutionEngine;
import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
import org.apache.pig.backend.hadoop.executionengine.HExecutionEngine;
import org.apache.pig.impl.PigContext;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler.LastInputStreamingOptimizer;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.EndOfAllInputSetter;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MRPrinter;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.DotMRPrinter;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.POPackageAnnotator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POJoinPackage;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
import org.apache.pig.impl.plan.CompilationMessageCollector;
import org.apache.pig.impl.plan.PlanException;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.plan.CompilationMessageCollector.MessageType;
import org.apache.pig.impl.util.ConfigurationValidator;
import org.apache.pig.impl.util.LogUtils;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.io.FileSpec;
import org.apache.pig.tools.pigstats.PigStats;
/**
* Main class that launches pig for Map Reduce
*
*/
public class MapReduceLauncher extends Launcher{

    /** Class-wide logger. */
    private static final Log log = LogFactory.getLog(MapReduceLauncher.class);

    // Used to track the exception thrown by the job control, which is run in a
    // separate thread. Written by JobControlThreadExceptionHandler (on the job
    // control thread) and read by launchPig() after the polling loop observes
    // jc.allFinished().
    private Exception jobControlException = null;

    // Full stack-trace text of jobControlException, captured so it can be
    // written to the pig log file before rethrowing.
    private String jobControlExceptionStackTrace = null;

    // Whether warnings should be aggregated and reported once per warning type
    // instead of individually; driven by the "aggregate.warning" property,
    // which is read at the start of launchPig().
    private boolean aggregateWarning = false;

    // Maps each (non-temporary) store location whose job failed to the backend
    // exception that caused the failure (may map to null when no backend
    // exception could be retrieved). Rebuilt on every reset().
    private Map<FileSpec, Exception> failureMap;

    /**
     * Get the exception that caused a failure on the backend for a
     * store location (if any).
     */
    public Exception getError(FileSpec spec) {
        return failureMap.get(spec);
    }

    @Override
    public void reset() {
        // Clear per-run failure bookkeeping before delegating to the
        // superclass reset (which clears the succeeded/failed store lists).
        failureMap = new HashMap<FileSpec, Exception>();
        super.reset();
    }
@Override
public PigStats launchPig(PhysicalPlan php,
                          String grpName,
                          PigContext pc) throws PlanException,
                                                VisitorException,
                                                IOException,
                                                ExecException,
                                                JobCreationException,
                                                Exception {
    // Poll interval (ms) used while waiting for the hadoop jobs to finish.
    long sleepTime = 500;
    aggregateWarning = "true".equalsIgnoreCase(pc.getProperties().getProperty("aggregate.warning"));

    // Compile the physical plan into a DAG of map-reduce operators and seed
    // the stats object with the plans so per-job stats can be accumulated.
    MROperPlan mrp = compile(php, pc);
    PigStats stats = new PigStats();
    stats.setMROperatorPlan(mrp);
    stats.setExecType(pc.getExecType());
    stats.setPhysicalPlan(php);

    ExecutionEngine exe = pc.getExecutionEngine();
    ConfigurationValidator.validatePigProperties(exe.getConfiguration());
    Configuration conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
    JobClient jobClient = ((HExecutionEngine)exe).getJobClient();

    JobControlCompiler jcc = new JobControlCompiler(pc, conf);

    List<Job> failedJobs = new LinkedList<Job>();
    List<Job> succJobs = new LinkedList<Job>();
    JobControl jc;
    // Progress bookkeeping: jobs completed in earlier batches + jobs in the
    // current batch, over the total number of MR jobs in the plan.
    int totalMRJobs = mrp.size();
    int numMRJobsCompl = 0;
    int numMRJobsCurrent = 0;
    double lastProg = -1;

    //create the exception handler for the job control thread
    //and register the handler with the job control thread
    JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();

    // jcc.compile() returns one batch of runnable jobs at a time (jobs whose
    // dependencies are satisfied); loop until no batch remains.
    while((jc = jcc.compile(mrp, grpName)) != null) {
        numMRJobsCurrent = jc.getWaitingJobs().size();

        // Run the JobControl on its own thread so we can poll progress here;
        // any uncaught failure on that thread lands in jobControlException.
        Thread jcThread = new Thread(jc);
        jcThread.setUncaughtExceptionHandler(jctExceptionHandler);
        jcThread.start();

        // Poll until the whole batch has finished, logging progress in
        // roughly 1% increments (the final 100% is logged after the loop).
        while(!jc.allFinished()){
            try {
                Thread.sleep(sleepTime);
            // NOTE(review): interrupt is deliberately ignored to keep
            // polling, but the interrupt status is not restored — confirm
            // callers never rely on interruption here.
            } catch (InterruptedException e) {}
            double prog = (numMRJobsCompl+calculateProgress(jc, jobClient))/totalMRJobs;
            if(prog>=(lastProg+0.01)){
                int perCom = (int)(prog * 100);
                if(perCom!=100)
                    log.info( perCom + "% complete");
            }
            // NOTE(review): lastProg is updated every poll, not only when a
            // message is logged, so progress creeping by <1% per poll may
            // never be reported — verify this is intended.
            lastProg = prog;
        }

        //check for the jobControlException first
        //if the job controller fails before launching the jobs then there are
        //no jobs to check for failure
        if(jobControlException != null) {
            if(jobControlException instanceof PigException) {
                // A typed Pig failure: persist the full trace to the pig log
                // file, then rethrow it as-is for the caller to handle.
                if(jobControlExceptionStackTrace != null) {
                    LogUtils.writeLog("Error message from job controller", jobControlExceptionStackTrace,
                            pc.getProperties().getProperty("pig.logfile"),
                            log);
                }
                throw jobControlException;
            } else {
                // Anything else is unexpected; wrap it as an internal bug.
                int errCode = 2117;
                String msg = "Unexpected error when launching map reduce job.";
                throw new ExecException(msg, errCode, PigException.BUG, jobControlException);
            }
        }

        numMRJobsCompl += numMRJobsCurrent;
        failedJobs.addAll(jc.getFailedJobs());

        // With stop.on.failure=true, abort immediately, naming every store
        // file affected by the failed jobs in this batch.
        if (!failedJobs.isEmpty()
                && "true".equalsIgnoreCase(
                        pc.getProperties().getProperty("stop.on.failure","false"))) {
            int errCode = 6017;
            StringBuilder msg = new StringBuilder("Execution failed, while processing ");
            for (Job j: failedJobs) {
                List<POStore> sts = jcc.getStores(j);
                for (POStore st: sts) {
                    msg.append(st.getSFile().getFileName());
                    msg.append(", ");
                }
            }
            // substring(...) trims the trailing ", " separator.
            throw new ExecException(msg.substring(0,msg.length()-2),
                    errCode, PigException.REMOTE_ENVIRONMENT);
        }

        // Move outputs of the successful jobs to their final locations and
        // fold this batch's counters into the run-wide stats.
        List<Job> jobs = jc.getSuccessfulJobs();
        jcc.moveResults(jobs);
        succJobs.addAll(jobs);

        stats.setJobClient(jobClient);
        stats.setJobControl(jc);
        stats.accumulateStats();

        jc.stop();
    }

    log.info( "100% complete");

    boolean failed = false;
    int finalStores = 0;

    // Look to see if any jobs failed. If so, we need to report that.
    if (failedJobs != null && failedJobs.size() > 0) {
        log.error(failedJobs.size()+" map reduce job(s) failed!");
        Exception backendException = null;
        for (Job fj : failedJobs) {
            // Best-effort retrieval of the backend error; remember the last
            // one seen so it can be associated with the failed stores below.
            try {
                getStats(fj, jobClient, true, pc);
            } catch (Exception e) {
                backendException = e;
            }
            List<POStore> sts = jcc.getStores(fj);
            for (POStore st: sts) {
                if (!st.isTmpStore()) {
                    // Only user-visible (non-temporary) stores count as
                    // final outputs and feed getError()/failureMap.
                    failedStores.add(st.getSFile());
                    failureMap.put(st.getSFile(), backendException);
                    finalStores++;
                }
                FileLocalizer.registerDeleteOnFail(st.getSFile().getFileName(), pc);
                log.error("Failed to produce result in: \""+st.getSFile().getFileName()+"\"");
            }
        }
        failed = true;
    }

    // Aggregate warning counters across all successful jobs (when enabled).
    Map<Enum, Long> warningAggMap = new HashMap<Enum, Long>();

    if(succJobs!=null) {
        for(Job job : succJobs){
            List<POStore> sts = jcc.getStores(job);
            for (POStore st: sts) {
                if (!st.isTmpStore()) {
                    succeededStores.add(st.getSFile());
                    finalStores++;
                }
                log.info("Successfully stored result in: \""+st.getSFile().getFileName()+"\"");
            }
            getStats(job,jobClient, false, pc);
            if(aggregateWarning) {
                computeWarningAggregate(job, jobClient, warningAggMap);
            }
        }
    }

    if(aggregateWarning) {
        CompilationMessageCollector.logAggregate(warningAggMap, MessageType.Warning, log) ;
    }

    // Report records and bytes written. Only do this in the single store case. Multi-store
    // scripts mess up the stats reporting from hadoop.
    List<String> rji = stats.getRootJobIDs();
    if (rji != null && rji.size() == 1 && finalStores == 1) {
        // -1 is the sentinel for "stat unavailable".
        if(stats.getRecordsWritten()==-1) {
            log.info("Records written : Unable to determine number of records written");
        } else {
            log.info("Records written : " + stats.getRecordsWritten());
        }
        if(stats.getBytesWritten()==-1) {
            log.info("Bytes written : Unable to determine number of bytes written");
        } else {
            log.info("Bytes written : " + stats.getBytesWritten());
        }
    }

    if (!failed) {
        log.info("Success!");
    } else {
        // Distinguish a partial failure (some jobs succeeded) from a total one.
        if (succJobs != null && succJobs.size() > 0) {
            log.info("Some jobs have failed!");
        } else {
            log.info("Failed!");
        }
    }

    jcc.reset();
    return stats;
}
/**
 * Prints the compiled map-reduce plan for {@code php} to {@code ps},
 * either as readable text (format "text") or as a DOT graph otherwise.
 */
@Override
public void explain(
        PhysicalPlan php,
        PigContext pc,
        PrintStream ps,
        String format,
        boolean verbose) throws PlanException, VisitorException,
                                IOException {
    log.trace("Entering MapReduceLauncher.explain");
    MROperPlan mrPlan = compile(php, pc);

    if (!format.equals("text")) {
        // Any non-text format gets the DOT rendering, preceded by a banner.
        ps.println("#--------------------------------------------------");
        ps.println("# Map Reduce Plan ");
        ps.println("#--------------------------------------------------");
        DotMRPrinter dotPrinter = new DotMRPrinter(mrPlan, ps);
        dotPrinter.setVerbose(verbose);
        dotPrinter.dump();
        ps.println("");
    } else {
        // Plain-text rendering via the visitor-based printer.
        MRPrinter textPrinter = new MRPrinter(ps, mrPlan);
        textPrinter.setVerbose(verbose);
        textPrinter.visit();
    }
}
/**
 * Compiles a physical plan into an MR-operator plan, then runs the fixed
 * sequence of plan optimizers/annotators over it. The visitor order below
 * is significant and is preserved exactly.
 */
private MROperPlan compile(
        PhysicalPlan php,
        PigContext pc) throws PlanException, IOException, VisitorException {
    MRCompiler mrCompiler = new MRCompiler(php, pc);
    mrCompiler.randomizeFileLocalizer();
    mrCompiler.compile();
    MROperPlan mrPlan = mrCompiler.getMRPlan();

    // Surface any warning(s) collected during MR compilation.
    mrCompiler.getMessageCollector().logMessages(MessageType.Warning, aggregateWarning, log);

    String chunkSize =
        pc.getProperties().getProperty(
                "last.input.chunksize", POJoinPackage.DEFAULT_CHUNK_SIZE);

    // Combiner use may be disabled with -Dpig.exec.nocombiner=true.
    if (!"true".equals(System.getProperty("pig.exec.nocombiner"))) {
        CombinerOptimizer combinerOptimizer = new CombinerOptimizer(mrPlan, chunkSize);
        combinerOptimizer.visit();
        // Surface any warning(s) collected by the combiner optimizer.
        combinerOptimizer.getMessageCollector().logMessages(MessageType.Warning, aggregateWarning, log);
    }

    // Optimize the jobs that have a load/store only first MR job followed
    // by a sample job.
    SampleOptimizer sampleOptimizer = new SampleOptimizer(mrPlan);
    sampleOptimizer.visit();

    // Optimize key-value handling inside package operators.
    POPackageAnnotator packageAnnotator = new POPackageAnnotator(mrPlan);
    packageAnnotator.visit();

    // Optimize joins.
    LastInputStreamingOptimizer streamingOptimizer =
        new MRCompiler.LastInputStreamingOptimizer(mrPlan, chunkSize);
    streamingOptimizer.visit();

    // Figure out the type of the key for each map plan; needed so that an
    // appropriate NullableXXXWritable can be created when the key is null.
    KeyTypeDiscoveryVisitor keyTypeVisitor = new KeyTypeDiscoveryVisitor(mrPlan);
    keyTypeVisitor.visit();

    // Remove the filter(constant(true)) operators introduced by splits.
    NoopFilterRemover filterRemover = new NoopFilterRemover(mrPlan);
    filterRemover.visit();

    // Reduce the number of MROpers generated by multi-query (multi-store)
    // scripts.
    MultiQueryOptimizer multiQueryOptimizer = new MultiQueryOptimizer(mrPlan);
    multiQueryOptimizer.visit();

    // Remove unnecessary stores (as can happen with splits in some cases);
    // this must run after MultiQueryOptimizer and NoopFilterRemover.
    NoopStoreRemover storeRemover = new NoopStoreRemover(mrPlan);
    storeRemover.visit();

    // Check whether a stream operator is present; done after
    // MultiQueryOptimizer because it can shift streams from map to reduce.
    EndOfAllInputSetter endOfInputSetter = new EndOfAllInputSetter(mrPlan);
    endOfInputSetter.visit();

    return mrPlan;
}
/**
 * An exception handler class to handle exceptions thrown by the job controller thread
 * Its a local class. This is the only mechanism to catch unhandled thread exceptions
 * Unhandled exceptions in threads are handled by the VM if the handler is not registered
 * explicitly or if the default handler is null
 */
class JobControlThreadExceptionHandler implements Thread.UncaughtExceptionHandler {

    /**
     * Captures the throwable's stack trace into
     * {@code jobControlExceptionStackTrace} and stores a resolved exception
     * into {@code jobControlException}, which launchPig() examines (and
     * rethrows) after its polling loop completes.
     */
    public void uncaughtException(Thread thread, Throwable throwable) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        PrintStream ps = new PrintStream(baos);
        throwable.printStackTrace(ps);
        jobControlExceptionStackTrace = baos.toString();
        try {
            // Try to reconstruct a typed exception (e.g. a PigException) from
            // the serialized stack trace so launchPig() can rethrow it faithfully.
            jobControlException = getExceptionFromString(jobControlExceptionStackTrace);
        } catch (Exception e) {
            // Could not parse the trace back into an exception; fall back to a
            // generic RuntimeException. Keep the original throwable as the
            // cause so the real backend failure is not lost.
            String errMsg = "Could not resolve error that occured when launching map reduce job: "
                    + getFirstLineFromMessage(jobControlExceptionStackTrace);
            jobControlException = new RuntimeException(errMsg, throwable);
        }
    }
}
/**
 * Folds the per-warning-type hadoop counters of a completed job into
 * {@code aggMap}, so that warnings can be reported once in aggregate at the
 * end of the run instead of once per occurrence.
 *
 * @param job       the completed job-control job whose counters to read
 * @param jobClient client used to look up the running job by its assigned id
 * @param aggMap    accumulator mapping each PigWarning to its running total
 */
void computeWarningAggregate(Job job, JobClient jobClient, Map<Enum, Long> aggMap) {
    JobID mapRedJobID = job.getAssignedJobID();
    RunningJob runningJob = null;
    try {
        runningJob = jobClient.getJob(mapRedJobID);
        if(runningJob != null) {
            Counters counters = runningJob.getCounters();
            if (counters==null)
            {
                // Count jobs that came back without counters so the final
                // report can note that the aggregation may be incomplete.
                Long previousNullCount = aggMap.get(PigWarning.NULL_COUNTER_COUNT);
                long nullCounterCount = (previousNullCount == null) ? 0 : previousNullCount;
                nullCounterCount++;
                aggMap.put(PigWarning.NULL_COUNTER_COUNT, nullCounterCount);
            }
            for (Enum e : PigWarning.values()) {
                if (e != PigWarning.NULL_COUNTER_COUNT) {
                    Long currentCount = aggMap.get(e);
                    currentCount = (currentCount == null ? 0 : currentCount);
                    // This code checks if the counters is null, if it is,
                    // we need to report to the user that the number
                    // of warning aggregations may not be correct. In fact,
                    // Counters should not be null, it is
                    // a hadoop bug, once this bug is fixed in hadoop, the
                    // null handling code should never be hit.
                    // See Pig-943
                    if (counters != null)
                        currentCount += counters.getCounter(e);
                    aggMap.put(e, currentCount);
                }
            }
        }
    } catch (IOException ioe) {
        // Best-effort: missing aggregation is non-fatal, but log the cause
        // rather than discarding it.
        String msg = "Unable to retrieve job to compute warning aggregation.";
        log.warn(msg, ioe);
    }
}
}