JobCreationException,
Exception {
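// Poll interval, in milliseconds, between checks on the progress of the submitted jobs.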
long sleepTime = 500;
aggregateWarning = "true".equalsIgnoreCase(pc.getProperties().getProperty("aggregate.warning"));
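// Compile the physical plan into a plan of map-reduce operators and initialize the
// stats object that will accumulate results across all runs.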
MROperPlan mrp = compile(php, pc);
PigStats stats = new PigStats();
stats.setMROperatorPlan(mrp);
stats.setExecType(pc.getExecType());
stats.setPhysicalPlan(php);
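// Validate the Pig properties, build the Hadoop configuration, and create the
// JobClient and JobControlCompiler used to submit and track the map-reduce jobs.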
ExecutionEngine exe = pc.getExecutionEngine();
ConfigurationValidator.validatePigProperties(exe.getConfiguration());
Configuration conf = ConfigurationUtil.toConfiguration(exe.getConfiguration());
JobClient jobClient = new JobClient(((HExecutionEngine)exe).getJobConf());
JobControlCompiler jcc = new JobControlCompiler(pc, conf);
List<Job> failedJobs = new LinkedList<Job>();
List<Job> completeFailedJobsInThisRun = new LinkedList<Job>();
List<Job> succJobs = new LinkedList<Job>();
JobControl jc;
int totalMRJobs = mrp.size();
int numMRJobsCompl = 0;
double lastProg = -1;
// Create the exception handler for the job control thread;
// it is registered with each job control thread below, before the thread is started.
JobControlThreadExceptionHandler jctExceptionHandler = new JobControlThreadExceptionHandler();
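// Compile and run the plan iteratively: each pass compiles the currently runnable
// map-reduce operators into a JobControl, waits for those jobs to finish, and then
// trims the completed operators from the plan before the next pass.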
while((jc = jcc.compile(mrp, grpName)) != null) {
// Initially, all jobs are in wait state.
List<Job> jobsWithoutIds = jc.getWaitingJobs();
log.info(jobsWithoutIds.size() +" map-reduce job(s) waiting for submission.");
String jobTrackerAdd;
String port;
String jobTrackerLoc;
JobConf jobConf = jobsWithoutIds.get(0).getJobConf();
try {
port = jobConf.get("mapred.job.tracker.http.address");
jobTrackerAdd = jobConf.get(HExecutionEngine.JOB_TRACKER_LOCATION);
jobTrackerLoc = jobTrackerAdd.substring(0,jobTrackerAdd.indexOf(":")) + port.substring(port.indexOf(":"));
}
catch(Exception e){
// Could not get the job tracker location; most probably we are running in local mode.
// In that case we don't print the job tracker location, because it is meaningless
// for local mode.
jobTrackerLoc = null;
log.debug("Failed to get job tracker location.");
}
completeFailedJobsInThisRun.clear();
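// Run the JobControl in its own thread so this thread can poll for progress below;
// any uncaught exception from that thread is captured by the handler and examined
// once the jobs are finished.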
Thread jcThread = new Thread(jc);
jcThread.setUncaughtExceptionHandler(jctExceptionHandler);
// All the setup is done, now let's launch the jobs.
jcThread.start();
// Now wait until all the jobs in this batch have finished.
while(!jc.allFinished()){
try { Thread.sleep(sleepTime); }
catch (InterruptedException e) { /* interrupted while sleeping; ignore and keep polling */ }
List<Job> jobsAssignedIdInThisRun = new ArrayList<Job>();
for(Job job : jobsWithoutIds){
if (job.getAssignedJobID() != null){
jobsAssignedIdInThisRun.add(job);
log.info("HadoopJobId: "+job.getAssignedJobID());
if(jobTrackerLoc != null){
log.info("More information at: http://"+ jobTrackerLoc+
"/jobdetails.jsp?jobid="+job.getAssignedJobID());
}
}
else{
// This job has not been assigned an id yet; check it again on the next poll.
}
}
jobsWithoutIds.removeAll(jobsAssignedIdInThisRun);
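// Overall progress = number of completed map-reduce jobs plus the fractional progress
// of the jobs in the current JobControl, divided by the total number of jobs; log it
// whenever it advances by at least one percent.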
double prog = (numMRJobsCompl+calculateProgress(jc, jobClient))/totalMRJobs;
if(prog>=(lastProg+0.01)){
int perCom = (int)(prog * 100);
if(perCom!=100)
log.info( perCom + "% complete");
}
lastProg = prog;
}
//check for the jobControlException first
//if the job controller fails before launching the jobs then there are
//no jobs to check for failure
if(jobControlException != null) {
if(jobControlException instanceof PigException) {
if(jobControlExceptionStackTrace != null) {
LogUtils.writeLog("Error message from job controller", jobControlExceptionStackTrace,
pc.getProperties().getProperty("pig.logfile"),
log);
}
throw jobControlException;
} else {
int errCode = 2117;
String msg = "Unexpected error when launching map reduce job.";
throw new ExecException(msg, errCode, PigException.BUG, jobControlException);
}
}
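// Some jobs in this batch failed. With stop.on.failure=true we abort right away;
// otherwise we record the failures, mark single-store jobs as complete failures so
// their dependents are not run, and continue with the remaining jobs.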
if (!jc.getFailedJobs().isEmpty()) {
if ("true".equalsIgnoreCase(
pc.getProperties().getProperty("stop.on.failure","false"))) {
int errCode = 6017;
StringBuilder msg = new StringBuilder();
for (int i=0;i<jc.getFailedJobs().size();i++) {
Job j = jc.getFailedJobs().get(i);
msg.append(getFirstLineFromMessage(j.getMessage()));
if (i!=jc.getFailedJobs().size()-1)
msg.append("\n");
}
throw new ExecException(msg.toString(),
errCode, PigException.REMOTE_ENVIRONMENT);
}
// If a job has only one store and it fails, then we know the job failed completely, and we should stop dependent jobs.
for (Job job : jc.getFailedJobs()) {
List<POStore> sts = jcc.getStores(job);
if (sts.size()==1)
completeFailedJobsInThisRun.add(job);
}
failedJobs.addAll(jc.getFailedJobs());
}
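// Update the MR plan for the next pass: operators that no longer need to run are
// removed, and the number removed counts towards the completed-job total used for
// progress reporting.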
int removedMROp = jcc.updateMROpPlan(completeFailedJobsInThisRun);
numMRJobsCompl += removedMROp;
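// Move the outputs of this pass's successful jobs into place, remember them for final
// reporting, fold their statistics into the accumulated PigStats, and stop the
// JobControl before the next compile pass.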
List<Job> jobs = jc.getSuccessfulJobs();
jcc.moveResults(jobs);
succJobs.addAll(jobs);
stats.setJobClient(jobClient);
stats.setJobControl(jc);
stats.accumulateStats();
jc.stop();
}
log.info( "100% complete");
boolean failed = false;
int finalStores = 0;
// Look to see if any jobs failed. If so, we need to report that.
if (failedJobs != null && failedJobs.size() > 0) {
log.error(failedJobs.size()+" map reduce job(s) failed!");
Exception backendException = null;
for (Job fj : failedJobs) {
try {
getStats(fj, jobClient, true, pc);
} catch (Exception e) {
backendException = e;
}
List<POStore> sts = jcc.getStores(fj);
for (POStore st: sts) {
if (!st.isTmpStore()) {
finalStores++;
log.error("Failed to produce result in: \""+st.getSFile().getFileName()+"\"");
}
failedStores.add(st);
failureMap.put(st.getSFile(), backendException);
}
}
failed = true;
}
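// When aggregate.warning is enabled, backend warnings from all successful jobs are
// collected into this map and logged once, after all jobs have been processed.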
Map<Enum, Long> warningAggMap = new HashMap<Enum, Long>();
if(succJobs!=null) {
for(Job job : succJobs){
List<POStore> sts = jcc.getStores(job);
for (POStore st: sts) {
// Currently (as of Feb 3 2010), Hadoop's local mode does not
// call cleanupJob on OutputCommitter (see https://issues.apache.org/jira/browse/MAPREDUCE-1447).
// So to work around that bug, we call setStoreSchema here on
// StoreFuncs which implement StoreMetadata.
/**********************************************************/
// NOTE: THE FOLLOWING IF SHOULD BE REMOVED ONCE MAPREDUCE-1447
// IS FIXED - TestStore.testSetStoreSchema() should fail at
// that time and removing this code should fix it.
/**********************************************************/
if(pc.getExecType() == ExecType.LOCAL) {
storeSchema(job, st);
}
if (!st.isTmpStore()) {
succeededStores.add(st);
finalStores++;
log.info("Successfully stored result in: \""+st.getSFile().getFileName()+"\"");
}
else
log.debug("Successfully stored result in: \""+st.getSFile().getFileName()+"\"");
}
getStats(job,jobClient, false, pc);
if(aggregateWarning) {
computeWarningAggregate(job, jobClient, warningAggMap);
}
}
}
if(aggregateWarning) {
CompilationMessageCollector.logAggregate(warningAggMap, MessageType.Warning, log) ;
}
// Report records and bytes written. Only do this in the single store case. Multi-store
// scripts mess up the stats reporting from hadoop.
List<String> rji = stats.getRootJobIDs();
if ( (rji != null && rji.size() == 1 && finalStores == 1) || pc.getExecType() == ExecType.LOCAL ) {
// currently counters are not working in local mode - see PIG-1286
if(stats.getRecordsWritten()==-1 || pc.getExecType() == ExecType.LOCAL) {
log.info("Records written : Unable to determine number of records written");
} else {
log.info("Records written : " + stats.getRecordsWritten());
}
if(stats.getBytesWritten()==-1 || pc.getExecType() == ExecType.LOCAL) {
log.info("Bytes written : Unable to determine number of bytes written");
} else {
log.info("Bytes written : " + stats.getBytesWritten());
}
if(stats.getSMMSpillCount()==-1) {
log.info("Spillable Memory Manager spill count : Unable to determine spillable memory manager spill count");
} else {
log.info("Spillable Memory Manager spill count : " + stats.getSMMSpillCount());
}
if(stats.getProactiveSpillCount() == -1) {
log.info("Proactive spill count : Unable to determine proactive spill count");
} else {
log.info("Proactive spill count : " + stats.getProactiveSpillCount());
}
}
if (!failed) {
log.info("Success!");