Package org.apache.pig.tools.pigstats

Examples of org.apache.pig.tools.pigstats.PigStats$JobComparator


                                 String jobName) throws ExecException {
        MapReduceLauncher launcher = new MapReduceLauncher();
        List<ExecJob> jobs = new ArrayList<ExecJob>();

        try {
            PigStats stats = launcher.launchPig(plan, jobName, pigContext);

            for (FileSpec spec: launcher.getSucceededFiles()) {
                jobs.add(new HJob(ExecJob.JOB_STATUS.COMPLETED, pigContext, spec, stats));
            }
View Full Code Here


        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
        pigServer.registerQuery("a = load '" + file + "';");
        pigServer.registerQuery("b = filter a by $0 > 50;");
        pigServer.registerQuery("c = foreach b generate $0 - 50;");
        ExecJob job = pigServer.store("c", "output_map_only");
        PigStats pigStats = job.getStatistics();
       
        //counting the no. of bytes in the output file
        //long filesize = cluster.getFileSystem().getFileStatus(new Path("output_map_only")).getLen();
        InputStream is = FileLocalizer.open(FileLocalizer.fullPath(
                "output_map_only", pigServer.getPigContext()), pigServer
                .getPigContext());

        long filesize = 0;
        while(is.read() != -1) filesize++;
       
        is.close();
       
        cluster.getFileSystem().delete(new Path(file), true);
        cluster.getFileSystem().delete(new Path("output_map_only"), true);

        System.out.println("============================================");
        System.out.println("Test case Map Only");
        System.out.println("============================================");

        JobGraph jg = pigStats.getJobGraph();
        Iterator<JobStats> iter = jg.iterator();
        while (iter.hasNext()) {
            JobStats js = iter.next();                   

            System.out.println("Map input records : " + js.getMapInputRecords());
View Full Code Here

        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
        pigServer.registerQuery("a = load '" + file + "';");
        pigServer.registerQuery("b = filter a by $0 > 50;");
        pigServer.registerQuery("c = foreach b generate $0 - 50;");
        ExecJob job = pigServer.store("c", "output_map_only", "BinStorage");
        PigStats pigStats = job.getStatistics();
       
        InputStream is = FileLocalizer.open(FileLocalizer.fullPath(
                "output_map_only", pigServer.getPigContext()),
                pigServer.getPigContext());

        long filesize = 0;
        while(is.read() != -1) filesize++;
       
        is.close();

        cluster.getFileSystem().delete(new Path(file), true);
        cluster.getFileSystem().delete(new Path("output_map_only"), true);

        System.out.println("============================================");
        System.out.println("Test case Map Only");
        System.out.println("============================================");

        JobGraph jp = pigStats.getJobGraph();
        Iterator<JobStats> iter = jp.iterator();
        while (iter.hasNext()) {
            JobStats js = iter.next();
       
            System.out.println("Map input records : " + js.getMapInputRecords());
            assertEquals(MAX, js.getMapInputRecords());
            System.out.println("Map output records : " + js.getMapOutputRecords());
            assertEquals(count, js.getMapOutputRecords());
            assertEquals(0, js.getReduceInputRecords());
            assertEquals(0, js.getReduceOutputRecords());
        }
           
        System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
        assertEquals(filesize, pigStats.getBytesWritten());
    }
View Full Code Here

        pigServer.registerQuery("a = load '" + file + "';");
        pigServer.registerQuery("b = group a by $0;");
        pigServer.registerQuery("c = foreach b generate group;");

        ExecJob job = pigServer.store("c", "output");
        PigStats pigStats = job.getStatistics();
        InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
                pigServer.getPigContext()), pigServer.getPigContext());

        long filesize = 0;
        while(is.read() != -1) filesize++;
       
        is.close();

        cluster.getFileSystem().delete(new Path(file), true);
        cluster.getFileSystem().delete(new Path("output"), true);

        System.out.println("============================================");
        System.out.println("Test case MapReduce");
        System.out.println("============================================");

        JobGraph jp = pigStats.getJobGraph();
        Iterator<JobStats> iter = jp.iterator();
        while (iter.hasNext()) {
            JobStats js = iter.next();
            System.out.println("Map input records : " + js.getMapInputRecords());
            assertEquals(MAX, js.getMapInputRecords());
            System.out.println("Map output records : " + js.getMapOutputRecords());
            assertEquals(MAX, js.getMapOutputRecords());
            System.out.println("Reduce input records : " + js.getReduceInputRecords());
            assertEquals(MAX, js.getReduceInputRecords());
            System.out.println("Reduce output records : " + js.getReduceOutputRecords());
            assertEquals(count, js.getReduceOutputRecords());
        }
        System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
        assertEquals(filesize, pigStats.getBytesWritten());
    }
View Full Code Here

        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
        pigServer.registerQuery("a = load '" + file + "';");
        pigServer.registerQuery("b = group a by $0;");
        pigServer.registerQuery("c = foreach b generate group;");
        ExecJob job = pigServer.store("c", "output", "BinStorage");
        PigStats pigStats = job.getStatistics();

        InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
                pigServer.getPigContext()), pigServer.getPigContext());
        long filesize = 0;
        while(is.read() != -1) filesize++;
       
        is.close();
       
        cluster.getFileSystem().delete(new Path(file), true);
        cluster.getFileSystem().delete(new Path("output"), true);

        System.out.println("============================================");
        System.out.println("Test case MapReduce");
        System.out.println("============================================");

        JobGraph jp = pigStats.getJobGraph();
        Iterator<JobStats> iter = jp.iterator();
        while (iter.hasNext()) {
            JobStats js = iter.next();
            System.out.println("Map input records : " + js.getMapInputRecords());
            assertEquals(MAX, js.getMapInputRecords());
            System.out.println("Map output records : " + js.getMapOutputRecords());
            assertEquals(MAX, js.getMapOutputRecords());
            System.out.println("Reduce input records : " + js.getReduceInputRecords());
            assertEquals(MAX, js.getReduceInputRecords());
            System.out.println("Reduce output records : " + js.getReduceOutputRecords());
            assertEquals(count, js.getReduceOutputRecords());
        }
        System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
        assertEquals(filesize, pigStats.getBytesWritten());
    }
View Full Code Here

        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
        pigServer.registerQuery("a = load '" + file + "';");
        pigServer.registerQuery("b = group a by $0;");
        pigServer.registerQuery("c = foreach b generate group, SUM(a.$1);");
        ExecJob job = pigServer.store("c", "output");
        PigStats pigStats = job.getStatistics();

        InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
                pigServer.getPigContext()), pigServer.getPigContext());
        long filesize = 0;
        while(is.read() != -1) filesize++;
       
        is.close();
        cluster.getFileSystem().delete(new Path(file), true);
        cluster.getFileSystem().delete(new Path("output"), true);

        System.out.println("============================================");
        System.out.println("Test case MapCombineReduce");
        System.out.println("============================================");
       
        JobGraph jp = pigStats.getJobGraph();
        Iterator<JobStats> iter = jp.iterator();
        while (iter.hasNext()) {
            JobStats js = iter.next();
            System.out.println("Map input records : " + js.getMapInputRecords());
            assertEquals(MAX, js.getMapInputRecords());
            System.out.println("Map output records : " + js.getMapOutputRecords());
            assertEquals(MAX, js.getMapOutputRecords());
            System.out.println("Reduce input records : " + js.getReduceInputRecords());
            assertEquals(count, js.getReduceInputRecords());
            System.out.println("Reduce output records : " + js.getReduceOutputRecords());
            assertEquals(count, js.getReduceOutputRecords());
        }
        System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
        assertEquals(filesize, pigStats.getBytesWritten());
    }
View Full Code Here

        pigServer.registerQuery("a = load '" + file + "';");
        pigServer.registerQuery("b = group a by $0;");
        pigServer.registerQuery("c = foreach b generate group, SUM(a.$1);");

        ExecJob job = pigServer.store("c", "output", "BinStorage");
        PigStats pigStats = job.getStatistics();
       
        InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
                pigServer.getPigContext()), pigServer.getPigContext());

        long filesize = 0;
        while(is.read() != -1) filesize++;
       
        is.close();
        cluster.getFileSystem().delete(new Path(file), true);
        cluster.getFileSystem().delete(new Path("output"), true);

        System.out.println("============================================");
        System.out.println("Test case MapCombineReduce");
        System.out.println("============================================");
        JobGraph jp = pigStats.getJobGraph();
        Iterator<JobStats> iter = jp.iterator();
        while (iter.hasNext()) {
            JobStats js = iter.next();
            System.out.println("Map input records : " + js.getMapInputRecords());
            assertEquals(MAX, js.getMapInputRecords());
            System.out.println("Map output records : " + js.getMapOutputRecords());
            assertEquals(MAX, js.getMapOutputRecords());
            System.out.println("Reduce input records : " + js.getReduceInputRecords());
            assertEquals(count, js.getReduceInputRecords());
            System.out.println("Reduce output records : " + js.getReduceOutputRecords());
            assertEquals(count, js.getReduceOutputRecords());
        }
        System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
        assertEquals(filesize, pigStats.getBytesWritten());
    }
View Full Code Here

        pigServer.registerQuery("a = load '" + file + "';");
        pigServer.registerQuery("b = order a by $0;");
        pigServer.registerQuery("c = group b by $0;");
        pigServer.registerQuery("d = foreach c generate group, SUM(b.$1);");
        ExecJob job = pigServer.store("d", "output");
        PigStats pigStats = job.getStatistics();
       
        InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
                pigServer.getPigContext()), pigServer.getPigContext());
        long filesize = 0;
        while(is.read() != -1) filesize++;
       
        is.close();
       
        cluster.getFileSystem().delete(new Path(file), true);
        cluster.getFileSystem().delete(new Path("output"), true);
       
        System.out.println("============================================");
        System.out.println("Test case MultipleMRJobs");
        System.out.println("============================================");
       
        JobGraph jp = pigStats.getJobGraph();
        JobStats js = (JobStats)jp.getSinks().get(0);
       
        System.out.println("Job id: " + js.getName());
        System.out.println(jp.toString());
       
View Full Code Here

        pigServer.registerQuery("b = filter a by $0 > 50;");
        pigServer.registerQuery("c = filter a by $0 <= 50;");
        pigServer.registerQuery("store b into '/tmp/outout1';");
        pigServer.registerQuery("store c into '/tmp/outout2';");
        List<ExecJob> jobs = pigServer.executeBatch();
        PigStats stats = jobs.get(0).getStatistics();
        assertTrue(stats.getOutputLocations().size() == 2);
       
        cluster.getFileSystem().delete(new Path(file), true);
        cluster.getFileSystem().delete(new Path("/tmp/outout1"), true);
        cluster.getFileSystem().delete(new Path("/tmp/outout2"), true);

        JobStats js = (JobStats)stats.getJobGraph().getSinks().get(0);
       
        Map<String, Long> entry = js.getMultiStoreCounters();
        long counter = 0;
        for (Long val : entry.values()) {
            counter += val;
View Full Code Here

        pigServer.registerQuery("f = group e by $0;");
        pigServer.registerQuery("g = foreach f generate group;");
        pigServer.registerQuery("store d into '/tmp/outout1';");
        pigServer.registerQuery("store g into '/tmp/outout2';");
        List<ExecJob> jobs = pigServer.executeBatch();
        PigStats stats = jobs.get(0).getStatistics();
       
        assertTrue(stats.getOutputLocations().size() == 2);
              
        cluster.getFileSystem().delete(new Path(file), true);
        cluster.getFileSystem().delete(new Path("/tmp/outout1"), true);
        cluster.getFileSystem().delete(new Path("/tmp/outout2"), true);

        JobStats js = (JobStats)stats.getJobGraph().getSinks().get(0);
       
        Map<String, Long> entry = js.getMultiStoreCounters();
        long counter = 0;
        for (Long val : entry.values()) {
            counter += val;
View Full Code Here

TOP

Related Classes of org.apache.pig.tools.pigstats.PigStats$JobComparator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.