Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.Counters$Counter


 
  /*
   * Parse and add the reduce task counters
   */
  private void parseAndAddReduceTaskCounters(ReduceTaskStatistics reduceTask, String counters) throws ParseException {
    Counters cnt = Counters.fromEscapedCompactString(counters);
    for (java.util.Iterator<Counters.Group> grps = cnt.iterator(); grps.hasNext(); ) {
      Counters.Group grp = grps.next();
      //String groupname = "<" + grp.getName() + ">::<" + grp.getDisplayName() + ">";
      for (java.util.Iterator<Counters.Counter> mycounters = grp.iterator(); mycounters.hasNext(); ) {
        Counters.Counter counter = mycounters.next();
        //String countername = "<"+counter.getName()+">::<"+counter.getDisplayName()+">::<"+counter.getValue()+">";
View Full Code Here


      } else {
        rJob = Submitter.runJob(job);
      }
      assertTrue("pipes job failed", rJob.isSuccessful());
     
      Counters counters = rJob.getCounters();
      Counters.Group wordCountCounters = counters.getGroup("WORDCOUNT");
      int numCounters = 0;
      for (Counter c : wordCountCounters) {
        System.out.println(c);
        ++numCounters;
      }
View Full Code Here

  @Test
  public void testCommandLine() throws Exception  {
    super.testCommandLine();
    // validate combiner counters
    String counterGrp = "org.apache.hadoop.mapred.Task$Counter";
    Counters counters = job.running_.getCounters();
    assertTrue(counters.findCounter(
               counterGrp, "COMBINE_INPUT_RECORDS").getValue() != 0);
    assertTrue(counters.findCounter(
               counterGrp, "COMBINE_OUTPUT_RECORDS").getValue() != 0);
  }
View Full Code Here

      File outFile = new File(OUTPUT_DIR, "part-00000").getAbsoluteFile();
      String output = StreamUtil.slurp(outFile);
      outFile.delete();
      assertEquals(outputExpect, output);
     
      Counters counters = job.running_.getCounters();
      assertNotNull("Counters", counters);
      Group group = counters.getGroup("UserCounters");
      assertNotNull("Group", group);
      Counter counter = group.getCounterForName("InputLines");
      assertNotNull("Counter", counter);
      assertEquals(3, counter.getCounter());
    } finally {
View Full Code Here

    assertTrue(runningJob.isSuccessful());
   
    if(validateCount) {
     //validate counters
      String counterGrp = "org.apache.hadoop.mapred.Task$Counter";
      Counters counters = runningJob.getCounters();
      assertEquals(counters.findCounter(counterGrp, "MAP_SKIPPED_RECORDS").
          getCounter(),MAPPER_BAD_RECORDS.size());
     
      int mapRecs = INPUTSIZE - MAPPER_BAD_RECORDS.size();
      assertEquals(counters.findCounter(counterGrp, "MAP_INPUT_RECORDS").
          getCounter(),mapRecs);
      assertEquals(counters.findCounter(counterGrp, "MAP_OUTPUT_RECORDS").
          getCounter(),mapRecs);
     
      int redRecs = mapRecs - REDUCER_BAD_RECORDS.size();
      assertEquals(counters.findCounter(counterGrp, "REDUCE_SKIPPED_RECORDS").
          getCounter(),REDUCER_BAD_RECORDS.size());
      assertEquals(counters.findCounter(counterGrp, "REDUCE_SKIPPED_GROUPS").
          getCounter(),REDUCER_BAD_RECORDS.size());
      assertEquals(counters.findCounter(counterGrp, "REDUCE_INPUT_GROUPS").
          getCounter(),redRecs);
      assertEquals(counters.findCounter(counterGrp, "REDUCE_INPUT_RECORDS").
          getCounter(),redRecs);
      assertEquals(counters.findCounter(counterGrp, "REDUCE_OUTPUT_RECORDS").
          getCounter(),redRecs);
    }
   
    List<String> badRecs = new ArrayList<String>();
    badRecs.addAll(MAPPER_BAD_RECORDS);
View Full Code Here

    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(MyMapper.class);

    RunningJob job = JobClient.runJob(conf);
    Counters counters = job.getCounters();
    int numDocs = (int) counters.findCounter(Records.PAGES).getCounter();

    LOG.info("Read " + numDocs + " docs.");

    if (cmdline.hasOption(COUNT_OPTION)) {
      String f = cmdline.getOptionValue(COUNT_OPTION);
View Full Code Here

        }
    }

    public long numOutputRecordsFromCounters(JobStats jobStats, String jobId) {
        JobClient jobClient = PigStats.get().getJobClient();
        Counters counters;
        try {
            RunningJob rj = jobClient.getJob(jobId);
            counters = rj.getCounters();
        } catch (IOException e) {
            log.error("Error getting job client, continuing", e);
            return 1;
        }

        Group fsGroup = counters.getGroup("FileSystemCounters");
        long hdfsBytes = fsGroup.getCounter("HDFS_BYTES_WRITTEN");
        long s3Bytes = fsGroup.getCounter("S3N_BYTES_WRITTEN");
        return hdfsBytes + s3Bytes;
    }
View Full Code Here

        return reducerDurations;
    }

    public boolean countersShowRecordsWritten(JobStats jobStats, String jobId) {
        JobClient jobClient = PigStats.get().getJobClient();
        Counters counters;
        try {
            RunningJob rj = jobClient.getJob(jobId);
            counters = rj.getCounters();
        } catch (IOException e) {
            log.error("Error getting job client, continuing", e);
            return true;
        }

        Group fsGroup = counters.getGroup("FileSystemCounters");
        long hdfsBytes = fsGroup.getCounter("HDFS_BYTES_WRITTEN");
        long s3Bytes = fsGroup.getCounter("S3N_BYTES_WRITTEN");
        log.info(String.format("Total of %s bytes were written by this m/r job", (hdfsBytes + s3Bytes)));
        if ((0 == s3Bytes) && (HDFS_DIRECTORY_SIZE == hdfsBytes)) {
            log.info("No s3 output and empty HDFS directory created");
View Full Code Here

    conf.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(bitext));
    FileOutputFormat.setOutputPath(conf, new Path("somealigns.test"));

    RunningJob rj = JobClient.runJob(conf);
    Counters cs = rj.getCounters();
    double lp = (double)cs.getCounter(CrossEntropyCounters.LOGPROB);
    double wc = (double)cs.getCounter(CrossEntropyCounters.WORDCOUNT);
    double ce = (lp / wc) / Math.log(2.0);
    System.out.println("Viterbi cross-entropy: " + ce + "   perplexity: " + Math.pow(2.0, ce));
  }
View Full Code Here

        FileOutputFormat.setOutputPath(conf, new Path(hac.getRoot()+"/"+outputPath.toString()));
        fileSys.delete(new Path(hac.getRoot()+"/"+outputPath.toString()), true);
        conf.setOutputFormat(SequenceFileOutputFormat.class);

        RunningJob job = JobClient.runJob(conf);
        Counters c = job.getCounters();
        double lp = c.getCounter(CrossEntropyCounters.LOGPROB);
        double wc = c.getCounter(CrossEntropyCounters.WORDCOUNT);
        double ce = lp/wc/Math.log(2);
        double perp = Math.pow(2.0, ce);
        double aer = ComputeAER(c);
        System.out.println("Iteration " + iteration + ": (" + modelType + ")\tCROSS-ENTROPY: " + ce + "   PERPLEXITY: " + perp);
        System.out.println("Iteration " + iteration + ": " + aer + " AER");
        aers.add(aer);     
        perps.add(perp);
      } finally { stopPServers(); }


      JobConf conf = new JobConf(hac, ModelMergeMapper2.class);
      System.err.println("Setting " + TTABLE_ITERATION_OUTPUT + " to " + outputPath.toString());
      conf.set(TTABLE_ITERATION_OUTPUT, hac.getRoot()+"/"+outputPath.toString());
      conf.setJobName("EMTrain.ModelMerge");
      //      conf.setOutputKeyClass(LongWritable.class);
      conf.setMapperClass(ModelMergeMapper2.class);           
      conf.setSpeculativeExecution(false);
      conf.setNumMapTasks(1);
      conf.setNumReduceTasks(0);
      conf.setInputFormat(NullInputFormat.class);
      conf.setOutputFormat(NullOutputFormat.class);
      conf.set("mapred.map.child.java.opts", "-Xmx2048m");
      conf.set("mapred.reduce.child.java.opts", "-Xmx2048m");

      //      FileInputFormat.setInputPaths(conf, root+"/dummy");
      //      fileSys.delete(new Path(root+"/dummy.out"), true);
      //      FileOutputFormat.setOutputPath(conf, new Path(root+"/dummy.out"));
      //      conf.setOutputFormat(SequenceFileOutputFormat.class);

      System.out.println("Running job "+conf.getJobName());
      System.out.println("Input: "+hac.getRoot()+"/dummy");
      System.out.println("Output: "+hac.getRoot()+"/dummy.out");

      JobClient.runJob(conf);
      fileSys.delete(new Path(hac.getRoot()+"/"+outputPath.toString()), true);

      if (lastIteration || lastModel1Iteration) {
        //hac.setBoolean("ha.generate.posteriors", true);
        conf = new JobConf(hac, HadoopAlign.class);
        sOutputPath=modelType + ".data." + iteration;
        outputPath = new Path(sOutputPath);

        conf.setJobName(modelType + ".align");
        conf.set("mapred.map.child.java.opts", "-Xmx2048m");
        conf.set("mapred.reduce.child.java.opts", "-Xmx2048m");

        // TODO use file cache
        /*try {
          if (hmm || iteration > 0) {
            URI ttable = new URI(fileSys.getHomeDirectory() + Path.SEPARATOR + hac.getTTablePath().toString());
            DistributedCache.addCacheFile(ttable, conf);
            System.out.println("cache<-- " + ttable);
          }

        } catch (Exception e) { throw new RuntimeException("Caught " + e); }
         */
        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        conf.set(KEY_TRAINER, MODEL1_TRAINER);
        conf.set(KEY_ITERATION, Integer.toString(iteration));
        if (hmm)
          conf.set(KEY_TRAINER, HMM_TRAINER);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(PhrasePair.class);

        conf.setMapperClass(AlignMapper.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setNumMapTasks(mapTasks);
        conf.setNumReduceTasks(reduceTasks);
        FileOutputFormat.setOutputPath(conf, new Path(hac.getRoot()+"/"+outputPath.toString()));

        //if last model1 iteration, save output path, to be used as input path in later iterations
        if (lastModel1Iteration) {
          FileInputFormat.setInputPaths(conf, cbtxt);
          model1PosteriorsPath = new Path(hac.getRoot()+"/"+outputPath.toString());
        } else {
          FileInputFormat.setInputPaths(conf, model1PosteriorsPath);         
        }

        fileSys.delete(outputPath, true);

        System.out.println("Running job "+conf.getJobName());

        RunningJob job = JobClient.runJob(conf);
        System.out.println("GENERATED: " + model1PosteriorsPath);
        Counters c = job.getCounters();
        double aer = ComputeAER(c);
        //        System.out.println("Iteration " + iteration + ": (" + modelType + ")\tCROSS-ENTROPY: " + ce + "   PERPLEXITY: " + perp);
        System.out.println("Iteration " + iteration + ": " + aer + " AER");
        aers.add(aer);     
        perps.add(0.0);
View Full Code Here

TOP

Related Classes of org.apache.hadoop.mapred.Counters$Counter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.