Package com.datasalt.pangool.tuplemr.mapred.lib.input.TupleInputFormat

Examples of com.datasalt.pangool.tuplemr.mapred.lib.input.TupleInputFormat.TupleInputReader


 
  /*
   * Read the Tuples from a TupleOutput using TupleInputReader.
   */
  public static void readTuples(Path file, Configuration conf, TupleVisitor iterator) throws IOException, InterruptedException {
    TupleInputReader reader = new TupleInputReader(conf);
    reader.initialize(new Path(file + ""), conf);
    while(reader.nextKeyValueNoSync()) {
      ITuple tuple = reader.getCurrentKey();
      iterator.onTuple(tuple);
    }
    reader.close();
  }
View Full Code Here


    if(args.length == 2 && args[1].equals("local")) {
      fS = FileSystem.getLocal(conf);
    }
   
    for(FileStatus fStatus: fS.globStatus(path)) {
      TupleInputReader reader = new TupleInputReader(conf);
      reader.initialize(fStatus.getPath(), conf);
      while(reader.nextKeyValueNoSync()) {
        System.out.println(fStatus.getPath() + "\t" + reader.getCurrentKey());
      }
    }
  }
View Full Code Here

    }
    // Use a HashSet to calculate the total vocabulary size
    Set<String> vocabulary = new HashSet<String>();
    // Read tuples from generate job
    for(FileStatus fileStatus : fileSystem.globStatus(generatedModel)) {
      TupleInputReader reader = new TupleInputReader(conf);
      reader.initialize(fileStatus.getPath(), conf);
      while(reader.nextKeyValueNoSync()) {
        // Read Tuple
        ITuple tuple = reader.getCurrentKey();
        Integer count = (Integer) tuple.get("count");
        Category category = (Category) tuple.get("category");
        String word = tuple.get("word").toString();
        vocabulary.add(word);
        tokensPerCategory.put(category, MapUtils.getInteger(tokensPerCategory, category, 0) + count);
View Full Code Here

  public List<Pair<Object, Object>> ensureTupleOutput(String output) throws IOException {
    List<Pair<Object, Object>> outs = outputs.get(output);
    try {
      if(outs == null) {
        outs = new ArrayList<Pair<Object, Object>>();
        TupleInputReader reader = new TupleInputReader(getConf());
        reader.initialize(new Path(output), getConf());
        while(reader.nextKeyValueNoSync()) {
          ITuple tuple = reader.getCurrentKey();
          outs.add(new Pair<Object, Object>(tuple, NullWritable.get()));
        }
        reader.close();
      }
    } catch(InterruptedException e) {
      throw new IOException(e);
    }
    return outs;
View Full Code Here

    }
    // Use a HashSet to calculate the total vocabulary size
    Set<String> vocabulary = new HashSet<String>();
    // Read tuples from generate job
    for(FileStatus fileStatus : fileSystem.globStatus(generatedModel)) {
      TupleInputReader reader = new TupleInputReader(conf);
      reader.initialize(fileStatus.getPath(), conf);
      while(reader.nextKeyValueNoSync()) {
        // Read Tuple
        ITuple tuple = reader.getCurrentKey();
        Integer count = (Integer) tuple.get("count");
        Category category = (Category) tuple.get("category");
        String word = tuple.get("word").toString();
        vocabulary.add(word);
        tokensPerCategory.put(category, MapUtils.getInteger(tokensPerCategory, category, 0) + count);
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.tuplemr.mapred.lib.input.TupleInputFormat.TupleInputReader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.