Package: com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.Tuple


    protected ObjectMapper mapper;

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      this.mapper = new ObjectMapper();
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema(0));
      record = new Record(getAvroSchema());
      tuple.set("my_avro", record);
    };
View Full Code Here


    job.setOutput(new Path(output), new HadoopOutputFormat(NullOutputFormat.class), ITuple.class,
        NullWritable.class);
    // The reducer will just emit the tuple to the corresponding Category output
    job.setTupleReducer(new TupleReducer<ITuple, NullWritable>() {

      ITuple outTuple = new Tuple(OUT_SCHEMA);

      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
          Collector collector) throws IOException, InterruptedException, TupleMRException {

        for(ITuple tuple : tuples) {
          Category category = (Category) tuple.get("category");
          outTuple.set("line", tuple.get("line"));
          outTuple.set("text", tuple.get("text"));
          outTuple.set("title", tuple.get("title"));
          collector.getNamedOutput(category.toString().toLowerCase())
              .write(outTuple, NullWritable.get());
        }
      }
    });
View Full Code Here

    // Use a HashSet to calculate the total vocabulary size
    Set<String> vocabulary = new HashSet<String>();
    // Read tuples from generate job
    for(FileStatus fileStatus : fileSystem.globStatus(generatedModel)) {
      TupleFile.Reader reader = new TupleFile.Reader(fileSystem, conf, fileStatus.getPath());
      Tuple tuple = new Tuple(reader.getSchema());
      while(reader.next(tuple)) {
        // Read Tuple
        Integer count = (Integer) tuple.get("count");
        Category category = (Category) tuple.get("category");
        String word = tuple.get("word").toString();
        vocabulary.add(word);
        tokensPerCategory.put(category, MapUtils.getInteger(tokensPerCategory, category, 0) + count);
        wordCountPerCategory.get(category).put(word, count);
      }
    }
View Full Code Here

      int newSize = 0;
      // Lazily allocate the reusable line buffer and tuple on first use.
      if(line == null) {
        this.line = new Text();
      }
      if(tuple == null) {
        this.tuple = new Tuple(schema);
      }
      // Keep reading lines until we move past the end of this input split.
      while(position < end) {
        newSize = in.readLine(line, maxLineLength,
            Math.max((int) Math.min(Integer.MAX_VALUE, end - position), maxLineLength));
View Full Code Here

            }
            i++;
          }
          // Built once, lazily, from the Cascading tuple's fields on the first record seen.
          Schema schema = new Schema(schemaName, fields);
          log.info("Lazily instantiated a Pangool Schema from Cascading Tuple: [" + schema + "]");
          tuple = new Tuple(schema);
        }
       
        // Just perform a normal Object copy - without checking the Schema every time.
        // This is more efficient but type mismatches will surface as errors later.
        for(int i = 0; i < tuple.getSchema().getFields().size(); i++) {
View Full Code Here

    final RecordReader<WritableComparable, HCatRecord> hCatRecordReader = iF.createRecordReader(split,
        taskContext);

    // Adapter: exposes HCatalog records as Pangool Tuples (as keys, with NullWritable values).
    return new RecordReader<ITuple, NullWritable>() {

      // Reused for every record to avoid a per-record allocation.
      ITuple tuple = new Tuple(pangoolSchema);

      @Override
      public void close() throws IOException {
        hCatRecordReader.close();
      }

      @Override
      public ITuple getCurrentKey() throws IOException, InterruptedException {
        // NOTE(review): the key is built from the wrapped reader's VALUE (the
        // HCatRecord); the wrapped reader's key is ignored by design.
        HCatRecord record = hCatRecordReader.getCurrentValue();
        // Perform conversion between HCatRecord and Tuple, position by position.
        for(int pos = 0; pos < schema.size(); pos++) {
          tuple.set(pos, record.get(pos));
        }
        return tuple;
      }

      @Override
View Full Code Here

    // Remember where this split begins; there is more to read only while the
    // current position lies before the split's end.
    this.start = in.getPosition();
    more = start < end;

    // Deserialize into the requested target schema when one was given,
    // otherwise fall back to the schema stored in the file itself.
    if(targetSchema == null) {
      tuple = new Tuple(in.getSchema());
    } else {
      tuple = new Tuple(targetSchema);
    }
  }
View Full Code Here

  private static CachedTuples createCachedTuples(TupleMRConfig config) {
    SerializationInfo serInfo = config.getSerializationInfo();
    boolean multipleSources = config.getNumIntermediateSchemas() >= 2;
    CachedTuples r = new CachedTuples();
    r.commonTuple = new Tuple(serInfo.getCommonSchema());
    for(Schema sourceSchema : config.getIntermediateSchemas()) {
      r.resultTuples.add(new Tuple(sourceSchema));
    }

    if(multipleSources) {
      for(Schema specificSchema : serInfo.getSpecificSchemas()) {
        r.specificTuples.add(new Tuple(specificSchema));
      }
    }
    return r;
  }
View Full Code Here

  /*
   * Read the Tuples from a TupleOutput using TupleInputReader.
   */
  public static void readTuples(Path file, Configuration conf, TupleVisitor iterator) throws IOException, InterruptedException {
    TupleFile.Reader reader = new TupleFile.Reader(FileSystem.get(file.toUri(), conf), conf, file);
    Tuple tuple = new Tuple(reader.getSchema());
    while(reader.next(tuple)) {
      iterator.onTuple(tuple);
    }
    reader.close();
  }
View Full Code Here

  }

  @Override
  public ITuple deserialize(ITuple tuple) throws IOException {
    if(tuple == null) {
      tuple = new Tuple(targetSchema);
    }
    readFields(tuple, deserializers);
    return tuple;
  }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Tuple

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.