Package com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.Tuple


 
  /**
   * Returns a Tuple conforming to a simple schema: {@link #SCHEMA}.
   */
  public static ITuple getTuple(String id, int value) {
    ITuple tuple = new Tuple(SCHEMA);
    tuple.set("id", id);
    tuple.set("value", value);
    return tuple;
  }
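
The SCHEMA constant referenced by the Javadoc above is not part of this fragment. A minimal sketch of how it could be declared with Pangool's Fields.parse (the schema name "simple_schema" is an assumption):

  // Hypothetical companion declaration for the getTuple() helper above;
  // the field list matches the two set() calls: a string "id" and an int "value".
  public static final Schema SCHEMA =
      new Schema("simple_schema", Fields.parse("id:string,value:int"));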


    builder.addIntermediateSchema(NullableSchema.nullableSchema(metaSchema1));

    builder.addInput(new Path(INPUT1), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tupleInTuple1 = new Tuple(metaSchema1);

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            String[] split = value.toString().split("\t");
            tupleInTuple1.set("a", split[0]);
            tupleInTuple1.set("b", Integer.parseInt(split[1]));
            tupleInTuple1.set(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, 0);
            collector.write(tupleInTuple1);
          }
        });

    TableSpec table1 = new TableSpec(tupleSchema1, tupleSchema1.getField(0));
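
The metaSchema1 used by this mapper is likewise not shown. Judging from the fields it sets ("a", "b" and the Splout SQL partition field), a plausible construction, following the same pattern used further down this page, would be (the schema name "schema1" is an assumption):

    List<Field> fields = new ArrayList<Field>();
    fields.addAll(Fields.parse("a:string,b:int"));
    fields.add(SploutSQLOutputFormat.getPartitionField());
    final Schema metaSchema1 = new Schema("schema1", fields);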

  // The resulting Table Tuple
  private ITuple tuple;

  public PageCountsRecordProcessor(Schema pageCountsSchema, String date, String hour) {
    this.tuple = new Tuple(pageCountsSchema);
    this.tuple.set("date", date);
    this.tuple.set("hour", hour);
  }
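
Only the constructor is shown here; a process() method would fill in the remaining fields of the pre-built tuple. A hypothetical sketch, modelled on how RecordProcessor.process(...) is invoked elsewhere on this page (the "pagename" and "pageviews" field names and the throws clause are assumptions):

  @Override
  public ITuple process(ITuple fileTuple, CounterInterface counterInterface) throws Throwable {
    // date and hour were already set in the constructor; copy the remaining
    // page-counts fields from the input tuple and return the result.
    tuple.set("pagename", fileTuple.get("pagename"));
    tuple.set("pageviews", fileTuple.get("pageviews"));
    return tuple;
  }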

        final RecordProcessor recordProcessor = inputFile.getRecordProcessor();

        for (Path path : inputFile.getPaths()) {
          builder.addInput(path, inputFile.getFormat(), new TupleMapper<ITuple, NullWritable>() {

            Tuple tableTuple = new Tuple(tableSchema);
            JavascriptEngine jsEngine = null;
            CounterInterface counterInterface = null;

            @Override
            public void map(ITuple fileTuple, NullWritable value, TupleMRContext context,
                            Collector collector) throws IOException, InterruptedException {

              if (counterInterface == null) {
                counterInterface = new CounterInterface(context.getHadoopContext());
              }

              // Initialize JavaScript engine if needed
              if (jsEngine == null && tableSpec.getPartitionByJavaScript() != null) {
                try {
                  jsEngine = new JavascriptEngine(tableSpec.getPartitionByJavaScript());
                } catch (Throwable e) {
                  throw new RuntimeException(e);
                }
              }

              // For each input Tuple from this file, execute the RecordProcessor.
              // The default IdentityRecordProcessor just passes the same Tuple through unchanged.
              ITuple processedTuple = null;
              try {
                processedTuple = recordProcessor.process(fileTuple, counterInterface);
              } catch (Throwable e1) {
                throw new RuntimeException(e1);
              }
              if (processedTuple == null) {
                // The tuple has been filtered out by the user
                return;
              }

              // Get the partition Id from this record
              String strKey = "";
              try {
                strKey = getPartitionByKey(processedTuple, tableSpec, jsEngine);
              } catch (Throwable e) {
                throw new RuntimeException(e);
              }
              int shardId = partitionMap.findPartition(strKey);
              if (shardId == -1) {
                throw new RuntimeException(
                    "Shard id = -1: this looks like a software bug; it shouldn't happen if the PartitionMap is complete.");
              }

              // Finally write it to the Hadoop output
              for (Field field : processedTuple.getSchema().getFields()) {
                tableTuple.set(field.getName(), processedTuple.get(field.getName()));
              }
              tableTuple.set(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, shardId);
              collector.write(tableTuple);
            }
          }, inputFile.getSpecificHadoopInputFormatContext());
        }
      }
      tableSpecs.add(table.getTableSpec());
    }

    // We do the same for the replicate-all tables, but the Mapper logic is different:
    // the data is sent to every partition.
    for (final Table table : tablespace.getReplicateAllTables()) {
      List<Field> fields = new ArrayList<Field>();
      fields.addAll(table.getTableSpec().getSchema().getFields());
      fields.add(SploutSQLOutputFormat.getPartitionField());
      final Schema tableSchema = new Schema(table.getTableSpec().getSchema().getName(), fields);
      schemaCounter++;
      builder.addIntermediateSchema(NullableSchema.nullableSchema(tableSchema));
      // For each input file of the Table, we add an input and a TupleMapper
      for (TableInput inputFile : table.getFiles()) {

        final RecordProcessor recordProcessor = inputFile.getRecordProcessor();

        for (Path path : inputFile.getPaths()) {
          builder.addInput(path, inputFile.getFormat(), new TupleMapper<ITuple, NullWritable>() {

            Tuple tableTuple = new Tuple(tableSchema);
            CounterInterface counterInterface = null;

            @Override
            public void map(ITuple key, NullWritable value, TupleMRContext context, Collector collector)
                throws IOException, InterruptedException {

              if (counterInterface == null) {
                counterInterface = new CounterInterface(context.getHadoopContext());
              }

              // For each input Tuple from this file, execute the RecordProcessor.
              // The default IdentityRecordProcessor just passes the same Tuple through unchanged.
              ITuple processedTuple = null;
              try {
                processedTuple = recordProcessor.process(key, counterInterface);
              } catch (Throwable e1) {
                throw new RuntimeException(e1);
              }
              if (processedTuple == null) {
                // The tuple has been filtered out by the user
                return;
              }

              // Finally write it to the Hadoop output
              for (Field field : processedTuple.getSchema().getFields()) {
                tableTuple.set(field.getName(), processedTuple.get(field.getName()));
              }

              // Send the data of the replicated table to all partitions!
              for (int i = 0; i < nPartitions; i++) {
                tableTuple.set(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, i);
                collector.write(tableTuple);
              }
            }
          }, inputFile.getSpecificHadoopInputFormatContext());
        }

    withOutput(firstReducerOutput(OUTPUT + "/" + OUTPUT_1), new Text("Pere"), new Text("ES"));
    withOutput(firstMapOutput(OUTPUT + "/" + OUTPUT_1), new Text("Pere"), new Text("ES"));
    withOutput(firstReducerOutput(OUTPUT + "/" + OUTPUT_2), new IntWritable(100), NullWritable.get());
    withOutput(firstMapOutput(OUTPUT + "/" + OUTPUT_2), new IntWritable(100), NullWritable.get());

    Tuple tuple = new Tuple(baseSchema);
    tuple.set(0, "Pere");
    tuple.set(1, 100);
    tuple.set(2, "ES");

    withTupleOutput(firstMapOutput(OUTPUT + "/" + TUPLEOUTPUT_1), tuple);
    withTupleOutput(firstReducerOutput(OUTPUT + "/" + TUPLEOUTPUT_1), tuple);

    trash(INPUT, OUTPUT);
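
The baseSchema used above is not shown in this test fragment; judging from the positional set() calls (a string, an int and another string), it might be declared along these lines (schema and field names are assumptions):

    Schema baseSchema = new Schema("base_schema", Fields.parse("name:string,money:int,country:string"));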

    private Tuple tuple;

    public void setup(TupleMRContext context, Collector collector) throws IOException,
        InterruptedException {
      tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema(0));
    }

  public void testSplits(long maxSplitSize, int generatedRows) throws IOException, InterruptedException, IllegalArgumentException, SecurityException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
    logger.info("Testing maxSplitSize: " + maxSplitSize + " and generatedRows: " + generatedRows);
    FileSystem fS = FileSystem.get(getConf());
    Random r = new Random(1);
    Schema schema = new Schema("schema", Fields.parse("i:int,s:string"));
    ITuple tuple = new Tuple(schema);

    Path outPath = new Path(OUT);
    TupleFile.Writer writer = new TupleFile.Writer(FileSystem.get(getConf()), getConf(), outPath, schema);
    for(int i = 0; i < generatedRows; i++) {
      tuple.set("i", r.nextInt());
      tuple.set("s", r.nextLong() + "");
      writer.append(tuple);
    }
    writer.close();

    TupleInputFormat format = ReflectionUtils.newInstance(TupleInputFormat.class, getConf());
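
For completeness, the file written by this test can be read back with TupleFile.Reader, as later snippets on this page do. A minimal sketch reusing outPath and the configuration from above:

    TupleFile.Reader reader = new TupleFile.Reader(FileSystem.get(getConf()), getConf(), outPath);
    ITuple readTuple = new Tuple(reader.getSchema());
    int rows = 0;
    while (reader.next(readTuple)) {
      rows++; // each next() call fills readTuple with the following record
    }
    reader.close(); // assuming the Reader exposes close(), as SequenceFile-style readers do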

    @Override
    public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {
      if(tuple == null) {
        tuple = new Tuple(context.getTupleMRConfig().getIntermediateSchema(0));
      }
      tuple.set(0, "title");
      tuple.set(1, value);
      collector.write(tuple);
    }

  public static int assertOutput(String output, Configuration conf) throws NumberFormatException, IOException, InterruptedException {
    int validatedOutputLines = 0;

    Path outPath = new Path(output);
    TupleFile.Reader reader = new TupleFile.Reader(FileSystem.get(outPath.toUri(), conf), conf, outPath);
    Tuple tuple = new Tuple(reader.getSchema());

    while(reader.next(tuple)) {
      Record record = (Record)tuple.get("my_avro");
      int topicId = (Integer) record.get("topic");
      String word = (record.get("word")).toString();
      int count   = (Integer) record.get("count");
      if(topicId == 1) {
        if(word.equals("bar") || word.equals("foo")) {

    }

    Path toRead = new Path(out, "part-r-00000");
    assertTrue(fS.exists(toRead));
    TupleFile.Reader reader = new TupleFile.Reader(fS, conf, toRead);
    Tuple tuple = new Tuple(reader.getSchema());
     
    char base = 'a';
    for(int i = 0; i < 7; i++) {
      reader.next(tuple);
      assertEquals((char)(base + (char)i) + "", tuple.get("group").toString());
      assertEquals((char)(base + (char)i) + "", ((ITuple)(((ITuple)tuple.get("metatuple")).get("tuple"))).get("b").toString());
    }
   
    fS.delete(out, true);
  }
