Package com.splout.db.hadoop.engine

Source Code of com.splout.db.hadoop.engine.SploutSQLOutputFormatTester

package com.splout.db.hadoop.engine;

/*
* #%L
* Splout SQL Hadoop library
* %%
* Copyright (C) 2012 - 2014 Datasalt Systems S.L.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.junit.AfterClass;
import org.junit.BeforeClass;

import com.datasalt.pangool.io.Fields;
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.Schema.Field;
import com.datasalt.pangool.io.Tuple;
import com.datasalt.pangool.tuplemr.IdentityTupleReducer;
import com.datasalt.pangool.tuplemr.TupleMRBuilder;
import com.datasalt.pangool.tuplemr.TupleMapper;
import com.datasalt.pangool.tuplemr.mapred.lib.input.HadoopInputFormat;
import com.splout.db.engine.OutputFormatFactory;
import com.splout.db.engine.SploutEngine;
import com.splout.db.hadoop.NullableSchema;
import com.splout.db.hadoop.TableSpec;

/**
* A very simple base code for testing {@link SploutSQLOutputFormat} classes.
* To be reused or to be used as guide for other tests.
*/
@SuppressWarnings("serial")
public class SploutSQLOutputFormatTester implements Serializable {

  public final static String INPUT1 = "in1-" + SploutSQLOutputFormatTester.class.getName();
  public final static String OUTPUT = "out-" + SploutSQLOutputFormatTester.class.getName();

  final Schema tupleSchema1 = new Schema("schema1", Fields.parse("a:string, b:int"));

  public Schema getTupleSchema1() {
    return tupleSchema1;
  }
 
  /**
   * Executes a MapReduce Job that uses the outputformat of the given Engine and writes some foo data.
   * The data written has two fields: a string field ("a") and an int field ("b").
   * <p>
   * The data is partitioned by the string field ("a").
   * <p>
   * The table name is "schema1". The values in column "a" go from "foo1" to "foo6".
   * The values in column "b" are 30, 20, 140, 110, 220 and 260.
   */
  protected void runTest(SploutEngine engine) throws Exception {
    // Prepare input
    BufferedWriter writer;

    writer = new BufferedWriter(new FileWriter(INPUT1));
    writer.write("foo1" + "\t" + "30" + "\n");
    writer.write("foo2" + "\t" + "20" + "\n");
    writer.write("foo3" + "\t" + "140" + "\n");
    writer.write("foo4" + "\t" + "110" + "\n");
    writer.write("foo5" + "\t" + "220" + "\n");
    writer.write("foo6" + "\t" + "260" + "\n");
    writer.close();

    List<Field> fields = new ArrayList<Field>();
    fields.addAll(tupleSchema1.getFields());
    fields.add(Field.create(SQLite4JavaOutputFormat.PARTITION_TUPLE_FIELD, Schema.Field.Type.INT));
    final Schema metaSchema1 = new Schema("schema1", fields);

    TupleMRBuilder builder = new TupleMRBuilder(new Configuration());
    builder.addIntermediateSchema(NullableSchema.nullableSchema(metaSchema1));

    builder.addInput(new Path(INPUT1), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {

          ITuple tupleInTuple1 = new Tuple(metaSchema1);

          @Override
          public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
              throws IOException, InterruptedException {
            String[] split = value.toString().split("\t");
            tupleInTuple1.set("a", split[0]);
            tupleInTuple1.set("b", Integer.parseInt(split[1]));
            tupleInTuple1.set(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, 0);
            collector.write(tupleInTuple1);
          }
        });

    TableSpec table1 = new TableSpec(tupleSchema1, tupleSchema1.getField(0));
      
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setGroupByFields(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD);
    builder.setOutput(new Path(OUTPUT), OutputFormatFactory.getOutputFormat(engine, 10000, new TableSpec[] { table1 }),
        ITuple.class, NullWritable.class);

    Job job = builder.createJob();
    try {
      job.waitForCompletion(true);
    } finally {
      builder.cleanUpInstanceFiles();
    }
  }
 
  @AfterClass
  @BeforeClass
  public static void cleanup() throws IOException, InterruptedException {
    Runtime.getRuntime().exec("rm -rf " + INPUT1).waitFor();
    Runtime.getRuntime().exec("rm -rf " + OUTPUT).waitFor();
 
}
TOP

Related Classes of com.splout.db.hadoop.engine.SploutSQLOutputFormatTester

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.