Package com.datasalt.pangool.tuplemr.mapred.lib.input

Examples of com.datasalt.pangool.tuplemr.mapred.lib.input.TupleTextInputFormat


    HadoopUtils.deleteIfExists(fS, outPath);

    Schema schema = new Schema("schema", Fields.parse("n1:int,n2:long,n3:float,n4:double"));

    MapOnlyJobBuilder mO = new MapOnlyJobBuilder(conf);
    mO.addInput(inPath, new TupleTextInputFormat(schema, false, true, ',', '"', '\\',
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING),
        new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {

          protected void map(ITuple key, NullWritable value, Context context,
              MultipleOutputsCollector collector) throws IOException, InterruptedException {
View Full Code Here


        + "population:int," + "life_expectancy:double," + "gnp:double," + "gnp_old:double,"
        + "local_name:string," + "government_form:string," + "head_of_state:string," + "capital:int,"
        + "code2:string"));

    MapOnlyJobBuilder mO = new MapOnlyJobBuilder(conf);
    mO.addInput(inPath, new TupleTextInputFormat(schema, false, false, ',', '"', '\\',
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING),
        new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {

          protected void map(ITuple key, NullWritable value, Context context,
              MultipleOutputsCollector collector) throws IOException, InterruptedException {
View Full Code Here

    TupleMRBuilder builder = new TupleMRBuilder(conf);
    builder.addIntermediateSchema(schema);
    builder.setGroupByFields("floatField"); // but we don't care, really
    // Define the Input Format and the Output Format!
    // Add the selector to the input format
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER, selector,
        TupleTextInputFormat.NO_NULL_STRING);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);
View Full Code Here

    builder.setGroupByFields("plugin"); // but we don't care, really
    /*
     * Define the Input Format and the Output Format!
     */

    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ',', '"', '\\', null, null);
    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setTupleOutput(outPath, schema);
    Job job = builder.createJob();
    try {
View Full Code Here

    builder.setGroupByFields("strField1"); // but we don't care, really
    /*
     * Define the Input Format and the Output Format!
     */

    InputFormat inputFormat = new TupleTextInputFormat(schema, fieldsPos, false, null);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);

    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
View Full Code Here

    builder.setGroupByFields("strField1"); // but we don't care, really
    /*
     * Define the Input Format and the Output Format!
     */

    InputFormat inputFormat = new TupleTextInputFormat(schema, fieldsPos, false, null);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);

    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
View Full Code Here

    HadoopUtils.deleteIfExists(fS, outPath);

    Schema schema = new Schema("schema", Fields.parse("name:string,name2:string"));

    MapOnlyJobBuilder mO = new MapOnlyJobBuilder(conf);
    mO.addInput(inPath, new TupleTextInputFormat(schema, fieldsPos, false, "-"),
        new MapOnlyMapper<ITuple, NullWritable, NullWritable, NullWritable>() {

          protected void map(ITuple key, NullWritable value, Context context,
              MultipleOutputsCollector collector) throws IOException, InterruptedException {
View Full Code Here

      Schema schema = new Schema("sch", Fields.parse(pangoolSchema));
      Path inputP = new Path(inputPath);

      // Use Pangool API - parse CSV, etc
      TupleMRBuilder builder = new TupleMRBuilder(conf);
      TupleTextInputFormat parsingInputFormat = new TupleTextInputFormat(schema, skipHeading, false,
          separator.charAt(0), quotes.charAt(0), escape.charAt(0), FieldSelector.NONE, null);
      TupleTextOutputFormat outputFormat = new TupleTextOutputFormat(schema, false, separator.charAt(0),
          quotes.charAt(0), escape.charAt(0));

      builder.addIntermediateSchema(schema);
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.tuplemr.mapred.lib.input.TupleTextInputFormat

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.