Package com.datasalt.pangool.tuplemr.mapred.lib.input

Examples of com.datasalt.pangool.tuplemr.mapred.lib.input.TupleTextInputFormat


    delete(output);

    TupleMRBuilder mr = new TupleMRBuilder(conf, "Pangool Url Resolution");
    mr.addIntermediateSchema(getURLMapSchema());
    mr.addIntermediateSchema(getURLRegisterSchema());
    mr.addInput(new Path(input1), new TupleTextInputFormat(getURLMapSchema(), false, false, '\t',
        NO_QUOTE_CHARACTER, NO_ESCAPE_CHARACTER, null, null), new IdentityTupleMapper());
    mr.addInput(new Path(input2), new TupleTextInputFormat(getURLRegisterSchema(), false, false, '\t',
        NO_QUOTE_CHARACTER, NO_ESCAPE_CHARACTER, null, null), new IdentityTupleMapper());
    mr.setFieldAliases("urlMap", new Aliases().add("url", "nonCanonicalUrl"));
    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).addSchemaOrder(Order.ASC));
    mr.setSpecificOrderBy("urlRegister", new OrderBy().add("timestamp", Order.ASC));
View Full Code Here


    delete(output);

    TupleMRBuilder mr = new TupleMRBuilder(conf, "Pangool Url Resolution");
    mr.addIntermediateSchema(getURLMapSchema());
    mr.addIntermediateSchema(getURLRegisterSchema());
    mr.addInput(new Path(input1), new TupleTextInputFormat(getURLMapSchema(), false, false, '\t',
        NO_QUOTE_CHARACTER, NO_ESCAPE_CHARACTER, null, null), new IdentityTupleMapper());
    mr.addInput(new Path(input2), new TupleTextInputFormat(getURLRegisterSchema(), false, false, '\t',
        NO_QUOTE_CHARACTER, NO_ESCAPE_CHARACTER, null, null), new IdentityTupleMapper());
    mr.setFieldAliases("urlMap", new Aliases().add("url", "nonCanonicalUrl"));
    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).addSchemaOrder(Order.ASC));
    mr.setSpecificOrderBy("urlRegister", new OrderBy().add("timestamp", Order.ASC));
View Full Code Here

    this.hadoopConf = hadoopConf;
  }

  public TableBuilder addFixedWidthTextFile(Path path, Schema schema, int[] fields, boolean hasHeader,
      String nullString, RecordProcessor recordProcessor) {
    addFile(new TableInput(new TupleTextInputFormat(schema, fields, hasHeader, nullString),
        new HashMap<String, String>(), schema, (recordProcessor == null) ? new IdentityRecordProcessor()
            : recordProcessor, path));
    return this;
  }
View Full Code Here

  public TableBuilder addCSVTextFile(Path path, char separator, char quoteCharacter,
      char escapeCharacter, boolean hasHeader, boolean strictQuotes, String nullString,
      Schema fileSchema, RecordProcessor recordProcessor) {
    return addFile(new TableInput(
        new TupleTextInputFormat(fileSchema, hasHeader, strictQuotes, separator, quoteCharacter,
            escapeCharacter, TupleTextInputFormat.FieldSelector.NONE, nullString),
        new HashMap<String, String>(), fileSchema, recordProcessor, path));
  }
View Full Code Here

      writer.write("str1" + " " + "str2" + " " + "30" + " " + "4000" + "\n");
    }
    writer.close();

    Schema schema = new Schema("schema", Fields.parse("a:string, b:string, c:int, d:long"));
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ' ',
        TupleTextInputFormat.NO_QUOTE_CHARACTER, TupleTextInputFormat.NO_ESCAPE_CHARACTER,
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);

    Configuration conf = getConf();
    conf.setLong("mapred.min.split.size", 10 * 1024);
 
View Full Code Here

  }

  @Test
  public void testInputCompression() throws Exception {
    Schema schema = new Schema("schema", Fields.parse("a:string, b:string, c:int, d:long"));
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ' ',
        TupleTextInputFormat.NO_QUOTE_CHARACTER, TupleTextInputFormat.NO_ESCAPE_CHARACTER,
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);

    Configuration conf = getConf();
    FileSystem fS = FileSystem.get(conf);
View Full Code Here

    builder.addIntermediateSchema(schema);
    builder.setGroupByFields("strField1"); // but we don't care, really
    /*
     * Define the Input Format and the Output Format!
     */
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, '\t',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER,
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, '\t',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);

View Full Code Here

    builder.addIntermediateSchema(schema);
    builder.setGroupByFields("id"); // but we don't care, really
    /*
     * Define the Input Format and the Output Format!
     */
    InputFormat inputFormat = new TupleTextInputFormat(schema, false, false, ',', '"', '\\',
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, ',', '"', '\\');

    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
View Full Code Here

    builder.addIntermediateSchema(schema);
    builder.setGroupByFields("strField1"); // but we don't care, really
    /*
     * Define the Input Format and the Output Format!
     */
    InputFormat inputFormat = new TupleTextInputFormat(schema, true, false, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER,
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, true, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);

View Full Code Here

    Schema schema = new Schema("schema",
        Fields.parse("name:string,name2:string,age:int,name3:string,emptystring:string"));

    MapOnlyJobBuilder mO = new MapOnlyJobBuilder(conf);
    mO.addInput(inPath, new TupleTextInputFormat(schema, false, true, ',', '"', '\\',
        FieldSelector.NONE, TupleTextInputFormat.NO_NULL_STRING),
        new MapOnlyMapper<ITuple, NullWritable, ITuple, NullWritable>() {

          protected void map(ITuple key, NullWritable value, Context context,
              MultipleOutputsCollector collector) throws IOException, InterruptedException {
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.tuplemr.mapred.lib.input.TupleTextInputFormat

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.