Package com.datasalt.pangool.tuplemr

Examples of com.datasalt.pangool.tuplemr.OrderBy


    TupleMRBuilder mr = new TupleMRBuilder(conf, "AvroTweetsJoin");
    mr.addIntermediateSchema(getPangoolTweetSchema());
    mr.addIntermediateSchema(getPangoolRetweetSchema());
    mr.setGroupByFields("tweet_id");
    mr.setOrderBy(new OrderBy().add("tweet_id", Order.ASC).addSchemaOrder(Order.ASC));
    mr.setSpecificOrderBy("retweet", new OrderBy().add("username", Order.ASC));

    mr.addInput(tweetsPath, new AvroInputFormat<Record>(getAvroTweetSchema()), new TweetsMapper());
    mr.addInput(retweetsPath, new HadoopInputFormat(TextInputFormat.class), new RetweetsMapper());
    mr.setOutput(outputPath, new AvroOutputFormat<Record>(getAvroOutputSchema()), AvroWrapper.class,
        NullWritable.class);
View Full Code Here


    TupleMRBuilder mr = new TupleMRBuilder(conf, "Pangool Url Resolution");
    mr.addIntermediateSchema(getURLMapSchema());
    mr.addIntermediateSchema(getURLRegisterSchema());
    mr.setFieldAliases("urlMap", new Aliases().add("url", "nonCanonicalUrl"));
    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).addSchemaOrder(Order.ASC));
    mr.setSpecificOrderBy("urlRegister", new OrderBy().add("timestamp", Order.ASC));
    mr.setTupleReducer(new Handler());
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        NullWritable.class);
    mr.addInput(new Path(input1), new HadoopInputFormat(TextInputFormat.class), new UrlMapProcessor());
    mr.addInput(new Path(input2), new HadoopInputFormat(TextInputFormat.class), new UrlProcessor());
View Full Code Here

    Schema schema = new Schema("my_schema", fields);

    TupleMRBuilder mr = new TupleMRBuilder(conf);
    mr.addIntermediateSchema(schema);
    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).add("date", Order.ASC));
    // Input / output and such
    mr.setTupleReducer(new MovingAverageHandler(nDaysAverage));
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        NullWritable.class);
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new URLVisitsProcessor());
View Full Code Here

    TupleMRBuilder builder = new TupleMRBuilder(getConf());
    Schema baseSchema = new Schema("schema", Fields.parse("name:string, money:int, country:string"));
    builder.addIntermediateSchema(baseSchema);
    builder.setGroupByFields("country");
    builder.setOrderBy(new OrderBy().add("country", Order.ASC).add("money", Order.DESC)
        .add("name", Order.ASC));
    builder.addInput(new Path(INPUT), new HadoopInputFormat(TextInputFormat.class),
        new MyInputProcessor());
    builder.setTupleReducer(new MyGroupHandler());
    builder.setOutput(new Path(OUTPUT), new HadoopOutputFormat(SequenceFileOutputFormat.class),
View Full Code Here

    Schema originalSchema = new Schema("schema", Fields.parse("title:string, content:string"));

    TupleMRBuilder builder = new TupleMRBuilder(conf);
    builder.addIntermediateSchema(originalSchema);
    builder.setGroupByFields("title");
    builder.setOrderBy(new OrderBy().add("title", Order.ASC).add("content", Order.ASC));

    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setTupleOutput(outPath, originalSchema);
    builder.addInput(inPath, new HadoopInputFormat(TextInputFormat.class), new MyInputProcessor());
View Full Code Here

    cg.setJarByClass(TestCombiner.class);
    cg.addInput(new Path(input), new HadoopInputFormat(SequenceFileInputFormat.class), new Split());
    cg.setOutput(new Path(output), new HadoopOutputFormat(SequenceFileOutputFormat.class), Utf8.class,
        IntWritable.class);
    cg.setGroupByFields("word");
    cg.setOrderBy(new OrderBy().add("word", Order.ASC));
    cg.setTupleReducer(new Count());
    cg.setTupleCombiner(new CountCombiner());

    return cg;
  }
View Full Code Here

        schema = decorateWithNullables(schema);
      }
      b.addIntermediateSchema(schema);
    }
    b.setGroupByFields("boolean_field", "int_field");
    b.setOrderBy(new OrderBy().add("boolean_field", Order.ASC).add("int_field", Order.DESC).addSchemaOrder(Order.DESC));
    b.setSpecificOrderBy("schema1", new OrderBy().add("string_field", Order.DESC));
    b.setSpecificOrderBy("schema2", new OrderBy().add("long_field", Order.ASC));
    return b.buildConf();
  }
View Full Code Here

    Path outputPath = new Path(output);

    TupleMRBuilder builder = new TupleMRBuilder(getConf());
    builder.addIntermediateSchema(schema);
    builder.setGroupByFields("country", "age", "name");
    builder.setOrderBy(new OrderBy().add("country", Order.ASC).add("age", Order.ASC)
        .add("name", Order.ASC).add("height", Order.DESC));
    builder.setRollupFrom("country");
    builder.setTupleReducer(new IdentityRed());
    builder.setOutput(outputPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class,
        Text.class);
View Full Code Here

    Path outputPath = new Path(output);

    TupleMRBuilder builder = new TupleMRBuilder(getConf());
    builder.addIntermediateSchema(schema);
    builder.setGroupByFields("age", "name", "country");
    builder.setOrderBy(new OrderBy().add("country", Order.ASC).add("age", Order.ASC)
        .add("name", Order.ASC).add("height", Order.DESC));
    builder.setRollupFrom("age");
    builder.setTupleReducer(new IdentityRed());
    builder.setOutput(outputPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class,
        Text.class);
View Full Code Here

    Path outputPath = new Path(output);

    TupleMRBuilder builder = new TupleMRBuilder(getConf());
    builder.addIntermediateSchema(schema);
    builder.setGroupByFields("age", "name", "country");
    builder.setOrderBy(new OrderBy().add("country", Order.ASC).add("age", Order.ASC)
        .add("name", Order.ASC));
    builder.setRollupFrom("age");
    builder.setTupleReducer(new IdentityRed());
    builder.setOutput(outputPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class,
        Text.class);
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.tuplemr.OrderBy

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.