Package com.datasalt.pangool.tuplemr.mapred.lib.output

Examples of com.datasalt.pangool.tuplemr.mapred.lib.output.HadoopOutputFormat
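HadoopOutputFormat is the adapter that lets a Pangool job write through any standard Hadoop OutputFormat class: the class is wrapped in the HadoopOutputFormat constructor and passed to the builder's setOutput() together with the output key and value types. Every extracted example below follows that pattern. As a self-contained starting point, here is a minimal sketch of it (the paths, the one-field schema, and the inline mapper/reducer are illustrative, not taken from any one example):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import com.datasalt.pangool.io.Fields;
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.Tuple;
import com.datasalt.pangool.tuplemr.TupleMRBuilder;
import com.datasalt.pangool.tuplemr.TupleMapper;
import com.datasalt.pangool.tuplemr.TupleReducer;
import com.datasalt.pangool.tuplemr.mapred.lib.input.HadoopInputFormat;
import com.datasalt.pangool.tuplemr.mapred.lib.output.HadoopOutputFormat;

public class MinimalHadoopOutputFormatUsage {

  static final Schema SCHEMA = new Schema("schema", Fields.parse("word:string"));

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    TupleMRBuilder mr = new TupleMRBuilder(conf);
    mr.addIntermediateSchema(SCHEMA);
    mr.setGroupByFields("word");
    mr.addInput(new Path(args[0]), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {
          Tuple tuple = new Tuple(SCHEMA);

          public void map(LongWritable key, Text value, TupleMRContext context,
              Collector collector) throws IOException, InterruptedException {
            // One tuple per input line.
            tuple.set("word", value.toString());
            collector.write(tuple);
          }
        });
    // The relevant part: a plain Hadoop OutputFormat class wrapped in
    // HadoopOutputFormat, plus the key/value types the job will emit.
    mr.setOutput(new Path(args[1]), new HadoopOutputFormat(TextOutputFormat.class),
        Text.class, NullWritable.class);
    mr.setTupleReducer(new TupleReducer<Text, NullWritable>() {
      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
          Collector collector) throws IOException, InterruptedException {
        // Emit each distinct word once.
        collector.write(new Text(group.get("word").toString()), NullWritable.get());
      }
    });
    try {
      mr.createJob().waitForCompletion(true);
    } finally {
      mr.cleanUpInstanceFiles();
    }
  }
}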


      // Add a named output for each category (this fragment begins inside a loop over category strings)
      job.addNamedOutput(categoryString, new TupleSolrOutputFormat(new File(
          "src/test/resources/shakespeare-solr"), job.getConf()), ITuple.class, NullWritable.class);
    }
    job.setOutput(new Path(output), new HadoopOutputFormat(NullOutputFormat.class), ITuple.class,
        NullWritable.class);
    // The reducer will just emit the tuple to the corresponding Category output
    job.setTupleReducer(new TupleReducer<ITuple, NullWritable>() {

      ITuple outTuple = new Tuple(OUT_SCHEMA);
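The reduce() body is cut off at this point. Pangool's reducer Collector can address the outputs registered with addNamedOutput(); a hedged sketch of how the elided body might route each tuple, assuming a getNamedOutput() accessor on the Collector and illustrative field names ("line", "category") that are not taken from the original:

      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
          Collector collector) throws IOException, InterruptedException {
        for (ITuple tuple : tuples) {
          // Copy the indexed field into the output tuple (field names assumed).
          outTuple.set("line", tuple.get("line"));
          // Route the tuple to the named output created for its category.
          collector.getNamedOutput(tuple.get("category").toString())
              .write(outTuple, NullWritable.get());
        }
      }
    });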


    // ... (tail of an anonymous handler, elided from this fragment)

    job.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        NullWritable.class);
    try {
      job.createJob().waitForCompletion(true);
    } finally {
      job.cleanUpInstanceFiles();
    }
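This try/finally shape recurs throughout the examples below: cleanUpInstanceFiles() deletes the serialized mapper/reducer instance files that Pangool distributes alongside the job, so it belongs in a finally block and runs whether or not the job succeeds.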

    mr.setGroupByFields("intField", "strField");
    mr.setOrderBy(new OrderBy().add("intField", Order.ASC).add("strField", Order.ASC)
        .add("longField", Order.ASC));
    mr.setTupleReducer(new Handler());
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new IProcessor());
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        DoubleWritable.class);

    try {
      mr.createJob().waitForCompletion(true);
    } finally {
      mr.cleanUpInstanceFiles();
    }
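The group-by and sort fields used above must all be declared in the intermediate schema, which this fragment omits; a plausible registration (the schema name and field types are assumptions inferred from the field names):

    mr.addIntermediateSchema(new Schema("schema",
        Fields.parse("intField:int,strField:string,longField:long")));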

    // --- This is the most important part (what makes it work with MongoDB): ---
    // Set the URI of the MongoDB we will write to. Here we specify the DB and the target collection.
    MongoConfigUtil.setOutputURI(conf, "mongodb://localhost/test.qype");
    // Set the output format to HadoopOutputFormat(MongoOutputFormat.class).
    // The key will be the document ids for the Mongo collection and the value a Mongo BSONObject with all the properties we wish.
    builder.setOutput(new Path(outPath), new HadoopOutputFormat(MongoOutputFormat.class), Text.class,
        BSONObject.class);

    // Finally, build and execute the Pangool Job.
    try {
      builder.createJob().waitForCompletion(true);
    } finally {
      builder.cleanUpInstanceFiles();
    }
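MongoOutputFormat consumes the (Text, BSONObject) pairs declared in setOutput(), so a handler in this job has to emit them. A hedged sketch of such a reducer body (the grouping on "url", the "visits" property, and the use of org.bson.BasicBSONObject are illustrative assumptions):

      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
          Collector collector) throws IOException, InterruptedException {
        // Build one Mongo document per group; property names are illustrative.
        BSONObject document = new BasicBSONObject();
        for (ITuple tuple : tuples) {
          document.put("visits", tuple.get("visits"));
        }
        // The Text key becomes the id of the document written to test.qype.
        collector.write(new Text(group.get("url").toString()), document);
      }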

    delete(output);
   
    init(conf, new Path(modelFolder));
   
    MapOnlyJobBuilder job = new MapOnlyJobBuilder(conf);
    job.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new MapOnlyMapper<LongWritable, Text, Text, NullWritable>() {
      protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        value.set(value.toString() + "\t" + classify(value.toString()));
        context.write(value, NullWritable.get());
      }
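The fragment is cut off inside the anonymous MapOnlyMapper; closing it and launching the job plausibly follows the same pattern as the other examples (a sketch, assuming MapOnlyJobBuilder exposes the same createJob()/cleanUpInstanceFiles() pair used with TupleMRBuilder above):

    });
    try {
      job.createJob().waitForCompletion(true);
    } finally {
      job.cleanUpInstanceFiles();
    }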

    mr.setFieldAliases("urlMap", new Aliases().add("url", "nonCanonicalUrl"));
    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).addSchemaOrder(Order.ASC));
    mr.setSpecificOrderBy("urlRegister", new OrderBy().add("timestamp", Order.ASC));
    mr.setTupleReducer(new Handler());
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        NullWritable.class);
    mr.addInput(new Path(input1), new HadoopInputFormat(TextInputFormat.class), new UrlMapProcessor());
    mr.addInput(new Path(input2), new HadoopInputFormat(TextInputFormat.class), new UrlProcessor());

    try {
      mr.createJob().waitForCompletion(true);
    } finally {
      mr.cleanUpInstanceFiles();
    }
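setFieldAliases() and setSpecificOrderBy() imply two intermediate schemas, "urlMap" and "urlRegister", registered before this fragment begins; a plausible registration, with field lists inferred from the names used above (canonicalUrl is a pure assumption):

    mr.addIntermediateSchema(new Schema("urlMap",
        Fields.parse("nonCanonicalUrl:string,canonicalUrl:string")));
    mr.addIntermediateSchema(new Schema("urlRegister",
        Fields.parse("url:string,timestamp:long")));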


    mr.addIntermediateSchema(schema);
    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).add("date", Order.ASC));
    // Input / output and such
    mr.setTupleReducer(new MovingAverageHandler(nDaysAverage));
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        NullWritable.class);
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new URLVisitsProcessor());

    try {
      mr.createJob().waitForCompletion(true);
    } finally {
      mr.cleanUpInstanceFiles();
    }
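The schema variable registered at the top of this fragment is not shown; given the group-by on url and the secondary sort on date, it plausibly looks like this (the schema name and field types are assumptions):

    Schema schema = new Schema("visits",
        Fields.parse("url:string,date:string,visits:int"));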

            // ... inside an inline mapper's map() method (the signature is truncated in the original):
            context.write(new Text(key.toString()), NullWritable.get());
          }
        });

    builder.setOutput(new Path(OUTPUT), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        NullWritable.class);
    Job job = builder.createJob();
    try {
      assertRun(job);
    } finally {
      builder.cleanUpInstanceFiles();
    }

    TupleMRBuilder cg = new TupleMRBuilder(conf);
    cg.addIntermediateSchema(new Schema("schema", fields));
    cg.setJarByClass(TestCombiner.class);
    cg.addInput(new Path(input), new HadoopInputFormat(SequenceFileInputFormat.class), new Split());
    cg.setOutput(new Path(output), new HadoopOutputFormat(SequenceFileOutputFormat.class), Utf8.class,
        IntWritable.class);
    cg.setGroupByFields("word");
    cg.setOrderBy(new OrderBy().add("word", Order.ASC));
    cg.setTupleReducer(new Count());
    cg.setTupleCombiner(new CountCombiner());
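Count and CountCombiner are referenced but not shown. A hedged sketch of what the final reducer might look like, assuming a word:string/count:int intermediate schema and Pangool's com.datasalt.pangool.io.Utf8 (a Text subclass) as the output key type; the combiner, which in Pangool re-emits intermediate tuples rather than final (Utf8, IntWritable) pairs, is omitted here:

  public static class Count extends TupleReducer<Utf8, IntWritable> {

    public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
        Collector collector) throws IOException, InterruptedException {
      // Sum the partial counts for this word (field names are assumptions).
      int total = 0;
      for (ITuple tuple : tuples) {
        total += (Integer) tuple.get("count");
      }
      collector.write(new Utf8(group.get("word").toString()), new IntWritable(total));
    }
  }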
