Package com.datasalt.pangool.tuplemr.mapred.lib.output

Examples of com.datasalt.pangool.tuplemr.mapred.lib.output.HadoopOutputFormat
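HadoopOutputFormat is the adapter that lets a Pangool job write through any standard Hadoop OutputFormat class: the class is wrapped in the HadoopOutputFormat constructor and passed to the builder's setOutput() together with the output key and value types. Every extracted example below follows that pattern. As a self-contained starting point, here is a minimal sketch of it (the paths, the one-field schema, and the inline mapper/reducer are illustrative, not taken from any one example):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import com.datasalt.pangool.io.Fields;
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.Tuple;
import com.datasalt.pangool.tuplemr.TupleMRBuilder;
import com.datasalt.pangool.tuplemr.TupleMapper;
import com.datasalt.pangool.tuplemr.TupleReducer;
import com.datasalt.pangool.tuplemr.mapred.lib.input.HadoopInputFormat;
import com.datasalt.pangool.tuplemr.mapred.lib.output.HadoopOutputFormat;

public class MinimalHadoopOutputFormatUsage {

  static final Schema SCHEMA = new Schema("schema", Fields.parse("word:string"));

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    TupleMRBuilder mr = new TupleMRBuilder(conf);
    mr.addIntermediateSchema(SCHEMA);
    mr.setGroupByFields("word");
    mr.addInput(new Path(args[0]), new HadoopInputFormat(TextInputFormat.class),
        new TupleMapper<LongWritable, Text>() {
          Tuple tuple = new Tuple(SCHEMA);

          public void map(LongWritable key, Text value, TupleMRContext context,
              Collector collector) throws IOException, InterruptedException {
            // One tuple per input line.
            tuple.set("word", value.toString());
            collector.write(tuple);
          }
        });
    // The relevant part: a plain Hadoop OutputFormat class wrapped in
    // HadoopOutputFormat, plus the key/value types the job will emit.
    mr.setOutput(new Path(args[1]), new HadoopOutputFormat(TextOutputFormat.class),
        Text.class, NullWritable.class);
    mr.setTupleReducer(new TupleReducer<Text, NullWritable>() {
      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
          Collector collector) throws IOException, InterruptedException {
        // Emit each distinct word once.
        collector.write(new Text(group.get("word").toString()), NullWritable.get());
      }
    });
    try {
      mr.createJob().waitForCompletion(true);
    } finally {
      mr.cleanUpInstanceFiles();
    }
  }
}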


      // Add a named output for each category (this fragment begins inside a loop over category strings)
      job.addNamedOutput(categoryString, new TupleSolrOutputFormat(new File(
          "src/test/resources/shakespeare-solr"), job.getConf()), ITuple.class, NullWritable.class);
    }
    job.setOutput(new Path(output), new HadoopOutputFormat(NullOutputFormat.class), ITuple.class,
        NullWritable.class);
    // The reducer will just emit the tuple to the corresponding Category output
    job.setTupleReducer(new TupleReducer<ITuple, NullWritable>() {

      ITuple outTuple = new Tuple(OUT_SCHEMA);
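The reduce() body is cut off at this point. Pangool's reducer Collector can address the outputs registered with addNamedOutput(); a hedged sketch of how the elided body might route each tuple, assuming a getNamedOutput() accessor on the Collector and illustrative field names ("line", "category") that are not taken from the original:

      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
          Collector collector) throws IOException, InterruptedException {
        for (ITuple tuple : tuples) {
          // Copy the indexed field into the output tuple (field names assumed).
          outTuple.set("line", tuple.get("line"));
          // Route the tuple to the named output created for its category.
          collector.getNamedOutput(tuple.get("category").toString())
              .write(outTuple, NullWritable.get());
        }
      }
    });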


    // ... (tail of an anonymous handler, elided from this fragment)

    job.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        NullWritable.class);
    try {
      job.createJob().waitForCompletion(true);
    } finally {
      job.cleanUpInstanceFiles();
    }
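This try/finally shape recurs throughout the examples below: cleanUpInstanceFiles() deletes the serialized mapper/reducer instance files that Pangool distributes alongside the job, so it belongs in a finally block and runs whether or not the job succeeds.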

    mr.setGroupByFields("intField", "strField");
    mr.setOrderBy(new OrderBy().add("intField", Order.ASC).add("strField", Order.ASC)
        .add("longField", Order.ASC));
    mr.setTupleReducer(new Handler());
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new IProcessor());
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        DoubleWritable.class);

    try {
      mr.createJob().waitForCompletion(true);
    } finally {
      mr.cleanUpInstanceFiles();
    }
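The group-by and sort fields used above must all be declared in the intermediate schema, which this fragment omits; a plausible registration (the schema name and field types are assumptions inferred from the field names):

    mr.addIntermediateSchema(new Schema("schema",
        Fields.parse("intField:int,strField:string,longField:long")));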

    // --- This is the most important part (what makes it work with MongoDB): ---
    // Set the URI of the MongoDB we will write to. Here we specify the DB and the target collection.
    MongoConfigUtil.setOutputURI(conf, "mongodb://localhost/test.qype");
    // Set the output format to HadoopOutputFormat(MongoOutputFormat.class).
    // The key will be the document ids for the Mongo collection and the value a Mongo BSONObject with all the properties we wish.
    builder.setOutput(new Path(outPath), new HadoopOutputFormat(MongoOutputFormat.class), Text.class,
        BSONObject.class);

    // Finally, build and execute the Pangool Job.
    try {
      builder.createJob().waitForCompletion(true);
    } finally {
      builder.cleanUpInstanceFiles();
    }
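MongoOutputFormat consumes the (Text, BSONObject) pairs declared in setOutput(), so a handler in this job has to emit them. A hedged sketch of such a reducer body (the grouping on "url", the "visits" property, and the use of org.bson.BasicBSONObject are illustrative assumptions):

      public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
          Collector collector) throws IOException, InterruptedException {
        // Build one Mongo document per group; property names are illustrative.
        BSONObject document = new BasicBSONObject();
        for (ITuple tuple : tuples) {
          document.put("visits", tuple.get("visits"));
        }
        // The Text key becomes the id of the document written to test.qype.
        collector.write(new Text(group.get("url").toString()), document);
      }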

    delete(output);
   
    init(conf, new Path(modelFolder));
   
    MapOnlyJobBuilder job = new MapOnlyJobBuilder(conf);
    job.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new MapOnlyMapper<LongWritable, Text, Text, NullWritable>() {
      protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        value.set(value.toString() + "\t" + classify(value.toString()));
        context.write(value, NullWritable.get());
      }
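The fragment is cut off inside the anonymous MapOnlyMapper; closing it and launching the job plausibly follows the same pattern as the other examples (a sketch, assuming MapOnlyJobBuilder exposes the same createJob()/cleanUpInstanceFiles() pair used with TupleMRBuilder above):

    });
    try {
      job.createJob().waitForCompletion(true);
    } finally {
      job.cleanUpInstanceFiles();
    }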

    mr.setFieldAliases("urlMap", new Aliases().add("url", "nonCanonicalUrl"));
    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).addSchemaOrder(Order.ASC));
    mr.setSpecificOrderBy("urlRegister", new OrderBy().add("timestamp", Order.ASC));
    mr.setTupleReducer(new Handler());
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        NullWritable.class);
    mr.addInput(new Path(input1), new HadoopInputFormat(TextInputFormat.class), new UrlMapProcessor());
    mr.addInput(new Path(input2), new HadoopInputFormat(TextInputFormat.class), new UrlProcessor());

    try {
      mr.createJob().waitForCompletion(true);
    } finally {
      mr.cleanUpInstanceFiles();
    }
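setFieldAliases() and setSpecificOrderBy() imply two intermediate schemas, "urlMap" and "urlRegister", registered before this fragment begins; a plausible registration, with field lists inferred from the names used above (canonicalUrl is a pure assumption):

    mr.addIntermediateSchema(new Schema("urlMap",
        Fields.parse("nonCanonicalUrl:string,canonicalUrl:string")));
    mr.addIntermediateSchema(new Schema("urlRegister",
        Fields.parse("url:string,timestamp:long")));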


    mr.addIntermediateSchema(schema);
    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).add("date", Order.ASC));
    // Input / output and such
    mr.setTupleReducer(new MovingAverageHandler(nDaysAverage));
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        NullWritable.class);
    mr.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new URLVisitsProcessor());

    try {
      mr.createJob().waitForCompletion(true);
    } finally {
      mr.cleanUpInstanceFiles();
    }
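The schema variable registered at the top of this fragment is not shown; given the group-by on url and the secondary sort on date, it plausibly looks like this (the schema name and field types are assumptions):

    Schema schema = new Schema("visits",
        Fields.parse("url:string,date:string,visits:int"));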

            // ... inside an inline mapper's map() method (the signature is truncated in the original):
            context.write(new Text(key.toString()), NullWritable.get());
          }
        });

    builder.setOutput(new Path(OUTPUT), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
        NullWritable.class);
    Job job = builder.createJob();
    try {
      assertRun(job);
    } finally {
      builder.cleanUpInstanceFiles();
    }

    TupleMRBuilder cg = new TupleMRBuilder(conf);
    cg.addIntermediateSchema(new Schema("schema", fields));
    cg.setJarByClass(TestCombiner.class);
    cg.addInput(new Path(input), new HadoopInputFormat(SequenceFileInputFormat.class), new Split());
    cg.setOutput(new Path(output), new HadoopOutputFormat(SequenceFileOutputFormat.class), Utf8.class,
        IntWritable.class);
    cg.setGroupByFields("word");
    cg.setOrderBy(new OrderBy().add("word", Order.ASC));
    cg.setTupleReducer(new Count());
    cg.setTupleCombiner(new CountCombiner());
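Count and CountCombiner are referenced but not shown. A hedged sketch of what the final reducer might look like, assuming a word:string/count:int intermediate schema and Pangool's com.datasalt.pangool.io.Utf8 (a Text subclass) as the output key type; the combiner, which in Pangool re-emits intermediate tuples rather than final (Utf8, IntWritable) pairs, is omitted here:

  public static class Count extends TupleReducer<Utf8, IntWritable> {

    public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context,
        Collector collector) throws IOException, InterruptedException {
      // Sum the partial counts for this word (field names are assumptions).
      int total = 0;
      for (ITuple tuple : tuples) {
        total += (Integer) tuple.get("count");
      }
      collector.write(new Utf8(group.get("word").toString()), new IntWritable(total));
    }
  }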
