Package org.apache.crunch.impl.mr

Examples of org.apache.crunch.impl.mr.MRPipeline.run()


    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));
    File outputFile = tmpDir.getFile("output");
    Target trevniFile = new TrevniKeyTarget(outputFile.getAbsolutePath());
    pipeline.write(genericCollection, trevniFile);
    pipeline.run();

    Person person = genericCollection.materialize().iterator().next();

    PCollection<Person> retrievedPeople = pipeline.read(new TrevniKeySource<Person>(
        new Path(outputFile.toURI()), Avros.records(Person.class)));
View Full Code Here


      public String map(KeyValue input) {
        return input.toString();
      }
    }, strings());
    texts.write(To.textFile(outputPath));
    PipelineResult result = pipeline.run();
    assertTrue(result.succeeded());

    List<String> lines = FileUtils.readLines(new File(outputPath.toString(), "part-m-00000"));
    assertEquals(kvs.size(), lines.size());
    for (int i = 0; i < kvs.size(); i++) {
View Full Code Here

    // The aggregate method groups a collection into a single PObject.
    PObject<Long> totalCount = numberOfWords.aggregate(Aggregators.SUM_LONGS()).first();

    // Execute the pipeline as a MapReduce.
    PipelineResult result = pipeline.run();

    System.out.println("Total number of words: " + totalCount.getValue());
   
    pipeline.done();
View Full Code Here

    PCollection<String> words = split(shakespeare, "\\s+");
    PTable<String,Long> wordCounts = words.count();
    PCollection<KeyValue> wordCountKeyValues = convertToKeyValues(wordCounts);
    pipeline.write(wordCountKeyValues, ToHBase.hfile(outputPath));

    PipelineResult result = pipeline.run();
    assertTrue(result.succeeded());

    FileSystem fs = FileSystem.get(conf);
    KeyValue kv = readFromHFiles(fs, outputPath, "and");
    assertEquals(427L, Bytes.toLong(kv.getValue()));
View Full Code Here

    HFileUtils.writePutsToHFilesForIncrementalLoad(
        wordCountPuts,
        testTable,
        outputPath);

    PipelineResult result = pipeline.run();
    assertTrue(result.succeeded());

    new LoadIncrementalHFiles(HBASE_TEST_UTILITY.getConfiguration())
        .doBulkLoad(outputPath, testTable);
View Full Code Here

    HFileUtils.writePutsToHFilesForIncrementalLoad(
        convertToPuts(longWordCounts),
        table2,
        outputPath2);

    PipelineResult result = pipeline.run();
    assertTrue(result.succeeded());
    loader.doBulkLoad(outputPath1, table1);
    loader.doBulkLoad(outputPath2, table2);

    assertEquals(396L, getWordCountFromTable(table1, "of"));
View Full Code Here

    HFileUtils.writePutsToHFilesForIncrementalLoad(
        wordCountPuts,
        testTable,
        outputPath);

    PipelineResult result = pipeline.run();
    assertTrue(result.succeeded());

    int hfilesCount = 0;
    FileSystem fs = outputPath.getFileSystem(conf);
    for (FileStatus e : fs.listStatus(new Path(outputPath, Bytes.toString(TEST_FAMILY)))) {
View Full Code Here

  @Test(expected=CrunchRuntimeException.class)
  public void testTextToAvro() throws Exception {
    String shakes = tmpDir.copyResourceFileName("shakes.txt");
    Pipeline pipeline = new MRPipeline(TextToAvroIT.class, tmpDir.getDefaultConfiguration());
    pipeline.read(From.textFile(shakes)).write(To.avroFile("output"));
    pipeline.run();
  }
}
View Full Code Here

    Pipeline mrPipeline = new MRPipeline(getClass());
    PCollection<String> mrPColl = mrPipeline.readTextFile(infilename);
    Target mrTarget = new TextFileTarget(mrOutFilename);
    mrPipeline.write(mrPColl, mrTarget, WriteMode.OVERWRITE);
    mrPipeline.run();
    String actualMrText = Files.readFirstLine(new File(mrOutFilename + "/part-m-00000"), Charsets.UTF_8);

    Assert.assertEquals("MR file mismatch", expected, actualMrText);
    Assert.assertEquals("Mem file mismatch", expected, actualMemText);
  }
View Full Code Here

    // for map side join
    final MapsideJoinStrategy<Text, Text, Text> mapSideJoinStrategy = new MapsideJoinStrategy<Text, Text, Text>();

    final PTable<Text, Pair<Text, Text>> updatedJoinedRows = mapSideJoinStrategy.join(convertedValues1, convertedValues2, JoinType.INNER_JOIN);
    pipeline.run();

    // Join should have 2 results
    // Join should have contentBytes1 and contentBytes2
    assertEquals(4, updatedJoinedRows.materializeToMap().size());
  }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.