Package org.apache.crunch.impl.mr

Examples of org.apache.crunch.impl.mr.MRPipeline


  // Temporary-path fixture; the test in this snippet uses it to obtain a default
  // configuration, copy the input resource, and name the output location.
  @Rule
  public TemporaryPath tmpDir = TemporaryPaths.create();
 
  @Test
  public void testMR() throws Exception {
    run(new MRPipeline(LongPipelinePlannerIT.class, tmpDir.getDefaultConfiguration()),
        tmpDir.copyResourceFileName("shakes.txt"),
        tmpDir.getFileName("output"));
  }
View Full Code Here


  // Total number of documents. Hard-coded to 2 here; per the original note, this
  // value should really be calculated rather than fixed.
  protected static final double N = 2;

  @Test
  public void testWritablesSingleRun() throws IOException {
    run(new MRPipeline(TfIdfIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), true);
  }
View Full Code Here

    run(new MRPipeline(TfIdfIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), true);
  }

  @Test
  public void testWritablesMultiRun() throws IOException {
    run(new MRPipeline(TfIdfIT.class, tmpDir.getDefaultConfiguration()), WritableTypeFamily.getInstance(), false);
  }
View Full Code Here

    jos.closeEntry();
  }

  @Test
  public void testWordCount() throws IOException {
    run(new MRPipeline(WordCountHBaseIT.class, hbaseTestUtil.getConfiguration()));
  }
View Full Code Here

      pipeline.done();

      //verify HBaseTarget supports deletes.
      Scan clearScan = new Scan();
      clearScan.addFamily(COUNTS_COLFAM);
      pipeline = new MRPipeline(WordCountHBaseIT.class, hbaseTestUtil.getConfiguration());
      HBaseSourceTarget clearSource = new HBaseSourceTarget(outputTableName, clearScan);
      PTable<ImmutableBytesWritable, Result> counts = pipeline.read(clearSource);
      pipeline.write(clearCounts(counts), new HBaseTarget(outputTableName));
      pipeline.done();
     
View Full Code Here

  public TemporaryPath tmpDir = TemporaryPaths.create();
 
  @Test
  public void testCheckpoints() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("shakes.txt");
    Pipeline p = new MRPipeline(CheckpointIT.class);
    String inter = tmpDir.getFileName("intermediate");
    PipelineResult one = run(p, tmpDir, inputPath, inter, false);
    assertTrue(one.succeeded());
    assertEquals(2, one.getStageResults().size());
    PipelineResult two = run(p, tmpDir, inputPath, inter, false);
View Full Code Here

  }
 
  @Test
  public void testUnsuccessfulCheckpoint() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("shakes.txt");
    Pipeline p = new MRPipeline(CheckpointIT.class);
    String inter = tmpDir.getFileName("intermediate");
    PipelineResult one = run(p, tmpDir, inputPath, inter, true);
    assertFalse(one.succeeded());
    PipelineResult two = run(p, tmpDir, inputPath, inter, false);
    assertTrue(two.succeeded());
View Full Code Here

  }
 
  @Test
  public void testModifiedFileCheckpoint() throws Exception {
    String inputPath = tmpDir.copyResourceFileName("shakes.txt");
    Pipeline p = new MRPipeline(CheckpointIT.class);
    Path inter = tmpDir.getPath("intermediate");
    PipelineResult one = run(p, tmpDir, inputPath, inter.toString(), false);
    assertTrue(one.succeeded());
    assertEquals(2, one.getStageResults().size());
    // Update the input path
View Full Code Here

    List<KeyValue> kvs = generateKeyValues(100);
    Path inputPath = tmpDir.getPath("in");
    Path outputPath = tmpDir.getPath("out");
    writeKeyValuesToHFile(inputPath, kvs);

    Pipeline pipeline = new MRPipeline(HFileSourceIT.class, conf);
    PCollection<KeyValue> in = pipeline.read(FromHBase.hfile(inputPath));
    PCollection<String> texts = in.parallelDo(new MapFn<KeyValue, String>() {
      @Override
      public String map(KeyValue input) {
        return input.toString();
      }
    }, strings());
    texts.write(To.textFile(outputPath));
    PipelineResult result = pipeline.run();
    assertTrue(result.succeeded());

    List<String> lines = FileUtils.readLines(new File(outputPath.toString(), "part-m-00000"));
    assertEquals(kvs.size(), lines.size());
    for (int i = 0; i < kvs.size(); i++) {
View Full Code Here

  private List<Result> doTestScanHFiles(List<KeyValue> kvs, Scan scan) throws IOException {
    Path inputPath = tmpDir.getPath("in");
    writeKeyValuesToHFile(inputPath, kvs);

    Pipeline pipeline = new MRPipeline(HFileSourceIT.class, conf);
    PCollection<Result> results = HFileUtils.scanHFiles(pipeline, inputPath, scan);
    return ImmutableList.copyOf(results.materialize());
  }
View Full Code Here

TOP

Related Classes of org.apache.crunch.impl.mr.MRPipeline

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.