Package org.apache.crunch.impl.mr

Examples of org.apache.crunch.impl.mr.MRPipeline$StringifyFn


    return new DoCollectionImpl<T>(name, getChainingCollection(), fn, type, options);
  }
 
  @Override
  public <K, V> PTable<K, V> parallelDo(DoFn<S, Pair<K, V>> fn, PTableType<K, V> type) {
    MRPipeline pipeline = (MRPipeline) getPipeline();
    return parallelDo("S" + pipeline.getNextAnonymousStageId(), fn, type);
  }
View Full Code Here


  }
 
  public PipelineExecution run() throws IOException {
    String shakes = tmpDir.copyResourceFileName("shakes.txt");
    String out = tmpDir.getFileName("cancel");
    Pipeline p = new MRPipeline(CancelJobsIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> words = p.readTextFile(shakes);
    p.write(words.count().top(20), To.textFile(out));
    return p.runAsync(); // need to hack to slow down job start up if this test becomes flaky.
  }
View Full Code Here

    MemPipeline.clearCounters();
  }
 
  @Test
  public void testStageResultsCountersMRWritables() throws Exception {
    testSpecialKeywordCount(new MRPipeline(StageResultsCountersIT.class, tmpDir.getDefaultConfiguration()),
        WritableTypeFamily.getInstance());
  }
View Full Code Here

        WritableTypeFamily.getInstance());
  }

  @Test
  public void testStageResultsCountersMRAvro() throws Exception {
    testSpecialKeywordCount(new MRPipeline(StageResultsCountersIT.class, tmpDir.getDefaultConfiguration()),
        AvroTypeFamily.getInstance());
  }
View Full Code Here

    savedRecord.put("name", "John Doe");
    savedRecord.put("age", 42);
    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);

    Pipeline pipeline = new MRPipeline(TrevniKeyPipelineIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));
    File outputFile = tmpDir.getFile("output");
    Target trevniFile = new TrevniKeyTarget(outputFile.getAbsolutePath());
    pipeline.write(genericCollection, trevniFile);
    pipeline.run();

    Person person = genericCollection.materialize().iterator().next();

    File trvFile = new File(outputFile, "part-m-00000-part-0.trv");
View Full Code Here

    savedRecord.put("name", "John Doe");
    savedRecord.put("age", 42);
    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);

    Pipeline pipeline = new MRPipeline(TrevniKeyPipelineIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));
    File output1File = tmpDir.getFile("output1");
    File output2File = tmpDir.getFile("output2");
    pipeline.write(genericCollection, new TrevniKeyTarget(output1File.getAbsolutePath()));
    pipeline.write(genericCollection, new TrevniKeyTarget(output2File.getAbsolutePath()));
    pipeline.run();

    Person person = genericCollection.materialize().iterator().next();

    File trv1File = new File(output1File, "part-m-00000-part-0.trv");
    File trv2File = new File(output2File, "part-m-00000-part-0.trv");
View Full Code Here

    savedRecord.put("name", "John Doe");
    savedRecord.put("age", 42);
    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);

    Pipeline pipeline = new MRPipeline(TrevniKeyPipelineIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));
    File outputFile = tmpDir.getFile("output");
    Target trevniFile = new TrevniKeyTarget(outputFile.getAbsolutePath());
    pipeline.write(genericCollection, trevniFile);
    pipeline.run();

    Person person = genericCollection.materialize().iterator().next();

    PCollection<Person> retrievedPeople = pipeline.read(new TrevniKeySource<Person>(
        new Path(outputFile.toURI()), Avros.records(Person.class)));

    Person retrievedPerson = retrievedPeople.materialize().iterator().next();

    assertThat(retrievedPerson, is(person));
View Full Code Here

  // Second text input for the tests; assigned in setUp() from the "src2.txt" resource.
  private PCollection<String> lines2;


  @Before
  public void setUp() throws IOException {
    pipeline = new MRPipeline(CogroupIT.class, tmpDir.getDefaultConfiguration());
    lines1 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src1.txt")));
    lines2 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src2.txt")));
  }
View Full Code Here

    return new UnionCollection<S>(internal);
  }

  @Override
  public <T> PCollection<T> parallelDo(DoFn<S, T> fn, PType<T> type) {
    MRPipeline pipeline = (MRPipeline) getPipeline();
    return parallelDo("S" + pipeline.getNextAnonymousStageId(), fn, type);
  }
View Full Code Here

    return new DoCollectionImpl<T>(name, getChainingCollection(), fn, type, options);
  }
 
  @Override
  public <K, V> PTable<K, V> parallelDo(DoFn<S, Pair<K, V>> fn, PTableType<K, V> type) {
    MRPipeline pipeline = (MRPipeline) getPipeline();
    return parallelDo("S" + pipeline.getNextAnonymousStageId(), fn, type);
  }
View Full Code Here

TOP

Related Classes of org.apache.crunch.impl.mr.MRPipeline$StringifyFn

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.