Package org.apache.crunch

Examples of org.apache.crunch.Pipeline


    savedRecord.put("name", "John Doe");
    savedRecord.put("age", 42);
    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);

    Pipeline pipeline = new MRPipeline(TrevniKeyPipelineIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));
    File outputFile = tmpDir.getFile("output");
    Target trevniFile = new TrevniKeyTarget(outputFile.getAbsolutePath());
    pipeline.write(genericCollection, trevniFile);
    pipeline.run();

    Person person = genericCollection.materialize().iterator().next();

    File trvFile = new File(outputFile, "part-m-00000.trv-part-0.trv");
View Full Code Here


    savedRecord.put("name", "John Doe");
    savedRecord.put("age", 42);
    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);

    Pipeline pipeline = new MRPipeline(TrevniKeyPipelineIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));
    File output1File = tmpDir.getFile("output1");
    File output2File = tmpDir.getFile("output2");
    pipeline.write(genericCollection, new TrevniKeyTarget(output1File.getAbsolutePath()));
    pipeline.write(genericCollection, new TrevniKeyTarget(output2File.getAbsolutePath()));
    pipeline.run();

    Person person = genericCollection.materialize().iterator().next();

    File trv1File = new File(output1File, "part-m-00000.trv-part-0.trv");
    File trv2File = new File(output2File, "part-m-00000.trv-part-0.trv");
View Full Code Here

    savedRecord.put("name", "John Doe");
    savedRecord.put("age", 42);
    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);

    Pipeline pipeline = new MRPipeline(TrevniKeyPipelineIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));
    File outputFile = tmpDir.getFile("output");
    Target trevniFile = new TrevniKeyTarget(outputFile.getAbsolutePath());
    pipeline.write(genericCollection, trevniFile);
    pipeline.run();

    Person person = genericCollection.materialize().iterator().next();

    PCollection<Person> retrievedPeople = pipeline.read(new TrevniKeySource<Person>(
        new Path(outputFile.toURI()), Avros.records(Person.class)));

    Person retrievedPerson = retrievedPeople.materialize().iterator().next();

    assertThat(retrievedPerson, is(person));
View Full Code Here

  // JUnit rule supplying the temporary path used by the tests below (Hadoop configuration via
  // getDefaultConfiguration(), input files via copyResourceFileName()).
  // NOTE(review): presumably marked transient because the anonymous MapFn instances capture the
  // enclosing test instance and get serialized with the job — confirm.
  @Rule
  public transient TemporaryPath tmpDir = TemporaryPaths.create();

  @Test
  public void testReflection() throws IOException {
    Pipeline pipeline = new MRPipeline(AvroReflectIT.class, tmpDir.getDefaultConfiguration());
    PCollection<StringWrapper> stringWrapperCollection = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"))
        .parallelDo(new MapFn<String, StringWrapper>() {

          @Override
          public StringWrapper map(String input) {
            StringWrapper stringWrapper = new StringWrapper();
            stringWrapper.setValue(input);
            return stringWrapper;
          }
        }, Avros.reflects(StringWrapper.class));

    List<StringWrapper> stringWrappers = Lists.newArrayList(stringWrapperCollection.materialize());

    pipeline.done();

    assertEquals(Lists.newArrayList(new StringWrapper("b"), new StringWrapper("c"), new StringWrapper("a"),
        new StringWrapper("e")), stringWrappers);

  }
View Full Code Here

  // Verify that running with a combination of reflect and specific schema
  // doesn't crash
  @Test
  public void testCombinationOfReflectionAndSpecific() throws IOException {
    Assume.assumeTrue(Avros.CAN_COMBINE_SPECIFIC_AND_REFLECT_SCHEMAS);
    Pipeline pipeline = new MRPipeline(AvroReflectIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Pair<StringWrapper, Person>> hybridPairCollection = pipeline.readTextFile(
        tmpDir.copyResourceFileName("set1.txt")).parallelDo(new MapFn<String, Pair<StringWrapper, Person>>() {

      @Override
      public Pair<StringWrapper, Person> map(String input) {
        Person person = new Person();
        person.name = input;
        person.age = 42;
        person.siblingnames = Lists.<CharSequence> newArrayList(input);

        return Pair.of(new StringWrapper(input), person);
      }
    }, Avros.pairs(Avros.reflects(StringWrapper.class), Avros.records(Person.class)));

    PCollection<Pair<String, Long>> countCollection = Aggregate.count(hybridPairCollection).parallelDo(
        new MapFn<Pair<Pair<StringWrapper, Person>, Long>, Pair<String, Long>>() {

          @Override
          public Pair<String, Long> map(Pair<Pair<StringWrapper, Person>, Long> input) {
            return Pair.of(input.first().first().getValue(), input.second());
          }
        }, Avros.pairs(Avros.strings(), Avros.longs()));

    List<Pair<String, Long>> materialized = Lists.newArrayList(countCollection.materialize());
    List<Pair<String, Long>> expected = Lists.newArrayList(Pair.of("a", 1L), Pair.of("b", 1L), Pair.of("c", 1L),
        Pair.of("e", 1L));
    Collections.sort(materialized);

    assertEquals(expected, materialized);
    pipeline.done();
  }
View Full Code Here

      new MapFn<String, StringWrapper>() { public StringWrapper map(String out) { return new StringWrapper(out); }},
      Avros.reflects(StringWrapper.class));

  @Test
  public void testDerivedReflection() throws Exception {
    Pipeline pipeline = new MRPipeline(AvroReflectIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> stringWrapperCollection = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"))
        .parallelDo(IdentityFn.<String>getInstance(), STRING_PTYPE);
    List<String> strings = Lists.newArrayList(stringWrapperCollection.materialize());
    pipeline.done();
    assertEquals(Lists.newArrayList("b", "c", "a", "e"), strings);
  }
View Full Code Here

    assertEquals(Lists.newArrayList("b", "c", "a", "e"), strings);
  }

  @Test
  public void testWrappedDerivedReflection() throws Exception {
    Pipeline pipeline = new MRPipeline(AvroReflectIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Pair<Long, String>> stringWrapperCollection = pipeline.readTextFile(tmpDir.copyResourceFileName("set1.txt"))
        .parallelDo(new MapFn<String, Pair<Long, String>>() {
          @Override
          public Pair<Long, String> map(String input) {
            return Pair.of(1L, input);
          }
        }, Avros.pairs(Avros.longs(), STRING_PTYPE));
    List<Pair<Long, String>> pairs = Lists.newArrayList(stringWrapperCollection.materialize());
    pipeline.done();
    assertEquals(pairs.size(), 4);
    assertEquals(Pair.of(1L, "a"), pairs.get(2));
  }
View Full Code Here

    savedRecord.put("name", "John Doe");
    savedRecord.put("age", 42);
    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);

    Pipeline pipeline = new MRPipeline(AvroFileSourceTargetIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));
    File outputFile = tmpDir.getFile("output");
    Target textFile = To.textFile(outputFile.getAbsolutePath());
    pipeline.write(genericCollection, textFile);
    pipeline.run();
    Person person = genericCollection.materialize().iterator().next();
    String outputString = FileUtils.readFileToString(new File(outputFile, "part-m-00000"));
    assertTrue(outputString.contains(person.toString()));
  }
View Full Code Here

    savedRecord.put("name", "John Doe");
    savedRecord.put("age", 42);
    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);

    Pipeline pipeline = new MRPipeline(AvroFileSourceTargetIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));
    PTable<Long, StringWrapper> pt = genericCollection.parallelDo(new MapFn<Person, Pair<Long, StringWrapper>>() {
      @Override
      public Pair<Long, StringWrapper> map(Person input) {
        return Pair.of(1L, new StringWrapper(input.getName().toString()));
      }
    }, Avros.tableOf(Avros.longs(), Avros.reflects(StringWrapper.class)))
        .groupByKey()
        .ungroup();
    List<Pair<Long, StringWrapper>> ret = Lists.newArrayList(pt.materialize());
    pipeline.done();
    assertEquals(1, ret.size());
  }
View Full Code Here

    savedRecord.put("name", "John Doe");
    savedRecord.put("age", 42);
    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);

    Pipeline pipeline = new MRPipeline(AvroFileSourceTargetIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));

    List<Person> personList = Lists.newArrayList(genericCollection.materialize());

    Person expectedPerson = new Person();
View Full Code Here

TOP

Related Classes of org.apache.crunch.Pipeline

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.