Package org.apache.crunch

Examples of org.apache.crunch.Pipeline


  // Temporary-path helper shared by the tests in this excerpt: they call it to obtain the
  // pipeline configuration (getDefaultConfiguration) and to copy resource files by name
  // (copyResourceFileName). Declared with @Rule -- presumably a JUnit rule whose lifecycle
  // is managed per test; confirm against TemporaryPath.
  @Rule
  public TemporaryPath tmpDir = TemporaryPaths.create();

  @Test
  public void testWritables() throws Exception {
    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
    PCollection<String> shakes = pipeline.readTextFile(shakesInputPath);
    runMinMax(shakes, WritableTypeFamily.getInstance());
    pipeline.done();
  }
View Full Code Here


    pipeline.done();
  }

  @Test
  public void testAvro() throws Exception {
    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
    String shakesInputPath = tmpDir.copyResourceFileName("shakes.txt");
    PCollection<String> shakes = pipeline.readTextFile(shakesInputPath);
    runMinMax(shakes, AvroTypeFamily.getInstance());
    pipeline.done();
  }
View Full Code Here

    }
  }

  @Test
  public void testCollectUrls() throws Exception {
    Pipeline p = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
    String urlsInputPath = tmpDir.copyResourceFileName("urls.txt");
    PTable<String, Collection<String>> urls = Aggregate.collectValues(p.readTextFile(urlsInputPath).parallelDo(
        new SplitFn(), tableOf(strings(), strings())));
    for (Pair<String, Collection<String>> e : urls.materialize()) {
      String key = e.first();
      int expectedSize = 0;
      if ("www.A.com".equals(key)) {
        expectedSize = 4;
      } else if ("www.B.com".equals(key) || "www.F.com".equals(key)) {
        expectedSize = 2;
      } else if ("www.C.com".equals(key) || "www.D.com".equals(key) || "www.E.com".equals(key)) {
        expectedSize = 1;
      }
      assertEquals("Checking key = " + key, expectedSize, e.second().size());
      p.done();
    }
  }
View Full Code Here

    assertEquals(ImmutableList.of(Pair.of("foo", 12), Pair.of("bar", 17)), bottom2.materialize());
  }

  @Test
  public void testTopN_MRPipeline() throws IOException {
    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
    PTable<StringWrapper, String> entries = pipeline
        .read(From.textFile(tmpDir.copyResourceFileName("set1.txt"), Avros.strings()))
        .by(new StringWrapper.StringToStringWrapperMapFn(), Avros.reflects(StringWrapper.class));
    PTable<StringWrapper, String> topEntries = Aggregate.top(entries, 3, true);
    List<Pair<StringWrapper, String>> expectedTop3 = Lists.newArrayList(
        Pair.of(StringWrapper.wrap("e"), "e"),
View Full Code Here

  }

  @Test
  public void testCollectValues_Writables() throws IOException {
    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
    Map<Integer, Collection<Text>> collectionMap = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt"))
        .parallelDo(new MapStringToTextPair(), Writables.tableOf(Writables.ints(), Writables.writables(Text.class)))
        .collectValues().materializeToMap();

    assertEquals(1, collectionMap.size());
View Full Code Here

  @Test
  public void testCollectValues_Avro() throws IOException {

    MapStringToEmployeePair mapFn = new MapStringToEmployeePair();
    Pipeline pipeline = new MRPipeline(AggregateIT.class, tmpDir.getDefaultConfiguration());
    Map<Integer, Collection<Employee>> collectionMap = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt"))
        .parallelDo(mapFn, Avros.tableOf(Avros.ints(), Avros.records(Employee.class))).collectValues()
        .materializeToMap();

    assertEquals(1, collectionMap.size());
View Full Code Here

    }
  };

  @Test
  public void testKill() throws Exception {
    Pipeline pipeline = new MRPipeline(FailIT.class, tempDir.getDefaultConfiguration());
    PCollection<String> p = pipeline.readTextFile(tempDir.copyResourceFileName("shakes.txt"));
    PCollection<Integer> result = p.parallelDo(new InverseFn(), Writables.ints());
    result.cache();

    PipelineExecution execution = pipeline.runAsync();

    while (!execution.isDone() && !execution.isCancelled()
        && execution.getStatus() != PipelineExecution.Status.FAILED
        && execution.getResult() == null) {
      try {
View Full Code Here

    savedRecord.put("name", "John Doe");
    savedRecord.put("age", 42);
    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(savedRecord), Person.SCHEMA$);

    Pipeline pipeline = new MRPipeline(TrevniFileSourceTargetIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Person> genericCollection = pipeline.read(new TrevniKeySource(new Path(avroFile.getAbsolutePath()),
        Avros.records(Person.class)));

    List<Person> personList = Lists.newArrayList(genericCollection.materialize());

    Person expectedPerson = new Person();
View Full Code Here

    savedRecord.put("name", "John Doe");
    savedRecord.put("age", 42);
    savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(savedRecord), genericPersonSchema);

    Pipeline pipeline = new MRPipeline(TrevniFileSourceTargetIT.class, tmpDir.getDefaultConfiguration());
    PCollection<Record> genericCollection = pipeline.read(new TrevniKeySource(new Path(avroFile.getAbsolutePath()),
        Avros.generics(genericPersonSchema)));

    List<Record> recordList = Lists.newArrayList(genericCollection.materialize());

    assertEquals(Lists.newArrayList(savedRecord), Lists.newArrayList(recordList));
View Full Code Here

    Schema schema = strType.getSchema();
    GenericRecord savedRecord = new Record(schema);
    savedRecord.put("value", "stringvalue");
    populateGenericFile(Lists.newArrayList(savedRecord), schema);

    Pipeline pipeline = new MRPipeline(TrevniFileSourceTargetIT.class, tmpDir.getDefaultConfiguration());
    PCollection<StringWrapper> stringValueCollection = pipeline.read(new TrevniKeySource(new Path(avroFile.getAbsolutePath()),
        strType));

    List<StringWrapper> recordList = Lists.newArrayList(stringValueCollection.materialize());

    assertEquals(1, recordList.size());
View Full Code Here

TOP

Related Classes of org.apache.crunch.Pipeline

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.