Package org.apache.crunch.impl.spark

Examples of org.apache.crunch.impl.spark.SparkPipeline


    pipeline.done();
  }

  @Test
  public void testAvroReflectSortTable() throws IOException {
    Pipeline pipeline = new SparkPipeline("local", "sort");
    PTable<String, StringWrapper> unsorted = pipeline.readTextFile(tmpDir.copyResourceFileName("set2.txt")).parallelDo(
        new MapFn<String, Pair<String, StringWrapper>>() {

          @Override
          public Pair<String, StringWrapper> map(String input) {
            return Pair.of(input, wrap(input));
          }
        }, Avros.tableOf(Avros.strings(), Avros.reflects(StringWrapper.class)));

    PTable<String, StringWrapper> sorted = Sort.sort(unsorted);

    List<Pair<String, StringWrapper>> expected = Lists.newArrayList();
    expected.add(Pair.of("a", wrap("a")));
    expected.add(Pair.of("c", wrap("c")));
    expected.add(Pair.of("d", wrap("d")));

    assertEquals(expected, Lists.newArrayList(sorted.materialize()));
    pipeline.done();
  }
View Full Code Here


    pipeline.done();
  }

  @Test
  public void testAvroSortTable() throws Exception {
    runTable(new SparkPipeline("local", "sort"), AvroTypeFamily.getInstance(), "A");
  }
View Full Code Here

  // JUnit rule; the tests in this excerpt use it to copy resource files (set1.txt, set2.txt)
  // to a location the pipeline can read from.
  @Rule
  public TemporaryPath tempDir = new TemporaryPath();

  @Test
  public void testCount() throws Exception {
    SparkPipeline pipeline = new SparkPipeline("local", "aggregator");
    PCollection<String> set1 = pipeline.read(From.textFile(tempDir.copyResourceFileName("set1.txt")));
    PCollection<String> set2 = pipeline.read(From.textFile(tempDir.copyResourceFileName("set2.txt")));
    Iterable<Pair<Integer, Long>> cnts = set1.union(set2)
        .parallelDo(new CntFn(), Avros.ints())
        .count().materialize();
    assertEquals(ImmutableList.of(Pair.of(1, 7L)), Lists.newArrayList(cnts));
    pipeline.done();
  }
View Full Code Here

  // Assigned in setUp(): a second read of src1.txt.
  private PCollection<String> lines3;
  // Assigned in setUp(): a second read of src2.txt.
  private PCollection<String> lines4;

  @Before
  public void setUp() throws IOException {
    pipeline = new SparkPipeline("local", "wordcount");
    lines1 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src1.txt")));
    lines2 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src2.txt")));
    lines3 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src1.txt")));
    lines4 = pipeline.readTextFile(tmpDir.copyResourceFileName(Tests.resource(this, "src2.txt")));
  }
View Full Code Here

import static junit.framework.Assert.assertFalse;

public class SparkPipelineCallableIT extends CrunchTestSupport {
  @Test
  public void testSparkShakes() throws Exception {
    run(new SparkPipeline("local", "PC", SparkPipelineCallableIT.class, tempDir.getDefaultConfiguration()),
        tempDir.copyResourceFileName("shakes.txt"), false /* fail */);
  }
View Full Code Here

        tempDir.copyResourceFileName("shakes.txt"), false /* fail */);
  }

  @Test
  public void testFailure() throws Exception {
    run(new SparkPipeline("local", "PC", SparkPipelineCallableIT.class, tempDir.getDefaultConfiguration()),
        tempDir.copyResourceFileName("shakes.txt"), true /* fail */);
  }
View Full Code Here

import static org.junit.Assert.assertEquals;

public class SparkSecondarySortIT extends CrunchTestSupport implements Serializable {
  @Test
  public void testSecondarySort() throws Exception {
    Pipeline p = new SparkPipeline("local", "secondarysort");
    String inputFile = tempDir.copyResourceFileName("secondary_sort_input.txt");

    PTable<String, Pair<Integer, Integer>> in = p.read(From.textFile(inputFile))
        .parallelDo(new MapFn<String, Pair<String, Pair<Integer, Integer>>>() {
          @Override
          public Pair<String, Pair<Integer, Integer>> map(String input) {
            String[] pieces = input.split(",");
            return Pair.of(pieces[0],
                Pair.of(Integer.valueOf(pieces[1].trim()), Integer.valueOf(pieces[2].trim())));
          }
        }, tableOf(strings(), pairs(ints(), ints())));
    Iterable<String> lines = SecondarySort.sortAndApply(in, new MapFn<Pair<String, Iterable<Pair<Integer, Integer>>>, String>() {
      @Override
      public String map(Pair<String, Iterable<Pair<Integer, Integer>>> input) {
        Joiner j = Joiner.on(',');
        return j.join(input.first(), j.join(input.second()));
      }
    }, strings()).materialize();
    assertEquals(ImmutableList.of("one,[-5,10],[1,1],[2,-3]", "three,[0,-1]", "two,[1,7],[2,6],[4,5]"),
        ImmutableList.copyOf(lines));
    p.done();
  }
View Full Code Here

  // Temporary path helper for this test class (any annotation on it lies above this excerpt).
  public TemporaryPath tmpDir = new TemporaryPath();
  // Pipeline used by the tests; assigned in setUp().
  private Pipeline pipeline;

  @Before
  public void setUp() throws Exception {
    pipeline = new SparkPipeline("local", "pagerank");
  }
View Full Code Here

  // JUnit rule; used in this excerpt to copy the shakes.txt resource for the union test.
  @Rule
  public TemporaryPath tempDir = new TemporaryPath();

  @Test
  public void testEmptyMR() throws Exception {
    Pipeline p = new SparkPipeline("local", "empty");
    assertTrue(Iterables.isEmpty(p.emptyPCollection(Writables.strings())
        .parallelDo(new SplitFn(), Writables.tableOf(Writables.strings(), Writables.longs()))
        .groupByKey()
        .combineValues(Aggregators.SUM_LONGS())
        .materialize()));
    p.done();
  }
View Full Code Here

    p.done();
  }

  @Test
  public void testUnionWithEmptyMR() throws Exception {
    Pipeline p = new SparkPipeline("local", "empty");
    assertFalse(Iterables.isEmpty(p.emptyPCollection(Writables.strings())
        .parallelDo(new SplitFn(), Writables.tableOf(Writables.strings(), Writables.longs()))
        .union(
            p.read(From.textFile(tempDir.copyResourceFileName("shakes.txt")))
                .parallelDo(new SplitFn(), Writables.tableOf(Writables.strings(), Writables.longs())))
        .groupByKey()
        .combineValues(Aggregators.SUM_LONGS())
        .materialize()));
    p.done();
  }
View Full Code Here

TOP

Related Classes of org.apache.crunch.impl.spark.SparkPipeline

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.