Package cascading.scheme.hadoop

Examples of cascading.scheme.hadoop.SequenceFile


      System.out.println("Usage: hadoop jar cascading_ext.job.jar com.liveramp.cascading_ext.example.BloomJoinExampleWithoutCascadingUtil <output dir>");
      return;
    }

    String outputDir = args[0];
    Hfs sink = new Hfs(new SequenceFile(new Fields("field1", "field2", "field3", "field4")), outputDir);

    Pipe source1 = new Pipe("source1");

    Pipe source2 = new Pipe("source2");
View Full Code Here


      System.out.println("Usage: hadoop jar cascading_ext.job.jar com.liveramp.cascading_ext.example.FlowWithCustomCascadingUtil <output dir>");
      return;
    }

    String outputDir = args[0];
    Hfs sink = new Hfs(new SequenceFile(new Fields("field1", "field2", "field3", "field4")), outputDir);

    Pipe source1 = new Pipe("source1");

    Pipe source2 = new Pipe("source2");
View Full Code Here

  private Tap<JobConf, ?, ?> inputRhs;

  @Before
  public void setUp() throws IOException {

    inputLhs = new Hfs(new SequenceFile(new Fields("a", "b")), getTestRoot() + "/lhs");
    inputRhs = new Hfs(new SequenceFile(new Fields("c", "d")), getTestRoot() + "/rhs");

    TapHelper.writeToTap(inputLhs,
        new Tuple("1", "A"),
        new Tuple("1", "B"),
        new Tuple("1", "C"),
View Full Code Here

        new Tuple("2", "G"));
  }

  @Test
  public void checkLimits() throws IOException {
    Tap<JobConf, ?, ?> output = new Hfs(new SequenceFile(new Fields("a", "b", "d")), getTestRoot() + "/output");

    Pipe lhs = new Pipe("lhs");

    Pipe rhs = new Pipe("rhs");
View Full Code Here

      new Fields("in"));

    Pipe pipe = new Pipe("batch-pipe");
    pipe = new Each(pipe, new AnnotateWithSizeOfCurrentBatch(4));

    Tap<JobConf, RecordReader, OutputCollector> dst = new Lfs(new SequenceFile(OUT_FIELD), getTestRoot()+"/out");

    Flow f = CascadingUtil.get().getFlowConnector().connect(src, dst, pipe);
    f.complete();

    TupleEntryIterator tupleEntryIterator = dst.openForRead(CascadingUtil.get().getFlowProcess());
View Full Code Here

  private final String OUTPUT_PATH = getTestRoot() + "/output";

  @Test
  public void testSimpleCombiner() throws IOException {

    Hfs source = new Hfs(new SequenceFile(new Fields("key", "value")), INPUT_PATH);
    TupleEntryCollector tc = source.openForWrite(CascadingUtil.get().getFlowProcess());
    tc.add(new Tuple("k0", 1));
    tc.add(new Tuple("k0", 2));
    tc.add(new Tuple("k1", 1));
    tc.add(new Tuple("k1", -3));
    tc.add(new Tuple("k1", 10));
    tc.close();

    Tap sink = new Hfs(new SequenceFile(new Fields("key", "sum")), OUTPUT_PATH);

    Pipe pipe = new Pipe("pipe");
    pipe = Combiner.assembly(pipe, new SimpleAggregator(), new Fields("key"), new Fields("value"), new Fields("sum"));

    CascadingUtil.get().getFlowConnector().connect(source, sink, pipe).complete();
View Full Code Here


  @Test
  public void testSimpleCombinerWithMemoryLimit() throws IOException {

    Hfs source = new Hfs(new SequenceFile(new Fields("key", "value")), INPUT_PATH);
    TupleEntryCollector tc = source.openForWrite(CascadingUtil.get().getFlowProcess());
    tc.add(new Tuple("key0", 1));
    tc.add(new Tuple("key0", 2));
    tc.add(new Tuple("key1", 1));
    tc.add(new Tuple("key1", -3));
    tc.add(new Tuple("key0", 10));
    tc.close();

    Tap sink = new Hfs(new SequenceFile(new Fields("key", "sum")), OUTPUT_PATH);

    Pipe pipe = new Pipe("pipe");
    pipe = new Each(pipe, Combiner.function(new SimpleAggregator(), new Fields("key"), new Fields("value"), new Fields("sum"), MemoryBoundLruHashMap.UNLIMITED_ITEM_CAPACITY, 100, new SimpleTupleMemoryUsageEstimator(), new LongMemoryUsageEstimator(), false));

    CascadingUtil.get().getFlowConnector().connect(source, sink, pipe).complete();
View Full Code Here

  }

  @Test
  public void testComplexCombiner() throws IOException {

    Hfs source = new Hfs(new SequenceFile(new Fields("key", "value")), INPUT_PATH);
    TupleEntryCollector tc = source.openForWrite(CascadingUtil.get().getFlowProcess());
    tc.add(new Tuple("k0", 1));
    tc.add(new Tuple("k0", 2));
    tc.add(new Tuple("k1", 1));
    tc.add(new Tuple("k1", 4));
    tc.add(new Tuple("k1", 10));
    tc.close();

    Tap sink = new Hfs(new SequenceFile(new Fields("key", "sum", "num_values", "average")), OUTPUT_PATH);

    Pipe pipe = new Pipe("pipe");
    pipe = Combiner.assembly(pipe,
        new ComplexAggregator(),
        new Fields("key"),
View Full Code Here

  private final String INPUT_PATH = getTestRoot() + "/input";
  private final String OUTPUT_PATH = getTestRoot() + "/output";

  @Test
  public void testMain() throws IOException {
    Hfs source = new Hfs(new SequenceFile(new Fields("key", "value")), INPUT_PATH);
    TupleEntryCollector tc = source.openForWrite(CascadingUtil.get().getFlowProcess());
    tc.add(new Tuple("k0", 1));
    tc.add(new Tuple("k0", 2));
    tc.add(new Tuple("k1", 10));
    tc.add(new Tuple("k1", -2));
    tc.add(new Tuple("k1", -9));
    tc.add(new Tuple("k2", -3));
    tc.add(new Tuple("k2", -6));
    tc.close();

    Tap sink = new Hfs(new SequenceFile(new Fields("key", "sum")), OUTPUT_PATH);
   
    Pipe pipe = new Pipe("pipe");
    pipe = Combiner.assembly(pipe, new SumExactAggregator(1), new Fields("key"), new Fields("value"), new Fields("sum"));

    CascadingUtil.get().getFlowConnector().connect(source, sink, pipe).complete();
View Full Code Here

  protected Hfs output;

  @Before
  public void setUp() throws Exception {
    output = new Hfs(new SequenceFile(new Fields("key", "key2", "lhs-value")), getTestRoot() + "/output");
  }
View Full Code Here

TOP

Related Classes of cascading.scheme.hadoop.SequenceFile

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.