Package cascading.flow.hadoop

Examples of cascading.flow.hadoop.HadoopFlowProcess
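HadoopFlowProcess is the Hadoop platform implementation of Cascading's FlowProcess. It wraps a Hadoop JobConf and is passed to taps, schemes, and operations so that platform-specific code can read the job configuration at runtime. The snippets below show how instances are typically constructed and handed to Tap.openForRead and Tap.openForWrite.

As a minimal sketch of the write/read pattern the examples share (the path "/tmp/example" and the field name "line" are illustrative, not taken from any snippet):

JobConf conf = new JobConf();
HadoopFlowProcess flowProcess = new HadoopFlowProcess( conf );

// an Hfs tap over a plain text file; TextLine with a single source field yields only the line
Hfs tap = new Hfs( new TextLine( new Fields( "line" ) ), "/tmp/example", SinkMode.REPLACE );

TupleEntryCollector writer = tap.openForWrite( flowProcess );
writer.add( new Tuple( "hello hadoop" ) );
writer.close();

TupleEntryIterator reader = tap.openForRead( flowProcess );
while( reader.hasNext() )
  System.out.println( reader.next().getString( 0 ) );
reader.close();

The first example builds the process from a property map: a custom serialization is registered on the properties, which are then folded into a JobConf.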

    @Override
    protected FlowProcess<?> makeFlowProcess() {
        // register BytesSerialization so raw byte arrays can be carried in tuples
        Map<Object, Object> props = new HashMap<Object, Object>();
        TupleSerializationProps.addSerialization(props, BytesSerialization.class.getName());

        // fold the properties into a JobConf and wrap it in a HadoopFlowProcess
        return new HadoopFlowProcess(HadoopUtil.createJobConf(props, null));
    }


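This test wires a HadoopFlowProcess around an explicitly prepared JobConf so that a HadoopSpillableTupleMap can pick up its serialization settings; note the io.serializations override and the serialization-token property set before the process is created.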
  private void performMapTest( int numKeys, int listSize, int mapThreshold, int listThreshold, Configuration jobConf )
    {
    jobConf.set( "io.serializations", TestSerialization.class.getName() + "," + WritableSerialization.class.getName() ); // disable/replace WritableSerialization class
    jobConf.set( "cascading.serialization.tokens", "1000=" + BooleanWritable.class.getName() + ",10001=" + Text.class.getName() ); // not using Text, just testing parsing

    HadoopFlowProcess flowProcess = new HadoopFlowProcess( jobConf );
    // the thresholds control when the map and its value lists spill to disk
    HadoopSpillableTupleMap map = new HadoopSpillableTupleMap( SpillableProps.defaultMapInitialCapacity, SpillableProps.defaultMapLoadFactor, mapThreshold, listThreshold, flowProcess );

    Set<Integer> keySet = new HashSet<Integer>();
    Random gen = new Random( 1 );


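An override that builds the process directly from the current configuration; FlowSession.NULL is passed because no live session is available at this point.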
  @Override
  public FlowProcess getFlowProcess()
    {
    // no session context here, so FlowSession.NULL is used; the final flag marks the mapper side
    return new HadoopFlowProcess( FlowSession.NULL, (JobConf) getConfiguration(), true );
    }

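Within a stream graph, a child HadoopFlowProcess can carry a per-source configuration. Here each accumulated source has its JobConf rebuilt from a property that was shipped from the client side: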
    tributaries.remove( this.source ); // we cannot stream and accumulate the same source

    // accumulated paths
    for( Object source : tributaries )
      {
      HadoopFlowProcess hadoopProcess = (HadoopFlowProcess) flowProcess;
      JobConf conf = hadoopProcess.getJobConf();

      // allows client side config to be used cluster side
      String property = conf.getRaw( "cascading.node.accumulated.source.conf." + Tap.id( (Tap) source ) );

      if( property == null )
        throw new IllegalStateException( "accumulated source conf property missing for: " + ( (Tap) source ).getIdentifier() );

      conf = getSourceConf( hadoopProcess, conf, property );
      flowProcess = new HadoopFlowProcess( hadoopProcess, conf );

      handleHead( (Tap) source, flowProcess );
      }
    }

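A HadoopFlowProcess is also the handle for opening taps outside a running flow, as when persisting step state to a temporary Hfs tap: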
    Hfs temp = new Hfs( new TextLine(), statePath, SinkMode.REPLACE );

    try
      {
      // the step state is written as a single tuple, one line of text
      TupleEntryCollector writer = temp.openForWrite( new HadoopFlowProcess( conf ) );

      writer.add( new Tuple( stepState ) );

      writer.close();
      }

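Reading the persisted step state back uses the same pattern with openForRead: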
    TupleEntryIterator reader = null;

    try
      {
      reader = temp.openForRead( new HadoopFlowProcess( jobConf ) );

      if( !reader.hasNext() )
        throw new FlowException( "step state path is empty: " + temp.getIdentifier() );

      return reader.next().getString( 0 );


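A variant of the earlier getFlowProcess() override, here in a context where getConfiguration() already returns a JobConf, so no cast is needed: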
  @Override
  public FlowProcess getFlowProcess()
    {
    return new HadoopFlowProcess( FlowSession.NULL, getConfiguration(), true );
    }

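The following test writes protobuf Person messages through a tap, groups them under an injected constant key, and reads the results back, registering ProtobufSerialization so the messages survive the group-by: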
  public void testAsGroupByValue() throws Exception {
    FileSystem.get(new Configuration()).delete(new Path("/tmp/input"), true);
    FileSystem.get(new Configuration()).delete(new Path("/tmp/output"), true);

    Tap t = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
    TupleEntryCollector tec = t.openForWrite(new HadoopFlowProcess(new JobConf()));

    HashSet<Tuple> expectedTuples = new HashSet<Tuple>(){{
      add(new Tuple(Example.Person.newBuilder().setName("bryan").setId(1).build()));
      add(new Tuple(Example.Person.newBuilder().setName("lucas").setId(2).build()));
    }};

    for (Tuple tuple : expectedTuples) {
      tec.add(tuple);
    }

    tec.close();

    Pipe inPipe = new Pipe("input");
    // inject a constant key so every record lands in the same group
    Pipe injectedPipe = new Each(inPipe, Fields.NONE, new Insert(new Fields("key"), 7), new Fields("key", "value"));
    Pipe groupByPipe = new GroupBy(injectedPipe, new Fields("key"));

    Hfs sink = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/output");
    Map<Object, Object> properties = new HashMap<Object, Object>(){{
      // append ProtobufSerialization to Hadoop's default serializations
      put("io.serializations", new JobConf().get("io.serializations") + "," + ProtobufSerialization.class.getName());
    }};
    new HadoopFlowConnector(properties).connect(t, sink, groupByPipe).complete();

    TupleEntryIterator tei = sink.openForRead(new HadoopFlowProcess(new JobConf()));
    Set<Tuple> tuples = new HashSet<Tuple>();
    while (tei.hasNext()) {
      tuples.add(tei.next().getTupleCopy());
    }

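The companion test groups by the protobuf value itself, exercising message serialization in the grouping key: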
  public void testAsGroupByKey() throws Exception {
    FileSystem.get(new Configuration()).delete(new Path("/tmp/input"), true);
    FileSystem.get(new Configuration()).delete(new Path("/tmp/output"), true);

    Tap t = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
    TupleEntryCollector tec = t.openForWrite(new HadoopFlowProcess(new JobConf()));

    HashSet<Tuple> expectedTuples = new HashSet<Tuple>(){{
      add(new Tuple(Example.Person.newBuilder().setName("bryan").setId(1).build()));
      add(new Tuple(Example.Person.newBuilder().setName("lucas").setId(2).build()));
    }};

    for (Tuple tuple : expectedTuples) {
      tec.add(tuple);
    }

    tec.close();

    Pipe inPipe = new Pipe("input");
    Pipe groupByPipe = new GroupBy(inPipe, new Fields("value"));

    Hfs sink = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/output");
    Map<Object, Object> properties = new HashMap<Object, Object>(){{
      put("io.serializations",
          new JobConf().get("io.serializations") + "," + ProtobufSerialization.class.getName());
    }};
    new HadoopFlowConnector(properties).connect(t, sink, groupByPipe).complete();

    TupleEntryIterator tei = sink.openForRead(new HadoopFlowProcess(new JobConf()));
    Set<Tuple> tuples = new HashSet<Tuple>();
    while (tei.hasNext()) {
      tuples.add(tei.next().getTupleCopy());
    }

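Finally, a simple round trip through a ProtobufScheme tap: tuples are written with one HadoopFlowProcess and read back from the same path with another; the second argument to openForWrite and openForRead (here null) allows a caller-supplied output or input object.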
    expected.add(fixture("bryan", "bryan.duxbury@mail.com", 1));
    expected.add(fixture("lucas", "lucas@mail.com", 2));
    expected.add(fixture("vida", null, 3));

    Tap inputTap = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
    TupleEntryCollector tec = inputTap.openForWrite(new HadoopFlowProcess(), null);

    for (Tuple t : expected) {
      tec.add(new TupleEntry(new Fields("value"), t));
    }
    tec.close();

    // read the results back from the same path they were written to
    Tap outputTap = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
    TupleEntryIterator iter = outputTap.openForRead(new HadoopFlowProcess(), null);
    List<Tuple> tuples = new ArrayList<Tuple>();
    while (iter.hasNext()) {
      tuples.add(iter.next().getTupleCopy());
    }
