Package cascading.flow.hadoop

Examples of cascading.flow.hadoop.HadoopFlowProcess
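HadoopFlowProcess is the Hadoop platform implementation of Cascading's FlowProcess. It wraps a Hadoop JobConf and is passed to taps, schemes, and operations so that platform-specific code can read the job configuration at runtime. The snippets below show how instances are typically constructed and handed to Tap.openForRead and Tap.openForWrite.

As a minimal sketch of the write/read pattern the examples share (the path "/tmp/example" and the field name "line" are illustrative, not taken from any snippet):

JobConf conf = new JobConf();
HadoopFlowProcess flowProcess = new HadoopFlowProcess( conf );

// an Hfs tap over a plain text file; TextLine with a single source field yields only the line
Hfs tap = new Hfs( new TextLine( new Fields( "line" ) ), "/tmp/example", SinkMode.REPLACE );

TupleEntryCollector writer = tap.openForWrite( flowProcess );
writer.add( new Tuple( "hello hadoop" ) );
writer.close();

TupleEntryIterator reader = tap.openForRead( flowProcess );
while( reader.hasNext() )
  System.out.println( reader.next().getString( 0 ) );
reader.close();

The first example builds the process from a property map: a custom serialization is registered on the properties, which are then folded into a JobConf.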

    @Override
    protected FlowProcess<?> makeFlowProcess() {
        // register BytesSerialization so raw byte arrays can be carried in tuples
        Map<Object, Object> props = new HashMap<Object, Object>();
        TupleSerializationProps.addSerialization(props, BytesSerialization.class.getName());

        // fold the properties into a JobConf and wrap it in a HadoopFlowProcess
        return new HadoopFlowProcess(HadoopUtil.createJobConf(props, null));
    }


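This test wires a HadoopFlowProcess around an explicitly prepared JobConf so that a HadoopSpillableTupleMap can pick up its serialization settings; note the io.serializations override and the serialization-token property set before the process is created.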
  private void performMapTest( int numKeys, int listSize, int mapThreshold, int listThreshold, Configuration jobConf )
    {
    jobConf.set( "io.serializations", TestSerialization.class.getName() + "," + WritableSerialization.class.getName() ); // disable/replace WritableSerialization class
    jobConf.set( "cascading.serialization.tokens", "1000=" + BooleanWritable.class.getName() + ",10001=" + Text.class.getName() ); // not using Text, just testing parsing

    HadoopFlowProcess flowProcess = new HadoopFlowProcess( jobConf );
    // the thresholds control when the map and its value lists spill to disk
    HadoopSpillableTupleMap map = new HadoopSpillableTupleMap( SpillableProps.defaultMapInitialCapacity, SpillableProps.defaultMapLoadFactor, mapThreshold, listThreshold, flowProcess );

    Set<Integer> keySet = new HashSet<Integer>();
    Random gen = new Random( 1 );


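An override that builds the process directly from the current configuration; FlowSession.NULL is passed because no live session is available at this point.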
  @Override
  public FlowProcess getFlowProcess()
    {
    // no session context here, so FlowSession.NULL is used; the final flag marks the mapper side
    return new HadoopFlowProcess( FlowSession.NULL, (JobConf) getConfiguration(), true );
    }

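Within a stream graph, a child HadoopFlowProcess can carry a per-source configuration. Here each accumulated source has its JobConf rebuilt from a property that was shipped from the client side: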
    tributaries.remove( this.source ); // we cannot stream and accumulate the same source

    // accumulated paths
    for( Object source : tributaries )
      {
      HadoopFlowProcess hadoopProcess = (HadoopFlowProcess) flowProcess;
      JobConf conf = hadoopProcess.getJobConf();

      // allows client side config to be used cluster side
      String property = conf.getRaw( "cascading.node.accumulated.source.conf." + Tap.id( (Tap) source ) );

      if( property == null )
        throw new IllegalStateException( "accumulated source conf property missing for: " + ( (Tap) source ).getIdentifier() );

      conf = getSourceConf( hadoopProcess, conf, property );
      flowProcess = new HadoopFlowProcess( hadoopProcess, conf );

      handleHead( (Tap) source, flowProcess );
      }
    }

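A HadoopFlowProcess is also the handle for opening taps outside a running flow, as when persisting step state to a temporary Hfs tap: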
    Hfs temp = new Hfs( new TextLine(), statePath, SinkMode.REPLACE );

    try
      {
      // the step state is written as a single tuple, one line of text
      TupleEntryCollector writer = temp.openForWrite( new HadoopFlowProcess( conf ) );

      writer.add( new Tuple( stepState ) );

      writer.close();
      }

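Reading the persisted step state back uses the same pattern with openForRead: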
    TupleEntryIterator reader = null;

    try
      {
      reader = temp.openForRead( new HadoopFlowProcess( jobConf ) );

      if( !reader.hasNext() )
        throw new FlowException( "step state path is empty: " + temp.getIdentifier() );

      return reader.next().getString( 0 );


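A variant of the earlier getFlowProcess() override, here in a context where getConfiguration() already returns a JobConf, so no cast is needed: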
  @Override
  public FlowProcess getFlowProcess()
    {
    return new HadoopFlowProcess( FlowSession.NULL, getConfiguration(), true );
    }

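The following test writes protobuf Person messages through a tap, groups them under an injected constant key, and reads the results back, registering ProtobufSerialization so the messages survive the group-by: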
  public void testAsGroupByValue() throws Exception {
    FileSystem.get(new Configuration()).delete(new Path("/tmp/input"), true);
    FileSystem.get(new Configuration()).delete(new Path("/tmp/output"), true);

    Tap t = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
    TupleEntryCollector tec = t.openForWrite(new HadoopFlowProcess(new JobConf()));

    HashSet<Tuple> expectedTuples = new HashSet<Tuple>(){{
      add(new Tuple(Example.Person.newBuilder().setName("bryan").setId(1).build()));
      add(new Tuple(Example.Person.newBuilder().setName("lucas").setId(2).build()));
    }};

    for (Tuple tuple : expectedTuples) {
      tec.add(tuple);
    }

    tec.close();

    Pipe inPipe = new Pipe("input");
    // inject a constant key so every record lands in the same group
    Pipe injectedPipe = new Each(inPipe, Fields.NONE, new Insert(new Fields("key"), 7), new Fields("key", "value"));
    Pipe groupByPipe = new GroupBy(injectedPipe, new Fields("key"));

    Hfs sink = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/output");
    Map<Object, Object> properties = new HashMap<Object, Object>(){{
      // append ProtobufSerialization to Hadoop's default serializations
      put("io.serializations", new JobConf().get("io.serializations") + "," + ProtobufSerialization.class.getName());
    }};
    new HadoopFlowConnector(properties).connect(t, sink, groupByPipe).complete();

    TupleEntryIterator tei = sink.openForRead(new HadoopFlowProcess(new JobConf()));
    Set<Tuple> tuples = new HashSet<Tuple>();
    while (tei.hasNext()) {
      tuples.add(tei.next().getTupleCopy());
    }

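The companion test groups by the protobuf value itself, exercising message serialization in the grouping key: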
  public void testAsGroupByKey() throws Exception {
    FileSystem.get(new Configuration()).delete(new Path("/tmp/input"), true);
    FileSystem.get(new Configuration()).delete(new Path("/tmp/output"), true);

    Tap t = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
    TupleEntryCollector tec = t.openForWrite(new HadoopFlowProcess(new JobConf()));

    HashSet<Tuple> expectedTuples = new HashSet<Tuple>(){{
      add(new Tuple(Example.Person.newBuilder().setName("bryan").setId(1).build()));
      add(new Tuple(Example.Person.newBuilder().setName("lucas").setId(2).build()));
    }};

    for (Tuple tuple : expectedTuples) {
      tec.add(tuple);
    }

    tec.close();

    Pipe inPipe = new Pipe("input");
    Pipe groupByPipe = new GroupBy(inPipe, new Fields("value"));

    Hfs sink = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/output");
    Map<Object, Object> properties = new HashMap<Object, Object>(){{
      put("io.serializations",
          new JobConf().get("io.serializations") + "," + ProtobufSerialization.class.getName());
    }};
    new HadoopFlowConnector(properties).connect(t, sink, groupByPipe).complete();

    TupleEntryIterator tei = sink.openForRead(new HadoopFlowProcess(new JobConf()));
    Set<Tuple> tuples = new HashSet<Tuple>();
    while (tei.hasNext()) {
      tuples.add(tei.next().getTupleCopy());
    }

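Finally, a simple round trip through a ProtobufScheme tap: tuples are written with one HadoopFlowProcess and read back from the same path with another; the second argument to openForWrite and openForRead (here null) allows a caller-supplied output or input object.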
    expected.add(fixture("bryan", "bryan.duxbury@mail.com", 1));
    expected.add(fixture("lucas", "lucas@mail.com", 2));
    expected.add(fixture("vida", null, 3));

    Tap inputTap = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
    TupleEntryCollector tec = inputTap.openForWrite(new HadoopFlowProcess(), null);

    for (Tuple t : expected) {
      tec.add(new TupleEntry(new Fields("value"), t));
    }
    tec.close();

    // read the results back from the same path they were written to
    Tap outputTap = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
    TupleEntryIterator iter = outputTap.openForRead(new HadoopFlowProcess(), null);
    List<Tuple> tuples = new ArrayList<Tuple>();
    while (iter.hasNext()) {
      tuples.add(iter.next().getTupleCopy());
    }
