Package cascading.tap.hadoop

Examples of cascading.tap.hadoop.Hfs$CombinedInputFormat


   * @throws Exception
   */
  @Test
  public void testSplitEachOnGroup() throws Exception
    {
    Tap sourceLower = new Hfs( new TextLine( new Fields( "num", "char" ) ), "foo" );

    Map sources = new HashMap();

    sources.put( "lower1", sourceLower );

    // using null pos so all fields are written
    Tap sink1 = new Hfs( new TextLine(), "output1", true );
    Tap sink2 = new Hfs( new TextLine(), "output2", true );

    Map sinks = new HashMap();

    sinks.put( "output1", sink1 );
    sinks.put( "output2", sink2 );
View Full Code Here


    }

  @Test
  public void testSplitEveryOnGroup() throws Exception
    {
    Tap sourceLower = new Hfs( new TextLine( new Fields( "num", "char" ) ), "foo" );

    Map sources = new HashMap();

    sources.put( "lower1", sourceLower );

    // using null pos so all fields are written
    Tap sink1 = new Hfs( new TextLine(), "output1", true );
    Tap sink2 = new Hfs( new TextLine(), "output2", true );

    Map sinks = new HashMap();

    sinks.put( "output1", sink1 );
    sinks.put( "output2", sink2 );
View Full Code Here

    }

  @Test
  public void testSplitOutput() throws Exception
    {
    Tap sourceLower = new Hfs( new TextLine( new Fields( "num", "char" ) ), "foo" );

    Map sources = new HashMap();

    sources.put( "lower1", sourceLower );

    // using null pos so all fields are written
    Tap sink1 = new Hfs( new TextLine(), "output1", true );
    Tap sink2 = new Hfs( new TextLine(), "output2", true );

    Map sinks = new HashMap();

    sinks.put( "output1", sink1 );
    sinks.put( "output2", sink2 );
View Full Code Here

  @Test
  public void testSimpleGroup() throws Exception
    {
    getPlatform().copyFromLocal( inputFileApache );

    Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileApache );

    Pipe pipe = new Pipe( "test" );

    pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );

    pipe = new Each( pipe, new InsertBytes( new Fields( "bytes" ), "inserted text as bytes" ), Fields.ALL );

    pipe = new GroupBy( pipe, new Fields( "ip" ) );

    pipe = new Every( pipe, new Count(), new Fields( "ip", "count" ) );

    pipe = new Each( pipe, new InsertBoolean( new Fields( "boolean" ), false ), Fields.ALL );

    Tap sink = new Hfs( new SequenceFile( Fields.ALL ), getOutputPath( "serialization" ), SinkMode.REPLACE );

    Map<Object, Object> jobProperties = getProperties();

    TupleSerialization.addSerializationToken( jobProperties, 1000, BooleanWritable.class.getName() );

View Full Code Here

  @Test
  public void testSimpleGroupOnBytes() throws Exception
    {
    getPlatform().copyFromLocal( inputFileApache );

    Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileApache );

    Pipe pipe = new Pipe( "test" );

    pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );

    pipe = new Each( pipe, new InsertRawBytes( new Fields( "bytes" ), "inserted text as bytes", true, true ), Fields.ALL );

    Fields bytes = new Fields( "bytes" );
    bytes.setComparator( "bytes", new BytesComparator() );
    pipe = new GroupBy( pipe, bytes );

    pipe = new Every( pipe, new Count(), new Fields( "bytes", "count" ) );

    Tap sink = new Hfs( new SequenceFile( Fields.ALL ), getOutputPath( "grouponbytes" ), SinkMode.REPLACE );

    Map<Object, Object> properties = getProperties();

    TupleSerializationProps.addSerialization( properties, BytesSerialization.class.getName() );

View Full Code Here

  public void testCoGroupWritableAsKeyValue() throws Exception
    {
    getPlatform().copyFromLocal( inputFileLower );
    getPlatform().copyFromLocal( inputFileUpper );

    Tap sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileLower );
    Tap sourceUpper = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileUpper );

    Map sources = new HashMap();

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    // using null pos so all fields are written
    Tap sink = new Hfs( new SequenceFile( Fields.ALL ), getOutputPath( "writablekeyvalue" ), SinkMode.REPLACE );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    pipeLower = new Each( pipeLower, new InsertBytes( new Fields( "group" ), "inserted text as bytes" ), Fields.ALL );
    pipeLower = new Each( pipeLower, new InsertBytes( new Fields( "value" ), "inserted text as bytes" ), Fields.ALL );
View Full Code Here

  public void testCoGroupBytesWritableAsKeyValue() throws Exception
    {
    getPlatform().copyFromLocal( inputFileLower );
    getPlatform().copyFromLocal( inputFileUpper );

    Tap sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileLower );
    Tap sourceUpper = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileUpper );

    Map sources = new HashMap();

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    // using null pos so all fields are written
    Tap sink = new Hfs( new TextLine( new Fields( "line" ) ), getOutputPath( "byteswritablekeyvalue" ), SinkMode.REPLACE );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    pipeLower = new Each( pipeLower, new Fields( "char" ), new ReplaceAsBytes( new Fields( "char" ) ), Fields.REPLACE );

    Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter );
View Full Code Here

  public void testCoGroupSpillCustomWritable() throws Exception
    {
    getPlatform().copyFromLocal( inputFileLower );
    getPlatform().copyFromLocal( inputFileUpper );

    Tap sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileLower );
    Tap sourceUpper = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileUpper );

    Map sources = new HashMap();

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    Tap sink = new Hfs( new SequenceFile( Fields.ALL ), getOutputPath( "customerwritable" ), SinkMode.REPLACE );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    pipeLower = new Each( pipeLower, new InsertTestText( new Fields( "group" ), "inserted text as bytes", false ), Fields.ALL );
    pipeLower = new Each( pipeLower, new InsertTestText( new Fields( "value" ), "inserted text as bytes", false ), Fields.ALL );
    pipeLower = new Each( pipeLower, new InsertTestText( new Fields( "text" ), "inserted text as custom text", false ), Fields.ALL );
View Full Code Here

    throws IOException
    {
    getPlatform().copyFromLocal( inputFileLower );
    getPlatform().copyFromLocal( inputFileUpper );

    Tap sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileLower );
    Tap sourceUpper = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileUpper );

    Map sources = new HashMap();

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    // using null pos so all fields are written
    Fields fields = new Fields( "num", "char", "group", "value", "num2", "char2", "group2", "value2" );
    Tap sink = new Hfs( new SequenceFile( fields ), getOutputPath( "/rawbyteskeyvalue/" + useDefaultComparator + "/" + secondarySortOnValue + "/" + ignoreSerializationToken + "/" + compositeGrouping ), SinkMode.REPLACE );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    pipeLower = new Each( pipeLower, new InsertTestText( new Fields( "group" ), "inserted text as bytes", true, 3, 4 ), Fields.ALL );
    pipeLower = new Each( pipeLower, new InsertRawBytes( new Fields( "value" ), "inserted text as bytes", true ), Fields.ALL );
View Full Code Here

    {
    getPlatform().copyFromLocal( inputFileApache );

    Fields sourceFields = new Fields( "offset", "line" ).applyTypes( Coercions.BIG_DECIMAL, String.class );

    Tap source = new Hfs( new TextLine( sourceFields ), inputFileApache );

    Pipe pipe = new Pipe( "test" );

    pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "offset", "ip" ) );

    pipe = new GroupBy( pipe, new Fields( "offset" ) );

    pipe = new Every( pipe, new Count(), new Fields( "offset", "count" ) );

    Fields sinkFields = new Fields( "offset", "count" ).applyTypes( Coercions.BIG_DECIMAL, long.class );
    Tap sink = new Hfs( new SequenceFile( sinkFields ), getOutputPath( "bigdecimal" ), SinkMode.REPLACE );

    Map<Object, Object> jobProperties = getProperties();

    TupleSerialization.addSerialization( jobProperties, BigDecimalSerialization.class.getName() );

View Full Code Here

TOP

Related Classes of cascading.tap.hadoop.Hfs$CombinedInputFormat

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.