// Package cascading
//
// Source Code of cascading.TrapPlatformTest

/*
* Copyright (c) 2007-2014 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cascading;

import java.io.IOException;
import java.util.Map;
import java.util.regex.Pattern;

import cascading.cascade.Cascades;
import cascading.flow.Flow;
import cascading.flow.FlowDef;
import cascading.operation.AssertionLevel;
import cascading.operation.aggregator.Count;
import cascading.operation.assertion.AssertNotEquals;
import cascading.operation.regex.RegexParser;
import cascading.pipe.Each;
import cascading.pipe.Every;
import cascading.pipe.GroupBy;
import cascading.pipe.Pipe;
import cascading.scheme.Scheme;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import org.junit.Test;

import static data.InputData.inputFileApache;
import static data.InputData.testDelimitedProblematic;

/**
*
*/
public class TrapPlatformTest extends PlatformTestCase
  {
  public TrapPlatformTest()
    {
    super( true, 4, 4 );
    }

  @Test
  public void testTrapNone() throws Exception
    {
    getPlatform().copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( inputFileApache );

    Pipe pipe = new Pipe( "map" );

    pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
    pipe = new GroupBy( "reduce", pipe, new Fields( "ip" ) );
    pipe = new Every( pipe, new Count(), new Fields( "ip", "count" ) );

    Tap sink = getPlatform().getTextFile( getOutputPath( "none/tap" ), SinkMode.REPLACE );
    Tap trap = getPlatform().getTextFile( getOutputPath( "none/trap" ), SinkMode.REPLACE );

    Flow flow = getPlatform().getFlowConnector().connect( "trap test", source, sink, trap, pipe );

    flow.complete();

    validateLength( flow, 8, null );

    try
      {
      flow.openTrap();
      fail(); // should throw a file not found exception
      }
    catch( IOException exception )
      {
      // do nothing
      }
    }

  @Test
  public void testTrapEachAll() throws Exception
    {
    getPlatform().copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( inputFileApache );

    Pipe pipe = new Pipe( "map" );

    pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );

    // always fail
    pipe = new Each( pipe, new Fields( "ip" ), new TestFunction( new Fields( "test" ), null ), Fields.ALL );

    pipe = new GroupBy( "reduce", pipe, new Fields( "ip" ) );
    pipe = new Every( pipe, new Count(), new Fields( "ip", "count" ) );

    Tap sink = getPlatform().getTextFile( getOutputPath( "all/tap" ), SinkMode.REPLACE );
    Tap trap = getPlatform().getTextFile( getOutputPath( "all/trap" ), SinkMode.REPLACE );

    Flow flow = getPlatform().getFlowConnector().connect( "trap test", source, sink, trap, pipe );

    flow.complete();

    validateLength( flow, 0, null );
    validateLength( flow.openTrap(), 10 );
    }

//  @Test
//  public void testCoGroup() throws Exception
//    {
//    getPlatform().copyFromLocal( inputFileLhs );
//    getPlatform().copyFromLocal( inputFileRhs );
//
//    FlowDef flowDef = flowDef();
//
//    flowDef.addSource( "lhs", getPlatform().getTextFile( inputFileLhs ) );
//    flowDef.addSource( "rhs", getPlatform().getTextFile( inputFileRhs ) );
//
//    Pipe pipeLower = new Each( "lhs", new Fields( "line" ), new RegexSplitter( new Fields( "numLHS", "charLHS" ), " " ) );
//    pipeLower = new Each( pipeLower, Fields.ALL, new Identity() ); // adding a little complexity
//    pipeLower = new Each( pipeLower, Fields.ALL, new Identity() ); // adding a little complexity
//
//    Pipe pipeUpper = new Each( "rhs", new Fields( "line" ), new RegexSplitter( new Fields( "numRHS", "charRHS" ), " " ) );
//    pipeUpper = new Each( pipeUpper, Fields.ALL, new Identity() ); // adding a little complexity
//    pipeUpper = new Each( pipeUpper, Fields.ALL, new Identity() ); // adding a little complexity
//
//    Pipe cross = new CoGroup( pipeLower, new Fields( "numLHS" ), pipeUpper, new Fields( "numRHS" ), new InnerJoin() );
//
//    cross = new Each( cross, new TestFunction( new Fields( "test" ), new Tuple( 4 ), 4 ), Fields.ALL );
//    cross = new Discard( cross, new Fields( "test" ) );
//    cross = new Each( cross, Fields.ALL, new Identity() ); // adding a little complexity
//    cross = new Each( cross, Fields.ALL, new Identity() ); // adding a little complexity
//
//    flowDef.addTailSink( cross, getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "cogroup/result" ), SinkMode.REPLACE ) );
//
//    String outputPath = getOutputPath( "cogroup/trap" );
//    Tap trap = getPlatform().getTextFile( new Fields( "line" ), outputPath, SinkMode.REPLACE );
//
//    flowDef.addTrap( pipeLower, trap );
//    flowDef.addTrap( pipeUpper, trap );
//    flowDef.addTrap( cross, trap );
//
//    Flow flow = getPlatform().getFlowConnector().connect( flowDef );
//
//    flow.complete();
//
//    validateLength( flow, 34 );
//
//    List<Tuple> sinkValues = getSinkAsList( flow );
//
//    assertTrue( sinkValues.contains( new Tuple( "1\ta\t1\tA" ) ) );
//    assertTrue( sinkValues.contains( new Tuple( "1\ta\t1\tB" ) ) );
//
//    List<Tuple> trapValues = asList( flow, trap );
//    assertTrue( trapValues.contains( new Tuple( "1\tb\t1\tB" ) ) );
//    }

  @Test
  public void testTrapEachAllSequence() throws Exception
    {
    getPlatform().copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( inputFileApache );

    Pipe pipe = new Pipe( "map" );

    pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );

    // always fail
    pipe = new Each( pipe, new Fields( "ip" ), new TestFunction( new Fields( "test" ), null ), Fields.ALL );

    pipe = new GroupBy( "reduce", pipe, new Fields( "ip" ) );
    pipe = new Every( pipe, new Count(), new Fields( "ip", "count" ) );

    Tap sink = getPlatform().getTabDelimitedFile( Fields.ALL, getOutputPath( "allseq/tap" ), SinkMode.REPLACE );
    Tap trap = getPlatform().getTabDelimitedFile( Fields.ALL, getOutputPath( "allseq/trap" ), SinkMode.REPLACE );

    Flow flow = getPlatform().getFlowConnector().connect( "trap test", source, sink, trap, pipe );

    flow.complete();

    validateLength( flow, 0, null );
    validateLength( flow.openTrap(), 10 );
    }

  @Test
  public void testTrapEveryAllAtStart() throws Exception
    {
    runTrapEveryAll( 0, "everystart", 8 );
    }

  @Test
  public void testTrapEveryAllAtAggregate() throws Exception
    {
    runTrapEveryAll( 1, "everyaggregate", 10 ); // fails at all values
    }

  @Test
  public void testTrapEveryAllAtComplete() throws Exception
    {
    runTrapEveryAll( 2, "everycomplete", 8 );
    }

  private void runTrapEveryAll( int failAt, String path, int failSize ) throws IOException
    {
    getPlatform().copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( inputFileApache );

    Pipe pipe = new Pipe( "map" );

    pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );

    pipe = new GroupBy( "reduce", pipe, new Fields( "ip" ) );
    pipe = new Every( pipe, new Count(), new Fields( "ip", "count" ) );
    pipe = new Every( pipe, new TestFailAggregator( new Fields( "fail" ), failAt ), new Fields( "ip", "count" ) );

    Tap sink = getPlatform().getTextFile( getOutputPath( path + "/tap" ), SinkMode.REPLACE );
    Tap trap = getPlatform().getTextFile( getOutputPath( path + "/trap" ), SinkMode.REPLACE );

    Map<String, Tap> traps = Cascades.tapsMap( "reduce", trap );

    Flow flow = getPlatform().getFlowConnector().connect( "trap test", source, sink, traps, pipe );

    flow.complete();

    validateLength( flow, 0, null );
    validateLength( flow.openTrap(), failSize );
    }

  /**
   * verify we can fail in randome places into the same trap
   *
   * @throws Exception
   */
  @Test
  public void testTrapEachAllChained() throws Exception
    {
    getPlatform().copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( inputFileApache );

    Pipe pipe = new Pipe( "map" );

    pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );

    // always fail
    pipe = new Each( pipe, new TestFunction( new Fields( "test" ), new Tuple( 1 ), 1 ), Fields.ALL );
    pipe = new Each( pipe, new TestFunction( new Fields( "test2" ), new Tuple( 2 ), 2 ), Fields.ALL );
    pipe = new Each( pipe, new TestFunction( new Fields( "test3" ), new Tuple( 3 ), 3 ), Fields.ALL );
    pipe = new Each( pipe, new TestFunction( new Fields( "test4" ), new Tuple( 4 ), 4 ), Fields.ALL );

    Tap sink = getPlatform().getTextFile( getOutputPath( "allchain/tap-nondeterministic" ), SinkMode.REPLACE );
    Tap trap = getPlatform().getTextFile( getOutputPath( "allchain/trap-nondeterministic" ), SinkMode.REPLACE );

    Flow flow = getPlatform().getFlowConnector().connect( "trap test", source, sink, trap, pipe );

    flow.complete();

    validateLength( flow, 6, null );
    validateLength( flow.openTrap(), 4 );
    }

  /**
   * This test verifies traps can cross m/r and step boundaries.
   *
   * @throws Exception
   */
  @Test
  public void testTrapEachEveryAllChained() throws Exception
    {
    getPlatform().copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( inputFileApache );

    Pipe pipe = new Pipe( "map" );

    pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );

    // always fail
    pipe = new Each( pipe, AssertionLevel.VALID, new AssertNotEquals( "75.185.76.245" ) );
    pipe = new GroupBy( pipe, new Fields( "ip" ) );
    pipe = new Each( pipe, AssertionLevel.VALID, new AssertNotEquals( "68.46.103.112" ) );
    pipe = new GroupBy( pipe, new Fields( "ip" ) );
    pipe = new Each( pipe, AssertionLevel.VALID, new AssertNotEquals( "76.197.151.0" ) );
    pipe = new Each( pipe, AssertionLevel.VALID, new AssertNotEquals( "12.215.138.88" ) );

    Tap sink = getPlatform().getTextFile( getOutputPath( "eacheverychain/tap" ), SinkMode.REPLACE );
    Tap trap = getPlatform().getTextFile( getOutputPath( "eacheverychain/trap" ), SinkMode.REPLACE );

    Flow flow = getPlatform().getFlowConnector().connect( "trap test", source, sink, trap, pipe );

    flow.complete();

    validateLength( flow, 6, null );
    validateLength( flow.openTrap(), 4 );
    }

  @Test
  public void testTrapToSequenceFile() throws Exception
    {
    getPlatform().copyFromLocal( inputFileApache );

    Tap source = getPlatform().getTextFile( inputFileApache );

    Pipe pipe = new Pipe( "map" );

    pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );

    // always fail
    pipe = new Each( pipe, new Fields( "ip" ), new TestFunction( new Fields( "test" ), null ), Fields.ALL );

    pipe = new GroupBy( "reduce", pipe, new Fields( "ip" ) );
    pipe = new Every( pipe, new Count(), new Fields( "ip", "count" ) );

    Tap sink = getPlatform().getTextFile( getOutputPath( "seq/tap" ), SinkMode.REPLACE );
    Tap trap = getPlatform().getTabDelimitedFile( new Fields( "ip" ), getOutputPath( "seq/trap" ), SinkMode.REPLACE );

    Flow flow = getPlatform().getFlowConnector().connect( "trap test", source, sink, trap, pipe );

    flow.complete();

    validateLength( flow, 0, null );
    validateLength( flow.openTrap(), 10 );
    }

  @Test
  public void testTrapTapSourceSink() throws Exception
    {
    getPlatform().copyFromLocal( inputFileApache );

    Scheme scheme = getPlatform().getTestFailScheme();

    Tap source = getPlatform().getTap( scheme, inputFileApache, SinkMode.KEEP );

    Pipe pipe = new Pipe( "map" );

    pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
    pipe = new GroupBy( pipe, new Fields( "ip" ) );
    pipe = new Every( pipe, new Count(), new Fields( "ip", "count" ) );

    Tap sink = getPlatform().getTap( scheme, getOutputPath( "trapsourcesink/sink" ), SinkMode.REPLACE );

    Tap trap = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "trapsourcesink/trap" ), SinkMode.REPLACE );

    Map<Object, Object> properties = getProperties();

    // compensate for running in cluster mode
    getPlatform().setNumMapTasks( properties, 1 );
    getPlatform().setNumReduceTasks( properties, 1 );

    Flow flow = getPlatform().getFlowConnector( properties ).connect( "trap test", source, sink, trap, pipe );

    flow.complete();

    validateLength( flow.openTapForRead( getPlatform().getTextFile( sink.getIdentifier() ) ), 7 );
    validateLength( flow.openTrap(), 2, Pattern.compile( "bad data" ) ); // confirm the payload is written
    }

  @Test
  public void testTrapNoOperation() throws Exception
    {
    getPlatform().copyFromLocal( testDelimitedProblematic );

    Tap source = getPlatform().getDelimitedFile( new Fields( "id", "name" ).applyTypes( int.class, String.class ), ",", testDelimitedProblematic );
    Tap sink = getPlatform().getDelimitedFile( new Fields( "id", "name" ).applyTypes( int.class, String.class ), ",", getOutputPath( getTestName() ), SinkMode.REPLACE );
    Tap trap = getPlatform().getTextFile( getOutputPath( getTestName() + "_trap" ), SinkMode.REPLACE );

    Pipe pipe = new Pipe( "copy" );

    FlowDef flowDef = FlowDef.flowDef()
      .addSource( pipe, source )
      .addTailSink( pipe, sink )
      .addTrap( pipe, trap );

    Flow flow = getPlatform().getFlowConnector().connect( flowDef );

    flow.complete();

    validateLength( flow.openTrap(), 1 );
    }
  }
// TOP
//
// Related Classes of cascading.TrapPlatformTest
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.