Package cascading.scheme.hadoop

Examples of cascading.scheme.hadoop.TextLine


  // test is not executed, just guarantees flow is run locally

  @Test
  public void testLocalModeSink() throws Exception
    {
    Tap source = new Hfs( new TextLine(), "input/path" );
    Tap sink = new Lfs( new TextLine(), "output/path", SinkMode.REPLACE );

    Pipe pipe = new Pipe( "test" );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );
View Full Code Here


  public void testHfsBracketAsterisk() throws Exception
    {
    getPlatform().copyFromLocal( inputFileLower );
    getPlatform().copyFromLocal( inputFileUpper );

    Hfs sourceExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "{*}" );

    assertTrue( sourceExists.resourceExists( getPlatform().getFlowProcess() ) );

    TupleEntryIterator iterator = sourceExists.openForRead( getPlatform().getFlowProcess() );
    assertTrue( iterator.hasNext() );
    iterator.close();

    try
      {
      Hfs sourceNotExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "/blah/" );
      iterator = sourceNotExists.openForRead( getPlatform().getFlowProcess() );
      fail();
      }
    catch( IOException exception )
      {
View Full Code Here

    getPlatform().copyFromLocal( inputFileLower );
    getPlatform().copyFromLocal( inputFileUpper );

    Configuration jobConf = ( (BaseHadoopPlatform) getPlatform() ).getConfiguration();

    Hfs tap = new Hfs( new TextLine( new Fields( "offset", "line" ) ), getOutputPath( "multifiles" ) );

    tap.deleteResource( getPlatform().getFlowProcess() );

    assertEqualsSize( "missing", 0, tap.getChildIdentifiers( jobConf ) );
    assertEqualsSize( "missing", 0, tap.getChildIdentifiers( jobConf, 2, true ) );
    assertEqualsSize( "missing", 0, tap.getChildIdentifiers( jobConf, 2, false ) );
    assertEqualsSize( "missing", 0, tap.getChildIdentifiers( jobConf, 1, true ) );
    assertEqualsSize( "missing", 0, tap.getChildIdentifiers( jobConf, 1, false ) );
    assertEqualsSize( "missing", 0, tap.getChildIdentifiers( jobConf, 0, true ) );
    assertEqualsSize( "missing", 0, tap.getChildIdentifiers( jobConf, 0, false ) );

    tap.createResource( getPlatform().getFlowProcess() );

    assertEqualsSize( "no children", 0, tap.getChildIdentifiers( jobConf ) );
    assertEqualsSize( "no children", 0, tap.getChildIdentifiers( jobConf, 2, true ) );
    assertEqualsSize( "no children", 0, tap.getChildIdentifiers( jobConf, 2, false ) );
    assertEqualsSize( "no children", 0, tap.getChildIdentifiers( jobConf, 1, true ) );
    assertEqualsSize( "no children", 0, tap.getChildIdentifiers( jobConf, 1, false ) );
    assertEqualsSize( "no children", 1, tap.getChildIdentifiers( jobConf, 0, true ) );
    assertEqualsSize( "no children", 1, tap.getChildIdentifiers( jobConf, 0, false ) );

    writeFileTo( "multifiles/A" );
    writeFileTo( "multifiles/B" );

    assertEqualsSize( "children", 2, tap.getChildIdentifiers( jobConf ) );
    assertEqualsSize( "children", 2, tap.getChildIdentifiers( jobConf, 2, true ) );
    assertEqualsSize( "children", 2, tap.getChildIdentifiers( jobConf, 2, false ) );
    assertEqualsSize( "children", 2, tap.getChildIdentifiers( jobConf, 1, true ) );
    assertEqualsSize( "children", 2, tap.getChildIdentifiers( jobConf, 1, false ) );
    assertEqualsSize( "children", 1, tap.getChildIdentifiers( jobConf, 0, true ) );
    assertEqualsSize( "children", 1, tap.getChildIdentifiers( jobConf, 0, false ) );

    tap = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "/" );

    assertEqualsSize( "root", -1, tap.getChildIdentifiers( jobConf ) );
    assertEqualsSize( "root", -1, tap.getChildIdentifiers( jobConf, 2, true ) );
    assertEqualsSize( "root", -1, tap.getChildIdentifiers( jobConf, 2, false ) );
    assertEqualsSize( "root", -1, tap.getChildIdentifiers( jobConf, 1, true ) );
    assertEqualsSize( "root", -1, tap.getChildIdentifiers( jobConf, 1, false ) );
    assertEqualsSize( "root", 1, tap.getChildIdentifiers( jobConf, 0, true ) );
    assertEqualsSize( "root", 1, tap.getChildIdentifiers( jobConf, 0, false ) );

    tap = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "./" );

    assertEqualsSize( "current", -1, tap.getChildIdentifiers( jobConf ) );
    assertEqualsSize( "current", -1, tap.getChildIdentifiers( jobConf, 2, true ) );
    assertEqualsSize( "current", -1, tap.getChildIdentifiers( jobConf, 2, false ) );
    assertEqualsSize( "current", -1, tap.getChildIdentifiers( jobConf, 1, true ) );
View Full Code Here

  public void testNotLocalMode() throws Exception
    {
    if( !getPlatform().isUseCluster() )
      return;

    Tap source = new Hfs( new TextLine(), "input/path" );
    Tap sink = new Hfs( new TextLine(), "output/path", true );

    Pipe pipe = new Pipe( "test" );

    Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );
View Full Code Here

    assertEquals( expected, actual.length );
    }

  private void writeFileTo( String path ) throws IOException
    {
    Hfs tap = new Hfs( new TextLine( new Fields( "offset", "line" ) ), getOutputPath( path ) );

    TupleEntryCollector collector = tap.openForWrite( getPlatform().getFlowProcess() );

    collector.add( new Tuple( 1, "1" ) );
View Full Code Here

      return;

    getPlatform().copyFromLocal( inputFileLower );
    getPlatform().copyFromLocal( inputFileUpper );

    Tap sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileLower );
    Tap sourceUpper = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileUpper );

    Map sources = new HashMap();

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    // using null pos so all fields are written
    Tap sink = new Hfs( new TextLine(), getOutputPath( "stopped" ), true );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );

    pipeLower = new GroupBy( pipeLower, new Fields( "num" ) );
View Full Code Here

  @Test
  public void testFailedSerialization() throws Exception
    {
    getPlatform().copyFromLocal( inputFileLower );

    Tap sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileLower );

    Map sources = new HashMap();

    sources.put( "lower", sourceLower );

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    // using null pos so all fields are written
    Tap sink = new Hfs( new TextLine(), getOutputPath( "badserialization" ), true );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );

    pipeLower = new Each( pipeLower, new Fields( "num" ), new BadFilter() );
View Full Code Here

  @Test
  public void testStartStopRace() throws Exception
    {
    getPlatform().copyFromLocal( inputFileLower );

    Tap sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileLower );

    Map sources = new HashMap();

    sources.put( "lower", sourceLower );

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    // using null pos so all fields are written
    Tap sink = new Hfs( new TextLine(), getOutputPath( "startstop" ), SinkMode.REPLACE );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );

    pipeLower = new GroupBy( pipeLower, new Fields( "num" ) );
View Full Code Here

  private void failingListenerTest( FailingFlowListener.OnFail onFail ) throws Exception
    {
    getPlatform().copyFromLocal( inputFileLower );
    getPlatform().copyFromLocal( inputFileUpper );

    Tap sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileLower );
    Tap sourceUpper = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileUpper );

    Map sources = new HashMap();

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    // using null pos so all fields are written
    Tap sink = new Hfs( new TextLine(), getOutputPath( onFail + "/stopped" ), true );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );

    if( onFail == FailingFlowListener.OnFail.THROWABLE )
      {
View Full Code Here

    }

  @Test
  public void testFlowID() throws Exception
    {
    Tap source = new Lfs( new TextLine(), "input/path" );
    Tap sink = new Hfs( new TextLine(), "output/path", true );

    Pipe pipe = new Pipe( "test" );

    Map<Object, Object> props = getProperties();
    Flow flow1 = getPlatform().getFlowConnector( props ).connect( source, sink, pipe );
View Full Code Here

TOP

Related Classes of cascading.scheme.hadoop.TextLine

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.