Package cascading.pipe

Examples of cascading.pipe.Pipe


      throw new PlannerException( "assembly can only accept one inbound branch, got: " + context.getTails().size() );

    if( context.getTails().size() == 1 && getHeadName() != null )
      throw new PlannerException( "cannot specify a head name if there are incoming branches" );

    Pipe tail = null;

    String headName = findHeadName( context );

    if( context.getTails().size() == 0 && headName != null )
      tail = new Pipe( headName );
    else if( context.getTails().size() == 1 )
      tail = context.getTails().get( 0 );

    tail = applyCoercion( tail, context.getFlow().getSource( headName ) );

    tail = resolveAssembly( tail ); // branch name is applied

    tail = new Pipe( findTailName( context ), tail ); // bind the tail to the sink tailName

    return Arrays.asList( tail );
    }
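Heads and tails are bound to taps by pipe name, which is what the planner above relies on. A minimal sketch of that pattern (the sourceTap and sinkTap variables are assumptions):

    Pipe head = new Pipe( "head" );        // the name "head" keys the source tap
    Pipe tail = new Pipe( "tail", head );  // renaming the branch keys the sink tap

    FlowDef flowDef = FlowDef.flowDef()
      .addSource( "head", sourceTap )
      .addTailSink( tail, sinkTap );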


  /**
   * Wraps the given pipe in a new branch named by {@link #getBranchName()} and,
   * when retain fields are configured, retains only those incoming fields.
   *
   * @param pipe the pipe to extend, may be null to start a new branch
   * @return the resulting pipe
   */
  public Pipe resolveAssembly( Pipe pipe )
    {
    Pipe tail;

    if( pipe == null )
      tail = new Pipe( getBranchName() );
    else
      tail = new Pipe( getBranchName(), pipe );

    if( getRetainIncomingFields() != null )
      tail = new Retain( tail, getRetainIncomingFields() );

    for( Model model : getPMMLModel().getModels() )
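Retain, from cascading.pipe.assembly, keeps only the listed fields and discards the rest of the tuple. A brief sketch with hypothetical field names:

    Pipe scores = new Pipe( "scores" );
    // only "id" and "score" survive; all other incoming fields are dropped
    scores = new Retain( scores, new Fields( "id", "score" ) );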

    PMMLPlanner planner = new PMMLPlanner()
      .setPMMLInput( new FileInputStream( file ) )
      .addDataTypes( predictorFields )
      .setDefaultPredictedField( new Fields( "predict", String.class ) );

    Pipe pipe = new Pipe( "head" );

    Fields discardFields = trainingFields.appendSelector( predictorFields );

    if( !discardFields.isNone() )
      pipe = new Discard( pipe, discardFields );
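The planner is then handed to the FlowDef rather than wired by hand. A hedged sketch, assuming source and sink taps in scope and FlowDef.addAssemblyPlanner() from Cascading 2.2+:

    FlowDef flowDef = FlowDef.flowDef()
      .setName( "classify" )
      .addSource( pipe, source )   // bind the "head" pipe to the source tap
      .addSink( "tail", sink );    // the planner binds its generated tail to this name

    // expands the PMML models into a pipe assembly at connect time
    flowDef.addAssemblyPlanner( planner );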

    if( LOG.isDebugEnabled() )
      {
      LOG.debug( "creating: {}", function.getSpec() );
      LOG.debug( "input: {}, output: {}", inputFields, declaredFields );
      }

    tail = new Pipe( "model-" + ordinal, tail );

    return new Each( tail, inputFields, function, Fields.ALL );
    }
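Note the Fields.ALL output selector: it appends the function's declared fields to the incoming tuple instead of replacing it. A minimal sketch using ExpressionFunction, as elsewhere on this page (field names are illustrative):

    Pipe calc = new Pipe( "calc" );
    // output tuples carry both the argument "x" and the declared "y"
    calc = new Each( calc, new Fields( "x" ),
      new ExpressionFunction( new Fields( "y" ), "x * 2", Integer.TYPE ), Fields.ALL );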

    enableLogging( "cascading.pattern", "debug" );
    }

  protected void performTest( String inputData, Fields predictedFields, Fields expectedFields, EnsembleSpec<TreeSpec> ensembleSpec ) throws IOException
    {
    Pipe pipe = new Pipe( "head" );
    pipe = new Discard( pipe, predictedFields );
    pipe = new ParallelEnsembleAssembly( pipe, ensembleSpec );
    pipe = new Pipe( "tail", pipe );

    Tap source = getPlatform().getDelimitedFile( expectedFields.append( predictedFields ), true, ",", "\"", DATA_PATH + inputData, SinkMode.KEEP );
    Tap sink = getPlatform().getDelimitedFile( Fields.ALL, true, ",", "\"", getResultPath(), SinkMode.REPLACE );

    FlowDef flowDef = FlowDef.flowDef()
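The FlowDef is cut off above; a plausible completion under the Cascading 2.x API, assuming the test harness exposes getPlatform().getFlowConnector():

    FlowDef flowDef = FlowDef.flowDef()
      .addSource( "head", source )   // the head pipe created above
      .addTailSink( pipe, sink );    // pipe is the tail named "tail"

    getPlatform().getFlowConnector().connect( flowDef ).complete();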

    // CREATE NEW TABLE FROM SOURCE

    Tap source = new Lfs( new TextLine(), inputFile );

    Pipe parsePipe = new Each( "insert", new Fields( "line" ), new RegexSplitter( new Fields( "num", "lower", "upper" ), "\\s" ) );

    String url = "jdbc:hsqldb:hsql://localhost/testing";
    String driver = "org.hsqldb.jdbcDriver";
    String tableName = "testingtable";
    String[] columnNames = {"num", "lower", "upper"};
    String[] columnDefs = {"VARCHAR(100) NOT NULL", "VARCHAR(100) NOT NULL", "VARCHAR(100) NOT NULL"};
    String[] primaryKeys = {"num", "lower"};
    TableDesc tableDesc = new TableDesc( tableName, columnNames, columnDefs, primaryKeys );

    Tap replaceTap = new JDBCTap( url, driver, tableDesc, new JDBCScheme( columnNames ), SinkMode.REPLACE );

    Flow parseFlow = new FlowConnector( getProperties() ).connect( source, replaceTap, parsePipe );

    parseFlow.complete();

    verifySink( parseFlow, 13 );

    // READ DATA FROM TABLE INTO TEXT FILE

    // create flow to read from the table and save to a local text file
    Tap sink = new Lfs( new TextLine(), "build/test/jdbc", SinkMode.REPLACE );

    Pipe copyPipe = new Each( "read", new Identity() );

    Flow copyFlow = new FlowConnector( getProperties() ).connect( replaceTap, sink, copyPipe );

    copyFlow.complete();

    verifySink( copyFlow, 13 );

    // READ DATA FROM TEXT FILE AND UPDATE TABLE

    JDBCScheme jdbcScheme = new JDBCScheme( columnNames, null, new String[]{"num", "lower"} );
    Tap updateTap = new JDBCTap( url, driver, tableDesc, jdbcScheme, SinkMode.APPEND );

    Flow updateFlow = new FlowConnector( getProperties() ).connect( sink, updateTap, parsePipe );

    updateFlow.complete();

    verifySink( updateFlow, 13 );

    // READ DATA FROM TABLE INTO TEXT FILE, USING CUSTOM QUERY

    Tap sourceTap = new JDBCTap( url, driver, new JDBCScheme( columnNames, "select num, lower, upper from testingtable as testingtable", "select count(*) from testingtable" ) );

    Pipe readPipe = new Each( "read", new Identity() );

    Flow readFlow = new FlowConnector( getProperties() ).connect( sourceTap, sink, readPipe );

    readFlow.complete();

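For reference, the third argument to that last JDBCScheme constructor names the updateBy columns: with SinkMode.APPEND the tap updates rows whose key columns match and inserts the rest, while SinkMode.REPLACE drops and recreates the table first. The contrast, condensed:

    // REPLACE: drop and recreate the table, then insert every tuple
    Tap replace = new JDBCTap( url, driver, tableDesc, new JDBCScheme( columnNames ), SinkMode.REPLACE );

    // APPEND with updateBy: update rows matching ("num", "lower"), insert the rest
    Tap upsert = new JDBCTap( url, driver, tableDesc,
      new JDBCScheme( columnNames, null, new String[]{ "num", "lower" } ), SinkMode.APPEND );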

    // CREATE NEW TABLE FROM SOURCE

    Tap source = new Lfs( new TextLine(), inputFile );

    Fields columnFields = new Fields( "num", "lower", "upper" );
    Pipe parsePipe = new Each( "insert", new Fields( "line" ), new RegexSplitter( columnFields, "\\s" ) );

    String url = "jdbc:hsqldb:hsql://localhost/testing";
    String driver = "org.hsqldb.jdbcDriver";
    String tableName = "testingtablealias";
    String[] columnNames = {"db_num", "db_lower", "db_upper"};
    String[] columnDefs = {"VARCHAR(100) NOT NULL", "VARCHAR(100) NOT NULL", "VARCHAR(100) NOT NULL"};
    String[] primaryKeys = {"db_num", "db_lower"};
    TableDesc tableDesc = new TableDesc( tableName, columnNames, columnDefs, primaryKeys );

    Tap replaceTap = new JDBCTap( url, driver, tableDesc, new JDBCScheme( columnFields, columnNames ), SinkMode.REPLACE );

    Flow parseFlow = new FlowConnector( getProperties() ).connect( source, replaceTap, parsePipe );

    parseFlow.complete();

    verifySink( parseFlow, 13 );

    // READ DATA FROM TABLE INTO TEXT FILE

    // create flow to read from the table and save to a local text file
    Tap sink = new Lfs( new TextLine(), "build/test/jdbc", SinkMode.REPLACE );

    Pipe copyPipe = new Each( "read", new Identity() );

    Flow copyFlow = new FlowConnector( getProperties() ).connect( replaceTap, sink, copyPipe );

    copyFlow.complete();

    verifySink( copyFlow, 13 );

    // READ DATA FROM TEXT FILE AND UPDATE TABLE

    Fields updateByFields = new Fields( "num", "lower" );
    String[] updateBy = {"db_num", "db_lower"};
    JDBCScheme jdbcScheme = new JDBCScheme( columnFields, columnNames, null, updateByFields, updateBy );
    Tap updateTap = new JDBCTap( url, driver, tableDesc, jdbcScheme, SinkMode.APPEND );

    Flow updateFlow = new FlowConnector( getProperties() ).connect( sink, updateTap, parsePipe );

    updateFlow.complete();

    verifySink( updateFlow, 13 );

    // READ DATA FROM TABLE INTO TEXT FILE, USING CUSTOM QUERY

    Tap sourceTap = new JDBCTap( url, driver, new JDBCScheme( columnFields, columnNames, "select db_num, db_lower, db_upper from testingtablealias as testingtablealias", "select count(*) from testingtablealias" ) );

    Pipe readPipe = new Each( "read", new Identity() );

    Flow readFlow = new FlowConnector( getProperties() ).connect( sourceTap, sink, readPipe );

    readFlow.complete();


    RegexParser parser = new RegexParser( apacheFields, apacheRegex, allGroups );

    // create the import pipe element, with the name 'import', and with the input argument named "line";
    // replace the incoming tuple with the parser results
    // "line" -> parser -> parsed fields
    Pipe pipeline = new Each( "import", new Fields( "line" ), parser, Fields.RESULTS );

    // group the Tuple stream by the "resource" value
    pipeline = new GroupBy( pipeline, new Fields( "resource" ) );
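A likely next step after the GroupBy, mirroring the Count usage later on this page, counts occurrences within each "resource" group:

    // declares "count", emitting one tuple per "resource" group
    pipeline = new Every( pipeline, Fields.GROUP, new Count(), Fields.ALL );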

    // declares: "ip", "time", "method", "event", "status", "size"
    Fields apacheFields = new Fields( "ip", "time", "method", "event", "status", "size" );
    String apacheRegex = "^([^ ]*) +[^ ]* +[^ ]* +\\[([^]]*)\\] +\\\"([^ ]*) ([^ ]*) [^ ]*\\\" ([^ ]*) ([^ ]*).*$";
    int[] apacheGroups = {1, 2, 3, 4, 5, 6};
    RegexParser parser = new RegexParser( apacheFields, apacheRegex, apacheGroups );
    Pipe importPipe = new Each( "import", new Fields( "line" ), parser );

    // create a tap to read the resource: Hfs when the path is a URL to an external resource, otherwise Lfs
    // Lfs allows for relative paths
    Tap logTap =
      inputPath.matches( "^[^:]+://.*" ) ? new Hfs( new TextLine(), inputPath ) : new Lfs( new TextLine(), inputPath );
    // create a tap to read/write from the default filesystem
    Tap parsedLogTap = new Hfs( apacheFields, logsPath );

    // connect the assembly to source and sink taps
    Flow importLogFlow = flowConnector.connect( logTap, parsedLogTap, importPipe );

    // create an assembly to parse out the time field into a timestamp
    // then count the number of requests per second and per minute

    // apply a text parser to create a timestamp with 'second' granularity
    // declares field "ts"
    DateParser dateParser = new DateParser( new Fields( "ts" ), "dd/MMM/yyyy:HH:mm:ss Z" );
    Pipe tsPipe = new Each( "arrival rate", new Fields( "time" ), dateParser, Fields.RESULTS );

    // name the per second assembly and split on tsPipe
    Pipe tsCountPipe = new Pipe( "tsCount", tsPipe );
    tsCountPipe = new GroupBy( tsCountPipe, new Fields( "ts" ) );
    tsCountPipe = new Every( tsCountPipe, Fields.GROUP, new Count() );

    // apply expression to create a timestamp with 'minute' granularity
    // declares field "tm"
    Pipe tmPipe = new Each( tsPipe, new ExpressionFunction( new Fields( "tm" ), "ts - (ts % (60 * 1000))", long.class ) );

    // name the per minute assembly and split on tmPipe
    Pipe tmCountPipe = new Pipe( "tmCount", tmPipe );
    tmCountPipe = new GroupBy( tmCountPipe, new Fields( "tm" ) );
    tmCountPipe = new Every( tmCountPipe, Fields.GROUP, new Count() );

    // create taps to write the results to the default filesystem, using the given fields
    Tap tsSinkTap = new Hfs( new TextLine(), arrivalRateSecPath );
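The example is truncated; a plausible completion, following Cascading's logparser sample (the arrivalRateMinPath variable is an assumption):

    Tap tmSinkTap = new Hfs( new TextLine(), arrivalRateMinPath );

    // bind both count tails to their sinks and run them as one flow
    Map<String, Tap> sinks = Cascades.tapsMap( Pipe.pipes( tsCountPipe, tmCountPipe ), Tap.taps( tsSinkTap, tmSinkTap ) );
    Flow arrivalRateFlow = flowConnector.connect( parsedLogTap, sinks, tsCountPipe, tmCountPipe );

    arrivalRateFlow.complete();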

    // the imported file will be a native Hadoop sequence file with the fields "url" and "page"
    // note this example stores crawl pages as a tabbed file, with the first field being the "url"
    // and the second being the "raw" document that had all newline chars ("\n") converted to the text ":nl:".

    // a predefined pipe assembly that returns fields named "url" and "page"
    Pipe importPipe = new ImportCrawlDataAssembly( "import pipe" );

    // create the tap instances
    Tap localPagesSource = new Lfs( new TextLine(), inputPath );
    Tap importedPages = new Hfs( new SequenceFile( new Fields( "url", "page" ) ), pagesPath );

    // connect the pipe assembly to the tap instances
    Flow importPagesFlow = flowConnector.connect( "import pages", localPagesSource, importedPages, importPipe );

    // a predefined pipe assembly that splits the stream into two named "url pipe" and "word pipe"
    // these pipes could be retrieved via the getTails() method and added to new pipe instances
    SubAssembly wordCountPipe = new WordCountSplitAssembly( "wordcount pipe", "url pipe", "word pipe" );

    // create Hadoop sequence files to store the results of the counts
    Tap sinkUrl = new Hfs( new SequenceFile( new Fields( "url", "word", "count" ) ), urlsPath );
    Tap sinkWord = new Hfs( new SequenceFile( new Fields( "word", "count" ) ), wordsPath );

    // convenience method to bind multiple pipes and taps
    Map<String, Tap> sinks = Cascades.tapsMap( new String[]{"url pipe", "word pipe"}, Tap.taps( sinkUrl, sinkWord ) );

    // wordCountPipe will be recognized as an assembly and handled appropriately
    Flow count = flowConnector.connect( importedPages, sinks, wordCountPipe );

    // create an assembly to export the Hadoop sequence file to local text files
    Pipe exportPipe = new Each( "export pipe", new Identity() );

    Tap localSinkUrl = new Lfs( new TextLine(), localUrlsPath );
    Tap localSinkWord = new Lfs( new TextLine(), localWordsPath );

    // connect up both sinks using the same exportPipe assembly
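A plausible completion: the same exportPipe assembly is bound twice, once per sequence-file source and local sink, and a Cascade then runs every flow in dependency order:

    Flow exportUrlFlow = flowConnector.connect( sinkUrl, localSinkUrl, exportPipe );
    Flow exportWordFlow = flowConnector.connect( sinkWord, localSinkWord, exportPipe );

    // topologically schedules and runs all four flows
    new CascadeConnector().connect( importPagesFlow, count, exportUrlFlow, exportWordFlow ).complete();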
