Package cascading.pipe.assembly

Examples of cascading.pipe.assembly.Unique


    // create an OPERATION to split the text into a token stream
    RegexSplitGenerator splitter = new RegexSplitGenerator( new Fields( "token" ), " " );
    Fields outputSelector = new Fields( "uid", "token" );
    tweetPipe = new Each( tweetPipe, new Fields( "text" ), splitter, outputSelector );

    tweetPipe = new Unique( tweetPipe, Fields.ALL );

    RegexFilter filter = new RegexFilter( "^\\S\\S+$" );
    tweetPipe = new Each( tweetPipe, new Fields( "token" ), filter );

    // create PIPEs for left join on the stop words
View Full Code Here


    // one branch counts the number of documents (D)
    Fields doc_id = new Fields( "doc_id" );
    Fields tally = new Fields( "tally" );
    Fields rhs_join = new Fields( "rhs_join" );
    Fields n_docs = new Fields( "n_docs" );
    Pipe dPipe = new Unique( "D", tokenPipe, doc_id );
    dPipe = new Each( dPipe, new Insert( tally, 1 ), Fields.ALL );
    dPipe = new Each( dPipe, new Insert( rhs_join, 1 ), Fields.ALL );
    dPipe = new SumBy( dPipe, rhs_join, tally, n_docs, long.class );

    // one branch tallies the token counts for document frequency (DF)
    Pipe dfPipe = new Unique( "DF", tokenPipe, Fields.ALL );
    Fields df_count = new Fields( "df_count" );
    dfPipe = new CountBy( dfPipe, token, df_count );

    Fields df_token = new Fields( "df_token" );
    Fields lhs_join = new Fields( "lhs_join" );
View Full Code Here

    Fields outgoingNamedFields = RelUtil.createTypedFieldsFor( this, false );

    // assumption here is if aggCalls is empty, we are performing a DISTINCT on the group set
    if( getAggCallList().isEmpty() )
      {
      Pipe current = new Unique( branch.current, outgoingNamedFields );

      current = stack.addDebug( this, current );

      return new Branch( current, branch );
      }
View Full Code Here

      Fields aggResultFields = makeFieldsFor( aggCall );

      Pipe current = previous;

      current = new Retain( current, uniqueFields );
      current = new Unique( aggResultFields.toString(), current, uniqueFields, Unique.Include.NO_NULLS );

      current = stack.addDebug( this, current );

      if( aggregationName.equals( "COUNT" ) )
        aggregates.add( new CountBy( current, groupFields, argFields, aggResultFields, CountBy.Include.NO_NULLS ) );
View Full Code Here

    String name = stack.getNameFor( GroupBy.class, pipes );

    Pipe pipe;

    if( !all )
      pipe = new Unique( name, pipes, Fields.ALL );
    else
      pipe = new GroupBy( name, pipes, Fields.ALL );

    pipe = stack.addDebug( this, pipe );
View Full Code Here

TOP

Related Classes of cascading.pipe.assembly.Unique

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.