Package cascading.pipe.assembly

Examples of cascading.pipe.assembly.CountBy


    tokenPipe = new Retain( tokenPipe, fieldSelector );

    // one branch of the flow tallies the token counts for term frequency (TF)
    Pipe tfPipe = new Pipe( "TF", tokenPipe );
    Fields tf_count = new Fields( "tf_count" );
    tfPipe = new CountBy( tfPipe, new Fields( "doc_id", "token" ), tf_count );

    Fields tf_token = new Fields( "tf_token" );
    tfPipe = new Rename( tfPipe, token, tf_token );

    // one branch counts the number of documents (D)
    Fields doc_id = new Fields( "doc_id" );
    Fields tally = new Fields( "tally" );
    Fields rhs_join = new Fields( "rhs_join" );
    Fields n_docs = new Fields( "n_docs" );
    Pipe dPipe = new Unique( "D", tokenPipe, doc_id );
    dPipe = new Each( dPipe, new Insert( tally, 1 ), Fields.ALL );
    dPipe = new Each( dPipe, new Insert( rhs_join, 1 ), Fields.ALL );
    dPipe = new SumBy( dPipe, rhs_join, tally, n_docs, long.class );

    // one branch tallies the token counts for document frequency (DF)
    Pipe dfPipe = new Unique( "DF", tokenPipe, Fields.ALL );
    Fields df_count = new Fields( "df_count" );
    dfPipe = new CountBy( dfPipe, token, df_count );

    Fields df_token = new Fields( "df_token" );
    Fields lhs_join = new Fields( "lhs_join" );
    dfPipe = new Rename( dfPipe, token, df_token );
    dfPipe = new Each( dfPipe, new Insert( lhs_join, 1 ), Fields.ALL );
View Full Code Here


    assembly = new AggregateBy(
      assembly,
      groupingFields,
      new SumBy( new Fields( "size" ), new Fields( "size" ), double.class ),
      new SumBy( new Fields( "size" ), new Fields( "size2" ), double.class ),
      new CountBy( new Fields( "sizes" ) ), new CountBy( new Fields( "sizes2" ) )

    );

    assembly2 = new AggregateBy(
      assembly2,
      groupingFields,
      new SumBy( new Fields( "size" ), new Fields( "size" ), double.class ),
      new SumBy( new Fields( "size" ), new Fields( "size2" ), double.class ),
      new CountBy( new Fields( "sizes" ) ), new CountBy( new Fields( "sizes2" ) )

    );

    Map<String, Tap> sinks = new HashMap<String, Tap>();
    sinks.put( "assembly", sink );
View Full Code Here

      current = new Unique( aggResultFields.toString(), current, uniqueFields, Unique.Include.NO_NULLS );

      current = stack.addDebug( this, current );

      if( aggregationName.equals( "COUNT" ) )
        aggregates.add( new CountBy( current, groupFields, argFields, aggResultFields, CountBy.Include.NO_NULLS ) );
      else if( aggregationName.equals( "SUM" ) )
        aggregates.add( new SumBy( current, groupFields, argFields, aggResultFields ) );
      else if( aggregationName.equals( "MIN" ) )
        aggregates.add( new MinBy( current, groupFields, argFields, aggResultFields ) );
      else if( aggregationName.equals( "MAX" ) )
View Full Code Here

        argFields = Fields.ALL;

      Fields aggResultFields = makeFieldsFor( aggCall );

      if( aggregationName.equals( "COUNT" ) )
        aggregates.add( new CountBy( argFields, aggResultFields, CountBy.Include.NO_NULLS ) );
      else if( aggregationName.equals( "SUM" ) )
        aggregates.add( new SumBy( argFields, aggResultFields ) );
      else if( aggregationName.equals( "MIN" ) )
        aggregates.add( new MinBy( argFields, aggResultFields ) );
      else if( aggregationName.equals( "MAX" ) )
View Full Code Here

TOP

Related Classes of cascading.pipe.assembly.CountBy

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.