Examples of cascading.operation.aggregator.Count

cascading.operation.aggregator.Count
Class Count is an {@link Aggregator} that calculates the number of items in the current group.
Note the resulting value for count is always a long. So any comparisons should be against a long value.

        pipe = new Each(pipe, new FilterNotNull());
        pipe = new Each(pipe, AssertionLevel.STRICT, new AssertSizeLessThan(5));
        // can't select when using unknown
        //pipe = new Each(pipe, new Fields("name"), AssertionLevel.STRICT, new AssertNotNull());
        pipe = new GroupBy(pipe);
        pipe = new Every(pipe, new Count());


        // print out
        Tap out = new OutputStreamTap(new TextLine(), OUT);
        build(cfg(), in, out, pipe);
    }

View Full Code Here

        Tap in = new EsTap("cascading-local/artists", query, new Fields("url", "name"));
        Pipe pipe = new Pipe("copy");
        pipe = new Each(pipe, AssertionLevel.STRICT, new AssertSizeEquals(2));
        pipe = new Each(pipe, AssertionLevel.STRICT, new AssertNotNull());
        pipe = new GroupBy(pipe);
        pipe = new Every(pipe, new Count());


        // print out
        Tap out = new OutputStreamTap(new TextLine(), OUT);
        build(cfg(), in, out, pipe);
    }

View Full Code Here

    public void testReadFromESAliasedField() throws Exception {
        Tap in = new EsTap("cascading-local/alias", query, new Fields("address"));
        Pipe pipe = new Pipe("copy");
        pipe = new Each(pipe, AssertionLevel.STRICT, new AssertNotNull());
        pipe = new GroupBy(pipe);
        pipe = new Every(pipe, new Count());


        // print out
        Tap out = new OutputStreamTap(new TextLine(), OUT);
        build(cfg(), in, out, pipe);
    }

View Full Code Here

    public void testReadFromESWithFieldAlias() throws Exception {
        Tap in = new EsTap("cascading-local/alias", query, new Fields("url"));
        Pipe pipe = new Pipe("copy");
        pipe = new Each(pipe, AssertionLevel.STRICT, new AssertNotNull());
        pipe = new GroupBy(pipe);
        pipe = new Every(pipe, new Count());


        // print out
        // print out
        Tap out = new OutputStreamTap(new TextLine(), OUT);
        Properties cfg = cfg();

View Full Code Here

        pipe = new Each(pipe, new FilterNotNull());
        pipe = new Each(pipe, AssertionLevel.STRICT, new AssertSizeLessThan(5));
        // can't select when using unknown
        //pipe = new Each(pipe, new Fields("name"), AssertionLevel.STRICT, new AssertNotNull());
        pipe = new GroupBy(pipe);
        pipe = new Every(pipe, new Count());


        // print out
        Tap out = new HadoopPrintStreamTap(Stream.NULL);
        build(cfg(), in, out, pipe);
    }

View Full Code Here

        pipe = new Each(pipe, new FilterNotNull());
        pipe = new Each(pipe, AssertionLevel.STRICT, new AssertSizeLessThan(5));
        // can't select when using unknown
        //pipe = new Each(pipe, new Fields("name"), AssertionLevel.STRICT, new AssertNotNull());
        pipe = new GroupBy(pipe);
        pipe = new Every(pipe, new Count());


        // print out
        Tap out = new OutputStreamTap(new TextLine(), OUT);
        build(cfg(), in, out, pipe);
    }

View Full Code Here

    stop words -- based on an R script
    */


    Pipe tokenPipe = new Pipe( "token", joinPipe ); // name branch
    tokenPipe = new GroupBy( tokenPipe, new Fields( "token" ) );
    tokenPipe = new Every( tokenPipe, Fields.ALL, new Count(), Fields.ALL );


    /*
    flow part #3
    generate an inverted index for ((uid1,uid2), token) to avoid having to perform
    a cross-product, which would impose a bottleneck in the parallelism
    */


    Pipe invertPipe = new Pipe( "inverted index", joinPipe );
    invertPipe = new CoGroup( invertPipe, new Fields( "token" ), 1, new Fields( "uid1", "ignore", "uid2", "token" ) );


    Fields filterArguments = new Fields( "uid1", "uid2" );
    String uidFilter = "uid1.compareToIgnoreCase( uid2 ) >= 0";
    invertPipe = new Each( invertPipe, filterArguments, new ExpressionFilter( uidFilter, String.class ) );
    Fields ignore = new Fields( "ignore" );
    invertPipe = new Discard( invertPipe, ignore );


    /*
    flow part #4
    count the number of tokens in common for each uid pair and apply a threshold
    */


    Pipe commonPipe = new GroupBy( new Pipe( "uid common", invertPipe ), new Fields( "uid1", "uid2" ) );
    commonPipe = new Every( commonPipe, Fields.ALL, new Count( new Fields( "common" ) ), Fields.ALL );


    String commonFilter = String.format( "common < %d", MIN_COMMON_TOKENS );
    commonPipe = new Each( commonPipe, new Fields( "common" ), new ExpressionFilter( commonFilter, Integer.TYPE ) );


    /*
    flow part #5
    count the number of tokens overall for each uid, then join to calculate
    the vector length for uid1
    */


    Fields tokenCount = new Fields( "token_count" );
    Pipe countPipe = new GroupBy( "count", joinPipe, new Fields( "uid" ) );
    countPipe = new Every( countPipe, Fields.ALL, new Count( tokenCount ), Fields.ALL );


    joinPipe = new CoGroup( countPipe, new Fields( "uid" ), commonPipe, new Fields( "uid1" ) );
    joinPipe = new Pipe( "common", joinPipe );
    joinPipe = new Discard( joinPipe, new Fields( "uid" ) );

View Full Code Here

    pipeline = new GroupBy(pipeline, new Fields("resource"));


    // For every Tuple group
    // count the number of occurrences of "word" and store result in
    // a field named "count"
    Aggregator count = new Count(new Fields("resource"));
    pipeline = new Every(pipeline, count);




    // create a SINK tap to write to the default filesystem
    // by default, TextLine writes all fields out

View Full Code Here

    Pipe tsPipe = new Each( "arrival rate", new Fields( "time" ), dateParser, Fields.RESULTS );


    // name the per second assembly and split on tsPipe
    Pipe tsCountPipe = new Pipe( "tsCount", tsPipe );
    tsCountPipe = new GroupBy( tsCountPipe, new Fields( "ts" ) );
    tsCountPipe = new Every( tsCountPipe, Fields.GROUP, new Count() );


    // apply expression to create a timestamp with 'minute' granularity
    // declares field "tm"
    Pipe tmPipe = new Each( tsPipe, new ExpressionFunction( new Fields( "tm" ), "ts - (ts % (60 * 1000))", long.class ) );


    // name the per minute assembly and split on tmPipe
    Pipe tmCountPipe = new Pipe( "tmCount", tmPipe );
    tmCountPipe = new GroupBy( tmCountPipe, new Fields( "tm" ) );
    tmCountPipe = new Every( tmCountPipe, Fields.GROUP, new Count() );


    // create taps to write the results the default filesystem, using the given fields
    Tap tsSinkTap = new Hfs( new TextLine(), arrivalRateSecPath );
    Tap tmSinkTap = new Hfs( new TextLine(), arrivalRateMinPath );

View Full Code Here

      RegexGenerator wordGenerator = new RegexGenerator( new Fields( "word" ), "(?<!\\pL)(?=\\pL)[^ ]*(?<=\\pL)(?!\\pL)" );
      pipe = new Each( pipe, new Fields( "words" ), wordGenerator, new Fields( "url", "word" ) );


      // group on "url"
      Pipe urlCountPipe = new GroupBy( sinkUrlName, pipe, new Fields( "url", "word" ) );
      urlCountPipe = new Every( urlCountPipe, new Fields( "url", "word" ), new Count(), new Fields( "url", "word", "count" ) );


      // group on "word"
      Pipe wordCountPipe = new GroupBy( sinkWordName, pipe, new Fields( "word" ) );
      wordCountPipe = new Every( wordCountPipe, new Fields( "word" ), new Count(), new Fields( "word", "count" ) );


      setTails( urlCountPipe, wordCountPipe );
      }

View Full Code Here

0 1 2 3 4 5 6 7

TOP

Related Classes of cascading.operation.aggregator.Count

cascading.BasicPipesPlatformTest

cascading.BasicTrapPlatformTest

cascading.BufferPipesPlatformTest

cascading.CoGroupFieldedPipesPlatformTest

cascading.FieldedPipesPlatformTest

cascading.flow.AssemblyPlannerPlatformTest

cascading.flow.FlowStrategiesPlatformTest

cascading.flow.hadoop.BuildJobsHadoopPlatformTest

cascading.flow.ResolverExceptionsPlatformTest

cascading.JoinFieldedPipesPlatformTest

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.