Examples of cascading.operation.expression.ExpressionFilter

cascading.operation.expression.ExpressionFilter
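Class ExpressionFilter dynamically resolves a given expression using argument Tuple values. Any Tuple that returns true for the given expression will be removed from the stream. This Filter is based on the Janino compiler (http://www.janino.net/).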
Specifically this filter uses the {@link ExpressionEvaluator}, thus the syntax from that class is inherited here.
An expression may use field names directly as parameters in the expression, or field positions with the syntax "$n", where n is an integer.
Given an argument tuple with the fields "a" and "b", the following expression returns true:
a + b == $0 + $1

Further, the types of the tuple elements will be coerced into the given parameterTypes. Regardless of the actual tuple element values, they will be converted to the types expected by the expression.
Field names used in the expression should be valid Java variable names; for example, '+' or '-' are not allowed. Also, the use of a field name that begins with an upper-case character is likely to fail and should be avoided.

            // Now we want to sum the scores for each user, which is another grouping/summing.
            analysisPipe = new GroupBy(analysisPipe, new Fields(FieldNames.EMAIL_ADDRESS));
            analysisPipe = new Every(analysisPipe, new SumScoresBuffer(), Fields.RESULTS);
            
            // Let's filter out anybody with an uninteresting score.
            ExpressionFilter filter = new ExpressionFilter(String.format("%s <= 0.0", FieldNames.SUMMED_SCORE), Double.class);
            analysisPipe = new Each(analysisPipe, filter);
            
            // And let's sort in reverse order (high to low score)
            analysisPipe = new GroupBy(analysisPipe, new Fields(FieldNames.SUMMED_SCORE), true);

View Full Code Here

            // Now we want to sum the scores for each user, which is another grouping/summing.
            pipe = new GroupBy(pipe, new Fields(FieldNames.EMAIL_ADDRESS));
            pipe = new Every(pipe, new SumScoresBuffer(), Fields.RESULTS);
            
            // Let's filter out anybody with an uninteresting score.
            ExpressionFilter filter = new ExpressionFilter(String.format("%s <= 0.0", FieldNames.SUMMED_SCORE), Double.class);
            pipe = new Each(pipe, filter);
            
            // And let's sort in reverse order (high to low score)
            pipe = new GroupBy(pipe, new Fields(FieldNames.SUMMED_SCORE), true);

View Full Code Here

    Pipe invertPipe = new Pipe( "inverted index", joinPipe );
    invertPipe = new CoGroup( invertPipe, new Fields( "token" ), 1, new Fields( "uid1", "ignore", "uid2", "token" ) );


    Fields filterArguments = new Fields( "uid1", "uid2" );
    String uidFilter = "uid1.compareToIgnoreCase( uid2 ) >= 0";
    invertPipe = new Each( invertPipe, filterArguments, new ExpressionFilter( uidFilter, String.class ) );
    Fields ignore = new Fields( "ignore" );
    invertPipe = new Discard( invertPipe, ignore );


    /*
    flow part #4
    count the number of tokens in common for each uid pair and apply a threshold
    */


    Pipe commonPipe = new GroupBy( new Pipe( "uid common", invertPipe ), new Fields( "uid1", "uid2" ) );
    commonPipe = new Every( commonPipe, Fields.ALL, new Count( new Fields( "common" ) ), Fields.ALL );


    String commonFilter = String.format( "common < %d", MIN_COMMON_TOKENS );
    commonPipe = new Each( commonPipe, new Fields( "common" ), new ExpressionFilter( commonFilter, Integer.TYPE ) );


    /*
    flow part #5
    count the number of tokens overall for each uid, then join to calculate
    the vector length for uid1
    */


    Fields tokenCount = new Fields( "token_count" );
    Pipe countPipe = new GroupBy( "count", joinPipe, new Fields( "uid" ) );
    countPipe = new Every( countPipe, Fields.ALL, new Count( tokenCount ), Fields.ALL );


    joinPipe = new CoGroup( countPipe, new Fields( "uid" ), commonPipe, new Fields( "uid1" ) );
    joinPipe = new Pipe( "common", joinPipe );
    joinPipe = new Discard( joinPipe, new Fields( "uid" ) );


    joinPipe = new Rename( joinPipe, tokenCount, new Fields( "token_count1" ) );


    /*
    flow part #6 join to be able to calculate the vector length for
    uid2, remove instances where one uid merely retweets another,
    then calculate an Ochiai similarity metric to find the nearest
    "neighbors" for each uid -- as recommended users to "follow"
    */


    joinPipe = new CoGroup( "similarity", countPipe, new Fields( "uid" ), joinPipe, new Fields( "uid2" ) );


    joinPipe = new Rename( joinPipe, tokenCount, new Fields( "token_count2" ) );


    // use a DEBUG to check the values in the tuple stream; turn off in the FLOWDEF below
    joinPipe = new Each( joinPipe, DebugLevel.VERBOSE, new Debug( true ) );


    Fields expressionArguments = new Fields( "token_count1", "token_count2", "common" );
    commonFilter = "( token_count1 == common ) || ( token_count2 == common )";
    joinPipe = new Each( joinPipe, expressionArguments, new ExpressionFilter( commonFilter, Integer.TYPE ) );


    Fields ochiaiArguments = new Fields( "uid1", "token_count1", "uid2", "token_count2", "common" );
    Fields resultFields = new Fields( "uid", "recommend_uid", "similarity" );
    joinPipe = new Each( joinPipe, ochiaiArguments, new OchiaiFunction( resultFields ), Fields.RESULTS );


    /*
    flow part #7
    apply thresholds to filter out poor recommendations
    */


    Fields similarityArguments = new Fields( "similarity" );
    commonFilter = String.format(Locale.US, "similarity < %f || similarity > %f", MIN_SIMILARITY, MAX_SIMILARITY );
    joinPipe = new Each( joinPipe, similarityArguments, new ExpressionFilter( commonFilter, Double.TYPE ) );


    /*
    connect up all the flow, generate a flow diagram, then run the flow.
    results for recommended users get stored in the "similarityPath" sink tap.
    */

View Full Code Here

    // calculate a rough estimate for distance from tree to road, then filter for "< ~1 block"
    Fields treeDistArguments = new Fields( "tree_lat", "tree_lng", "lat0", "lng0", "lat1", "lng1" );
    Fields tree_dist = new Fields( "tree_dist" );
    shadePipe = new Each( shadePipe, treeDistArguments, new TreeDistanceFunction( tree_dist ), Fields.ALL );


    ExpressionFilter distFilter = new ExpressionFilter( "tree_dist > 25.0", Double.class );
    shadePipe = new Each( shadePipe, tree_dist, distFilter );


    // checkpoint this (big) calculation too
    fieldSelector = new Fields( "road_name", "year_construct", "traffic_count", "traffic_index", "traffic_class", "paving_length", "paving_width", "paving_area", "surface_type", "bike_lane", "bus_route", "truck_route", "albedo", "lat0", "lng0", "lat1", "lng1", "tree_name", "priv", "tree_id", "situs", "tree_site", "species", "wikipedia", "calflora", "min_height", "max_height", "tree_lat", "tree_lng", "tree_alt", "tree_dist", "tree_geohash" );
    shadePipe = new Retain( shadePipe, fieldSelector );

View Full Code Here

  // these tests verify an Expression can be safely nested.


  public void testOrExpression()
    {
    Fields inputFields = new Fields( "a", "b" );
    ExpressionFilter f1 = new ExpressionFilter( "( 100f < a )", new String[]{"a"}, new Class<?>[]{Float.TYPE} );
    ExpressionFilter f2 = new ExpressionFilter( "( 100f < b )", new String[]{"b"}, new Class<?>[]{Float.TYPE} );
    Or logic = new Or( new Fields( "a" ), f1, new Fields( "b" ), f2 );


    boolean[] results = invokeFilter( logic,
      new TupleEntry[]{
        new TupleEntry( inputFields, new Tuple( "1", "10" ) ),

View Full Code Here

    }


  public void testXorExpression()
    {
    Fields inputFields = new Fields( "a", "b" );
    ExpressionFilter f1 = new ExpressionFilter( "( 100f < a )", new String[]{"a"}, new Class<?>[]{Float.TYPE} );
    ExpressionFilter f2 = new ExpressionFilter( "( 100f < b )", new String[]{"b"}, new Class<?>[]{Float.TYPE} );
    Xor logic = new Xor( new Fields( "a" ), f1, new Fields( "b" ), f2 );


    boolean[] results = invokeFilter( logic,
      new TupleEntry[]{
        new TupleEntry( inputFields, new Tuple( "1", "10" ) ),

View Full Code Here

    }


  public void testAndExpression()
    {
    Fields inputFields = new Fields( "a", "b" );
    ExpressionFilter f1 = new ExpressionFilter( "( 100f < a )", new String[]{"a"}, new Class<?>[]{Float.TYPE} );
    ExpressionFilter f2 = new ExpressionFilter( "( 100f < b )", new String[]{"b"}, new Class<?>[]{Float.TYPE} );
    And logic = new And( new Fields( "a" ), f1, new Fields( "b" ), f2 );


    boolean[] results = invokeFilter( logic,
      new TupleEntry[]{
        new TupleEntry( inputFields, new Tuple( "1", "10" ) ),

View Full Code Here

    Tap sink = getPlatform().getTextFile( Fields.size( 1 ), getOutputPath( "simpleresult" ), SinkMode.REPLACE );


    Pipe pipe = new Pipe( "test" );


    // skip the first line
    pipe = new Each( pipe, new Fields( 0 ), new ExpressionFilter( "$0 == 0", Long.class ) );


    pipe = new Each( pipe, new Fields( 1 ), new Identity() );


    pipe = new Each( pipe, Fields.ALL, new RegexFilter( "a|b|c" ) );

View Full Code Here


    Tap sink = new Hfs( new TextLine(), "foo" );


    Pipe head = new Pipe( "source" );


    head = new Each( head, new Fields( "line" ), new ExpressionFilter( "line.length() != 0", String.class ) );


    Pipe left = new Each( new Pipe( "left", head ), new Fields( "line" ), new RegexFilter( ".*46.*" ) );
    Pipe right = new Each( new Pipe( "right", head ), new Fields( "line" ), new RegexFilter( ".*192.*" ) );


    Pipe merge = new GroupBy( "merge", Pipe.pipes( left, right ), new Fields( "offset" ) );

View Full Code Here

TOP

Related Classes of cascading.operation.expression.ExpressionFilter

cascading.BasicPipesPlatformTest

cascading.flow.hadoop.BuildJobsHadoopPlatformTest

cascading.operation.filter.FilterTest

com.scaleunlimited.helpful.tools.AnalyzeEmail

com.scaleunlimited.helpful.tools.AnalyzeMbox

copa.Main

sample.recommender.Main

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.