// Keep only the listed road fields on roadPipe: descriptive and traffic attributes,
// paving measurements, usage flags, the two coordinate triples (…0 and …1), and
// road_geohash. Retain discards every field that is not named here.
fieldSelector = new Fields(
  "road_name", "year_construct",
  "traffic_count", "traffic_index", "traffic_class",
  "paving_length", "paving_width", "paving_area",
  "surface_type", "bike_lane", "bus_route", "truck_route", "albedo",
  "lat0", "lng0", "alt0",
  "lat1", "lng1", "alt1",
  "road_geohash"
);
roadPipe = new Retain( roadPipe, fieldSelector );
// join the tree and road pipes to estimate shade
Pipe shadePipe = new Pipe( "shade", roadPipe );
shadePipe = new CoGroup( shadePipe, new Fields( "road_geohash" ), treePipe, new Fields( "tree_geohash" ), new InnerJoin() );
// Rough tree-to-road distance, followed by a cut-off meant to keep trees within ~1 block.
Fields tree_dist = new Fields( "tree_dist" );
Fields treeDistArguments = new Fields(
  "tree_lat", "tree_lng",
  "lat0", "lng0",
  "lat1", "lng1"
);
// NOTE(review): Cascading filters REMOVE a tuple when the expression evaluates to true,
// so this drops rows with tree_dist > 25.0 and keeps the rest. The unit of 25.0 is
// whatever TreeDistanceFunction emits — confirm there.
ExpressionFilter distFilter = new ExpressionFilter( "tree_dist > 25.0", Double.class );

// Fields.ALL keeps the incoming fields and appends the computed tree_dist.
shadePipe = new Each(
  shadePipe,
  treeDistArguments,
  new TreeDistanceFunction( tree_dist ),
  Fields.ALL
);
shadePipe = new Each( shadePipe, tree_dist, distFilter );
// This (big) intermediate result gets a checkpoint as well.
// Before checkpointing, narrow the joined tuples to the road and tree fields listed
// below; fields absent from the list (for example alt0, alt1 and road_geohash) are
// discarded by Retain.
fieldSelector = new Fields(
  "road_name", "year_construct",
  "traffic_count", "traffic_index", "traffic_class",
  "paving_length", "paving_width", "paving_area",
  "surface_type", "bike_lane", "bus_route", "truck_route", "albedo",
  "lat0", "lng0",
  "lat1", "lng1",
  "tree_name", "priv", "tree_id", "situs", "tree_site", "species",
  "wikipedia", "calflora",
  "min_height", "max_height",
  "tree_lat", "tree_lng", "tree_alt",
  "tree_dist", "tree_geohash"
);
shadePipe = new Retain( shadePipe, fieldSelector );

// Group on tree_name, passing tree_dist as the sort fields of the grouping.
shadePipe = new GroupBy(
  shadePipe,
  new Fields( "tree_name" ),
  new Fields( "tree_dist" )
);
Checkpoint shadeCheck = new Checkpoint( "shade", shadePipe );
// GPS track log events: derive a "gps_geohash" field from each event's lat/lng,
// keeping all of the incoming fields (Fields.ALL).
geohashArguments = new Fields( "lat", "lng" );
// NOTE(review): the 6 is presumably the geohash precision — confirm in GeoHashFunction.
Pipe logsPipe = new Each(
  new Pipe( "logs" ),
  geohashArguments,
  new GeoHashFunction( new Fields( "gps_geohash" ), 6 ),
  Fields.ALL
);
// prepare data for recommendations
// NB: RHS is large given the sample data, but in practice the logs on the LHS could be much larger
Pipe recoPipe = new Pipe( "reco", logsPipe );
recoPipe = new CoGroup( recoPipe, new Fields( "gps_geohash" ), shadeCheck, new Fields( "tree_geohash" ), new InnerJoin() );
// connect the taps, pipes, etc., into a flow
FlowDef flowDef = FlowDef.flowDef()
.setName( "copa" )
.addSource( gisPipe, gisTap )