Package cascading.pipe.joiner

Examples of cascading.pipe.joiner.InnerJoin


        MultiSourceTap dRM1Source = getTaps(dRM1InputPath, inFieldsDRM1);
        MultiSourceTap dRM2Source = getTaps(dRM2InputPath, inFieldsDRM2);

        Pipe lhs = new Pipe("DRM1");
        Pipe rhs = new Pipe("DRM2");
        Pipe groupByItemIDPipe = new CoGroup(lhs, common, rhs, common, grouped, new InnerJoin());
        groupByItemIDPipe = new Each(groupByItemIDPipe, new VectorsToCSVFunction(joinedOutFields));
        //the DRMs (Mahout Distributed Row Matrices) have row and items indexes the two dictionary BiHashMaps
        //pass these to the output function so the strings from the indexes can be written instead of the
        //binary values of the Keys and Vectors in the DRMs
        groupByItemIDPipe.getStepConfigDef().setProperty("itemIndexPath", itemIndexPath.toString());
View Full Code Here


    ExpressionFunction exprFunc = new ExpressionFunction( new Fields( "tree_species" ), expression, String.class );
    treePipe = new Each( treePipe, new Fields( "scrub_species" ), exprFunc, Fields.ALL );

    // join with tree metadata
    Pipe metaTreePipe = new Pipe( "meta_tree" );
    treePipe = new HashJoin( treePipe, new Fields( "tree_species" ), metaTreePipe, new Fields( "species" ), new InnerJoin() );
    treePipe = new Rename( treePipe, new Fields( "blurb" ), new Fields( "tree_name" ) );

    regex = "^(\\S+),(\\S+),(\\S+)\\s*$";
    int[] gpsGroups = { 1, 2, 3 };
    parser = new RegexParser( new Fields( "tree_lat", "tree_lng", "tree_alt" ), regex, gpsGroups );
    treePipe = new Each( treePipe, new Fields( "geo" ), parser, Fields.ALL );

    // determine a tree geohash
    Fields geohashArguments = new Fields( "tree_lat", "tree_lng" );
    treePipe = new Each( treePipe, geohashArguments, new GeoHashFunction( new Fields( "tree_geohash" ), 6 ), Fields.ALL );

    Fields fieldSelector = new Fields( "tree_name", "priv", "tree_id", "situs", "tree_site", "species", "wikipedia", "calflora", "min_height", "max_height", "tree_lat", "tree_lng", "tree_alt", "tree_geohash" );
    treePipe = new Retain( treePipe, fieldSelector );

    // parse the "road" output
    Pipe roadPipe = new Pipe( "road", tsvCheck );
    regex = "^\\s+Sequence\\:.*\\s+Year Constructed\\:\\s+(\\d+)\\s+Traffic Count\\:\\s+(\\d+)\\s+Traffic Index\\:\\s+(\\w.*\\w)\\s+Traffic Class\\:\\s+(\\w.*\\w)\\s+Traffic Date.*\\s+Paving Length\\:\\s+(\\d+)\\s+Paving Width\\:\\s+(\\d+)\\s+Paving Area\\:\\s+(\\d+)\\s+Surface Type\\:\\s+(\\w.*\\w)\\s+Surface Thickness.*\\s+Bike Lane\\:\\s+(\\w+)\\s+Bus Route\\:\\s+(\\w+)\\s+Truck Route\\:\\s+(\\w+)\\s+Remediation.*$";
    roadPipe = new Each( roadPipe, new Fields( "misc" ), new RegexFilter( regex ) );
    Fields roadFields = new Fields( "year_construct", "traffic_count", "traffic_index", "traffic_class", "paving_length", "paving_width", "paving_area", "surface_type", "bike_lane", "bus_route", "truck_route" );
    int[] roadGroups = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
    parser = new RegexParser( roadFields, regex, roadGroups );
    roadPipe = new Each( roadPipe, new Fields( "misc" ), parser, Fields.ALL );

    // join with road metadata
    Pipe metaRoadPipe = new Pipe( "meta_road" );
    roadPipe = new HashJoin( roadPipe, new Fields( "surface_type" ), metaRoadPipe, new Fields( "pavement_type" ), new InnerJoin() );
    roadPipe = new Rename( roadPipe, new Fields( "blurb" ), new Fields( "road_name" ) );

    // estimate albedo based on the road surface age and pavement type
    Fields albedoArguments = new Fields( "year_construct", "albedo_new", "albedo_worn" );
    roadPipe = new Each( roadPipe, albedoArguments, new AlbedoFunction( new Fields( "albedo" ), 2002 ), Fields.ALL );

    // generate road segments, with midpoint, y=mx+b, and road_geohash for each
    Fields segmentArguments = new Fields( "geo" );
    Fields segmentResults = new Fields( "lat0", "lng0", "alt0", "lat1", "lng1", "alt1", "lat_mid", "lng_mid" );
    roadPipe = new Each( roadPipe, segmentArguments, new RoadSegmentFunction( segmentResults ), Fields.ALL );

    geohashArguments = new Fields( "lat_mid", "lng_mid" );
    roadPipe = new Each( roadPipe, geohashArguments, new GeoHashFunction( new Fields( "road_geohash" ), 6 ), Fields.ALL );

    fieldSelector = new Fields( "road_name", "year_construct", "traffic_count", "traffic_index", "traffic_class", "paving_length", "paving_width", "paving_area", "surface_type", "bike_lane", "bus_route", "truck_route", "albedo", "lat0", "lng0", "alt0", "lat1", "lng1", "alt1", "road_geohash" );
    roadPipe = new Retain( roadPipe, fieldSelector );

    // join the tree and road pipes to estimate shade
    Pipe shadePipe = new Pipe( "shade", roadPipe );
    shadePipe = new CoGroup( shadePipe, new Fields( "road_geohash" ), treePipe, new Fields( "tree_geohash" ), new InnerJoin() );

    // calculate a rough estimate for distance from tree to road, then filter for "< ~1 block"
    Fields treeDistArguments = new Fields( "tree_lat", "tree_lng", "lat0", "lng0", "lat1", "lng1" );
    Fields tree_dist = new Fields( "tree_dist" );
    shadePipe = new Each( shadePipe, treeDistArguments, new TreeDistanceFunction( tree_dist ), Fields.ALL );

    ExpressionFilter distFilter = new ExpressionFilter( "tree_dist > 25.0", Double.class );
    shadePipe = new Each( shadePipe, tree_dist, distFilter );

    // checkpoint this (big) calculation too
    fieldSelector = new Fields( "road_name", "year_construct", "traffic_count", "traffic_index", "traffic_class", "paving_length", "paving_width", "paving_area", "surface_type", "bike_lane", "bus_route", "truck_route", "albedo", "lat0", "lng0", "lat1", "lng1", "tree_name", "priv", "tree_id", "situs", "tree_site", "species", "wikipedia", "calflora", "min_height", "max_height", "tree_lat", "tree_lng", "tree_alt", "tree_dist", "tree_geohash" );
    shadePipe = new Retain( shadePipe, fieldSelector );
    shadePipe = new GroupBy( shadePipe, new Fields( "tree_name" ), new Fields( "tree_dist" ) );

    Checkpoint shadeCheck = new Checkpoint( "shade", shadePipe );

    // determine the geohash for GPS tracks log events
    Pipe logsPipe = new Pipe( "logs" );
    geohashArguments = new Fields( "lat", "lng" );
    logsPipe = new Each( logsPipe, geohashArguments, new GeoHashFunction( new Fields( "gps_geohash" ), 6 ), Fields.ALL );

    // prepare data for recommendations
    // NB: RHS is large given the sample data, but in practice the logs on the LHS could be much larger
    Pipe recoPipe = new Pipe( "reco", logsPipe );
    recoPipe = new CoGroup( recoPipe, new Fields( "gps_geohash" ), shadeCheck, new Fields( "tree_geohash" ), new InnerJoin() );

    // connect the taps, pipes, etc., into a flow
    FlowDef flowDef = FlowDef.flowDef()
     .setName( "copa" )
     .addSource( gisPipe, gisTap )
View Full Code Here

    Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "cross" ), SinkMode.REPLACE );

    Pipe pipeLower = new Each( "lhs", new Fields( "line" ), new RegexSplitter( new Fields( "numLHS", "charLHS" ), " " ) );
    Pipe pipeUpper = new Each( "rhs", new Fields( "line" ), new RegexSplitter( new Fields( "numRHS", "charRHS" ), " " ) );

    Pipe cross = new CoGroup( pipeLower, new Fields( "numLHS" ), pipeUpper, new Fields( "numRHS" ), new InnerJoin() );

    Flow flow = getPlatform().getFlowConnector().connect( sources, sink, cross );

    flow.complete();
View Full Code Here

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter );

    Pipe splice = new CoGroup( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), new InnerJoin( Fields.size( 4 ) ) );

    Map<Object, Object> properties = getProperties();

    // make sure hasher is getting called, but does nothing special
    FlowProps.setDefaultTupleElementComparator( properties, getPlatform().getStringComparator( false ).getClass().getCanonicalName() );
View Full Code Here

    results.add( new Tuple( "4", "d1", "4", "D1" ) );
    results.add( new Tuple( "4", "d2", "4", "D1" ) );
    results.add( new Tuple( "4", "d3", "4", "D1" ) );
    results.add( new Tuple( null, "h1", null, "H1" ) );

    handleJoins( "cogroupinner", new InnerJoin(), results, 8, false, null );
    handleJoins( "cogroupinner-resultgroup", new InnerJoin(), results, 8, true, null );
    }
View Full Code Here

    results.add( new Tuple( "2", "b1", "2", "B3" ) );
    results.add( new Tuple( "4", "d1", "4", "D1" ) );
    results.add( new Tuple( "4", "d2", "4", "D1" ) );
    results.add( new Tuple( "4", "d3", "4", "D1" ) );

    handleJoins( "cogroupinnernull", new InnerJoin(), results, 9, false, new NullNotEquivalentComparator() );
    handleJoins( "cogroupinnernull-resultgroup", new InnerJoin(), results, 9, true, new NullNotEquivalentComparator() );
    }
View Full Code Here

    fielded = new Each( fielded, new Fields( "line" ), new RegexSplitter( "\\s" ) );

//    fielded = new Each( fielded, new Debug( true ) );

    Pipe inner = new CoGroup( "inner", fielded, new Fields( 0 ), uniques, new Fields( "word" ), new InnerJoin() );
    Pipe outer = new CoGroup( "outer", fielded, new Fields( 0 ), uniques, new Fields( "word" ), new OuterJoin() );
    Pipe left = new CoGroup( "left", fielded, new Fields( 0 ), uniques, new Fields( "word" ), new LeftJoin() );
    Pipe right = new CoGroup( "right", fielded, new Fields( 0 ), uniques, new Fields( "word" ), new RightJoin() );

    Pipe[] heads = Pipe.pipes( uniques, fielded );
View Full Code Here

    Pipe join;

    if( isMerge && isGroup )
      join = new GroupBy( Pipe.pipes( pipeLower, pipeUpper ), numLHS );
    else if( !isMerge && isGroup )
      join = new CoGroup( pipeLower, numLHS, pipeUpper, numRHS, declaredFields, new InnerJoin() );
    else if( isMerge && !isGroup )
      join = new Merge( pipeLower, pipeUpper );
    else
      join = new HashJoin( pipeLower, numLHS, pipeUpper, numRHS, declaredFields, new InnerJoin() );

    Flow flow = null;
    try
      {
      flow = getPlatform().getFlowConnector().connect( sources, sink, join );
View Full Code Here

      if( sortFields != null )
        this.sortFieldsMap.put( pipe.getName(), sortFields );
      }

    this.reverseOrder = reverseOrder;
    this.joiner = new InnerJoin();
    }
View Full Code Here

    if( isJoin() && joiner instanceof BufferJoin )
      throw new IllegalArgumentException( "invalid joiner, may not use BufferJoiner in a HashJoin" );

    if( joiner == null )
      {
      joiner = new InnerJoin();
      return;
      }

    if( joiner.numJoins() == -1 )
      return;
View Full Code Here

TOP

Related Classes of cascading.pipe.joiner.InnerJoin

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.