Package cascading.pipe.assembly

Examples of cascading.pipe.assembly.Discard


    Fields filterArguments = new Fields( "uid1", "uid2" );
    String uidFilter = "uid1.compareToIgnoreCase( uid2 ) >= 0";
    invertPipe = new Each( invertPipe, filterArguments, new ExpressionFilter( uidFilter, String.class ) );
    Fields ignore = new Fields( "ignore" );
    invertPipe = new Discard( invertPipe, ignore );

    /*
    flow part #4
    count the number of tokens in common for each uid pair and apply a threshold
    */

    Pipe commonPipe = new GroupBy( new Pipe( "uid common", invertPipe ), new Fields( "uid1", "uid2" ) );
    commonPipe = new Every( commonPipe, Fields.ALL, new Count( new Fields( "common" ) ), Fields.ALL );

    String commonFilter = String.format( "common < %d", MIN_COMMON_TOKENS );
    commonPipe = new Each( commonPipe, new Fields( "common" ), new ExpressionFilter( commonFilter, Integer.TYPE ) );

    /*
    flow part #5
    count the number of tokens overall for each uid, then join to calculate
    the vector length for uid1
    */

    Fields tokenCount = new Fields( "token_count" );
    Pipe countPipe = new GroupBy( "count", joinPipe, new Fields( "uid" ) );
    countPipe = new Every( countPipe, Fields.ALL, new Count( tokenCount ), Fields.ALL );

    joinPipe = new CoGroup( countPipe, new Fields( "uid" ), commonPipe, new Fields( "uid1" ) );
    joinPipe = new Pipe( "common", joinPipe );
    joinPipe = new Discard( joinPipe, new Fields( "uid" ) );

    joinPipe = new Rename( joinPipe, tokenCount, new Fields( "token_count1" ) );

    /*
    flow part #6 join to be able to calculate the vector length for
View Full Code Here


    Pipe pipe = new Pipe( "head" );

    Fields discardFields = trainingFields.appendSelector( predictorFields );

    if( !discardFields.isNone() )
      pipe = new Discard( pipe, discardFields );

    Tap source = getPlatform().getDelimitedFile( "\t", "\"", planner.getFieldTypeResolver(), DATA_PATH + testModel + ".tsv", SinkMode.KEEP );
    Tap sink = getPlatform().getDelimitedFile( "\t", "\"", null, getResultPath(), SinkMode.REPLACE );

    FlowDef flowDef = FlowDef.flowDef()
View Full Code Here

      buffer = new PredictionSelectionBuffer( ensembleSpec );

    pipe = new Every( pipe, predictedFields, buffer, Fields.SWAP );

    if( modelSchema.getKeyFields().isNone() )
      pipe = new Discard( pipe, keyFields );

    setTails( pipe );
    }
View Full Code Here

    }

  protected void performTest( String inputData, Fields predictedFields, Fields expectedFields, EnsembleSpec<TreeSpec> ensembleSpec ) throws IOException
    {
    Pipe pipe = new Pipe( "head" );
    pipe = new Discard( pipe, predictedFields );
    pipe = new ParallelEnsembleAssembly( pipe, ensembleSpec );
    pipe = new Pipe( "tail", pipe );

    Tap source = getPlatform().getDelimitedFile( expectedFields.append( predictedFields ), true, ",", "\"", DATA_PATH + inputData, SinkMode.KEEP );
    Tap sink = getPlatform().getDelimitedFile( Fields.ALL, true, ",", "\"", getResultPath(), SinkMode.REPLACE );
View Full Code Here

    Function splitterLower = new RegexSplitter( new Fields( "numA", "left" ), " " );
    Function splitterUpper = new RegexSplitter( new Fields( "numB", "right" ), " " );

    Pipe offsetLower = new Pipe( "offsetLower" );
    offsetLower = new Discard( offsetLower, new Fields( "offset" ) );
    offsetLower = new Each( offsetLower, new Fields( "line" ), splitterLower );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitterUpper );

    Pipe cogroup = new CoGroup( offsetLower, new Fields( "numA" ), pipeLower, new Fields( "numB" ) );
View Full Code Here

          newKeyFields = newKeyFields.append(new Fields("__bloom_join_tmp_" + i));
        }
        rhsOrig = new Each(rhsOrig, smallJoinFields, new Identity(newKeyFields), newKeyFields);
        filterPipe = getCoGroup(filterPipe, largeJoinFields, rhsOrig, newKeyFields, renameFields, joiner, coGroupOrder, operationType);

        filterPipe = new Discard(filterPipe, newKeyFields);
      } else if (operationType == Mode.JOIN) {
        filterPipe = getCoGroup(filterPipe, largeJoinFields, rhsOrig, smallJoinFields, renameFields, joiner, coGroupOrder, operationType);
      }

      setTails(filterPipe);
View Full Code Here

TOP

Related Classes of cascading.pipe.assembly.Discard

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.