Package org.apache.pig.data

Examples of org.apache.pig.data.DataBag


                // value and output of it is put into a bag. The bag containing
                // all AVGInitial output is provided as input to AVGFinal
               
                // The tuple we got above has a bag with input
                // values. Lets call AVGInitial with each value:
                DataBag bg = (DataBag) tup.get(0);
                DataBag  finalInputBg = bagFactory.newDefaultBag();
                for (Tuple tuple : bg) {
                    DataBag initialInputBg = bagFactory.newDefaultBag();
                    initialInputBg.add(tuple);
                    Tuple initialInputTuple = tupleFactory.newTuple(initialInputBg);
                    finalInputBg.add((Tuple)avgInitial.exec(initialInputTuple));
                }
   
                Tuple finalInputTuple = tupleFactory.newTuple(finalInputBg);
View Full Code Here


                // The outputs from the two calls to <Agg>Intermediate are put into a bag
                // and sent as input to <Agg>Final
               
                // The tuple we got above has a bag with input
                // values. Lets call <Agg>Initial with each value:
                DataBag bg = (DataBag) tup.get(0);
                DataBag  intermediateInputBg1 = bagFactory.newDefaultBag();
                DataBag  intermediateInputBg2 = bagFactory.newDefaultBag();
                int i = 0;
                for (Tuple tuple : bg) {
                    DataBag initialInputBg = bagFactory.newDefaultBag();
                    initialInputBg.add(tuple);
                    Tuple initialInputTuple = tupleFactory.newTuple(initialInputBg);
                    if(i < bg.size()/2) {
                        intermediateInputBg1.add((Tuple)aggInitial.exec(initialInputTuple));
                    } else {
                        intermediateInputBg2.add((Tuple)aggInitial.exec(initialInputTuple));
                    }
                    i++;
                }

                EvalFunc<?> avgIntermediate = evalFuncMap.get(aggIntermediateTypes[k]);
                DataBag finalInputBg = bagFactory.newDefaultBag();
                Tuple intermediateInputTuple = tupleFactory.newTuple(intermediateInputBg1);
                finalInputBg.add((Tuple)avgIntermediate.exec(intermediateInputTuple));
                intermediateInputTuple = tupleFactory.newTuple(intermediateInputBg2);
                finalInputBg.add((Tuple)avgIntermediate.exec(intermediateInputTuple));
               
                Tuple finalInputTuple = tupleFactory.newTuple(finalInputBg);
                EvalFunc<?> aggFinal = evalFuncMap.get(aggFinalTypes[k]);
                String msg = "[Testing " + aggGroup[k] + " on input type: " + getInputType(aggFinalTypes[k]);
                System.err.println(msg + " for single combiner case]");
View Full Code Here

                // The outputs from the two calls to <Agg>Intermediate are put into a bag
                // and sent as input to <Agg>Final
               
                // The tuple we got above has a bag with input
                // values. Lets call <Agg>Initial with each value:
                DataBag bg = (DataBag) tup.get(0);
                DataBag[]  mapIntermediateInputBgs = new DataBag[4];
                for (int i = 0; i < mapIntermediateInputBgs.length; i++) {
                    mapIntermediateInputBgs[i] = bagFactory.newDefaultBag();
                }
                Iterator<Tuple> it = bg.iterator();
                for(int i = 0; i < 4; i++) {
                    for(int j = 0; j < bg.size()/4; j++) {
                        DataBag initialInputBg = bagFactory.newDefaultBag();
                        initialInputBg.add(it.next());
                        Tuple initialInputTuple = tupleFactory.newTuple(initialInputBg);
                        mapIntermediateInputBgs[i].add((Tuple)aggInitial.exec(initialInputTuple));
                    }
                    if(i == 3) {
                        // if the last quarter has more elements process them
                        while(it.hasNext()) {
                            DataBag initialInputBg = bagFactory.newDefaultBag();
                            initialInputBg.add(it.next());
                            Tuple initialInputTuple = tupleFactory.newTuple(initialInputBg);
                            mapIntermediateInputBgs[i].add((Tuple)aggInitial.exec(initialInputTuple));
                        }
                    }
                }

                EvalFunc<?> aggIntermediate = evalFuncMap.get(aggIntermediateTypes[k]);
                DataBag[] reduceIntermediateInputBgs = new DataBag[2];
                for (int i = 0; i < reduceIntermediateInputBgs.length; i++) {
                    reduceIntermediateInputBgs[i] = bagFactory.newDefaultBag();                   
                }

                // simulate call to combine after map
                for(int i = 0; i < 4; i++) {
                    Tuple intermediateInputTuple = tupleFactory.newTuple(mapIntermediateInputBgs[i]);
                    if(i < 2) {
                        reduceIntermediateInputBgs[0].add((Tuple)aggIntermediate.exec(intermediateInputTuple));
                    } else {
                        reduceIntermediateInputBgs[1].add((Tuple)aggIntermediate.exec(intermediateInputTuple));
                    }
                }
              
                DataBag finalInputBag = bagFactory.newDefaultBag();
                // simulate call to combine before reduce
                for(int i = 0; i < 2; i++) {
                    Tuple intermediateInputTuple = tupleFactory.newTuple(reduceIntermediateInputBgs[i]);
                    finalInputBag.add((Tuple)aggIntermediate.exec(intermediateInputTuple));
                }
               
                // simulate call to final (in reduce)
                Tuple finalInputTuple = tupleFactory.newTuple(finalInputBag);
                EvalFunc<?> aggFinal = evalFuncMap.get(aggFinalTypes[k]);
View Full Code Here

                // put into one bag and the next half into another. Then these two
                // bags are provided as inputs to two separate calls of <Agg>Intermediate.
                // The outputs from the two calls to <Agg>Intermediate are put into a bag
                // and sent as input to <Agg>Final
               
                DataBag  intermediateInputBg1 = bagFactory.newDefaultBag();
                DataBag  intermediateInputBg2 = bagFactory.newDefaultBag();
                Tuple outputTuple = null;
                for(int i = 0; i < 10; i++) {
                    // create empty bag input to be provided as input
                    // argument to the "Initial" function
                    DataBag initialInputBg = bagFactory.newDefaultBag();
                    Tuple initialInputTuple = tupleFactory.newTuple(initialInputBg);
                   
                    if(i < 5) {
                        outputTuple = (Tuple)aggInitial.exec(initialInputTuple);
                        // check that output is null for all aggs except COUNT
                        // COUNT will give an output of 0 for empty bag input
                        checkZeroOrNull(aggInitial, outputTuple.get(0));
                        intermediateInputBg1.add(outputTuple);
                    } else {
                        outputTuple = (Tuple)aggInitial.exec(initialInputTuple);
                        // check that output is null for all aggs except COUNT
                        // COUNT will give an output of 0 for empty bag input
                        checkZeroOrNull(aggInitial, outputTuple.get(0));
                        intermediateInputBg2.add(outputTuple);
                    }
                }

                EvalFunc<?> aggIntermediate = evalFuncMap.get(aggIntermediateTypes[k]);
                DataBag finalInputBg = bagFactory.newDefaultBag();
                Tuple intermediateInputTuple = tupleFactory.newTuple(intermediateInputBg1);
                outputTuple = (Tuple)aggIntermediate.exec(intermediateInputTuple);
                // check that output is null for all aggs except COUNT
                // COUNT will give an output of 0 for empty bag input
                checkZeroOrNull(aggIntermediate, outputTuple.get(0));
                finalInputBg.add(outputTuple);
                intermediateInputTuple = tupleFactory.newTuple(intermediateInputBg2);
                outputTuple = (Tuple)aggIntermediate.exec(intermediateInputTuple);
                // check that output is null for all aggs except COUNT
                // COUNT will give an output of 0 for empty bag input
                checkZeroOrNull(aggIntermediate, outputTuple.get(0));
                finalInputBg.add(outputTuple);
               
                Tuple finalInputTuple = tupleFactory.newTuple(finalInputBg);
               
                EvalFunc<?> aggFinal = evalFuncMap.get(aggFinalTypes[k]);
                Object output = aggFinal.exec(finalInputTuple);
View Full Code Here

           
            for(int k = 0; k < aggGroup.length; k++) {
                EvalFunc<?> agg = evalFuncMap.get(aggGroup[k]);

                // call agg with empty bag as input
                DataBag inputBag = bagFactory.newDefaultBag();
                Tuple inputTuple = tupleFactory.newTuple(inputBag);
               
                Object output = agg.exec(inputTuple);
                // check that output is null for all aggs except COUNT
                // COUNT will give an output of 0 for empty bag input
View Full Code Here

            // The tuple we got above has a bag with input
            // values. Input to the Intermediate.exec() however comes
            // from the map which would put each value and a count of
            // 1 in a tuple and send it down. So lets create a bag with
            // tuples that have two fields - the value and a count 1.
            DataBag bag = (DataBag) tup.get(0);
            DataBag  bg = bagFactory.newDefaultBag();
            for (Tuple t: bag) {
                Tuple newTuple = tupleFactory.newTuple(2);
                newTuple.set(0, t.get(0));
                newTuple.set(1, new Long(1));
                bg.add(newTuple);               
            }
            Tuple intermediateInput = tupleFactory.newTuple();
            intermediateInput.append(bg);
           
            Object output = avg.exec(intermediateInput);
View Full Code Here

            // 1 in a tuple and send it down. So lets create a bag with
            // tuples that have two fields - the value and a count 1.
            // The input has 10 values - lets put the first five of them
            // in the input to the first call of AVGIntermediate and the
            // remaining five in the second call.
            DataBag bg = (DataBag) tup.get(0);
            DataBag  bg1 = bagFactory.newDefaultBag();
            DataBag  bg2 = bagFactory.newDefaultBag();
            int i = 0;
            for (Tuple t: bg) {
                Tuple newTuple = tupleFactory.newTuple(2);
                newTuple.set(0, t.get(0));
                if ( t.get(0) == null)
                    newTuple.set(1, new Long(0));
                else
                    newTuple.set(1, new Long(1));
                if(i < 5) {
                    bg1.add(newTuple);
                } else {
                    bg2.add(newTuple);
                }
                i++;
            }
            Tuple intermediateInput1 = tupleFactory.newTuple();
            intermediateInput1.append(bg1);
            Object output1 = avgIntermediate.exec(intermediateInput1);
            Tuple intermediateInput2 = tupleFactory.newTuple();
            intermediateInput2.append(bg2);
            Object output2 = avgIntermediate.exec(intermediateInput2);
           
            DataBag bag = Util.createBag(new Tuple[]{(Tuple)output1, (Tuple)output2});
           
            Tuple finalTuple = TupleFactory.getInstance().newTuple(1);
            finalTuple.set(0, bag);
            Object output = avg.exec(finalTuple);
            String msg = "[Testing " + avgTypes[k] + " on input type: " + getInputType(avgTypes[k]) + " ( (output) " +
View Full Code Here

    @Test
    public void testCOUNTIntermed() throws Exception {
        Integer input[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
       
        DataBag intermediateInputBag = bagFactory.newDefaultBag();
        // call initial and then Intermed
        for (Integer i : input) {
            Tuple t = tupleFactory.newTuple(i);
            DataBag b = bagFactory.newDefaultBag();
            b.add(t);
            Tuple initialInput = tupleFactory.newTuple(b);
            EvalFunc<?> initial = new COUNT.Initial();
            intermediateInputBag.add((Tuple)initial.exec(initialInput));
        }
View Full Code Here

    @Test
    public void testCOUNT_STARIntermed() throws Exception {
        Integer input[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
       
        DataBag intermediateInputBag = bagFactory.newDefaultBag();
        // call initial and then Intermed
        for (Integer i : input) {
            Tuple t = tupleFactory.newTuple(i);
            DataBag b = bagFactory.newDefaultBag();
            b.add(t);
            Tuple initialInput = tupleFactory.newTuple(b);
            EvalFunc<?> initial = new COUNT_STAR.Initial();
            intermediateInputBag.add((Tuple)initial.exec(initialInput));
        }
View Full Code Here

    }
   
    @Test
    public void testStatsFunc() throws Exception {
        COV cov = new COV("a","b");
        DataBag dBag = DefaultBagFactory.getInstance().newDefaultBag();
        Tuple tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 1.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 4.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 8.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 4.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 7.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 8.0);
        dBag.add(tup1);
        DataBag dBag1 = DefaultBagFactory.getInstance().newDefaultBag();
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 2.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 2.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 3.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 3.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 2.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 4.0);
        dBag1.add(tup1);
        Tuple input = TupleFactory.getInstance().newTuple(2);
        input.set(0, dBag);
        input.set(1, dBag1);
        DataBag output = cov.exec(input);
        Iterator<Tuple> it = output.iterator();
        Tuple ans = (Tuple)it.next();
        assertEquals((String)ans.get(0),"a");
        assertEquals((String)ans.get(1),"b");
        assertEquals(1.11111, (Double)ans.get(2),0.0005);
       
        COR cor = new COR("a","b");
        dBag = DefaultBagFactory.getInstance().newDefaultBag();
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 1.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 4.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 8.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 4.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 7.0);
        dBag.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 8.0);
        dBag.add(tup1);
        dBag1 = DefaultBagFactory.getInstance().newDefaultBag();
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 2.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 2.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 3.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 3.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 2.0);
        dBag1.add(tup1);
        tup1 = TupleFactory.getInstance().newTuple(1);
        tup1.set(0, 4.0);
        dBag1.add(tup1);
        input = TupleFactory.getInstance().newTuple(2);
        input.set(0, dBag);
        input.set(1, dBag1);
        output = cor.exec(input);
        it = output.iterator();
        ans = (Tuple) it.next();
        assertEquals((String)ans.get(0),"a");
        assertEquals((String)ans.get(1),"b");
        assertEquals(0.582222509739582, (Double)ans.get(2) ,0.0005);
    }
View Full Code Here

TOP

Related Classes of org.apache.pig.data.DataBag

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.