Package org.apache.pig.data

Examples of org.apache.pig.data.BagFactory


            objList.add(new Double(1.0));
            objList.add(new Float(1.0));
            objList.add(new String("World!"));
            Tuple tuple = tupleFactory.newTuple(objList);

            BagFactory bagFactory = BagFactory.getInstance();
            DataBag bag = bagFactory.newDefaultBag();
            bag.add(tuple);

            Map<String, Object> mapInMap = new HashMap<String, Object>();
            mapInMap.put("int", new Integer(10));
            mapInMap.put("float", new Float(10.0));
View Full Code Here


  assertEquals("", m.get("k3"), "foo");


        TOP top = new TOP();
        TupleFactory tupleFactory = TupleFactory.getInstance();
        BagFactory bagFactory = DefaultBagFactory.getInstance();
        Tuple inputTuple = tupleFactory.newTuple(3);
        DataBag dBag = bagFactory.newDefaultBag();

        // set N = 10 i.e retain top 10 tuples
        inputTuple.set(0, 10);
        // compare tuples by field number 1
        inputTuple.set(1, 1);
        // set the data bag containing the tuples
        inputTuple.set(2, dBag);

        // generate tuples of the form (group-1, 1), (group-2, 2) ...
        for (long i = 0; i < 100; i++) {
            Tuple nestedTuple = tupleFactory.newTuple(2);
            nestedTuple.set(0, "group-" + i);
            nestedTuple.set(1, i);
            dBag.add(nestedTuple);
        }

        DataBag outBag = top.exec(inputTuple);
        assertEquals(outBag.size(), 10L);
        checkItemsGT(outBag, 1, 89);

        // two initial results
        Tuple init1 = (new TOP.Initial()).exec(inputTuple);
        Tuple init2 = (new TOP.Initial()).exec(inputTuple);
        // two intermediate results

        DataBag intermedBag = bagFactory.newDefaultBag();
        intermedBag.add(init1);
        intermedBag.add(init2);
        Tuple intermedInput = tupleFactory.newTuple(intermedBag);
        Tuple intermedOutput1 = (new TOP.Intermed()).exec(intermedInput);
        Tuple intermedOutput2 = (new TOP.Intermed()).exec(intermedInput);
        checkItemsGT((DataBag)intermedOutput1.get(2), 1, 94);

        // final result
        DataBag finalInputBag = bagFactory.newDefaultBag();
        finalInputBag.add(intermedOutput1);
        finalInputBag.add(intermedOutput2);
        Tuple finalInput = tupleFactory.newTuple(finalInputBag);
        outBag = (new TOP.Final()).exec(finalInput);
        assertEquals(outBag.size(), 10L);
View Full Code Here

    }

    @Test
    public void testDIFF() throws Exception {
        // Test it in the case with two bags.
        BagFactory bf = BagFactory.getInstance();
        TupleFactory tf = TupleFactory.getInstance();

        DataBag b1 = bf.newDefaultBag();
        DataBag b2 = bf.newDefaultBag();
        for (int i = 0; i < 10; i++) b1.add(tf.newTuple(new Integer(i)));
        for (int i = 0; i < 10; i += 2) b2.add(tf.newTuple(new Integer(i)));
        Tuple t = tf.newTuple(2);
        t.set(0, b1);
        t.set(1, b2);
View Full Code Here

        TupleFactory tf = TupleFactory.getInstance();
        return tf.newTuple( objList );
    }
   
    static DataBag createDataBag() {
        BagFactory bagFactory = BagFactory.getInstance();
        return bagFactory.newDefaultBag();
    }
View Full Code Here

    public void testMap() throws Exception {
        String size = "100";
        String numHash = "3";
        String hashFunc = "JENKINS";
        TupleFactory tf = TupleFactory.getInstance();
        BagFactory bf = BagFactory.getInstance();

        Tuple t = tf.newTuple(1);
        t.set(0, 1);
        DataBag b = bf.newDefaultBag();
        b.add(t);
        Tuple input = tf.newTuple(b);
       
        BuildBloom.Initial map =
            new BuildBloom.Initial(hashFunc, "fixed", size, numHash);
View Full Code Here

    public void testCombiner() throws Exception {
        String size = "100";
        String numHash = "3";
        String hashFunc = "jenkins";
        TupleFactory tf = TupleFactory.getInstance();
        BagFactory bf = BagFactory.getInstance();

        DataBag combinerBag = bf.newDefaultBag();
        for (int j = 0; j < 3; j++) { // map loop
            Tuple t = tf.newTuple(1);
            t.set(0, 10 + j);
            DataBag mapBag = bf.newDefaultBag();
            mapBag.add(t);
            Tuple input = tf.newTuple(mapBag);
            BuildBloom.Initial map =
                new BuildBloom.Initial(hashFunc, "fixed", size, numHash);
            combinerBag.add(map.exec(input));
View Full Code Here

    public void testSingleKey() throws Exception {
        String size = "100";
        String numHash = "3";
        String hashFunc = "MURMUR";
        TupleFactory tf = TupleFactory.getInstance();
        BagFactory bf = BagFactory.getInstance();

        DataBag reducerBag = bf.newDefaultBag();
        for (int i = 0; i < 3; i++) { // combiner loop
            DataBag combinerBag = bf.newDefaultBag();
            for (int j = 0; j < 3; j++) { // map loop
                Tuple t = tf.newTuple(1);
                t.set(0, i * 10 + j);
                DataBag mapBag = bf.newDefaultBag();
                mapBag.add(t);
                Tuple input = tf.newTuple(mapBag);
                BuildBloom.Initial map =
                    new BuildBloom.Initial(hashFunc, "fixed", size, numHash);
                combinerBag.add(map.exec(input));
View Full Code Here

    public void testMultiKey() throws Exception {
        String numElements = "10";
        String falsePositive = "0.001";
        String hashFunc = "murmur";
        TupleFactory tf = TupleFactory.getInstance();
        BagFactory bf = BagFactory.getInstance();

        String[][] strs = {
            { "fred", "joe", "bob" },
            { "mary", "sally", "jane" },
            { "fido", "spot", "fluffly" }};

        DataBag reducerBag = bf.newDefaultBag();
        for (int i = 0; i < 3; i++) { // combiner loop
            DataBag combinerBag = bf.newDefaultBag();
            for (int j = 0; j < 3; j++) { // map loop
                Tuple t = tf.newTuple(2);
                t.set(0, i * 10 + j);
                t.set(1, strs[i][j]);
                DataBag mapBag = bf.newDefaultBag();
                mapBag.add(t);
                Tuple input = tf.newTuple(mapBag);
                BuildBloom.Initial map =
                    new BuildBloom.Initial(hashFunc, numElements,
                        falsePositive);
View Full Code Here

            objList.add(new Double(1.0));
            objList.add(new Float(1.0));
            objList.add(new String("World!"));
            Tuple tuple = tupleFactory.newTuple(objList);

            BagFactory bagFactory = BagFactory.getInstance();
            DataBag bag = bagFactory.newDefaultBag();
            bag.add(tuple);

            Map<String, Object> mapInMap = new HashMap<String, Object>();
            mapInMap.put("int", new Integer(10));
            mapInMap.put("float", new Float(10.0));
View Full Code Here

        TupleFactory tf = TupleFactory.getInstance();
        return tf.newTuple( objList );
    }
   
    static DataBag createDataBag() {
        BagFactory bagFactory = BagFactory.getInstance();
        return bagFactory.newDefaultBag();
    }
View Full Code Here

TOP

Related Classes of org.apache.pig.data.BagFactory

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.