Package org.apache.pig.builtin

Examples of org.apache.pig.builtin.TOBAG


    @Test
    public void testMiscFunc() throws Exception {
       
        //TEST TOBAG
        TOBAG tb = new TOBAG();

        //test output schema of udf
        Schema expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);

        //check schema of TOBAG when given input tuple having only integers
        Schema inputSch = new Schema();
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add another int column
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add a long column
        inputSch.add(new FieldSchema(null, DataType.LONG));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has ints and long",
                expectedSch, tb.outputSchema(inputSch));

       
        //test schema when input is a tuple with inner schema
        Schema tupInSchema = new Schema(new FieldSchema("x", DataType.CHARARRAY));
        inputSch = new Schema();
        inputSch.add(new FieldSchema("a", tupInSchema, DataType.TUPLE));
        Schema inputSchCp = new Schema(inputSch);
        inputSchCp.getField(0).alias = null;
        expectedSch = new Schema(new FieldSchema(null, inputSchCp, DataType.BAG));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));
       
        inputSch.add(new FieldSchema("b", tupInSchema, DataType.TUPLE));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));
       
        //add a column of type tuple with different inner schema
        tupInSchema = new Schema(new FieldSchema("x", DataType.BYTEARRAY));
        inputSch.add(new FieldSchema("c", tupInSchema, DataType.TUPLE));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has cols of type tuple with diff inner schema",
                expectedSch, tb.outputSchema(inputSch));

       
       
        Tuple input = TupleFactory.getInstance().newTuple();
        for (int i = 0; i < 100; ++i) {
            input.append(i);
        }     
        //test null value in input
        input.append(null);
       
        Set<Integer> s = new HashSet<Integer>();
        DataBag db = tb.exec(input);
        for (Tuple t : db) {
            s.add((Integer) t.get(0));
        }

        // finally check the bag had everything we put in the tuple.
View Full Code Here


    @Test
    public void testMiscFunc() throws Exception {
       
        //TEST TOBAG
        TOBAG tb = new TOBAG();

        //test output schema of udf
        Schema expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);

        //check schema of TOBAG when given input tuple having only integers
        Schema inputSch = new Schema();
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add another int column
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add a long column
        inputSch.add(new FieldSchema(null, DataType.LONG));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has ints and long",
                expectedSch, tb.outputSchema(inputSch));

       
        //test schema when input is a tuple with inner schema
        Schema tupInSchema = new Schema(new FieldSchema("x", DataType.CHARARRAY));
        inputSch = new Schema();
        inputSch.add(new FieldSchema("a", tupInSchema, DataType.TUPLE));
        Schema inputSchCp = new Schema(inputSch);
        inputSchCp.getField(0).alias = null;
        expectedSch = new Schema(new FieldSchema(null, inputSchCp, DataType.BAG));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));
       
        inputSch.add(new FieldSchema("b", tupInSchema, DataType.TUPLE));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));
       
        //add a column of type tuple with different inner schema
        tupInSchema = new Schema(new FieldSchema("x", DataType.BYTEARRAY));
        inputSch.add(new FieldSchema("c", tupInSchema, DataType.TUPLE));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has cols of type tuple with diff inner schema",
                expectedSch, tb.outputSchema(inputSch));

       
       
        Tuple input = TupleFactory.getInstance().newTuple();
        for (int i = 0; i < 100; ++i) {
            input.append(i);
        }     
        //test null value in input
        input.append(null);
       
        Set<Integer> s = new HashSet<Integer>();
        DataBag db = tb.exec(input);
        for (Tuple t : db) {
            s.add((Integer) t.get(0));
        }

        // finally check the bag had everything we put in the tuple.
View Full Code Here

    @Test
    public void testMiscFunc() throws Exception {
       
        //TEST TOBAG
        TOBAG tb = new TOBAG();

        //test output schema of udf
        Schema expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);

        //check schema of TOBAG when given input tuple having only integers
        Schema inputSch = new Schema();
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add another int column
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add a long column
        inputSch.add(new FieldSchema(null, DataType.LONG));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has ints and long",
                expectedSch, tb.outputSchema(inputSch));

       
        //test schema when input is a tuple with inner schema
        Schema tupInSchema = new Schema(new FieldSchema("x", DataType.CHARARRAY));
        inputSch = new Schema();
        inputSch.add(new FieldSchema("a", tupInSchema, DataType.TUPLE));
        Schema inputSchCp = new Schema(inputSch);
        inputSchCp.getField(0).alias = null;
        expectedSch = new Schema(new FieldSchema(null, inputSchCp, DataType.BAG));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));
       
        inputSch.add(new FieldSchema("b", tupInSchema, DataType.TUPLE));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));
       
        //add a column of type tuple with different inner schema
        tupInSchema = new Schema(new FieldSchema("x", DataType.BYTEARRAY));
        inputSch.add(new FieldSchema("c", tupInSchema, DataType.TUPLE));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has cols of type tuple with diff inner schema",
                expectedSch, tb.outputSchema(inputSch));

       
       
        Tuple input = TupleFactory.getInstance().newTuple();
        for (int i = 0; i < 100; ++i) {
            input.append(i);
        }     
        //test null value in input
        input.append(null);
       
        Set<Integer> s = new HashSet<Integer>();
        DataBag db = tb.exec(input);
        for (Tuple t : db) {
            s.add((Integer) t.get(0));
        }

        // finally check the bag had everything we put in the tuple.
View Full Code Here

    }

    @Test
    public void testToBag() throws Exception {
        //TEST TOBAG
        TOBAG tb = new TOBAG();

        //test output schema of udf
        Schema expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);

        //check schema of TOBAG when given input tuple having only integers
        Schema inputSch = new Schema();
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add another int column
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add a long column
        inputSch.add(new FieldSchema(null, DataType.LONG));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has ints and long",
                expectedSch, tb.outputSchema(inputSch));


        //test schema when input is a tuple with inner schema
        Schema tupInSchema = new Schema(new FieldSchema("x", DataType.CHARARRAY));
        inputSch = new Schema();
        inputSch.add(new FieldSchema("a", tupInSchema, DataType.TUPLE));
        Schema inputSchCp = new Schema(inputSch);
        inputSchCp.getField(0).alias = null;
        expectedSch = new Schema(new FieldSchema(null, inputSchCp, DataType.BAG));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));

        inputSch.add(new FieldSchema("b", tupInSchema, DataType.TUPLE));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));

        //add a column of type tuple with different inner schema
        tupInSchema = new Schema(new FieldSchema("x", DataType.BYTEARRAY));
        inputSch.add(new FieldSchema("c", tupInSchema, DataType.TUPLE));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has cols of type tuple with diff inner schema",
                expectedSch, tb.outputSchema(inputSch));



        Tuple input = TupleFactory.getInstance().newTuple();
        for (int i = 0; i < 100; ++i) {
            input.append(i);
        }
        //test null value in input
        input.append(null);

        Set<Integer> s = new HashSet<Integer>();
        DataBag db = tb.exec(input);
        for (Tuple t : db) {
            s.add((Integer) t.get(0));
        }

        // finally check the bag had everything we put in the tuple.
View Full Code Here

    public void testTOBAGSupportsTuplesInInput() throws IOException {
        String[][] expected = {
                { "a", "b" },
                { "c", "d" }
        };
        TOBAG function = new TOBAG();
        Tuple input = TupleFactory.getInstance().newTuple(); // input represents a tuple of all the params sent to TOBAG
        Tuple firstItem = TupleFactory.getInstance().newTuple(); // first item of the params is a Tuple
        firstItem.append(expected[0][0]); // containing a and b
        firstItem.append(expected[0][1]);
        Tuple secondItem = TupleFactory.getInstance().newTuple(); // second item of the params is a Tuple
        secondItem.append(expected[1][0]); // containing c and d
        secondItem.append(expected[1][1]);

        input.append(firstItem);
        input.append(secondItem);

        DataBag result = function.exec(input); // run TOBAG on ((a,b),(c,d))

        assertEquals("number of tuples in the bag", 2, result.size()); // we should have 2 tuples in the output bag
        int position = 0;
        for (Tuple t : result) {
            assertEquals("number of items in tuple " + position, 2, t.size()); // each tuple should contain 2 items
View Full Code Here

    }

    @Test
    public void testToBag() throws Exception {
        //TEST TOBAG
        TOBAG tb = new TOBAG();

        //test output schema of udf
        Schema expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);

        //check schema of TOBAG when given input tuple having only integers
        Schema inputSch = new Schema();
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add another int column
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add a long column
        inputSch.add(new FieldSchema(null, DataType.LONG));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has ints and long",
                expectedSch, tb.outputSchema(inputSch));


        //test schema when input is a tuple with inner schema
        Schema tupInSchema = new Schema(new FieldSchema("x", DataType.CHARARRAY));
        inputSch = new Schema();
        inputSch.add(new FieldSchema("a", tupInSchema, DataType.TUPLE));
        Schema inputSchCp = new Schema(inputSch);
        inputSchCp.getField(0).alias = null;
        expectedSch = new Schema(new FieldSchema(null, inputSchCp, DataType.BAG));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));

        inputSch.add(new FieldSchema("b", tupInSchema, DataType.TUPLE));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));

        //add a column of type tuple with different inner schema
        tupInSchema = new Schema(new FieldSchema("x", DataType.BYTEARRAY));
        inputSch.add(new FieldSchema("c", tupInSchema, DataType.TUPLE));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has cols of type tuple with diff inner schema",
                expectedSch, tb.outputSchema(inputSch));



        Tuple input = TupleFactory.getInstance().newTuple();
        for (int i = 0; i < 100; ++i) {
            input.append(i);
        }
        //test null value in input
        input.append(null);

        Set<Integer> s = new HashSet<Integer>();
        DataBag db = tb.exec(input);
        for (Tuple t : db) {
            s.add((Integer) t.get(0));
        }

        // finally check the bag had everything we put in the tuple.
View Full Code Here

    public void testTOBAGSupportsTuplesInInput() throws IOException {
        String[][] expected = {
                { "a", "b" },
                { "c", "d" }
        };
        TOBAG function = new TOBAG();
        Tuple input = TupleFactory.getInstance().newTuple(); // input represents a tuple of all the params sent to TOBAG
        Tuple firstItem = TupleFactory.getInstance().newTuple(); // first item of the params is a Tuple
        firstItem.append(expected[0][0]); // containing a and b
        firstItem.append(expected[0][1]);
        Tuple secondItem = TupleFactory.getInstance().newTuple(); // second item of the params is a Tuple
        secondItem.append(expected[1][0]); // containing c and d
        secondItem.append(expected[1][1]);

        input.append(firstItem);
        input.append(secondItem);

        DataBag result = function.exec(input); // run TOBAG on ((a,b),(c,d))

        assertEquals("number of tuples in the bag", 2, result.size()); // we should have 2 tuples in the output bag
        int position = 0;
        for (Tuple t : result) {
            assertEquals("number of items in tuple " + position, 2, t.size()); // each tuple should contain 2 items
View Full Code Here

    }

    @Test
    public void testToBag() throws Exception {
        //TEST TOBAG
        TOBAG tb = new TOBAG();

        //test output schema of udf
        Schema expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);

        //check schema of TOBAG when given input tuple having only integers
        Schema inputSch = new Schema();
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add another int column
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add a long column
        inputSch.add(new FieldSchema(null, DataType.LONG));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has ints and long",
                expectedSch, tb.outputSchema(inputSch));


        //test schema when input is a tuple with inner schema
        Schema tupInSchema = new Schema(new FieldSchema("x", DataType.CHARARRAY));
        inputSch = new Schema();
        inputSch.add(new FieldSchema("a", tupInSchema, DataType.TUPLE));
        Schema inputSchCp = new Schema(inputSch);
        inputSchCp.getField(0).alias = null;
        expectedSch = new Schema(new FieldSchema(null, inputSchCp, DataType.BAG));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));

        inputSch.add(new FieldSchema("b", tupInSchema, DataType.TUPLE));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));

        //add a column of type tuple with different inner schema
        tupInSchema = new Schema(new FieldSchema("x", DataType.BYTEARRAY));
        inputSch.add(new FieldSchema("c", tupInSchema, DataType.TUPLE));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has cols of type tuple with diff inner schema",
                expectedSch, tb.outputSchema(inputSch));



        Tuple input = TupleFactory.getInstance().newTuple();
        for (int i = 0; i < 100; ++i) {
            input.append(i);
        }
        //test null value in input
        input.append(null);

        Set<Integer> s = new HashSet<Integer>();
        DataBag db = tb.exec(input);
        for (Tuple t : db) {
            s.add((Integer) t.get(0));
        }

        // finally check the bag had everything we put in the tuple.
View Full Code Here

    public void testTOBAGSupportsTuplesInInput() throws IOException {
        String[][] expected = {
                { "a", "b" },
                { "c", "d" }
        };
        TOBAG function = new TOBAG();
        Tuple input = TupleFactory.getInstance().newTuple(); // input represents a tuple of all the params sent to TOBAG
        Tuple firstItem = TupleFactory.getInstance().newTuple(); // first item of the params is a Tuple
        firstItem.append(expected[0][0]); // containing a and b
        firstItem.append(expected[0][1]);
        Tuple secondItem = TupleFactory.getInstance().newTuple(); // second item of the params is a Tuple
        secondItem.append(expected[1][0]); // containing c and d
        secondItem.append(expected[1][1]);

        input.append(firstItem);
        input.append(secondItem);

        DataBag result = function.exec(input); // run TOBAG on ((a,b),(c,d))

        assertEquals("number of tuples in the bag", 2, result.size()); // we should have 2 tuples in the output bag
        int position = 0;
        for (Tuple t : result) {
            assertEquals("number of items in tuple " + position, 2, t.size()); // each tuple should contain 2 items
View Full Code Here

    }

    @Test
    public void testToBag() throws Exception {
        //TEST TOBAG
        TOBAG tb = new TOBAG();

        //test output schema of udf
        Schema expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);

        //check schema of TOBAG when given input tuple having only integers
        Schema inputSch = new Schema();
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add another int column
        inputSch.add(new FieldSchema(null, DataType.INTEGER));
        assertEquals("schema of tobag when input has only ints",
                expectedSch, tb.outputSchema(inputSch));

        //add a long column
        inputSch.add(new FieldSchema(null, DataType.LONG));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has ints and long",
                expectedSch, tb.outputSchema(inputSch));


        //test schema when input is a tuple with inner schema
        Schema tupInSchema = new Schema(new FieldSchema("x", DataType.CHARARRAY));
        inputSch = new Schema();
        inputSch.add(new FieldSchema("a", tupInSchema, DataType.TUPLE));
        Schema inputSchCp = new Schema(inputSch);
        inputSchCp.getField(0).alias = null;
        expectedSch = new Schema(new FieldSchema(null, inputSchCp, DataType.BAG));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));

        inputSch.add(new FieldSchema("b", tupInSchema, DataType.TUPLE));
        assertEquals("schema of tobag when input has cols of type tuple ",
                expectedSch, tb.outputSchema(inputSch));

        //add a column of type tuple with different inner schema
        tupInSchema = new Schema(new FieldSchema("x", DataType.BYTEARRAY));
        inputSch.add(new FieldSchema("c", tupInSchema, DataType.TUPLE));
        //expect null inner schema
        expectedSch =
            Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
        assertEquals("schema of tobag when input has cols of type tuple with diff inner schema",
                expectedSch, tb.outputSchema(inputSch));



        Tuple input = TupleFactory.getInstance().newTuple();
        for (int i = 0; i < 100; ++i) {
            input.append(i);
        }
        //test null value in input
        input.append(null);

        Set<Integer> s = new HashSet<Integer>();
        DataBag db = tb.exec(input);
        for (Tuple t : db) {
            s.add((Integer) t.get(0));
        }

        // finally check the bag had everything we put in the tuple.
View Full Code Here

TOP

Related Classes of org.apache.pig.builtin.TOBAG

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.