Package org.apache.pig.data

Examples of org.apache.pig.data.DataBag


    @Test
    public void testSkewedJoinMapKey() throws IOException{
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE4 + "' as (m:[]);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE4 + "' as (n:[]);");
        try {
            DataBag dbfrj = BagFactory.getInstance().newDefaultBag();
            {
                pigServer.registerQuery("C = join A by (chararray)m#'a100', B by (chararray)n#'a100' using \"skewed\" parallel 20;");
                Iterator<Tuple> iter = pigServer.openIterator("C");
               
                while(iter.hasNext()) {
                    dbfrj.add(iter.next());
                }
            }
        }catch(Exception e) {
      System.out.println(e.getMessage());
      e.printStackTrace();
View Full Code Here


    @Test
    public void testSkewedJoinNullKeys() throws IOException {
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE5 + "' as (id,name);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE5 + "' as (id,name);");
        try {
            DataBag dbfrj = BagFactory.getInstance().newDefaultBag();
            {
                pigServer.registerQuery("C = join A by id, B by id using \"skewed\";");
                Iterator<Tuple> iter = pigServer.openIterator("C");
               
                while(iter.hasNext()) {
                    dbfrj.add(iter.next());
                }
            }
        } catch(Exception e) {
      System.out.println(e.getMessage());
      e.printStackTrace();
View Full Code Here

    @Test
    public void testSkewedJoinOuter() throws IOException {
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE5 + "' as (id,name);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE5 + "' as (id,name);");
        try {
            DataBag dbfrj = BagFactory.getInstance().newDefaultBag();
            {
                pigServer.registerQuery("C = join A by id left, B by id using \"skewed\";");
                Iterator<Tuple> iter = pigServer.openIterator("C");
               
                while(iter.hasNext()) {
                    dbfrj.add(iter.next());
                }
            }
            {
                pigServer.registerQuery("C = join A by id right, B by id using \"skewed\";");
                Iterator<Tuple> iter = pigServer.openIterator("C");
               
                while(iter.hasNext()) {
                    dbfrj.add(iter.next());
                }
            }
            {
                pigServer.registerQuery("C = join A by id full, B by id using \"skewed\";");
                Iterator<Tuple> iter = pigServer.openIterator("C");
               
                while(iter.hasNext()) {
                    dbfrj.add(iter.next());
                }
            }
        } catch(Exception e) {
            System.out.println(e.getMessage());
            e.printStackTrace();
View Full Code Here

        pigServer.registerQuery("C = FILTER A by id == 400;");
        pigServer.registerQuery("D = FILTER B by id == 400;");

       
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbrj = BagFactory.getInstance().newDefaultBag();
        {
            pigServer.registerQuery("E = join C by id, D by id using \"skewed\";");
            Iterator<Tuple> iter = pigServer.openIterator("E");
               
            while(iter.hasNext()) {
                dbfrj.add(iter.next());
            }
        }
        {
          pigServer.registerQuery("E = join C by id, D by id;");
          Iterator<Tuple> iter = pigServer.openIterator("E");
       
          while(iter.hasNext()) {
            dbrj.add(iter.next());
          }
        }
        Assert.assertEquals(dbfrj.size(), dbrj.size());
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbrj));      
      
    }
View Full Code Here

    public void testSkewedJoinManyReducers() throws IOException {
        pigServer.getPigContext().getProperties().setProperty("pig.skewedjoin.reduce.maxtuple", "2");
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE6 + "' as (id,name);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE7 + "' as (id,name);");
          
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbrj = BagFactory.getInstance().newDefaultBag();
        {
            pigServer.registerQuery("E = join A by id, B by id using \"skewed\" parallel 300;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
               
            while(iter.hasNext()) {
                dbfrj.add(iter.next());
            }
        }
        {
            pigServer.registerQuery("E = join A by id, B by id;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
       
            while(iter.hasNext()) {
                dbrj.add(iter.next());
            }
        }
        Assert.assertEquals(dbfrj.size(), dbrj.size());
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbrj));      
      
    }
View Full Code Here

    @Test
    public void testSkewedJoinUDF() throws IOException {
        PartitionSkewedKeys udf = new PartitionSkewedKeys(new String[]{"0.1", "2", "1.txt"});
        Tuple t = TupleFactory.getInstance().newTuple();
        t.append(3);    // use 3 reducers
        DataBag db = new DefaultDataBag();
        Tuple sample;
        for (int i=0;i<=3;i++) {
            sample = TupleFactory.getInstance().newTuple();
            if (i!=3)
                sample.append("1");
            else
                sample.append("2");
            sample.append((long)200);
            if (i!=3)
                sample.append((long)0);
            else
                sample.append((long)30);
            db.add(sample);
        }
        t.append(db);
        Map<String, Object> output = udf.exec(t);
        DataBag parList = (DataBag)output.get(PartitionSkewedKeys.PARTITION_LIST);
        for (Tuple par : parList) {
            if (par.get(0).equals("1")) {
                par.get(1).equals(0);
                par.get(2).equals(2);
            }
View Full Code Here

    public  void testBytesToBag() throws IOException
    {
        ResourceFieldSchema fs = GenRandomData.getFullTupTextDataBagFieldSchema();
       
        for (int i = 0; i < MAX; i++) {
            DataBag b = GenRandomData.genRandFullTupTextDataBag(r,5,100);
            DataBag convertedBag = ps.getLoadCaster().bytesToBag(b.toString().getBytes(), fs);
            assertTrue(TestHelper.bagEquals(b, convertedBag));
        }
       
    }
View Full Code Here

        assertTrue(DataType.equalByteArrays(t.toString().getBytes(), ((Utf8StorageConverter)ps.getLoadCaster()).toBytes(t)));
    }
       
    @Test
    public void testBagToBytes() throws IOException {
        DataBag b = GenRandomData.genRandFullTupTextDataBag(r,5,100);
        assertTrue(DataType.equalByteArrays(b.toString().getBytes(), ((Utf8StorageConverter)ps.getLoadCaster()).toBytes(b)));
    }
View Full Code Here

        assertTrue(DataType.equalByteArrays(DataType.mapToString(m).getBytes(), ((Utf8StorageConverter)ps.getLoadCaster()).toBytes(m)));
    }
   
    @Test
    public void testBytesToBagWithConversion() throws IOException {
        DataBag b = GenRandomData.genFloatDataBag(r,5,100);
        ResourceFieldSchema fs = GenRandomData.getFloatDataBagFieldSchema(5);
        DataBag convertedBag = ps.getLoadCaster().bytesToBag(b.toString().getBytes(), fs);
       
        Iterator<Tuple> iter1 = b.iterator();
        Iterator<Tuple> iter2 = convertedBag.iterator();
        for (int i=0;i<100;i++) {
            Tuple t1 = (Tuple)iter1.next();
            assertTrue(iter2.hasNext());
            Tuple t2 = (Tuple)iter2.next();
            for (int j=0;j<5;j++) {
View Full Code Here

        assertTrue(t==null);
       
        s = "{(a,b}";
        schema = Utils.getSchemaFromString("b:bag{t:tuple(a:chararray, b:chararray)}");
        rfs = new ResourceSchema(schema).getFields()[0];
        DataBag b = ps.getLoadCaster().bytesToBag(s.getBytes(), rfs);
        assertTrue(b==null);
       
        s = "{(a,b)";
        schema = Utils.getSchemaFromString("b:bag{t:tuple(a:chararray, b:chararray)}");
        rfs = new ResourceSchema(schema).getFields()[0];
View Full Code Here

TOP

Related Classes of org.apache.pig.data.DataBag

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.