Package org.apache.pig.data

Examples of org.apache.pig.data.DataBag


   
    @Test
    public void testFRJoinOut1() throws IOException{
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
        {
            pigServer.registerQuery("C = join A by $0, B by $0 using \"replicated\";");
            Iterator<Tuple> iter = pigServer.openIterator("C");
           
            while(iter.hasNext()) {
                dbfrj.add(iter.next());
            }
        }
        {
            pigServer.registerQuery("C = join A by $0, B by $0;");
            Iterator<Tuple> iter = pigServer.openIterator("C");
           
            while(iter.hasNext()) {
                dbshj.add(iter.next());
            }
        }
        Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));
    }
View Full Code Here


   
    @Test
    public void testFRJoinOut2() throws IOException{
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "';");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "';");
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
        {
            pigServer.registerQuery("C = join A by $0, B by $0 using \"replicated\";");
            Iterator<Tuple> iter = pigServer.openIterator("C");
           
            while(iter.hasNext()) {
                dbfrj.add(iter.next());
            }
        }
        {
            pigServer.registerQuery("C = join A by $0, B by $0;");
            Iterator<Tuple> iter = pigServer.openIterator("C");
           
            while(iter.hasNext()) {
                dbshj.add(iter.next());
            }
        }
        Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));
    }
View Full Code Here

    @Test
    public void testFRJoinOut3() throws IOException{
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        pigServer.registerQuery("C = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
        {
            pigServer.registerQuery("D = join A by $0, B by $0, C by $0 using \"replicated\";");
            Iterator<Tuple> iter = pigServer.openIterator("D");
           
            while(iter.hasNext()) {
                dbfrj.add(iter.next());
            }
        }
        {
            pigServer.registerQuery("D = join A by $0, B by $0, C by $0;");
            Iterator<Tuple> iter = pigServer.openIterator("D");
           
            while(iter.hasNext()) {
                dbshj.add(iter.next());
            }
        }
        Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));
    }
View Full Code Here

    @Test
    public void testFRJoinOut4() throws IOException{
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "';");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "';");
        pigServer.registerQuery("C = LOAD '" + INPUT_FILE + "';");
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
        {
            pigServer.registerQuery("D = join A by $0, B by $0, C by $0 using \"replicated\";");
            Iterator<Tuple> iter = pigServer.openIterator("D");
           
            while(iter.hasNext()) {
                dbfrj.add(iter.next());
            }
        }
        {
            pigServer.registerQuery("D = join A by $0, B by $0, C by $0;");
            Iterator<Tuple> iter = pigServer.openIterator("D");
           
            while(iter.hasNext()) {
                dbshj.add(iter.next());
            }
        }
        Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));
    }
View Full Code Here

   
    @Test
    public void testFRJoinOut5() throws IOException{
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
        {
            pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1) using \"replicated\";");
            Iterator<Tuple> iter = pigServer.openIterator("C");
           
            while(iter.hasNext()) {
                dbfrj.add(iter.next());
            }
        }
        {
            pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1);");
            Iterator<Tuple> iter = pigServer.openIterator("C");
           
            while(iter.hasNext()) {
                dbshj.add(iter.next());
            }
        }
        Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));
    }
View Full Code Here

   
    @Test
    public void testFRJoinOut6() throws IOException{
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "';");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "';");
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
        {
            pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1) using \"replicated\";");
            Iterator<Tuple> iter = pigServer.openIterator("C");
           
            while(iter.hasNext()) {
                dbfrj.add(iter.next());
            }
        }
        {
            pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1);");
            Iterator<Tuple> iter = pigServer.openIterator("C");
           
            while(iter.hasNext()) {
                dbshj.add(iter.next());
            }
        }
        Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));
    }
View Full Code Here

   
    @Test
    public void testFRJoinOut7() throws IOException{
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
        {
            pigServer.registerQuery("C = join A by $0, B by $0 using \"replicated\";");
            pigServer.registerQuery("D = join A by $1, B by $1 using \"replicated\";");
            pigServer.registerQuery("E = union C,D;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
           
            while(iter.hasNext()) {
                dbfrj.add(iter.next());
            }
        }
        {
            pigServer.registerQuery("C = join A by $0, B by $0;");
            pigServer.registerQuery("D = join A by $1, B by $1;");
            pigServer.registerQuery("E = union C,D;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
           
            while(iter.hasNext()) {
                dbshj.add(iter.next());
            }
        }
        Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));
    }
View Full Code Here

    @Test
    public void testFRJoinOut8() throws IOException {
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (x:int,y:int);");
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
        Map<String,Tuple> hashFRJoin = new HashMap<String,Tuple>();
        Map<String,Tuple> hashJoin = new HashMap<String,Tuple>();
        {
            pigServer.registerQuery("C = join A by $0 left, B by $0 using \"replicated\";");
            pigServer.registerQuery("D = join A by $1 left, B by $1 using \"replicated\";");
            pigServer.registerQuery("E = union C,D;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
           
            while(iter.hasNext()) {
                Tuple tuple = iter.next();
                String Key = tuple.toDelimitedString(",");
                hashFRJoin.put( Key, tuple);
                dbfrj.add(tuple);
               
            }
        }
        {
            pigServer.registerQuery("C = join A by $0 left, B by $0;");
            pigServer.registerQuery("D = join A by $1 left, B by $1;");
            pigServer.registerQuery("E = union C,D;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
            while(iter.hasNext()) {
                Tuple tuple = iter.next();
                String Key = tuple.toDelimitedString(",");
                hashJoin.put( Key, tuple);
                dbshj.add(tuple);
            }
        }
        Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);
               
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));
    }
View Full Code Here

   
    @Test
    public void testFRJoinOut9() throws IOException {
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (x:int,y:int);");
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
        Map<String,Tuple> hashFRJoin = new HashMap<String,Tuple>();
        Map<String,Tuple> hashJoin = new HashMap<String,Tuple>();
        {
            pigServer.registerQuery("C = join A by $0 left, B by $0 using \"repl\";");
            pigServer.registerQuery("D = join A by $1 left, B by $1 using \"repl\";");
            pigServer.registerQuery("E = union C,D;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
           
            while(iter.hasNext()) {
                Tuple tuple = iter.next();
                String Key = tuple.toDelimitedString(",");
                hashFRJoin.put( Key, tuple);
                dbfrj.add(tuple);
               
            }
        }
        {
            pigServer.registerQuery("C = join A by $0 left, B by $0;");
            pigServer.registerQuery("D = join A by $1 left, B by $1;");
            pigServer.registerQuery("E = union C,D;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
            while(iter.hasNext()) {
                Tuple tuple = iter.next();
                String Key = tuple.toDelimitedString(",");
                hashJoin.put( Key, tuple);
                dbshj.add(tuple);
            }
        }
        Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);       
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));
    }
View Full Code Here

                    LoadFunc lf = ((LoadFunc)pc.instantiateFuncFromSpec(ld.getLFile().getFuncSpec()));
                    String key = lf.getLoadCaster().bytesToCharArray(((DataByteArray)tup.get(keyField)).get());
                    Tuple csttup = TupleFactory.getInstance().newTuple(2);
                    csttup.set(0, key);
                    csttup.set(1, lf.getLoadCaster().bytesToInteger(((DataByteArray)tup.get(1)).get()));
                    DataBag vals = null;
                    if(replTbl.containsKey(key)){
                        vals = replTbl.get(key);
                    }
                    else{
                        vals = BagFactory.getInstance().newDefaultBag();
                        replTbl.put(key, vals);
                    }
                    vals.add(csttup);
                }
            } catch (ExecException e) {
                throw new IOException(e.getMessage());
            }
        }
View Full Code Here

TOP

Related Classes of org.apache.pig.data.DataBag

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.