Package org.apache.pig.data

Examples of org.apache.pig.data.DataBag


        public Tuple exec(Tuple input) throws IOException {
            Tuple t = mTupleFactory.newTuple(2);
            try {
                // input is a bag with one tuple containing
                // the column we are trying to avg
                DataBag bg = (DataBag) input.get(0);
                DataByteArray dba = null;
                if(bg.iterator().hasNext()) {
                    Tuple tp = bg.iterator().next();
                    dba = (DataByteArray)tp.get(0);
                }
                t.set(0, dba != null ? Double.valueOf(dba.toString()) : null);
                if (dba == null)
                    t.set(1, 0L);
View Full Code Here


    static public class Intermediate extends EvalFunc<Tuple> {
        @Override
        public Tuple exec(Tuple input) throws IOException {
            try {
                DataBag b = (DataBag)input.get(0);
                return combine(b);
            } catch (ExecException ee) {
                throw ee;
            } catch (Exception e) {
                int errCode = 2106;
View Full Code Here

    static public class Final extends EvalFunc<Double> {
        @Override
        public Double exec(Tuple input) throws IOException {
            try {
                DataBag b = (DataBag)input.get(0);
                Tuple combined = combine(b);

                Double sum = (Double)combined.get(0);
                if(sum == null) {
                    return null;
View Full Code Here

    }

    static protected Long sumLongs(Tuple input) throws ExecException {
        // Can't just call sum, because the intermediate results are
        // now Longs instead of Integers.
        DataBag values = (DataBag)input.get(0);

        // if we were handed an empty bag, return NULL
        // this is in compliance with SQL standard
        if(values.size() == 0) {
            return null;
        }

        long sum = 0;
        boolean sawNonNull = false;
        for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
            Tuple t = it.next();
            try {
                Long l = (Long)(t.get(0));
                if (l == null) continue;
                sawNonNull = true;
View Full Code Here

            return null;
        }
    }

    static protected  Long sum(Tuple input) throws ExecException {
        DataBag values = (DataBag)input.get(0);
       
        // if we were handed an empty bag, return NULL
        // this is in compliance with SQL standard
        if(values.size() == 0) {
            return null;
        }

        long sum = 0;
        boolean sawNonNull = false;
        for (Iterator<Tuple> it = values.iterator(); it.hasNext();) {
            Tuple t = it.next();
            try {
                Integer i = (Integer)(t.get(0));
                if (i == null) continue;
                sawNonNull = true;
View Full Code Here

            // Initial is called in the map - for SUM
            // we just send the tuple down
            try {
                // input is a bag with one tuple containing
                // the column we are trying to sum
                DataBag bg = (DataBag) input.get(0);
                Integer i = null;
                if(bg.iterator().hasNext()) {
                    Tuple tp = bg.iterator().next();
                    i = (Integer)tp.get(0);
                }
                return tfact.newTuple(i != null ?
                        Long.valueOf(i) : null);
            }catch(NumberFormatException nfe){
View Full Code Here

        pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
        pigServer.registerQuery("C = GROUP A by id;");
        pigServer.registerQuery("D = GROUP B by id;");
       
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
        {
            pigServer.registerQuery("E = join C by group, D by group using \"skewed\" parallel 5;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
           
            while(iter.hasNext()) {
                dbfrj.add(iter.next());
            }
        }
        {
            pigServer.registerQuery("E = join C by group, D by group;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
           
            while(iter.hasNext()) {
                dbshj.add(iter.next());
            }
        }
        Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));
    }   
View Full Code Here

        pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());

        pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
        try {
            DataBag dbfrj = BagFactory.getInstance().newDefaultBag();
            DataBag dbshj = BagFactory.getInstance().newDefaultBag();
            {
                pigServer.registerQuery("C = join A by (id, name), B by (id, name) using \"skewed\" parallel 5;");
                Iterator<Tuple> iter = pigServer.openIterator("C");

                while(iter.hasNext()) {
                    dbfrj.add(iter.next());
                }
            }
      {
              pigServer.registerQuery("E = join A by(id, name), B by (id, name);");
              Iterator<Tuple> iter = pigServer.openIterator("E");

              while(iter.hasNext()) {
                    dbshj.add(iter.next());
          }
            }
            Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);
            Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));

        }catch(Exception e) {
             fail(e.getMessage());
        }
View Full Code Here

    @Test
    public void testSkewedJoinReducers() throws IOException{
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
        try {
            DataBag dbfrj = BagFactory.getInstance().newDefaultBag();
            {
                pigServer.registerQuery("C = join A by id, B by id using \"skewed\" parallel 1;");
                Iterator<Tuple> iter = pigServer.openIterator("C");
               
                while(iter.hasNext()) {
                    dbfrj.add(iter.next());
                }
            }
        }catch(Exception e) {
          fail("Should not throw exception, should continue execution");
        }
View Full Code Here

    public void testSkewedJoin3Way() throws IOException{
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
        pigServer.registerQuery("C = LOAD '" + INPUT_FILE3 + "' as (id, name);");
        try {
            DataBag dbfrj = BagFactory.getInstance().newDefaultBag();
            {
                pigServer.registerQuery("D = join A by id, B by id, C by id using \"skewed\" parallel 5;");
                Iterator<Tuple> iter = pigServer.openIterator("D");
               
                while(iter.hasNext()) {
                    dbfrj.add(iter.next());
                }
            }
        }catch(Exception e) {
          return;
        }
View Full Code Here

TOP

Related Classes of org.apache.pig.data.DataBag

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.