Package org.apache.pig.data

Examples of org.apache.pig.data.DataBag$BagDelimiterTuple


        //(chuck,{(kick,3)})
        it = pig.openIterator("cc_rows");
        if (it.hasNext()) {
            Tuple t = it.next();
            Assert.assertEquals(t.get(0), "chuck");          
            DataBag columns = (DataBag) t.get(1);
            Iterator<Tuple> iter = columns.iterator();
            if(iter.hasNext())
            {
                Tuple column = iter.next();
                Assert.assertEquals(column.get(0), "kick");
                Assert.assertEquals(column.get(1), 3L);
View Full Code Here


        innerTuple.append("k");

        input.append(innerTuple);
        result = rbsc.exec(input);
        assertEquals("a b c d e f g h i j k", result);
        DataBag db = new DefaultDataBag();
        Tuple dbTuple = new DefaultTuple();
        dbTuple.append("l");
        dbTuple.append("m");
        db.add(dbTuple);
        innerTuple.append(db);
        result = rbsc.exec(input);
        assertEquals("a b c d e f g h i j k l m", result);
    }
View Full Code Here

        input.append(innerTuple);
        rbsc = new RangeBasedStringConcat("0,9", " ");
        result = rbsc.exec(input);
        assertEquals("a j k", result);
        DataBag db = new DefaultDataBag();
        Tuple dbTuple = new DefaultTuple();
        dbTuple.append("l");
        dbTuple.append("m");
        db.add(dbTuple);
        innerTuple.append(db);
        rbsc = new RangeBasedStringConcat("0,9,10", " ");
        result = rbsc.exec(input);
        assertEquals("a j k l m", result);
    }
View Full Code Here

        this.context = context;
    }

    public Tuple exec(Tuple input) throws IOException {
        Tuple row = TupleFactory.getInstance().newTuple(2);
        DataBag columns = BagFactory.getInstance().newDefaultBag();
        UDFContext context = UDFContext.getUDFContext();
        Properties property = context.getUDFProperties(ToCassandraBag.class);
        String fieldString = property.getProperty(getSchemaKey());
        String [] fieldnames = INPUT_DELIM.split(fieldString);
        if (log.isDebugEnabled()) {
            log.debug("Tuple: " + input.toDelimitedString(",") + " Fields: " + fieldString);
        }

        // IT IS ALWAYS ASSUMED THAT THE OBJECT AT INDEX 0 IS THE ROW KEY
        if(input.get(0)==null)
            throw new IOException("The object at index 0 is the row key, its value can't be null!");
        if (input.size() != fieldnames.length){
            throw new IOException("There is a mismatch between the number of inputs (" + input.size() + " and fieldnames (" + fieldnames.length + ")");
        }
        for (int i=1; i<input.size(); i++) {
            if (input.get(i) instanceof DataBag) {
                columns.addAll((DataBag) input.get(i));
            } else {
                columns.add(getColumnDef(fieldnames[i], input.get(i)));
            }
        }

        row.set(0, input.get(0));
        row.set(1, columns);
View Full Code Here

            throw new IOException("Invalid input. Please pass in both a list of column names and the columns themselves.");
        if (input.isNull(0) || input.isNull(1))
            return null;

        String columnSelector = input.get(0).toString();
        DataBag cassandraBag  = (DataBag)input.get(1);
        String[] selections   = DELIM_PATTERN.split(columnSelector);
        Tuple output          = TupleFactory.getInstance().newTuple(selections.length);

        for (int i = 0; i < selections.length; i++) {
            String selection = selections[i];
            if (selection.endsWith(GREEDY_OPERATOR)) {
                String namePrefix  = selection.substring(0,selection.length()-1);
                DataBag columnsBag = BagFactory.getInstance().newDefaultBag();

                // Find all columns in the input bag that begin with 'namePrefix'
                // and add them to the 'columnsBag'
                for (Tuple cassandraColumn : cassandraBag) {
                    String name = cassandraColumn.get(0).toString();
                    if (name.startsWith(namePrefix)) {
                        columnsBag.add(cassandraColumn);
                    }
                }

                // Sometimes this bag will have no columns in it, this _is_ the desired behavior.
                output.set(i, columnsBag);
View Full Code Here

    private static String INPUT_DELIM = "[\\s,]+";
    private static char OUTPUT_DELIM = ',';

    public Tuple exec(Tuple input) throws IOException {
        Tuple row = TupleFactory.getInstance().newTuple(2);
        DataBag columns = BagFactory.getInstance().newDefaultBag();
        UDFContext context = UDFContext.getUDFContext();
        Properties property = context.getUDFProperties(DeleteColumns.class);
        String fieldString = property.getProperty(UDFCONTEXT_SCHEMA_KEY);
        String [] fieldnames = fieldString.split(INPUT_DELIM);

        // IT IS ALWAYS ASSUMED THAT THE OBJECT AT INDEX 0 IS THE ROW KEY

        for (int i=1; i<input.size(); i++) {
            if (input.get(i) instanceof DataBag) {
                for (Tuple cassandraColumn : (DataBag) input.get(i)) {
                    String name = cassandraColumn.get(0).toString();
                    columns.add(getColumnDef(name, null));
                }
            } else {
                columns.add(getColumnDef(fieldnames[i], null));
            }
        }

        row.set(0, input.get(0));
        row.set(1, columns);
View Full Code Here

                Tuple tmp = (Tuple) o;
                if (tmp.size() > 0){
                    processTuple(tmp, builder);
                }
            } else if (o instanceof DataBag){
                DataBag db = (DataBag) o;
                for (Tuple tuple : db) {
                    processTuple(tuple, builder);
                }
            } else {
                String s = o.toString();
View Full Code Here

    public void testSortFRJoin() throws IOException{
      pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
      pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
      pigServer.registerQuery("D = ORDER A by y;");
      pigServer.registerQuery("E = ORDER B by y;");
      DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
      {
          pigServer.registerQuery("C = join D by $0, E by $0 using \"replicated\";");
          Iterator<Tuple> iter = pigServer.openIterator("C");
         
          while(iter.hasNext()) {
              dbfrj.add(iter.next());
          }
      }
      {
          pigServer.registerQuery("C = join D by $0, E by $0;");
          Iterator<Tuple> iter = pigServer.openIterator("C");
         
          while(iter.hasNext()) {
              dbshj.add(iter.next());
          }
      }
      Assert.assertEquals(dbfrj.size(), dbshj.size());
      Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));       
    }
View Full Code Here

    public void testDistinctFRJoin() throws IOException{
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
        pigServer.registerQuery("D = distinct A ;");
        pigServer.registerQuery("E = distinct B ;");
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
        {
            pigServer.registerQuery("C = join D by $0, E by $0 using \"replicated\";");
            Iterator<Tuple> iter = pigServer.openIterator("C");
           
            while(iter.hasNext()) {
                dbfrj.add(iter.next());
            }
        }
        {
            pigServer.registerQuery("C = join D by $0, E by $0;");
            Iterator<Tuple> iter = pigServer.openIterator("C");
           
            while(iter.hasNext()) {
                dbshj.add(iter.next());
            }
        }
        Assert.assertEquals(dbfrj.size(), dbshj.size());
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));       
      }
View Full Code Here

    @Test
    public void testUDFFRJ() throws IOException {
        pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:chararray,y:int);");
        pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "' as (x:chararray,y:int);");
       
        DataBag dbfrj = BagFactory.getInstance().newDefaultBag(), dbshj = BagFactory.getInstance().newDefaultBag();
        {
            String fSpec = FRJoin.class.getName()+ "('" + INPUT_FILE + "')";
            pigServer.registerFunction("FRJ", new FuncSpec(fSpec));
            pigServer.registerQuery("C = foreach A generate *, flatten(FRJ(*));");
            Iterator<Tuple> iter = pigServer.openIterator("C");
           
            while(iter.hasNext()) {
                dbfrj.add(iter.next());
            }
        }
        {
            pigServer.registerQuery("C = join A by $0, B by $0;");
            Iterator<Tuple> iter = pigServer.openIterator("C");
           
            while(iter.hasNext()) {
                dbshj.add(iter.next());
            }
        }
        Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);
        Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj));
    }
View Full Code Here

TOP

Related Classes of org.apache.pig.data.DataBag$BagDelimiterTuple

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.