Package org.apache.pig.data

Examples of org.apache.pig.data.Tuple


    }

    @Test
    public void testRange() throws Exception {
        RangeBasedStringConcat rbsc = new RangeBasedStringConcat("0,1", " ");
        Tuple input = new DefaultTuple();
        for (String field : fields) {
            input.append(field);
        }
        String result = rbsc.exec(input);
        assertEquals("a b", result);
        rbsc = new RangeBasedStringConcat("2,6", " ");
        result = rbsc.exec(input);
        assertEquals("c g", result);
        //test out of range
        rbsc = new RangeBasedStringConcat("0,9,1000", " ");
        result = rbsc.exec(input);
        assertEquals("a", result);

        Tuple innerTuple = new DefaultTuple();
        innerTuple.append("j");
        innerTuple.append("k");

        input.append(innerTuple);
        rbsc = new RangeBasedStringConcat("0,9", " ");
        result = rbsc.exec(input);
        assertEquals("a j k", result);
        DataBag db = new DefaultDataBag();
        Tuple dbTuple = new DefaultTuple();
        dbTuple.append("l");
        dbTuple.append("m");
        db.add(dbTuple);
        innerTuple.append(db);
        rbsc = new RangeBasedStringConcat("0,9,10", " ");
        result = rbsc.exec(input);
        assertEquals("a j k l m", result);
View Full Code Here


    /**
     * Creates an instance that remembers the given context string.
     *
     * @param context value stored in the {@code context} field; presumably used to keep the
     *                schema keys of different UDF instances apart -- TODO confirm against
     *                {@code getSchemaKey()}, which is not visible here
     */
    public ToCassandraBag(String context) {
        this.context = context;
    }

    public Tuple exec(Tuple input) throws IOException {
        Tuple row = TupleFactory.getInstance().newTuple(2);
        DataBag columns = BagFactory.getInstance().newDefaultBag();
        UDFContext context = UDFContext.getUDFContext();
        Properties property = context.getUDFProperties(ToCassandraBag.class);
        String fieldString = property.getProperty(getSchemaKey());
        String [] fieldnames = INPUT_DELIM.split(fieldString);
        if (log.isDebugEnabled()) {
            log.debug("Tuple: " + input.toDelimitedString(",") + " Fields: " + fieldString);
        }

        // IT IS ALWAYS ASSUMED THAT THE OBJECT AT INDEX 0 IS THE ROW KEY
        if(input.get(0)==null)
            throw new IOException("The object at index 0 is the row key, its value can't be null!");
        if (input.size() != fieldnames.length){
            throw new IOException("There is a mismatch between the number of inputs (" + input.size() + " and fieldnames (" + fieldnames.length + ")");
        }
        for (int i=1; i<input.size(); i++) {
            if (input.get(i) instanceof DataBag) {
                columns.addAll((DataBag) input.get(i));
            } else {
                columns.add(getColumnDef(fieldnames[i], input.get(i)));
            }
        }

        row.set(0, input.get(0));
        row.set(1, columns);
        return row;
    }
View Full Code Here

        row.set(1, columns);
        return row;
    }

    private Tuple getColumnDef(String name, Object value) throws ExecException {
        Tuple column = TupleFactory.getInstance().newTuple(2);
        column.set(0, name);
        column.set(1, value);
        return column;
    }
View Full Code Here

            return null;

        String columnSelector = input.get(0).toString();
        DataBag cassandraBag  = (DataBag)input.get(1);
        String[] selections   = DELIM_PATTERN.split(columnSelector);
        Tuple output          = TupleFactory.getInstance().newTuple(selections.length);

        for (int i = 0; i < selections.length; i++) {
            String selection = selections[i];
            if (selection.endsWith(GREEDY_OPERATOR)) {
                String namePrefix  = selection.substring(0,selection.length()-1);
                DataBag columnsBag = BagFactory.getInstance().newDefaultBag();

                // Find all columns in the input bag that begin with 'namePrefix'
                // and add them to the 'columnsBag'
                for (Tuple cassandraColumn : cassandraBag) {
                    String name = cassandraColumn.get(0).toString();
                    if (name.startsWith(namePrefix)) {
                        columnsBag.add(cassandraColumn);
                    }
                }

                // Sometimes this bag will have no columns in it, this _is_ the desired behavior.
                output.set(i, columnsBag);

            } else {

                // Find the column in the input bag that has a name equal to 'selection'
                // and add _only_ the value to the output tuple. This is what you actually
                // want since you're specifying both an order and a name in the 'columnSelector'
                // string.
                for (Tuple cassandraColumn : cassandraBag) {
                    String name = cassandraColumn.get(0).toString();
                    if (name.equals(selection)) {
                        output.set(i, cassandraColumn.get(1));
                        break;
                    }
                }
            }
        }
View Full Code Here

    private static String UDFCONTEXT_SCHEMA_KEY = "cassandra.input_field_schema";
    private static String INPUT_DELIM = "[\\s,]+";
    private static char OUTPUT_DELIM = ',';

    public Tuple exec(Tuple input) throws IOException {
        Tuple row = TupleFactory.getInstance().newTuple(2);
        DataBag columns = BagFactory.getInstance().newDefaultBag();
        UDFContext context = UDFContext.getUDFContext();
        Properties property = context.getUDFProperties(DeleteColumns.class);
        String fieldString = property.getProperty(UDFCONTEXT_SCHEMA_KEY);
        String [] fieldnames = fieldString.split(INPUT_DELIM);

        // IT IS ALWAYS ASSUMED THAT THE OBJECT AT INDEX 0 IS THE ROW KEY

        for (int i=1; i<input.size(); i++) {
            if (input.get(i) instanceof DataBag) {
                for (Tuple cassandraColumn : (DataBag) input.get(i)) {
                    String name = cassandraColumn.get(0).toString();
                    columns.add(getColumnDef(name, null));
                }
            } else {
                columns.add(getColumnDef(fieldnames[i], null));
            }
        }

        row.set(0, input.get(0));
        row.set(1, columns);
        return row;
    }
View Full Code Here

        row.set(1, columns);
        return row;
    }

    private Tuple getColumnDef(String name, Object value) throws ExecException {
        Tuple column = TupleFactory.getInstance().newTuple(2);
        column.set(0, name);
        column.set(1, value);
        return column;
    }
View Full Code Here

    }

    private void appendObject(Object o, Appendable builder) throws IOException {
        if (o != null) {
            if (o instanceof Tuple){
                Tuple tmp = (Tuple) o;
                if (tmp.size() > 0){
                    processTuple(tmp, builder);
                }
            } else if (o instanceof DataBag){
                DataBag db = (DataBag) o;
                for (Tuple tuple : db) {
View Full Code Here

    @Test
    public void test() throws Exception {
        ToCassandraBag tcb = new ToCassandraBag();
        UDFContext context = UDFContext.getUDFContext();
        Properties properties = context.getUDFProperties(ToCassandraBag.class);
        Tuple input = new DefaultTuple();
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < fields.length; i++){
            builder.append(fields[i]);
            input.append("foo" + i);
            if (i < fields.length - 1){
                builder.append(',');
            }
        }
        properties.setProperty(ToCassandraBag.UDFCONTEXT_SCHEMA_KEY + ".default_context", builder.toString());
        Tuple tuple = tcb.exec(input);
        assertNotNull("Tuple is null", tuple);
        assertEquals(2, tuple.size());
        //first is the key, rest is a set of columns
        Object one = tuple.get(0);
        assertTrue(one instanceof String);
        Object two = tuple.get(1);
        assertTrue(two instanceof DataBag);
        //Bad input
        input = new DefaultTuple();
        input.append(null);
        input.append("foo");
View Full Code Here

            pigServer.registerQuery("D = join A by $1 left, B by $1 using \"replicated\";");
            pigServer.registerQuery("E = union C,D;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
           
            while(iter.hasNext()) {
                Tuple tuple = iter.next();
                String Key = tuple.toDelimitedString(",");
                hashFRJoin.put( Key, tuple);
                dbfrj.add(tuple);
               
            }
        }
        {
            pigServer.registerQuery("C = join A by $0 left, B by $0;");
            pigServer.registerQuery("D = join A by $1 left, B by $1;");
            pigServer.registerQuery("E = union C,D;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
            while(iter.hasNext()) {
                Tuple tuple = iter.next();
                String Key = tuple.toDelimitedString(",");
                hashJoin.put( Key, tuple);
                dbshj.add(tuple);
            }
        }
        Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);
View Full Code Here

            pigServer.registerQuery("D = join A by $1 left, B by $1 using \"repl\";");
            pigServer.registerQuery("E = union C,D;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
           
            while(iter.hasNext()) {
                Tuple tuple = iter.next();
                String Key = tuple.toDelimitedString(",");
                hashFRJoin.put( Key, tuple);
                dbfrj.add(tuple);
               
            }
        }
        {
            pigServer.registerQuery("C = join A by $0 left, B by $0;");
            pigServer.registerQuery("D = join A by $1 left, B by $1;");
            pigServer.registerQuery("E = union C,D;");
            Iterator<Tuple> iter = pigServer.openIterator("E");
            while(iter.hasNext()) {
                Tuple tuple = iter.next();
                String Key = tuple.toDelimitedString(",");
                hashJoin.put( Key, tuple);
                dbshj.add(tuple);
            }
        }
        Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0);       
View Full Code Here

TOP

Related Classes of org.apache.pig.data.Tuple

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.