Package org.apache.pig.data

Examples of org.apache.pig.data.TupleFactory


        assertTrue(r==null);
        r = func.exec(t3);
        assertTrue(r==null);
       
        String matchRegex = "^(.+)\\b\\s+is a\\s+\\b(.+)$";
        TupleFactory tupleFactory = TupleFactory.getInstance();
        Tuple te1 = tupleFactory.newTuple(2);
        te1.set(0,"this is a match");
        te1.set(1, matchRegex);
       
        Tuple te2 = tupleFactory.newTuple(2);
        te2.set(0, "no match");
        te2.set(1, matchRegex);
       
        Tuple te3 = tupleFactory.newTuple(2);
        te3.set(0, null);
        te3.set(1, matchRegex);
    
        REGEX_EXTRACT_ALL funce = new REGEX_EXTRACT_ALL();
        Tuple re = funce.exec(te1);
View Full Code Here


        Tuple output = tt.exec(input);
        assertTrue(!(input == output));
        assertEquals(input, output);
       
        TOP top = new TOP();
        TupleFactory tupleFactory = TupleFactory.getInstance();
        BagFactory bagFactory = DefaultBagFactory.getInstance();
        Tuple inputTuple = tupleFactory.newTuple(3);
        DataBag dBag = bagFactory.newDefaultBag();
       
        // set N = 10 i.e retain top 10 tuples
        inputTuple.set(0, 10);
        // compare tuples by field number 1
        inputTuple.set(1, 1);
        // set the data bag containing the tuples
        inputTuple.set(2, dBag);

        // generate tuples of the form (group-1, 1), (group-2, 2) ...
        for (long i = 0; i < 100; i++) {
            Tuple nestedTuple = tupleFactory.newTuple(2);
            nestedTuple.set(0, "group-" + i);
            nestedTuple.set(1, i);
            dBag.add(nestedTuple);
        }
       
        DataBag outBag = top.exec(inputTuple);
        assertEquals(outBag.size(), 10L);
        checkItemsGT(outBag, 1, 89);
       
        // two initial results
        Tuple init1 = (new TOP.Initial()).exec(inputTuple);
        Tuple init2 = (new TOP.Initial()).exec(inputTuple);
        // two intermediate results

        DataBag intermedBag = bagFactory.newDefaultBag();
        intermedBag.add(init1);
        intermedBag.add(init2);
        Tuple intermedInput = tupleFactory.newTuple(intermedBag);
        Tuple intermedOutput1 = (new TOP.Intermed()).exec(intermedInput);
        Tuple intermedOutput2 = (new TOP.Intermed()).exec(intermedInput);
        checkItemsGT((DataBag)intermedOutput1.get(2), 1, 94);

        // final result
        DataBag finalInputBag = bagFactory.newDefaultBag();
        finalInputBag.add(intermedOutput1);
        finalInputBag.add(intermedOutput2);
        Tuple finalInput = tupleFactory.newTuple(finalInputBag);
        outBag = (new TOP.Final()).exec(finalInput);
        assertEquals(outBag.size(), 10L);
        checkItemsGT(outBag, 1, 96);
    }
View Full Code Here

        assertEquals("amy smith", t.get(3));
    }
   
    @Test
    public void testTOKENIZE() throws Exception {
        TupleFactory tf = TupleFactory.getInstance();
        Tuple t1 = tf.newTuple(1);
        t1.set(0, "123 456\"789");
        Tuple t2 = tf.newTuple(1);
        t2.set(0, null);
        Tuple t3 = tf.newTuple(0);
       
        TOKENIZE f = new TOKENIZE();
        DataBag b = f.exec(t1);
        assertTrue(b.size()==3);
        Iterator<Tuple> i = b.iterator();
View Full Code Here

    @Test
    public void testDIFF() throws Exception {
        // Test it in the case with two bags.
        BagFactory bf = BagFactory.getInstance();
        TupleFactory tf = TupleFactory.getInstance();

        DataBag b1 = bf.newDefaultBag();
        DataBag b2 = bf.newDefaultBag();
        for (int i = 0; i < 10; i++) b1.add(tf.newTuple(new Integer(i)));
        for (int i = 0; i < 10; i += 2) b2.add(tf.newTuple(new Integer(i)));
        Tuple t = tf.newTuple(2);
        t.set(0, b1);
        t.set(1, b2);
        DIFF d = new DIFF();
        DataBag result = d.exec(t);

        assertEquals(5, result.size());
        Iterator<Tuple> i = result.iterator();
        int[] values = new int[5];
        for (int j = 0; j < 5; j++) values[j] = (Integer)i.next().get(0);
        Arrays.sort(values);
        for (int j = 1; j < 10; j += 2) assertEquals(j, values[j/2]);

        // Test it in the case of two objects that are equals
        t = tf.newTuple(2);
        t.set(0, new Integer(1));
        t.set(1, new Integer(1));
        result = d.exec(t);
        assertEquals(0, result.size());

        // Test it in the case of two objects that are not equal
        t = tf.newTuple(2);
        t.set(0, new Integer(1));
        t.set(1, new Integer(2));
        result = d.exec(t);
        assertEquals(2, result.size());
    }
View Full Code Here

    if (in == null || in.getPosition() > end) {
      return null;
    }
    Pattern pattern = getPattern();
    Matcher matcher = pattern.matcher("");
    TupleFactory mTupleFactory = DefaultTupleFactory.getInstance();
    String line;
    boolean tryNext = true;
    while (tryNext) {
      if ((line = in.readLine(utf8, recordDel)) == null) {
        break;
      }
      if (line.length() > 0 && line.charAt(line.length() - 1) == '\r')
        line = line.substring(0, line.length() - 1);

      matcher = matcher.reset(line);
      ArrayList<DataByteArray> list = new ArrayList<DataByteArray>();
      if (matcher.find()) {
        tryNext=false;
        for (int i = 1; i <= matcher.groupCount(); i++) {
          list.add(new DataByteArray(matcher.group(i)));
        }
        return mTupleFactory.newTuple(list)
      }
    }


    return null;
View Full Code Here

     * @return - Tuple appended with special marker string column, num-rows column
     * @throws ExecException
     */
    private Tuple createNumRowTuple(Tuple sample) throws ExecException {
        int sz = (sample == null) ? 0 : sample.size();
        TupleFactory factory = TupleFactory.getInstance();      
        Tuple t = factory.newTuple(sz + 2);
        if (sample != null) {
            for(int i=0; i<sample.size(); i++){
                t.set(i, sample.get(i));
            }
View Full Code Here

    }
   
    static public class MyBagFunction extends EvalFunc<DataBag>{
        @Override
        public DataBag exec(Tuple input) throws IOException {
            TupleFactory tf = TupleFactory.getInstance();
            DataBag output = BagFactory.getInstance().newDefaultBag();
            output.add(tf.newTuple("a"));
            output.add(tf.newTuple("a"));
            output.add(tf.newTuple("a"));
            return output;
           
        }
View Full Code Here

    static public class MapUDF extends EvalFunc<Map<String, Object>> {
        @Override
        public Map<String, Object> exec(Tuple input) throws IOException {

            TupleFactory tupleFactory = TupleFactory.getInstance();
            ArrayList<Object> objList = new ArrayList<Object>();
            objList.add(new Integer(1));
            objList.add(new Double(1.0));
            objList.add(new Float(1.0));
            objList.add(new String("World!"));
            Tuple tuple = tupleFactory.newTuple(objList);

            BagFactory bagFactory = BagFactory.getInstance();
            DataBag bag = bagFactory.newDefaultBag();
            bag.add(tuple);
View Full Code Here

        assertTrue(r==null);
        r = func.exec(t3);
        assertTrue(r==null);
       
        String matchRegex = "^(.+)\\b\\s+is a\\s+\\b(.+)$";
        TupleFactory tupleFactory = TupleFactory.getInstance();
        Tuple te1 = tupleFactory.newTuple(2);
        te1.set(0,"this is a match");
        te1.set(1, matchRegex);
       
        Tuple te2 = tupleFactory.newTuple(2);
        te2.set(0, "no match");
        te2.set(1, matchRegex);
       
        Tuple te3 = tupleFactory.newTuple(2);
        te3.set(0, null);
        te3.set(1, matchRegex);
    
        REGEX_EXTRACT_ALL funce = new REGEX_EXTRACT_ALL();
        Tuple re = funce.exec(te1);
View Full Code Here

        Tuple output = tt.exec(input);
        assertTrue(!(input == output));
        assertEquals(input, output);
       
        TOP top = new TOP();
        TupleFactory tupleFactory = TupleFactory.getInstance();
        BagFactory bagFactory = DefaultBagFactory.getInstance();
        Tuple inputTuple = tupleFactory.newTuple(3);
        DataBag dBag = bagFactory.newDefaultBag();
       
        // set N = 10 i.e retain top 10 tuples
        inputTuple.set(0, 10);
        // compare tuples by field number 1
        inputTuple.set(1, 1);
        // set the data bag containing the tuples
        inputTuple.set(2, dBag);

        // generate tuples of the form (group-1, 1), (group-2, 2) ...
        for (long i = 0; i < 100; i++) {
            Tuple nestedTuple = tupleFactory.newTuple(2);
            nestedTuple.set(0, "group-" + i);
            nestedTuple.set(1, i);
            dBag.add(nestedTuple);
        }
       
        DataBag outBag = top.exec(inputTuple);
        assertEquals(outBag.size(), 10L);
        checkItemsGT(outBag, 1, 89);
       
        // two initial results
        Tuple init1 = (new TOP.Initial()).exec(inputTuple);
        Tuple init2 = (new TOP.Initial()).exec(inputTuple);
        // two intermediate results

        DataBag intermedBag = bagFactory.newDefaultBag();
        intermedBag.add(init1);
        intermedBag.add(init2);
        Tuple intermedInput = tupleFactory.newTuple(intermedBag);
        Tuple intermedOutput1 = (new TOP.Intermed()).exec(intermedInput);
        Tuple intermedOutput2 = (new TOP.Intermed()).exec(intermedInput);
        checkItemsGT((DataBag)intermedOutput1.get(2), 1, 94);

        // final result
        DataBag finalInputBag = bagFactory.newDefaultBag();
        finalInputBag.add(intermedOutput1);
        finalInputBag.add(intermedOutput2);
        Tuple finalInput = tupleFactory.newTuple(finalInputBag);
        outBag = (new TOP.Final()).exec(finalInput);
        assertEquals(outBag.size(), 10L);
        checkItemsGT(outBag, 1, 96);
    }
View Full Code Here

TOP

Related Classes of org.apache.pig.data.TupleFactory

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.