Package org.apache.pig.data

Examples of org.apache.pig.data.BagFactory


    public void testMap() throws Exception {
        String size = "100";
        String numHash = "3";
        String hashFunc = "JENKINS";
        TupleFactory tf = TupleFactory.getInstance();
        BagFactory bf = BagFactory.getInstance();

        Tuple t = tf.newTuple(1);
        t.set(0, 1);
        DataBag b = bf.newDefaultBag();
        b.add(t);
        Tuple input = tf.newTuple(b);

        BuildBloom.Initial map =
                new BuildBloom.Initial(hashFunc, "fixed", size, numHash);
View Full Code Here


    public void testCombiner() throws Exception {
        String size = "100";
        String numHash = "3";
        String hashFunc = "jenkins";
        TupleFactory tf = TupleFactory.getInstance();
        BagFactory bf = BagFactory.getInstance();

        DataBag combinerBag = bf.newDefaultBag();
        for (int j = 0; j < 3; j++) { // map loop
            Tuple t = tf.newTuple(1);
            t.set(0, 10 + j);
            DataBag mapBag = bf.newDefaultBag();
            mapBag.add(t);
            Tuple input = tf.newTuple(mapBag);
            BuildBloom.Initial map =
                    new BuildBloom.Initial(hashFunc, "fixed", size, numHash);
            combinerBag.add(map.exec(input));
View Full Code Here

    public void testSingleKey() throws Exception {
        String size = "100";
        String numHash = "3";
        String hashFunc = "MURMUR";
        TupleFactory tf = TupleFactory.getInstance();
        BagFactory bf = BagFactory.getInstance();

        DataBag reducerBag = bf.newDefaultBag();
        for (int i = 0; i < 3; i++) { // combiner loop
            DataBag combinerBag = bf.newDefaultBag();
            for (int j = 0; j < 3; j++) { // map loop
                Tuple t = tf.newTuple(1);
                t.set(0, i * 10 + j);
                DataBag mapBag = bf.newDefaultBag();
                mapBag.add(t);
                Tuple input = tf.newTuple(mapBag);
                BuildBloom.Initial map =
                        new BuildBloom.Initial(hashFunc, "fixed", size, numHash);
                combinerBag.add(map.exec(input));
View Full Code Here

    public void testMultiKey() throws Exception {
        String numElements = "10";
        String falsePositive = "0.001";
        String hashFunc = "murmur";
        TupleFactory tf = TupleFactory.getInstance();
        BagFactory bf = BagFactory.getInstance();

        String[][] strs = {
                        { "fred", "joe", "bob" },
                        { "mary", "sally", "jane" },
                        { "fido", "spot", "fluffly" } };

        DataBag reducerBag = bf.newDefaultBag();
        for (int i = 0; i < 3; i++) { // combiner loop
            DataBag combinerBag = bf.newDefaultBag();
            for (int j = 0; j < 3; j++) { // map loop
                Tuple t = tf.newTuple(2);
                t.set(0, i * 10 + j);
                t.set(1, strs[i][j]);
                DataBag mapBag = bf.newDefaultBag();
                mapBag.add(t);
                Tuple input = tf.newTuple(mapBag);
                BuildBloom.Initial map =
                        new BuildBloom.Initial(hashFunc, numElements,
                                falsePositive);
View Full Code Here

            objList.add(new Double(1.0));
            objList.add(new Float(1.0));
            objList.add(new String("World!"));
            Tuple tuple = tupleFactory.newTuple(objList);

            BagFactory bagFactory = BagFactory.getInstance();
            DataBag bag = bagFactory.newDefaultBag();
            bag.add(tuple);

            Map<String, Object> mapInMap = new HashMap<String, Object>();
            mapInMap.put("int", new Integer(10));
            mapInMap.put("float", new Float(10.0));
View Full Code Here

    }

    @Test
    public void testDIFF() throws Exception {
        // Test it in the case with two bags.
        BagFactory bf = BagFactory.getInstance();
        TupleFactory tf = TupleFactory.getInstance();

        DataBag b1 = bf.newDefaultBag();
        DataBag b2 = bf.newDefaultBag();
        for (int i = 0; i < 10; i++) b1.add(tf.newTuple(new Integer(i)));
        for (int i = 0; i < 10; i += 2) b2.add(tf.newTuple(new Integer(i)));
        Tuple t = tf.newTuple(2);
        t.set(0, b1);
        t.set(1, b2);
View Full Code Here

            objList.add(new Double(1.0));
            objList.add(new Float(1.0));
            objList.add(new String("World!"));
            Tuple tuple = tupleFactory.newTuple(objList);

            BagFactory bagFactory = BagFactory.getInstance();
            DataBag bag = bagFactory.newDefaultBag();
            bag.add(tuple);

            Map<String, Object> mapInMap = new HashMap<String, Object>();
            mapInMap.put("int", new Integer(10));
            mapInMap.put("float", new Float(10.0));
View Full Code Here

    assertEquals("a-5-c-6", result);
  }

  @Test
  public void testNestedTupleForBagToStringUDF() throws Exception {
    BagFactory bf = BagFactory.getInstance();
    TupleFactory tf = TupleFactory.getInstance();

    Tuple t1 = tf.newTuple(2);
    t1.set(0, "a");
    t1.set(1, 5);

    Tuple nestedTuple = tf.newTuple(2);
    nestedTuple.set(0, "d");
    nestedTuple.set(1, 7);

    Tuple t2 = tf.newTuple(3);
    t2.set(0, "c");
    t2.set(1, 6);
    t2.set(2, nestedTuple);

    DataBag inputBag = bf.newDefaultBag();
    inputBag.add(t1);
    inputBag.add(t2);

    BagToString udf = new BagToString();
    Tuple udfInput = tf.newTuple(2);
View Full Code Here

    udf.exec(udfInput);
  }

  @Test
  public void testUseDefaultDelimiterBagToStringUDF() throws Exception {
    BagFactory bf = BagFactory.getInstance();
    TupleFactory tf = TupleFactory.getInstance();

    Tuple t1 = tf.newTuple(2);
    t1.set(0, "a");
    t1.set(1, 5);

    Tuple t2 = tf.newTuple(2);
    t2.set(0, "c");
    t2.set(1, 6);

    DataBag bag = bf.newDefaultBag();
    bag.add(t1);
    bag.add(t2);

    BagToString udf = new BagToString();
    Tuple udfInput = tf.newTuple(1);
View Full Code Here

    assertEquals("a_5_c_6", result);
  }

  @Test
  public void testBasicBagToStringUDF() throws Exception {
    BagFactory bf = BagFactory.getInstance();
    TupleFactory tf = TupleFactory.getInstance();

    Tuple t1 = tf.newTuple(2);
    t1.set(0, "a");
    t1.set(1, 5);

    Tuple t2 = tf.newTuple(2);
    t2.set(0, "c");
    t2.set(1, 6);

    DataBag bag = bf.newDefaultBag();
    bag.add(t1);
    bag.add(t2);

    BagToString udf = new BagToString();
    Tuple udfInput = tf.newTuple(2);
View Full Code Here

TOP

Related Classes of org.apache.pig.data.BagFactory

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.