Package org.apache.pig.test

Source Code of org.apache.pig.test.TestPOGenerate

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.pig.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.util.LinkedList;
import java.util.List;
import java.util.Random;

import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.ExpressionOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POProject;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.plan.OperatorKey;
import org.junit.Before;
import org.junit.Test;

public class TestPOGenerate {

    DataBag cogroup;
    DataBag partialFlatten;
    DataBag simpleGenerate;
    Random r = new Random();
    BagFactory bf = BagFactory.getInstance();
    TupleFactory tf = TupleFactory.getInstance();

    @Before
    public void setUp() throws Exception {
        Tuple [] inputA = new Tuple[4];
        Tuple [] inputB = new Tuple[4];
        for(int i = 0; i < 4; i++) {
            inputA[i] = tf.newTuple(2);
            inputB[i] = tf.newTuple(1);
        }
        inputA[0].set(0, 'a');
        inputA[0].set(1, '1');
        inputA[1].set(0, 'b');
        inputA[1].set(1, '1');
        inputA[2].set(0, 'a');
        inputA[2].set(1, '1');
        inputA[3].set(0, 'c');
        inputA[3].set(1, '1');
        inputB[0].set(0, 'b');
        inputB[1].set(0, 'b');
        inputB[2].set(0, 'a');
        inputB[3].set(0, 'd');
        DataBag cg11 = bf.newDefaultBag();
        cg11.add(inputA[0]);
        cg11.add(inputA[2]);
        DataBag cg21 = bf.newDefaultBag();
        cg21.add(inputA[1]);
        DataBag cg31 = bf.newDefaultBag();
        cg31.add(inputA[3]);
        DataBag emptyBag = bf.newDefaultBag();
        DataBag cg12 = bf.newDefaultBag();
        cg12.add(inputB[2]);
        DataBag cg22 = bf.newDefaultBag();
        cg22.add(inputB[0]);
        cg22.add(inputB[1]);
        DataBag cg42 = bf.newDefaultBag();
        cg42.add(inputB[3]);
        Tuple [] tIn = new Tuple[4];
        for(int i = 0; i < 4; ++i) {
            tIn[i] = tf.newTuple(2);
        }
        tIn[0].set(0, cg11);
        tIn[0].set(1, cg12);
        tIn[1].set(0, cg21);
        tIn[1].set(1, cg22);
        tIn[2].set(0, cg31);
        tIn[2].set(1, emptyBag);
        tIn[3].set(0, emptyBag);
        tIn[3].set(1, cg42);

        cogroup = bf.newDefaultBag();
        for(int i = 0; i < 4; ++i) {
            cogroup.add(tIn[i]);
        }

        Tuple[] tPartial = new Tuple[4];
        for(int i = 0; i < 4; ++i) {
            tPartial[i] = tf.newTuple(2);
            tPartial[i].set(0, inputA[i].get(0));
            tPartial[i].set(1, inputA[i].get(1));
        }

        tPartial[0].append(cg12);

        tPartial[1].append(cg22);

        tPartial[2].append(cg12);

        tPartial[3].append(emptyBag);

        partialFlatten = bf.newDefaultBag();
        for(int i = 0; i < 4; ++i) {
            partialFlatten.add(tPartial[i]);
        }

        simpleGenerate = bf.newDefaultBag();
        for(int i = 0; i < 4; ++i) {
            simpleGenerate.add(inputA[i]);
        }

        //System.out.println("Cogroup : " + cogroup);
        //System.out.println("Partial : " + partialFlatten);
        //System.out.println("Simple : " + simpleGenerate);

    }

    @Test
    public void testJoin() throws Exception {
        ExpressionOperator prj1 = new POProject(new OperatorKey("", r.nextLong()), -1, 0);
        ExpressionOperator prj2 = new POProject(new OperatorKey("", r.nextLong()), -1, 1);
        prj1.setResultType(DataType.BAG);
        prj2.setResultType(DataType.BAG);
        List<Boolean> toBeFlattened = new LinkedList<Boolean>();
        toBeFlattened.add(true);
        toBeFlattened.add(true);
        PhysicalPlan plan1 = new PhysicalPlan();
        plan1.add(prj1);
        PhysicalPlan plan2 = new PhysicalPlan();
        plan2.add(prj2);
        List<PhysicalPlan> inputs = new LinkedList<PhysicalPlan>();
        inputs.add(plan1);
        inputs.add(plan2);
        PhysicalOperator poGen = new POForEach(new OperatorKey("", r.nextLong()), 1, inputs, toBeFlattened);
        //DataBag obtained = bf.newDefaultBag();
        for (Tuple t : cogroup) {
            /*plan1.attachInput(t);
            plan2.attachInput(t);*/
            poGen.attachInput(t);
            Result output = poGen.getNextTuple();
            while(output.result != null && output.returnStatus != POStatus.STATUS_EOP) {
                //System.out.println(output.result);
                Tuple tObtained = (Tuple) output.result;
                assertTrue(tObtained.get(0).toString().equals(tObtained.get(2).toString()));
                //obtained.add((Tuple) output.result);
                output = poGen.getNextTuple();
            }
        }

    }

    @Test
    public void testPartialJoin() throws Exception {
        ExpressionOperator prj1 = new POProject(new OperatorKey("", r.nextLong()), -1, 0);
        ExpressionOperator prj2 = new POProject(new OperatorKey("", r.nextLong()), -1, 1);
        prj1.setResultType(DataType.BAG);
        prj2.setResultType(DataType.BAG);
        List<Boolean> toBeFlattened = new LinkedList<Boolean>();
        toBeFlattened.add(true);
        toBeFlattened.add(false);
        PhysicalPlan plan1 = new PhysicalPlan();
        plan1.add(prj1);
        PhysicalPlan plan2 = new PhysicalPlan();
        plan2.add(prj2);
        List<PhysicalPlan> inputs = new LinkedList<PhysicalPlan>();
        inputs.add(plan1);
        inputs.add(plan2);
        PhysicalOperator poGen = new POForEach(new OperatorKey("", r.nextLong()), 1, inputs, toBeFlattened);

        //DataBag obtained = bf.newDefaultBag();
        List<String> obtained = new LinkedList<String>();
        for (Tuple t : cogroup) {
            /*plan1.attachInput(t);
            plan2.attachInput(t);*/
            poGen.attachInput(t);
            Result output = poGen.getNextTuple();
            while(output.result != null && output.returnStatus != POStatus.STATUS_EOP) {
                //System.out.println(output.result);
                obtained.add(((Tuple) output.result).toString());
                output = poGen.getNextTuple();
            }
        }
        int count = 0;
        for (Tuple t : partialFlatten) {
            assertTrue(obtained.contains(t.toString()));
            ++count;
        }
        assertEquals(partialFlatten.size(), count);

    }

    @Test
    public void testSimpleGenerate() throws Exception {
        ExpressionOperator prj1 = new POProject(new OperatorKey("", r.nextLong()), -1, 0);
        ExpressionOperator prj2 = new POProject(new OperatorKey("", r.nextLong()), -1, 1);
        prj1.setResultType(DataType.INTEGER);
        prj2.setResultType(DataType.INTEGER);
        List<Boolean> toBeFlattened = new LinkedList<Boolean>();
        toBeFlattened.add(true);
        toBeFlattened.add(false);
        PhysicalPlan plan1 = new PhysicalPlan();
        plan1.add(prj1);
        PhysicalPlan plan2 = new PhysicalPlan();
        plan2.add(prj2);
        List<PhysicalPlan> inputs = new LinkedList<PhysicalPlan>();
        inputs.add(plan1);
        inputs.add(plan2);
        PhysicalOperator poGen = new POForEach(new OperatorKey("", r.nextLong()), 1, inputs, toBeFlattened);

        //DataBag obtained = bf.newDefaultBag();
        List<String> obtained = new LinkedList<String>();
        for (Tuple t : simpleGenerate) {
            /*plan1.attachInput(t);
            plan2.attachInput(t);*/
            poGen.attachInput(t);
            Result output = poGen.getNextTuple();
            while(output.result != null && output.returnStatus != POStatus.STATUS_EOP) {
                //System.out.println(output.result);
                obtained.add(((Tuple) output.result).toString());
                output = poGen.getNextTuple();
            }
        }

        int count = 0;
        for (Tuple t : simpleGenerate) {
            assertTrue(obtained.contains(t.toString()));
            ++count;
        }
        assertEquals(simpleGenerate.size(), count);

    }
}
TOP

Related Classes of org.apache.pig.test.TestPOGenerate

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.