Package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators

Source Code of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POUnion

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators;

import java.util.BitSet;
import java.util.LinkedList;
import java.util.List;

import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.util.IdentityHashSet;
import org.apache.pig.pen.util.ExampleTuple;

/**
* The union operator that combines the two inputs into a single
* stream. Note that this doesn't eliminate duplicate tuples.
* The Operator will also be added to every map plan which processes
* more than one input. This just pulls out data from the pipeline
* using the proposed single threaded shared execution model. By shared
* execution I mean, one input to the Union operator is called
* once and the execution moves to the next non-drained input till
* all the inputs are drained.
*
*/
public class POUnion extends PhysicalOperator {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    //Used for efficiently shifting between non-drained
    //inputs
    BitSet done;

    boolean nextReturnEOP = false ;
    private static Result eopResult = new Result(POStatus.STATUS_EOP, null) ;
   
    //The index of the last input that was read
    int lastInd = 0;

    public POUnion(OperatorKey k) {
        this(k, -1, null);
    }

    public POUnion(OperatorKey k, int rp) {
        this(k, rp, null);
    }

    public POUnion(OperatorKey k, List<PhysicalOperator> inp) {
        this(k, -1, inp);
    }

    public POUnion(OperatorKey k, int rp, List<PhysicalOperator> inp) {
        super(k, rp, inp);
    }

    @Override
    public void setInputs(List<PhysicalOperator> inputs) {
        super.setInputs(inputs);
        if (inputs != null) {
            done = new BitSet(inputs.size());
        }
        else {
            done = new BitSet(0) ;
        }
    }

    @Override
    public void visit(PhyPlanVisitor v) throws VisitorException {
        v.visitUnion(this);
    }

    @Override
    public String name() {
        return getAliasString() + "Union" + "[" + DataType.findTypeName(resultType)
                + "]" + " - " + mKey.toString();
    }

    @Override
    public boolean supportsMultipleInputs() {
        return true;
    }

    @Override
    public boolean supportsMultipleOutputs() {
        return false;
    }

    public void clearDone() {
        done.clear();
    }

    /**
     * The code below, tries to follow our single threaded
     * shared execution model with execution being passed
     * around each non-drained input
     */
    @Override
    public Result getNextTuple() throws ExecException {

        if (nextReturnEOP) {
            nextReturnEOP = false ;
            return eopResult ;
        }

        // Case 1 : Normal connected plan
        if (!isInputAttached()) {
           
            if (inputs == null || inputs.size()==0) {
                // Neither does this Union have predecessors nor
                // was any input attached! This can happen when we have
                // a plan like below
                // POUnion
                // |
                // |--POLocalRearrange
                // |    |
                // |    |-POUnion (root 2)--> This union's getNext() can lead the code here
                // |
                // |--POLocalRearrange (root 1)
               
                // The inner POUnion above is a root in the plan which has 2 roots.
                // So these 2 roots would have input coming from different input
                // sources (dfs files). So certain maps would be working on input only
                // meant for "root 1" above and some maps would work on input
                // meant only for "root 2". In the former case, "root 2" would
                // neither get input attached to it nor does it have predecessors
                // which is the case which can lead us here.
                return eopResult;
            }
         
            while(true){
                if (done.nextClearBit(0) >= inputs.size()) {
                    clearDone();
                    return eopResult ;
                }
                if(lastInd >= inputs.size() || done.nextClearBit(lastInd) >= inputs.size())
                    lastInd = 0;
                int ind = done.nextClearBit(lastInd);
                Result res;

                while(true){
                    if(getReporter()!=null) {
                        getReporter().progress();
                    }
                    res = inputs.get(ind).getNextTuple();
                    lastInd = ind + 1;

                    if(res.returnStatus == POStatus.STATUS_OK ||
                            res.returnStatus == POStatus.STATUS_NULL || res.returnStatus == POStatus.STATUS_ERR) {
                        illustratorMarkup(res.result, res.result, ind);
                        return res;
                    }

                    if (res.returnStatus == POStatus.STATUS_EOP) {
                        done.set(ind);
                        break;
                    }
                }
            }
        }
        // Case 2 : Input directly injected
        else {
            res.result = input;
            res.returnStatus = POStatus.STATUS_OK;
            detachInput();
            nextReturnEOP = true ;
            illustratorMarkup(res.result, res.result, 0);
            return res;
        }


    }
   
    @Override
    public Tuple illustratorMarkup(Object in, Object out, int eqClassIndex) {
        if(illustrator != null) {
            if (illustrator.getEquivalenceClasses() == null) {
                int size = (inputs == null ? 1 : inputs.size());
                LinkedList<IdentityHashSet<Tuple>> equivalenceClasses = new LinkedList<IdentityHashSet<Tuple>>();
                for (int i = 0; i < size; ++i) {
                    IdentityHashSet<Tuple> equivalenceClass = new IdentityHashSet<Tuple>();
                    equivalenceClasses.add(equivalenceClass);
                }
                illustrator.setEquivalenceClasses(equivalenceClasses, this);
            }
            ExampleTuple tIn = (ExampleTuple) in;
            illustrator.getEquivalenceClasses().get(eqClassIndex).add(tIn);
            illustrator.addData((Tuple) out);
        }
        return null;
    }
}
TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POUnion

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.