Package org.apache.pig.newplan.logical.relational

Source Code of org.apache.pig.newplan.logical.relational.LOUnion

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.pig.newplan.logical.relational;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.pig.PigException;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.util.Pair;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.PlanVisitor;
import org.apache.pig.newplan.logical.expression.LogicalExpression;
import org.apache.pig.newplan.logical.relational.LogicalSchema.LogicalFieldSchema;

public class LOUnion extends LogicalRelationalOperator {
    private boolean onSchema;
   
    // uid mapping from output uid to input uid
    private List<Pair<Long, Long>> uidMapping = new ArrayList<Pair<Long, Long>>();
   
    public LOUnion(OperatorPlan plan) {
        super("LOUnion", plan);
    }
   
    public LOUnion(OperatorPlan plan, boolean onSchema) {
        this( plan );
        this.onSchema = onSchema;
    }
   
    public boolean isOnSchema() {
        return onSchema;
    }
   
    @Override
    public LogicalSchema getSchema() throws FrontendException {
        if (schema != null) {
            return schema;
        }
       
        List<Operator> inputs = plan.getPredecessors(this);
        // If any predecessor's schema is null, then the schema for union is null
        for (Operator input : inputs) {
            LogicalRelationalOperator op = (LogicalRelationalOperator)input;
            if( op.getSchema() == null ) {
                if( isOnSchema() ) {
                    String msg = "Schema of relation " + op.getAlias()
                        + " is null."
                        + " UNION ONSCHEMA cannot be used with relations that"
                        + " have null schema.";
                    throw new FrontendException(this, msg, 1116, PigException.INPUT);

                } else {
                    return null;
                }
            }
        }
       
        LogicalSchema mergedSchema = null;
        if ( inputs.size() == 1 )
            return schema = ((LogicalRelationalOperator)inputs.get(0)).getSchema();
       
        List<String> inputAliases = new ArrayList<String>(inputs.size());
        List<LogicalSchema> inputSchemas = new ArrayList<LogicalSchema>(inputs.size());
        for (Operator input : inputs) {
            LogicalRelationalOperator lop = (LogicalRelationalOperator)input;
            inputAliases.add(lop.getAlias());
            inputSchemas.add(lop.getSchema());
        }
       
        if( isOnSchema() ) {
            mergedSchema = createMergedSchemaOnAlias( inputSchemas, inputAliases );
        } else {
            LogicalSchema s0 = inputSchemas.get(0);
            LogicalSchema s1 = inputSchemas.get(1);
            mergedSchema = LogicalSchema.merge(s0, s1, LogicalSchema.MergeMode.Union);
            if (mergedSchema==null)
                return null;
           
            // Merge schema
            for (int i=2;i<inputSchemas.size();i++) {
                LogicalSchema otherSchema = inputSchemas.get(i);
                if (mergedSchema==null || otherSchema==null)
                    return null;
                mergedSchema = LogicalSchema.merge(mergedSchema, otherSchema, LogicalSchema.MergeMode.Union);
                if (mergedSchema == null)
                    return null;
            }
        }

        // Bring back cached uid if any; otherwise, cache uid generated
        for (int i=0;i<mergedSchema.size();i++)
        {
            LogicalSchema.LogicalFieldSchema outputFieldSchema = mergedSchema.getField(i);

            long uid = -1;
           
            // Search all the cached uid mappings by input field to see if
            // we've cached an output uid for this output field
            for (LogicalSchema inputSchema : inputSchemas) {
                LogicalSchema.LogicalFieldSchema inputFieldSchema;
                if (onSchema) {
                    inputFieldSchema = inputSchema.getFieldSubNameMatch(outputFieldSchema.alias);
                } else {
                    inputFieldSchema = inputSchema.getField(i);
                }
               
                if (inputFieldSchema != null) {
                    uid = getCachedOuputUid(inputFieldSchema.uid);
                    if (uid >= 0) break;
                }
            }
           
            // No cached uid. Allocate one, and locate and cache all inputs.
            if (uid==-1) {
                uid = LogicalExpression.getNextUid();
                for (LogicalSchema inputSchema : inputSchemas) {
                    long inputUid;
                    LogicalFieldSchema matchedInputFieldSchema;
                  if (onSchema) {
                      matchedInputFieldSchema = inputSchema.getFieldSubNameMatch(mergedSchema.getField(i).alias);
                        if (matchedInputFieldSchema!=null) {
                            inputUid = matchedInputFieldSchema.uid;
                            uidMapping.add(new Pair<Long, Long>(uid, inputUid));
                        }
                    }
                    else {
                        matchedInputFieldSchema = mergedSchema.getField(i);
                    inputUid = inputSchema.getField(i).uid;
                    uidMapping.add(new Pair<Long, Long>(uid, inputUid));
                    }
                }
            }

            outputFieldSchema.uid = uid;
        }
       
        return schema = mergedSchema;
    }

    /**
     * create schema for union-onschema
     */
    private LogicalSchema createMergedSchemaOnAlias(List<LogicalSchema> inputSchemas,
            List<String> inputAliases)
    throws FrontendException {
        ArrayList<LogicalSchema> schemas = new ArrayList<LogicalSchema>();
        for (int i = 0; i < inputSchemas.size(); i++){
            LogicalSchema sch = inputSchemas.get(i);
            for( LogicalFieldSchema fs : sch.getFields() ) {
                if(fs.alias == null){
                    String msg = "Schema of relation " + inputAliases.get(i)
                        + " has a null fieldschema for column(s). Schema :" + sch.toString(false);
                    throw new FrontendException( this, msg, 1116, PigException.INPUT );
                }
            }
            schemas.add( sch );
        }
       
        //create the merged schema
        LogicalSchema mergedSchema = null;
        try {
            mergedSchema = LogicalSchema.mergeSchemasByAlias( schemas );  
        } catch(FrontendException e)                 {
            String msg = "Error merging schemas for union operator : "
                + e.getMessage();
            throw new FrontendException(this, msg, 1116, PigException.INPUT, e);
        }
       
        return mergedSchema;
    }
   
    private long getCachedOuputUid(long inputUid) {
        long uid = -1;
       
        for (Pair<Long, Long> pair : uidMapping) {
            if (pair.second==inputUid) {
                uid = pair.first;
                break;
            }
        }
       
        return uid;
    }

    @Override
    public void accept(PlanVisitor v) throws FrontendException {
        if (!(v instanceof LogicalRelationalNodesVisitor)) {
            throw new FrontendException("Expected LogicalPlanVisitor", 2223);
        }
        ((LogicalRelationalNodesVisitor)v).visit(this);
    }

    @Override
    public boolean isEqual(Operator other) throws FrontendException {
        if (other != null && other instanceof LOUnion) {
            return checkEquality((LOUnion)other);
        } else {
            return false;
        }
    }

    // Get input uids mapping to the output uid
    public Set<Long> getInputUids(long uid) {
        Set<Long> result = new HashSet<Long>();
        for (Pair<Long, Long> pair : uidMapping) {
            if (pair.first==uid)
                result.add(pair.second);
        }
        return result;
    }
   
    @Override
    public void resetUid() {
        uidMapping = new ArrayList<Pair<Long, Long>>();
    }
   
    public List<Operator> getInputs() {
        return plan.getPredecessors(this);
    }
   
    public List<Operator> getInputs(LogicalPlan plan) {
        return plan.getPredecessors(this);
    }
   
    public void setUnionOnSchema(boolean flag) {
        onSchema = flag;
    }
}
TOP

Related Classes of org.apache.pig.newplan.logical.relational.LOUnion

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.