Package org.apache.pig.backend.hadoop.executionengine.tez.plan.operator

Source Code of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POIdentityInOutTez

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.pig.backend.hadoop.executionengine.tez.plan.operator;

import java.io.IOException;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange;
import org.apache.pig.backend.hadoop.executionengine.tez.runtime.TezInput;
import org.apache.pig.backend.hadoop.executionengine.tez.runtime.TezOutput;
import org.apache.pig.impl.io.NullablePartitionWritable;
import org.apache.pig.impl.io.PigNullableWritable;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.api.Reader;
import org.apache.tez.runtime.library.api.KeyValueReader;
import org.apache.tez.runtime.library.api.KeyValueWriter;
import org.apache.tez.runtime.library.api.KeyValuesReader;

/**
* POIdentityInOutTez is used to pass through tuples as is to next vertex from
* previous vertex's POLocalRearrangeTez. For eg: In case of Order By, the
* partition vertex which just applies the WeightedRangePartitioner on the
* previous vertex data uses POIdentityInOutTez.
*/
@InterfaceAudience.Private
public class POIdentityInOutTez extends POLocalRearrangeTez implements TezInput, TezOutput {

    private static final long serialVersionUID = 1L;
    private static final Log LOG = LogFactory.getLog(POIdentityInOutTez.class);
    private String inputKey;
    private transient KeyValueReader reader;
    private transient KeyValuesReader shuffleReader;
    private transient boolean shuffleInput;

    public POIdentityInOutTez(OperatorKey k, POLocalRearrange inputRearrange) {
        super(inputRearrange);
        this.mKey = k;
    }

    public void setInputKey(String inputKey) {
        this.inputKey = inputKey;
    }

    @Override
    public String[] getTezInputs() {
        return new String[] { inputKey };
    }

    @Override
    public void replaceInput(String oldInputKey, String newInputKey) {
        if (oldInputKey.equals(inputKey)) {
            inputKey = newInputKey;
        }
    }

    @Override
    public void addInputsToSkip(Set<String> inputsToSkip) {
    }

    @Override
    public void attachInputs(Map<String, LogicalInput> inputs,
            Configuration conf) throws ExecException {
        LogicalInput input = inputs.get(inputKey);
        if (input == null) {
            throw new ExecException("Input from vertex " + inputKey + " is missing");
        }
        try {
            Reader r = input.getReader();
            if (r instanceof KeyValueReader) {
                reader = (KeyValueReader) r;
            } else {
                shuffleInput = true;
                shuffleReader = (KeyValuesReader) r;
            }
            LOG.info("Attached input from vertex " + inputKey + " : input=" + input + ", reader=" + r);
        } catch (Exception e) {
            throw new ExecException(e);
        }
    }

    @Override
    public void attachOutputs(Map<String, LogicalOutput> outputs,
            Configuration conf) throws ExecException {
        LogicalOutput output = outputs.get(outputKey);
        if (output == null) {
            throw new ExecException("Output to vertex " + outputKey + " is missing");
        }
        try {
            writer = (KeyValueWriter) output.getWriter();
            LOG.info("Attached output to vertex " + outputKey + " : output=" + output + ", writer=" + writer);
        } catch (Exception e) {
            throw new ExecException(e);
        }
    }

    @Override
    public Result getNextTuple() throws ExecException {
        try {
            if (shuffleInput) {
                while (shuffleReader.next()) {
                    Object curKey = shuffleReader.getCurrentKey();
                    Iterable<Object> vals = shuffleReader.getCurrentValues();
                    if (isSkewedJoin) {
                        NullablePartitionWritable wrappedKey = new NullablePartitionWritable(
                                (PigNullableWritable) curKey);
                        wrappedKey.setPartition(-1);
                        curKey = wrappedKey;
                    }
                    for (Object val : vals) {
                        writer.write(curKey, val);
                    }
                }
            } else {
                while (reader.next()) {
                    if (isSkewedJoin) {
                        NullablePartitionWritable wrappedKey = new NullablePartitionWritable(
                                (PigNullableWritable) reader.getCurrentKey());
                        // Skewed join wraps key with NullablePartitionWritable
                        // The partitionIndex in NullablePartitionWritable is not serialized.
                        // So setting it here instead of the previous vertex POLocalRearrangeTez.
                        // Serializing it would add overhead for MR as well.
                        wrappedKey.setPartition(-1);
                        writer.write(wrappedKey, reader.getCurrentValue());
                    } else {
                        writer.write(reader.getCurrentKey(),
                                reader.getCurrentValue());
                    }
                }
            }
            return RESULT_EOP;
        } catch (IOException e) {
            throw new ExecException(e);
        }
    }

    @Override
    public String name() {
        return "POIdentityInOutTez - " + mKey.toString() + "\t<-\t " + inputKey + "\t->\t " + outputKey;
    }

}
TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.tez.plan.operator.POIdentityInOutTez

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.