Package org.apache.pig.backend.hadoop.executionengine.tez.runtime

Source Code of org.apache.pig.backend.hadoop.executionengine.tez.runtime.WeightedRangePartitionerTez

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.tez.runtime;

import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Writable;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.partitioners.DiscreteProbabilitySampleGenerator;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.partitioners.WeightedRangePartitioner;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.InternalMap;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.builtin.FindQuantiles;
import org.apache.pig.impl.io.PigNullableWritable;
import org.apache.tez.runtime.library.common.ConfigUtils;

public class WeightedRangePartitionerTez extends WeightedRangePartitioner {
    private static final Log LOG = LogFactory.getLog(WeightedRangePartitionerTez.class);

    private Integer estimatedNumPartitions;

    @Override
    public int getPartition(PigNullableWritable key, Writable value,
            int numPartitions){
        if (estimatedNumPartitions!=null) {
            numPartitions = estimatedNumPartitions;
        }
        return super.getPartition(key, value, numPartitions);
    }

    @Override
    public void init() {
        Map<String, Object> quantileMap = null;
        if (PigProcessor.sampleMap != null) {
            // We've collected sampleMap in PigProcessor
            quantileMap = PigProcessor.sampleMap;
        } else {
            LOG.warn("Quantiles map is empty");
            inited = true;
            return;
        }

        long start = System.currentTimeMillis();
        try {
            DataBag quantilesList = (DataBag) quantileMap.get(FindQuantiles.QUANTILES_LIST);
            InternalMap weightedPartsData = (InternalMap) quantileMap.get(FindQuantiles.WEIGHTED_PARTS);
            estimatedNumPartitions = (Integer)quantileMap.get(PigProcessor.ESTIMATED_NUM_PARALLELISM);
            convertToArray(quantilesList);
            for (Entry<Object, Object> ent : weightedPartsData.entrySet()) {
                Tuple key = (Tuple) ent.getKey(); // sample item which repeats
                float[] probVec = getProbVec((Tuple) ent.getValue());
                weightedParts.put(getPigNullableWritable(key),
                        new DiscreteProbabilitySampleGenerator(probVec));
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        comparator = ConfigUtils.getIntermediateInputKeyComparator(job);
        LOG.info("Initialized WeightedRangePartitionerTez. Time taken: " + (System.currentTimeMillis() - start));
        inited = true;
    }
}
TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.tez.runtime.WeightedRangePartitionerTez

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.