Package org.apache.pig.piggybank.evaluation

Source Code of org.apache.pig.piggybank.evaluation.MaxTupleBy1stField$Final

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.pig.piggybank.evaluation;

import java.io.IOException;
import java.util.Iterator;

import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PigProgressable;
import org.apache.pig.data.*;
import org.apache.pig.impl.logicalLayer.schema.Schema;

/**
* MaxTupleBy1stField UDF returns a tuple with max value of the first field in a
* given bag.
*
* Caveat: first field assumed to have type 'long'. You may need to enforce this
* via schema when loading data, as shown in sample usage below.
*
* Sample usage:
*
* A = load 'test.tsv' as (first: long, second, third);
* B = GROUP A by second;
* C = FOREACH B GENERATE group, MaxTupleBy1stField(A);
*
* @author Vadim Zaliva <lord@codemindes.com>
*/
public class MaxTupleBy1stField extends EvalFunc<Tuple> implements Algebraic
{
    /**
     * Indicates once for how many items progress hartbeat should be sent.
     */
    private static final int PROGRESS_FREQUENCY = 10;

    static public class Initial extends EvalFunc<Tuple>
    {
        //TODO: private static TupleFactory tfact = TupleFactory.getInstance();

        @Override
        public Tuple exec(Tuple input) throws IOException
        {
            try
            {
                // input is a bag with one tuple containing
                // the column we are trying to max on
                DataBag bg = (DataBag) input.get(0);
                Tuple tp = bg.iterator().next();
                return tp; //TODO: copy?
            } catch(ExecException ee)
            {
                IOException oughtToBeEE = new IOException();
                oughtToBeEE.initCause(ee);
                throw oughtToBeEE;
            }
        }
    }

    public Schema outputSchema(Schema input)
    {
        return input;
    }

    static public class Intermediate extends EvalFunc<Tuple>
    {
        //TODO: private static TupleFactory tfact = TupleFactory.getInstance();

        @Override
        public Tuple exec(Tuple input) throws IOException
        {
            try
            {
                return max(input, reporter);
            } catch(ExecException ee)
            {
                IOException oughtToBeEE = new IOException();
                oughtToBeEE.initCause(ee);
                throw oughtToBeEE;
            }
        }
    }

    static public class Final extends EvalFunc<Tuple>
    {
        @Override
        public Tuple exec(Tuple input) throws IOException
        {
            try
            {
                return max(input, reporter);
            } catch(ExecException ee)
            {
                IOException oughtToBeEE = new IOException();
                oughtToBeEE.initCause(ee);
                throw oughtToBeEE;
            }
        }
    }

    @Override
    public Tuple exec(Tuple input) throws IOException
    {
        try
        {
            return max(input, reporter);
        } catch(ExecException ee)
        {
            IOException oughtToBeEE = new IOException();
            oughtToBeEE.initCause(ee);
            throw oughtToBeEE;
        }
    }

    protected static Tuple max(Tuple input, PigProgressable reporter) throws ExecException
    {
        DataBag values = (DataBag) input.get(0);

        // if we were handed an empty bag, return NULL
        // this is in compliance with SQL standard
        if(values.size() == 0)
            return null;

        long curMax = 0;
        Tuple curMaxTuple = null;
        int n=0;
        for(Iterator<Tuple> it = values.iterator(); it.hasNext();)
        {
            if(reporter!=null && ++n%PROGRESS_FREQUENCY==0)
                reporter.progress();
            Tuple t = it.next();
            try
            {
                long d = (Long) t.get(0);
                if(curMaxTuple == null || d > curMax)
                {
                    curMax = d;
                    curMaxTuple = t;
                }

            } catch(RuntimeException exp)
            {
                ExecException newE = new ExecException("Error processing: " + t.toString() + exp.getMessage());
                newE.initCause(exp);
                throw newE;
            }
        }

        return curMaxTuple;
    }

    @Override
    public String getInitial()
    {
        return Initial.class.getName();
    }

    @Override
    public String getIntermed()
    {
        return Intermediate.class.getName();
    }

    @Override
    public String getFinal()
    {
        return Final.class.getName();
    }

}
TOP

Related Classes of org.apache.pig.piggybank.evaluation.MaxTupleBy1stField$Final

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.