Package hivemall.utils.hadoop

Source Code of hivemall.utils.hadoop.HiveUtils

/*
* Hivemall: Hive scalable Machine Learning Library
*
* Copyright (C) 2013
*   National Institute of Advanced Industrial Science and Technology (AIST)
*   Registration Number: H25PRO-1520
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/
package hivemall.utils.hadoop;

import static hivemall.HivemallConstants.BIGINT_TYPE_NAME;
import static hivemall.HivemallConstants.BOOLEAN_TYPE_NAME;
import static hivemall.HivemallConstants.INT_TYPE_NAME;
import static hivemall.HivemallConstants.SMALLINT_TYPE_NAME;
import static hivemall.HivemallConstants.STRING_TYPE_NAME;
import static hivemall.HivemallConstants.TINYINT_TYPE_NAME;

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazy.LazyString;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantBooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantLongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantStringObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public final class HiveUtils {

    private HiveUtils() {}

    public static int parseInt(final Object o) {
        if(o instanceof Integer) {
            return ((Integer) o).intValue();
        }
        if(o instanceof IntWritable) {
            return ((IntWritable) o).get();
        }
        if(o instanceof LongWritable) {
            long l = ((LongWritable) o).get();
            if(l > 0x7fffffffL) {
                throw new IllegalArgumentException("feature index must be less than "
                        + Integer.MAX_VALUE + ", but was " + l);
            }
            return (int) l;
        }
        String s = o.toString();
        return Integer.parseInt(s);
    }

    public static Text asText(final Object o) {
        if(o == null) {
            return null;
        }
        if(o instanceof Text) {
            return (Text) o;
        }
        if(o instanceof LazyString) {
            LazyString l = (LazyString) o;
            return l.getWritableObject();
        }
        if(o instanceof String) {
            String s = (String) o;
            return new Text(s);
        }
        String s = o.toString();
        return new Text(s);
    }

    public static int asJavaInt(final Object o, final int nullValue) {
        if(o == null) {
            return nullValue;
        }
        return asJavaInt(o);
    }

    public static int asJavaInt(final Object o) {
        if(o == null) {
            throw new IllegalArgumentException();
        }
        if(o instanceof Integer) {
            return ((Integer) o).intValue();
        }
        if(o instanceof LazyInteger) {
            IntWritable i = ((LazyInteger) o).getWritableObject();
            return i.get();
        }
        if(o instanceof IntWritable) {
            return ((IntWritable) o).get();
        }
        String s = o.toString();
        return Integer.parseInt(s);
    }

    public static String getConstString(ObjectInspector oi) throws UDFArgumentException {
        if(!ObjectInspectorUtils.isConstantObjectInspector(oi)) {
            throw new UDFArgumentException("argument must be a constant value: "
                    + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
        }
        WritableConstantStringObjectInspector stringOI = (WritableConstantStringObjectInspector) oi;
        return stringOI.getWritableConstantValue().toString();
    }

    public static boolean getConstBoolean(ObjectInspector oi) throws UDFArgumentException {
        if(!ObjectInspectorUtils.isConstantObjectInspector(oi)) {
            throw new UDFArgumentException("argument must be a constant value: "
                    + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
        }
        String typeName = oi.getTypeName();
        if(!BOOLEAN_TYPE_NAME.equals(typeName)) {
            throw new UDFArgumentException("argument must be a boolean value: "
                    + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
        }
        WritableConstantBooleanObjectInspector booleanOI = (WritableConstantBooleanObjectInspector) oi;
        return booleanOI.getWritableConstantValue().get();
    }

    public static boolean isBigInt(ObjectInspector oi) {
        String typeName = oi.getTypeName();
        return BIGINT_TYPE_NAME.equals(typeName);
    }

    public static int getConstInt(ObjectInspector oi) throws UDFArgumentException {
        if(!ObjectInspectorUtils.isConstantObjectInspector(oi)) {
            throw new UDFArgumentException("argument must be a constant value: "
                    + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
        }
        String typeName = oi.getTypeName();
        if(!INT_TYPE_NAME.equals(typeName)) {
            throw new UDFArgumentException("argument must be a int value: "
                    + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
        }
        WritableConstantIntObjectInspector intOI = (WritableConstantIntObjectInspector) oi;
        return intOI.getWritableConstantValue().get();
    }

    public static long getConstLong(ObjectInspector oi) throws UDFArgumentException {
        if(!ObjectInspectorUtils.isConstantObjectInspector(oi)) {
            throw new UDFArgumentException("argument must be a constant value: "
                    + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
        }
        String typeName = oi.getTypeName();
        if(!BIGINT_TYPE_NAME.equals(typeName)) {
            throw new UDFArgumentException("argument must be a bigint value: "
                    + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
        }
        WritableConstantLongObjectInspector longOI = (WritableConstantLongObjectInspector) oi;
        return longOI.getWritableConstantValue().get();
    }

    public static long getAsConstLong(ObjectInspector numberOI) throws UDFArgumentException {
        if(!ObjectInspectorUtils.isConstantObjectInspector(numberOI)) {
            throw new UDFArgumentException("argument must be a constant value: "
                    + TypeInfoUtils.getTypeInfoFromObjectInspector(numberOI));
        }
        String typeName = numberOI.getTypeName();
        if(BIGINT_TYPE_NAME.equals(typeName)) {
            WritableConstantLongObjectInspector longOI = (WritableConstantLongObjectInspector) numberOI;
            return longOI.getWritableConstantValue().get();
        } else if(INT_TYPE_NAME.equals(typeName)) {
            WritableConstantIntObjectInspector intOI = (WritableConstantIntObjectInspector) numberOI;
            return (long) intOI.getWritableConstantValue().get();
        } else if(SMALLINT_TYPE_NAME.equals(typeName)) {
            WritableConstantShortObjectInspector shortOI = (WritableConstantShortObjectInspector) numberOI;
            return (long) shortOI.getWritableConstantValue().get();
        } else if(TINYINT_TYPE_NAME.equals(typeName)) {
            WritableConstantByteObjectInspector byteOI = (WritableConstantByteObjectInspector) numberOI;
            return (long) byteOI.getWritableConstantValue().get();
        }
        throw new UDFArgumentException("Unexpected argument type to cast as long: "
                + TypeInfoUtils.getTypeInfoFromObjectInspector(numberOI));
    }

    public static Object getConstValue(ObjectInspector oi) throws UDFArgumentException {
        if(!ObjectInspectorUtils.isConstantObjectInspector(oi)) {
            throw new UDFArgumentException("argument must be a constant value: "
                    + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
        }
        return ((ConstantObjectInspector) oi).getWritableConstantValue();
    }

    public static PrimitiveObjectInspector asPrimitiveObjectInspector(ObjectInspector oi)
            throws UDFArgumentException {
        if(oi.getCategory() != Category.PRIMITIVE) {
            throw new UDFArgumentException("Is not PrimitiveObjectInspector: "
                    + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
        }
        return (PrimitiveObjectInspector) oi;
    }

    public static boolean isStringOI(ObjectInspector oi) {
        String typeName = oi.getTypeName();
        return STRING_TYPE_NAME.equals(typeName);
    }

    public static LazySimpleSerDe getKeyValueLineSerde(PrimitiveObjectInspector keyOI, PrimitiveObjectInspector valueOI)
            throws SerDeException {
        LazySimpleSerDe serde = new LazySimpleSerDe();
        Configuration conf = new Configuration();
        Properties tbl = new Properties();
        tbl.setProperty("columns", "key,value");
        tbl.setProperty("columns.types", keyOI.getTypeName() + "," + valueOI.getTypeName());
        serde.initialize(conf, tbl);
        return serde;
    }

    public static LazySimpleSerDe getLineSerde(PrimitiveObjectInspector... OIs)
            throws SerDeException {
        if(OIs.length == 0) {
            throw new IllegalArgumentException("OIs must be specified");
        }
        LazySimpleSerDe serde = new LazySimpleSerDe();
        Configuration conf = new Configuration();
        Properties tbl = new Properties();

        StringBuilder columnNames = new StringBuilder();
        StringBuilder columnTypes = new StringBuilder();
        for(int i = 0; i < OIs.length; i++) {
            columnNames.append('c').append(i + 1).append(',');
            columnTypes.append(OIs[i].getTypeName()).append(',');
        }
        columnNames.deleteCharAt(columnNames.length() - 1);
        columnTypes.deleteCharAt(columnTypes.length() - 1);

        tbl.setProperty("columns", columnNames.toString());
        tbl.setProperty("columns.types", columnTypes.toString());
        serde.initialize(conf, tbl);
        return serde;
    }
}
TOP

Related Classes of hivemall.utils.hadoop.HiveUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.