/**
* Copyright 2011 Nube Technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
package co.nubetech.hiho.hive;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.ArrayList;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.log4j.Logger;
import co.nubetech.hiho.mapreduce.lib.db.apache.DBConfiguration;
import co.nubetech.hiho.common.HIHOConf;
import co.nubetech.hiho.common.HIHOException;
import co.nubetech.hiho.common.OutputStrategyEnum;
import co.nubetech.hiho.mapreduce.lib.db.ColumnInfo;
import co.nubetech.hiho.mapreduce.lib.db.GenericDBWritable;
public class HiveUtility {
private final static Logger logger = Logger
.getLogger(co.nubetech.hiho.hive.HiveUtility.class);
public static void createTable(Configuration conf, Job job,
GenericDBWritable writable, int jobCounter) throws HIHOException {
String createQuery = null;
String loadQuery = null;
String tableName = getTableName(conf);
FileOutputStream fos = null;
boolean isCreateTable = true;
boolean isDyanamicPartition = false;
if (jobCounter > 0) {
isCreateTable = false;
}
try {
fos = getFileOutputStream(conf, jobCounter, tableName);
BufferedWriter w = new BufferedWriter(new OutputStreamWriter(fos));
String partitionBy = conf.get(HIHOConf.HIVE_PARTITION_BY);
// tableName = getTableName(conf);
if (conf.get(HIHOConf.HIVE_PARTITION_BY) == null) {
if (isCreateTable) {
createQuery = getCreateQuery(conf, writable);
w.append(createQuery + "\n");
}
loadQuery = getLoadQuery(conf,
conf.get(HIHOConf.INPUT_OUTPUT_PATH), writable);
w.append(loadQuery + "\n");
logger.warn("\nThe queries are: " + createQuery + "\n"
+ loadQuery);
} else {
loadQuery = getLoadQuery(conf,
conf.get(HIHOConf.INPUT_OUTPUT_PATH), writable,
partitionBy);
if (isCreateTable) {
createQuery = getCreateQuery(conf, writable, partitionBy);
w.append(createQuery + "\n");
}
w.append(loadQuery + "\n");
logger.warn("\nThe queries are: " + createQuery + "\n"
+ loadQuery);
if (getDynamicPartitionBy(partitionBy) != null) {
isDyanamicPartition = true;
}
}
if (!isDyanamicPartition) {
runQuery(createQuery, loadQuery, isCreateTable, conf);
} else {
String insertQuery = getInsertQueryFromTmpToMain(conf,
writable, partitionBy);
String tmpCreateQuery = getTmpCreateQuery(conf, writable);
runQuery(tmpCreateQuery, insertQuery, createQuery, loadQuery,
conf, isCreateTable);
w.append(tmpCreateQuery + "\n" + loadQuery + "\n");
if (isCreateTable) {
w.append(createQuery + "\n");
}
w.append(insertQuery + "\n");
logger.warn("\nThe queries are: " + createQuery + "\n"
+ loadQuery + "\n" + tmpCreateQuery + "\n"
+ insertQuery);
}
// w.append(createQuery + "\n" + loadQuery);
w.close();
fos.close();
} catch (Exception e) {
e.printStackTrace();
throw new HIHOException("Could not generate hive table", e);
}
}
// This function is used to get the FileOutputStream for storing script of
// hive queries
public static FileOutputStream getFileOutputStream(Configuration conf,
int jobCounter, String tableName) throws HIHOException {
FileOutputStream fos = null;
try {
if (jobCounter == 0) {
File file = new File(new File(
conf.get(HIHOConf.INPUT_OUTPUT_LOADTO_PATH)),
"hiveScript" + tableName + ".txt");
fos = new FileOutputStream(file);
} else {
String path = conf.get(HIHOConf.INPUT_OUTPUT_LOADTO_PATH)
+ "hiveScript" + tableName + ".txt";
fos = new FileOutputStream(path, true);
}
} catch (FileNotFoundException e) {
throw new HIHOException();
}
return fos;
}
// This function is used to get tableName for table we going to create in
// Hive
public static String getTableName(Configuration conf) throws HIHOException {
String tableName = conf.get(HIHOConf.HIVE_TABLE_NAME);
// if user did not specify hive table name, lets try to deduce it
// automatically
// get it from the name of the table we are querying against
if (tableName == null) {
tableName = conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY);
}
if (tableName == null) {
if (conf.get(DBConfiguration.INPUT_QUERY) != null) {
String query = conf.get(DBConfiguration.INPUT_QUERY);
StringTokenizer queryTokens = new StringTokenizer(query, " ");
while (queryTokens.hasMoreTokens()) {
if (queryTokens.nextToken().equalsIgnoreCase("FROM")) {
tableName = queryTokens.nextToken();
}
}
}
}
// we are missing a couple of cases here. FROM table1, table2. FROM
// table1 inner join blah blah
// lets leave them for now and address them when the user need is
// clearer
/*
* if (conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY) != null) {
* tableName = conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY); }
* else if (conf.get(DBConfiguration.INPUT_QUERY) != null) { String
* query = conf.get(DBConfiguration.INPUT_QUERY); StringTokenizer
* queryTokens = new StringTokenizer(query, " "); while
* (queryTokens.hasMoreTokens()) { if
* (queryTokens.nextToken().equalsIgnoreCase("FROM")) { tableName =
* queryTokens.nextToken(); } } } if
* (conf.get(HIHOConf.HIVE_MULTIPLE_PARTITION_BY) != null &&
* conf.get(HIHOConf.HIVE_TABLE_NAME) != null) { tableName =
* conf.get(HIHOConf.HIVE_TABLE_NAME); }
*/
if (tableName == null) {
throw new HIHOException("Cannot get hive table name");
}
return tableName;
}
/*
* This function is used to generate insertQuery which loads data from one
* table to another table in Hive, in case of dynamicPartition we create
* temporary table to load data then we transfer that data to our main table
* through this query
*/
public static String getInsertQueryFromTmpToMain(Configuration conf,
GenericDBWritable writable, String partitionBy)
throws HIHOException {
StringBuilder builder = new StringBuilder();
String tableName = getTableName(conf);
builder.append("FROM `" + tableName
+ "tmp` tmp INSERT OVERWRITE TABLE `" + tableName
+ "` PARTITION (");
// Here we are computing columnName and its value for partition,in
// format columnName='columnValue',columnName...
StringTokenizer tempToken = new StringTokenizer(partitionBy, ",");
String columnsData = " ";
while (tempToken.hasMoreTokens()) {
StringTokenizer partitionData = new StringTokenizer(
tempToken.nextToken(), ":");
columnsData = columnsData + partitionData.nextToken();
partitionData.nextToken();
if (partitionData.hasMoreTokens()) {
columnsData = columnsData + "='" + partitionData.nextToken()
+ "'";
}
if (tempToken.hasMoreTokens()) {
columnsData = columnsData + ",";
}
}
//
builder.append(columnsData + ") SELECT " + getTmpTableColumns(writable));
return builder.toString();
}
// This function is used to get columns name from which we want to insert
// values in our table columns,in format
// `tableAlias`.`columnName`,`tableAlias`.`columnName`...
public static String getTmpTableColumns(GenericDBWritable writable)
throws HIHOException {
StringBuilder builder = new StringBuilder();
ArrayList<ColumnInfo> columns = writable.getColumns();
for (ColumnInfo column : columns) {
builder.append("`tmp`");
builder.append(".`" + column.getName());
builder.append("`,");
}
// above loop adds extra comma, removing.
if (builder.length() > 0) {
builder.deleteCharAt(builder.lastIndexOf(","));
}
return builder.toString();
}
// This function is used to get query for temporary table, in case of
// dynamicPartition
public static String getTmpCreateQuery(Configuration conf,
GenericDBWritable writable) throws HIHOException {
StringBuilder builder = new StringBuilder();
builder.append("CREATE TABLE `" + getTableName(conf) + "tmp` ( ");
builder.append(getColumns(writable));
builder.append(") ROW FORMAT ");
OutputStrategyEnum outputStrategy = OutputStrategyEnum.valueOf(conf
.get(HIHOConf.INPUT_OUTPUT_STRATEGY));
if (outputStrategy == OutputStrategyEnum.DELIMITED) {
builder.append("DELIMITED FIELDS TERMINATED BY '");
builder.append(conf.get(HIHOConf.INPUT_OUTPUT_DELIMITER));
builder.append("' STORED AS TEXTFILE");
}
return builder.toString();
}
// This function is used return columnNames and columnType for partitionBy
// clause in createTable query in right format.It takes data in format
// columnName:columnType:[columnValue],columnName:columnType:[columnValue]...
// and return in format columnName columnValue,columnName columnValue...
public static String getPartitionBy(String partitionBy) {
StringTokenizer partitionToken = new StringTokenizer(partitionBy, ";");
String columnsData = "";
while (partitionToken.hasMoreTokens()) {
StringTokenizer partitionData = new StringTokenizer(
partitionToken.nextToken(), ":");
columnsData = columnsData + partitionData.nextToken() + " "
+ partitionData.nextToken();
if (partitionToken.hasMoreTokens()) {
columnsData = columnsData + ",";
}
}
return columnsData;
}
public static String getColumnsForPartitionedCreateTables(
Configuration conf, String columns) {
// columns format columnsName columnType,columnsName1 columnType1
// get the column by which we want to do partition by
// and remove that from the table creation
String dynamicPartitionBy = getDynamicPartitionBy(conf
.get(HIHOConf.HIVE_PARTITION_BY));
StringTokenizer columnTokens = new StringTokenizer(columns, ",");
columns = " ";
int count = 0;
while (columnTokens.hasMoreTokens()) {
String columnData = columnTokens.nextToken();
StringTokenizer columnDataTokens = new StringTokenizer(columnData,
"` ");
String columnName = columnDataTokens.nextToken();
if (!columnName.equals(dynamicPartitionBy) && count > 0) {
columns = columns + ",";
}
if (!columnName.equals(dynamicPartitionBy)) {
columns = columns + columnData;
count++;
}
}
return columns;
}
// This function is used to get create table query in the case where user
// has defined values in partitioned by clause
public static String getCreateQuery(Configuration conf,
GenericDBWritable writable, String partitionBy)
throws HIHOException {
StringBuilder builder = new StringBuilder();
String tableName = getTableName(conf);
if (conf.get(HIHOConf.HIVE_TABLE_OVERWRITE, "false").equals("true")) {
builder.append("CREATE TABLE IF NOT EXISTS `" + tableName + "` (");
} else {
builder.append("CREATE TABLE `" + tableName + "` (");
}
String columns = getColumns(writable);
builder.append(getColumnsForPartitionedCreateTables(conf, columns));
builder.append(") PARTITIONED BY (" + getPartitionBy(partitionBy));
builder.append(")");
builder = appendClusteredByToCreateQuery(conf, builder);
builder.append(" ROW FORMAT ");
builder = appendDelimitedDataToCreateQuery(conf, builder);
return builder.toString();
}
// This function is used to get create table query in the case where user
// has not defined values in partitioned by clause
public static String getCreateQuery(Configuration conf,
GenericDBWritable writable) throws HIHOException {
StringBuilder builder = new StringBuilder();
String tableName = getTableName(conf);
if (conf.get(HIHOConf.HIVE_TABLE_OVERWRITE, "false").equals("true")) {
builder.append("CREATE TABLE IF NOT EXISTS `" + tableName + "` ( ");
} else {
builder.append("CREATE TABLE `" + tableName + "` ( ");
}
builder.append(getColumns(writable));
builder.append(")");
builder = appendClusteredByToCreateQuery(conf, builder);
builder.append(" ROW FORMAT ");
builder = appendDelimitedDataToCreateQuery(conf, builder);
return builder.toString();
}
// //
public static StringBuilder appendDelimitedDataToCreateQuery(
Configuration conf, StringBuilder builder) {
OutputStrategyEnum outputStrategy = OutputStrategyEnum.valueOf(conf
.get(HIHOConf.INPUT_OUTPUT_STRATEGY));
if (outputStrategy == OutputStrategyEnum.DELIMITED) {
builder.append("DELIMITED FIELDS TERMINATED BY '");
builder.append(conf.get(HIHOConf.INPUT_OUTPUT_DELIMITER));
builder.append("' STORED AS TEXTFILE");
}
return builder;
}
public static StringBuilder appendClusteredByToCreateQuery(
Configuration conf, StringBuilder builder) throws HIHOException {
if (conf.get(HIHOConf.HIVE_CLUSTERED_BY) != null) {
StringTokenizer clusterData = new StringTokenizer(
conf.get(HIHOConf.HIVE_CLUSTERED_BY), ":");
builder.append(" CLUSTERED BY (" + clusterData.nextToken());
builder.append(")");
if (conf.get(HIHOConf.HIVE_SORTED_BY) != null) {
builder.append(" SORTED BY ("
+ conf.get(HIHOConf.HIVE_SORTED_BY));
builder.append(")");
}
try {
String buckets = clusterData.nextToken();
if (buckets == null) {
throw new HIHOException(
"The number of buckets wih clustered by is not defined");
}
builder.append(" INTO " + buckets + " BUCKETS");
} catch (NoSuchElementException e) {
throw new HIHOException(
"The number of buckets wih clustered by is not defined");
}
}
return builder;
}
/*
* Parses the partition by configuration of the form
* col:type[:value],col1:type1[:value1] and provides the name of the dynamic
* partition
*/
public static String getDynamicPartitionBy(String partitionBy) {
StringTokenizer partitionToken = new StringTokenizer(partitionBy, ";");
String dynamicPartitionBy = null;
while (partitionToken.hasMoreTokens()) {
StringTokenizer columnTokens = new StringTokenizer(
partitionToken.nextToken(), ":");
String columnNames = columnTokens.nextToken();
columnTokens.nextToken();
if (!columnTokens.hasMoreTokens()) {
dynamicPartitionBy = columnNames;
}
}
return dynamicPartitionBy;
}
public static String getLoadQuery(Configuration conf, String hdfsDir,
GenericDBWritable writable, String partitionBy)
throws HIHOException {
String loadQuery = getLoadQuery(conf, hdfsDir, writable);
StringTokenizer tempToken = new StringTokenizer(partitionBy, ";");
String columnsData = " ";
while (tempToken.hasMoreTokens()) {
StringTokenizer columnTokens = new StringTokenizer(
tempToken.nextToken(), ":");
String columnNames = columnTokens.nextToken();
columnsData = columnsData + columnNames;
columnTokens.nextToken();
if (columnTokens.hasMoreTokens()) {
columnsData = columnsData + "='" + columnTokens.nextToken()
+ "'";
}
if (tempToken.hasMoreTokens()) {
columnsData = columnsData + ",";
}
}
if (getDynamicPartitionBy(conf.get(HIHOConf.HIVE_PARTITION_BY)) == null) {
loadQuery = loadQuery + "` PARTITION (" + columnsData + ")";
} else {
loadQuery = loadQuery + "tmp`";
}
return loadQuery;
}
public static String getLoadQuery(Configuration conf, String hdfsDir,
GenericDBWritable writable) throws HIHOException {
StringBuilder builder = new StringBuilder();
builder.append("LOAD DATA INPATH '");
builder.append(hdfsDir);
builder.append("' OVERWRITE INTO TABLE `");
builder.append(getTableName(conf));
if (conf.get(HIHOConf.HIVE_PARTITION_BY) == null) {
builder.append("`");
}
return builder.toString();
}
// //Accept String query to be run
public static void runQuery(String createQuery, String loadQuery,
boolean createTable, Configuration conf) throws HIHOException {
try {
Class.forName(conf.get(HIHOConf.HIVE_DRIVER));
Connection con = DriverManager.getConnection(
conf.get(HIHOConf.HIVE_URL),
conf.get(HIHOConf.HIVE_USR_NAME),
conf.get(HIHOConf.HIVE_PASSWORD));
Statement stmt = con.createStatement();
// stmt.executeQuery("drop table " + tableName);
if (createTable) {
stmt.executeQuery("drop table " + getTableName(conf));
stmt.executeQuery(createQuery);
}
stmt.executeQuery(loadQuery);
/*
* ResultSet rs = stmt.executeQuery("select * from " + tableName);
* while (rs.next()) { System.out.println(rs.getString(1) + "\t" +
* rs.getString(2)); }
*/
stmt.close();
con.close();
} catch (ClassNotFoundException e) {
e.printStackTrace();
} catch (SQLException e) {
e.printStackTrace();
}
}
public static void runQuery(String tmpCreateQuery, String insertQuery,
String createQuery, String loadQuery, Configuration conf,
boolean createTable) throws HIHOException {
try {
Class.forName(conf.get(HIHOConf.HIVE_DRIVER));
Connection con = DriverManager.getConnection(
conf.get(HIHOConf.HIVE_URL),
conf.get(HIHOConf.HIVE_USR_NAME),
conf.get(HIHOConf.HIVE_PASSWORD));
Statement stmt = con.createStatement();
stmt.executeQuery("drop table " + getTableName(conf) + "tmp");
stmt.executeQuery("drop table " + getTableName(conf));
stmt.executeQuery(tmpCreateQuery);
stmt.executeQuery(loadQuery);
/*
* ResultSet rs2 = stmt.executeQuery("select * from " + tableName +
* "tmp"); while (rs2.next()) { System.out.println(rs2.getString(1)
* + "\t" + rs2.getString(2)); }
*/
if (createTable) {
stmt.executeQuery(createQuery);
}
stmt.executeQuery(insertQuery);
stmt.executeQuery("drop table " + getTableName(conf) + "tmp");
stmt.close();
con.close();
} catch (ClassNotFoundException e) {
e.printStackTrace();
} catch (SQLException e) {
e.printStackTrace();
}
}
public static String getColumns(GenericDBWritable writable)
throws HIHOException {
StringBuilder builder = new StringBuilder();
ArrayList<ColumnInfo> columns = writable.getColumns();
for (ColumnInfo column : columns) {
builder.append("`");
builder.append(column.getName());
builder.append("` ");
builder.append(getColumnType(column.getType()));
builder.append(",");
}
if (builder.length() > 0) {
builder.deleteCharAt(builder.lastIndexOf(","));
}
return builder.toString();
}
// //Column Types to be verified and junits added
public static String getColumnType(int columnType) throws HIHOException {
String returnType = null;
switch (columnType) {
case Types.BIGINT:
returnType = "long";
break;
case Types.BOOLEAN:
case Types.CHAR:
returnType = "string";
break;
case Types.DECIMAL:
case Types.REAL:
case Types.NUMERIC:
case Types.DOUBLE:
returnType = "double";
break;
case Types.FLOAT:
returnType = "float";
break;
case Types.TINYINT:
case Types.SMALLINT:
case Types.INTEGER:
returnType = "int";
break;
case Types.BINARY:
case Types.CLOB:
case Types.BLOB:
case Types.VARBINARY:
case Types.LONGVARBINARY:
returnType = "bytearray";
break;
case Types.VARCHAR:
case Types.LONGVARCHAR:
returnType = "string";
break;
default:
throw new HIHOException("Unsupported type");
}
return returnType;
}
}