/*
* Copyright (c) 2014 InfiniDB, Inc.
*
* InfiniDB, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package infinidb.hadoop.db;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.mapred.lib.db.*;
/**
* A InputFormat that reads input data from an SQL table.
* <p>
* DBInputFormat emits LongWritables containing the record number as
* key and DBWritables as value.
*
* The SQL query, and input class can be using one of the two
* setInput methods.
*/
public class InfiniDBInputFormat<T extends DBWritable>
implements InputFormat<LongWritable, T>, JobConfigurable
{
/**
* A RecordReader that reads records from a SQL table.
* Emits LongWritables containing the record number as
* key and DBWritables as value.
*/
protected class DBRecordReader implements RecordReader<LongWritable, T>
{
private ResultSet results;
private Statement statement;
private Class<T> inputClass;
private JobConf job;
private InfiniDBInputSplit split;
private long pos = 0;
/**
* @param split The InputSplit to read data for
* @throws SQLException
*/
protected DBRecordReader(InfiniDBInputSplit split, Class<T> inputClass, JobConf job) throws SQLException
{
this.inputClass = inputClass;
this.split = split;
this.job = job;
statement = connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
//statement.setFetchSize(Integer.MIN_VALUE);
results = statement.executeQuery(getSelectQuery());
}
/** @InfiniDB */
public <S> String concat(S[] arr, String sep)
{
String ret = "";
for(int i=0; i < arr.length; i++)
{
ret = ret + arr[i];
if(i < arr.length-1)
{
ret = ret + sep;
}
}
return ret;
}
/** @InfiniDB Returns the query for selecting the records,
* subclasses can override this for custom behaviour.*/
protected String getSelectQuery()
{
InfiniDBConfiguration conf = new InfiniDBConfiguration(job);
StringBuilder query = new StringBuilder();
query.append("SELECT ");
query.append(concat(conf.getInputFieldNames(), ","));
query.append(" FROM ");
query.append(conf.getInputTableName());
query.append(" WHERE ");
query.append(split.splitKey + ">=" + split.getStart());
query.append(" AND ");
query.append(split.splitKey + "<" + split.getEnd());
if (conditions != null && conditions.length() > 0)
query.append(" AND (").append(conditions).append(")");
return query.toString();
}
/** {@inheritDoc} */
public void close() throws IOException
{
try
{
connection.commit();
results.close();
statement.close();
} catch (SQLException e)
{
throw new IOException(e.getMessage());
}
}
/** {@inheritDoc} */
public LongWritable createKey()
{
return new LongWritable();
}
/** {@inheritDoc} */
public T createValue()
{
return ReflectionUtils.newInstance(inputClass, job);
}
/** {@inheritDoc} */
public long getPos() throws IOException
{
return pos;
}
/** {@inheritDoc} */
public float getProgress() throws IOException
{
return pos / (float)split.getLength();
}
/** {@inheritDoc} */
public boolean next(LongWritable key, T value) throws IOException
{
try
{
if (!results.next())
return false;
// Set the key field value as the output key value
key.set(pos + split.getStart());
value.readFields(results);
pos ++;
} catch (SQLException e)
{
throw new IOException(e.getMessage());
}
return true;
}
}
/**
* A Class that does nothing, implementing DBWritable
*/
public static class NullDBWritable implements DBWritable, Writable {
@Override
public void readFields(DataInput in) throws IOException { }
@Override
public void readFields(ResultSet arg0) throws SQLException { }
@Override
public void write(DataOutput out) throws IOException { }
@Override
public void write(PreparedStatement arg0) throws SQLException { }
}
/**
* A InputSplit that spans a set of rows
*/
protected static class InfiniDBInputSplit implements InputSplit {
private long end = 0;
private long start = 0;
private String splitKey;
/**
* Default Constructor
*/
public InfiniDBInputSplit() {
}
/**
* @InfiniDB
* Convenience Constructor
* @param start the index of the first row to select
* @param end the index of the last row to select
*/
public InfiniDBInputSplit(long start, long end, String key) {
this.start = start;
this.end = end;
this.splitKey = key;
}
/** {@inheritDoc} */
public String[] getLocations() throws IOException {
return new String[] {};
}
/**
* @return The index of the first row to select
*/
public long getStart() {
return start;
}
/**
* @return The index of the last row to select
*/
public long getEnd() {
return end;
}
/**
* @return The total row count in this split
*/
public long getLength() throws IOException {
return end - start;
}
/** {@inheritDoc} */
public void readFields(DataInput input) throws IOException {
start = input.readLong();
end = input.readLong();
splitKey = WritableUtils.readString(input);
}
/** {@inheritDoc} */
public void write(DataOutput output) throws IOException {
output.writeLong(start);
output.writeLong(end);
WritableUtils.writeString(output, splitKey);
}
}
private String conditions;
private Connection connection;
private String tableName;
private String[] fieldNames;
private InfiniDBConfiguration dbConf;
/** {@inheritDoc} */
public void configure(JobConf job) {
dbConf = new InfiniDBConfiguration(job);
try {
this.connection = dbConf.getConnection();
this.connection.setAutoCommit(false);
connection.setTransactionIsolation(Connection.TRANSACTION_SERIALIZABLE);
}
catch (Exception ex) {
throw new RuntimeException(ex);
}
tableName = dbConf.getInputTableName();
fieldNames = dbConf.getInputFieldNames();
conditions = dbConf.getInputConditions();
}
/** {@inheritDoc} */
@SuppressWarnings("unchecked")
public RecordReader<LongWritable, T> getRecordReader(InputSplit split,
JobConf job, Reporter reporter) throws IOException {
Class inputClass = dbConf.getInputClass();
try {
return new DBRecordReader((InfiniDBInputSplit) split, inputClass, job);
}
catch (SQLException ex) {
throw new IOException(ex.getMessage());
}
}
/** @InfiniDB */
private long getMaxVal(InfiniDBConfiguration conf, Connection conn, String tableName, String col) {
if(conf.getMaxVal()!=null) {
return conf.getMaxVal();
}
try {
PreparedStatement s = conn.prepareStatement("SELECT MAX(" + col + ") FROM " + tableName);
ResultSet rs = s.executeQuery();
rs.next();
long ret = rs.getLong(1);
rs.close();
s.close();
return ret;
} catch(SQLException e) {
throw new RuntimeException(e);
}
}
/** @InfiniDB */
private long getMinVal(InfiniDBConfiguration conf, Connection conn, String tableName, String col ) {
if(conf.getMinVal()!=null) {
return conf.getMinVal();
}
try {
PreparedStatement s = conn.prepareStatement("SELECT MIN(" + col + ") FROM " + tableName);
ResultSet rs = s.executeQuery();
rs.next();
long ret = rs.getLong(1);
rs.close();
s.close();
return ret;
} catch(SQLException e) {
throw new RuntimeException(e);
}
}
/** {@inheritDoc}
* @InfiniDB
*/
public InputSplit[] getSplits(JobConf job, int chunks) throws IOException {
try {
InfiniDBConfiguration conf = new InfiniDBConfiguration(job);
Connection conn = conf.getConnection();
String splitKey = conf.getSplitKey();
long maxVal = getMaxVal(conf, conn, conf.getInputTableName(), conf.getSplitKey());
long minVal = getMinVal(conf, conn, conf.getInputTableName(), conf.getSplitKey());
System.out.println("max=" + maxVal);
System.out.println("min=" + minVal);
InputSplit[] ret = new InputSplit[chunks];
long chunkSize = (maxVal - minVal + 1) / chunks + 1;
long start = minVal;
for (int i = 0; i < chunks; i++){
ret[i] = new InfiniDBInputSplit(start, start+chunkSize, splitKey);
start += chunkSize;
}
conn.close();
return ret;
} catch(SQLException e) {
throw new RuntimeException(e);
}
}
/** Returns the query for getting the total number of rows,
* subclasses can override this for custom behaviour.*/
protected String getCountQuery() {
if(dbConf.getInputCountQuery() != null) {
return dbConf.getInputCountQuery();
}
StringBuilder query = new StringBuilder();
query.append("SELECT COUNT(*) FROM " + tableName);
if (conditions != null && conditions.length() > 0)
query.append(" WHERE " + conditions);
return query.toString();
}
/**
* @InfiniDB
* Initializes the map-part of the job with the appropriate input settings.
*
* @param job The job
* @param inputClass the class object implementing DBWritable, which is the
* Java object holding tuple fields.
* @param tableName The table to read data from
* @param conditions The condition which to select data with, eg. '(updated >
* 20070101 AND length > 0)'
* @param key the field name used for split key.
* @param fieldNames The field names in the table
* @see #setInput(JobConf, Class, String, String)
*/
public static void setInput(JobConf job, Class<? extends DBWritable> inputClass,
String tableName,String conditions, String key, String... fieldNames) {
job.setInputFormat(InfiniDBInputFormat.class);
InfiniDBConfiguration dbConf = new InfiniDBConfiguration(job);
dbConf.setInputClass(inputClass);
dbConf.setInputTableName(tableName);
dbConf.setInputFieldNames(fieldNames);
dbConf.setInputConditions(conditions);
dbConf.setSplitKey(key);
}
/**
* @InfiniDB
* Initializes the map-part of the job with the appropriate input settings.
*
* @param job The job
* @param inputClass the class object implementing DBWritable, which is the
* Java object holding tuple fields.
* @param inputQuery the input query to select fields. Example :
* "SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
* @param inputCountQuery the input query that returns the number of records in
* the table.
* Example : "SELECT COUNT(f1) FROM Mytable"
* @see #setInput(JobConf, Class, String, String, String, String...)
*/
public static void setInput(JobConf job, Class<? extends DBWritable> inputClass,
String inputQuery, String inputCountQuery) {
job.setInputFormat(InfiniDBInputFormat.class);
InfiniDBConfiguration dbConf = new InfiniDBConfiguration(job);
dbConf.setInputClass(inputClass);
dbConf.setInputQuery(inputQuery);
dbConf.setInputCountQuery(inputCountQuery);
}
}