/*
* Hivemall: Hive scalable Machine Learning Library
*
* Copyright (C) 2013
* National Institute of Advanced Industrial Science and Technology (AIST)
* Registration Number: H25PRO-1520
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package hivemall.utils.hadoop;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URI;
import java.util.Iterator;
import java.util.Map.Entry;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.exec.MapredContextAccessor;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskID;
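
/**
 * Miscellaneous Hadoop utilities for Hive UDFs: codec-aware file readers and
 * resolution of job/task identifiers from the current {@link MapredContext}.
 *
 * <p>A minimal usage sketch (the file name below is hypothetical, and the call is
 * assumed to run inside a Hive task so that {@code MapredContextAccessor.get()}
 * returns a non-null context):</p>
 *
 * <pre>{@code
 * // read a (possibly gzip-compressed) side file shipped to the task
 * BufferedReader reader = HadoopUtils.getBufferedReader(new File("model.tsv.gz"));
 * try {
 *     String line;
 *     while ((line = reader.readLine()) != null) {
 *         // parse each line ...
 *     }
 * } finally {
 *     reader.close(); // also returns the pooled Decompressor, if any
 * }
 * }</pre>
 */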
public final class HadoopUtils {

    private HadoopUtils() {}
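
    /**
     * Opens {@code file} using the {@link MapredContext} of the current task,
     * transparently decompressing it when its name matches a configured
     * {@link CompressionCodec}.
     */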
    public static BufferedReader getBufferedReader(File file) throws IOException {
        MapredContext context = MapredContextAccessor.get();
        return getBufferedReader(file, context);
    }
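
    /**
     * Opens {@code file} using the {@link CompressionCodecFactory} configured in the
     * given context's {@link JobConf}, falling back to a plain {@link FileReader}
     * when no codec matches the file name. The caller must close the returned
     * reader; closing it returns any pooled {@link Decompressor}.
     */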
    public static BufferedReader getBufferedReader(File file, @Nullable MapredContext context)
            throws IOException {
        URI fileuri = file.toURI();
        Path path = new Path(fileuri);
        // context may be null when called outside of a running task; fall back to a
        // default Configuration in that case to avoid a NullPointerException
        Configuration conf = (context == null) ? new Configuration() : context.getJobConf();
        CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
        CompressionCodec codec = ccf.getCodec(path);
        if (codec == null) {
            return new BufferedReader(new FileReader(file));
        } else {
            Decompressor decompressor = CodecPool.getDecompressor(codec);
            FileInputStream fis = new FileInputStream(file);
            CompressionInputStream cis = codec.createInputStream(fis, decompressor);
            // wrap so that close() also returns the pooled decompressor to the CodecPool
            return new BufferedReaderExt(new InputStreamReader(cis), decompressor);
        }
    }
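
    /**
     * A {@link BufferedReader} that returns its pooled {@link Decompressor} to the
     * {@link CodecPool} when closed.
     */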
    private static final class BufferedReaderExt extends BufferedReader {

        private Decompressor decompressor;

        BufferedReaderExt(Reader in, Decompressor decompressor) {
            super(in);
            this.decompressor = decompressor;
        }

        @Override
        public void close() throws IOException {
            super.close();
            if (decompressor != null) {
                CodecPool.returnDecompressor(decompressor);
                this.decompressor = null; // guard against returning twice on double-close
            }
        }
    }
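
    /**
     * Returns an identifier of the current job. Tries, in order,
     * {@code mapred.job.id} (Hadoop 1), {@code mapreduce.job.id} (Hadoop 2),
     * {@code hive.query.id}, and finally derives the job ID from
     * {@code mapred.task.id}.
     *
     * @throws IllegalStateException if no identifier can be resolved
     */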
    @Nonnull
    public static String getJobId() {
        MapredContext ctx = MapredContextAccessor.get();
        if (ctx == null) {
            throw new IllegalStateException("MapredContext is not set");
        }
        JobConf conf = ctx.getJobConf();
        if (conf == null) {
            throw new IllegalStateException("JobConf is not set");
        }
        String jobId = conf.get("mapred.job.id"); // Hadoop 1
        if (jobId == null) {
            jobId = conf.get("mapreduce.job.id"); // Hadoop 2
            if (jobId == null) {
                // fall back to the Hive query id, if any
                String queryId = conf.get("hive.query.id");
                if (queryId != null) {
                    return queryId;
                }
                // as a last resort, derive the job id from the task id
                String taskidStr = conf.get("mapred.task.id");
                if (taskidStr == null) {
                    throw new IllegalStateException("Cannot resolve jobId: " + toString(conf));
                }
                jobId = getJobIdFromTaskId(taskidStr);
            }
        }
        return jobId;
    }
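
    /**
     * Derives a job ID string from a task ID string, e.g.,
     * {@code getJobIdFromTaskId("task_201401011234_0001_m_000000")} returns
     * {@code "job_201401011234_0001"}. Tez-style task IDs that lack the
     * {@code task_} prefix are normalized first.
     */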
    @Nonnull
    public static String getJobIdFromTaskId(@Nonnull String taskidStr) {
        if (!taskidStr.startsWith("task_")) {
            // workaround for Tez: normalize the ID string so that TaskID.forName() can parse it
            taskidStr = taskidStr.replace("task", "task_");
            taskidStr = taskidStr.substring(0, taskidStr.lastIndexOf('_'));
        }
        TaskID taskId = TaskID.forName(taskidStr);
        JobID jobId = taskId.getJobID();
        return jobId.toString();
    }
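
    /**
     * Returns the partition number of the current task, read from
     * {@code mapred.task.partition} (Hadoop 1) or {@code mapreduce.task.partition}
     * (Hadoop 2).
     *
     * @throws IllegalStateException if neither property is set
     */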
    public static int getTaskId() {
        MapredContext ctx = MapredContextAccessor.get();
        if (ctx == null) {
            throw new IllegalStateException("MapredContext is not set");
        }
        JobConf jobconf = ctx.getJobConf();
        if (jobconf == null) {
            throw new IllegalStateException("JobConf is not set");
        }
        int taskid = jobconf.getInt("mapred.task.partition", -1); // Hadoop 1
        if (taskid == -1) {
            taskid = jobconf.getInt("mapreduce.task.partition", -1); // Hadoop 2
            if (taskid == -1) {
                throw new IllegalStateException(
                    "Neither mapred.task.partition nor mapreduce.task.partition is set: "
                            + toString(jobconf));
            }
        }
        return taskid;
    }
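
    /**
     * Dumps all entries of the given {@link JobConf} as a comma-separated
     * {@code key=value} string.
     */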
    @Nonnull
    public static String toString(@Nonnull JobConf jobconf) {
        return toString(jobconf, null);
    }
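
    /**
     * Dumps the entries of the given {@link JobConf} whose keys match
     * {@code regexKey} (or all entries when {@code regexKey} is null) as a
     * comma-separated {@code key=value} string.
     */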
    @Nonnull
    public static String toString(@Nonnull JobConf jobconf, @Nullable String regexKey) {
        final StringBuilder buf = new StringBuilder(1024);
        final Iterator<Entry<String, String>> itor = jobconf.iterator();
        while (itor.hasNext()) {
            Entry<String, String> e = itor.next();
            String k = e.getKey();
            if (k == null) {
                continue;
            }
            if (regexKey == null || k.matches(regexKey)) {
                // append the separator lazily so that entries filtered out by regexKey
                // do not leave a trailing comma
                if (buf.length() > 0) {
                    buf.append(',');
                }
                String v = e.getValue();
                buf.append(k).append('=').append(v);
            }
        }
        return buf.toString();
    }
}