/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hive;
import com.facebook.hive.orc.OrcFile;
import com.facebook.hive.orc.OrcProto;
import com.facebook.hive.orc.OrcProto.Type;
import com.facebook.hive.orc.OrcSerde;
import com.facebook.hive.orc.Reader;
import com.facebook.hive.orc.RecordReader;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.TupleDomain;
import com.facebook.presto.spi.type.TypeManager;
import com.google.common.base.Optional;
import com.google.common.base.Predicate;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.ReaderWriterProfiler;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.mapred.JobConf;
import org.joda.time.DateTimeZone;
import java.util.List;
import java.util.Properties;
import static com.facebook.presto.hive.HiveUtil.getDeserializer;
import static com.facebook.presto.hive.HiveUtil.getTableObjectInspector;
import static com.google.common.collect.Iterables.all;
public class DwrfRecordCursorProvider
implements HiveRecordCursorProvider
{
/**
 * Creates a record cursor over a byte range of a file when the table's deserializer
 * is an {@link OrcSerde}; otherwise reports that this provider does not apply.
 *
 * <p>The {@code clientId} and {@code tupleDomain} arguments are accepted for the
 * interface but are not read by this implementation.
 *
 * @param configuration Hadoop configuration used to resolve the file system, build the
 *        {@link JobConf}, and set the profiler options
 * @param session supplies the time zone key passed on to the cursor
 * @param path file to read
 * @param start first argument handed to {@code Reader.rows}
 * @param length second argument handed to {@code Reader.rows}; also given to the cursor
 * @param schema table properties from which the deserializer and row inspector are derived
 * @param columns columns to read; their Hive column indexes select what the reader includes
 * @param partitionKeys forwarded unchanged to the cursor
 * @param hiveStorageTimeZone forwarded unchanged to the cursor
 * @param typeManager forwarded unchanged to the cursor
 * @return a cursor wrapped in {@code Optional}, or {@code Optional.absent()} when the
 *         deserializer is not an {@link OrcSerde}
 * @throws IllegalArgumentException if any table field contains a DATE type
 * @throws RuntimeException (via {@link Throwables#propagate}) if opening the file or
 *         creating the row reader fails
 */
@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(
        String clientId,
        Configuration configuration,
        ConnectorSession session,
        Path path,
        long start,
        long length,
        Properties schema,
        List<HiveColumnHandle> columns,
        List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> tupleDomain,
        DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager)
{
    @SuppressWarnings("deprecation")
    Deserializer serde = getDeserializer(schema);
    boolean handledByThisProvider = serde instanceof OrcSerde;
    if (!handledByThisProvider) {
        return Optional.absent();
    }

    // Reject the whole table up front if any field (at any nesting depth) is a DATE.
    StructObjectInspector tableInspector = getTableObjectInspector(schema);
    boolean everyFieldSupported = all(tableInspector.getAllStructFieldRefs(), isSupportedDwrfType());
    if (!everyFieldSupported) {
        throw new IllegalArgumentException("DWRF does not support DATE type");
    }

    ReaderWriterProfiler.setProfilerOptions(configuration);

    RecordReader rows;
    try {
        FileSystem fs = path.getFileSystem(configuration);
        Reader orcReader = OrcFile.createReader(fs, path, new JobConf(configuration));
        boolean[] columnMask = findIncludedColumns(orcReader.getTypes(), columns);
        rows = orcReader.rows(start, length, columnMask);
    }
    catch (Exception e) {
        throw Throwables.propagate(e);
    }

    DateTimeZone sessionTimeZone = DateTimeZone.forID(session.getTimeZoneKey().getId());
    HiveRecordCursor cursor = new DwrfHiveRecordCursor(
            rows,
            length,
            schema,
            partitionKeys,
            columns,
            hiveStorageTimeZone,
            sessionTimeZone,
            typeManager);
    return Optional.of(cursor);
}
/**
 * Returns a predicate over struct fields that is {@code true} exactly when
 * {@link #hasDateType(ObjectInspector)} finds no DATE type in the field's inspector.
 */
private static Predicate<StructField> isSupportedDwrfType()
{
    return new Predicate<StructField>()
    {
        @Override
        public boolean apply(StructField field)
        {
            ObjectInspector fieldInspector = field.getFieldObjectInspector();
            if (hasDateType(fieldInspector)) {
                return false;
            }
            return true;
        }
    };
}
/**
 * Computes which entries of the file's flattened type list must be read to serve the
 * requested columns.
 *
 * <p>Entry 0 (the root) is always marked. For every requested column whose Hive column
 * index addresses one of the root's subtypes, that subtype and all of its descendants
 * are marked. Indexes outside {@code [0, root.getSubtypesCount())} match no subtype and
 * are skipped.
 *
 * @param types flattened type list of the file; must contain at least the root entry
 * @param columns requested columns, identified by their Hive column index
 * @return an array parallel to {@code types} with {@code true} for each entry to read,
 *         or {@code null} when every entry would be marked (nothing is filtered out)
 * @throws IllegalArgumentException if {@code types} is empty
 */
private static boolean[] findIncludedColumns(List<Type> types, List<HiveColumnHandle> columns)
{
    if (types.isEmpty()) {
        // Fail with a clear message instead of an ArrayIndexOutOfBoundsException below.
        throw new IllegalArgumentException("types is empty");
    }

    boolean[] includes = new boolean[types.size()];
    includes[0] = true;

    Type root = types.get(0);
    int topLevelCount = root.getSubtypesCount();

    // Walk the requested indexes once rather than scanning the whole column list for
    // every top-level subtype; the set of marked entries is the same either way.
    for (Integer columnIndex : Lists.transform(columns, HiveColumnHandle.hiveColumnIndexGetter())) {
        if (columnIndex != null && columnIndex >= 0 && columnIndex < topLevelCount) {
            includeColumnRecursive(types, includes, root.getSubtypes(columnIndex));
        }
    }

    // if we are filtering at least one column, return the boolean array
    for (boolean include : includes) {
        if (!include) {
            return includes;
        }
    }
    return null;
}
/**
 * Marks {@code typeId} in {@code result} and then does the same, depth first, for
 * every subtype reachable from it.
 *
 * @param types flattened type list in which subtype ids are positions
 * @param result array indexed by type id; visited entries are set to {@code true}
 * @param typeId position in {@code types} of the entry to mark
 */
private static void includeColumnRecursive(List<OrcProto.Type> types, boolean[] result, int typeId)
{
    result[typeId] = true;

    OrcProto.Type node = types.get(typeId);
    int childCount = node.getSubtypesCount();
    int childPosition = 0;
    while (childPosition < childCount) {
        includeColumnRecursive(types, result, node.getSubtypes(childPosition));
        childPosition++;
    }
}
/**
 * Reports whether a DATE primitive occurs anywhere within the type described by the
 * given inspector, descending through list elements, map keys and values, and struct
 * fields.
 *
 * @param objectInspector inspector describing the type to examine
 * @return {@code true} if a primitive with category {@code DATE} is found
 * @throws IllegalArgumentException if the inspector is not a primitive, list, map, or
 *         struct inspector
 */
static boolean hasDateType(ObjectInspector objectInspector)
{
    if (objectInspector instanceof PrimitiveObjectInspector) {
        PrimitiveCategory category = ((PrimitiveObjectInspector) objectInspector).getPrimitiveCategory();
        return category == PrimitiveCategory.DATE;
    }

    if (objectInspector instanceof ListObjectInspector) {
        ObjectInspector elementInspector = ((ListObjectInspector) objectInspector).getListElementObjectInspector();
        return hasDateType(elementInspector);
    }

    if (objectInspector instanceof MapObjectInspector) {
        MapObjectInspector mapping = (MapObjectInspector) objectInspector;
        // Key is examined first; the value is only examined when the key has no DATE.
        if (hasDateType(mapping.getMapKeyObjectInspector())) {
            return true;
        }
        return hasDateType(mapping.getMapValueObjectInspector());
    }

    if (!(objectInspector instanceof StructObjectInspector)) {
        throw new IllegalArgumentException("Unknown object inspector type " + objectInspector);
    }

    boolean dateFound = false;
    StructObjectInspector struct = (StructObjectInspector) objectInspector;
    for (StructField member : struct.getAllStructFieldRefs()) {
        if (hasDateType(member.getFieldObjectInspector())) {
            dateFound = true;
            break;
        }
    }
    return dateFound;
}
}