// Pieces of the table description, filled in step by step further down.
// Location of the table data, taken from table.getPath().
Path path = null;
// Storage format; assigned only when the table metadata Properties are non-null.
CatalogProtos.StoreType storeType = null;
// Tajo schema that receives the non-partition columns of the Hive table.
// NOTE(review): written fully qualified here while the partition-key code below uses the
// bare name Schema - confirm both refer to the same class.
org.apache.tajo.catalog.Schema schema = null;
// Table options, seeded from table.getParameters() and extended with format-specific keys.
Options options = null;
// Table statistics; only the byte count is set in this part of the method.
TableStats stats = null;
// Column partitioning description; created only when the table has partition keys.
PartitionMethodDesc partitions = null;
//////////////////////////////////
// set tajo table schema.
//////////////////////////////////
try {
// get hive table schema
try {
// Take a client from the pool, then look the table up and remember its path.
// NOTE(review): nothing nearby hands the client back to clientPool - confirm it is
// released later in the method (e.g. in a finally block).
client = clientPool.getClient();
table = HCatUtil.getTable(client.getHiveClient(), databaseName, tableName);
path = table.getPath();
} catch (NoSuchObjectException nsoe) {
// A missing table gets a dedicated message; the original exception is kept as the cause.
throw new CatalogException("Table not found. - tableName:" + tableName, nsoe);
} catch (Exception e) {
// Every other failure is wrapped as-is, without an additional message.
throw new CatalogException(e);
}
// Translate the HCatalog field schema into a Tajo field schema.
// The Tajo schema starts out empty; columns are added to it afterwards.
schema = new org.apache.tajo.catalog.Schema();

// No placeholder value is needed: the catch clause always throws, so tableSchema is
// definitely assigned once the try statement completes.
HCatSchema tableSchema;
try {
  tableSchema = HCatUtil.getTableSchemaWithPtnCols(table);
} catch (IOException ioe) {
  String reason = "Fail to get table schema. - tableName:" + tableName;
  throw new CatalogException(reason, ioe);
}
// Copy every column that is not a partition key into the Tajo schema. Column names are
// qualified as database, table and column joined by CatalogConstants.IDENTIFIER_DELIMITER.
List<HCatFieldSchema> fieldSchemaList = tableSchema.getFields();
// Looked up once: the partition key list does not depend on the column being examined.
List<FieldSchema> partitionKeyFields = table.getPartitionKeys();
boolean isPartitionKey = false;
for (HCatFieldSchema eachField : fieldSchemaList) {
  isPartitionKey = false;
  if (partitionKeyFields != null) {
    for (FieldSchema partitionKey : partitionKeyFields) {
      if (partitionKey.getName().equals(eachField.getName())) {
        isPartitionKey = true;
        // One matching key settles the question; no need to scan the remaining keys.
        break;
      }
    }
  }
  if (!isPartitionKey) {
    String fieldName = databaseName + CatalogConstants.IDENTIFIER_DELIMITER + tableName +
        CatalogConstants.IDENTIFIER_DELIMITER + eachField.getName();
    // The Hive type is mapped through its string form.
    TajoDataTypes.Type dataType = HCatalogUtil.getTajoFieldType(eachField.getType().toString());
    schema.addColumn(fieldName, dataType);
  }
}
// validate field schema.
// The check is given the HCatalog schema (tableSchema), not the Tajo schema built from it.
// NOTE(review): this runs after getTajoFieldType has already been applied to every
// non-partition column - confirm the conversion cannot fail first on a type that this
// validation is meant to reject.
try {
HCatalogUtil.validateHCatTableAndTajoSchema(tableSchema);
} catch (Exception e) {
// Any validation failure is reported with the offending schema in the message.
throw new CatalogException("HCatalog cannot support schema. - schema:" + tableSchema.toString(), e);
}
// Fresh statistics and options; the options begin as a copy of the Hive table parameters.
stats = new TableStats();
options = new Options();
options.putAll(table.getParameters());

Properties properties = table.getMetadata();
if (properties != null) {
  // When the Hive table uses the default row format delimiter, the metadata does not
  // carry it, so the default is supplied here. The same pattern applies to the null format.
  String delimiter = properties.getProperty(serdeConstants.FIELD_DELIM, "\u0001");
  String nullToken = properties.getProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N");

  // Derive the store type from the file output format recorded in the metadata.
  String outputFormat = properties.getProperty(hive_metastoreConstants.FILE_OUTPUT_FORMAT);
  storeType = CatalogUtil.getStoreType(HCatalogUtil.getStoreType(outputFormat));

  // Add the format-specific options; values are stored in Java-escaped form.
  switch (storeType) {
    case CSV: {
      options.put(StorageConstants.CSVFILE_DELIMITER, StringEscapeUtils.escapeJava(delimiter));
      options.put(StorageConstants.CSVFILE_NULL, StringEscapeUtils.escapeJava(nullToken));
      break;
    }
    case RCFILE: {
      options.put(StorageConstants.RCFILE_NULL, StringEscapeUtils.escapeJava(nullToken));
      String serdeName = properties.getProperty(serdeConstants.SERIALIZATION_LIB);
      // Only the two known columnar serdes are mapped; any other value adds no serde option.
      if (LazyBinaryColumnarSerDe.class.getName().equals(serdeName)) {
        options.put(StorageConstants.RCFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE);
      } else if (ColumnarSerDe.class.getName().equals(serdeName)) {
        options.put(StorageConstants.RCFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE);
      }
      break;
    }
    case SEQUENCEFILE: {
      options.put(StorageConstants.SEQUENCEFILE_DELIMITER, StringEscapeUtils.escapeJava(delimiter));
      options.put(StorageConstants.SEQUENCEFILE_NULL, StringEscapeUtils.escapeJava(nullToken));
      String serdeName = properties.getProperty(serdeConstants.SERIALIZATION_LIB);
      // Same idea as for RCFILE, with the row-oriented serde classes.
      if (LazyBinarySerDe.class.getName().equals(serdeName)) {
        options.put(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE);
      } else if (LazySimpleSerDe.class.getName().equals(serdeName)) {
        options.put(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE);
      }
      break;
    }
    default:
      // Other store types receive no extra options.
      break;
  }

  // Data size: use the "totalSize" metadata entry when present; otherwise ask the file
  // system for the content length of the table path. A path that does not exist leaves
  // the size at zero.
  long numBytes = 0;
  String recordedSize = properties.getProperty("totalSize");
  if (recordedSize == null) {
    try {
      FileSystem fs = path.getFileSystem(conf);
      if (fs.exists(path)) {
        numBytes = fs.getContentSummary(path).getLength();
      }
    } catch (IOException ioe) {
      throw new CatalogException("Fail to get path. - path:" + path.toString(), ioe);
    }
  } else {
    numBytes = Long.parseLong(recordedSize);
  }
  stats.setNumBytes(numBytes);
}
// set partition keys
if (table.getPartitionKeys() != null) {
Schema expressionSchema = new Schema();
StringBuilder sb = new StringBuilder();
if (table.getPartitionKeys().size() > 0) {
List<FieldSchema> partitionKeys = table.getPartitionKeys();
for (int i = 0; i < partitionKeys.size(); i++) {
FieldSchema fieldSchema = partitionKeys.get(i);
TajoDataTypes.Type dataType = HCatalogUtil.getTajoFieldType(fieldSchema.getType().toString());
String fieldName = databaseName + CatalogConstants.IDENTIFIER_DELIMITER + tableName +
CatalogConstants.IDENTIFIER_DELIMITER + fieldSchema.getName();
expressionSchema.addColumn(new Column(fieldName, dataType));
if (i > 0) {
sb.append(",");
}
sb.append(fieldSchema.getName());
}
partitions = new PartitionMethodDesc(
databaseName,
tableName,
PartitionType.COLUMN,
sb.toString(),
expressionSchema);