Source Code of org.apache.hadoop.hive.ql.metadata.Table

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql.metadata;

import java.io.IOException;
import java.net.URI;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Vector;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.util.StringUtils;


/**
* A Hive Table: a fundamental unit of data in Hive that shares a common schema/DDL
*/
public class Table {

  private static final Log LOG = LogFactory.getLog("hive.ql.metadata.Table");

  private Properties schema;
  private Deserializer deserializer;
  private URI uri;
  private Class<? extends InputFormat> inputFormatClass;
  private Class<? extends OutputFormat> outputFormatClass;
  private org.apache.hadoop.hive.metastore.api.Table tTable;

  /**
   * Table (only used internally)
   * @throws HiveException
   *
   */
  protected Table() throws HiveException {
  }

  /**
   * Table
   *
   * Creates a Table object, presumably with the intent of saving it to the metastore
   *
   * @param name the name of this table in the metadb
   * @param schema an object that represents the schema that this SerDe must know
   * @param deserializer a class used for serializing and deserializing the data
   * @param inputFormatClass the InputFormat used to read the table's data
   * @param outputFormatClass the OutputFormat used to write the table's data
   * @param dataLocation where the table lives (e.g., dfs://hadoop001.sf2p.facebook.com:9000/user/facebook/warehouse/example) NOTE: should not be hardcoding this, but ok for now
   * @param hive the Hive instance this table belongs to
   *
   * @exception HiveException on internal error. Not possible now, but we reserve the right to throw an exception in the future
   */
  public Table(String name, Properties schema, Deserializer deserializer,
      Class<? extends InputFormat<?, ?>> inputFormatClass,
      Class<? extends OutputFormat<?, ?>> outputFormatClass,
      URI dataLocation, Hive hive) throws HiveException {
    initEmpty();
    this.schema = schema;
    this.deserializer = deserializer; //TODO: convert to SerDeInfo format
    getTTable().setTableName(name);
    getSerdeInfo().setSerializationLib(deserializer.getClass().getName());
    setInputFormatClass(inputFormatClass);
    setOutputFormatClass(outputFormatClass);
    setDataLocation(dataLocation);
  }
 
  public Table(String name) {
    // fill in defaults
    initEmpty();
    getTTable().setTableName(name);
    getTTable().setDbName(MetaStoreUtils.DEFAULT_DATABASE_NAME);
    getSerdeInfo().setSerializationLib(MetadataTypedColumnsetSerDe.class.getName());
    getSerdeInfo().getParameters().put(Constants.SERIALIZATION_FORMAT, "1");
  }
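  // A minimal usage sketch for these constructors (the table name, columns, and
  // format classes below are assumptions for illustration, not taken from this
  // file): the single-argument form fills in metastore defaults, after which a
  // caller typically sets the columns and the input/output formats.
  //
  //   Table tbl = new Table("example_table");
  //   List<FieldSchema> cols = new ArrayList<FieldSchema>();
  //   cols.add(new FieldSchema("id", "int", "row id"));
  //   cols.add(new FieldSchema("msg", "string", "payload"));
  //   tbl.setFields(cols);
  //   tbl.setInputFormatClass(org.apache.hadoop.mapred.TextInputFormat.class);
  //   tbl.setOutputFormatClass(org.apache.hadoop.mapred.TextOutputFormat.class);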
 
  void initEmpty() {
    setTTable(new org.apache.hadoop.hive.metastore.api.Table());
    getTTable().setSd(new StorageDescriptor());
    getTTable().setPartitionKeys(new ArrayList<FieldSchema>());
    getTTable().setParameters(new HashMap<String, String>());

    StorageDescriptor sd = getTTable().getSd();
    sd.setSerdeInfo(new SerDeInfo());
    sd.setNumBuckets(-1);
    sd.setBucketCols(new ArrayList<String>());
    sd.setCols(new ArrayList<FieldSchema>());
    sd.setParameters(new HashMap<String, String>());
    sd.setSortCols(new ArrayList<Order>());
   
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
  }
 
  protected void initSerDe() throws HiveException {
    if (deserializer == null) {
      try {
        deserializer = MetaStoreUtils.getDeserializer(Hive.get().getConf(), this.getTTable());
      } catch (MetaException e) {
        throw new HiveException(e);
      }
    }
  }

  public void checkValidity() throws HiveException {
    // check for validity
    String name = getTTable().getTableName();
    if (null == name || name.length() == 0 || !MetaStoreUtils.validateName(name)) {
      throw new HiveException("[" + name + "]: is not a valid table name");
    }
    if (null == getDeserializer()) {
      throw new HiveException("must specify a non-null serDe");
    }
    if (null == getInputFormatClass()) {
      throw new HiveException("must specify an InputFormat class");
    }
    if (null == getOutputFormatClass()) {
      throw new HiveException("must specify an OutputFormat class");
    }
    return;
  }

  /**
   * @param inputFormatClass
   */
  public void setInputFormatClass(Class<? extends InputFormat> inputFormatClass) {
    this.inputFormatClass = inputFormatClass;
    tTable.getSd().setInputFormat(inputFormatClass.getName());
  }

  /**
   * @param outputFormatClass
   */
  public void setOutputFormatClass(Class<? extends OutputFormat> outputFormatClass) {
    this.outputFormatClass = outputFormatClass;
    tTable.getSd().setOutputFormat(outputFormatClass.getName());
  }

  final public Properties getSchema()  {
    return schema;
  }

  final public Path getPath() {
    return new Path(getTTable().getSd().getLocation());
  }

  final public String getName() {
    return getTTable().getTableName();
  }

  final public URI getDataLocation() {
    return uri;
  }

  final public Deserializer getDeserializer() {
    return deserializer;
  }

  final public Class<? extends InputFormat> getInputFormatClass() {
    return inputFormatClass;
  }

  final public Class<? extends OutputFormat> getOutputFormatClass() {
    return outputFormatClass;
  }

  final public boolean isValidSpec(AbstractMap<String, String> spec) throws HiveException {

    // TODO - types need to be checked.
    List<FieldSchema> partCols = getTTable().getPartitionKeys();
    if (partCols == null || partCols.size() == 0) {
      if (spec != null) {
        throw new HiveException("table is not partitioned but partition spec exists: " + spec);
      } else {
        return true;
      }
    }
   
    if((spec == null) || (spec.size() != partCols.size())) {
      throw new HiveException("table is partitioned but partition spec is not specified or tab: " + spec);
    }
   
    for (FieldSchema field : partCols) {
      if(spec.get(field.getName()) == null) {
        throw new HiveException(field.getName() + " not found in table's partition spec: " + spec);
      }
    }

    return true;
  }
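  // Illustrative partition spec for the check above, assuming a table
  // partitioned by (ds string, hr string); every partition column must appear
  // as a key in the spec:
  //
  //   AbstractMap<String, String> spec = new java.util.LinkedHashMap<String, String>();
  //   spec.put("ds", "2008-04-08");
  //   spec.put("hr", "12");
  //   tbl.isValidSpec(spec);   // returns true; an incomplete or mismatched spec throws HiveException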
 
  public void setProperty(String name, String value) {
    getTTable().getParameters().put(name, value);
  }

  /**
   * getProperty
   *
   */
  public String getProperty(String name) {
    return getTTable().getParameters().get(name);
  }

  public Vector<StructField> getFields() {

    Vector<StructField> fields = new Vector<StructField> ();
    try {
      Deserializer decoder = getDeserializer();

      // Expand out all the columns of the table
      StructObjectInspector structObjectInspector = (StructObjectInspector)decoder.getObjectInspector();
      List<? extends StructField> fld_lst = structObjectInspector.getAllStructFieldRefs();
      for(StructField field: fld_lst) {
        fields.add(field);
      }
    } catch (SerDeException e) {
      throw new RuntimeException(e);
    }
    return fields;
  }

  public StructField getField(String fld) {
    try {
      StructObjectInspector structObjectInspector = (StructObjectInspector)getDeserializer().getObjectInspector();
      return structObjectInspector.getStructFieldRef(fld);
    }
    catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
 
  /**
   * @param schema the schema to set
   */
  public void setSchema(Properties schema) {
    this.schema = schema;
  }

  /**
   * @param serDe the serDe to set
   */
  public void setDeserializer(Deserializer deserializer) {
    this.deserializer = deserializer;
  }

  public String toString() {
    return getTTable().getTableName();
  }

  public List<FieldSchema> getPartCols() {
    List<FieldSchema> partKeys = getTTable().getPartitionKeys();
    if(partKeys == null) {
      partKeys = new ArrayList<FieldSchema>();
      getTTable().setPartitionKeys(partKeys);
    }
    return partKeys;
  }
 
  public boolean isPartitionKey(String colName) {
    for (FieldSchema key : getPartCols()) {
      if(key.getName().equalsIgnoreCase(colName)) {
        return true;
      }
    }
    return false;
  }

  //TODO merge this with getBucketCols function
  public String getBucketingDimensionId() {
    List<String> bcols = getTTable().getSd().getBucketCols();
    if(bcols == null || bcols.size() == 0) {
      return null;
    }
   
    if(bcols.size() > 1) {
      LOG.warn(this + " table has more than one bucketing dimension, which is not supported yet");
    }
   
    return bcols.get(0);
  }

  /**
   * @return the tTable
   */
  public org.apache.hadoop.hive.metastore.api.Table getTTable() {
    return tTable;
  }

  /**
   * @param table the tTable to set
   */
  protected void setTTable(org.apache.hadoop.hive.metastore.api.Table table) {
    tTable = table;
  }

  public void setDataLocation(URI uri2) {
    uri = uri2;
    getTTable().getSd().setLocation(uri2.toString());
  }

  public void setBucketCols(List<String> bucketCols) throws HiveException {
    if (bucketCols == null) {
      return;
    }

    for (String col : bucketCols) {
      if (!isField(col)) {
        throw new HiveException("Bucket column " + col + " is not part of the table columns");
      }
    }
    getTTable().getSd().setBucketCols(bucketCols);
  }

  public void setSortCols(List<Order> sortOrder) throws HiveException {
    getTTable().getSd().setSortCols(sortOrder);
  }
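  // Sketch of bucketing/sorting configuration, assuming the table already has a
  // column named "id" (setBucketCols verifies this via isField):
  //
  //   tbl.setNumBuckets(32);
  //   tbl.setBucketCols(java.util.Arrays.asList("id"));
  //   tbl.setSortCols(java.util.Arrays.asList(new Order("id", 1)));   // 1 denotes ascending order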

  private boolean isField(String col) {
    for (FieldSchema field : getCols()) {
      if(field.getName().equals(col)) {
        return true;
      }
    }
    return false;
  }

  public List<FieldSchema> getCols() {
    return getTTable().getSd().getCols();
  }

  public void setPartCols(List<FieldSchema> partCols) {
    getTTable().setPartitionKeys(partCols);
  }

  public String getDbName() {
    return getTTable().getDbName();
  }

  public int getNumBuckets() {
    return getTTable().getSd().getNumBuckets();
  }
 
  /**
   * Replaces files in the partition with new data set specified by srcf. Works by moving files
   * @param srcf Files to be replaced. Leaf directories or globbed file paths
   */
  protected void replaceFiles(Path srcf) throws HiveException {
    FileSystem fs;
    try {
      fs = FileSystem.get(getDataLocation(), Hive.get().getConf());
      Hive.get().replaceFiles(srcf, new Path(getDataLocation().getPath()), fs);
    } catch (IOException e) {
      throw new HiveException("addFiles: filesystem error in check phase", e);
    }
  }

  /**
   * Inserts files specified into the partition. Works by moving files
   * @param srcf Files to be moved. Leaf directories or globbed file paths
   */
  protected void copyFiles(Path srcf) throws HiveException {
    FileSystem fs;
    try {
      fs = FileSystem.get(getDataLocation(), Hive.get().getConf());
      Hive.get().copyFiles(srcf, new Path(getDataLocation().getPath()), fs);
    } catch (IOException e) {
      throw new HiveException("addFiles: filesystem error in check phase", e);
    }
  }

  public void setInputFormatClass(String name) throws HiveException {
    try {
      setInputFormatClass((Class<? extends InputFormat<WritableComparable, Writable>>)Class.forName(name));
    } catch (ClassNotFoundException e) {
      throw new HiveException("Class not found: " + name, e);
    }
  }

  public void setOutputFormatClass(String name) throws HiveException {
    try {
      setOutputFormatClass((Class<? extends OutputFormat<WritableComparable, Writable>>)Class.forName(name));
    } catch (ClassNotFoundException e) {
      throw new HiveException("Class not found: " + name, e);
    }
  }
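  // The String overloads above simply resolve the class by name. For example,
  // with the standard Hadoop text formats on the classpath (the class names are
  // illustrative; any InputFormat/OutputFormat implementation works):
  //
  //   tbl.setInputFormatClass("org.apache.hadoop.mapred.TextInputFormat");
  //   tbl.setOutputFormatClass("org.apache.hadoop.mapred.TextOutputFormat");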

 
  public boolean isPartitioned() {
    if(getPartCols() == null) {
      return false;
    }
    return (getPartCols().size() != 0);
  }

  public void setFields(List<FieldSchema> fields) {
    getTTable().getSd().setCols(fields);
  }

  public void setNumBuckets(int nb) {
    getTTable().getSd().setNumBuckets(nb);
  }

  /**
   * @return the owner of the table
   * @see org.apache.hadoop.hive.metastore.api.Table#getOwner()
   */
  public String getOwner() {
    return tTable.getOwner();
  }

  /**
   * @return the table parameters
   * @see org.apache.hadoop.hive.metastore.api.Table#getParameters()
   */
  public Map<String, String> getParameters() {
    return tTable.getParameters();
  }

  /**
   * @return the retention period of the table
   * @see org.apache.hadoop.hive.metastore.api.Table#getRetention()
   */
  public int getRetention() {
    return tTable.getRetention();
  }

  /**
   * @param owner
   * @see org.apache.hadoop.hive.metastore.api.Table#setOwner(java.lang.String)
   */
  public void setOwner(String owner) {
    tTable.setOwner(owner);
  }

  /**
   * @param retention
   * @see org.apache.hadoop.hive.metastore.api.Table#setRetention(int)
   */
  public void setRetention(int retention) {
    tTable.setRetention(retention);
  }

  private SerDeInfo getSerdeInfo() {
    return getTTable().getSd().getSerdeInfo();
  }

  public void setSerializationLib(String lib) {
    getSerdeInfo().setSerializationLib(lib);
  }

  public String getSerializationLib() {
    return getSerdeInfo().getSerializationLib();
  }
 
  public String getSerdeParam(String param) {
    return getSerdeInfo().getParameters().get(param);
  }
 
  public String setSerdeParam(String param, String value) {
    return getSerdeInfo().getParameters().put(param, value);
  }

  public List<String> getBucketCols() {
    return getTTable().getSd().getBucketCols();
  }

  public List<Order> getSortCols() {
    return getTTable().getSd().getSortCols();
  }

  private static void getPartPaths(FileSystem fs, Path p, Vector<String> partPaths) throws IOException {
    // Base case for recursion
    if (fs.isFile(p)) {
      if (!partPaths.contains(p.getParent().toString())) {
        partPaths.add(p.getParent().toString());
      }
    }
    else {
      FileStatus [] dirs = fs.listStatus(p);

      if (dirs.length != 0 ) {
        for(int i=0; i < dirs.length; ++i) {
          getPartPaths(fs, dirs[i].getPath(), partPaths);
        }
      }
      else {
        // This is an empty partition
        if (!partPaths.contains(p.toString())) {
          partPaths.add(p.toString());
        }
      }
    }

    return;
  }

  static final Pattern pat = Pattern.compile("([^/]+)=([^/]+)");
  public List<Partition> getPartitionsFromHDFS() throws HiveException {
    ArrayList<Partition> ret = new ArrayList<Partition> ();
    FileSystem fs = null;
    Vector<String> partPaths = new Vector<String>();

    try {
      fs = FileSystem.get(getDataLocation(), Hive.get().getConf());
      getPartPaths(fs, new Path(getDataLocation().getPath()), partPaths);
      for(String partPath: partPaths) {
        Path tmpPath = new Path(partPath);
        if(!fs.getFileStatus(tmpPath).isDir()) {
          throw new HiveException("Data in hdfs is messed up. Table " + getName() + " has a partition " + partPath + " that is not a directory");
        }
        ret.add(new Partition(this, tmpPath));
      }
    } catch (IOException e) {
      LOG.error(StringUtils.stringifyException(e));
      throw new HiveException("DB Error: Table " + getDataLocation() + " message: " + e.getMessage());
    }

    return ret;
  }
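  // The directory walk above treats every leaf directory under the table's data
  // location as one partition. For a table partitioned by (ds, hr), the layout
  // would look like the following (the paths are hypothetical); Hive encodes
  // partition values in the name=value form matched by the pattern above:
  //
  //   /user/hive/warehouse/example_table/ds=2008-04-08/hr=11/part-0000
  //   /user/hive/warehouse/example_table/ds=2008-04-08/hr=12/part-0000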
 
}