/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.sqoop.tool;

import java.io.IOException;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.List;
import java.util.Map;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.util.StringUtils;

import com.cloudera.sqoop.Sqoop;
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException;
import com.cloudera.sqoop.cli.RelatedOptions;
import com.cloudera.sqoop.cli.ToolOptions;
import com.cloudera.sqoop.hive.HiveImport;
import com.cloudera.sqoop.manager.ImportJobContext;

import com.cloudera.sqoop.metastore.JobData;
import com.cloudera.sqoop.metastore.JobStorage;
import com.cloudera.sqoop.metastore.JobStorageFactory;
import com.cloudera.sqoop.util.AppendUtils;
import com.cloudera.sqoop.util.ImportException;
import org.apache.hadoop.fs.Path;

/**
* Tool that performs database imports to HDFS.
*/
public class ImportTool extends com.cloudera.sqoop.tool.BaseSqoopTool {

  public static final Log LOG = LogFactory.getLog(ImportTool.class.getName());

  private CodeGenTool codeGenerator;

  // true if this is an all-tables import. Set by a subclass which
  // overrides the run() method of this tool (which can only do
  // a single table).
  private boolean allTables;

  // store check column type for incremental option
  private int checkColumnType;

  public ImportTool() {
    this("import", false);
  }

  public ImportTool(String toolName, boolean allTables) {
    super(toolName);
    this.codeGenerator = new CodeGenTool();
    this.allTables = allTables;
  }

  @Override
  protected boolean init(SqoopOptions sqoopOpts) {
    boolean ret = super.init(sqoopOpts);
    codeGenerator.setManager(manager);
    return ret;
  }

  /**
   * @return a list of jar files generated as part of this import process
   */
  public List<String> getGeneratedJarFiles() {
    return this.codeGenerator.getGeneratedJarFiles();
  }

  /**
   * @return true if the supplied options specify an incremental import.
   */
  private boolean isIncremental(SqoopOptions options) {
    return !options.getIncrementalMode().equals(
        SqoopOptions.IncrementalMode.None);
  }

  /**
   * If this is an incremental import, then we should save the
   * user's state back to the metastore (if this job was run
   * from the metastore). Otherwise, log to the user what data
   * they need to supply next time.
   */
  private void saveIncrementalState(SqoopOptions options)
      throws IOException {
    if (!isIncremental(options)) {
      return;
    }

    Map<String, String> descriptor = options.getStorageDescriptor();
    String jobName = options.getJobName();

    if (null != jobName && null != descriptor) {
      // Actually save it back to the metastore.
      LOG.info("Saving incremental import state to the metastore");
      JobStorageFactory ssf = new JobStorageFactory(options.getConf());
      JobStorage storage = ssf.getJobStorage(descriptor);
      storage.open(descriptor);
      try {
        // Save the 'parent' SqoopOptions; this does not contain the mutations
        // to the SqoopOptions state that occurred over the course of this
        // execution, except for the one we specifically want to memorize:
        // the latest value of the check column.
        JobData data = new JobData(options.getParent(), this);
        storage.update(jobName, data);
        LOG.info("Updated data for job: " + jobName);
      } finally {
        storage.close();
      }
    } else {
      // If there wasn't a parent SqoopOptions, then the incremental
      // state data was stored in the current SqoopOptions.
      LOG.info("Incremental import complete! To run another incremental "
          + "import of all data following this import, supply the "
          + "following arguments:");
      SqoopOptions.IncrementalMode incrementalMode =
          options.getIncrementalMode();
      switch (incrementalMode) {
      case AppendRows:
        LOG.info(" --incremental append");
        break;
      case DateLastModified:
        LOG.info(" --incremental lastmodified");
        break;
      default:
        LOG.warn("Undefined incremental mode: " + incrementalMode);
        break;
      }
      LOG.info("  --check-column " + options.getIncrementalTestColumn());
      LOG.info("  --last-value " + options.getIncrementalLastValue());
      LOG.info("(Consider saving this with 'sqoop job --create')");
    }
  }

  /**
   * Return the max value in the incremental-import check column. The
   * column may be numeric or a date/time type.
   */
  private Object getMaxColumnId(SqoopOptions options) throws SQLException {
    StringBuilder sb = new StringBuilder();
    sb.append("SELECT MAX(");
    sb.append(options.getIncrementalTestColumn());
    sb.append(") FROM ");
    sb.append(options.getTableName());

    String where = options.getWhereClause();
    if (null != where) {
      sb.append(" WHERE ");
      sb.append(where);
    }
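    // Illustrative example (hypothetical table and column names): with
    // --check-column id, --table employees and --where "active = 1", the
    // query built above would read:
    //   SELECT MAX(id) FROM employees WHERE active = 1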

    Connection conn = manager.getConnection();
    Statement s = null;
    ResultSet rs = null;
    try {
      s = conn.createStatement();
      rs = s.executeQuery(sb.toString());
      if (!rs.next()) {
        // This probably means the table is empty.
        LOG.warn("Unexpected: empty results for max value query?");
        return null;
      }

      ResultSetMetaData rsmd = rs.getMetaData();
      checkColumnType = rsmd.getColumnType(1);
      if (checkColumnType == Types.TIMESTAMP) {
        return rs.getTimestamp(1);
      } else if (checkColumnType == Types.DATE) {
        return rs.getDate(1);
      } else if (checkColumnType == Types.TIME) {
        return rs.getTime(1);
      } else {
        return rs.getObject(1);
      }
    } finally {
      try {
        if (null != rs) {
          rs.close();
        }
      } catch (SQLException sqlE) {
        LOG.warn("SQL Exception closing resultset: " + sqlE);
      }

      try {
        if (null != s) {
          s.close();
        }
      } catch (SQLException sqlE) {
        LOG.warn("SQL Exception closing statement: " + sqlE);
      }
    }
  }

  /**
   * Determine if a column is date/time.
   * @return true if column type is TIMESTAMP, DATE, or TIME.
   */
  private boolean isDateTimeColumn(int columnType) {
    return (columnType == Types.TIMESTAMP)
        || (columnType == Types.DATE)
        || (columnType == Types.TIME);
  }

  /**
   * Initialize the constraints which set the incremental import range.
   * @return false if an import is not necessary, because the dataset has not
   * changed.
   */
  private boolean initIncrementalConstraints(SqoopOptions options,
      ImportJobContext context) throws ImportException, IOException {

    // If this is an incremental import, determine the constraints
    // to inject in the WHERE clause or $CONDITIONS for a query.
    // Also modify the 'last value' field of the SqoopOptions to
    // specify the current job start time / start row.

    if (!isIncremental(options)) {
      return true;
    }

    SqoopOptions.IncrementalMode incrementalMode = options.getIncrementalMode();
    String nextIncrementalValue = null;

    Object nextVal;
    switch (incrementalMode) {
    case AppendRows:
      try {
        nextVal = getMaxColumnId(options);
        if (isDateTimeColumn(checkColumnType)) {
          nextIncrementalValue = (nextVal == null) ? null
            : manager.datetimeToQueryString(nextVal.toString(),
                                            checkColumnType);
        } else {
          nextIncrementalValue = (nextVal == null) ? null : nextVal.toString();
        }
      } catch (SQLException sqlE) {
        throw new IOException(sqlE);
      }
      break;
    case DateLastModified:
      checkColumnType = Types.TIMESTAMP;
      nextVal = manager.getCurrentDbTimestamp();
      if (null == nextVal) {
        throw new IOException("Could not get current time from database");
      }
      nextIncrementalValue = manager.datetimeToQueryString(nextVal.toString(),
          checkColumnType);
      break;
    default:
      throw new ImportException("Undefined incremental import type: "
          + incrementalMode);
    }

    // Build the WHERE clause components that are used to import
    // only this incremental section.
    StringBuilder sb = new StringBuilder();
    String prevEndpoint = options.getIncrementalLastValue();

    if (isDateTimeColumn(checkColumnType) && null != prevEndpoint
        && !prevEndpoint.startsWith("\'") && !prevEndpoint.endsWith("\'")) {
      // Incremental imports based on date/time values should be quoted in
      // ANSI SQL. If the user didn't supply the single-quotes, add them here.
      prevEndpoint = manager.datetimeToQueryString(prevEndpoint,
          checkColumnType);
    }
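    // Illustrative example (hypothetical value): a prior --last-value of
    // 2011-05-03 14:02:11.0 would typically be rewritten as
    // '2011-05-03 14:02:11.0'; the exact quoting is delegated to the
    // connection manager and may be database-specific.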

    String checkColName = manager.escapeColName(
        options.getIncrementalTestColumn());
    LOG.info("Incremental import based on column " + checkColName);
    if (null != prevEndpoint) {
      if (prevEndpoint.equals(nextIncrementalValue)) {
        LOG.info("No new rows detected since last import.");
        return false;
      }
      LOG.info("Lower bound value: " + prevEndpoint);
      sb.append(checkColName);
      switch (incrementalMode) {
      case AppendRows:
        sb.append(" > ");
        break;
      case DateLastModified:
        sb.append(" >= ");
        break;
      default:
        throw new ImportException("Undefined comparison");
      }
      sb.append(prevEndpoint);
      sb.append(" AND ");
    }

    if (null != nextIncrementalValue) {
      sb.append(checkColName);
      switch (incrementalMode) {
      case AppendRows:
        sb.append(" <= ");
        break;
      case DateLastModified:
        sb.append(" < ");
        break;
      default:
        throw new ImportException("Undefined comparison");
      }
      sb.append(nextIncrementalValue);
    } else {
      sb.append(checkColName);
      sb.append(" IS NULL ");
    }

    LOG.info("Upper bound value: " + nextIncrementalValue);

    String prevWhereClause = options.getWhereClause();
    if (null != prevWhereClause) {
      sb.append(" AND (");
      sb.append(prevWhereClause);
      sb.append(")");
    }
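    // Illustrative example (hypothetical values): for an append-mode import
    // with --check-column id, --last-value 100 and a current MAX(id) of 250,
    // the constraints built above would read:
    //   id > 100 AND id <= 250 AND (active = 1)
    // where the trailing parenthesized clause comes from the user's --where,
    // if one was supplied.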

    String newConstraints = sb.toString();
    options.setWhereClause(newConstraints);

    // Save this state for next time.
    SqoopOptions recordOptions = options.getParent();
    if (null == recordOptions) {
      recordOptions = options;
    }
    recordOptions.setIncrementalLastValue(
        (nextVal == null) ? null : nextVal.toString());

    return true;
  }

  /**
   * Import a table or query.
   * @return true if an import was performed, false otherwise.
   */
  protected boolean importTable(SqoopOptions options, String tableName,
      HiveImport hiveImport) throws IOException, ImportException {
    String jarFile = null;

    // Generate the ORM code for the tables.
    jarFile = codeGenerator.generateORM(options, tableName);

    // Do the actual import.
    ImportJobContext context = new ImportJobContext(tableName, jarFile,
        options, getOutputPath(options, tableName));

    // If we're doing an incremental import, set up the
    // filtering conditions used to get the latest records.
    if (!initIncrementalConstraints(options, context)) {
      return false;
    }

    if (null != tableName) {
      manager.importTable(context);
    } else {
      manager.importQuery(context);
    }

    if (options.isAppendMode()) {
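      // In append mode the import above wrote to a temporary directory (see
      // getOutputPath); AppendUtils is then expected to move those files into
      // the existing target directory under non-conflicting names.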
      AppendUtils app = new AppendUtils(context);
      app.append();
    }

    // If the user wants this table to be in Hive, perform that post-load.
    if (options.doHiveImport()) {
      hiveImport.importTable(tableName, options.getHiveTableName(), false);
    }

    saveIncrementalState(options);

    return true;
  }

  /**
   * @return the output path for the imported files.
   * In append mode this points to a temporary folder;
   * if importing to HBase, this may return null.
   */
  private Path getOutputPath(SqoopOptions options, String tableName) {
    // Get output directory
    String hdfsWarehouseDir = options.getWarehouseDir();
    String hdfsTargetDir = options.getTargetDir();
    Path outputPath = null;
    if (options.isAppendMode()) {
      // Use temporary path, later removed when appending
      outputPath = AppendUtils.getTempAppendDir(tableName);
      LOG.debug("Using temporary folder: " + outputPath.getName());
    } else {
      // Try in this order: target-dir or warehouse-dir
      if (hdfsTargetDir != null) {
        outputPath = new Path(hdfsTargetDir);
      } else if (hdfsWarehouseDir != null) {
        outputPath = new Path(hdfsWarehouseDir, tableName);
      } else if (null != tableName) {
        outputPath = new Path(tableName);
      }
    }
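    // Illustrative example (hypothetical values): with --warehouse-dir
    // /user/foo/imports and table "employees", this resolves to
    // /user/foo/imports/employees; with neither directory option set, the
    // path is simply the table name, resolved against the working directory.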

    return outputPath;
  }

  @Override
  /** {@inheritDoc} */
  public int run(SqoopOptions options) {
    HiveImport hiveImport = null;

    if (allTables) {
      // We got into this method, but we should be in a subclass.
      // (This method only handles a single table)
      // This should not be reached, but for sanity's sake, test here.
      LOG.error("ImportTool.run() can only handle a single table.");
      return 1;
    }

    if (!init(options)) {
      return 1;
    }

    codeGenerator.setManager(manager);

    try {
      if (options.doHiveImport()) {
        hiveImport = new HiveImport(options, manager, options.getConf(), false);
      }

      // Import a single table (or query) the user specified.
      importTable(options, options.getTableName(), hiveImport);
    } catch (IllegalArgumentException iea) {
      LOG.error("Import failed: " + iea.getMessage());
      if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) {
        throw iea;
      }
      return 1;
    } catch (IOException ioe) {
      LOG.error("Encountered IOException running import job: "
          + StringUtils.stringifyException(ioe));
      if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) {
        throw new RuntimeException(ioe);
      } else {
        return 1;
      }
    } catch (ImportException ie) {
      LOG.error("Error during import: " + ie.toString());
      if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) {
        throw new RuntimeException(ie);
      } else {
        return 1;
      }
    } finally {
      destroy(options);
    }

    return 0;
  }

  /**
   * Construct the set of options that control imports, either of one
   * table or a batch of tables.
   * @return the RelatedOptions that can be used to parse the import
   * arguments.
   */
  protected RelatedOptions getImportOptions() {
    // Imports
    RelatedOptions importOpts = new RelatedOptions("Import control arguments");

    importOpts.addOption(OptionBuilder
        .withDescription("Use direct import fast path")
        .withLongOpt(DIRECT_ARG)
        .create());

    if (!allTables) {
      importOpts.addOption(OptionBuilder.withArgName("table-name")
          .hasArg().withDescription("Table to read")
          .withLongOpt(TABLE_ARG)
          .create());
      importOpts.addOption(OptionBuilder.withArgName("col,col,col...")
          .hasArg().withDescription("Columns to import from table")
          .withLongOpt(COLUMNS_ARG)
          .create());
      importOpts.addOption(OptionBuilder.withArgName("column-name")
          .hasArg()
          .withDescription("Column of the table used to split work units")
          .withLongOpt(SPLIT_BY_ARG)
          .create());
      importOpts.addOption(OptionBuilder.withArgName("where clause")
          .hasArg().withDescription("WHERE clause to use during import")
          .withLongOpt(WHERE_ARG)
          .create());
      importOpts.addOption(OptionBuilder
          .withDescription("Imports data in append mode")
          .withLongOpt(APPEND_ARG)
          .create());
      importOpts.addOption(OptionBuilder.withArgName("dir")
          .hasArg().withDescription("HDFS plain table destination")
          .withLongOpt(TARGET_DIR_ARG)
          .create());
      importOpts.addOption(OptionBuilder.withArgName("statement")
          .hasArg()
          .withDescription("Import results of SQL 'statement'")
          .withLongOpt(SQL_QUERY_ARG)
          .create(SQL_QUERY_SHORT_ARG));
      importOpts.addOption(OptionBuilder.withArgName("statement")
          .hasArg()
          .withDescription("Set boundary query for retrieving max and min"
              + " value of the primary key")
          .withLongOpt(SQL_QUERY_BOUNDARY)
          .create());
    }

    importOpts.addOption(OptionBuilder.withArgName("dir")
        .hasArg().withDescription("HDFS parent for table destination")
        .withLongOpt(WAREHOUSE_DIR_ARG)
        .create());
    importOpts.addOption(OptionBuilder
        .withDescription("Imports data to SequenceFiles")
        .withLongOpt(FMT_SEQUENCEFILE_ARG)
        .create());
    importOpts.addOption(OptionBuilder
        .withDescription("Imports data as plain text (default)")
        .withLongOpt(FMT_TEXTFILE_ARG)
        .create());
    importOpts.addOption(OptionBuilder
        .withDescription("Imports data to Avro data files")
        .withLongOpt(FMT_AVRODATAFILE_ARG)
        .create());
    importOpts.addOption(OptionBuilder.withArgName("n")
        .hasArg().withDescription("Use 'n' map tasks to import in parallel")
        .withLongOpt(NUM_MAPPERS_ARG)
        .create(NUM_MAPPERS_SHORT_ARG));
    importOpts.addOption(OptionBuilder
        .withDescription("Enable compression")
        .withLongOpt(COMPRESS_ARG)
        .create(COMPRESS_SHORT_ARG));
    importOpts.addOption(OptionBuilder.withArgName("codec")
        .hasArg()
        .withDescription("Compression codec to use for import")
        .withLongOpt(COMPRESSION_CODEC_ARG)
        .create());
    importOpts.addOption(OptionBuilder.withArgName("n")
        .hasArg()
        .withDescription("Split the input stream every 'n' bytes "
        + "when importing in direct mode")
        .withLongOpt(DIRECT_SPLIT_SIZE_ARG)
        .create());
    importOpts.addOption(OptionBuilder.withArgName("n")
        .hasArg()
        .withDescription("Set the maximum size for an inline LOB")
        .withLongOpt(INLINE_LOB_LIMIT_ARG)
        .create());
    importOpts.addOption(OptionBuilder.withArgName("n")
        .hasArg()
        .withDescription("Set number 'n' of rows to fetch from the "
        + "database when more rows are needed")
        .withLongOpt(FETCH_SIZE_ARG)
        .create());

    return importOpts;
  }

  /**
   * Return options for incremental import.
   */
  protected RelatedOptions getIncrementalOptions() {
    RelatedOptions incrementalOpts =
        new RelatedOptions("Incremental import arguments");

    incrementalOpts.addOption(OptionBuilder.withArgName("import-type")
        .hasArg()
        .withDescription(
        "Define an incremental import of type 'append' or 'lastmodified'")
        .withLongOpt(INCREMENT_TYPE_ARG)
        .create());
    incrementalOpts.addOption(OptionBuilder.withArgName("column")
        .hasArg()
        .withDescription("Source column to check for incremental change")
        .withLongOpt(INCREMENT_COL_ARG)
        .create());
    incrementalOpts.addOption(OptionBuilder.withArgName("value")
        .hasArg()
        .withDescription("Last imported value in the incremental check column")
        .withLongOpt(INCREMENT_LAST_VAL_ARG)
        .create());

    return incrementalOpts;
  }
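
  // Illustrative command line exercising the incremental options above
  // (connect string and values are hypothetical):
  //   sqoop import --connect jdbc:mysql://db.example.com/corp --table orders \
  //     --incremental append --check-column id --last-value 42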

  @Override
  /** Configure the command-line arguments we expect to receive */
  public void configureOptions(ToolOptions toolOptions) {

    toolOptions.addUniqueOptions(getCommonOptions());
    toolOptions.addUniqueOptions(getImportOptions());
    if (!allTables) {
      toolOptions.addUniqueOptions(getIncrementalOptions());
    }
    toolOptions.addUniqueOptions(getOutputFormatOptions());
    toolOptions.addUniqueOptions(getInputFormatOptions());
    toolOptions.addUniqueOptions(getHiveOptions(true));
    toolOptions.addUniqueOptions(getHBaseOptions());

    // get common codegen opts.
    RelatedOptions codeGenOpts = getCodeGenOpts(allTables);

    // add import-specific codegen opts:
    codeGenOpts.addOption(OptionBuilder.withArgName("file")
        .hasArg()
        .withDescription("Disable code generation; use specified jar")
        .withLongOpt(JAR_FILE_NAME_ARG)
        .create());

    toolOptions.addUniqueOptions(codeGenOpts);
  }

  @Override
  /** {@inheritDoc} */
  public void printHelp(ToolOptions toolOptions) {
    super.printHelp(toolOptions);
    System.out.println("");
    if (allTables) {
      System.out.println("At minimum, you must specify --connect");
    } else {
      System.out.println(
          "At minimum, you must specify --connect and --table");
    }

    System.out.println(
        "Arguments to mysqldump and other subprograms may be supplied");
    System.out.println(
        "after a '--' on the command line.");
  }

  private void applyIncrementalOptions(CommandLine in, SqoopOptions out)
      throws InvalidOptionsException  {
    if (in.hasOption(INCREMENT_TYPE_ARG)) {
      String incrementalTypeStr = in.getOptionValue(INCREMENT_TYPE_ARG);
      if ("append".equals(incrementalTypeStr)) {
        out.setIncrementalMode(SqoopOptions.IncrementalMode.AppendRows);
        // This argument implies ability to append to the same directory.
        out.setAppendMode(true);
      } else if ("lastmodified".equals(incrementalTypeStr)) {
        out.setIncrementalMode(SqoopOptions.IncrementalMode.DateLastModified);
      } else {
        throw new InvalidOptionsException("Unknown incremental import mode: "
            + incrementalTypeStr + ". Use 'append' or 'lastmodified'."
            + HELP_STR);
      }
    }

    if (in.hasOption(INCREMENT_COL_ARG)) {
      out.setIncrementalTestColumn(in.getOptionValue(INCREMENT_COL_ARG));
    }

    if (in.hasOption(INCREMENT_LAST_VAL_ARG)) {
      out.setIncrementalLastValue(in.getOptionValue(INCREMENT_LAST_VAL_ARG));
    }
  }

  @Override
  /** {@inheritDoc} */
  public void applyOptions(CommandLine in, SqoopOptions out)
      throws InvalidOptionsException {

    try {
      applyCommonOptions(in, out);

      if (in.hasOption(DIRECT_ARG)) {
        out.setDirectMode(true);
      }

      if (!allTables) {
        if (in.hasOption(TABLE_ARG)) {
          out.setTableName(in.getOptionValue(TABLE_ARG));
        }

        if (in.hasOption(COLUMNS_ARG)) {
          String[] cols = in.getOptionValue(COLUMNS_ARG).split(",");
          for (int i = 0; i < cols.length; i++) {
            cols[i] = cols[i].trim();
          }
          out.setColumns(cols);
        }

        if (in.hasOption(SPLIT_BY_ARG)) {
          out.setSplitByCol(in.getOptionValue(SPLIT_BY_ARG));
        }

        if (in.hasOption(WHERE_ARG)) {
          out.setWhereClause(in.getOptionValue(WHERE_ARG));
        }

        if (in.hasOption(TARGET_DIR_ARG)) {
          out.setTargetDir(in.getOptionValue(TARGET_DIR_ARG));
        }

        if (in.hasOption(APPEND_ARG)) {
          out.setAppendMode(true);
        }

        if (in.hasOption(SQL_QUERY_ARG)) {
          out.setSqlQuery(in.getOptionValue(SQL_QUERY_ARG));
        }

        if (in.hasOption(SQL_QUERY_BOUNDARY)) {
          out.setBoundaryQuery(in.getOptionValue(SQL_QUERY_BOUNDARY));
        }
      }

      if (in.hasOption(WAREHOUSE_DIR_ARG)) {
        out.setWarehouseDir(in.getOptionValue(WAREHOUSE_DIR_ARG));
      }

      if (in.hasOption(FMT_SEQUENCEFILE_ARG)) {
        out.setFileLayout(SqoopOptions.FileLayout.SequenceFile);
      }

      if (in.hasOption(FMT_TEXTFILE_ARG)) {
        out.setFileLayout(SqoopOptions.FileLayout.TextFile);
      }

      if (in.hasOption(FMT_AVRODATAFILE_ARG)) {
        out.setFileLayout(SqoopOptions.FileLayout.AvroDataFile);
      }

      if (in.hasOption(NUM_MAPPERS_ARG)) {
        out.setNumMappers(Integer.parseInt(in.getOptionValue(NUM_MAPPERS_ARG)));
      }

      if (in.hasOption(COMPRESS_ARG)) {
        out.setUseCompression(true);
      }

      if (in.hasOption(COMPRESSION_CODEC_ARG)) {
        out.setCompressionCodec(in.getOptionValue(COMPRESSION_CODEC_ARG));
      }

      if (in.hasOption(DIRECT_SPLIT_SIZE_ARG)) {
        out.setDirectSplitSize(Long.parseLong(in.getOptionValue(
            DIRECT_SPLIT_SIZE_ARG)));
      }

      if (in.hasOption(INLINE_LOB_LIMIT_ARG)) {
        out.setInlineLobLimit(Long.parseLong(in.getOptionValue(
            INLINE_LOB_LIMIT_ARG)));
      }

      if (in.hasOption(FETCH_SIZE_ARG)) {
        out.setFetchSize(Integer.valueOf(in.getOptionValue(FETCH_SIZE_ARG)));
      }

      if (in.hasOption(JAR_FILE_NAME_ARG)) {
        out.setExistingJarName(in.getOptionValue(JAR_FILE_NAME_ARG));
      }

      applyIncrementalOptions(in, out);
      applyHiveOptions(in, out);
      applyOutputFormatOptions(in, out);
      applyInputFormatOptions(in, out);
      applyCodeGenOptions(in, out, allTables);
      applyHBaseOptions(in, out);
    } catch (NumberFormatException nfe) {
      throw new InvalidOptionsException("Error: expected numeric argument.\n"
          + "Try --help for usage.");
    }
  }

  /**
   * Validate import-specific arguments.
   * @param options the configured SqoopOptions to check
   */
  protected void validateImportOptions(SqoopOptions options)
      throws InvalidOptionsException {
    if (!allTables && options.getTableName() == null
        && options.getSqlQuery() == null) {
      throw new InvalidOptionsException(
          "--table or --" + SQL_QUERY_ARG + " is required for import. "
          + "(Or use sqoop import-all-tables.)"
          + HELP_STR);
    } else if (options.getExistingJarName() != null
        && options.getClassName() == null) {
      throw new InvalidOptionsException("Jar specified with --jar-file, but no "
          + "class specified with --class-name." + HELP_STR);
    } else if (options.getTargetDir() != null
        && options.getWarehouseDir() != null) {
      throw new InvalidOptionsException(
          "--target-dir with --warehouse-dir are incompatible options."
          + HELP_STR);
    } else if (options.getTableName() != null
        && options.getSqlQuery() != null) {
      throw new InvalidOptionsException(
          "Cannot specify --" + SQL_QUERY_ARG + " and --table together."
          + HELP_STR);
    } else if (options.getSqlQuery() != null
        && options.getTargetDir() == null && options.getHBaseTable() == null) {
      throw new InvalidOptionsException(
          "Must specify destination with --target-dir."
          + HELP_STR);
    } else if (options.getSqlQuery() != null && options.doHiveImport()
        && options.getHiveTableName() == null) {
      throw new InvalidOptionsException(
          "When importing a query to Hive, you must specify --"
          + HIVE_TABLE_ARG + "." + HELP_STR);
    } else if (options.getSqlQuery() != null && options.getNumMappers() > 1
        && options.getSplitByCol() == null) {
      throw new InvalidOptionsException(
          "When importing query results in parallel, you must specify --"
          + SPLIT_BY_ARG + "." + HELP_STR);
    } else if (options.isDirect()
        && options.getFileLayout() != SqoopOptions.FileLayout.TextFile
        && options.getConnectString().contains("jdbc:mysql://")) {
      throw new InvalidOptionsException(
          "MySQL direct import currently supports only text output format. "
          + "The --as-sequencefile and --as-avrodatafile parameters are not "
          + "supported with --direct for MySQL imports.");
    } else if (options.isDirect()
        && options.doHiveDropDelims()) {
      throw new InvalidOptionsException(
          "Direct import currently does not support dropping Hive delimiters;"
          + " please remove the --hive-drop-import-delims parameter.");
    }
  }

  /**
   * Validate the incremental import options.
   */
  private void validateIncrementalOptions(SqoopOptions options)
      throws InvalidOptionsException {
    if (options.getIncrementalMode() != SqoopOptions.IncrementalMode.None
        && options.getIncrementalTestColumn() == null) {
      throw new InvalidOptionsException(
          "For an incremental import, the check column must be specified "
          + "with --" + INCREMENT_COL_ARG + ". " + HELP_STR);
    }

    if (options.getIncrementalMode() == SqoopOptions.IncrementalMode.None
        && options.getIncrementalTestColumn() != null) {
      throw new InvalidOptionsException(
          "You must specify an incremental import mode with --"
          + INCREMENT_TYPE_ARG + ". " + HELP_STR);
    }

    if (options.getIncrementalMode() != SqoopOptions.IncrementalMode.None
        && options.getTableName() == null) {
      throw new InvalidOptionsException("Incremental imports require a table."
          + HELP_STR);
    }
  }

  @Override
  /** {@inheritDoc} */
  public void validateOptions(SqoopOptions options)
      throws InvalidOptionsException {

    // If extraArguments is non-empty, check for '--' followed by args for
    // mysqldump or other commands we rely on.
    options.setExtraArgs(getSubcommandArgs(extraArguments));
    int dashPos = getDashPosition(extraArguments);

    if (hasUnrecognizedArgs(extraArguments, 0, dashPos)) {
      throw new InvalidOptionsException(HELP_STR);
    }

    validateImportOptions(options);
    validateIncrementalOptions(options);
    validateCommonOptions(options);
    validateCodeGenOptions(options);
    validateOutputFormatOptions(options);
    validateHBaseOptions(options);
    validateHiveOptions(options);
  }
}
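
// A minimal usage sketch (not part of the original file; the connect string,
// table, and values below are hypothetical):
//
//   sqoop import \
//     --connect jdbc:mysql://db.example.com/corp \
//     --table employees \
//     --incremental append --check-column id --last-value 100 \
//     --target-dir /user/foo/employees
//
// Programmatically, Sqoop.runTool(args) parses these arguments and then
// dispatches to ImportTool.run(SqoopOptions).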