Source Code of com.dp.nebula.wormhole.plugins.writer.hdfswriter.HdfsWriterPeriphery

package com.dp.nebula.wormhole.plugins.writer.hdfswriter;

import java.io.IOException;
import java.net.URI;
import java.text.MessageFormat;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
import org.apache.log4j.Logger;

import com.dp.nebula.wormhole.common.JobStatus;
import com.dp.nebula.wormhole.common.WormholeException;
import com.dp.nebula.wormhole.common.interfaces.IParam;
import com.dp.nebula.wormhole.common.interfaces.ISourceCounter;
import com.dp.nebula.wormhole.common.interfaces.ITargetCounter;
import com.dp.nebula.wormhole.common.interfaces.IWriterPeriphery;
import com.dp.nebula.wormhole.plugins.common.DFSUtils;
import com.hadoop.compression.lzo.LzoIndexer;

public class HdfsWriterPeriphery implements IWriterPeriphery {

  private static final Logger logger = Logger
      .getLogger(HdfsWriterPeriphery.class.getName());

  private static final int HIVE_TABLE_ADD_PARTITION_PARAM_NUMBER = 2;
  private static final String HIDDEN_FILE_PREFIX = "_";
  private static final int MAX_LZO_CREATION_TRY_TIMES = 3;
  private static final long LZO_CREATION_TRY_INTERVAL_IN_MILLIS = 10000L;

  private String dir = "";
  private String prefixname = "";
  private int concurrency = 1;
  private String codecClass = "";
  private String fileType = "TXT";
  private String suffix = "";
  private boolean lzoCompressed = false;
  private String hiveTableAddPartitionOrNot = "false";
  private String hiveTableAddPartitionCondition = "";

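  // MessageFormat template for the "add partition" HQL; the doubled single
  // quotes around {2} render as a single-quoted location literal.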
  private final String ADD_PARTITION_SQL = "alter table {0} add if not exists partition({1}) location ''{2}'';";

  private FileSystem fs;
  private LzoIndexer lzoIndexer;

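  // Roll back a failed write by deleting output files matching the configured
  // prefix (and suffix) under the target directory.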
  @Override
  public void rollback(IParam param) {
    deleteFilesOnHdfs(this.dir, this.prefixname, suffix);
  }

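  // Post-write stage: drop stale files under the visible prefix, rename the
  // hidden "_"-prefixed temp files to their final names, optionally build lzo
  // index files, and optionally add a hive table partition for the directory.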
  @Override
  public void doPost(IParam param, ITargetCounter counter) {
    deleteFilesOnHdfs(this.dir, this.prefixname, suffix);
    // rename temp files, make them visible to hdfs
    renameFiles();

    if (lzoCompressed
        && "true".equalsIgnoreCase(param.getValue(
            ParamKey.createLzoIndexFile, "true").trim())) {
      createLzoIndex(dir);
    }

    /* add hive table partition if necessary */
    hiveTableAddPartitionOrNot = param.getValue(
        ParamKey.hiveTableAddPartitionSwitch, "false").trim();
    if (hiveTableAddPartitionOrNot.equalsIgnoreCase("true")) {
      hiveTableAddPartitionCondition = param.getValue(
          ParamKey.hiveTableAddPartitionCondition,
          this.hiveTableAddPartitionCondition);

      if (!StringUtils.isBlank(hiveTableAddPartitionCondition)) {
        String[] hqlParam = StringUtils.split(
            hiveTableAddPartitionCondition, '@');
        if (HIVE_TABLE_ADD_PARTITION_PARAM_NUMBER == hqlParam.length) {
          String parCondition = hqlParam[0].trim().replace('"', '\'');
          String uri = hqlParam[1].trim();
          // split dbname and tablename
          String[] parts = StringUtils.split(uri, '.');

          if (StringUtils.isBlank(parCondition)
              || StringUtils.isBlank(uri) || parts.length != 2
              || StringUtils.isBlank(parts[0])
              || StringUtils.isBlank(parts[1])) {
            logger.error(ParamKey.hiveTableAddPartitionCondition
                + " param cannot be parsed correctly, please check it again");
            return;
          }

          try {
            addHiveTablePartition(parts[0].trim(), parts[1].trim(),
                parCondition, dir);
          } catch (IOException e) {
            logger.error("add hive table partition failed:"
                + e.getMessage());
          }
        }
      }
    }
  }

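  // Renames every plain file under dir whose name starts with the hidden temp
  // prefix ("_" + prefixname), stripping the leading "_" so the output becomes
  // visible to downstream readers.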
  private void renameFiles() {
    try {
      String tempPrefixName = HIDDEN_FILE_PREFIX + prefixname;

      fs = DFSUtils.createFileSystem(new URI(dir),
          DFSUtils.getConf(dir, null));
      for (FileStatus status : fs.listStatus(new Path(dir))) {
        if (!status.isDir()) {
          String fileName = status.getPath().getName();
          if (fileName.startsWith(tempPrefixName)) {
            String parentPathName = status.getPath().getParent()
                .toString();
            Path absoluteSrcPath = status.getPath();
            // trim the ahead one '_' character
            fileName = fileName.substring(1);
            Path absoluteDstPath = new Path(parentPathName + "/"
                + fileName);
            if (fs.rename(absoluteSrcPath, absoluteDstPath)) {
              logger.info("successfully rename file from "
                  + absoluteSrcPath.toString() + " to "
                  + absoluteDstPath.toString());
            } else {
              logger.error("failed to rename file from "
                  + absoluteSrcPath.toString() + " to "
                  + absoluteDstPath.toString());
            }
          }
        }
      }
    } catch (Exception e) {
      logger.error(String.format(
          "HdfsWriter rename temp files failed: %s, %s",
          e.getMessage(), e.getCause()));
      throw new WormholeException(
          "HdfsWriter rename temp files failed in do-post stage",
          JobStatus.POST_WRITE_FAILED.getStatus());
    } finally {
      closeAll();
    }
  }

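  // Adds the partition by shelling out to the hive CLI ("hive -e ...") via
  // bash; a non-zero exit code is reported as an IOException.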
  private void addHiveTablePartition(String dbName, String tableName,
      String partitionCondition, String location) throws IOException {
    StringBuffer addPartitionCommand = new StringBuffer();

    addPartitionCommand.append("hive -e \"");
    addPartitionCommand.append("use " + dbName + ";");
    addPartitionCommand.append(MessageFormat.format(ADD_PARTITION_SQL,
        tableName, partitionCondition, location));
    addPartitionCommand.append("\"");

    logger.info(addPartitionCommand.toString());

    String[] shellCmd = { "bash", "-c", addPartitionCommand.toString() };
    ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd);
    shexec.execute();
    int exitCode = shexec.getExitCode();
    if (exitCode != 0) {
      throw new IOException("process exited with exit code " + exitCode);
    } else {
      logger.info("hive table add partition hql executed correctly: "
          + addPartitionCommand.toString());
    }
  }

  // private void createHiveTable(IParam param) {
  //   String createTableOrNot = param.getValue(
  //       ParamKey.hiveTableSwitch, "false");
  //
  //   if (!createTableOrNot.equalsIgnoreCase("true"))
  //     return;
  //
  //   String tableFieldType = param.getValue(ParamKey.tableFieldType,
  //       "").trim().toUpperCase();
  //   if (StringUtils.isEmpty(tableFieldType))
  //     return;
  //
  //   String[] fieldTypes = tableFieldType.split(",");
  //   for (int i = 0; i < fieldTypes.length; i++) {
  //     String fieldType = fieldTypes[i];
  //     if (fieldType.equalsIgnoreCase("bigint")) {
  //
  //     } else if (fieldType.equalsIgnoreCase("string")) {
  //
  //     } else {
  //       logger.error(String.format("unnormal fieldType:%s in the param %s",
  //           tableFieldType, ParamKey.tableFieldType));
  //       return;
  //     }
  //   }
  // }

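  // Builds lzo index files for the written directory, retrying up to
  // MAX_LZO_CREATION_TRY_TIMES with a fixed sleep between attempts, and fails
  // the post-write stage if indexing never succeeds or .tmp files remain.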
  private void createLzoIndex(String directory) {
    try {
      Configuration cfg = DFSUtils.getConf(directory, null);
      lzoIndexer = new LzoIndexer(cfg);

      int times = 1;
      boolean idxCreated = false;
      do {
        logger.info("start to create lzo index file on " + directory
            + " times: " + times);
        try {
          lzoIndexer.index(new Path(directory));
          idxCreated = true;
        } catch (Throwable t) {
          logger.error(String
              .format("HdfsWriter doPost stage create index %s failed, start to sleep %d millis sec, %s,%s",
                  directory,
                  LZO_CREATION_TRY_INTERVAL_IN_MILLIS,
                  t.getMessage(), t.getCause()));
          try {
            Thread.sleep(LZO_CREATION_TRY_INTERVAL_IN_MILLIS);
          } catch (InterruptedException ite) {
            ite.printStackTrace(System.err);
          }
        }
      } while (!idxCreated && times++ <= MAX_LZO_CREATION_TRY_TIMES);

      if (!idxCreated) {
        throw new WormholeException(
            "lzo index creation failed after try "
                + MAX_LZO_CREATION_TRY_TIMES + " times",
            JobStatus.POST_WRITE_FAILED.getStatus());
      }

      if (existIncompleteFile(directory)) {
        logger.error(String.format(
            "HdfsWriter doPost stage found incomplete .tmp files under %s after lzo index creation",
            directory));
        throw new WormholeException(
            "HdfsWriter doPost stage lzo index creation left incomplete files",
            JobStatus.POST_WRITE_FAILED.getStatus());
      }
    } catch (IOException e) {
      logger.error(String.format(
          "HdfsWriter doPost stage failed to get configuration: %s, %s",
          e.getMessage(), e.getCause()));
      throw new WormholeException(e,
          JobStatus.POST_WRITE_FAILED.getStatus());
    }
  }

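  // Reads writer parameters, normalizes the target directory (dropping a
  // trailing "*" or "/"), resolves the compression suffix for TXT_COMP output,
  // and deletes leftover hidden temp files from earlier runs.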
  @Override
  public void prepare(IParam param, ISourceCounter counter) {
    dir = param.getValue(ParamKey.dir);
    prefixname = param.getValue(ParamKey.prefixname, "prefix");
    concurrency = param.getIntValue(ParamKey.concurrency, 1);

    if (dir.endsWith("*")) {
      dir = dir.substring(0, dir.lastIndexOf("*"));
    }
    if (dir.endsWith("/")) {
      dir = dir.substring(0, dir.lastIndexOf("/"));
    }

    fileType = param.getValue(ParamKey.fileType, this.fileType);
    codecClass = param.getValue(ParamKey.codecClass, "");
    if (fileType.equalsIgnoreCase("TXT_COMP")) {
      suffix = DFSUtils.getCompressionSuffixMap().get(codecClass);
      if (StringUtils.isEmpty(suffix)) {
        suffix = "lzo";
      }
    }

    if (suffix.equalsIgnoreCase("lzo")) {
      lzoCompressed = true;
    }
    deleteFilesOnHdfs(this.dir, HIDDEN_FILE_PREFIX + this.prefixname,
        suffix);
  }

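  // Closes the shared FileSystem handle, logging (but not rethrowing) any
  // failure.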
  private void closeAll() {
    try {
      IOUtils.closeStream(fs);
    } catch (Exception e) {
      logger.error(String.format(
          "HdfsWriter closing filesystem failed: %s,%s",
          e.getMessage(), e.getCause()));
    }
  }

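  // Returns true if any file directly under the given directory still ends
  // with ".tmp", i.e. was left behind incomplete.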
  private boolean existIncompleteFile(String dir) {
    try {
      fs = DFSUtils.createFileSystem(new URI(dir),
          DFSUtils.getConf(dir, null));
      for (FileStatus status : fs.listStatus(new Path(dir))) {
        if (status.getPath().getName().endsWith(".tmp")) {
          return true;
        }
      }
    } catch (Exception e) {
      logger.error(String.format(
          "HdfsWriter init file system failed: %s, %s", e.getMessage(),
          e.getCause()));
    } finally {
      closeAll();
    }
    return false;
  }

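  // Deletes previously written output: with concurrency == 1, a single
  // "prefix[.suffix]" file (plus its ".index" companion when lzo-compressed);
  // otherwise every file matching "prefix-*" under the directory.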
  private void deleteFilesOnHdfs(String dir, String prefix, String suffix) {
    try {
      fs = DFSUtils.createFileSystem(new URI(dir),
          DFSUtils.getConf(dir, null));

      if (concurrency == 1) {
        String file = dir + "/" + prefix;

        if (!StringUtils.isEmpty(suffix))
          file = file + "." + suffix;

        DFSUtils.deleteFile(fs, new Path(file), true);

        if (lzoCompressed) {
          file = file + ".index";
          if (fs.exists(new Path(file))) {
            DFSUtils.deleteFile(fs, new Path(file), true);
          }
        }
      } else {
        DFSUtils.deleteFiles(fs, new Path(dir + "/" + prefix + "-*"),
            true, true);
      }
    } catch (Exception e) {
      logger.error(String.format(
          "HdfsWriter delete files on hdfs failed: %s, %s", e.getMessage(),
          e.getCause()));
    } finally {
      closeAll();
    }
  }
}