Package com.asakusafw.bulkloader.importer

Source Code of com.asakusafw.bulkloader.importer.ImportFileSend

/**
* Copyright 2011-2014 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.bulkloader.importer;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.asakusafw.bulkloader.bean.ImportBean;
import com.asakusafw.bulkloader.bean.ImportTargetTableBean;
import com.asakusafw.bulkloader.common.ConfigurationLoader;
import com.asakusafw.bulkloader.common.Constants;
import com.asakusafw.bulkloader.common.FileCompType;
import com.asakusafw.bulkloader.exception.BulkLoaderSystemException;
import com.asakusafw.bulkloader.log.Log;
import com.asakusafw.bulkloader.transfer.FileList;
import com.asakusafw.bulkloader.transfer.FileListProvider;
import com.asakusafw.bulkloader.transfer.FileProtocol;
import com.asakusafw.bulkloader.transfer.OpenSshFileListProvider;
import com.asakusafw.runtime.core.context.RuntimeContext;

/**
* Import対象ファイルをDBサーバからHDFSのNameノードへ送信するクラス。
* @author yuta.shirai
*/
public class ImportFileSend {

    static final Log LOG = new Log(ImportFileSend.class);

    /**
     * Import対象ファイルをHDFSのNameノードへ送信する。
     * TSV形式のImport対象ファイルを読み込み、
     * ZIP形式に圧縮してNameノードの標準入力へsshで送信する。
     * @param bean パラメータを保持するBean
     * @return Import対象ファイル送信結果(true:成功、false:失敗)
     */
    public boolean sendImportFile(ImportBean bean) {
        // ZIP圧縮に関する情報を取得
        String strCompType = ConfigurationLoader.getProperty(Constants.PROP_KEY_IMP_FILE_COMP_TYPE);
        FileCompType compType = FileCompType.find(strCompType);

        FileListProvider provider = null;
        FileList.Writer writer = null;

        long totalStartTime = System.currentTimeMillis();
        try {
            provider = openFileList(
                    bean.getTargetName(),
                    bean.getBatchId(),
                    bean.getJobflowId(),
                    bean.getExecutionId());
            provider.discardReader();
            writer = provider.openWriter(compType == FileCompType.DEFLATED);

            // Import対象テーブル毎にファイルの読み込み・書き出しの処理を行う
            List<String> list = arrangeSendOrder(bean);
            for (String tableName : list) {
                long tableStartTime = System.currentTimeMillis();
                ImportTargetTableBean targetTable = bean.getTargetTable(tableName);
                LOG.info("TG-IMPORTER-04004",
                        tableName,
                        targetTable.getImportFile().getAbsolutePath(),
                        compType.getSymbol());
                long dumpFileSize = sendTableFile(writer, tableName, targetTable);
                LOG.info("TG-PROFILE-02003",
                        bean.getTargetName(),
                        bean.getBatchId(),
                        bean.getJobflowId(),
                        bean.getExecutionId(),
                        tableName,
                        dumpFileSize,
                        System.currentTimeMillis() - tableStartTime);
                LOG.info("TG-IMPORTER-04005",
                        tableName,
                        targetTable.getImportFile().getAbsolutePath(),
                        compType.getSymbol());
            }
            writer.close();
            provider.waitForComplete();
            LOG.info("TG-PROFILE-02001",
                    bean.getTargetName(),
                    bean.getBatchId(),
                    bean.getJobflowId(),
                    bean.getExecutionId(),
                    writer.getByteCount(),
                    System.currentTimeMillis() - totalStartTime);
        } catch (BulkLoaderSystemException e) {
            LOG.log(e);
            return false;
        } catch (Exception e) {
            LOG.error(e, "TG-IMPORTER-04002");
            return false;
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException ignored) {
                    ignored.printStackTrace();
                }
            }
            if (provider != null) {
                try {
                    provider.close();
                } catch (IOException ignored) {
                    ignored.printStackTrace();
                }
            }
        }
        return true;
    }

    private List<String> arrangeSendOrder(ImportBean bean) {
        assert bean != null;
        final Map<String, ImportTargetTableBean> tables = new HashMap<String, ImportTargetTableBean>();
        final Map<String, Long> sizes = new HashMap<String, Long>();
        List<String> tableNames = new ArrayList<String>(bean.getImportTargetTableList());
        for (String tableName : tableNames) {
            ImportTargetTableBean tableBean = bean.getTargetTable(tableName);
            tables.put(tableName, tableBean);
            sizes.put(tableName, tableBean.getImportFile().length());
        }
        Collections.sort(tableNames, new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                ImportTargetTableBean t1 = tables.get(o1);
                ImportTargetTableBean t2 = tables.get(o2);

                // put cached table on top
                if (t1.getCacheId() != null && t2.getCacheId() == null) {
                    return -1;
                } else if (t1.getCacheId() == null && t2.getCacheId() != null) {
                    return +1;
                }

                // put large file on top
                long s1 = sizes.get(o1);
                long s2 = sizes.get(o2);
                if (s1 > s2) {
                    return -1;
                } else if (s1 < s2) {
                    return +1;
                }

                // sort by its table name
                return o1.compareTo(o2);
            }
        });
        return tableNames;
    }

    private long sendTableFile(
            FileList.Writer writer,
            String tableName,
            ImportTargetTableBean targetTable) throws BulkLoaderSystemException {
        assert writer != null;
        assert tableName != null;
        assert targetTable != null;
        File localFile = targetTable.getImportFile();
        int buffSize = Integer.parseInt(ConfigurationLoader.getProperty(Constants.PROP_KEY_IMP_FILE_COMP_BUFSIZE));
        byte[] buf = new byte[buffSize];
        long dumpFileSize = 0;
        try {
            InputStream input = new FileInputStream(localFile);
            try {
                FileProtocol protocol = targetTable.getImportProtocol();
                assert protocol != null;
                OutputStream output = writer.openNext(protocol);
                try {
                    while (true) {
                        int read = input.read(buf);
                        if (read < 0) {
                            break;
                        }
                        dumpFileSize += read;
                        output.write(buf, 0, read);
                    }
                } finally {
                    output.close();
                }
            } finally {
                input.close();
            }
        } catch (IOException e) {
            throw new BulkLoaderSystemException(e, getClass(), "TG-IMPORTER-04001",
                    MessageFormat.format(
                            "Importファイルの転送に失敗。テーブル名:{0}, Importファイル名: {1}",
                            tableName,
                            localFile.getPath()));
        }
        return dumpFileSize;
    }

    /**
     * Opens a new {@link FileListProvider}.
     * @param targetName current target name
     * @param batchId current batch ID
     * @param jobflowId current jobflow ID
     * @param executionId current execution ID
     * @return the created provider
     * @throws IOException if failed to open the file list
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    protected FileListProvider openFileList(
            String targetName,
            String batchId,
            String jobflowId,
            String executionId) throws IOException {
        if (targetName == null) {
            throw new IllegalArgumentException("targetName must not be null"); //$NON-NLS-1$
        }
        if (batchId == null) {
            throw new IllegalArgumentException("batchId must not be null"); //$NON-NLS-1$
        }
        if (jobflowId == null) {
            throw new IllegalArgumentException("jobflowId must not be null"); //$NON-NLS-1$
        }
        if (executionId == null) {
            throw new IllegalArgumentException("executionId must not be null"); //$NON-NLS-1$
        }
        String sshPath = ConfigurationLoader.getProperty(Constants.PROP_KEY_SSH_PATH);
        String hostName = ConfigurationLoader.getProperty(Constants.PROP_KEY_NAMENODE_HOST);
        String userName = ConfigurationLoader.getProperty(Constants.PROP_KEY_NAMENODE_USER);
        String scriptPath = ConfigurationLoader.getRemoteScriptPath(Constants.PATH_REMOTE_EXTRACTOR);
        String variableTable = Constants.createVariableTable().toSerialString();
        List<String> command = new ArrayList<String>();
        command.add(scriptPath);
        command.add(targetName);
        command.add(batchId);
        command.add(jobflowId);
        command.add(executionId);
        command.add(variableTable);

        Map<String, String> env = new HashMap<String, String>();
        env.putAll(ConfigurationLoader.getPropSubMap(Constants.PROP_PREFIX_HC_ENV));
        env.putAll(RuntimeContext.get().unapply());

        LOG.info("TG-IMPORTER-04003",
                sshPath,
                hostName,
                userName,
                scriptPath,
                targetName,
                batchId,
                jobflowId,
                executionId);

        return new OpenSshFileListProvider(sshPath, userName, hostName, command, env);
    }
}
TOP

Related Classes of com.asakusafw.bulkloader.importer.ImportFileSend

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.