Package org.lilyproject.repository.bulk.serial

Source Code of org.lilyproject.repository.bulk.serial.BulkImportTool

/*
* Copyright 2012 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.repository.bulk.serial;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.List;

import com.google.common.base.Charsets;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.lilyproject.repository.bulk.AbstractBulkImportCliTool;
import org.lilyproject.repository.bulk.BulkIngester;
import org.lilyproject.repository.bulk.LineMapper;
import org.lilyproject.repository.bulk.LineMappingContext;
import org.lilyproject.repository.bulk.RecordWriter;
import org.lilyproject.repository.bulk.jython.JythonLineMapper;
import org.python.core.PyException;
import org.python.google.common.io.Files;

/**
* A bulk import tool similar to {@link org.lilyproject.repository.bulk.mapreduce.BulkImportTool} that works without
* MapReduce.
*/
public class BulkImportTool extends AbstractBulkImportCliTool {

    private final Log log = LogFactory.getLog(BulkImportTool.class);

    private Option dryRunOption;

    private boolean dryRun;

    @SuppressWarnings("static-access")
    @Override
    public List<Option> getOptions() {
        dryRunOption =
                OptionBuilder.withDescription("Only print out the created records without writing them to Lily").withLongOpt(
                        "dryrun").create('d');

        List<Option> options = super.getOptions();
        options.add(dryRunOption);
        return options;
    }

    @Override
    protected int processOptions(CommandLine cmd) throws Exception {
        int status = super.processOptions(cmd);
        if (status != 0) {
            return status;
        }

        dryRun = cmd.hasOption(dryRunOption.getOpt());
        return 0;
    }

    @Override
    protected String getCmdName() {
        return "lily-bulk-import";
    }

    @Override
    public int run(CommandLine cmd) throws Exception {
        BulkIngester bulkIngester =
                BulkIngester.newBulkIngester(zkConnectionString, 30000, outputRepository, outputTable, bulkMode);

        BufferedReader bufferedReader = new BufferedReader(new FileReader(inputPath));
        RecordWriter recordWriter;
        if (dryRun) {
            recordWriter = new DebugRecordWriter(System.out);
        } else {
            recordWriter = new ThreadedRecordWriter(zkConnectionString, 10, outputRepository, outputTable, bulkMode);
        }
        long start = System.currentTimeMillis();
        int numLines = 0;

        try {
            LineMapper lineMapper = new JythonLineMapper(Files.toString(new File(pythonMapperPath), Charsets.UTF_8),
                    pythonSymbol);
            LineMappingContext mappingContext = new LineMappingContext(bulkIngester, recordWriter);
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                lineMapper.mapLine(line, mappingContext);
                numLines++;
            }
        } catch (PyException pe) {
            pe.printStackTrace();    // Print the Jython-native stack trace
            log.error("Exception encountered in Python code", pe);
            return -1;
        } finally {
            bufferedReader.close();
            recordWriter.close();
        }
        float duration = (System.currentTimeMillis() - start) / 1000f;
        if (!dryRun) {
            System.out.printf("Imported %d lines as %d records in %.2f seconds\n", numLines, recordWriter.getNumRecords(),
                    duration);
        }

        return 0;
    }

    public static void main(String[] args) throws IOException {
        new BulkImportTool().start(args);
    }

}
TOP

Related Classes of org.lilyproject.repository.bulk.serial.BulkImportTool

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.