/*
* Copyright 2012 NGDATA nv
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.repository.bulk.serial;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;
import com.google.common.base.Charsets;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.lilyproject.repository.bulk.AbstractBulkImportCliTool;
import org.lilyproject.repository.bulk.BulkIngester;
import org.lilyproject.repository.bulk.LineMapper;
import org.lilyproject.repository.bulk.LineMappingContext;
import org.lilyproject.repository.bulk.RecordWriter;
import org.lilyproject.repository.bulk.jython.JythonLineMapper;
import org.python.core.PyException;
import org.python.google.common.io.Files;
/**
 * A bulk import tool similar to {@link org.lilyproject.repository.bulk.mapreduce.BulkImportTool} that works without
 * MapReduce.
 *
 * <p>The input file is read line by line in the calling thread and every line is handed to a Jython
 * {@link LineMapper}. When the {@code -d}/{@code --dryrun} flag is given, the created records are only printed out
 * and not written to Lily.
 */
public class BulkImportTool extends AbstractBulkImportCliTool {

    private static final Log log = LogFactory.getLog(BulkImportTool.class);

    /** The {@code -d}/{@code --dryrun} option; created in {@link #getOptions()}. */
    private Option dryRunOption;

    /** Whether the dry-run flag was present on the command line; set in {@link #processOptions(CommandLine)}. */
    private boolean dryRun;

    /**
     * Returns the options of the superclass with the dry-run flag appended.
     *
     * <p>The flag is added directly to the list returned by {@code super.getOptions()}.
     *
     * @return the command line options supported by this tool
     */
    @SuppressWarnings("static-access")
    @Override
    public List<Option> getOptions() {
        dryRunOption = OptionBuilder
                .withDescription("Only print out the created records without writing them to Lily")
                .withLongOpt("dryrun")
                .create('d');

        List<Option> options = super.getOptions();
        options.add(dryRunOption);
        return options;
    }

    /**
     * Lets the superclass process its options first and then records whether a dry run was requested.
     *
     * @param cmd the parsed command line
     * @return the non-zero status of the superclass if it reported one, otherwise 0
     * @throws Exception if the superclass fails to process the options
     */
    @Override
    protected int processOptions(CommandLine cmd) throws Exception {
        int status = super.processOptions(cmd);
        if (status != 0) {
            return status;
        }

        dryRun = cmd.hasOption(dryRunOption.getOpt());
        return 0;
    }

    /**
     * @return the name of this command, {@code lily-bulk-import}
     */
    @Override
    protected String getCmdName() {
        return "lily-bulk-import";
    }

    /**
     * Maps every line of the input file through the configured Jython mapper and hands the resulting records to a
     * {@link RecordWriter}.
     *
     * <p>The input reader and the record writer are both closed before this method returns, whether the import
     * succeeded or failed. Unless this is a dry run, a one-line summary is printed to standard output after a
     * successful import.
     *
     * @param cmd the parsed command line (not consulted here; the options were captured earlier)
     * @return 0 on success, -1 if the Python mapping code raised an exception
     * @throws Exception if reading the input, loading the mapper script, or writing records fails
     */
    @Override
    public int run(CommandLine cmd) throws Exception {
        // NOTE(review): 30000 is presumably a connection timeout in milliseconds -- confirm against BulkIngester.
        // NOTE(review): the ingester is never released in this method; if BulkIngester offers a close/cleanup
        // method it should probably be called once the import is finished -- confirm.
        BulkIngester bulkIngester =
                BulkIngester.newBulkIngester(zkConnectionString, 30000, outputRepository, outputTable, bulkMode);

        // NOTE(review): FileReader decodes with the platform default charset (before Java 18), while the mapper
        // script below is read as UTF-8 -- confirm whether the input data should be read as UTF-8 too.
        BufferedReader bufferedReader = new BufferedReader(new FileReader(inputPath));
        try {
            // The writer is created inside the reader's try block so that the reader is still closed when
            // constructing the writer fails.
            RecordWriter recordWriter;
            if (dryRun) {
                recordWriter = new DebugRecordWriter(System.out);
            } else {
                // NOTE(review): 10 is presumably the number of writer threads -- confirm against
                // ThreadedRecordWriter.
                recordWriter =
                        new ThreadedRecordWriter(zkConnectionString, 10, outputRepository, outputTable, bulkMode);
            }

            long start = System.currentTimeMillis();
            // long rather than int so that very large input files cannot overflow the counter
            long numLines = 0;
            try {
                LineMapper lineMapper = new JythonLineMapper(
                        Files.toString(new File(pythonMapperPath), Charsets.UTF_8), pythonSymbol);
                LineMappingContext mappingContext = new LineMappingContext(bulkIngester, recordWriter);

                String line;
                while ((line = bufferedReader.readLine()) != null) {
                    lineMapper.mapLine(line, mappingContext);
                    numLines++;
                }
            } catch (PyException pe) {
                pe.printStackTrace(); // Print the Jython-native stack trace
                log.error("Exception encountered in Python code", pe);
                return -1;
            } finally {
                // Closed in its own finally so that a failure while closing one resource can never prevent the
                // other one from being closed.
                recordWriter.close();
            }

            float duration = (System.currentTimeMillis() - start) / 1000f;
            if (!dryRun) {
                System.out.printf("Imported %d lines as %d records in %.2f seconds\n", numLines,
                        recordWriter.getNumRecords(), duration);
            }
            return 0;
        } finally {
            bufferedReader.close();
        }
    }

    /**
     * Command line entry point.
     *
     * @param args the command line arguments, passed on to {@code start}
     * @throws IOException declared for failures propagated from {@code start}
     */
    public static void main(String[] args) throws IOException {
        new BulkImportTool().start(args);
    }
}