/*
* Copyright 2010 Outerthought bvba
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lilyproject.tools.import_.cli;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import com.ngdata.lily.security.hbase.client.AuthorizationContext;
import org.apache.commons.lang.StringUtils;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonToken;
import org.codehaus.jackson.node.ArrayNode;
import org.codehaus.jackson.node.ObjectNode;
import org.lilyproject.repository.api.FieldType;
import org.lilyproject.repository.api.LRepository;
import org.lilyproject.repository.api.LTable;
import org.lilyproject.repository.api.QName;
import org.lilyproject.repository.api.Record;
import org.lilyproject.repository.api.RecordId;
import org.lilyproject.repository.api.RecordType;
import org.lilyproject.repository.api.Repository;
import org.lilyproject.repository.api.RepositoryException;
import org.lilyproject.repository.spi.AuthorizationContextHolder;
import org.lilyproject.tools.import_.core.FieldTypeImport;
import org.lilyproject.tools.import_.core.IdentificationMode;
import org.lilyproject.tools.import_.core.ImportMode;
import org.lilyproject.tools.import_.core.ImportResult;
import org.lilyproject.tools.import_.core.RecordImport;
import org.lilyproject.tools.import_.core.RecordTypeImport;
import org.lilyproject.tools.import_.json.FieldTypeReader;
import org.lilyproject.tools.import_.json.JsonFormatException;
import org.lilyproject.tools.import_.json.Namespaces;
import org.lilyproject.tools.import_.json.NamespacesConverter;
import org.lilyproject.tools.import_.json.NamespacesImpl;
import org.lilyproject.tools.import_.json.RecordReader;
import org.lilyproject.tools.import_.json.RecordTypeReader;
import org.lilyproject.tools.import_.json.UnmodifiableNamespaces;
import org.lilyproject.util.concurrent.WaitPolicy;
import org.lilyproject.util.io.Closer;
import org.lilyproject.util.json.JsonFormat;
import static org.lilyproject.util.json.JsonUtil.getArray;
import static org.lilyproject.util.json.JsonUtil.getBoolean;
import static org.lilyproject.util.json.JsonUtil.getString;
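/**
 * Imports field types, record types and records into Lily from JSON input, optionally
 * using multiple threads.
 *
 * <p>A minimal usage sketch (the file name and stream handling are illustrative only):
 *
 * <pre>
 * InputStream is = new FileInputStream("records.json"); // hypothetical input file
 * try {
 *     JsonImport.load(table, repository, is);
 * } finally {
 *     is.close();
 * }
 * </pre>
 */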
public class JsonImport {
private Namespaces namespaces = new NamespacesImpl();
private LRepository repository;
private LTable table;
private ImportListener importListener;
private int threadCount;
private RecordReader recordReader;
private ThreadPoolExecutor executor;
private volatile boolean abortImport = false;
private volatile boolean errorHappened = false;
private long maximumRecordErrors = 1;
private AtomicLong recordImportErrorCnt = new AtomicLong();
private static final int DEFAULT_THREAD_COUNT = 1;
private static final String FAIL_IF_EXISTS = "failIfExists";
private static final String FAIL_IF_NOT_EXISTS = "failIfNotExists";
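/**
 * Bundles the tunable parameters of an import run: the number of import threads, the
 * record reader to use, the listener receiving progress callbacks, and the record error
 * threshold.
 */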
public static class ImportSettings {
public int threadCount = DEFAULT_THREAD_COUNT;
public RecordReader recordReader = RecordReader.INSTANCE;
public ImportListener importListener = new DefaultImportListener();
/** Number of record import failures after which the import is aborted. */
public long maximumRecordErrors = 1;
public ImportSettings() {
}
public ImportSettings(int threadCount, ImportListener importListener, RecordReader recordReader) {
this.threadCount = threadCount;
this.importListener = importListener;
this.recordReader = recordReader;
}
public ImportSettings(int threadCount, ImportListener importListener, RecordReader recordReader,
long maximumRecordErrors) {
this.threadCount = threadCount;
this.importListener = importListener;
this.recordReader = recordReader;
this.maximumRecordErrors = maximumRecordErrors;
}
}
/**
* The standard loading method: loads both schema and records from the default JSON format.
*/
public static void load(LTable table, LRepository repository, InputStream is, ImportSettings settings)
throws Exception {
new JsonImport(table, repository, settings).load(is, false);
}
/**
* Same as {@link #load(LTable, LRepository, InputStream, ImportSettings)}
* but using default settings.
*/
public static void load(LTable table, LRepository repository, InputStream is)
throws Exception {
new JsonImport(table, repository, new ImportSettings()).load(is, false);
}
/**
* Loads only the schema, ignores any records in the input.
*/
public static void loadSchema(LRepository repository, InputStream is, ImportSettings settings)
throws Exception {
new JsonImport(null, repository, settings).load(is, true);
}
/**
* Same as {@link #loadSchema(LRepository, InputStream, ImportSettings)}
* but using default settings.
*/
public static void loadSchema(LRepository repository, InputStream is)
throws Exception {
new JsonImport(null, repository, new ImportSettings()).load(is, true);
}
/**
 * Imports an alternative input format in which each line of the input contains a complete
 * JSON object describing a Lily record. This format does not support schema definitions.
 */
public static void loadJsonLines(LTable table, LRepository repository, InputStream is, ImportSettings settings)
throws Exception {
new JsonImport(table, repository, settings).loadJsonLines(is);
}
/**
* @deprecated use one of the variants taking LRepository and/or LTable as argument
*/
public static void load(Repository repository, InputStream is, boolean schemaOnly, int threadCount) throws Exception {
ImportSettings settings = new ImportSettings();
settings.threadCount = threadCount;
new JsonImport(repository, repository, settings).load(is, schemaOnly);
}
/**
* @deprecated use one of the variants taking LRepository and/or LTable as argument
*/
public static void load(Repository repository, InputStream is, boolean schemaOnly) throws Exception {
new JsonImport(repository, repository, new ImportSettings()).load(is, schemaOnly);
}
/**
* @deprecated use one of the variants taking LRepository and/or LTable as argument
*/
public static void load(Repository repository, ImportListener importListener, InputStream is, boolean schemaOnly)
throws Exception {
ImportSettings settings = new ImportSettings();
settings.importListener = importListener;
new JsonImport(repository, repository, settings).load(is, schemaOnly);
}
/**
* @deprecated use one of the variants taking LRepository and/or LTable as argument
*/
public static void load(Repository repository, ImportListener importListener, InputStream is, boolean schemaOnly,
int threadCount) throws Exception {
ImportSettings settings = new ImportSettings();
settings.importListener = importListener;
settings.threadCount = threadCount;
new JsonImport(repository, repository, settings).load(is, schemaOnly);
}
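/**
 * Convenience constructor: single-threaded import using the default record reader.
 */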
public JsonImport(LTable table, LRepository repository, ImportListener importListener) {
this(table, repository, new ImportSettings(1, importListener, RecordReader.INSTANCE));
}
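/**
 * Creates an importer with the given settings. The supplied import listener is wrapped
 * in a {@link SynchronizedImportListener} since it will be called from multiple threads.
 */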
public JsonImport(LTable table, LRepository repository, ImportSettings settings) {
this.importListener = new SynchronizedImportListener(settings.importListener);
this.table = table;
this.repository = repository;
this.threadCount = settings.threadCount;
this.recordReader = settings.recordReader;
this.maximumRecordErrors = settings.maximumRecordErrors;
}
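/**
 * Runs the import from the given input stream. When {@code schemaOnly} is true, any
 * records present in the input are skipped.
 */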
public void load(InputStream is, boolean schemaOnly) throws Exception {
// A combination of the Jackson streaming and tree APIs is used: we stream through the file
// as a whole, but use the tree API to load individual items (field types, records, ...).
// This keeps the import fast and the memory footprint small, even when loading large
// numbers of records.
try {
namespaces = new NamespacesImpl();
JsonParser jp = JsonFormat.JSON_FACTORY_NON_STD.createJsonParser(is);
JsonToken current;
current = jp.nextToken();
if (current != JsonToken.START_OBJECT) {
System.out.println("Error: expected object node as root of the input. Giving up.");
return;
}
while (jp.nextToken() != JsonToken.END_OBJECT && !abortImport) {
String fieldName = jp.getCurrentName();
current = jp.nextToken(); // move from field name to field value
if (fieldName.equals("namespaces")) {
if (current == JsonToken.START_OBJECT) {
readNamespaces((ObjectNode)jp.readValueAsTree());
} else {
System.out.println("Error: namespaces property should be an object. Skipping.");
jp.skipChildren();
}
} else if (fieldName.equals("fieldTypes")) {
if (current == JsonToken.START_ARRAY) {
startExecutor();
while (jp.nextToken() != JsonToken.END_ARRAY && !abortImport) {
pushTask(new FieldTypeImportTask(parseFieldType(jp.readValueAsTree())));
}
waitTasksFinished();
} else {
System.out.println("Error: fieldTypes property should be an array. Skipping.");
jp.skipChildren();
}
} else if (fieldName.equals("recordTypes")) {
if (current == JsonToken.START_ARRAY) {
Map<QName, FieldType> inlineDeclaredFieldTypes = new HashMap<QName, FieldType>();
List<RecordTypeImportTask> rtImportTasks = new ArrayList<RecordTypeImportTask>();
while (jp.nextToken() != JsonToken.END_ARRAY && !abortImport) {
JsonNode rtJson = jp.readValueAsTree();
extractFieldTypesFromRecordType(rtJson, inlineDeclaredFieldTypes);
rtImportTasks.add(new RecordTypeImportTask(rtJson));
}
if (!inlineDeclaredFieldTypes.isEmpty()) {
startExecutor();
for (FieldType fieldType : inlineDeclaredFieldTypes.values()) {
if (abortImport)
break;
pushTask(new FieldTypeImportTask(fieldType));
}
waitTasksFinished();
}
if (!rtImportTasks.isEmpty()) {
startExecutor();
pushTasks(rtImportTasks);
waitTasksFinished();
}
} else {
System.out.println("Error: recordTypes property should be an array. Skipping.");
jp.skipChildren();
}
} else if (fieldName.equals("records")) {
if (!schemaOnly) {
if (current == JsonToken.START_ARRAY) {
startExecutor();
while (jp.nextToken() != JsonToken.END_ARRAY && !abortImport) {
int lineNr = jp.getCurrentLocation().getLineNr();
pushTask(new RecordImportTask(jp.readValueAsTree(), lineNr));
}
waitTasksFinished();
} else {
System.out.println("Error: records property should be an array. Skipping.");
jp.skipChildren();
}
} else {
jp.skipChildren();
}
} else {
System.out.println("Encountered unexpected field: " + fieldName);
System.out.println("Maybe you want to use '--format json_lines'?");
jp.skipChildren();
}
}
} finally {
waitTasksFinished();
}
if (errorHappened) {
if (recordImportErrorCnt.get() > 0) {
throw new ImportException("Errors happened during import (record error count: "
+ recordImportErrorCnt.get() + ")");
} else {
throw new ImportException("Errors happened during import.");
}
}
}
private void loadJsonLines(InputStream is) throws Exception {
BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
try {
startExecutor();
int lineNumber = 0;
String line;
while ((line = reader.readLine()) != null && !abortImport) {
lineNumber++;
// skip comment lines and whitespace lines
if (line.startsWith("#") || StringUtils.isBlank(line)) {
continue;
}
JsonNode node;
try {
node = JsonFormat.deserializeNonStd(line);
} catch (Exception e) {
handleRecordImportError(e, line, lineNumber);
continue;
}
pushTask(new RecordImportTask(node, lineNumber));
}
} finally {
waitTasksFinished();
Closer.close(reader);
}
if (errorHappened) {
throw new ImportException(recordImportErrorCnt.get() + " errors happened during import.");
}
}
public void readNamespaces(ObjectNode node) throws JsonFormatException {
// We don't expect the namespaces to be modified since we're reading rather than writing,
// but wrap them anyway to guarantee they really are not modified.
this.namespaces = new UnmodifiableNamespaces(NamespacesConverter.fromJson(node));
}
public Namespaces getNamespaces() {
return namespaces;
}
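/**
 * Parses a field type from its JSON representation, validating that it is an object node
 * carrying a name.
 */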
public FieldType parseFieldType(JsonNode node) throws RepositoryException, ImportException, JsonFormatException,
InterruptedException {
if (!node.isObject()) {
throw new ImportException("Field type should be specified as object node.");
}
FieldType fieldType = FieldTypeReader.INSTANCE.fromJson(node, namespaces, repository);
if (fieldType.getName() == null) {
throw new ImportException("Missing name property on field type.");
}
return fieldType;
}
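/**
 * Parses and imports a field type in create-or-update mode, reporting the outcome to the
 * import listener.
 */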
public FieldType importFieldType(JsonNode node) throws RepositoryException, ImportConflictException,
ImportException, JsonFormatException, InterruptedException {
return importFieldType(parseFieldType(node));
}
public FieldType importFieldType(FieldType fieldType) throws RepositoryException, ImportConflictException,
ImportException, JsonFormatException, InterruptedException {
ImportResult<FieldType> result = FieldTypeImport.importFieldType(fieldType, ImportMode.CREATE_OR_UPDATE,
IdentificationMode.NAME, fieldType.getName(), repository.getTypeManager());
FieldType newFieldType = result.getEntity();
switch (result.getResultType()) {
case CREATED:
importListener.created(EntityType.FIELD_TYPE, newFieldType.getName().toString(), newFieldType.getId().toString());
break;
case UP_TO_DATE:
importListener.existsAndEqual(EntityType.FIELD_TYPE, newFieldType.getName().toString(), null);
break;
case CONFLICT:
importListener.conflict(EntityType.FIELD_TYPE, fieldType.getName().toString(),
result.getConflictingProperty(), result.getConflictingOldValue(),
result.getConflictingNewValue());
break;
default:
throw new ImportException("Unexpected import result type for field type: " + result.getResultType());
}
return newFieldType;
}
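/**
 * Imports {@code times} variants of the given field type definition, with the loop index
 * appended to the name of each variant.
 */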
public List<FieldType> importFieldTypes(JsonNode node, int times) throws RepositoryException,
ImportConflictException, ImportException, JsonFormatException, InterruptedException {
List<FieldType> newFieldTypes = new ArrayList<FieldType>(times);
if (!node.isObject()) {
throw new ImportException("Field type should be specified as object node.");
}
FieldType fieldType = FieldTypeReader.INSTANCE.fromJson(node, namespaces, repository);
if (fieldType.getName() == null) {
throw new ImportException("Missing name property on field type.");
}
for (int i = 0; i < times; i++) {
FieldType ftToCreate = fieldType.clone();
ftToCreate.setName(new QName(fieldType.getName().getNamespace(), fieldType.getName().getName() + i));
ImportResult<FieldType> result = FieldTypeImport.importFieldType(ftToCreate, ImportMode.CREATE_OR_UPDATE,
IdentificationMode.NAME, ftToCreate.getName(), repository.getTypeManager());
FieldType newFieldType = result.getEntity();
switch (result.getResultType()) {
case CREATED:
importListener.created(EntityType.FIELD_TYPE, newFieldType.getName().toString(), newFieldType.getId()
.toString());
break;
case UP_TO_DATE:
importListener.existsAndEqual(EntityType.FIELD_TYPE, newFieldType.getName().toString(), null);
break;
case CONFLICT:
importListener.conflict(EntityType.FIELD_TYPE, ftToCreate.getName().toString(),
result.getConflictingProperty(), result.getConflictingOldValue(),
result.getConflictingNewValue());
break;
default:
throw new ImportException("Unexpected import result type for field type: " + result.getResultType());
}
newFieldTypes.add(newFieldType);
}
return newFieldTypes;
}
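/**
 * Imports a record type from its JSON representation. The import mode defaults to
 * create-or-update but can be overridden through a "mode" property in the JSON.
 */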
public RecordType importRecordType(JsonNode node) throws RepositoryException, ImportException, JsonFormatException,
InterruptedException {
if (!node.isObject()) {
throw new ImportException("Record type should be specified as object node.");
}
RecordType recordType = RecordTypeReader.INSTANCE.fromJson(node, namespaces, repository);
if (recordType.getName() == null) {
throw new ImportException("Missing name property on record type.");
}
ImportMode mode = getImportMode(node, ImportMode.CREATE_OR_UPDATE);
ImportResult<RecordType> result = RecordTypeImport.importRecordType(recordType, mode,
IdentificationMode.NAME, recordType.getName(), true, repository.getTypeManager());
RecordType newRecordType = result.getEntity();
switch (result.getResultType()) {
case CREATED:
importListener.created(EntityType.RECORD_TYPE, newRecordType.getName().toString(), newRecordType.getId().toString());
break;
case UPDATED:
importListener.updated(EntityType.RECORD_TYPE, newRecordType.getName().toString(),
newRecordType.getId().toString(), newRecordType.getVersion());
break;
case UP_TO_DATE:
importListener.existsAndEqual(EntityType.RECORD_TYPE, recordType.getName().toString(), null);
break;
case CANNOT_CREATE_EXISTS:
boolean failIfExists = getBoolean(node, FAIL_IF_EXISTS, true);
if (!failIfExists) {
importListener.allowedFailure(EntityType.RECORD_TYPE, recordType.getName().toString(), null,
"cannot create, record type exists");
break;
} else {
throw new ImportException("Cannot create record type, it already exists: " + recordType.getName());
}
case CANNOT_UPDATE_DOES_NOT_EXIST:
boolean failIfNotExists = getBoolean(node, FAIL_IF_NOT_EXISTS, true);
if (!failIfNotExists) {
importListener.allowedFailure(EntityType.RECORD_TYPE, recordType.getName().toString(), null,
"cannot update, record type does not exist");
break;
} else {
throw new ImportException("Cannot update record type, it does not exist: " + recordType.getName());
}
default:
throw new ImportException("Unexpected import result type for record type: " + result.getResultType());
}
return newRecordType;
}
/**
* Extracts field types declared inline in a record type. An inline definition is recognized by the
* presence of a valueType attribute on the field. Found field types are added to the passed map after
* checking for conflicting definitions.
*/
private void extractFieldTypesFromRecordType(JsonNode node, Map<QName, FieldType> fieldTypes)
throws RepositoryException, InterruptedException, JsonFormatException, ImportException {
if (node.has("fields")) {
ArrayNode fields = getArray(node, "fields");
for (int i = 0; i < fields.size(); i++) {
JsonNode field = fields.get(i);
if (field.has("valueType")) {
FieldType fieldType = parseFieldType(field);
if (fieldTypes.containsKey(fieldType.getName())) {
FieldType prevFieldType = fieldTypes.get(fieldType.getName());
if (!fieldType.equals(prevFieldType)) {
throw new ImportException("Found conflicting definitions of a field type in two record"
+ " types, field types: " + fieldType + " and " + prevFieldType);
}
} else {
fieldTypes.put(fieldType.getName(), fieldType);
}
}
}
}
}
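/**
 * Reads the optional "mode" property from the JSON node, falling back to the supplied
 * default when it is absent.
 */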
private ImportMode getImportMode(JsonNode node, ImportMode defaultMode) throws ImportException {
String modeName = getString(node, "mode", null);
if (modeName != null) {
try {
return ImportMode.valueOf(modeName.toUpperCase());
} catch (IllegalArgumentException e) {
throw new ImportException(String.format("Illegal value for import mode: %s", modeName));
}
} else {
return defaultMode;
}
}
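/**
 * Imports a single record from its JSON representation, honoring the optional "mode",
 * "failIfExists" and "failIfNotExists" properties.
 */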
private Record importRecord(JsonNode node) throws RepositoryException, ImportException, JsonFormatException,
InterruptedException {
if (!node.isObject()) {
throw new ImportException("Record should be specified as object node.");
}
Record record = recordReader.fromJson(node, namespaces, repository);
ImportMode mode = getImportMode(node, ImportMode.CREATE_OR_UPDATE);
if (mode == ImportMode.UPDATE && record.getId() == null) {
throw new ImportException(String.format("Import mode %s is specified but the record has no id.",
ImportMode.UPDATE));
}
if (mode == ImportMode.CREATE_OR_UPDATE && record.getId() == null) {
// Without a client-specified ID, create-or-update cannot match an existing record,
// so generate a new ID and effectively perform a create.
record.setId(repository.getIdGenerator().newRecordId());
}
RecordId inputRecordId = record.getId();
ImportResult<Record> result = RecordImport.importRecord(record, mode, table);
record = result.getEntity();
switch (result.getResultType()) {
case CREATED:
importListener.created(EntityType.RECORD, null, record.getId().toString());
break;
case UP_TO_DATE:
importListener.existsAndEqual(EntityType.RECORD, null, record.getId().toString());
break;
case UPDATED:
importListener.updated(EntityType.RECORD, null, record.getId().toString(), record.getVersion());
break;
case CANNOT_CREATE_EXISTS:
boolean failIfExists = getBoolean(node, FAIL_IF_EXISTS, true);
if (!failIfExists) {
importListener.allowedFailure(EntityType.RECORD, null, String.valueOf(inputRecordId),
"cannot create, record exists");
break;
} else {
throw new ImportException("Cannot create record, it already exists: " + inputRecordId);
}
case CANNOT_UPDATE_DOES_NOT_EXIST:
boolean failIfNotExists = getBoolean(node, FAIL_IF_NOT_EXISTS, true);
if (!failIfNotExists) {
importListener.allowedFailure(EntityType.RECORD, null, String.valueOf(inputRecordId),
"cannot update, record does not exist");
break;
} else {
throw new ImportException("Cannot update record, it does not exist: " + inputRecordId);
}
default:
throw new ImportException("Unexpected import result type for record: " + result.getResultType());
}
return record;
}
private synchronized void handleSchemaImportError(Throwable throwable) {
// In case of an error, we want to stop the import as soon as possible. Since the import
// is multi-threaded, a few operations may still complete before it actually stops.
// We don't do an immediate shutdown of the ExecutorService because we don't want to
// interrupt running threads: they are allowed to finish what they are doing.
abortImport = true;
errorHappened = true;
executor.getQueue().clear();
importListener.exception(throwable);
}
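/**
 * Registers a failed record import; once the failure count reaches the configured
 * maximum, the import is aborted and all pending tasks are discarded.
 */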
private synchronized void handleRecordImportError(Throwable throwable, String json, int lineNumber) {
long currentErrors = recordImportErrorCnt.incrementAndGet();
importListener.recordImportException(throwable, json, lineNumber);
errorHappened = true;
if (!abortImport && currentErrors >= maximumRecordErrors) {
abortImport = true;
executor.getQueue().clear();
importListener.tooManyRecordImportErrors(currentErrors);
}
}
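/**
 * Creates a fresh thread pool with a bounded work queue; the {@link WaitPolicy} rejection
 * handler makes task submission wait for free queue space, providing back-pressure on the
 * parsing thread when the import workers fall behind.
 */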
private void startExecutor() {
executor = new ThreadPoolExecutor(threadCount, threadCount, 10, TimeUnit.SECONDS,
new ArrayBlockingQueue<Runnable>(250));
executor.setRejectedExecutionHandler(new WaitPolicy());
}
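/**
 * Shuts down the executor and waits (up to 10 minutes) for all queued tasks to complete.
 */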
private void waitTasksFinished() throws InterruptedException, ExecutionException {
if (executor == null) {
return;
}
executor.shutdown();
boolean successfulFinish = executor.awaitTermination(10, TimeUnit.MINUTES);
if (!successfulFinish) {
throw new RuntimeException("JSON import executor did not finish within the timeout.");
}
executor = null;
}
private void pushTask(Runnable runnable) {
executor.submit(runnable);
}
private void pushTasks(List<? extends Runnable> runnables) {
for (Runnable runnable : runnables) {
if (abortImport) {
break;
}
executor.submit(runnable);
}
}
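/**
 * A task that captures the authorization context of the submitting thread and restores
 * it on the worker thread for the duration of the task.
 */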
private abstract class AuthzEnabledTask implements Runnable {
private AuthorizationContext authzContext;
public AuthzEnabledTask() {
// Remember the authorization context of the current thread
this.authzContext = AuthorizationContextHolder.getCurrentContext();
}
@Override
public final void run() {
try {
AuthorizationContextHolder.setCurrentContext(authzContext);
runInt();
} finally {
AuthorizationContextHolder.clearContext();
}
}
protected abstract void runInt();
}
private class FieldTypeImportTask extends AuthzEnabledTask {
private FieldType fieldType;
public FieldTypeImportTask(FieldType fieldType) {
this.fieldType = fieldType;
}
@Override
protected void runInt() {
try {
importFieldType(fieldType);
} catch (Throwable t) {
handleSchemaImportError(t);
}
}
}
private class RecordTypeImportTask extends AuthzEnabledTask {
private JsonNode json;
RecordTypeImportTask(JsonNode json) {
this.json = json;
}
@Override
protected void runInt() {
try {
importRecordType(json);
} catch (Throwable t) {
handleSchemaImportError(t);
}
}
}
private class RecordImportTask extends AuthzEnabledTask {
private JsonNode json;
/** Line in the source file from which the record was read. */
private int sourceLine;
RecordImportTask(JsonNode json, int sourceLine) {
this.json = json;
this.sourceLine = sourceLine;
}
@Override
protected void runInt() {
try {
importRecord(json);
} catch (Throwable t) {
String jsonAsString;
try {
jsonAsString = JsonFormat.serializeAsString(json);
} catch (Throwable t2) {
jsonAsString = "(error serializing json)";
}
handleRecordImportError(t, jsonAsString, sourceLine);
}
}
}
}