package crate.elasticsearch.import_;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.ElasticSearchParseException;
import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest;
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexRequest.OpType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.Injector;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.ImmutableSettings.Builder;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.mapper.internal.IdFieldMapper;
import org.elasticsearch.index.mapper.internal.IndexFieldMapper;
import org.elasticsearch.index.mapper.internal.RoutingFieldMapper;
import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
import org.elasticsearch.index.mapper.internal.TTLFieldMapper;
import org.elasticsearch.index.mapper.internal.TimestampFieldMapper;
import org.elasticsearch.index.mapper.internal.TypeFieldMapper;
import org.elasticsearch.indices.IndexAlreadyExistsException;
import org.elasticsearch.indices.IndexMissingException;
import crate.elasticsearch.action.import_.ImportContext;
import crate.elasticsearch.action.import_.NodeImportRequest;
public class Importer {
/**
 * Client used for index, mapping, cluster-state and bulk calls. It is not set by the
 * constructor; execute() resolves it from the injector on first use.
 */
private Client client;
/** Injector from which the Client is obtained on the first execute() call. */
private final Injector injector;
/** Value passed to BulkProcessor.Builder#setBulkSize when a data file is imported. */
private final ByteSizeValue bulkByteSize = new ByteSizeValue(5, ByteSizeUnit.MB);
/** Value passed to BulkProcessor.Builder#setFlushInterval when a data file is imported. */
private final TimeValue flushInterval = TimeValue.timeValueSeconds(5);
/** Value passed to BulkProcessor.Builder#setConcurrentRequests when a data file is imported. */
private final int concurrentRequests = 4;
/**
 * Creates an importer that only stores the injector; the Client lookup is deferred
 * to execute().
 *
 * @param injector injector used later to obtain the Client instance
 */
@Inject
public Importer(Injector injector) {
this.injector = injector;
}
/**
 * Imports the files found in the directory named by {@code context.directory()}.
 *
 * <p>If that path is not a directory nothing is imported and an empty result is
 * returned. Otherwise the directory entries (optionally restricted to names in which
 * {@code context.file_pattern()} finds a match) are processed in three passes:
 * settings (when {@code context.settings()} is set), mappings (when
 * {@code context.mappings()} is set) and finally the data files themselves. Files
 * whose name ends in {@code .mapping} or {@code .settings} are never treated as data
 * files.
 *
 * @param context import context providing directory, file pattern, compression and
 *                the settings/mappings switches
 * @param request node request providing the optional target index and type and the
 *                bulk size
 * @return the per-file import counts and the elapsed wall-clock time in milliseconds
 * @throws ElasticSearchException if importing settings or mappings fails; the
 *                                original exception is attached as cause
 */
public Result execute(ImportContext context, NodeImportRequest request) {
    if (this.client == null) {
        // Inject here to avoid injection loop in constructor
        this.client = injector.getInstance(Client.class);
    }
    String index = request.index();
    String type = request.type();
    int bulkSize = request.bulkSize();
    Result result = new Result();
    long start = System.currentTimeMillis();
    File dir = new File(context.directory());
    if (dir.isDirectory()) {
        File[] files = listImportFiles(dir, context.file_pattern());
        try {
            // import settings according to the given data file pattern
            if (context.settings()) {
                Set<String> createdSettings = new HashSet<String>();
                for (File file : files) {
                    if (isDataFile(file.getName()) && file.isFile() && file.canRead()) {
                        loadSettings(file, createdSettings, index);
                    }
                }
            }
            // import mappings according to the given data file pattern
            if (context.mappings()) {
                Map<String, Set<String>> createdMappings = new HashMap<String, Set<String>>();
                for (File file : files) {
                    if (isDataFile(file.getName()) && file.isFile() && file.canRead()) {
                        loadMappings(file, createdMappings, index, type);
                    }
                }
            }
        } catch (Exception e) {
            throw new ElasticSearchException("::", e);
        }
        // import data according to the given data file pattern
        for (File file : files) {
            if (isDataFile(file.getName())) {
                ImportCounts counts = handleFile(file, index, type, bulkSize, context.compression());
                if (counts != null) {
                    result.importCounts.add(counts);
                }
            }
        }
    }
    result.took = System.currentTimeMillis() - start;
    return result;
}

/**
 * Lists the entries of {@code dir}, optionally keeping only names in which
 * {@code filePattern} finds a match. Never returns {@code null}: File#listFiles
 * yields {@code null} on an I/O error, which is mapped to an empty array here so
 * callers can iterate safely.
 */
private static File[] listImportFiles(File dir, final Pattern filePattern) {
    File[] files;
    if (filePattern == null) {
        files = dir.listFiles();
    } else {
        files = dir.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File parent, String name) {
                return filePattern.matcher(name).find();
            }
        });
    }
    return files == null ? new File[0] : files;
}

/** Returns true unless the name carries the {@code .mapping} or {@code .settings} suffix. */
private static boolean isDataFile(String fileName) {
    return !fileName.endsWith(".mapping") && !fileName.endsWith(".settings");
}
/**
 * Reads one data file line by line and feeds every parsable line to a bulk processor.
 *
 * <p>Each line is handed to {@code parseObject}. Lines that raise an
 * {@code ObjectImportException}, or that end up without an index or a type, are
 * reported to the bulk listener via {@code addFailure()}; lines for which
 * {@code parseObject} returns {@code null} are reported via {@code addInvalid()}.
 * A non-null {@code index} / {@code type} argument overrides whatever the line itself
 * specified.
 *
 * <p>NOTE(review): lines are decoded with the platform default charset and
 * {@code parseObject} re-encodes them with the same default, so the two have to be
 * changed together if an explicit charset is ever introduced.
 *
 * @param file        file to import
 * @param index       target index, or {@code null} to use the index named in each line
 * @param type        target type, or {@code null} to use the type named in each line
 * @param bulkSize    number of actions per bulk request
 * @param compression whether the file content is gzip compressed
 * @return the counts collected by the bulk listener, or {@code null} if {@code file}
 *         is not a readable regular file
 */
private ImportCounts handleFile(File file, String index, String type, int bulkSize, boolean compression) {
    if (!file.isFile() || !file.canRead()) {
        return null;
    }
    ImportBulkListener bulkListener = new ImportBulkListener(file.getAbsolutePath());
    BulkProcessor bulkProcessor = BulkProcessor.builder(client, bulkListener)
            .setBulkActions(bulkSize)
            .setBulkSize(bulkByteSize)
            .setFlushInterval(flushInterval)
            .setConcurrentRequests(concurrentRequests)
            .build();
    // The raw stream is tracked separately so it can still be closed when wrapping it
    // fails (e.g. GZIPInputStream rejecting a file that is not gzip data).
    FileInputStream fileStream = null;
    BufferedReader reader = null;
    try {
        fileStream = new FileInputStream(file);
        if (compression) {
            reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(fileStream)));
        } else {
            reader = new BufferedReader(new InputStreamReader(fileStream));
        }
        String line;
        while ((line = reader.readLine()) != null) {
            IndexRequest indexRequest;
            try {
                indexRequest = parseObject(line);
            } catch (ObjectImportException e) {
                bulkListener.addFailure();
                continue;
            }
            if (indexRequest == null) {
                bulkListener.addInvalid();
                continue;
            }
            indexRequest.opType(OpType.INDEX);
            if (index != null) {
                indexRequest.index(index);
            }
            if (type != null) {
                indexRequest.type(type);
            }
            if (indexRequest.type() != null && indexRequest.index() != null) {
                bulkProcessor.add(indexRequest);
            } else {
                bulkListener.addFailure();
            }
        }
    } catch (FileNotFoundException ignored) {
        // Ignore not existing files, actually they should exist, as they are filtered before.
    } catch (IOException ignored) {
        // Best effort: a read error ends this file early; whatever was already handed
        // to the bulk processor is still flushed and counted below.
    } finally {
        try {
            if (reader != null) {
                reader.close();
            } else if (fileStream != null) {
                fileStream.close();
            }
        } catch (IOException ignored) {
            // Nothing useful can be done if closing the input fails.
        } finally {
            bulkProcessor.close();
        }
    }
    try {
        bulkListener.get();
    } catch (InterruptedException e) {
        // Keep the interrupt visible to the caller instead of silently dropping it.
        Thread.currentThread().interrupt();
    } catch (ExecutionException ignored) {
        // Counts gathered so far are still returned.
    }
    return bulkListener.importCounts();
}
/**
 * Turns one exported line into an {@link IndexRequest}.
 *
 * <p>The top-level fields {@code _id}, {@code _index}, {@code _type},
 * {@code _routing}, {@code _timestamp}, {@code _ttl} and {@code _version} are mapped
 * onto the request when they carry the expected value type, and the {@code _source}
 * object becomes the request source. Any other field is ignored; if its value is an
 * object or array the whole nested structure is skipped, so nested keys are never
 * mistaken for top-level metadata and a nested end-of-object cannot end parsing early.
 *
 * <p>When {@code _ttl} is positive, the request ttl is set to {@code _ttl} minus the
 * line's {@code _timestamp} (or minus the current time if no timestamp was given);
 * presumably {@code _ttl} holds an absolute expiry time - verify against the exporter.
 *
 * @param line one line of the import file
 * @return the populated request, or {@code null} if the computed ttl is not positive
 *         (the object is considered invalid and must not be imported)
 * @throws ObjectImportException if the line cannot be parsed
 */
private IndexRequest parseObject(String line) throws ObjectImportException {
    XContentParser parser = null;
    try {
        IndexRequest indexRequest = new IndexRequest();
        byte[] bytes = line.getBytes();
        parser = XContentFactory.xContent(bytes).createParser(bytes);
        XContentBuilder sourceBuilder = XContentFactory.contentBuilder(XContentType.JSON);
        long ttl = 0;
        Token token;
        while ((token = parser.nextToken()) != Token.END_OBJECT) {
            if (token == null) {
                // end of input reached without a closing brace
                break;
            }
            if (token != Token.FIELD_NAME) {
                continue;
            }
            String fieldName = parser.currentName();
            token = parser.nextToken();
            if (fieldName.equals(IdFieldMapper.NAME) && token == Token.VALUE_STRING) {
                indexRequest.id(parser.text());
            } else if (fieldName.equals(IndexFieldMapper.NAME) && token == Token.VALUE_STRING) {
                indexRequest.index(parser.text());
            } else if (fieldName.equals(TypeFieldMapper.NAME) && token == Token.VALUE_STRING) {
                indexRequest.type(parser.text());
            } else if (fieldName.equals(RoutingFieldMapper.NAME) && token == Token.VALUE_STRING) {
                indexRequest.routing(parser.text());
            } else if (fieldName.equals(TimestampFieldMapper.NAME) && token == Token.VALUE_NUMBER) {
                indexRequest.timestamp(String.valueOf(parser.longValue()));
            } else if (fieldName.equals(TTLFieldMapper.NAME) && token == Token.VALUE_NUMBER) {
                ttl = parser.longValue();
            } else if (fieldName.equals("_version") && token == Token.VALUE_NUMBER) {
                indexRequest.version(parser.longValue());
                indexRequest.versionType(VersionType.EXTERNAL);
            } else if (fieldName.equals(SourceFieldMapper.NAME) && token == Token.START_OBJECT) {
                sourceBuilder.copyCurrentStructure(parser);
            } else if (token == Token.START_OBJECT || token == Token.START_ARRAY) {
                // Unhandled structured value: jump to its matching end token so its
                // content is not interpreted as part of the top-level object.
                parser.skipChildren();
            }
        }
        if (ttl > 0) {
            String ts = indexRequest.timestamp();
            long start = (ts != null) ? Long.parseLong(ts) : System.currentTimeMillis();
            ttl -= start;
            if (ttl <= 0) {
                // object is invalid, do not import
                return null;
            }
            indexRequest.ttl(ttl);
        }
        indexRequest.source(sourceBuilder);
        return indexRequest;
    } catch (ElasticSearchParseException e) {
        throw new ObjectImportException(e);
    } catch (IOException e) {
        throw new ObjectImportException(e);
    } finally {
        if (parser != null) {
            parser.close();
        }
    }
}
/**
 * Creates the indexes described in the {@code .settings} companion of a data file.
 *
 * <p>The companion file ({@code <data file>.settings}) is expected to map index names
 * to objects that may contain a {@code "settings"} object. For every index name that
 * is not listed in {@code createdSettings}, passes the {@code restrictedIndex} filter
 * and is reported as missing by {@code getMissingIndexes}, an index is created with
 * those settings. Each index handled this way is added to {@code createdSettings} so
 * that later settings files skip it.
 *
 * @param file            data file whose {@code .settings} companion is read
 * @param createdSettings names of indexes already handled during this import; updated
 *                        by this method
 * @param restrictedIndex if non-null, only this index is considered
 * @throws SettingsImportException if the settings file is missing or unreadable, or
 *                                 if building the index settings fails
 */
private void loadSettings(File file, Set<String> createdSettings, String restrictedIndex) {
    File settingsFile = new File(file.getAbsolutePath() + ".settings");
    if (!(settingsFile.exists() && settingsFile.isFile() && settingsFile.canRead())) {
        throw new SettingsImportException("Settings file " + settingsFile.getAbsolutePath() + " could not be found.");
    }
    Map<String, Object> map;
    try {
        map = getMapFromJSONFile(settingsFile);
    } catch (Exception e) {
        throw new SettingsImportException("Error while reading settings file " + settingsFile.getAbsolutePath(), e);
    }
    if (map == null) {
        return;
    }
    // Work on a copy: keySet() is a live view, so removing from it would also drop
    // the entries from the parsed map.
    Set<String> candidates = new HashSet<String>();
    for (String key : map.keySet()) {
        boolean wanted = restrictedIndex == null || restrictedIndex.equals(key);
        if (wanted && !createdSettings.contains(key)) {
            candidates.add(key);
        }
    }
    if (!candidates.isEmpty()) {
        candidates = getMissingIndexes(candidates);
    }
    for (String key : candidates) {
        Object indexMap = map.get(key);
        if (!(indexMap instanceof Map)) {
            continue;
        }
        try {
            Builder builder = ImmutableSettings.settingsBuilder();
            @SuppressWarnings("unchecked") // parsed JSON objects are String-keyed maps
            Object settingsMap = ((Map<String, Object>) indexMap).get("settings");
            if (settingsMap instanceof Map) {
                XContentBuilder settingsBuilder = XContentFactory.contentBuilder(XContentType.JSON);
                @SuppressWarnings("unchecked") // parsed JSON objects are String-keyed maps
                Map<String, Object> typedSettings = (Map<String, Object>) settingsMap;
                builder.loadFromSource(settingsBuilder.map(typedSettings).string());
            }
            Settings settings = builder.build();
            CreateIndexRequest cir = new CreateIndexRequest(key, settings);
            try {
                client.admin().indices().create(cir).actionGet();
            } catch (IndexAlreadyExistsException e1) {
                // ignore, maybe a concurrent shard created the index simultaneously
            }
            // Remember the index so further settings files do not try to create it again.
            createdSettings.add(key);
        } catch (IOException e) {
            throw new SettingsImportException("Error while creating index " + key + " from settings file " + settingsFile.getAbsolutePath(), e);
        }
    }
}
/**
 * Applies the type mappings described in the {@code .mapping} companion of a data file.
 *
 * <p>The companion file ({@code <data file>.mapping}) is read as a map from index name
 * to a map from type name to mapping definition. For each index/type pair that passes
 * the optional restrictions and has not been recorded in {@code createdMappings}, a
 * put-mapping request is sent; acknowledged types are recorded in
 * {@code createdMappings}.
 *
 * @param file            data file whose {@code .mapping} companion is read
 * @param createdMappings per index, the types whose mapping was already acknowledged;
 *                        updated by this method
 * @param restrictedIndex if non-null, only this index is considered
 * @param restrictedType  if non-null, only this type is considered
 * @throws MappingImportException if the mapping file is missing or unreadable, or if
 *                                the target index does not exist
 */
private void loadMappings(File file, Map<String, Set<String>> createdMappings, String restrictedIndex, String restrictedType) {
    File mappingFile = new File(file.getAbsolutePath() + ".mapping");
    boolean readable = mappingFile.exists() && mappingFile.isFile() && mappingFile.canRead();
    if (!readable) {
        throw new MappingImportException("Mapping file " + mappingFile.getAbsolutePath() + " could not be found.");
    }
    Map<String, Object> parsed;
    try {
        parsed = getMapFromJSONFile(mappingFile);
    } catch (Exception e) {
        throw new MappingImportException("Error while reading mapping file " + mappingFile.getAbsolutePath(), e);
    }
    if (parsed == null) {
        return;
    }
    for (Map.Entry<String, Object> indexEntry : parsed.entrySet()) {
        String index = indexEntry.getKey();
        if (restrictedIndex != null && !restrictedIndex.equals(index)) {
            continue;
        }
        Object typesObject = indexEntry.getValue();
        if (!(typesObject instanceof Map)) {
            continue;
        }
        Map<String, Object> typesMap = (Map<String, Object>) typesObject;
        Set<String> created = createdMappings.get(index);
        if (created == null) {
            created = new HashSet<String>();
            createdMappings.put(index, created);
        }
        for (Map.Entry<String, Object> typeEntry : typesMap.entrySet()) {
            String type = typeEntry.getKey();
            boolean typeWanted = restrictedType == null || restrictedType.equals(type);
            if (!typeWanted || created.contains(type)) {
                continue;
            }
            Object definition = typeEntry.getValue();
            if (!(definition instanceof Map)) {
                continue;
            }
            // The request source is the definition wrapped under its type name.
            Map<String, Object> mapping = new HashMap<String, Object>();
            mapping.put(type, definition);
            PutMappingRequest mappingRequest = new PutMappingRequest(index);
            mappingRequest.type(type);
            mappingRequest.source(mapping);
            boolean acknowledged;
            try {
                acknowledged = client.admin().indices().putMapping(mappingRequest).actionGet().isAcknowledged();
            } catch (IndexMissingException e) {
                throw new MappingImportException("Unable to create mapping. Index " + index + " missing.", e);
            }
            if (acknowledged) {
                created.add(type);
            }
        }
    }
}
/**
 * Reduces the given set to the index names for which no index metadata was found.
 *
 * <p>The passed set is modified in place and also returned. If the metadata lookup
 * raises an {@code IndexMissingException}, the set is returned unchanged, i.e. every
 * name is treated as missing.
 *
 * @param indexes index names to check; modified by this method
 * @return the same set instance, with the names of found indexes removed
 */
private Set<String> getMissingIndexes(Set<String> indexes) {
    ImmutableMap<String, IndexMetaData> existing;
    try {
        existing = getIndexMetaData(indexes);
    } catch (IndexMissingException e) {
        // all indexes are missing
        return indexes;
    }
    indexes.removeAll(existing.keySet());
    return indexes;
}
/**
 * Fetches the metadata of the given indexes from the cluster state.
 *
 * <p>The cluster state request is limited to the given index names and sets the
 * routing-table and nodes filters, and disables the threaded listener.
 *
 * @param indexes names of the indexes to look up
 * @return a copy of the index metadata map taken from the cluster state response
 */
private ImmutableMap<String, IndexMetaData> getIndexMetaData(Set<String> indexes) {
    String[] names = indexes.toArray(new String[indexes.size()]);
    ClusterStateRequest stateRequest = Requests.clusterStateRequest()
            .filterRoutingTable(true)
            .filterNodes(true)
            .filteredIndices(names);
    stateRequest.listenerThreaded(false);
    ClusterStateResponse stateResponse = client.admin().cluster().state(stateRequest).actionGet();
    return ImmutableMap.copyOf(stateResponse.getState().metaData().indices());
}
/**
 * Reads a whole file and parses its content into a map.
 *
 * <p>The file is read line by line as UTF-8; the lines are concatenated without their
 * line terminators and the result is handed to the content parser. Both the reader
 * and the parser are closed before returning, also when reading or parsing fails.
 *
 * @param file file to read
 * @return the parsed top-level object as a map
 * @throws IOException if the file cannot be read or parsed
 */
private Map<String, Object> getMapFromJSONFile(File file) throws IOException {
    StringBuilder content = new StringBuilder();
    // Decode and re-encode with the same explicit charset so the bytes handed to the
    // parser do not depend on the platform default.
    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
    try {
        String line;
        while ((line = reader.readLine()) != null) {
            content.append(line);
        }
    } finally {
        reader.close();
    }
    byte[] bytes = content.toString().getBytes("UTF-8");
    XContentParser parser = XContentFactory.xContent(bytes).createParser(bytes);
    try {
        return parser.map();
    } finally {
        parser.close();
    }
}
class MappingImportException extends ElasticSearchException {
private static final long serialVersionUID = 683146198427799700L;
public MappingImportException(String msg) {
super(msg);
}
public MappingImportException(String msg, Throwable cause) {
super(msg, cause);
}
}
class SettingsImportException extends ElasticSearchException {
private static final long serialVersionUID = -3697101419212831353L;
public SettingsImportException(String msg) {
super(msg);
}
public SettingsImportException(String msg, Throwable cause) {
super(msg, cause);
}
}
class ObjectImportException extends ElasticSearchException {
private static final long serialVersionUID = 2405764408378929056L;
public ObjectImportException(Throwable cause) {
super("Object could not be imported.", cause);
}
}
/**
 * Outcome of one execute() call: the counts of every processed data file plus the
 * elapsed time.
 */
public static class Result {
/** One entry per data file for which handleFile returned counts. */
public List<ImportCounts> importCounts = new ArrayList<Importer.ImportCounts>();
/** Elapsed time of the execute() call in milliseconds. */
public long took;
}
/**
 * Plain counter holder for the import of a single file. All counters start at zero
 * and {@code fileName} starts as {@code null}.
 *
 * <p>NOTE(review): the fields are presumably filled in by the bulk listener that
 * handleFile creates per file - confirm against ImportBulkListener.
 */
public static class ImportCounts {
    /** Name of the file these counts belong to. */
    public String fileName;
    /** Number of successfully imported objects. */
    public int successes;
    /** Number of objects that failed to import. */
    public int failures;
    /** Number of objects that were skipped as invalid. */
    public int invalid;
}
}