/*
* Copyright 2011 Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.es;
import de.jetwick.data.DbObject;
import de.jetwick.util.AnyExecutor;
import de.jetwick.util.Helper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.lucene.search.Explanation;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoRequest;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.admin.indices.flush.FlushRequest;
import org.elasticsearch.action.admin.indices.optimize.OptimizeRequest;
import org.elasticsearch.action.admin.indices.optimize.OptimizeResponse;
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
import org.elasticsearch.action.admin.indices.refresh.RefreshResponse;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.count.CountResponse;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.StopWatch;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Base class of all data access objects for ElasticSearch.
 *
 * Subclasses supply the index name and type as well as the conversion between
 * the domain object and the indexed document ({@link #createDoc} and
 * {@link #readDoc}).
 *
 * @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
 */
public abstract class AbstractElasticSearch<T extends DbObject> implements CreateObjectsInterface<T> {

    public static final String BOOST = "_boost";
    private final Logger logger = LoggerFactory.getLogger(getClass());
    protected boolean testing = false;
    protected Client client;

    /**
     * Creates an instance without a client; the client field has to be set
     * before any method touching the cluster is called.
     */
    AbstractElasticSearch() {
    }

    /**
     * @param client the already configured client used for all requests
     */
    public AbstractElasticSearch(Client client) {
        this.client = client;
    }

    /**
     * Creates a transport client for the specified address using the cluster
     * name and port defined in ElasticNode.
     */
    public AbstractElasticSearch(String url) {
        client = createClient(ElasticNode.CLUSTER, url, ElasticNode.PORT);
    }

    /**
     * Creates a transport client connected to one address.
     *
     * @param cluster the value used for the "cluster.name" setting
     * @param url passed as host to the transport address
     * @param port the transport port
     */
    public static Client createClient(String cluster, String url, int port) {
        Settings s = ImmutableSettings.settingsBuilder().put("cluster.name", cluster).build();
        TransportClient tmp = new TransportClient(s);
        tmp.addTransportAddress(new InetSocketTransportAddress(url, port));
        return tmp;
    }

    public void setTesting(boolean testing) {
        this.testing = testing;
    }

    /**
     * @return true if documents should be indexed and searched with versions.
     * This base implementation returns false.
     */
    public boolean hasVersionSupport() {
        return false;
    }

    public abstract String getIndexName();

    public abstract void setIndexName(String indexName);

    public abstract String getIndexType();

    /**
     * @return true if the cluster state contains an index with the specified name
     */
    public boolean indexExists(String indexName) {
        // make sure node is up to create the index otherwise we get: blocked by: [1/not recovered from gateway];
        // waitForYellow();
        Map<?, ?> indices = client.admin().cluster().prepareState().execute().actionGet().
                getState().getMetaData().getIndices();
        return indices.containsKey(indexName);
    }

    /**
     * Creates the index with the specified name. Failures of the request are
     * propagated to the caller.
     */
    public void createIndex(String indexName) {
        client.admin().indices().create(new CreateIndexRequest(indexName)).actionGet();
    }

    /**
     * Creates the index returned from getIndexName and logs the outcome.
     */
    public void saveCreateIndex() {
        saveCreateIndex(getIndexName(), true);
    }

    /**
     * Tries to create the specified index and never throws if that fails.
     *
     * @param name the name of the index to create
     * @param log if true the outcome is logged
     */
    public void saveCreateIndex(String name, boolean log) {
        try {
            createIndex(name);
            if (log)
                logger.info("Created index: " + name);
        } catch (Exception ex) {
            if (log) {
                // report the index we actually tried to create, not the default one
                logger.info("Index " + name + " already exists");
                // any failure ends up here, so keep the real cause available
                logger.debug("Creating index " + name + " failed", ex);
            }
        }
    }

    void waitForYellow() {
        waitForYellow(getIndexName());
    }

    /**
     * Blocks until the health request waiting for yellow status returns.
     */
    void waitForYellow(String name) {
        client.admin().cluster().health(new ClusterHealthRequest(name).waitForYellowStatus()).actionGet();
    }

    /**
     * Blocks until the health request waiting for green status returns.
     */
    void waitForGreen(String name) {
        client.admin().cluster().health(new ClusterHealthRequest(name).waitForGreenStatus()).actionGet();
    }

    /**
     * Executes the specified action for every object of the index using a
     * scan query with a scroll keep alive of 60 minutes.
     */
    public void executeForAll(AnyExecutor<T> any, int pageSize) {
        long keepTimeInMinutes = 60;
        scanThis(any, QueryBuilders.matchAllQuery(), keepTimeInMinutes, pageSize);
    }

    /**
     * Scrolls through all hits of the specified query and executes the action
     * for every object created from the hits. Exceptions thrown while
     * scrolling or executing are logged and stop the scan; they are not
     * rethrown.
     *
     * @param any the action to execute per object
     * @param query the query selecting the documents
     * @param keepTimeInMinutes the scroll keep alive in minutes
     * @param pageSize the size set on the search request
     */
    public void scanThis(AnyExecutor<T> any, QueryBuilder query,
            long keepTimeInMinutes, int pageSize) {
        SearchRequestBuilder srb = client.prepareSearch(getIndexName()).
                setQuery(query).setSize(pageSize).
                setSearchType(SearchType.SCAN).
                setScroll(TimeValue.timeValueMinutes(keepTimeInMinutes));
        // the initial request is executed outside of the try, so its failures propagate
        SearchResponse rsp = srb.execute().actionGet();

        try {
            int counter = 0;
            while (true) {
                rsp = client.prepareSearchScroll(rsp.scrollId()).
                        setScroll(TimeValue.timeValueMinutes(keepTimeInMinutes)).execute().actionGet();
                long currentResults = rsp.hits().hits().length;
                logger.info("(" + counter++ + ") scanquery with " + pageSize + " page size and " + currentResults + " hits");
                // an empty page marks the end of the scroll
                if (currentResults == 0)
                    break;

                for (T t : collectObjects(rsp)) {
                    any.execute(t);
                }
            }
        } catch (Exception ex) {
            logger.error("Cannot run scanThis", ex);
        }
    }

    /**
     * Refreshes the index returned from getIndexName.
     */
    public void refresh() {
        refresh(getIndexName());
    }

    public void refresh(Collection<String> indices) {
        refresh(Helper.toStringArray(indices));
    }

    /**
     * Refreshes the specified indices and waits for the response.
     */
    public void refresh(String... indices) {
        client.admin().indices().refresh(new RefreshRequest(indices)).actionGet();
    }

    /**
     * @return the number of documents in the index returned from getIndexName
     */
    public long countAll() {
        return countAll(getIndexName());
    }

    /**
     * @return the count of a match-all query against the specified indices
     */
    public long countAll(String... indices) {
        CountResponse response = client.prepareCount(indices).
                setQuery(QueryBuilders.matchAllQuery()).
                execute().actionGet();
        return response.getCount();
    }

    /**
     * Deletes the document with the specified id from the index and type of
     * this instance.
     */
    public void deleteById(String id) {
        client.prepareDelete(getIndexName(), getIndexType(), id).
                execute().
                actionGet();
    }

    /**
     * Deletes all documents of the index and type of this instance.
     */
    public void deleteAll() {
        deleteAll(getIndexName(), getIndexType());
    }

    /**
     * Deletes all documents of the specified type via a delete-by-query with
     * a match-all query and refreshes the index afterwards.
     */
    public void deleteAll(String indexName, String indexType) {
        //client.prepareIndex().setOpType(OpType.)
        //there is an index delete operation
        // http://www.elasticsearch.com/docs/elasticsearch/rest_api/admin/indices/delete_index/
        client.prepareDeleteByQuery(indexName).
                setQuery(QueryBuilders.matchAllQuery()).
                setTypes(indexType).
                execute().actionGet();
        refresh(indexName);
    }

    /**
     * Optimizes the index returned from getIndexName down to one segment.
     */
    public OptimizeResponse optimize() {
        return optimize(getIndexName(), 1);
    }

    /**
     * @param optimizeToSegmentsAfterUpdate passed as maximum number of segments
     * to the optimize request
     */
    public OptimizeResponse optimize(String indexName, int optimizeToSegmentsAfterUpdate) {
        return client.admin().indices().optimize(new OptimizeRequest(indexName).maxNumSegments(optimizeToSegmentsAfterUpdate)).actionGet();
    }

    public void deleteIndex(String indexName) {
        client.admin().indices().delete(new DeleteIndexRequest(indexName)).actionGet();
    }

    public void addIndexAlias(String indexName, String alias) {
        // new AliasAction(AliasAction.Type.ADD, index, alias)
        client.admin().indices().aliases(new IndicesAliasesRequest().addAlias(indexName, alias)).actionGet();
    }

    /**
     * Requests the nodes info and logs the cluster name together with the
     * keys of the returned nodes map.
     */
    public void nodeInfo() {
        NodesInfoResponse rsp = client.admin().cluster().nodesInfo(new NodesInfoRequest()).actionGet();
        String str = "Cluster:" + rsp.getClusterName() + ". Active nodes:";
        str += rsp.getNodesMap().keySet();
        logger.info(str);
    }

    /**
     * Executes the specified query against the index and type of this instance.
     */
    public SearchResponse query(QueryBuilder queryBuilder) {
        SearchRequestBuilder srb = createSearchBuilder();
        srb.setQuery(queryBuilder);
        return srb.execute().actionGet();
    }

    /**
     * @return a search builder preconfigured with the index name, the index
     * type and the version flag of this instance
     */
    protected SearchRequestBuilder createSearchBuilder() {
        return client.prepareSearch(getIndexName()).setTypes(getIndexType()).setVersion(hasVersionSupport());
    }

    public SearchResponse query(JetwickQuery query) {
        return query(query, false, false);
    }

    /**
     * Executes the specified query.
     *
     * @param query initializes the search request
     * @param log if true the request is logged as JSON before execution
     * @param explain if true explanations are requested regardless of the
     * explain setting of the query
     */
    public SearchResponse query(JetwickQuery query, boolean log, boolean explain) {
        SearchRequestBuilder srb = createSearchBuilder();
        srb.setExplain(query.isExplain());
        query.initRequestBuilder(srb);
        // applied after the query initialized the builder so that the parameter is honoured
        if (explain)
            srb.setExplain(true);

        if (log) {
            try {
                logger.info(srb.internalBuilder().toXContent(JsonXContent.contentBuilder(), null).string());
            } catch (Exception ex) {
                // logging is best effort and must not prevent the search
                logger.warn("Cannot log query", ex);
            }
        }
        return srb.execute().actionGet();
    }

    /**
     * @return the objects created from the hits of the specified query
     */
    public List<T> search(JetwickQuery q) {
        return collectObjects(query(q));
    }

    /**
     * Converts the hits of the response into objects via readDoc. Hits for
     * which readDoc returns null are skipped. If a hit carries an explanation
     * its details are logged.
     */
    @Override
    public List<T> collectObjects(SearchResponse rsp) {
        SearchHits docs = rsp.getHits();
        List<T> list = new ArrayList<T>(docs.hits().length);
        for (SearchHit sd : docs) {
            Explanation explanation = sd.getExplanation();
            if (explanation != null) {
                StringBuilder res = new StringBuilder();
                // getDetails can return null if the explanation has no sub nodes
                Explanation[] details = explanation.getDetails();
                if (details != null) {
                    for (Explanation detail : details) {
                        res.append(detail.toString());
                    }
                }
                logger.info(sd.getId() + " " + res);
            }
            T o = readDoc(sd.getId(), sd.getVersion(), sd.getSource());
            if (o != null)
                list.add(o);
        }
        return list;
    }

    /**
     * Creates the object from one search hit.
     *
     * @param idAsStr the id of the hit
     * @param version the version of the hit
     * @param source the source of the hit
     */
    public abstract T readDoc(String idAsStr, long version, Map<String, Object> source);

    /**
     * Creates the document which gets indexed for the specified object.
     */
    public abstract XContentBuilder createDoc(T tw) throws IOException;

    /**
     * Copies the documents of all indices of indexList into the index
     * intoIndex. All indices have to be created before! A failure while
     * copying one index is logged and the next index is processed.
     *
     * @param indexList the indices to read from
     * @param intoIndex the index to write to
     * @param hitsPerPage the size set on the scan request
     * @param forceRefresh if true all involved indices are refreshed before
     * and the target index is refreshed after the merge
     * @param createObj creates the objects from the search responses
     * @param additionalFilter optional filter restricting the copied documents
     */
    public void mergeIndices(Collection<String> indexList, String intoIndex,
            int hitsPerPage, boolean forceRefresh, CreateObjectsInterface<T> createObj,
            FilterBuilder additionalFilter) {
        if (forceRefresh) {
            refresh(indexList);
            refresh(intoIndex);
        }

        int keepTime = 100;
        for (String fromIndex : indexList) {
            SearchRequestBuilder srb = client.prepareSearch(fromIndex).
                    setVersion(true).
                    setQuery(QueryBuilders.matchAllQuery()).setSize(hitsPerPage).
                    setSearchType(SearchType.SCAN).
                    setScroll(TimeValue.timeValueMinutes(keepTime));
            if (additionalFilter != null)
                srb.setFilter(additionalFilter);
            SearchResponse rsp = srb.execute().actionGet();

            try {
                long total = rsp.hits().totalHits();
                long collectedResults = 0;
                while (true) {
                    StopWatch queryWatch = new StopWatch().start();
                    rsp = client.prepareSearchScroll(rsp.scrollId()).
                            setScroll(TimeValue.timeValueMinutes(keepTime)).execute().actionGet();
                    long currentResults = rsp.hits().hits().length;
                    // an empty page marks the end of the scroll
                    if (currentResults == 0)
                        break;

                    queryWatch.stop();
                    Collection<T> objs = createObj.collectObjects(rsp);
                    StopWatch updateWatch = new StopWatch().start();
                    // no refresh and no versioning while copying
                    int failed = bulkUpdate(objs, intoIndex, false, false).size();
                    // trying to enable flushing to avoid memory issues on the server side?
                    flush(intoIndex);
                    updateWatch.stop();
                    collectedResults += currentResults;
                    logger.info("Progress " + collectedResults + "/" + total + " fromIndex="
                            + fromIndex + " update:" + updateWatch.totalTime().getSeconds() + " query:" + queryWatch.totalTime().getSeconds() + " failed:" + failed);
                }
                logger.info("Finished copying of index:" + fromIndex + ". Total:" + total + " collected:" + collectedResults);
            } catch (Exception ex) {
                // throw new RuntimeException(ex);
                logger.error("Failed to copy data from index " + fromIndex + " into " + intoIndex + ".", ex);
            }
        }

        if (forceRefresh)
            refresh(intoIndex);
    }

    /**
     * Stores the specified object into the index returned from getIndexName.
     *
     * @param refresh if true the index is refreshed after successful indexing
     * @return null if indexing was successful, otherwise the position of the
     * failed item within the bulk request
     * @throws RuntimeException wrapping any exception raised while indexing
     */
    public Integer store(T obj, boolean refresh) {
        try {
            // normal indexing operation throws VersionConflictEngineException:
            // IndexRequestBuilder irb = client.prepareIndex(getIndexName(), getIndexType(), id).
            // setConsistencyLevel(WriteConsistencyLevel.DEFAULT).
            // setSource(b);
            // irb.execute().actionGet();
            // but we want only one method to handle this failure
            Collection<Integer> ret = bulkUpdate(Collections.singleton(obj), getIndexName(), refresh);
            if (ret.size() > 0)
                return ret.iterator().next();

            return null;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Updates the specified objects without refreshing the index.
     *
     * @return the bulk item positions of the failed objects (e.g. due to versioning)
     */
    public Collection<Integer> bulkUpdate(Collection<T> objects, String indexName) {
        return bulkUpdate(objects, indexName, false);
    }

    /**
     * Updates the specified objects; versioning is enabled if
     * hasVersionSupport returns true.
     *
     * @return the bulk item positions of the failed objects (e.g. due to versioning)
     */
    public Collection<Integer> bulkUpdate(Collection<T> objects, String indexName, boolean refresh) {
        return bulkUpdate(objects, indexName, refresh, hasVersionSupport());
    }

    /**
     * Updates the specified objects with one bulk request. Objects without an
     * id and objects for which no document could be created are skipped and
     * are not part of the request.
     *
     * @param objects the objects to index
     * @param indexName the index to write to
     * @param refresh if true the index is refreshed afterwards, provided that
     * at least one item was indexed successfully
     * @param enableVersioning if true the version of every object is set on
     * its index request
     * @return the item ids reported by the bulk response for the failed items
     * (e.g. due to versioning). These are positions within the bulk request,
     * not document ids. Empty if nothing failed.
     */
    public Collection<Integer> bulkUpdate(Collection<T> objects, String indexName, boolean refresh, boolean enableVersioning) {
        // now using bulk API instead of feeding each doc separate with feedDoc
        BulkRequestBuilder brb = client.prepareBulk();
        // this works differently then the direct call to refresh!? maybe refresh is not async?
        // brb.setRefresh(refresh);
        for (T o : objects) {
            if (o.getId() == null) {
                logger.warn("Skipped object without id when bulkUpdate:" + o);
                continue;
            }

            try {
                XContentBuilder source = createDoc(o);
                IndexRequest indexReq = Requests.indexRequest(indexName).type(getIndexType()).id(o.getId()).source(source);
                if (enableVersioning)
                    indexReq.version(o.getVersion());

                brb.add(indexReq);
            } catch (IOException ex) {
                // keep the stack trace, the message alone is often not sufficient
                logger.warn("Cannot add object:" + o + " to bulkIndexing action." + ex.getMessage(), ex);
            }
        }

        if (brb.numberOfActions() == 0)
            return Collections.emptyList();

        BulkResponse rsp = brb.execute().actionGet();
        List<Integer> failedItems = Collections.emptyList();
        if (rsp.hasFailures()) {
            failedItems = new ArrayList<Integer>(rsp.items().length);
            for (BulkItemResponse br : rsp.items()) {
                if (br.isFailed()) {
                    // logger.info("Error:" + br.failureMessage());
                    failedItems.add(br.itemId());
                }
            }
        }

        // partial failures must not hide the successfully indexed items
        if (refresh && failedItems.size() < rsp.items().length)
            refresh(indexName);

        return failedItems;
    }

    /**
     * Flushes the specified indices and waits for the response.
     */
    public void flush(String... indices) {
        client.admin().indices().flush(new FlushRequest(indices)).actionGet();
    }

    /**
     * Blocks until nodeInfo can be called without an exception.
     *
     * @param wait the time in milliseconds to sleep between two attempts
     * @throws InterruptedException if the thread gets interrupted while sleeping
     */
    public void waitUntilAvailable(long wait) throws InterruptedException {
        logger.info("now waiting until node is ok");
        while (true) {
            try {
                nodeInfo();
                logger.info("Node is available now starting");
                break;
            } catch (Exception ex) {
                // expected while the node is not reachable, so retry after the pause
                logger.debug("Node not yet available, retrying in " + wait + "ms", ex);
                Thread.sleep(wait);
            }
        }
    }
}