Source Code of de.jetwick.es.AbstractElasticSearch

/*
* Copyright 2011 Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.es;

import de.jetwick.data.DbObject;
import de.jetwick.util.AnyExecutor;
import de.jetwick.util.Helper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.lucene.search.Explanation;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoRequest;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.admin.indices.flush.FlushRequest;
import org.elasticsearch.action.admin.indices.optimize.OptimizeRequest;
import org.elasticsearch.action.admin.indices.optimize.OptimizeResponse;
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.count.CountResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.StopWatch;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Base class of all data access objects for ElasticSearch
*
* @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
*/
public abstract class AbstractElasticSearch<T extends DbObject> implements CreateObjectsInterface<T> {

    public static final String BOOST = "_boost";
    private Logger logger = LoggerFactory.getLogger(getClass());
    protected boolean testing = false;
    protected Client client;
   
    AbstractElasticSearch() {
    }

    public AbstractElasticSearch(Client client) {
        this.client = client;
    }

    public AbstractElasticSearch(String url) {
        client = createClient(ElasticNode.CLUSTER, url, ElasticNode.PORT);
    }

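    /**
     * Creates a TransportClient connected to the specified cluster at url:port.
     */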
    public static Client createClient(String cluster, String url, int port) {
        Settings s = ImmutableSettings.settingsBuilder().put("cluster.name", cluster).build();
        TransportClient tmp = new TransportClient(s);
        tmp.addTransportAddress(new InetSocketTransportAddress(url, port));
        return tmp;
    }

    public void setTesting(boolean testing) {
        this.testing = testing;
    }

    public boolean hasVersionSupport() {
        return false;
    }

    public abstract String getIndexName();

    public abstract void setIndexName(String indexName);

    public abstract String getIndexType();

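    /**
     * Checks via the cluster state meta data whether an index with the specified name exists.
     */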
    public boolean indexExists(String indexName) {
        // make sure the node is up before creating the index, otherwise we get: blocked by: [1/not recovered from gateway];
        // waitForYellow();
        Map map = client.admin().cluster().prepareState().execute().actionGet().
                getState().getMetaData().getIndices();
        return map.containsKey(indexName);
    }

    public void createIndex(String indexName) {
        client.admin().indices().create(new CreateIndexRequest(indexName)).actionGet();
    }

    public void saveCreateIndex() {
        saveCreateIndex(getIndexName(), true);
    }

    public void saveCreateIndex(String name, boolean log) {
        try {
            createIndex(name);
            if (log)
                logger.info("Created index: " + name);
        } catch (Exception ex) {
            if (log)
                logger.info("Index " + name + " already exists");
        }
    }

    void waitForYellow() {
        waitForYellow(getIndexName());
    }

    void waitForYellow(String name) {
        client.admin().cluster().health(new ClusterHealthRequest(name).waitForYellowStatus()).actionGet();
    }

    void waitForGreen(String name) {
        client.admin().cluster().health(new ClusterHealthRequest(name).waitForGreenStatus()).actionGet();
    }

    public void executeForAll(AnyExecutor<T> any, int pageSize) {
        long keepTimeInMinutes = 60;
        scanThis(any, QueryBuilders.matchAllQuery(), keepTimeInMinutes, pageSize);
    }

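    /**
     * Scrolls through all hits of the specified query (scan search) and passes
     * every collected object to the specified executor.
     */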
    public void scanThis(AnyExecutor<T> any, QueryBuilder query,
            long keepTimeInMinutes, int pageSize) {
        SearchRequestBuilder srb = client.prepareSearch(getIndexName()).
                setQuery(query).setSize(pageSize).
                setSearchType(SearchType.SCAN).
                setScroll(TimeValue.timeValueMinutes(keepTimeInMinutes));
        SearchResponse rsp = srb.execute().actionGet();

        try {
            int counter = 0;
            while (true) {
                rsp = client.prepareSearchScroll(rsp.scrollId()).
                        setScroll(TimeValue.timeValueMinutes(keepTimeInMinutes)).execute().actionGet();
                long currentResults = rsp.hits().hits().length;
                logger.info("(" + counter++ + ") scan query with page size " + pageSize + " and " + currentResults + " hits");
                if (currentResults == 0)
                    break;

                for (T t : collectObjects(rsp)) {
                    any.execute(t);
                }
            }
        } catch (Exception ex) {
            logger.error("Cannot run scanThis", ex);
        }
    }

    public void refresh() {
        refresh(getIndexName());
    }

    public void refresh(Collection<String> indices) {
        refresh(Helper.toStringArray(indices));
    }

    public void refresh(String... indices) {
        client.admin().indices().refresh(new RefreshRequest(indices)).actionGet();
    }

    public long countAll() {
        return countAll(getIndexName());
    }

    public long countAll(String... indices) {
        CountResponse response = client.prepareCount(indices).
                setQuery(QueryBuilders.matchAllQuery()).
                execute().actionGet();
        return response.getCount();
    }

    public void deleteById(String id) {
        client.prepareDelete(getIndexName(), getIndexType(), id).
                execute().
                actionGet();
    }

    public void deleteAll() {       
        deleteAll(getIndexName(), getIndexType());
    }

    public void deleteAll(String indexName, String indexType) {
        // deletes all documents of the specified type via a delete-by-query;
        // alternatively the whole index could be removed via the delete index API:
        // http://www.elasticsearch.com/docs/elasticsearch/rest_api/admin/indices/delete_index/

        client.prepareDeleteByQuery(indexName).
                setQuery(QueryBuilders.matchAllQuery()).
                setTypes(indexType).
                execute().actionGet();
        refresh(indexName);
    }

    public OptimizeResponse optimize() {
        return optimize(getIndexName(), 1);
    }

    public OptimizeResponse optimize(String indexName, int optimizeToSegmentsAfterUpdate) {
        return client.admin().indices().optimize(new OptimizeRequest(indexName).maxNumSegments(optimizeToSegmentsAfterUpdate)).actionGet();
    }

    public void deleteIndex(String indexName) {
        client.admin().indices().delete(new DeleteIndexRequest(indexName)).actionGet();
    }

    public void addIndexAlias(String indexName, String alias) {
//        new AliasAction(AliasAction.Type.ADD, index, alias)
        client.admin().indices().aliases(new IndicesAliasesRequest().addAlias(indexName, alias)).actionGet();
    }

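    /**
     * Logs the cluster name and the currently active nodes.
     */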
    public void nodeInfo() {
        NodesInfoResponse rsp = client.admin().cluster().nodesInfo(new NodesInfoRequest()).actionGet();
        String str = "Cluster:" + rsp.getClusterName() + ". Active nodes:";
        str += rsp.getNodesMap().keySet();
        logger.info(str);
    }
   
    public SearchResponse query(QueryBuilder queryBuilder) {
        SearchRequestBuilder srb = createSearchBuilder();
        srb.setQuery(queryBuilder);
        return srb.execute().actionGet();
    }

    protected SearchRequestBuilder createSearchBuilder() {
        return client.prepareSearch(getIndexName()).setTypes(getIndexType()).setVersion(hasVersionSupport());
    }

    public SearchResponse query(JetwickQuery query) {
        return query(query, false, false);
    }

    public SearchResponse query(JetwickQuery query, boolean log, boolean explain) {
        SearchRequestBuilder srb = createSearchBuilder();
        srb.setExplain(query.isExplain());
        query.initRequestBuilder(srb);
        if (log)
            try {
                logger.info(srb.internalBuilder().toXContent(JsonXContent.contentBuilder(), null).string());
            } catch (Exception ex) {
            }
        return srb.execute().actionGet();
    }

    public List<T> search(JetwickQuery q) {
        return collectObjects(query(q));
    }
   
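    /**
     * Converts the hits of the specified search response into domain objects
     * via readDoc and logs score explanations if present.
     */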
    @Override
    public List<T> collectObjects(SearchResponse rsp) {
        SearchHits docs = rsp.getHits();
        List<T> list = new ArrayList<T>(docs.hits().length);
        for (SearchHit sd : docs) {
            if (sd.getExplanation() != null) {
                String res = "";
                for (Explanation str : sd.getExplanation().getDetails()) {
                    res += str.toString();
                }
                logger.info(sd.getId() + " " + res);
            }
            T o = readDoc(sd.getId(), sd.getVersion(), sd.getSource());
            if (o != null)
                list.add(o);
        }

        return list;
    }

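    /**
     * Creates the domain object from the id, version and source of a search hit.
     */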
    public abstract T readDoc(String idAsStr, long version, Map<String, Object> source);

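    /**
     * Serializes the specified object into an XContentBuilder (JSON document).
     */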
    public abstract XContentBuilder createDoc(T tw) throws IOException;

    /**
     * Merges the specified indices into the target index. All indices have to
     * be created beforehand!
     */
    public void mergeIndices(Collection<String> indexList, String intoIndex,
            int hitsPerPage, boolean forceRefresh, CreateObjectsInterface<T> createObj,
            FilterBuilder additionalFilter) {
        if (forceRefresh) {
            refresh(indexList);
            refresh(intoIndex);
        }

        int keepTime = 100;
        for (String fromIndex : indexList) {
            SearchRequestBuilder srb = client.prepareSearch(fromIndex).
                    setVersion(true).
                    setQuery(QueryBuilders.matchAllQuery()).setSize(hitsPerPage).
                    setSearchType(SearchType.SCAN).
                    setScroll(TimeValue.timeValueMinutes(keepTime));
            if (additionalFilter != null)
                srb.setFilter(additionalFilter);
            SearchResponse rsp = srb.execute().actionGet();

            try {
                long total = rsp.hits().totalHits();
                int collectedResults = 0;
                while (true) {
                    StopWatch queryWatch = new StopWatch().start();
                    rsp = client.prepareSearchScroll(rsp.scrollId()).
                            setScroll(TimeValue.timeValueMinutes(keepTime)).execute().actionGet();
                    long currentResults = rsp.hits().hits().length;
                    if (currentResults == 0)
                        break;

                    queryWatch.stop();
                    Collection<T> objs = createObj.collectObjects(rsp);
                    StopWatch updateWatch = new StopWatch().start();
                    int failed = bulkUpdate(objs, intoIndex, false, false).size();
                    // trying to enable flushing to avoid memory issues on the server side?
                    flush(intoIndex);
                    updateWatch.stop();
                    collectedResults += currentResults;
                    logger.info("Progress " + collectedResults + "/" + total + " fromIndex="
                            + fromIndex + " update:" + updateWatch.totalTime().getSeconds() + " query:" + queryWatch.totalTime().getSeconds() + " failed:" + failed);
                }
                logger.info("Finished copying of index:" + fromIndex + ". Total:" + total + " collected:" + collectedResults);
            } catch (Exception ex) {
//                throw new RuntimeException(ex);
                logger.error("Failed to copy data from index " + fromIndex + " into " + intoIndex + ".", ex);
            }
        }

        if (forceRefresh)
            refresh(intoIndex);
    }
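
    // Hypothetical usage sketch (added for illustration, not part of the original
    // source): the target index has to exist before merging, e.g.
    //
    //   dao.saveCreateIndex("mergedindex", true);
    //   dao.mergeIndices(Arrays.asList("index_a", "index_b"), "mergedindex",
    //           1000, true, dao, null);
    //
    // where dao is a concrete subclass instance, "mergedindex", "index_a" and
    // "index_b" are example index names and 1000 is an example page size.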

    /**
     * Stores the specified object in the index.
     *
     * @return the id of the failed bulk item or null if indexing succeeded
     */
    public Integer store(T obj, boolean refresh) {
        try {
            // normal indexing operation throws VersionConflictEngineException:
//          IndexRequestBuilder irb = client.prepareIndex(getIndexName(), getIndexType(), id).
//                setConsistencyLevel(WriteConsistencyLevel.DEFAULT).
//                setSource(b);
//          irb.execute().actionGet();

            // but we want only one method to handle this failure
            Collection<Integer> ret = bulkUpdate(Collections.singleton(obj), getIndexName(), refresh);
            if (ret.size() > 0)
                return ret.iterator().next();
            return null;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Updates the specified objects.
     *
     * @return the bulk item ids of the failed objects (e.g. due to version conflicts)
     */
    public Collection<Integer> bulkUpdate(Collection<T> objects, String indexName) {
        return bulkUpdate(objects, indexName, false);

    }

    public Collection<Integer> bulkUpdate(Collection<T> objects, String indexName, boolean refresh) {
        return bulkUpdate(objects, indexName, refresh, hasVersionSupport());
    }

    /**
     * Updates the specified objects.
     *
     * @return the bulk item ids of the failed objects (e.g. due to version conflicts)
     */
    public Collection<Integer> bulkUpdate(Collection<T> objects, String indexName, boolean refresh, boolean enableVersioning) {
        // now using the bulk API instead of feeding each doc separately via feedDoc
        BulkRequestBuilder brb = client.prepareBulk();
        // this works differently than the direct call to refresh!? maybe refresh is not async?
//        brb.setRefresh(refresh);
        for (T o : objects) {
            if (o.getId() == null) {
                logger.warn("Skipped object without id in bulkUpdate:" + o);
                continue;
            }

            try {
                XContentBuilder source = createDoc(o);
                IndexRequest indexReq = Requests.indexRequest(indexName).type(getIndexType()).id(o.getId()).source(source);
               
                if (enableVersioning)
                    indexReq.version(o.getVersion());

                brb.add(indexReq);
            } catch (IOException ex) {
                logger.warn("Cannot add object:" + o + " to bulkIndexing action." + ex.getMessage());
            }
        }
        if (brb.numberOfActions() > 0) {
            BulkResponse rsp = brb.execute().actionGet();
            if (rsp.hasFailures()) {
                List<Integer> list = new ArrayList<Integer>(rsp.items().length);
                for (BulkItemResponse br : rsp.items()) {                   
                    if(br.isFailed()) {
//                        logger.info("Error:" + br.failureMessage());
                        list.add(br.itemId());
                    }
                }
                return list;
            }
            if (refresh)
                refresh(indexName);
        }

        return Collections.emptyList();
    }
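
    // Hypothetical usage sketch (added for illustration, not part of the original
    // source): the returned collection contains the bulk item ids of documents
    // that could not be indexed, e.g. due to a version conflict:
    //
    //   Collection<Integer> failedItems = dao.bulkUpdate(objects, dao.getIndexName(), true);
    //   if (!failedItems.isEmpty())
    //       logger.warn("Failed to index " + failedItems.size() + " objects");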

    public void flush(String... indices) {
        client.admin().indices().flush(new FlushRequest(indices)).actionGet();
    }

    public void waitUntilAvailable(long wait) throws InterruptedException {
        logger.info("Now waiting until the node is available");
        while (true) {
            try {
                nodeInfo();
                logger.info("Node is available now, starting");
                break;
            } catch (Exception ex) {
                Thread.sleep(wait);
            }
        }
    }
}
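
The abstract methods above leave index name, index type and document mapping to concrete subclasses. The following is a minimal, hypothetical subclass sketch (not part of the original jetwick sources) that shows how getIndexName, getIndexType, readDoc and createDoc could be implemented for an assumed MyObject domain type with a String id, a long version and a single "name" field:

import java.io.IOException;
import java.util.Map;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;

// Hypothetical subclass sketch; MyObject is an assumed DbObject implementation
// providing getId(), getVersion()/setVersion() and a name property.
public class MyObjectDao extends AbstractElasticSearch<MyObject> {

    private String indexName = "myobjects";

    public MyObjectDao(Client client) {
        super(client);
    }

    @Override
    public String getIndexName() {
        return indexName;
    }

    @Override
    public void setIndexName(String indexName) {
        this.indexName = indexName;
    }

    @Override
    public String getIndexType() {
        return "myobject";
    }

    @Override
    public MyObject readDoc(String idAsStr, long version, Map<String, Object> source) {
        // rebuild the domain object from the stored _source fields
        MyObject obj = new MyObject(idAsStr);
        obj.setVersion(version);
        obj.setName((String) source.get("name"));
        return obj;
    }

    @Override
    public XContentBuilder createDoc(MyObject obj) throws IOException {
        // serialize the domain object into a JSON document
        return JsonXContent.contentBuilder().startObject().
                field("name", obj.getName()).
                endObject();
    }
}

With such a subclass, store(obj, true) indexes a single object, bulkUpdate(...) feeds whole collections and collectObjects(...) turns search responses back into MyObject instances, all via the base class methods listed above.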