/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.benchmark.search.aggregations;
import org.elasticsearch.search.aggregations.metrics.percentiles.Percentile;
import com.google.common.collect.Maps;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.StopWatch;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.SizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.node.Node;
import org.elasticsearch.search.aggregations.metrics.percentiles.Percentiles;
import java.util.*;
import java.util.concurrent.TimeUnit;
import static org.elasticsearch.client.Requests.createIndexRequest;
import static org.elasticsearch.client.Requests.getRequest;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
import static org.elasticsearch.search.aggregations.AggregationBuilders.percentiles;
/**
 * Stand-alone benchmark for the {@code percentiles} aggregation.
 * <p>
 * For every {@link Distribution} the benchmark creates an index named after the distribution,
 * bulk-indexes {@link #NUM_DOCS} random integers into the field {@code v} and stores the exactly
 * computed percentiles in an extra document with id {@code percentiles}. It then
 * <ol>
 * <li>prints the expected percentiles, the percentiles returned by the aggregation and the sum of
 * the squared differences between the two ("Precision"), and</li>
 * <li>prints the average latency of the aggregation request ("Performance").</li>
 * </ol>
 * An index whose creation fails is assumed to exist already and is reused as-is.
 */
public class PercentilesAggregationSearchBenchmark {

    /** Scale factor applied to the random draws of every {@link Distribution}. */
    private static final int AMPLITUDE = 10000;
    /** Number of value documents indexed per distribution, parsed from the size string {@code "1m"}. */
    private static final int NUM_DOCS = (int) SizeValue.parseSizeValue("1m").singles();
    /** Maximum number of index requests per bulk request. */
    private static final int BATCH = 100;
    private static final String CLUSTER_NAME = PercentilesAggregationSearchBenchmark.class.getSimpleName();
    /** Percentiles (in percent) that are both computed exactly and requested from the aggregation. */
    private static final double[] PERCENTILES = new double[] { 0, 0.01, 0.1, 1, 10, 25, 50, 75, 90, 99, 99.9, 99.99, 100};
    /** Number of untimed requests issued before every timed run. */
    private static final int QUERY_WARMUP = 10;
    /** Number of timed requests per run; the reported latency is their average. */
    private static final int QUERY_COUNT = 20;
    /** Number of times the performance measurement is repeated over all distributions. */
    private static final int PERFORMANCE_ROUNDS = 3;
    /** Indexing progress is printed whenever the number of indexed documents is a multiple of this. */
    private static final int PROGRESS_INTERVAL = 100000;
    private static final String DOC_TYPE = "values";
    private static final String VALUE_FIELD = "v";
    private static final String AGG_NAME = "pcts";
    /** Id of the document that stores the exactly computed percentiles of an index. */
    private static final String EXPECTED_DOC_ID = "percentiles";
    /** Fixed seed so that every run generates the same data. */
    private static final Random R = new Random(0);

    // we generate ints to not disadvantage qdigest which only works with integers
    private enum Distribution {
        UNIFORM {
            @Override
            int next() {
                return (int) (R.nextDouble() * AMPLITUDE);
            }
        },
        GAUSS {
            @Override
            int next() {
                // Normal distribution centered on 0 with a standard deviation of AMPLITUDE.
                // This used to call nextDouble(), which made GAUSS an exact copy of UNIFORM.
                return (int) (R.nextGaussian() * AMPLITUDE);
            }
        },
        LOG_NORMAL {
            @Override
            int next() {
                // NOTE(review): the exponent is drawn uniformly from [0, ln(AMPLITUDE)), which makes
                // this log-uniform rather than log-normal; left unchanged, confirm the intent.
                return (int) Math.exp(R.nextDouble() * Math.log(AMPLITUDE));
            }
        };

        /** Name of the index that holds the values drawn from this distribution. */
        String indexName() {
            return name().toLowerCase(Locale.ROOT);
        }

        /** Draws the next value from the shared, seeded random generator. */
        abstract int next();
    }

    /**
     * Computes a percentile exactly by linear interpolation between the two closest ranks.
     *
     * @param percentile   the percentile to compute, in percent, within {@code [0, 100]}
     * @param sortedValues the values, sorted in ascending order, must not be empty
     * @return the value at the fractional rank {@code percentile / 100 * (length - 1)}
     * @throws IllegalArgumentException if {@code sortedValues} is empty or {@code percentile} is
     *                                  not within {@code [0, 100]} (including {@code NaN})
     */
    private static double accuratePercentile(double percentile, int[] sortedValues) {
        if (sortedValues.length == 0) {
            throw new IllegalArgumentException("Cannot compute a percentile of an empty array");
        }
        // written as a negated conjunction so that NaN is rejected as well
        if (!(percentile >= 0 && percentile <= 100)) {
            throw new IllegalArgumentException("Percentile must be within [0, 100], got " + percentile);
        }
        final double index = percentile / 100 * (sortedValues.length - 1);
        final int intIndex = (int) index;
        final double delta = index - intIndex;
        if (delta == 0) {
            return sortedValues[intIndex];
        } else {
            // delta > 0 implies index < length - 1, so intIndex + 1 is a valid position
            return sortedValues[intIndex] * (1 - delta) + sortedValues[intIndex + 1] * delta;
        }
    }

    /**
     * Starts one data node and one client node, indexes the data, runs the precision and the
     * performance reports and finally closes all nodes again.
     */
    public static void main(String[] args) throws Exception {
        Settings settings = settingsBuilder()
                .put("index.refresh_interval", "-1")
                .put(SETTING_NUMBER_OF_SHARDS, 100) // to also test performance and accuracy of the reduce phase
                .put(SETTING_NUMBER_OF_REPLICAS, 0)
                .build();

        Node[] nodes = new Node[1];
        Node clientNode = null;
        try {
            for (int i = 0; i < nodes.length; i++) {
                nodes[i] = nodeBuilder().clusterName(CLUSTER_NAME)
                        .settings(settingsBuilder().put(settings).put("name", "node" + i))
                        .node();
            }
            clientNode = nodeBuilder()
                    .clusterName(CLUSTER_NAME)
                    .settings(settingsBuilder().put(settings).put("name", "client")).client(true).node();
            Client client = clientNode.client();

            for (Distribution d : Distribution.values()) {
                indexValues(client, settings, d);
            }
            waitForGreen(client);

            System.out.println("## Precision");
            for (Distribution d : Distribution.values()) {
                reportPrecision(client, d);
            }

            System.out.println("## Performance");
            for (int i = 0; i < PERFORMANCE_ROUNDS; ++i) {
                for (Distribution d : Distribution.values()) {
                    reportPerformance(client, d);
                }
            }
        } finally {
            // always release the nodes, even when the benchmark aborts with an exception
            if (clientNode != null) {
                clientNode.close();
            }
            for (Node node : nodes) {
                if (node != null) {
                    node.close();
                }
            }
        }
    }

    /**
     * Creates the index of the given distribution, fills it with {@link #NUM_DOCS} random values
     * and stores the exactly computed percentiles next to them. Does nothing but print a message
     * if the index cannot be created.
     */
    private static void indexValues(Client client, Settings settings, Distribution d) throws Exception {
        final String index = d.indexName();
        try {
            // client.admin().indices().prepareDelete(index).execute().actionGet();
            client.admin().indices().create(createIndexRequest(index).settings(settings)).actionGet();
        } catch (Exception e) {
            // NOTE(review): every failure is reported as "already exists", not only the actual
            // "index already exists" case.
            System.out.println("Index " + index + " already exists, skipping index creation");
            return;
        }

        final int[] values = new int[NUM_DOCS];
        for (int i = 0; i < NUM_DOCS; ++i) {
            values[i] = d.next();
        }

        System.out.println("Indexing " + NUM_DOCS + " documents into " + index);
        StopWatch stopWatch = new StopWatch().start();
        for (int i = 0; i < NUM_DOCS; ) {
            BulkRequestBuilder request = client.prepareBulk();
            for (int j = 0; j < BATCH && i < NUM_DOCS; ++j, ++i) {
                request.add(client.prepareIndex(index, DOC_TYPE, Integer.toString(i)).setSource(VALUE_FIELD, values[i]));
            }
            BulkResponse response = request.execute().actionGet();
            if (response.hasFailures()) {
                System.err.println("--> failures...");
                System.err.println(response.buildFailureMessage());
            }
            if ((i % PROGRESS_INTERVAL) == 0) {
                System.out.println("--> Indexed " + i + " took " + stopWatch.stop().lastTaskTime());
                stopWatch.start();
            }
        }

        // the values are only needed in sorted order from here on
        Arrays.sort(values);
        XContentBuilder builder = JsonXContent.contentBuilder().startObject();
        for (double percentile : PERCENTILES) {
            builder.field(Double.toString(percentile), accuratePercentile(percentile, values));
        }
        client.prepareIndex(index, DOC_TYPE, EXPECTED_DOC_ID).setSource(builder.endObject()).execute().actionGet();
        // automatic refresh is disabled through the index settings, so refresh explicitly
        client.admin().indices().prepareRefresh(index).execute().actionGet();
    }

    /** Waits up to ten minutes for green cluster health and prints a warning on timeout. */
    private static void waitForGreen(Client client) {
        ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth()
                .setWaitForGreenStatus().setTimeout("10m").execute().actionGet();
        if (clusterHealthResponse.isTimedOut()) {
            System.err.println("--> Timed out waiting for cluster health");
        }
    }

    /** Runs the percentiles aggregation over all documents of the given index. */
    private static SearchResponse searchPercentiles(Client client, String index) {
        return client.prepareSearch(index)
                .setSearchType(SearchType.COUNT)
                .addAggregation(percentiles(AGG_NAME).field(VALUE_FIELD).percentiles(PERCENTILES))
                .execute().actionGet();
    }

    /**
     * Prints the stored exact percentiles, the percentiles computed by the aggregation and the sum
     * of the squared differences between them.
     *
     * @throws IllegalStateException if the index does not contain exactly {@link #NUM_DOCS} value
     *                               documents plus the document holding the expected percentiles,
     *                               or if an expected percentile is missing
     */
    private static void reportPrecision(Client client, Distribution d) {
        final String index = d.indexName();
        System.out.println("#### " + d);

        final long count = client.prepareCount(index).setQuery(matchAllQuery()).execute().actionGet().getCount();
        // one extra document holds the expected percentiles
        if (count != NUM_DOCS + 1) {
            throw new IllegalStateException("Expected " + NUM_DOCS + " documents, got " + (count - 1));
        }

        Map<String, Object> expectedUnsorted = client.get(getRequest(index).type(DOC_TYPE).id(EXPECTED_DOC_ID))
                .actionGet().getSourceAsMap();
        SortedMap<Double, Double> expected = Maps.newTreeMap();
        for (Map.Entry<String, Object> entry : expectedUnsorted.entrySet()) {
            expected.put(Double.parseDouble(entry.getKey()), (Double) entry.getValue());
        }
        System.out.println("Expected percentiles: " + expected);
        System.out.println();

        Percentiles pcts = searchPercentiles(client, index).getAggregations().get(AGG_NAME);
        Map<Double, Double> actual = Maps.newLinkedHashMap();
        double sumOfErrorSquares = 0;
        for (Percentile percentile : pcts) {
            actual.put(percentile.getPercent(), percentile.getValue());
            final Double expectedValue = expected.get(percentile.getPercent());
            if (expectedValue == null) {
                throw new IllegalStateException("No expected value stored for percentile " + percentile.getPercent());
            }
            final double error = percentile.getValue() - expectedValue;
            sumOfErrorSquares += error * error;
        }
        System.out.println("Percentiles: " + actual);
        System.out.println("Sum of error squares: " + sumOfErrorSquares);
        System.out.println();
    }

    /**
     * Issues {@link #QUERY_WARMUP} untimed requests followed by {@link #QUERY_COUNT} timed ones and
     * prints the average time per timed request.
     */
    private static void reportPerformance(Client client, Distribution d) {
        final String index = d.indexName();
        System.out.println("#### " + d);
        for (int j = 0; j < QUERY_WARMUP; ++j) {
            searchPercentiles(client, index);
        }
        final long start = System.nanoTime();
        for (int j = 0; j < QUERY_COUNT; ++j) {
            searchPercentiles(client, index);
        }
        System.out.println(new TimeValue((System.nanoTime() - start) / QUERY_COUNT, TimeUnit.NANOSECONDS));
    }
}