
Commit 3134d6b

Add unit tests to percentile ranks aggregations. (#23240)
Relates #22278
1 parent 640ab1c commit 3134d6b

9 files changed: +403 -3 lines changed


core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/AbstractInternalHDRPercentiles.java

Lines changed: 18 additions & 0 deletions
@@ -30,8 +30,10 @@

 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.zip.DataFormatException;

 abstract class AbstractInternalHDRPercentiles extends InternalNumericMetricsAggregation.MultiValue {

@@ -139,4 +141,20 @@ public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
         }
         return builder;
     }
+
+    @Override
+    protected boolean doEquals(Object obj) {
+        AbstractInternalHDRPercentiles that = (AbstractInternalHDRPercentiles) obj;
+        return keyed == that.keyed
+                && Arrays.equals(keys, that.keys)
+                && Objects.equals(state, that.state);
+    }
+
+    @Override
+    protected int doHashCode() {
+        // we cannot use state.hashCode at the moment because of:
+        // https://github.com/HdrHistogram/HdrHistogram/issues/81
+        // TODO: upgrade the HDRHistogram library
+        return Objects.hash(keyed, Arrays.hashCode(keys), state.getIntegerToDoubleValueConversionRatio(), state.getTotalCount());
+    }
 }
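
Note on the doHashCode workaround above: a minimal standalone sketch (not part of the commit; the class name and main method are illustrative only) showing that the surrogate fields it hashes, the conversion ratio and the total count, agree for two DoubleHistogram instances that recorded the same values.

import org.HdrHistogram.DoubleHistogram;

import java.util.Objects;

public class HdrHashCodeSketch {
    public static void main(String[] args) {
        DoubleHistogram a = new DoubleHistogram(3);
        DoubleHistogram b = new DoubleHistogram(3);
        for (double value : new double[] {3, 0.2, 10}) {
            a.recordValue(value);
            b.recordValue(value);
        }
        // hash the same surrogate fields the commit uses instead of the (currently unreliable) state.hashCode()
        int hashA = Objects.hash(a.getIntegerToDoubleValueConversionRatio(), a.getTotalCount());
        int hashB = Objects.hash(b.getIntegerToDoubleValueConversionRatio(), b.getTotalCount());
        System.out.println(hashA == hashB); // expected: true
    }
}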

core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/AbstractInternalTDigestPercentiles.java

Lines changed: 15 additions & 0 deletions
@@ -28,8 +28,10 @@
 import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;

 import java.io.IOException;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;

 abstract class AbstractInternalTDigestPercentiles extends InternalNumericMetricsAggregation.MultiValue {

@@ -122,4 +124,17 @@ public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
         }
         return builder;
     }
+
+    @Override
+    protected boolean doEquals(Object obj) {
+        AbstractInternalTDigestPercentiles that = (AbstractInternalTDigestPercentiles) obj;
+        return keyed == that.keyed
+                && Arrays.equals(keys, that.keys)
+                && Objects.equals(state, that.state);
+    }
+
+    @Override
+    protected int doHashCode() {
+        return Objects.hash(keyed, Arrays.hashCode(keys), state);
+    }
 }

core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/InternalTDigestPercentileRanks.java

Lines changed: 5 additions & 0 deletions
@@ -116,4 +116,9 @@ public final void remove() {
             throw new UnsupportedOperationException();
         }
     }
+
+    @Override
+    protected boolean doEquals(Object obj) {
+        return super.doEquals(obj);
+    }
 }

core/src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/TDigestState.java

Lines changed: 35 additions & 0 deletions
@@ -24,6 +24,7 @@
 import org.elasticsearch.common.io.stream.StreamOutput;

 import java.io.IOException;
+import java.util.Iterator;

 /**
  * Extension of {@link com.tdunning.math.stats.TDigest} with custom serialization.

@@ -61,4 +62,38 @@ public static TDigestState read(StreamInput in) throws IOException {
         return state;
     }

+    @Override
+    public boolean equals(Object obj) {
+        if (obj == null || obj instanceof TDigestState == false) {
+            return false;
+        }
+        TDigestState that = (TDigestState) obj;
+        if (compression != that.compression) {
+            return false;
+        }
+        Iterator<? extends Centroid> thisCentroids = centroids().iterator();
+        Iterator<? extends Centroid> thatCentroids = that.centroids().iterator();
+        while (thisCentroids.hasNext()) {
+            if (thatCentroids.hasNext() == false) {
+                return false;
+            }
+            Centroid thisNext = thisCentroids.next();
+            Centroid thatNext = thatCentroids.next();
+            if (thisNext.mean() != thatNext.mean() || thisNext.count() != thatNext.count()) {
+                return false;
+            }
+        }
+        return thatCentroids.hasNext() == false;
+    }
+
+    @Override
+    public int hashCode() {
+        int h = getClass().hashCode();
+        h = 31 * h + Double.hashCode(compression);
+        for (Centroid centroid : centroids()) {
+            h = 31 * h + Double.hashCode(centroid.mean());
+            h = 31 * h + centroid.count();
+        }
+        return h;
+    }
 }
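
A minimal standalone sketch (illustrative only, not part of the commit) of the equality contract the overrides above introduce: with only a few distinct values and compression 100 no centroid merging occurs, so two digests fed the same values end up with identical centroids and therefore compare equal and hash identically.

import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.TDigestState;

public class TDigestStateEqualsSketch {
    public static void main(String[] args) {
        TDigestState a = new TDigestState(100);
        TDigestState b = new TDigestState(100);
        for (double value : new double[] {3, 0.2, 10}) {
            a.add(value);
            b.add(value);
        }
        System.out.println(a.equals(b));                  // expected: true
        System.out.println(a.hashCode() == b.hashCode()); // expected: true
    }
}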

core/src/test/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/HDRPercentileRanksAggregatorTests.java

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.metrics.percentiles.hdr;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.NumericUtils;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.NumberFieldMapper;
+import org.elasticsearch.search.aggregations.AggregatorTestCase;
+import org.elasticsearch.search.aggregations.metrics.percentiles.Percentile;
+import org.elasticsearch.search.aggregations.metrics.percentiles.PercentileRanks;
+import org.elasticsearch.search.aggregations.metrics.percentiles.PercentileRanksAggregationBuilder;
+import org.elasticsearch.search.aggregations.metrics.percentiles.PercentilesMethod;
+import org.hamcrest.Matchers;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+public class HDRPercentileRanksAggregatorTests extends AggregatorTestCase {
+
+    public void testEmpty() throws IOException {
+        PercentileRanksAggregationBuilder aggBuilder = new PercentileRanksAggregationBuilder("my_agg")
+                .field("field")
+                .method(PercentilesMethod.HDR)
+                .values(0.5);
+        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
+        fieldType.setName("field");
+        try (IndexReader reader = new MultiReader()) {
+            IndexSearcher searcher = new IndexSearcher(reader);
+            PercentileRanks ranks = search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
+            Percentile rank = ranks.iterator().next();
+            assertEquals(Double.NaN, rank.getPercent(), 0d);
+            assertEquals(0.5, rank.getValue(), 0d);
+        }
+    }
+
+    public void testSimple() throws IOException {
+        try (Directory dir = newDirectory();
+                RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
+            for (double value : new double[] {3, 0.2, 10}) {
+                Document doc = new Document();
+                doc.add(new SortedNumericDocValuesField("field", NumericUtils.doubleToSortableLong(value)));
+                w.addDocument(doc);
+            }
+
+            PercentileRanksAggregationBuilder aggBuilder = new PercentileRanksAggregationBuilder("my_agg")
+                    .field("field")
+                    .method(PercentilesMethod.HDR)
+                    .values(0.1, 0.5, 12);
+            MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
+            fieldType.setName("field");
+            try (IndexReader reader = w.getReader()) {
+                IndexSearcher searcher = new IndexSearcher(reader);
+                PercentileRanks ranks = search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
+                Iterator<Percentile> rankIterator = ranks.iterator();
+                Percentile rank = rankIterator.next();
+                assertEquals(0.1, rank.getValue(), 0d);
+                assertThat(rank.getPercent(), Matchers.equalTo(0d));
+                rank = rankIterator.next();
+                assertEquals(0.5, rank.getValue(), 0d);
+                assertThat(rank.getPercent(), Matchers.greaterThan(0d));
+                assertThat(rank.getPercent(), Matchers.lessThan(100d));
+                rank = rankIterator.next();
+                assertEquals(12, rank.getValue(), 0d);
+                assertThat(rank.getPercent(), Matchers.equalTo(100d));
+                assertFalse(rankIterator.hasNext());
+            }
+        }
+    }
+}
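
The percent ranks asserted in testSimple can also be reproduced directly on the underlying HDR state. The sketch below is illustrative only; it assumes getPercentileAtOrBelowValue is the rank computation the aggregation ultimately performs, which this diff does not show. It records the same three values and queries the same three rank values.

import org.HdrHistogram.DoubleHistogram;

public class HdrPercentileRankSketch {
    public static void main(String[] args) {
        DoubleHistogram histo = new DoubleHistogram(3); // 3 significant value digits
        for (double value : new double[] {3, 0.2, 10}) {
            histo.recordValue(value);
        }
        System.out.println(histo.getPercentileAtOrBelowValue(0.1)); // expected: 0.0 (no values <= 0.1)
        System.out.println(histo.getPercentileAtOrBelowValue(0.5)); // strictly between 0 and 100
        System.out.println(histo.getPercentileAtOrBelowValue(12));  // expected: 100.0 (all values <= 12)
    }
}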

core/src/test/java/org/elasticsearch/search/aggregations/metrics/percentiles/hdr/InternalHDRPercentilesRanksTests.java

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.metrics.percentiles.hdr;
+
+import org.HdrHistogram.DoubleHistogram;
+import org.elasticsearch.common.io.stream.Writeable.Reader;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.InternalAggregationTestCase;
+import org.elasticsearch.search.aggregations.metrics.percentiles.hdr.InternalHDRPercentileRanks;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+
+import java.util.List;
+import java.util.Map;
+
+public class InternalHDRPercentilesRanksTests extends InternalAggregationTestCase<InternalHDRPercentileRanks> {
+
+    @Override
+    protected InternalHDRPercentileRanks createTestInstance(String name, List<PipelineAggregator> pipelineAggregators,
+                                                            Map<String, Object> metaData) {
+        double[] cdfValues = new double[] { 0.5 };
+        int numberOfSignificantValueDigits = 3;
+        DoubleHistogram state = new DoubleHistogram(numberOfSignificantValueDigits);
+        int numValues = randomInt(100);
+        for (int i = 0; i < numValues; ++i) {
+            state.recordValue(randomDouble());
+        }
+        boolean keyed = false;
+        DocValueFormat format = DocValueFormat.RAW;
+        return new InternalHDRPercentileRanks(name, cdfValues, state, keyed, format, pipelineAggregators, metaData);
+    }
+
+    @Override
+    protected void assertReduced(InternalHDRPercentileRanks reduced, List<InternalHDRPercentileRanks> inputs) {
+        // it is hard to check the values due to the inaccuracy of the algorithm
+        long totalCount = 0;
+        for (InternalHDRPercentileRanks ranks : inputs) {
+            totalCount += ranks.state.getTotalCount();
+        }
+        assertEquals(totalCount, reduced.state.getTotalCount());
+    }
+
+    @Override
+    protected Reader<InternalHDRPercentileRanks> instanceReader() {
+        return InternalHDRPercentileRanks::new;
+    }
+
+}

core/src/test/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/InternalTDigestPercentilesRanksTests.java

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.metrics.percentiles.tdigest;
+
+import org.elasticsearch.common.io.stream.Writeable.Reader;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.InternalAggregationTestCase;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+
+import java.util.List;
+import java.util.Map;
+
+public class InternalTDigestPercentilesRanksTests extends InternalAggregationTestCase<InternalTDigestPercentileRanks> {
+
+    @Override
+    protected InternalTDigestPercentileRanks createTestInstance(String name, List<PipelineAggregator> pipelineAggregators,
+                                                                Map<String, Object> metaData) {
+        double[] cdfValues = new double[] { 0.5 };
+        TDigestState state = new TDigestState(100);
+        int numValues = randomInt(100);
+        for (int i = 0; i < numValues; ++i) {
+            state.add(randomDouble());
+        }
+        boolean keyed = false;
+        DocValueFormat format = DocValueFormat.RAW;
+        return new InternalTDigestPercentileRanks(name, cdfValues, state, keyed, format, pipelineAggregators, metaData);
+    }
+
+    @Override
+    protected void assertReduced(InternalTDigestPercentileRanks reduced, List<InternalTDigestPercentileRanks> inputs) {
+        // it is hard to check the values due to the inaccuracy of the algorithm
+        // the min/max values should be accurate due to the way the algo works so we can at least test those
+        double min = Double.POSITIVE_INFINITY;
+        double max = Double.NEGATIVE_INFINITY;
+        long totalCount = 0;
+        for (InternalTDigestPercentileRanks ranks : inputs) {
+            if (ranks.state.centroidCount() == 0) {
+                // quantiles would return NaN
+                continue;
+            }
+            totalCount += ranks.state.size();
+            min = Math.min(ranks.state.quantile(0), min);
+            max = Math.max(ranks.state.quantile(1), max);
+        }
+        assertEquals(totalCount, reduced.state.size());
+        if (totalCount > 0) {
+            assertEquals(reduced.state.quantile(0), min, 0d);
+            assertEquals(reduced.state.quantile(1), max, 0d);
+        }
+    }
+
+    @Override
+    protected Reader<InternalTDigestPercentileRanks> instanceReader() {
+        return InternalTDigestPercentileRanks::new;
+    }
+
+}
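
A standalone sketch (illustrative only; it feeds the raw values of every shard into one combined digest rather than merging the per-shard digests the way the real reduce path presumably does) of the invariant assertReduced checks above: the combined digest's size equals the sum of the shard sizes, and its quantile(0) and quantile(1) match the global minimum and maximum.

import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.TDigestState;

public class TDigestReduceInvariantSketch {
    public static void main(String[] args) {
        double[][] shards = { {3, 0.2, 10}, {7, 1.5}, {0.4, 42, 8} };
        TDigestState combined = new TDigestState(100);
        long totalCount = 0;
        double min = Double.POSITIVE_INFINITY;
        double max = Double.NEGATIVE_INFINITY;
        for (double[] shard : shards) {
            TDigestState state = new TDigestState(100);
            for (double value : shard) {
                state.add(value);
                combined.add(value);
            }
            totalCount += state.size();
            min = Math.min(min, state.quantile(0));
            max = Math.max(max, state.quantile(1));
        }
        System.out.println(combined.size() == totalCount); // expected: true
        System.out.println(combined.quantile(0) == min);   // expected: true
        System.out.println(combined.quantile(1) == max);   // expected: true
    }
}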
