Skip to content

Commit 49baa06

Browse files
authored
Fix doc_count on HistoBackedHistogramAggregator (#74650)
histogram aggregation on histogram field computes wrong doc_count values when _doc_count field is present. The root cause of the problem is correctly described here Closes #74617
1 parent fee554b commit 49baa06

File tree

3 files changed

+69
-10
lines changed

3 files changed

+69
-10
lines changed

x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregator.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,10 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
8686
} else {
8787
collectBucket(sub, doc, bucketOrd);
8888
}
89-
// We have added the document already. We should increment doc_count by count - 1
90-
// so that we have added it count times.
91-
incrementBucketDocCount(bucketOrd, count - 1);
89+
// We have added the document already and we have incremented bucket doc_count
90+
// by _doc_count times. To compensate for this, we should increment doc_count by
91+
// (count - _doc_count) so that we have added it count times.
92+
incrementBucketDocCount(bucketOrd, count - docCountProvider.getDocCount(doc));
9293
}
9394
previousKey = key;
9495
}

x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/bucket/histogram/HistoBackedHistogramAggregatorTests.java

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,12 @@
77

88
package org.elasticsearch.xpack.analytics.aggregations.bucket.histogram;
99

10-
import static java.util.Collections.singleton;
11-
import static org.elasticsearch.xpack.analytics.AnalyticsTestsUtils.histogramFieldDocValues;
12-
13-
import java.util.Collections;
14-
import java.util.List;
15-
1610
import org.apache.lucene.index.IndexReader;
1711
import org.apache.lucene.index.RandomIndexWriter;
1812
import org.apache.lucene.search.IndexSearcher;
1913
import org.apache.lucene.search.MatchAllDocsQuery;
2014
import org.apache.lucene.store.Directory;
15+
import org.elasticsearch.index.mapper.CustomTermFreqField;
2116
import org.elasticsearch.index.mapper.MappedFieldType;
2217
import org.elasticsearch.plugins.SearchPlugin;
2318
import org.elasticsearch.search.aggregations.AggregationBuilder;
@@ -30,6 +25,12 @@
3025
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
3126
import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper;
3227

28+
import java.util.Collections;
29+
import java.util.List;
30+
31+
import static java.util.Collections.singleton;
32+
import static org.elasticsearch.xpack.analytics.AnalyticsTestsUtils.histogramFieldDocValues;
33+
3334
public class HistoBackedHistogramAggregatorTests extends AggregatorTestCase {
3435

3536
private static final String FIELD_NAME = "field";
@@ -99,6 +100,27 @@ public void testMinDocCount() throws Exception {
99100
}
100101
}
101102

103+
public void testHistogramWithDocCountField() throws Exception {
104+
try (Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
105+
w.addDocument(List.of(
106+
// Add the _doc_dcount field
107+
new CustomTermFreqField("_doc_count", "_doc_count", 8),
108+
histogramFieldDocValues(FIELD_NAME, new double[] {0, 1.2, 10, 10, 12, 24, 24, 24}))
109+
);
110+
111+
HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
112+
.field(FIELD_NAME)
113+
.interval(100);
114+
115+
try (IndexReader reader = w.getReader()) {
116+
IndexSearcher searcher = new IndexSearcher(reader);
117+
InternalHistogram histogram = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, defaultFieldType(FIELD_NAME));
118+
assertTrue(AggregationInspectionHelper.hasValue(histogram));
119+
assertEquals(8, histogram.getBuckets().get(0).getDocCount());
120+
}
121+
}
122+
}
123+
102124
public void testRandomOffset() throws Exception {
103125
try (Directory dir = newDirectory();
104126
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {

x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/analytics/histogram.yml

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,46 @@ setup:
7474
field: "latency"
7575
interval: 0.3
7676

77-
7877
- match: { hits.total.value: 2 }
7978
- length: { aggregations.histo.buckets: 2 }
8079
- match: { aggregations.histo.buckets.0.key: 0.0 }
8180
- match: { aggregations.histo.buckets.0.doc_count: 20 }
8281
- match: { aggregations.histo.buckets.1.key: 0.3 }
8382
- match: { aggregations.histo.buckets.1.doc_count: 60 }
83+
84+
---
85+
"Histogram with _doc_count":
86+
- do:
87+
indices.create:
88+
index: "histo_with_doc_count"
89+
body:
90+
mappings:
91+
properties:
92+
latency:
93+
type: "histogram"
94+
- do:
95+
headers:
96+
Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
97+
bulk:
98+
index: "histo_with_doc_count"
99+
refresh: true
100+
body:
101+
- '{"index": {}}'
102+
- '{"_doc_count": 50, "latency": {"values" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [3, 7, 23, 12, 5]}}'
103+
- '{"index": {}}'
104+
- '{"_doc_count": 10, "latency": {"values" : [0.1, 0.2, 0.3, 0.4, 0.5], "counts" : [1, 1, 1, 1, 6]}}'
105+
- do:
106+
search:
107+
index: "histo_with_doc_count"
108+
body:
109+
size: 0
110+
aggs:
111+
histo:
112+
histogram:
113+
field: "latency"
114+
interval: 1
115+
116+
- match: { hits.total.value: 2 }
117+
- length: { aggregations.histo.buckets: 1 }
118+
- match: { aggregations.histo.buckets.0.key: 0.0 }
119+
- match: { aggregations.histo.buckets.0.doc_count: 60 }

0 commit comments

Comments
 (0)