Skip to content

Commit cc17e09

Browse files
authored
[7.x] Integration tests for diversified sampler (#77810) (#78202)
Adds an integration test for the `diversified_sampler` aggregator.
1 parent 850e36b commit cc17e09

File tree

1 file changed

+236
-0
lines changed

1 file changed

+236
-0
lines changed
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
setup:
2+
- do:
3+
indices.create:
4+
index: test
5+
body:
6+
settings:
7+
number_of_shards: 1
8+
mappings:
9+
properties:
10+
tags:
11+
type: text
12+
author:
13+
type: keyword
14+
number:
15+
type: integer
16+
class:
17+
type: integer
18+
19+
- do:
20+
bulk:
21+
index: test
22+
refresh: true
23+
body:
24+
- '{"index": {}}'
25+
- '{"tags": "kibana", "author": "alice", "number": 1, "class": 1}'
26+
- '{"index": {}}'
27+
- '{"tags": "kibana", "author": "bob", "number": 2, "class": 1}'
28+
- '{"index": {}}'
29+
- '{"tags": "kibana", "author": "alice", "number": 3, "class": 2}'
30+
- '{"index": {}}'
31+
- '{"tags": "javascript", "author": "alice", "number": 4, "class": 1}'
32+
33+
---
34+
small shard_size:
35+
- do:
36+
search:
37+
rest_total_hits_as_int: true
38+
body:
39+
size: 0
40+
query:
41+
query_string:
42+
query: 'tags:kibana OR tags:javascript'
43+
aggs:
44+
diversified:
45+
diversified_sampler:
46+
field: author
47+
shard_size: 1
48+
aggs:
49+
min_number:
50+
min:
51+
field: number
52+
max_number:
53+
max:
54+
field: number
55+
56+
57+
- match: { hits.total: 4 }
58+
- match: { aggregations.diversified.doc_count: 1 }
59+
# The most relevant document has a value of 4 so we only aggregate that.
60+
- match: { aggregations.diversified.min_number.value: 4.0 }
61+
- match: { aggregations.diversified.max_number.value: 4.0 }
62+
63+
---
64+
defaults:
65+
- do:
66+
search:
67+
rest_total_hits_as_int: true
68+
body:
69+
size: 0
70+
query:
71+
query_string:
72+
query: 'tags:kibana OR tags:javascript'
73+
aggs:
74+
diversified:
75+
diversified_sampler:
76+
field: author
77+
aggs:
78+
min_number:
79+
min:
80+
field: number
81+
max_number:
82+
max:
83+
field: number
84+
85+
- match: { hits.total: 4 }
86+
# The max_docs_per_value defaults to 1 so we just get one doc for each of the two authors
87+
- match: { aggregations.diversified.doc_count: 2 }
88+
# Bob's only document is 2 so we get that as the min.
89+
- match: { aggregations.diversified.min_number.value: 2.0 }
90+
# Alice's most relevant document is 4 so we get that as the max.
91+
- match: { aggregations.diversified.max_number.value: 4.0 }
92+
93+
---
94+
override max_docs_per_value:
95+
- do:
96+
search:
97+
rest_total_hits_as_int: true
98+
body:
99+
size: 0
100+
query:
101+
query_string:
102+
query: 'tags:kibana OR tags:javascript'
103+
aggs:
104+
diversified:
105+
diversified_sampler:
106+
field: author
107+
max_docs_per_value: 3
108+
aggs:
109+
min_number:
110+
min:
111+
field: number
112+
max_number:
113+
max:
114+
field: number
115+
116+
- match: { hits.total: 4 }
117+
# We've bumped the max_docs_per_value high enough to get all docs
118+
- match: { aggregations.diversified.doc_count: 4 }
119+
- match: { aggregations.diversified.min_number.value: 1.0 }
120+
- match: { aggregations.diversified.max_number.value: 4.0 }
121+
122+
---
123+
run on number:
124+
- do:
125+
search:
126+
rest_total_hits_as_int: true
127+
body:
128+
size: 0
129+
query:
130+
query_string:
131+
query: 'tags:kibana OR tags:javascript'
132+
aggs:
133+
diversified:
134+
diversified_sampler:
135+
field: class
136+
aggs:
137+
min_number:
138+
min:
139+
field: number
140+
max_number:
141+
max:
142+
field: number
143+
144+
- match: { hits.total: 4 }
145+
# The max_docs_per_value defaults to 1 so we just get one doc for each of the two classes
146+
- match: { aggregations.diversified.doc_count: 2 }
147+
- match: { aggregations.diversified.min_number.value: 3.0 }
148+
- match: { aggregations.diversified.max_number.value: 4.0 }
149+
150+
---
151+
force map mode:
152+
- do:
153+
search:
154+
rest_total_hits_as_int: true
155+
body:
156+
size: 0
157+
query:
158+
query_string:
159+
query: 'tags:kibana OR tags:javascript'
160+
aggs:
161+
diversified:
162+
diversified_sampler:
163+
field: author
164+
execution_hint: map
165+
aggs:
166+
min_number:
167+
min:
168+
field: number
169+
max_number:
170+
max:
171+
field: number
172+
173+
- match: { hits.total: 4 }
174+
- match: { aggregations.diversified.doc_count: 2 }
175+
- match: { aggregations.diversified.min_number.value: 2.0 }
176+
- match: { aggregations.diversified.max_number.value: 4.0 }
177+
178+
---
179+
force global ordinals mode:
180+
- do:
181+
search:
182+
rest_total_hits_as_int: true
183+
body:
184+
size: 0
185+
query:
186+
query_string:
187+
query: 'tags:kibana OR tags:javascript'
188+
aggs:
189+
diversified:
190+
diversified_sampler:
191+
field: author
192+
execution_hint: global_ordinals
193+
aggs:
194+
min_number:
195+
min:
196+
field: number
197+
max_number:
198+
max:
199+
field: number
200+
201+
- match: { hits.total: 4 }
202+
- match: { aggregations.diversified.doc_count: 2 }
203+
- match: { aggregations.diversified.min_number.value: 2.0 }
204+
- match: { aggregations.diversified.max_number.value: 4.0 }
205+
206+
---
207+
enable hash mode mode:
208+
- do:
209+
search:
210+
rest_total_hits_as_int: true
211+
body:
212+
size: 0
213+
query:
214+
query_string:
215+
query: 'tags:kibana OR tags:javascript'
216+
aggs:
217+
diversified:
218+
diversified_sampler:
219+
field: author
220+
execution_hint: bytes_hash
221+
aggs:
222+
min_number:
223+
min:
224+
field: number
225+
max_number:
226+
max:
227+
field: number
228+
229+
# This mode can have hash collisions. The hash is seeded with tests.seed
230+
# so we have to use weaker constraints on the aggregation results
231+
- match: { hits.total: 4 }
232+
- lte: { aggregations.diversified.doc_count: 2 }
233+
- gte: { aggregations.diversified.doc_count: 1 }
234+
- gte: { aggregations.diversified.min_number.value: 2.0 }
235+
- lte: { aggregations.diversified.min_number.value: 4.0 }
236+
- match: { aggregations.diversified.max_number.value: 4.0 }

0 commit comments

Comments
 (0)