Skip to content

Commit 3e5dbb0

Browse files
authored
Integration tests for diversified sampler (#77810)
Adds an integration test for the `diversified_sampler` aggregator.
1 parent 407d6ce commit 3e5dbb0

File tree

1 file changed

+229
-0
lines changed

1 file changed

+229
-0
lines changed
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
setup:
2+
- do:
3+
indices.create:
4+
index: test
5+
body:
6+
settings:
7+
number_of_shards: 1
8+
mappings:
9+
properties:
10+
tags:
11+
type: text
12+
author:
13+
type: keyword
14+
number:
15+
type: integer
16+
class:
17+
type: integer
18+
19+
- do:
20+
bulk:
21+
index: test
22+
refresh: true
23+
body:
24+
- '{"index": {}}'
25+
- '{"tags": "kibana", "author": "alice", "number": 1, "class": 1}'
26+
- '{"index": {}}'
27+
- '{"tags": "kibana", "author": "bob", "number": 2, "class": 1}'
28+
- '{"index": {}}'
29+
- '{"tags": "kibana", "author": "alice", "number": 3, "class": 2}'
30+
- '{"index": {}}'
31+
- '{"tags": "javascript", "author": "alice", "number": 4, "class": 1}'
32+
33+
---
34+
small shard_size:
35+
- do:
36+
search:
37+
body:
38+
size: 0
39+
query:
40+
query_string:
41+
query: 'tags:kibana OR tags:javascript'
42+
aggs:
43+
diversified:
44+
diversified_sampler:
45+
field: author
46+
shard_size: 1
47+
aggs:
48+
min_number:
49+
min:
50+
field: number
51+
max_number:
52+
max:
53+
field: number
54+
55+
56+
- match: { hits.total.value: 4 }
57+
- match: { aggregations.diversified.doc_count: 1 }
58+
# The most relevant document has a value of 4 so we only aggregate that.
59+
- match: { aggregations.diversified.min_number.value: 4.0 }
60+
- match: { aggregations.diversified.max_number.value: 4.0 }
61+
62+
---
63+
defaults:
64+
- do:
65+
search:
66+
body:
67+
size: 0
68+
query:
69+
query_string:
70+
query: 'tags:kibana OR tags:javascript'
71+
aggs:
72+
diversified:
73+
diversified_sampler:
74+
field: author
75+
aggs:
76+
min_number:
77+
min:
78+
field: number
79+
max_number:
80+
max:
81+
field: number
82+
83+
- match: { hits.total.value: 4 }
84+
# The max_docs_per_value defaults to 1 so we just get one doc for each of the two authors
85+
- match: { aggregations.diversified.doc_count: 2 }
86+
# Bob's only document is 2 so we get that as the min.
87+
- match: { aggregations.diversified.min_number.value: 2.0 }
88+
# Alice's most relevant document is 4 so we get that as the max.
89+
- match: { aggregations.diversified.max_number.value: 4.0 }
90+
91+
---
92+
override max_docs_per_value:
93+
- do:
94+
search:
95+
body:
96+
size: 0
97+
query:
98+
query_string:
99+
query: 'tags:kibana OR tags:javascript'
100+
aggs:
101+
diversified:
102+
diversified_sampler:
103+
field: author
104+
max_docs_per_value: 3
105+
aggs:
106+
min_number:
107+
min:
108+
field: number
109+
max_number:
110+
max:
111+
field: number
112+
113+
- match: { hits.total.value: 4 }
114+
# We've bumped the max_docs_per_value high enough to get all docs
115+
- match: { aggregations.diversified.doc_count: 4 }
116+
- match: { aggregations.diversified.min_number.value: 1.0 }
117+
- match: { aggregations.diversified.max_number.value: 4.0 }
118+
119+
---
120+
run on number:
121+
- do:
122+
search:
123+
body:
124+
size: 0
125+
query:
126+
query_string:
127+
query: 'tags:kibana OR tags:javascript'
128+
aggs:
129+
diversified:
130+
diversified_sampler:
131+
field: class
132+
aggs:
133+
min_number:
134+
min:
135+
field: number
136+
max_number:
137+
max:
138+
field: number
139+
140+
- match: { hits.total.value: 4 }
141+
# The max_docs_per_value defaults to 1 so we just get one doc for each of the two classes
142+
- match: { aggregations.diversified.doc_count: 2 }
143+
- match: { aggregations.diversified.min_number.value: 3.0 }
144+
- match: { aggregations.diversified.max_number.value: 4.0 }
145+
146+
---
147+
force map mode:
148+
- do:
149+
search:
150+
body:
151+
size: 0
152+
query:
153+
query_string:
154+
query: 'tags:kibana OR tags:javascript'
155+
aggs:
156+
diversified:
157+
diversified_sampler:
158+
field: author
159+
execution_hint: map
160+
aggs:
161+
min_number:
162+
min:
163+
field: number
164+
max_number:
165+
max:
166+
field: number
167+
168+
- match: { hits.total.value: 4 }
169+
- match: { aggregations.diversified.doc_count: 2 }
170+
- match: { aggregations.diversified.min_number.value: 2.0 }
171+
- match: { aggregations.diversified.max_number.value: 4.0 }
172+
173+
---
174+
force global ordinals mode:
175+
- do:
176+
search:
177+
body:
178+
size: 0
179+
query:
180+
query_string:
181+
query: 'tags:kibana OR tags:javascript'
182+
aggs:
183+
diversified:
184+
diversified_sampler:
185+
field: author
186+
execution_hint: global_ordinals
187+
aggs:
188+
min_number:
189+
min:
190+
field: number
191+
max_number:
192+
max:
193+
field: number
194+
195+
- match: { hits.total.value: 4 }
196+
- match: { aggregations.diversified.doc_count: 2 }
197+
- match: { aggregations.diversified.min_number.value: 2.0 }
198+
- match: { aggregations.diversified.max_number.value: 4.0 }
199+
200+
---
201+
enable hash mode mode:
202+
- do:
203+
search:
204+
body:
205+
size: 0
206+
query:
207+
query_string:
208+
query: 'tags:kibana OR tags:javascript'
209+
aggs:
210+
diversified:
211+
diversified_sampler:
212+
field: author
213+
execution_hint: bytes_hash
214+
aggs:
215+
min_number:
216+
min:
217+
field: number
218+
max_number:
219+
max:
220+
field: number
221+
222+
# This mode can have hash collisions. The hash is seeded with tests.seed
223+
# so we have to use weaker constraints on the aggregation results
224+
- match: { hits.total.value: 4 }
225+
- lte: { aggregations.diversified.doc_count: 2 }
226+
- gte: { aggregations.diversified.doc_count: 1 }
227+
- gte: { aggregations.diversified.min_number.value: 2.0 }
228+
- lte: { aggregations.diversified.min_number.value: 4.0 }
229+
- match: { aggregations.diversified.max_number.value: 4.0 }

0 commit comments

Comments
 (0)