From ba66e30f4ddd545fbe554bea8eed3b8fe98cccff Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 22 Sep 2021 12:38:30 -0400 Subject: [PATCH 1/2] Integration tests for diversified sampler (#77810) Adds an integration test for the `diversified_sampler` aggregator. --- .../410_diversified_sampler.yml | 229 ++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/410_diversified_sampler.yml diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/410_diversified_sampler.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/410_diversified_sampler.yml new file mode 100644 index 0000000000000..894ac52617655 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/410_diversified_sampler.yml @@ -0,0 +1,229 @@ +setup: + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + mappings: + properties: + tags: + type: text + author: + type: keyword + number: + type: integer + class: + type: integer + + - do: + bulk: + index: test + refresh: true + body: + - '{"index": {}}' + - '{"tags": "kibana", "author": "alice", "number": 1, "class": 1}' + - '{"index": {}}' + - '{"tags": "kibana", "author": "bob", "number": 2, "class": 1}' + - '{"index": {}}' + - '{"tags": "kibana", "author": "alice", "number": 3, "class": 2}' + - '{"index": {}}' + - '{"tags": "javascript", "author": "alice", "number": 4, "class": 1}' + +--- +small shard_size: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: author + shard_size: 1 + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + + - match: { hits.total.value: 4 } + - match: { aggregations.diversified.doc_count: 1 } + # The most relevant document has a value of 4 so we only aggregate
that. + - match: { aggregations.diversified.min_number.value: 4.0 } + - match: { aggregations.diversified.max_number.value: 4.0 } + +--- +defaults: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: author + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + - match: { hits.total.value: 4 } + # The max_docs_per_value defaults to 1 so we just get one doc for each of the two authors + - match: { aggregations.diversified.doc_count: 2 } + # Bob's only document is 2 so we get that as the min. + - match: { aggregations.diversified.min_number.value: 2.0 } + # Alice's most relevant document is 4 so we get that as the max. + - match: { aggregations.diversified.max_number.value: 4.0 } + +--- +override max_docs_per_value: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: author + max_docs_per_value: 3 + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + - match: { hits.total.value: 4 } + # We've bumped the max_docs_per_value high enough to get all docs + - match: { aggregations.diversified.doc_count: 4 } + - match: { aggregations.diversified.min_number.value: 1.0 } + - match: { aggregations.diversified.max_number.value: 4.0 } + +--- +run on number: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: class + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + - match: { hits.total.value: 4 } + # The max_docs_per_value defaults to 1 so we just get one doc for each of the two classes + - match: { aggregations.diversified.doc_count: 2 } + - match: { aggregations.diversified.min_number.value: 3.0 } + - match: { aggregations.diversified.max_number.value: 4.0 } + +---
+force map mode: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: author + execution_hint: map + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + - match: { hits.total.value: 4 } + - match: { aggregations.diversified.doc_count: 2 } + - match: { aggregations.diversified.min_number.value: 2.0 } + - match: { aggregations.diversified.max_number.value: 4.0 } + +--- +force global ordinals mode: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: author + execution_hint: global_ordinals + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + - match: { hits.total.value: 4 } + - match: { aggregations.diversified.doc_count: 2 } + - match: { aggregations.diversified.min_number.value: 2.0 } + - match: { aggregations.diversified.max_number.value: 4.0 } + +--- +enable hash mode mode: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: author + execution_hint: bytes_hash + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + # This mode can have hash collisions. 
The hash is seeded with tests.seed + # so we have to have weaker constraints on these hits + - match: { hits.total.value: 4 } + - lte: { aggregations.diversified.doc_count: 2 } + - gte: { aggregations.diversified.doc_count: 1 } + - gte: { aggregations.diversified.min_number.value: 2.0 } + - lte: { aggregations.diversified.min_number.value: 4.0 } + - match: { aggregations.diversified.max_number.value: 4.0 } From 111ab1b7e89584a9020e077a6ff6e2de78c73776 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 22 Sep 2021 14:15:47 -0400 Subject: [PATCH 2/2] Fixup --- .../410_diversified_sampler.yml | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/410_diversified_sampler.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/410_diversified_sampler.yml index 894ac52617655..8cfc3f71a2405 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/410_diversified_sampler.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/410_diversified_sampler.yml @@ -34,6 +34,7 @@ setup: small shard_size: - do: search: + rest_total_hits_as_int: true body: size: 0 query: @@ -53,7 +54,7 @@ small shard_size: field: number - - match: { hits.total.value: 4 } + - match: { hits.total: 4 } - match: { aggregations.diversified.doc_count: 1 } # The most relevant document has a value of 4 so we only aggregate that. - match: { aggregations.diversified.min_number.value: 4.0 } @@ -63,6 +64,7 @@ small shard_size: defaults: - do: search: + rest_total_hits_as_int: true body: size: 0 query: @@ -80,7 +82,7 @@ defaults: max: field: number - - match: { hits.total.value: 4 } + - match: { hits.total: 4 } # The max_docs_per_value defaults to 1 so we just get one doc for each of the two authors - match: { aggregations.diversified.doc_count: 2 } # Bob's only document is 2 so we get that as the min. 
@@ -92,6 +94,7 @@ defaults: override max_docs_per_value: - do: search: + rest_total_hits_as_int: true body: size: 0 query: @@ -110,7 +113,7 @@ override max_docs_per_value: max: field: number - - match: { hits.total.value: 4 } + - match: { hits.total: 4 } # We've bumped the max_docs_per_value high enough to get all docs - match: { aggregations.diversified.doc_count: 4 } - match: { aggregations.diversified.min_number.value: 1.0 } @@ -120,6 +123,7 @@ override max_docs_per_value: run on number: - do: search: + rest_total_hits_as_int: true body: size: 0 query: @@ -137,7 +141,7 @@ run on number: max: field: number - - match: { hits.total.value: 4 } + - match: { hits.total: 4 } # The max_docs_per_value defaults to 1 so we just get one doc for each of the two classes - match: { aggregations.diversified.doc_count: 2 } - match: { aggregations.diversified.min_number.value: 3.0 } @@ -147,6 +151,7 @@ run on number: force map mode: - do: search: + rest_total_hits_as_int: true body: size: 0 query: @@ -165,7 +170,7 @@ force map mode: max: field: number - - match: { hits.total.value: 4 } + - match: { hits.total: 4 } - match: { aggregations.diversified.doc_count: 2 } - match: { aggregations.diversified.min_number.value: 2.0 } - match: { aggregations.diversified.max_number.value: 4.0 } @@ -174,6 +179,7 @@ force map mode: force global ordinals mode: - do: search: + rest_total_hits_as_int: true body: size: 0 query: @@ -192,7 +198,7 @@ force global ordinals mode: max: field: number - - match: { hits.total.value: 4 } + - match: { hits.total: 4 } - match: { aggregations.diversified.doc_count: 2 } - match: { aggregations.diversified.min_number.value: 2.0 } - match: { aggregations.diversified.max_number.value: 4.0 } @@ -201,6 +207,7 @@ force global ordinals mode: enable hash mode mode: - do: search: + rest_total_hits_as_int: true body: size: 0 query: @@ -221,7 +228,7 @@ enable hash mode mode: # This mode can have hash collisions. 
The hash is seeded with tests.seed # so we have to have weaker constraints on these hits - - match: { hits.total.value: 4 } + - match: { hits.total: 4 } - lte: { aggregations.diversified.doc_count: 2 } - gte: { aggregations.diversified.doc_count: 1 } - gte: { aggregations.diversified.min_number.value: 2.0 }