From 7ff2cf8f80042b922d07280da9ebc22195a7f0e5 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 1 Feb 2019 09:35:46 +0100 Subject: [PATCH 1/4] Don't load global ordinals with the `map` execution_hint (#37833) The terms aggregator loads the global ordinals to retrieve the cardinality of the field to aggregate on. This information is then used to select the strategy to use for the aggregation (breadth_first or depth_first). However this should be avoided if the execution_hint is explicitly set to map since this mode doesn't really need the global ordinals. Since we still need the cardinality of the field this change picks the maximum cardinality in the segments as an estimation of the total cardinality to select the strategy to use (breadth_first or depth_first). This estimation is only used if the execution hint is set to map, otherwise the global ordinals are still used to retrieve the accurate cardinality. Closes #37705 --- .../test/search.aggregation/20_terms.yml | 71 +++++++++++++++++++ .../bucket/terms/TermsAggregatorFactory.java | 21 ++++-- 2 files changed, 87 insertions(+), 5 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml index 5ac79a898816b..7a8085e493681 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml @@ -748,3 +748,74 @@ setup: - is_false: aggregations.str_terms.buckets.1.key_as_string - match: { aggregations.str_terms.buckets.1.doc_count: 2 } + +--- +"Global ordinals are not loaded with the map execution hint": + + - skip: + version: " - 6.99.99" + reason: bug fixed in 7.0 + + - do: + index: + refresh: true + index: test_1 + id: 1 + routing: 1 + body: { "str": "abc" } + + - do: + index: + refresh: true + index: test_1 + id: 2 + routing: 1 + body: { "str": "abc" } + + - do: + index: + refresh: true + index: test_1 + id: 3 + routing: 1 + body: { "str": "bcd" } + + - do: + indices.refresh: {} + + - do: + search: + index: test_1 + body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str", "execution_hint" : "map" } } } } + + - match: { hits.total.value: 3} + - length: { aggregations.str_terms.buckets: 2 } + + - do: + indices.stats: + index: test_1 + metric: fielddata + fielddata_fields: str + + - match: { indices.test_1.total.fielddata.memory_size_in_bytes: 0} + + - do: + search: + index: test_1 + body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str", "execution_hint" : "global_ordinals" } } } } + + - match: { hits.total.value: 3} + - length: { aggregations.str_terms.buckets: 2 } + + - do: + indices.stats: + index: test_1 + metric: fielddata + fielddata_fields: str + + - gt: { indices.test_1.total.fielddata.memory_size_in_bytes: 0} + + + + + diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index b371a66b78e92..d74cde38184b1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.aggregations.bucket.terms; import org.apache.logging.log4j.LogManager; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.IndexSearcher; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.logging.DeprecationLogger; @@ -133,7 +134,7 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals == false) { execution = ExecutionMode.MAP; } - final long maxOrd = getMaxOrd(valuesSource, context.searcher()); + final long maxOrd = getMaxOrd(context.searcher(), valuesSource, execution); if (execution == null) { execution = ExecutionMode.GLOBAL_ORDINALS; } @@ -205,13 +206,23 @@ static SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) } /** - * Get the maximum global ordinal value for the provided {@link ValuesSource} or -1 + * Get the maximum ordinal value for the provided {@link ValuesSource} or -1 * if the values source is not an instance of {@link ValuesSource.Bytes.WithOrdinals}. */ - static long getMaxOrd(ValuesSource source, IndexSearcher searcher) throws IOException { + static long getMaxOrd(IndexSearcher searcher, ValuesSource source, ExecutionMode executionMode) throws IOException { if (source instanceof ValuesSource.Bytes.WithOrdinals) { ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) source; - return valueSourceWithOrdinals.globalMaxOrd(searcher); + if (executionMode == ExecutionMode.MAP) { + // global ordinals are not requested so we don't load them + // and return the biggest cardinality per segment instead. + long maxOrd = -1; + for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) { + maxOrd = Math.max(maxOrd, valueSourceWithOrdinals.ordinalsValues(leaf).getValueCount()); + } + return maxOrd; + } else { + return valueSourceWithOrdinals.globalMaxOrd(searcher); + } } else { return -1; } @@ -256,7 +267,7 @@ Aggregator create(String name, List pipelineAggregators, Map metaData) throws IOException { - final long maxOrd = getMaxOrd(valuesSource, context.searcher()); + final long maxOrd = getMaxOrd(context.searcher(), valuesSource, ExecutionMode.GLOBAL_ORDINALS); assert maxOrd != -1; final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs()); From f5d3412a451588baf8a797934587ba7a3d7d3ea8 Mon Sep 17 00:00:00 2001 From: jimczi Date: Fri, 1 Feb 2019 09:38:22 +0100 Subject: [PATCH 2/4] adapt rest test version after backport --- .../rest-api-spec/test/search.aggregation/20_terms.yml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml index 7a8085e493681..eb84f9a4ce973 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml @@ -753,8 +753,8 @@ setup: "Global ordinals are not loaded with the map execution hint": - skip: - version: " - 6.99.99" - reason: bug fixed in 7.0 + version: " - 6.6.99" + reason: bug fixed in 6.7 - do: index: @@ -814,8 +814,3 @@ setup: fielddata_fields: str - gt: { indices.test_1.total.fielddata.memory_size_in_bytes: 0} - - - - - From dd6043c1c019974fe1c58810384b89e30cd8b89b Mon Sep 17 00:00:00 2001 From: jimczi Date: Fri, 1 Feb 2019 10:28:40 +0100 Subject: [PATCH 3/4] adapt rest test after backport --- .../rest-api-spec/test/search.aggregation/20_terms.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml index eb84f9a4ce973..fdb6b74532eb7 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml @@ -760,6 +760,7 @@ setup: index: refresh: true index: test_1 + type: test id: 1 routing: 1 body: { "str": "abc" } @@ -768,6 +769,7 @@ setup: index: refresh: true index: test_1 + type: test id: 2 routing: 1 body: { "str": "abc" } @@ -776,6 +778,7 @@ setup: index: refresh: true index: test_1 + type: test id: 3 routing: 1 body: { "str": "bcd" } @@ -788,7 +791,6 @@ setup: index: test_1 body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str", "execution_hint" : "map" } } } } - - match: { hits.total.value: 3} - length: { aggregations.str_terms.buckets: 2 } - do: @@ -804,7 +806,6 @@ setup: index: test_1 body: { "size" : 0, "aggs" : { "str_terms" : { "terms" : { "field" : "str", "execution_hint" : "global_ordinals" } } } } - - match: { hits.total.value: 3} - length: { aggregations.str_terms.buckets: 2 } - do: From 84af87b9a4e2b6eab16f3517f0ea29e4f76fd8a6 Mon Sep 17 00:00:00 2001 From: jimczi Date: Fri, 1 Feb 2019 11:16:17 +0100 Subject: [PATCH 4/4] Do not compute cardinality if the `terms` execution mode does not use `global_ordinals` In #38158 we ensured that global ordinals are not loaded when another execution hint is explicitly set on the source. This change is a follow up that addresses a comment https://github.com/elastic/elasticsearch/pull/38158/files/dd6043c1c019974fe1c58810384b89e30cd8b89b#r252984782 added after the merge. --- .../bucket/terms/TermsAggregatorFactory.java | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index d74cde38184b1..955d9f25e8637 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -20,7 +20,6 @@ package org.elasticsearch.search.aggregations.bucket.terms; import org.apache.logging.log4j.LogManager; -import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.IndexSearcher; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.logging.DeprecationLogger; @@ -134,7 +133,7 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals == false) { execution = ExecutionMode.MAP; } - final long maxOrd = getMaxOrd(context.searcher(), valuesSource, execution); + final long maxOrd = execution == ExecutionMode.GLOBAL_ORDINALS ? getMaxOrd(valuesSource, context.searcher()) : -1; if (execution == null) { execution = ExecutionMode.GLOBAL_ORDINALS; } @@ -206,23 +205,13 @@ static SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) } /** - * Get the maximum ordinal value for the provided {@link ValuesSource} or -1 + * Get the maximum global ordinal value for the provided {@link ValuesSource} or -1 * if the values source is not an instance of {@link ValuesSource.Bytes.WithOrdinals}. */ - static long getMaxOrd(IndexSearcher searcher, ValuesSource source, ExecutionMode executionMode) throws IOException { + static long getMaxOrd(ValuesSource source, IndexSearcher searcher) throws IOException { if (source instanceof ValuesSource.Bytes.WithOrdinals) { ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) source; - if (executionMode == ExecutionMode.MAP) { - // global ordinals are not requested so we don't load them - // and return the biggest cardinality per segment instead. - long maxOrd = -1; - for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) { - maxOrd = Math.max(maxOrd, valueSourceWithOrdinals.ordinalsValues(leaf).getValueCount()); - } - return maxOrd; - } else { - return valueSourceWithOrdinals.globalMaxOrd(searcher); - } + return valueSourceWithOrdinals.globalMaxOrd(searcher); } else { return -1; } @@ -267,7 +256,7 @@ Aggregator create(String name, List pipelineAggregators, Map metaData) throws IOException { - final long maxOrd = getMaxOrd(context.searcher(), valuesSource, ExecutionMode.GLOBAL_ORDINALS); + final long maxOrd = getMaxOrd(valuesSource, context.searcher()); assert maxOrd != -1; final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs());