From 4fecc6f796e4b96c80ef691ff0dd7816b399407a Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 1 Jun 2020 09:43:34 -0400 Subject: [PATCH 1/2] Fix an optimization in terms agg When the `terms` agg runs against strings and uses global ordinals it has an optimization when it collects segments that only ever have a single value for the particular string. This is *very* common. But I broke it in #57241. This fixes that optimization and adds `debug` information that you can use to see how often we collect segments of each type. And adds a test to make sure that I don't break the optimization again. We also had a specialiation for when there isn't a filter on the terms to aggregate. I had removed that specialization in #57241 which resulted in some slow down as well. This adds it back but in a more clear way. And, hopefully, a way that is marginally faster when there *is* a filter. Closes #57407 --- .../test/search.aggregation/20_terms.yml | 81 ++++++++++- .../GlobalOrdinalsStringTermsAggregator.java | 134 +++++++++--------- 2 files changed, 145 insertions(+), 70 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml index f742c15a87098..6aa48931cf150 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yml @@ -724,10 +724,10 @@ setup: body: { "size" : 0, "aggs" : { "no_field_terms" : { "terms" : { "size": 1 } } } } --- -"profiler": +"global ords profiler": - skip: - version: " - 7.8.99" - reason: debug information added in 7.9.0 + version: " - 7.9.99" + reason: debug information added in 8.0.0 (backport to 7.9.0 pending) - do: bulk: index: test_1 @@ -753,6 +753,7 @@ setup: terms: field: str collect_mode: breadth_first + execution_hint: global_ordinals aggs: max_number: max: @@ -768,9 +769,83 @@ setup: - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 } - match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] } - match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense } + - match: { profile.shards.0.aggregations.0.debug.result_strategy: terms } + - gt: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 } + - match: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 } + - match: { profile.shards.0.aggregations.0.debug.has_filter: false } - match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator } - match: { profile.shards.0.aggregations.0.children.0.description: max_number } + - do: + indices.create: + index: test_3 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + str: + type: keyword + - do: + bulk: + index: test_3 + refresh: true + body: | + { "index": {} } + { "str": ["pig", "sheep"], "number": 100 } + + - do: + search: + index: test_3 + body: + profile: true + size: 0 + aggs: + str_terms: + terms: + field: str + collect_mode: breadth_first + execution_hint: global_ordinals + aggs: + max_number: + max: + field: number + - match: { aggregations.str_terms.buckets.0.key: pig } + - match: { aggregations.str_terms.buckets.0.max_number.value: 100 } + - match: { aggregations.str_terms.buckets.1.key: sheep } + - match: { aggregations.str_terms.buckets.1.max_number.value: 100 } + - match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator } + - match: { profile.shards.0.aggregations.0.description: str_terms } + - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 1 } + - match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] } + - match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense } + - match: { profile.shards.0.aggregations.0.debug.result_strategy: terms } + - match: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 } + - gt: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 } + - match: { profile.shards.0.aggregations.0.debug.has_filter: false } + - match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator } + - match: { profile.shards.0.aggregations.0.children.0.description: max_number } + +--- +"numeric profiler": + - skip: + version: " - 7.8.99" + reason: debug information added in 7.9.0 + - do: + bulk: + index: test_1 + refresh: true + body: | + { "index": {} } + { "number": 1 } + { "index": {} } + { "number": 3 } + { "index": {} } + { "number": 1 } + { "index": {} } + { "number": 1 } + - do: search: index: test_1 diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 83126c3385f53..c8101b529d476 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -33,7 +33,6 @@ import org.elasticsearch.common.util.IntArray; import org.elasticsearch.common.util.LongHash; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.index.fielddata.AbstractSortedSetDocValues; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; @@ -73,6 +72,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr private final long valueCount; private final GlobalOrdLookupFunction lookupGlobalOrd; protected final CollectionStrategy collectionStrategy; + protected int segmentsWithSingleValuedOrds = 0; + protected int segmentsWithMultiValuedOrds = 0; public interface GlobalOrdLookupFunction { BytesRef apply(long ord) throws IOException; @@ -102,7 +103,7 @@ public GlobalOrdinalsStringTermsAggregator( valuesSource.globalOrdinalsValues(context.searcher().getIndexReader().leaves().get(0)) : DocValues.emptySortedSet(); this.valueCount = values.getValueCount(); this.lookupGlobalOrd = values::lookupOrd; - this.acceptedGlobalOrdinals = includeExclude == null ? l -> true : includeExclude.acceptedGlobalOrdinals(values)::get; + this.acceptedGlobalOrdinals = includeExclude == null ? ALWAYS_TRUE : includeExclude.acceptedGlobalOrdinals(values)::get; this.collectionStrategy = remapGlobalOrds ? new RemapGlobalOrds() : new DenseGlobalOrds(); } @@ -110,24 +111,52 @@ String descriptCollectionStrategy() { return collectionStrategy.describe(); } - private SortedSetDocValues getGlobalOrds(LeafReaderContext ctx) throws IOException { - return acceptedGlobalOrdinals == null ? - valuesSource.globalOrdinalsValues(ctx) : new FilteredOrdinals(valuesSource.globalOrdinalsValues(ctx), acceptedGlobalOrdinals); - } - @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { - SortedSetDocValues globalOrds = getGlobalOrds(ctx); + SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx); collectionStrategy.globalOrdsReady(globalOrds); SortedDocValues singleValues = DocValues.unwrapSingleton(globalOrds); if (singleValues != null) { + segmentsWithSingleValuedOrds++; + if (acceptedGlobalOrdinals == ALWAYS_TRUE) { + return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, globalOrds) { + @Override + public void collect(int doc, long owningBucketOrd) throws IOException { + assert owningBucketOrd == 0; + if (false == singleValues.advanceExact(doc)) { + return; + } + int globalOrd = singleValues.ordValue(); + collectionStrategy.collectGlobalOrd(doc, globalOrd, sub); + } + }); + } return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, globalOrds) { @Override public void collect(int doc, long owningBucketOrd) throws IOException { assert owningBucketOrd == 0; - if (singleValues.advanceExact(doc)) { - int ord = singleValues.ordValue(); - collectionStrategy.collectGlobalOrd(doc, ord, sub); + if (false == singleValues.advanceExact(doc)) { + return; + } + int globalOrd = singleValues.ordValue(); + if (false == acceptedGlobalOrdinals.test(globalOrd)) { + return; + } + collectionStrategy.collectGlobalOrd(doc, globalOrd, sub); + } + }); + } + segmentsWithMultiValuedOrds++; + if (acceptedGlobalOrdinals == ALWAYS_TRUE) { + return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, globalOrds) { + @Override + public void collect(int doc, long owningBucketOrd) throws IOException { + assert owningBucketOrd == 0; + if (false == globalOrds.advanceExact(doc)) { + return; + } + for (long globalOrd = globalOrds.nextOrd(); globalOrd != NO_MORE_ORDS; globalOrd = globalOrds.nextOrd()) { + collectionStrategy.collectGlobalOrd(doc, globalOrd, sub); } } }); @@ -136,10 +165,14 @@ public void collect(int doc, long owningBucketOrd) throws IOException { @Override public void collect(int doc, long owningBucketOrd) throws IOException { assert owningBucketOrd == 0; - if (globalOrds.advanceExact(doc)) { - for (long globalOrd = globalOrds.nextOrd(); globalOrd != NO_MORE_ORDS; globalOrd = globalOrds.nextOrd()) { - collectionStrategy.collectGlobalOrd(doc, globalOrd, sub); + if (false == globalOrds.advanceExact(doc)) { + return; + } + for (long globalOrd = globalOrds.nextOrd(); globalOrd != NO_MORE_ORDS; globalOrd = globalOrds.nextOrd()) { + if (false == acceptedGlobalOrdinals.test(globalOrd)) { + continue; } + collectionStrategy.collectGlobalOrd(doc, globalOrd, sub); } } }); @@ -160,6 +193,9 @@ public void collectDebugInfo(BiConsumer add) { super.collectDebugInfo(add); add.accept("collection_strategy", collectionStrategy.describe()); add.accept("result_strategy", resultStrategy.describe()); + add.accept("segments_with_single_valued_ords", segmentsWithSingleValuedOrds); + add.accept("segments_with_multi_valued_ords", segmentsWithMultiValuedOrds); + add.accept("has_filter", acceptedGlobalOrdinals != ALWAYS_TRUE); } /** @@ -253,26 +289,31 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol assert sub == LeafBucketCollector.NO_OP_COLLECTOR; final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds); mapping = valuesSource.globalOrdinalsMapping(ctx); + // Dense mode doesn't support include/exclude so we don't have to check it here. if (singleValues != null) { + segmentsWithSingleValuedOrds++; return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, segmentOrds) { @Override public void collect(int doc, long owningBucketOrd) throws IOException { assert owningBucketOrd == 0; - if (singleValues.advanceExact(doc)) { - final int ord = singleValues.ordValue(); - segmentDocCounts.increment(ord + 1, 1); + if (false == singleValues.advanceExact(doc)) { + return; } + int ord = singleValues.ordValue(); + segmentDocCounts.increment(ord + 1, 1); } }); } + segmentsWithMultiValuedOrds++; return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, segmentOrds) { @Override public void collect(int doc, long owningBucketOrd) throws IOException { assert owningBucketOrd == 0; - if (segmentOrds.advanceExact(doc)) { - for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) { - segmentDocCounts.increment(segmentOrd + 1, 1); - } + if (false == segmentOrds.advanceExact(doc)) { + return; + } + for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) { + segmentDocCounts.increment(segmentOrd + 1, 1); } } }); @@ -306,52 +347,6 @@ private void mapSegmentCountsToGlobalCounts(LongUnaryOperator mapping) throws IO } } - private static final class FilteredOrdinals extends AbstractSortedSetDocValues { - - private final SortedSetDocValues inner; - private final LongPredicate accepted; - - private FilteredOrdinals(SortedSetDocValues inner, LongPredicate accepted) { - this.inner = inner; - this.accepted = accepted; - } - - @Override - public long getValueCount() { - return inner.getValueCount(); - } - - @Override - public BytesRef lookupOrd(long ord) throws IOException { - return inner.lookupOrd(ord); - } - - @Override - public long nextOrd() throws IOException { - for (long ord = inner.nextOrd(); ord != NO_MORE_ORDS; ord = inner.nextOrd()) { - if (accepted.test(ord)) { - return ord; - } - } - return NO_MORE_ORDS; - } - - @Override - public boolean advanceExact(int target) throws IOException { - if (inner.advanceExact(target)) { - for (long ord = inner.nextOrd(); ord != NO_MORE_ORDS; ord = inner.nextOrd()) { - if (accepted.test(ord)) { - // reset the iterator - boolean advanced = inner.advanceExact(target); - assert advanced; - return true; - } - } - } - return false; - } - } - /** * Strategy for collecting global ordinals. *

@@ -800,4 +795,9 @@ private void oversizedCopy(BytesRef from, BytesRef to) { System.arraycopy(from.bytes, from.offset, to.bytes, 0, from.length); } } + + /** + * Predicate used for {@link #acceptedGlobalOrdinals} if there is no filter. + */ + private static final LongPredicate ALWAYS_TRUE = l -> true; } From c1041402fc9b6570f70c46230a1a248bf04dc9e9 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 1 Jun 2020 12:08:32 -0400 Subject: [PATCH 2/2] Explain --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index c8101b529d476..34875393085bd 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -119,6 +119,10 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol if (singleValues != null) { segmentsWithSingleValuedOrds++; if (acceptedGlobalOrdinals == ALWAYS_TRUE) { + /* + * Optimize when there isn't a filter because that is very + * common and marginally faster. + */ return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, globalOrds) { @Override public void collect(int doc, long owningBucketOrd) throws IOException { @@ -148,6 +152,10 @@ public void collect(int doc, long owningBucketOrd) throws IOException { } segmentsWithMultiValuedOrds++; if (acceptedGlobalOrdinals == ALWAYS_TRUE) { + /* + * Optimize when there isn't a filter because that is very + * common and marginally faster. + */ return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, globalOrds) { @Override public void collect(int doc, long owningBucketOrd) throws IOException {