From 44cbb4b1de7a6947ab601e9aaa3f5e27bd216313 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Mon, 29 May 2017 18:52:51 +0200 Subject: [PATCH 1/4] Terms aggregation should remap global ordinal buckets when a sub-aggregator is used to sort the terms `terms` aggregations at the root level use the `global_ordinals` execution hint by default. When all sub-aggregators can be run in `breadth_first` mode the collected buckets for these sub-aggs are dense (remapped after the initial pruning). But if a sub-aggregator is not deferrable and needs to collect all buckets before pruning we don't remap global ords and the aggregator needs to deal with sparse buckets. Most (if not all) aggregators expect dense buckets and uses this information to allocate memories. This change forces the remap of the global ordinals but only when there is at least one sub-aggregator that cannot be deferred. Relates #24788 --- ...balOrdinalsSignificantTermsAggregator.java | 85 ++---- .../SignificantTermsAggregatorFactory.java | 95 +++++-- .../GlobalOrdinalsStringTermsAggregator.java | 254 ++++++++---------- .../bucket/terms/TermsAggregatorFactory.java | 127 ++++++--- .../bucket/terms/support/IncludeExclude.java | 12 +- .../terms/TermsAggregatorFactoryTests.java | 3 +- .../bucket/terms/TermsAggregatorTests.java | 48 ++++ .../aggregation/AggregationProfilerIT.java | 10 +- .../aggregations/AggregatorTestCase.java | 4 +- 9 files changed, 352 insertions(+), 286 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java index 98effdcfd54c8..1e719e3280cc9 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java @@ -53,22 +53,26 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri protected final SignificantTermsAggregatorFactory termsAggFactory; private final SignificanceHeuristic significanceHeuristic; - public GlobalOrdinalsSignificantTermsAggregator(String name, AggregatorFactories factories, - ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, DocValueFormat format, - BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude, - SearchContext context, Aggregator parent, - SignificanceHeuristic significanceHeuristic, SignificantTermsAggregatorFactory termsAggFactory, - List pipelineAggregators, Map metaData) throws IOException { - + public GlobalOrdinalsSignificantTermsAggregator(String name, + AggregatorFactories factories, + ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, + DocValueFormat format, + BucketCountThresholds bucketCountThresholds, + IncludeExclude.OrdinalsFilter includeExclude, + SearchContext context, + Aggregator parent, + SignificanceHeuristic significanceHeuristic, + SignificantTermsAggregatorFactory termsAggFactory, + List pipelineAggregators, + Map metaData) throws IOException { super(name, factories, valuesSource, null, format, bucketCountThresholds, includeExclude, context, parent, - SubAggCollectionMode.DEPTH_FIRST, false, pipelineAggregators, metaData); + false, SubAggCollectionMode.DEPTH_FIRST, false, pipelineAggregators, metaData); this.significanceHeuristic = significanceHeuristic; this.termsAggFactory = termsAggFactory; } @Override - public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, - final LeafBucketCollector sub) throws IOException { + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { return new LeafBucketCollectorBase(super.getLeafCollector(ctx, sub), null) { @Override public void collect(int doc, long bucket) throws IOException { @@ -78,18 +82,17 @@ public void collect(int doc, long bucket) throws IOException { }; } - @Override public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws IOException { assert owningBucketOrdinal == 0; - if (globalOrds == null) { // no context in this reader + if (valueCount == 0) { // no context in this reader return buildEmptyAggregation(); } final int size; if (bucketCountThresholds.getMinDocCount() == 0) { // if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns - size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize()); + size = (int) Math.min(valueCount, bucketCountThresholds.getShardSize()); } else { size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize()); } @@ -98,7 +101,7 @@ public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue<>(size); SignificantStringTerms.Bucket spare = null; - for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) { + for (long globalTermOrd = 0; globalTermOrd < valueCount; ++globalTermOrd) { if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) { continue; } @@ -115,7 +118,7 @@ public SignificantStringTerms buildAggregation(long owningBucketOrdinal) throws spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format); } spare.bucketOrd = bucketOrd; - copy(globalOrds.lookupOrd(globalTermOrd), spare.termBytes); + copy(lookupGlobalOrd.apply(globalTermOrd), spare.termBytes); spare.subsetDf = bucketDocCount; spare.subsetSize = subsetSize; spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes); @@ -153,58 +156,8 @@ public SignificantStringTerms buildEmptyAggregation() { @Override protected void doClose() { + super.doClose(); Releasables.close(termsAggFactory); } - - public static class WithHash extends GlobalOrdinalsSignificantTermsAggregator { - - private final LongHash bucketOrds; - - public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, - DocValueFormat format, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude, - SearchContext context, Aggregator parent, SignificanceHeuristic significanceHeuristic, - SignificantTermsAggregatorFactory termsAggFactory, List pipelineAggregators, - Map metaData) throws IOException { - super(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent, significanceHeuristic, - termsAggFactory, pipelineAggregators, metaData); - bucketOrds = new LongHash(1, context.bigArrays()); - } - - @Override - public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, - final LeafBucketCollector sub) throws IOException { - return new LeafBucketCollectorBase(super.getLeafCollector(ctx, sub), null) { - @Override - public void collect(int doc, long bucket) throws IOException { - assert bucket == 0; - numCollectedDocs++; - if (globalOrds.advanceExact(doc)) { - for (long globalOrd = globalOrds.nextOrd(); - globalOrd != SortedSetDocValues.NO_MORE_ORDS; - globalOrd = globalOrds.nextOrd()) { - long bucketOrd = bucketOrds.add(globalOrd); - if (bucketOrd < 0) { - bucketOrd = -1 - bucketOrd; - collectExistingBucket(sub, doc, bucketOrd); - } else { - collectBucket(sub, doc, bucketOrd); - } - } - } - } - }; - } - - @Override - protected long getBucketOrd(long termOrd) { - return bucketOrds.find(termOrd); - } - - @Override - protected void doClose() { - Releasables.close(termsAggFactory, bucketOrds); - } - } - } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index c27cabf78b2aa..e2b6082c0080a 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -70,10 +70,17 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac private final TermsAggregator.BucketCountThresholds bucketCountThresholds; private final SignificanceHeuristic significanceHeuristic; - public SignificantTermsAggregatorFactory(String name, ValuesSourceConfig config, IncludeExclude includeExclude, - String executionHint, QueryBuilder filterBuilder, TermsAggregator.BucketCountThresholds bucketCountThresholds, - SignificanceHeuristic significanceHeuristic, SearchContext context, AggregatorFactory parent, - AggregatorFactories.Builder subFactoriesBuilder, Map metaData) throws IOException { + public SignificantTermsAggregatorFactory(String name, + ValuesSourceConfig config, + IncludeExclude includeExclude, + String executionHint, + QueryBuilder filterBuilder, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + SignificanceHeuristic significanceHeuristic, + SearchContext context, + AggregatorFactory parent, + AggregatorFactories.Builder subFactoriesBuilder, + Map metaData) throws IOException { super(name, config, context, parent, subFactoriesBuilder, metaData); this.includeExclude = includeExclude; this.executionHint = executionHint; @@ -246,44 +253,71 @@ public enum ExecutionMode { MAP(new ParseField("map")) { @Override - Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format, - TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, - SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic, - SignificantTermsAggregatorFactory termsAggregatorFactory, List pipelineAggregators, - Map metaData) throws IOException { + Aggregator create(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + DocValueFormat format, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + IncludeExclude includeExclude, + SearchContext aggregationContext, + Aggregator parent, + SignificanceHeuristic significanceHeuristic, + SignificantTermsAggregatorFactory termsAggregatorFactory, + List pipelineAggregators, + Map metaData) throws IOException { + final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format); return new SignificantStringTermsAggregator(name, factories, valuesSource, format, bucketCountThresholds, filter, aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); + } }, GLOBAL_ORDINALS(new ParseField("global_ordinals")) { @Override - Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format, - TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, - SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic, - SignificantTermsAggregatorFactory termsAggregatorFactory, List pipelineAggregators, - Map metaData) throws IOException { + Aggregator create(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + DocValueFormat format, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + IncludeExclude includeExclude, + SearchContext aggregationContext, + Aggregator parent, + SignificanceHeuristic significanceHeuristic, + SignificantTermsAggregatorFactory termsAggregatorFactory, + List pipelineAggregators, + Map metaData) throws IOException { + final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter, aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); + } }, GLOBAL_ORDINALS_HASH(new ParseField("global_ordinals_hash")) { @Override - Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format, - TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, - SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic, - SignificantTermsAggregatorFactory termsAggregatorFactory, List pipelineAggregators, - Map metaData) throws IOException { + Aggregator create(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + DocValueFormat format, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + IncludeExclude includeExclude, + SearchContext aggregationContext, + Aggregator parent, + SignificanceHeuristic significanceHeuristic, + SignificantTermsAggregatorFactory termsAggregatorFactory, + List pipelineAggregators, + Map metaData) throws IOException { + final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); - return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, - (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter, - aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); + return new GlobalOrdinalsSignificantTermsAggregator(name, factories, + (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter, aggregationContext, parent, + significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); + } }; @@ -302,11 +336,18 @@ public static ExecutionMode fromString(String value) { this.parseField = parseField; } - abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, DocValueFormat format, - TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, - SearchContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic, - SignificantTermsAggregatorFactory termsAggregatorFactory, List pipelineAggregators, - Map metaData) throws IOException; + abstract Aggregator create(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + DocValueFormat format, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + IncludeExclude includeExclude, + SearchContext aggregationContext, + Aggregator parent, + SignificanceHeuristic significanceHeuristic, + SignificantTermsAggregatorFactory termsAggregatorFactory, + List pipelineAggregators, + Map metaData) throws IOException; @Override public String toString() { diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 33bbc370c6e76..d9c60da7acc16 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.aggregations.bucket.terms; import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; @@ -52,6 +53,8 @@ import java.util.List; import java.util.Map; +import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS; + /** * An aggregator of string values that relies on global ordinals in order to build buckets. */ @@ -66,67 +69,104 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr // first defined one. // So currently for each instance of this aggregator the acceptedglobalValues will be computed, this is unnecessary // especially if this agg is on a second layer or deeper. - protected LongBitSet acceptedGlobalOrdinals; + protected final LongBitSet acceptedGlobalOrdinals; + protected final long valueCount; + protected final GlobalOrdLookupFunction lookupGlobalOrd; + + private final LongHash bucketOrds; - protected SortedSetDocValues globalOrds; + public interface GlobalOrdLookupFunction { + BytesRef apply(long ord) throws IOException; + } - public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, - BucketOrder order, DocValueFormat format, BucketCountThresholds bucketCountThresholds, - IncludeExclude.OrdinalsFilter includeExclude, SearchContext context, Aggregator parent, - SubAggCollectionMode collectionMode, boolean showTermDocCountError, List pipelineAggregators, - Map metaData) throws IOException { + public GlobalOrdinalsStringTermsAggregator(String name, AggregatorFactories factories, + ValuesSource.Bytes.WithOrdinals valuesSource, + BucketOrder order, + DocValueFormat format, + BucketCountThresholds bucketCountThresholds, + IncludeExclude.OrdinalsFilter includeExclude, + SearchContext context, + Aggregator parent, + boolean forceRemapGlobalOrds, + SubAggCollectionMode collectionMode, + boolean showTermDocCountError, + List pipelineAggregators, + Map metaData) throws IOException { super(name, factories, context, parent, order, format, bucketCountThresholds, collectionMode, showTermDocCountError, pipelineAggregators, metaData); this.valuesSource = valuesSource; this.includeExclude = includeExclude; + final IndexReader reader = context.searcher().getIndexReader(); + final SortedSetDocValues values = reader.leaves().size() > 0 ? + valuesSource.globalOrdinalsValues(context.searcher().getIndexReader().leaves().get(0)) : DocValues.emptySortedSet(); + this.valueCount = values.getValueCount(); + this.lookupGlobalOrd = values::lookupOrd; + this.acceptedGlobalOrdinals = includeExclude != null ? includeExclude.acceptedGlobalOrdinals(values) : null; + + /** + * Remap global ords to dense bucket ordinals if any sub-aggregator cannot be deferred. + * Sub-aggregators expect dense buckets and allocate memories based on this assumption. + * Deferred aggregators are safe because the selected ordinals are remapped when the buckets + * are replayed. + */ + boolean remapGlobalOrds = forceRemapGlobalOrds || Arrays.stream(subAggregators).anyMatch((a) -> shouldDefer(a) == false); + this.bucketOrds = remapGlobalOrds ? new LongHash(1, context.bigArrays()) : null; } - protected long getBucketOrd(long termOrd) { - return termOrd; - } - - @Override - public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, - final LeafBucketCollector sub) throws IOException { - globalOrds = valuesSource.globalOrdinalsValues(ctx); + boolean remapGlobalOrds() { + return bucketOrds != null; + } - if (acceptedGlobalOrdinals == null && includeExclude != null) { - acceptedGlobalOrdinals = includeExclude.acceptedGlobalOrdinals(globalOrds); - } + protected final long getBucketOrd(long globalOrd) { + return bucketOrds == null ? globalOrd : bucketOrds.find(globalOrd); + } - if (acceptedGlobalOrdinals != null) { - globalOrds = new FilteredOrdinals(globalOrds, acceptedGlobalOrdinals); + private void collectGlobalOrd(int doc, long globalOrd, LeafBucketCollector sub) throws IOException { + if (bucketOrds == null) { + collectExistingBucket(sub, doc, globalOrd); + } else { + long bucketOrd = bucketOrds.add(globalOrd); + if (bucketOrd < 0) { + bucketOrd = -1 - bucketOrd; + collectExistingBucket(sub, doc, bucketOrd); + } else { + collectBucket(sub, doc, bucketOrd); + } } + } - return newCollector(globalOrds, sub); + private SortedSetDocValues getGlobalOrds(LeafReaderContext ctx) throws IOException { + return acceptedGlobalOrdinals == null ? + valuesSource.globalOrdinalsValues(ctx) : new FilteredOrdinals(valuesSource.globalOrdinalsValues(ctx), acceptedGlobalOrdinals); } - protected LeafBucketCollector newCollector(final SortedSetDocValues ords, - final LeafBucketCollector sub) { - grow(ords.getValueCount()); - final SortedDocValues singleValues = DocValues.unwrapSingleton(ords); + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { + final SortedSetDocValues globalOrds = getGlobalOrds(ctx); + if (bucketOrds == null) { + grow(globalOrds.getValueCount()); + } + final SortedDocValues singleValues = DocValues.unwrapSingleton(globalOrds); if (singleValues != null) { - return new LeafBucketCollectorBase(sub, ords) { + return new LeafBucketCollectorBase(sub, globalOrds) { @Override public void collect(int doc, long bucket) throws IOException { assert bucket == 0; if (singleValues.advanceExact(doc)) { final int ord = singleValues.ordValue(); - collectExistingBucket(sub, doc, ord); + collectGlobalOrd(doc, ord, sub); } } }; } else { - return new LeafBucketCollectorBase(sub, ords) { + return new LeafBucketCollectorBase(sub, globalOrds) { @Override public void collect(int doc, long bucket) throws IOException { assert bucket == 0; - if (ords.advanceExact(doc)) { - for (long globalOrd = ords.nextOrd(); - globalOrd != SortedSetDocValues.NO_MORE_ORDS; - globalOrd = ords.nextOrd()) { - collectExistingBucket(sub, doc, globalOrd); + if (globalOrds.advanceExact(doc)) { + for (long globalOrd = globalOrds.nextOrd(); globalOrd != NO_MORE_ORDS; globalOrd = globalOrds.nextOrd()) { + collectGlobalOrd(doc, globalOrd, sub); } } } @@ -145,21 +185,21 @@ protected static void copy(BytesRef from, BytesRef to) { @Override public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException { - if (globalOrds == null) { // no context in this reader + if (valueCount == 0) { // no context in this reader return buildEmptyAggregation(); } final int size; if (bucketCountThresholds.getMinDocCount() == 0) { // if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns - size = (int) Math.min(globalOrds.getValueCount(), bucketCountThresholds.getShardSize()); + size = (int) Math.min(valueCount, bucketCountThresholds.getShardSize()); } else { size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize()); } long otherDocCount = 0; BucketPriorityQueue ordered = new BucketPriorityQueue<>(size, order.comparator(this)); OrdBucket spare = new OrdBucket(-1, 0, null, showTermDocCountError, 0); - for (long globalTermOrd = 0; globalTermOrd < globalOrds.getValueCount(); ++globalTermOrd) { + for (long globalTermOrd = 0; globalTermOrd < valueCount; ++globalTermOrd) { if (includeExclude != null && !acceptedGlobalOrdinals.get(globalTermOrd)) { continue; } @@ -184,10 +224,10 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOE final StringTerms.Bucket[] list = new StringTerms.Bucket[ordered.size()]; long survivingBucketOrds[] = new long[ordered.size()]; for (int i = ordered.size() - 1; i >= 0; --i) { - final OrdBucket bucket = (OrdBucket) ordered.pop(); + final OrdBucket bucket = ordered.pop(); survivingBucketOrds[i] = bucket.bucketOrd; BytesRef scratch = new BytesRef(); - copy(globalOrds.lookupOrd(bucket.globalOrd), scratch); + copy(lookupGlobalOrd.apply(bucket.globalOrd), scratch); list[i] = new StringTerms.Bucket(scratch, bucket.docCount, null, showTermDocCountError, 0, format); list[i].bucketOrd = bucket.bucketOrd; otherDocCount -= list[i].docCount; @@ -254,76 +294,9 @@ protected final XContentBuilder keyToXContent(XContentBuilder builder) throws IO } } - /** - * Variant of {@link GlobalOrdinalsStringTermsAggregator} that rebases hashes in order to make them dense. Might be - * useful in case few hashes are visited. - */ - public static class WithHash extends GlobalOrdinalsStringTermsAggregator { - - private final LongHash bucketOrds; - - public WithHash(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, BucketOrder order, - DocValueFormat format, BucketCountThresholds bucketCountThresholds, IncludeExclude.OrdinalsFilter includeExclude, - SearchContext context, Aggregator parent, SubAggCollectionMode collectionMode, - boolean showTermDocCountError, List pipelineAggregators, Map metaData) - throws IOException { - super(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent, collectionMode, - showTermDocCountError, pipelineAggregators, metaData); - bucketOrds = new LongHash(1, context.bigArrays()); - } - - @Override - protected LeafBucketCollector newCollector(final SortedSetDocValues ords, - final LeafBucketCollector sub) { - final SortedDocValues singleValues = DocValues.unwrapSingleton(ords); - if (singleValues != null) { - return new LeafBucketCollectorBase(sub, ords) { - @Override - public void collect(int doc, long bucket) throws IOException { - if (singleValues.advanceExact(doc)) { - final int globalOrd = singleValues.ordValue(); - long bucketOrd = bucketOrds.add(globalOrd); - if (bucketOrd < 0) { - bucketOrd = -1 - bucketOrd; - collectExistingBucket(sub, doc, bucketOrd); - } else { - collectBucket(sub, doc, bucketOrd); - } - } - } - }; - } else { - return new LeafBucketCollectorBase(sub, ords) { - @Override - public void collect(int doc, long bucket) throws IOException { - if (ords.advanceExact(doc)) { - for (long globalOrd = ords.nextOrd(); - globalOrd != SortedSetDocValues.NO_MORE_ORDS; - globalOrd = ords.nextOrd()) { - long bucketOrd = bucketOrds.add(globalOrd); - if (bucketOrd < 0) { - bucketOrd = -1 - bucketOrd; - collectExistingBucket(sub, doc, bucketOrd); - } else { - collectBucket(sub, doc, bucketOrd); - } - } - } - } - }; - } - } - - @Override - protected long getBucketOrd(long termOrd) { - return bucketOrds.find(termOrd); - } - - @Override - protected void doClose() { - Releasables.close(bucketOrds); - } - + @Override + protected void doClose() { + Releasables.close(bucketOrds); } /** @@ -331,32 +304,44 @@ protected void doClose() { * instead of on the fly for each match.This is beneficial for low cardinality fields, because it can reduce * the amount of look-ups significantly. */ - public static class LowCardinality extends GlobalOrdinalsStringTermsAggregator { + static class LowCardinality extends GlobalOrdinalsStringTermsAggregator { private IntArray segmentDocCounts; - + private SortedSetDocValues globalOrds; private SortedSetDocValues segmentOrds; - public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals valuesSource, - BucketOrder order, DocValueFormat format, - BucketCountThresholds bucketCountThresholds, SearchContext context, Aggregator parent, - SubAggCollectionMode collectionMode, boolean showTermDocCountError, List pipelineAggregators, - Map metaData) throws IOException { - super(name, factories, valuesSource, order, format, bucketCountThresholds, null, context, parent, collectionMode, - showTermDocCountError, pipelineAggregators, metaData); + LowCardinality(String name, + AggregatorFactories factories, + ValuesSource.Bytes.WithOrdinals valuesSource, + BucketOrder order, + DocValueFormat format, + BucketCountThresholds bucketCountThresholds, + SearchContext context, + Aggregator parent, + boolean forceDenseMode, + SubAggCollectionMode collectionMode, + boolean showTermDocCountError, + List pipelineAggregators, + Map metaData) throws IOException { + super(name, factories, valuesSource, order, format, bucketCountThresholds, null, + context, parent, forceDenseMode, collectionMode, showTermDocCountError, pipelineAggregators, metaData); assert factories == null || factories.countAggregators() == 0; this.segmentDocCounts = context.bigArrays().newIntArray(1, true); } - // bucketOrd is ord + 1 to avoid a branch to deal with the missing ord @Override - protected LeafBucketCollector newCollector(final SortedSetDocValues ords, - LeafBucketCollector sub) { - segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + ords.getValueCount()); + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, + final LeafBucketCollector sub) throws IOException { + if (segmentOrds != null) { + mapSegmentCountsToGlobalCounts(); + } + globalOrds = valuesSource.globalOrdinalsValues(ctx); + segmentOrds = valuesSource.ordinalsValues(ctx); + segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount()); assert sub == LeafBucketCollector.NO_OP_COLLECTOR; - final SortedDocValues singleValues = DocValues.unwrapSingleton(ords); + final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds); if (singleValues != null) { - return new LeafBucketCollectorBase(sub, ords) { + return new LeafBucketCollectorBase(sub, segmentOrds) { @Override public void collect(int doc, long bucket) throws IOException { assert bucket == 0; @@ -367,14 +352,12 @@ public void collect(int doc, long bucket) throws IOException { } }; } else { - return new LeafBucketCollectorBase(sub, ords) { + return new LeafBucketCollectorBase(sub, segmentOrds) { @Override public void collect(int doc, long bucket) throws IOException { assert bucket == 0; - if (ords.advanceExact(doc)) { - for (long segmentOrd = ords.nextOrd(); - segmentOrd != SortedSetDocValues.NO_MORE_ORDS; - segmentOrd = ords.nextOrd()) { + if (segmentOrds.advanceExact(doc)) { + for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) { segmentDocCounts.increment(segmentOrd + 1, 1); } } @@ -383,18 +366,6 @@ public void collect(int doc, long bucket) throws IOException { } } - @Override - public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, - final LeafBucketCollector sub) throws IOException { - if (segmentOrds != null) { - mapSegmentCountsToGlobalCounts(); - } - - globalOrds = valuesSource.globalOrdinalsValues(ctx); - segmentOrds = valuesSource.ordinalsValues(ctx); - return newCollector(segmentOrds, sub); - } - @Override protected void doPostCollection() { if (segmentOrds != null) { @@ -426,7 +397,8 @@ private void mapSegmentCountsToGlobalCounts() { } final long ord = i - 1; // remember we do +1 when counting final long globalOrd = mapping == null ? ord : mapping.getGlobalOrd(ord); - incrementBucketDocCount(globalOrd, inc); + long bucketOrd = getBucketOrd(globalOrd); + incrementBucketDocCount(bucketOrd, inc); } } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 9a06dfe66f592..ca2974a10595f 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -53,10 +53,18 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory config, BucketOrder order, - IncludeExclude includeExclude, String executionHint, SubAggCollectionMode collectMode, - TermsAggregator.BucketCountThresholds bucketCountThresholds, boolean showTermDocCountError, SearchContext context, - AggregatorFactory parent, AggregatorFactories.Builder subFactoriesBuilder, Map metaData) throws IOException { + public TermsAggregatorFactory(String name, + ValuesSourceConfig config, + BucketOrder order, + IncludeExclude includeExclude, + String executionHint, + SubAggCollectionMode collectMode, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + boolean showTermDocCountError, + SearchContext context, + AggregatorFactory parent, + AggregatorFactories.Builder subFactoriesBuilder, + Map metaData) throws IOException { super(name, config, context, parent, subFactoriesBuilder, metaData); this.order = order; this.includeExclude = includeExclude; @@ -225,14 +233,24 @@ public enum ExecutionMode { MAP(new ParseField("map")) { @Override - Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order, - DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, - SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode, - boolean showTermDocCountError, List pipelineAggregators, Map metaData) - throws IOException { + Aggregator create(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + BucketOrder order, + DocValueFormat format, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + IncludeExclude includeExclude, + SearchContext context, + Aggregator parent, + SubAggCollectionMode subAggCollectMode, + boolean showTermDocCountError, + List pipelineAggregators, + Map metaData) throws IOException { + final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format); return new StringTermsAggregator(name, factories, valuesSource, order, format, bucketCountThresholds, filter, context, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); + } @Override @@ -244,15 +262,24 @@ boolean needsGlobalOrdinals() { GLOBAL_ORDINALS(new ParseField("global_ordinals")) { @Override - Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order, - DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, - SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode, - boolean showTermDocCountError, List pipelineAggregators, Map metaData) - throws IOException { + Aggregator create(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + BucketOrder order, + DocValueFormat format, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + IncludeExclude includeExclude, + SearchContext context, Aggregator parent, + SubAggCollectionMode subAggCollectMode, + boolean showTermDocCountError, + List pipelineAggregators, + Map metaData) throws IOException { + final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order, - format, bucketCountThresholds, filter, context, parent, subAggCollectMode, showTermDocCountError, + format, bucketCountThresholds, filter, context, parent, false, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); + } @Override @@ -264,15 +291,25 @@ boolean needsGlobalOrdinals() { GLOBAL_ORDINALS_HASH(new ParseField("global_ordinals_hash")) { @Override - Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order, - DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, - SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode, - boolean showTermDocCountError, List pipelineAggregators, Map metaData) - throws IOException { + Aggregator create(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + BucketOrder order, + DocValueFormat format, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + IncludeExclude includeExclude, + SearchContext context, + Aggregator parent, + SubAggCollectionMode subAggCollectMode, + boolean showTermDocCountError, + List pipelineAggregators, + Map metaData) throws IOException { + final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); - return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, - order, format, bucketCountThresholds, filter, context, parent, subAggCollectMode, showTermDocCountError, - pipelineAggregators, metaData); + return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, + order, format, bucketCountThresholds, filter, context, parent, true, subAggCollectMode, + showTermDocCountError, pipelineAggregators, metaData); + } @Override @@ -283,21 +320,31 @@ boolean needsGlobalOrdinals() { GLOBAL_ORDINALS_LOW_CARDINALITY(new ParseField("global_ordinals_low_cardinality")) { @Override - Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order, - DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, - SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode, - boolean showTermDocCountError, List pipelineAggregators, Map metaData) - throws IOException { + Aggregator create(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + BucketOrder order, + DocValueFormat format, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + IncludeExclude includeExclude, + SearchContext context, + Aggregator parent, + SubAggCollectionMode subAggCollectMode, + boolean showTermDocCountError, + List pipelineAggregators, + Map metaData) throws IOException { + if (includeExclude != null || factories.countAggregators() > 0 - // we need the FieldData impl to be able to extract the - // segment to global ord mapping + // we need the FieldData impl to be able to extract the + // segment to global ord mapping || valuesSource.getClass() != ValuesSource.Bytes.FieldData.class) { return GLOBAL_ORDINALS.create(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); } return new GlobalOrdinalsStringTermsAggregator.LowCardinality(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order, format, bucketCountThresholds, context, parent, - subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); + false, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); + } @Override @@ -321,11 +368,19 @@ public static ExecutionMode fromString(String value) { this.parseField = parseField; } - abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, BucketOrder order, - DocValueFormat format, TermsAggregator.BucketCountThresholds bucketCountThresholds, IncludeExclude includeExclude, - SearchContext context, Aggregator parent, SubAggCollectionMode subAggCollectMode, - boolean showTermDocCountError, List pipelineAggregators, Map metaData) - throws IOException; + abstract Aggregator create(String name, + AggregatorFactories factories, + ValuesSource valuesSource, + BucketOrder order, + DocValueFormat format, + TermsAggregator.BucketCountThresholds bucketCountThresholds, + IncludeExclude includeExclude, + SearchContext context, + Aggregator parent, + SubAggCollectionMode subAggCollectMode, + boolean showTermDocCountError, + List pipelineAggregators, + Map metaData) throws IOException; abstract boolean needsGlobalOrdinals(); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java index 46e371a3dfe0d..dd0785b2d70b7 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java @@ -233,16 +233,14 @@ public boolean accept(BytesRef value) { } public abstract static class OrdinalsFilter { - public abstract LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) - throws IOException; + public abstract LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException; } class PartitionedOrdinalsFilter extends OrdinalsFilter { @Override - public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) - throws IOException { + public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException { final long numOrds = globalOrdinals.getValueCount(); final LongBitSet acceptedGlobalOrdinals = new LongBitSet(numOrds); final TermsEnum termEnum = globalOrdinals.termsEnum(); @@ -271,8 +269,7 @@ private AutomatonBackedOrdinalsFilter(Automaton automaton) { * */ @Override - public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) - throws IOException { + public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException { LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); TermsEnum globalTermsEnum; Terms globalTerms = new DocValuesTerms(globalOrdinals); @@ -297,8 +294,7 @@ static class TermListBackedOrdinalsFilter extends OrdinalsFilter { } @Override - public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) - throws IOException { + public LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException { LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); if (includeValues != null) { for (BytesRef term : includeValues) { diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactoryTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactoryTests.java index 7d7ffac4fb324..fe32bff86b877 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactoryTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactoryTests.java @@ -21,10 +21,11 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.test.ESTestCase; import static org.hamcrest.Matchers.equalTo; -public class TermsAggregatorFactoryTests extends ESSingleNodeTestCase { +public class TermsAggregatorFactoryTests extends ESTestCase { public void testSubAggCollectMode() throws Exception { assertThat(TermsAggregatorFactory.subAggCollectionMode(Integer.MAX_VALUE, -1), equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST)); diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index f54cb902d96b8..33e0c54d93425 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -35,6 +35,8 @@ import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; +import org.elasticsearch.search.aggregations.AggregationBuilders; +import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.BucketOrder; @@ -44,7 +46,53 @@ import java.util.ArrayList; import java.util.List; +import static org.hamcrest.Matchers.instanceOf; + public class TermsAggregatorTests extends AggregatorTestCase { + public void testGlobalOrdinalsExecutionHint() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); + indexWriter.close(); + IndexReader indexReader = DirectoryReader.open(directory); + // We do not use LuceneTestCase.newSearcher because we need a DirectoryReader + IndexSearcher indexSearcher = new IndexSearcher(indexReader); + + TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name", ValueType.STRING) + .field("string") + .collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST); + MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType(); + fieldType.setName("string"); + fieldType.setHasDocValues(true); + + TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); + GlobalOrdinalsStringTermsAggregator globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator; + assertFalse(globalAgg.remapGlobalOrds()); + + aggregationBuilder + .subAggregation(AggregationBuilders.cardinality("card").field("string")); + aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); + globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator; + assertFalse(globalAgg.remapGlobalOrds()); + + aggregationBuilder + .order(BucketOrder.aggregation("card", true)); + aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); + globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator; + assertTrue(globalAgg.remapGlobalOrds()); + + aggregationBuilder = new TermsAggregationBuilder("_name", ValueType.STRING) + .field("string") + .executionHint("global_ordinals_hash"); + aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); + globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator; + assertTrue(globalAgg.remapGlobalOrds()); + indexReader.close(); + directory.close(); + } public void testTermsAggregator() throws Exception { Directory directory = newDirectory(); diff --git a/core/src/test/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java b/core/src/test/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java index b09c177bf0b98..9914938854d03 100644 --- a/core/src/test/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java +++ b/core/src/test/java/org/elasticsearch/search/profile/aggregation/AggregationProfilerIT.java @@ -154,7 +154,7 @@ public void testMultiLevelProfile() { ProfileResult termsAggResult = histoAggResult.getProfiledChildren().get(0); assertThat(termsAggResult, notNullValue()); - assertThat(termsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName())); + assertThat(termsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName())); assertThat(termsAggResult.getLuceneDescription(), equalTo("terms")); assertThat(termsAggResult.getTime(), greaterThan(0L)); Map termsBreakdown = termsAggResult.getTimeBreakdown(); @@ -224,7 +224,7 @@ public void testMultiLevelProfileBreadthFirst() { ProfileResult termsAggResult = histoAggResult.getProfiledChildren().get(0); assertThat(termsAggResult, notNullValue()); - assertThat(termsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName())); + assertThat(termsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName())); assertThat(termsAggResult.getLuceneDescription(), equalTo("terms")); assertThat(termsAggResult.getTime(), greaterThan(0L)); Map termsBreakdown = termsAggResult.getTimeBreakdown(); @@ -355,7 +355,7 @@ public void testComplexProfile() { ProfileResult tagsAggResult = histoAggResult.getProfiledChildren().get(0); assertThat(tagsAggResult, notNullValue()); - assertThat(tagsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName())); + assertThat(tagsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName())); assertThat(tagsAggResult.getLuceneDescription(), equalTo("tags")); assertThat(tagsAggResult.getTime(), greaterThan(0L)); Map tagsBreakdown = tagsAggResult.getTimeBreakdown(); @@ -406,7 +406,7 @@ public void testComplexProfile() { ProfileResult stringsAggResult = histoAggResult.getProfiledChildren().get(1); assertThat(stringsAggResult, notNullValue()); - assertThat(stringsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName())); + assertThat(stringsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName())); assertThat(stringsAggResult.getLuceneDescription(), equalTo("strings")); assertThat(stringsAggResult.getTime(), greaterThan(0L)); Map stringsBreakdown = stringsAggResult.getTimeBreakdown(); @@ -457,7 +457,7 @@ public void testComplexProfile() { tagsAggResult = stringsAggResult.getProfiledChildren().get(2); assertThat(tagsAggResult, notNullValue()); - assertThat(tagsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.WithHash.class.getName())); + assertThat(tagsAggResult.getQueryName(), equalTo(GlobalOrdinalsStringTermsAggregator.class.getName())); assertThat(tagsAggResult.getLuceneDescription(), equalTo("tags")); assertThat(tagsAggResult.getTime(), greaterThan(0L)); tagsBreakdown = tagsAggResult.getTimeBreakdown(); diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 32513a51c132e..e2b87e82d1dc1 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -89,8 +89,8 @@ public abstract class AggregatorTestCase extends ESTestCase { /** Create a factory for the given aggregation builder. */ protected AggregatorFactory createAggregatorFactory(AggregationBuilder aggregationBuilder, - IndexSearcher indexSearcher, - MappedFieldType... fieldTypes) throws IOException { + IndexSearcher indexSearcher, + MappedFieldType... fieldTypes) throws IOException { IndexSettings indexSettings = createIndexSettings(); SearchContext searchContext = createSearchContext(indexSearcher, indexSettings); CircuitBreakerService circuitBreakerService = new NoneCircuitBreakerService(); From 3eaedad5102dd9977ddea7678c9df86dd4f335e6 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Mon, 29 May 2017 18:57:40 +0200 Subject: [PATCH 2/4] unrelated change --- .../elasticsearch/search/aggregations/AggregatorTestCase.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index e2b87e82d1dc1..32513a51c132e 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -89,8 +89,8 @@ public abstract class AggregatorTestCase extends ESTestCase { /** Create a factory for the given aggregation builder. */ protected AggregatorFactory createAggregatorFactory(AggregationBuilder aggregationBuilder, - IndexSearcher indexSearcher, - MappedFieldType... fieldTypes) throws IOException { + IndexSearcher indexSearcher, + MappedFieldType... fieldTypes) throws IOException { IndexSettings indexSettings = createIndexSettings(); SearchContext searchContext = createSearchContext(indexSearcher, indexSettings); CircuitBreakerService circuitBreakerService = new NoneCircuitBreakerService(); From b5339a6b5db680e2b313a75699b76683931673e3 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 30 May 2017 14:06:58 +0200 Subject: [PATCH 3/4] remap global ord should also applied to significant terms agg --- .../significant/GlobalOrdinalsSignificantTermsAggregator.java | 4 +++- .../bucket/significant/SignificantTermsAggregatorFactory.java | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java index 1e719e3280cc9..efbd41dac95c5 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java @@ -61,14 +61,16 @@ public GlobalOrdinalsSignificantTermsAggregator(String name, IncludeExclude.OrdinalsFilter includeExclude, SearchContext context, Aggregator parent, + boolean forceRemapGlobalOrds, SignificanceHeuristic significanceHeuristic, SignificantTermsAggregatorFactory termsAggFactory, List pipelineAggregators, Map metaData) throws IOException { super(name, factories, valuesSource, null, format, bucketCountThresholds, includeExclude, context, parent, - false, SubAggCollectionMode.DEPTH_FIRST, false, pipelineAggregators, metaData); + forceRemapGlobalOrds, SubAggCollectionMode.DEPTH_FIRST, false, pipelineAggregators, metaData); this.significanceHeuristic = significanceHeuristic; this.termsAggFactory = termsAggFactory; + this.numCollectedDocs = 0; } @Override diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index e2b6082c0080a..ba5191b122946 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -292,7 +292,7 @@ Aggregator create(String name, final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter, - aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); + aggregationContext, parent, false, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); } @@ -316,7 +316,7 @@ Aggregator create(String name, final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter, aggregationContext, parent, - significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); + true, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); } }; From 20d0ee96cf86749f30c486d8b7229e87e781c662 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 30 May 2017 16:30:38 +0200 Subject: [PATCH 4/4] Fix doc counts for empty significant terms aggregations --- .../significant/GlobalOrdinalsSignificantTermsAggregator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java index efbd41dac95c5..50f114b35b5ea 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/GlobalOrdinalsSignificantTermsAggregator.java @@ -153,7 +153,7 @@ public SignificantStringTerms buildEmptyAggregation() { IndexReader topReader = searcher.getIndexReader(); int supersetSize = topReader.numDocs(); return new SignificantStringTerms(name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), - pipelineAggregators(), metaData(), format, 0, supersetSize, significanceHeuristic, emptyList()); + pipelineAggregators(), metaData(), format, numCollectedDocs, supersetSize, significanceHeuristic, emptyList()); } @Override