From a7fa509f39d70e00aabbb4509f902b46b8c84b48 Mon Sep 17 00:00:00 2001 From: jimczi Date: Thu, 6 Aug 2020 14:56:32 +0200 Subject: [PATCH 1/2] Fix AOOBE when setting min_doc_count to 0 in significant_terms This commit fixes the computation of the subset size on empty buckets (doc count of 0). The aggregator test refactoring in #60683 revealed this bug. --- .../terms/GlobalOrdinalsStringTermsAggregator.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index f5943d7902e63..cf1f50ef2283f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -561,7 +561,7 @@ private InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws BucketUpdater updater = bucketUpdater(owningBucketOrds[ordIdx]); collectionStrategy.forEach(owningBucketOrds[ordIdx], new BucketInfoConsumer() { TB spare = null; - + @Override public void accept(long globalOrd, long bucketOrd, long docCount) throws IOException { otherDocCount[finalOrdIdx] += docCount; @@ -574,7 +574,7 @@ public void accept(long globalOrd, long bucketOrd, long docCount) throws IOExcep } } }); - + // Get the top buckets topBucketsPreOrd[ordIdx] = buildBuckets(ordered.size()); for (int i = ordered.size() - 1; i >= 0; --i) { @@ -799,7 +799,8 @@ SignificantStringTerms.Bucket buildEmptyTemporaryBucket() { @Override BucketUpdater bucketUpdater(long owningBucketOrd) throws IOException { - long subsetSize = subsetSizes.get(owningBucketOrd); + // the subset size is missing for empty buckets (doc count of 0) + long subsetSize = owningBucketOrd < subsetSizes.size() ? subsetSizes.get(owningBucketOrd) : 0; return (spare, globalOrd, bucketOrd, docCount) -> { spare.bucketOrd = bucketOrd; oversizedCopy(lookupGlobalOrd.apply(globalOrd), spare.termBytes); @@ -833,13 +834,15 @@ void buildSubAggs(SignificantStringTerms.Bucket[][] topBucketsPreOrd) throws IOE @Override SignificantStringTerms buildResult(long owningBucketOrd, long otherDocCount, SignificantStringTerms.Bucket[] topBuckets) { + // the subset size is missing for empty buckets (doc count of 0) + long subsetSize = owningBucketOrd < subsetSizes.size() ? subsetSizes.get(owningBucketOrd) : 0; return new SignificantStringTerms( name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), metadata(), format, - subsetSizes.get(owningBucketOrd), + subsetSize, supersetSize, significanceHeuristic, Arrays.asList(topBuckets) From 3ddfda379e81e9fb655cd872c0cf577fe87f89f8 Mon Sep 17 00:00:00 2001 From: jimczi Date: Thu, 6 Aug 2020 15:40:46 +0200 Subject: [PATCH 2/2] address feedback --- .../terms/GlobalOrdinalsStringTermsAggregator.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index cf1f50ef2283f..cd2db4608274f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -797,10 +797,14 @@ SignificantStringTerms.Bucket buildEmptyTemporaryBucket() { return new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format, 0); } + private long subsetSize(long owningBucketOrd) { + // if the owningBucketOrd is not in the array that means the bucket is empty so the size has to be 0 + return owningBucketOrd < subsetSizes.size() ? subsetSizes.get(owningBucketOrd) : 0; + } + @Override BucketUpdater bucketUpdater(long owningBucketOrd) throws IOException { - // the subset size is missing for empty buckets (doc count of 0) - long subsetSize = owningBucketOrd < subsetSizes.size() ? subsetSizes.get(owningBucketOrd) : 0; + long subsetSize = subsetSize(owningBucketOrd); return (spare, globalOrd, bucketOrd, docCount) -> { spare.bucketOrd = bucketOrd; oversizedCopy(lookupGlobalOrd.apply(globalOrd), spare.termBytes); @@ -834,15 +838,13 @@ void buildSubAggs(SignificantStringTerms.Bucket[][] topBucketsPreOrd) throws IOE @Override SignificantStringTerms buildResult(long owningBucketOrd, long otherDocCount, SignificantStringTerms.Bucket[] topBuckets) { - // the subset size is missing for empty buckets (doc count of 0) - long subsetSize = owningBucketOrd < subsetSizes.size() ? subsetSizes.get(owningBucketOrd) : 0; return new SignificantStringTerms( name, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), metadata(), format, - subsetSize, + subsetSize(owningBucketOrd), supersetSize, significanceHeuristic, Arrays.asList(topBuckets)