InternalHistogram.java
@@ -213,7 +213,7 @@ public int hashCode() {
     private final DocValueFormat format;
     private final boolean keyed;
     private final long minDocCount;
-    private final EmptyBucketInfo emptyBucketInfo;
+    final EmptyBucketInfo emptyBucketInfo;

     InternalHistogram(String name, List<Bucket> buckets, BucketOrder order, long minDocCount, EmptyBucketInfo emptyBucketInfo,
             DocValueFormat formatter, boolean keyed, List<PipelineAggregator> pipelineAggregators,
@@ -302,7 +302,7 @@ private List<Bucket> reduceBuckets(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
         final PriorityQueue<IteratorAndCurrent> pq = new PriorityQueue<IteratorAndCurrent>(aggregations.size()) {
             @Override
             protected boolean lessThan(IteratorAndCurrent a, IteratorAndCurrent b) {
-                return a.current.key < b.current.key;
+                return Double.compare(a.current.key, b.current.key) < 0;
             }
         };
         for (InternalAggregation aggregation : aggregations) {
@@ -405,7 +405,7 @@ private void addEmptyBuckets(List<Bucket> list, ReduceContext reduceContext) {
                 iter.add(new Bucket(key, 0, keyed, format, reducedEmptySubAggs));
                 key = nextKey(key);
             }
-            assert key == nextBucket.key;
+            assert key == nextBucket.key || Double.isNaN(nextBucket.key) : "key: " + key + ", nextBucket.key: " + nextBucket.key;
         }
         lastBucket = iter.next();
     } while (iter.hasNext());
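Both changes above are about double bucket keys that may be NaN (see testHandlesNaN further down): the '<' operator is always false when either operand is NaN, so it cannot be relied on for ordering, while Double.compare defines a total order that sorts NaN after every other value. A minimal standalone sketch of the difference, not part of the patch:

public class NaNOrderingDemo {
    public static void main(String[] args) {
        double nan = Double.NaN;
        System.out.println(1.0 < nan);                     // false
        System.out.println(nan < 1.0);                     // false, so '<' gives no usable order for NaN keys
        System.out.println(Double.compare(1.0, nan) < 0);  // true: 1.0 sorts before NaN
        System.out.println(Double.compare(nan, 1.0) < 0);  // false: NaN sorts last
    }
}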
InternalHistogramTests.java
@@ -25,9 +25,9 @@
 import org.elasticsearch.search.aggregations.BucketOrder;
 import org.elasticsearch.search.aggregations.InternalAggregation;
 import org.elasticsearch.search.aggregations.InternalAggregations;
-import org.elasticsearch.test.InternalMultiBucketAggregationTestCase;
 import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation;
 import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+import org.elasticsearch.test.InternalMultiBucketAggregationTestCase;

 import java.util.ArrayList;
 import java.util.Arrays;
@@ -40,29 +40,55 @@ public class InternalHistogramTests extends InternalMultiBucketAggregationTestCase

     private boolean keyed;
     private DocValueFormat format;
+    private int interval;
+    private int minDocCount;
+    private InternalHistogram.EmptyBucketInfo emptyBucketInfo;
+    private int offset;

     @Override
-    public void setUp() throws Exception{
+    public void setUp() throws Exception {
         super.setUp();
         keyed = randomBoolean();
         format = randomNumericDocValueFormat();
Contributor:
Can you add a small comment explaining why we need to use the same interval, offset, ... in all tests?
The other solution would be to add an abstract method in InternalAggregationTestCase that creates a list of random instances for the reduce test.

Member Author:
I will add a comment. Given the issues that this triggered in other test methods, and the fact that we were already doing the same for a couple of fields, I think it makes sense to make this change overall rather than changing only the reduce test. I think it makes the test more realistic too.

Member Author:
I thought a bit more about this and I think I understand your comment better now. Why limit the randomization of different test instances if the same interval etc. are needed only for proper reduction tests? On the other hand, it seems like reduction tests are the only case where we call createTestInstance multiple times as part of the same test method. The consequence of the current change is that all of the test methods in the same run will reuse the same interval, offset, minDocCount and emptyBucketInfo values, which may limit test coverage a bit (so it's not true that it makes the test more realistic, as I previously said). I wonder if it's worth changing this, though, and making the base class more complicated.

Contributor:
We could have a createTestReduceInstances method that by default calls createTestInstance and that can be extended by subclasses like this one to ensure that the instances share the same parameters. I agree that it would complicate the base class a little bit, but it would also make the test more realistic, so I have mixed feelings. Let's continue the discussion since we also need to fix the date_histogram tests.
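A rough illustration of that suggestion, not the actual InternalAggregationTestCase API (the simplified generic bound, the count parameter and the default body are assumptions):

import java.util.ArrayList;
import java.util.List;

public abstract class InternalAggregationTestCase<T> {

    protected abstract T createTestInstance();

    // Hypothetical hook: the reduce test would call this instead of invoking
    // createTestInstance() in a loop. Subclasses such as InternalHistogramTests
    // could override it to build instances that all share the same interval,
    // offset, minDocCount and emptyBucketInfo, while every other test method
    // keeps fully independent randomization.
    protected List<T> createTestReduceInstances(int count) {
        List<T> instances = new ArrayList<>(count);
        for (int i = 0; i < count; i++) {
            instances.add(createTestInstance());
        }
        return instances;
    }
}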

+        //in order for reduction to work properly (and be realistic) we need to use the same interval, minDocCount, emptyBucketInfo
+        //and offset in all randomly created aggs as part of the same test run. This is particularly important when minDocCount is
+        //set to 0 as empty buckets need to be added to fill the holes.
+        interval = randomIntBetween(1, 3);
+        offset = randomIntBetween(0, 3);
+        if (randomBoolean()) {
+            minDocCount = randomIntBetween(1, 10);
+            emptyBucketInfo = null;
+        } else {
+            minDocCount = 0;
+            //it's ok if minBound and maxBound are outside the range of the generated buckets, that will just mean that
+            //empty buckets won't be added before the first bucket and/or after the last one
+            int minBound = randomInt(50) - 30;
+            int maxBound = randomNumberOfBuckets() * interval + randomIntBetween(0, 10);
+            emptyBucketInfo = new InternalHistogram.EmptyBucketInfo(interval, offset, minBound, maxBound, InternalAggregations.EMPTY);
+        }
     }

+    private double round(double key) {
+        return Math.floor((key - offset) / interval) * interval + offset;
+    }
+
     @Override
     protected InternalHistogram createTestInstance(String name,
                                                    List<PipelineAggregator> pipelineAggregators,
                                                    Map<String, Object> metaData,
                                                    InternalAggregations aggregations) {
-        final int base = randomInt(50) - 30;
+        final double base = round(randomInt(50) - 30);
         final int numBuckets = randomNumberOfBuckets();
-        final int interval = randomIntBetween(1, 3);
         List<InternalHistogram.Bucket> buckets = new ArrayList<>();
         for (int i = 0; i < numBuckets; ++i) {
-            final int docCount = TestUtil.nextInt(random(), 1, 50);
-            buckets.add(new InternalHistogram.Bucket(base + i * interval, docCount, keyed, format, aggregations));
+            //rarely leave some holes to be filled up with empty buckets in case minDocCount is set to 0
+            if (frequently()) {
+                final int docCount = TestUtil.nextInt(random(), 1, 50);
+                buckets.add(new InternalHistogram.Bucket(base + i * interval, docCount, keyed, format, aggregations));
+            }
         }
         BucketOrder order = BucketOrder.key(randomBoolean());
-        return new InternalHistogram(name, buckets, order, 1, null, format, keyed, pipelineAggregators, metaData);
+        return new InternalHistogram(name, buckets, order, minDocCount, emptyBucketInfo, format, keyed, pipelineAggregators, metaData);
     }

     // issue 26787
@@ -88,13 +114,36 @@ public void testHandlesNaN() {

     @Override
     protected void assertReduced(InternalHistogram reduced, List<InternalHistogram> inputs) {
-        Map<Double, Long> expectedCounts = new TreeMap<>();
+        TreeMap<Double, Long> expectedCounts = new TreeMap<>();
         for (Histogram histogram : inputs) {
             for (Histogram.Bucket bucket : histogram.getBuckets()) {
                 expectedCounts.compute((Double) bucket.getKey(),
                         (key, oldValue) -> (oldValue == null ? 0 : oldValue) + bucket.getDocCount());
             }
         }
+        if (minDocCount == 0) {
+            double minBound = round(emptyBucketInfo.minBound);
+            if (expectedCounts.isEmpty() && emptyBucketInfo.minBound < emptyBucketInfo.maxBound) {
+                expectedCounts.put(minBound, 0L);
+            }
+            if (expectedCounts.isEmpty() == false) {
+                Double nextKey = expectedCounts.firstKey();
+                while (nextKey < expectedCounts.lastKey()) {
+                    expectedCounts.putIfAbsent(nextKey, 0L);
+                    nextKey += interval;
+                }
+                while (minBound < expectedCounts.firstKey()) {
+                    expectedCounts.put(expectedCounts.firstKey() - interval, 0L);
+                }
+                double maxBound = round(emptyBucketInfo.maxBound);
+                while (expectedCounts.lastKey() < maxBound) {
+                    expectedCounts.put(expectedCounts.lastKey() + interval, 0L);
+                }
+            }
+        } else {
+            expectedCounts.entrySet().removeIf(doubleLongEntry -> doubleLongEntry.getValue() < minDocCount);
+        }

         Map<Double, Long> actualCounts = new TreeMap<>();
         for (Histogram.Bucket bucket : reduced.getBuckets()) {
             actualCounts.compute((Double) bucket.getKey(),
@@ -121,6 +170,7 @@ protected InternalHistogram mutateInstance(InternalHistogram instance) {
         long minDocCount = instance.getMinDocCount();
         List<PipelineAggregator> pipelineAggregators = instance.pipelineAggregators();
         Map<String, Object> metaData = instance.getMetaData();
+        InternalHistogram.EmptyBucketInfo emptyBucketInfo = instance.emptyBucketInfo;
         switch (between(0, 4)) {
         case 0:
             name += randomAlphaOfLength(5);
@@ -135,6 +185,7 @@
             break;
         case 3:
             minDocCount += between(1, 10);
+            emptyBucketInfo = null;
             break;
         case 4:
             if (metaData == null) {
@@ -147,6 +198,6 @@
         default:
             throw new AssertionError("Illegal randomisation branch");
         }
-        return new InternalHistogram(name, buckets, order, minDocCount, null, format, keyed, pipelineAggregators, metaData);
+        return new InternalHistogram(name, buckets, order, minDocCount, emptyBucketInfo, format, keyed, pipelineAggregators, metaData);
     }
 }
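To make the empty-bucket expectations built in assertReduced concrete, here is a small standalone walk-through with fixed numbers; the class name, bucket keys and bounds are made up for illustration and mirror the logic above rather than reuse the test code:

import java.util.TreeMap;

public class EmptyBucketFillDemo {
    static final int INTERVAL = 3;
    static final int OFFSET = 2;

    // same rounding as the round(double) helper added to the test:
    // snap a raw value onto the interval/offset grid
    static double round(double key) {
        return Math.floor((key - OFFSET) / INTERVAL) * INTERVAL + OFFSET;
    }

    public static void main(String[] args) {
        TreeMap<Double, Long> expected = new TreeMap<>();
        expected.put(5.0, 4L);   // non-empty buckets produced by the reduce
        expected.put(11.0, 2L);

        // fill holes between the first and last existing key
        double nextKey = expected.firstKey();
        while (nextKey < expected.lastKey()) {
            expected.putIfAbsent(nextKey, 0L);
            nextKey += INTERVAL;
        }
        // extend down to the rounded minBound and up to the rounded maxBound
        double minBound = round(0);   // -1.0
        while (minBound < expected.firstKey()) {
            expected.put(expected.firstKey() - INTERVAL, 0L);
        }
        double maxBound = round(12);  // 11.0
        while (expected.lastKey() < maxBound) {
            expected.put(expected.lastKey() + INTERVAL, 0L);
        }

        // prints {-1.0=0, 2.0=0, 5.0=4, 8.0=0, 11.0=2}
        System.out.println(expected);
    }
}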