Skip to content

Commit 61e5844

Browse files
committed
Add early termination support for min/max aggregations (#33375)
This commit adds the support to early terminate the collection of a leaf in the min/max aggregator. If the query matches all documents the min and max value for a numeric field can be retrieved efficiently in the points reader. This change applies this optimization when possible.
1 parent b56d0a2 commit 61e5844

File tree

11 files changed

+737
-21
lines changed

11 files changed

+737
-21
lines changed

server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,11 @@ public Float parse(Object value, boolean coerce) {
187187
return result;
188188
}
189189

190+
@Override
191+
public Number parsePoint(byte[] value) {
192+
return HalfFloatPoint.decodeDimension(value, 0);
193+
}
194+
190195
@Override
191196
public Float parse(XContentParser parser, boolean coerce) throws IOException {
192197
float parsed = parser.floatValue(coerce);
@@ -279,6 +284,11 @@ public Float parse(Object value, boolean coerce) {
279284
return result;
280285
}
281286

287+
@Override
288+
public Number parsePoint(byte[] value) {
289+
return FloatPoint.decodeDimension(value, 0);
290+
}
291+
282292
@Override
283293
public Float parse(XContentParser parser, boolean coerce) throws IOException {
284294
float parsed = parser.floatValue(coerce);
@@ -360,6 +370,11 @@ public Double parse(Object value, boolean coerce) {
360370
return parsed;
361371
}
362372

373+
@Override
374+
public Number parsePoint(byte[] value) {
375+
return DoublePoint.decodeDimension(value, 0);
376+
}
377+
363378
@Override
364379
public Double parse(XContentParser parser, boolean coerce) throws IOException {
365380
double parsed = parser.doubleValue(coerce);
@@ -452,6 +467,11 @@ public Byte parse(Object value, boolean coerce) {
452467
return (byte) doubleValue;
453468
}
454469

470+
@Override
471+
public Number parsePoint(byte[] value) {
472+
return INTEGER.parsePoint(value).byteValue();
473+
}
474+
455475
@Override
456476
public Short parse(XContentParser parser, boolean coerce) throws IOException {
457477
int value = parser.intValue(coerce);
@@ -508,6 +528,11 @@ public Short parse(Object value, boolean coerce) {
508528
return (short) doubleValue;
509529
}
510530

531+
@Override
532+
public Number parsePoint(byte[] value) {
533+
return INTEGER.parsePoint(value).shortValue();
534+
}
535+
511536
@Override
512537
public Short parse(XContentParser parser, boolean coerce) throws IOException {
513538
return parser.shortValue(coerce);
@@ -560,6 +585,11 @@ public Integer parse(Object value, boolean coerce) {
560585
return (int) doubleValue;
561586
}
562587

588+
@Override
589+
public Number parsePoint(byte[] value) {
590+
return IntPoint.decodeDimension(value, 0);
591+
}
592+
563593
@Override
564594
public Integer parse(XContentParser parser, boolean coerce) throws IOException {
565595
return parser.intValue(coerce);
@@ -674,6 +704,11 @@ public Long parse(Object value, boolean coerce) {
674704
return Numbers.toLong(stringValue, coerce);
675705
}
676706

707+
@Override
708+
public Number parsePoint(byte[] value) {
709+
return LongPoint.decodeDimension(value, 0);
710+
}
711+
677712
@Override
678713
public Long parse(XContentParser parser, boolean coerce) throws IOException {
679714
return parser.longValue(coerce);
@@ -790,6 +825,7 @@ public abstract Query rangeQuery(String field, Object lowerTerm, Object upperTer
790825
boolean hasDocValues);
791826
public abstract Number parse(XContentParser parser, boolean coerce) throws IOException;
792827
public abstract Number parse(Object value, boolean coerce);
828+
public abstract Number parsePoint(byte[] value);
793829
public abstract List<Field> createFields(String name, Number value, boolean indexed,
794830
boolean docValued, boolean stored);
795831
Number valueForSearch(Number value) {
@@ -938,6 +974,10 @@ public DocValueFormat docValueFormat(String format, DateTimeZone timeZone) {
938974
}
939975
}
940976

977+
public Number parsePoint(byte[] value) {
978+
return type.parsePoint(value);
979+
}
980+
941981
@Override
942982
public boolean equals(Object o) {
943983
if (super.equals(o) == false) {

server/src/main/java/org/elasticsearch/search/aggregations/metrics/max/MaxAggregator.java

Lines changed: 91 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,12 @@
1818
*/
1919
package org.elasticsearch.search.aggregations.metrics.max;
2020

21+
import org.apache.lucene.index.LeafReader;
2122
import org.apache.lucene.index.LeafReaderContext;
23+
import org.apache.lucene.index.PointValues;
24+
import org.apache.lucene.search.CollectionTerminatedException;
25+
import org.apache.lucene.util.Bits;
26+
import org.apache.lucene.util.FutureArrays;
2227
import org.elasticsearch.common.lease.Releasables;
2328
import org.elasticsearch.common.util.BigArrays;
2429
import org.elasticsearch.common.util.DoubleArray;
@@ -33,30 +38,45 @@
3338
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator;
3439
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
3540
import org.elasticsearch.search.aggregations.support.ValuesSource;
41+
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
3642
import org.elasticsearch.search.internal.SearchContext;
3743

3844
import java.io.IOException;
3945
import java.util.List;
4046
import java.util.Map;
47+
import java.util.function.Function;
48+
49+
import static org.elasticsearch.search.aggregations.metrics.min.MinAggregator.getPointReaderOrNull;
4150

4251
public class MaxAggregator extends NumericMetricsAggregator.SingleValue {
4352

4453
final ValuesSource.Numeric valuesSource;
4554
final DocValueFormat formatter;
4655

56+
final String pointField;
57+
final Function<byte[], Number> pointConverter;
58+
4759
DoubleArray maxes;
4860

49-
public MaxAggregator(String name, ValuesSource.Numeric valuesSource, DocValueFormat formatter,
50-
SearchContext context,
51-
Aggregator parent, List<PipelineAggregator> pipelineAggregators,
52-
Map<String, Object> metaData) throws IOException {
61+
MaxAggregator(String name,
62+
ValuesSourceConfig<ValuesSource.Numeric> config,
63+
ValuesSource.Numeric valuesSource,
64+
SearchContext context,
65+
Aggregator parent, List<PipelineAggregator> pipelineAggregators,
66+
Map<String, Object> metaData) throws IOException {
5367
super(name, context, parent, pipelineAggregators, metaData);
5468
this.valuesSource = valuesSource;
55-
this.formatter = formatter;
5669
if (valuesSource != null) {
5770
maxes = context.bigArrays().newDoubleArray(1, false);
5871
maxes.fill(0, maxes.size(), Double.NEGATIVE_INFINITY);
5972
}
73+
this.formatter = config.format();
74+
this.pointConverter = getPointReaderOrNull(context, parent, config);
75+
if (pointConverter != null) {
76+
pointField = config.fieldContext().field();
77+
} else {
78+
pointField = null;
79+
}
6080
}
6181

6282
@Override
@@ -68,8 +88,28 @@ public boolean needsScores() {
6888
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
6989
final LeafBucketCollector sub) throws IOException {
7090
if (valuesSource == null) {
71-
return LeafBucketCollector.NO_OP_COLLECTOR;
72-
}
91+
if (parent != null) {
92+
return LeafBucketCollector.NO_OP_COLLECTOR;
93+
} else {
94+
// we have no parent and the values source is empty so we can skip collecting hits.
95+
throw new CollectionTerminatedException();
96+
}
97+
}
98+
if (pointConverter != null) {
99+
Number segMax = findLeafMaxValue(ctx.reader(), pointField, pointConverter);
100+
if (segMax != null) {
101+
/**
102+
* There is no parent aggregator (see {@link MinAggregator#getPointReaderOrNull}
103+
* so the ordinal for the bucket is always 0.
104+
*/
105+
assert maxes.size() == 1;
106+
double max = maxes.get(0);
107+
max = Math.max(max, segMax.doubleValue());
108+
maxes.set(0, max);
109+
// the maximum value has been extracted, we don't need to collect hits on this segment.
110+
throw new CollectionTerminatedException();
111+
}
112+
}
73113
final BigArrays bigArrays = context.bigArrays();
74114
final SortedNumericDoubleValues allValues = valuesSource.doubleValues(ctx);
75115
final NumericDoubleValues values = MultiValueMode.MAX.select(allValues);
@@ -118,4 +158,48 @@ public InternalAggregation buildEmptyAggregation() {
118158
public void doClose() {
119159
Releasables.close(maxes);
120160
}
161+
162+
/**
163+
* Returns the maximum value indexed in the <code>fieldName</code> field or <code>null</code>
164+
* if the value cannot be inferred from the indexed {@link PointValues}.
165+
*/
166+
public static Number findLeafMaxValue(LeafReader reader, String fieldName, Function<byte[], Number> converter) throws IOException {
167+
final PointValues pointValues = reader.getPointValues(fieldName);
168+
if (pointValues == null) {
169+
return null;
170+
}
171+
final Bits liveDocs = reader.getLiveDocs();
172+
if (liveDocs == null) {
173+
return converter.apply(pointValues.getMaxPackedValue());
174+
}
175+
int numBytes = pointValues.getBytesPerDimension();
176+
final byte[] maxValue = pointValues.getMaxPackedValue();
177+
final Number[] result = new Number[1];
178+
pointValues.intersect(new PointValues.IntersectVisitor() {
179+
@Override
180+
public void visit(int docID) {
181+
throw new UnsupportedOperationException();
182+
}
183+
184+
@Override
185+
public void visit(int docID, byte[] packedValue) {
186+
if (liveDocs.get(docID)) {
187+
// we need to collect all values in this leaf (the sort is ascending) where
188+
// the last live doc is guaranteed to contain the max value for the segment.
189+
result[0] = converter.apply(packedValue);
190+
}
191+
}
192+
193+
@Override
194+
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
195+
if (FutureArrays.equals(maxValue, 0, numBytes, maxPackedValue, 0, numBytes)) {
196+
// we only check leaves that contain the max value for the segment.
197+
return PointValues.Relation.CELL_CROSSES_QUERY;
198+
} else {
199+
return PointValues.Relation.CELL_OUTSIDE_QUERY;
200+
}
201+
}
202+
});
203+
return result[0];
204+
}
121205
}

server/src/main/java/org/elasticsearch/search/aggregations/metrics/max/MaxAggregatorFactory.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,13 @@ public MaxAggregatorFactory(String name, ValuesSourceConfig<Numeric> config, Sea
4343
@Override
4444
protected Aggregator createUnmapped(Aggregator parent,
4545
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
46-
return new MaxAggregator(name, null, config.format(), context, parent, pipelineAggregators, metaData);
46+
return new MaxAggregator(name, config, null, context, parent, pipelineAggregators, metaData);
4747
}
4848

4949
@Override
5050
protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource, Aggregator parent,
5151
boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
5252
throws IOException {
53-
return new MaxAggregator(name, valuesSource, config.format(), context, parent, pipelineAggregators, metaData);
53+
return new MaxAggregator(name, config, valuesSource, context, parent, pipelineAggregators, metaData);
5454
}
5555
}

0 commit comments

Comments
 (0)