Skip to content

Commit e6d0f7d

Browse files
authored
Fix format problem in composite of unmapped (#50869) (#50875)
When a composite aggregation is reduced using the results from an index that has one of the fields unmapped we were throwing away the formatter. This is mildly annoying, except in the case of IP addresses which were coming out as non-utf-8-characters. And tripping assertions. This carefully preserves the formatter from the working bucket. Closes #50600
1 parent 3bac1dc commit e6d0f7d

File tree

4 files changed

+122
-5
lines changed

4 files changed

+122
-5
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/230_composite.yml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,7 @@ setup:
729729
- match: { aggregations.test.buckets.3.key.geo: "12/2048/0" }
730730
- match: { aggregations.test.buckets.3.key.kw: "bar" }
731731
- match: { aggregations.test.buckets.3.doc_count: 1 }
732+
732733
---
733734
"Simple Composite aggregation with geotile grid add aggregate after":
734735
- skip:
@@ -770,3 +771,49 @@ setup:
770771
- match: { aggregations.test.buckets.2.key.geo: "12/2048/0" }
771772
- match: { aggregations.test.buckets.2.key.kw: "bar" }
772773
- match: { aggregations.test.buckets.2.doc_count: 1 }
774+
775+
---
776+
"Mixed ip and unmapped fields":
777+
- skip:
778+
version: " - 7.99.99"
779+
reason: This will fail against 7.x until the fix is backported there
780+
# It is important that the index *without* the ip field be sorted *before*
781+
# the index *with* the ip field because that has caused bugs in the past.
782+
- do:
783+
indices.create:
784+
index: test_1
785+
- do:
786+
indices.create:
787+
index: test_2
788+
body:
789+
mappings:
790+
properties:
791+
f:
792+
type: ip
793+
- do:
794+
index:
795+
index: test_2
796+
id: 1
797+
body: { "f": "192.168.0.1" }
798+
refresh: true
799+
800+
- do:
801+
search:
802+
index: test_*
803+
body:
804+
aggregations:
805+
test:
806+
composite:
807+
sources: [
808+
"f": {
809+
"terms": {
810+
"field": "f"
811+
}
812+
}
813+
]
814+
815+
- match: { hits.total.value: 1 }
816+
- match: { hits.total.relation: eq }
817+
- length: { aggregations.test.buckets: 1 }
818+
- match: { aggregations.test.buckets.0.key.f: "192.168.0.1" }
819+
- match: { aggregations.test.buckets.0.doc_count: 1 }

server/src/main/java/org/elasticsearch/search/DocValueFormat.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,11 @@ public double parseDouble(String value, boolean roundUp, LongSupplier now) {
141141
public BytesRef parseBytesRef(String value) {
142142
return new BytesRef(value);
143143
}
144+
145+
@Override
146+
public String toString() {
147+
return "raw";
148+
}
144149
};
145150

146151
DocValueFormat BINARY = new DocValueFormat() {
@@ -346,6 +351,11 @@ public String format(BytesRef value) {
346351
public BytesRef parseBytesRef(String value) {
347352
return new BytesRef(InetAddressPoint.encode(InetAddresses.forString(value)));
348353
}
354+
355+
@Override
356+
public String toString() {
357+
return "ip";
358+
}
349359
};
350360

351361
final class Decimal implements DocValueFormat {

server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/InternalComposite.java

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,13 @@ public List<InternalBucket> getBuckets() {
149149
return buckets;
150150
}
151151

152+
/**
153+
* The formats used when writing the keys. Package private for testing.
154+
*/
155+
List<DocValueFormat> getFormats() {
156+
return formats;
157+
}
158+
152159
@Override
153160
public Map<String, Object> afterKey() {
154161
if (afterKey != null) {
@@ -204,8 +211,17 @@ public InternalAggregation reduce(List<InternalAggregation> aggregations, Reduce
204211
reduceContext.consumeBucketsAndMaybeBreak(1);
205212
result.add(reduceBucket);
206213
}
207-
final CompositeKey lastKey = result.size() > 0 ? result.get(result.size()-1).getRawKey() : null;
208-
return new InternalComposite(name, size, sourceNames, formats, result, lastKey, reverseMuls,
214+
215+
List<DocValueFormat> reducedFormats = formats;
216+
CompositeKey lastKey = null;
217+
if (result.size() > 0) {
218+
lastBucket = result.get(result.size() - 1);
219+
/* Attach the formats from the last bucket to the reduced composite
220+
* so that we can properly format the after key. */
221+
reducedFormats = lastBucket.formats;
222+
lastKey = lastBucket.getRawKey();
223+
}
224+
return new InternalComposite(name, size, sourceNames, reducedFormats, result, lastKey, reverseMuls,
209225
earlyTerminated, pipelineAggregators(), metaData);
210226
}
211227

@@ -219,7 +235,12 @@ protected InternalBucket reduceBucket(List<InternalBucket> buckets, ReduceContex
219235
aggregations.add(bucket.aggregations);
220236
}
221237
InternalAggregations aggs = InternalAggregations.reduce(aggregations, context);
222-
return new InternalBucket(sourceNames, formats, buckets.get(0).key, reverseMuls, docCount, aggs);
238+
/* Use the formats from the bucket because they'll be right to format
239+
* the key. The formats on the InternalComposite doing the reducing are
240+
* just whatever formats make sense for *its* index. This can be real
241+
* trouble when the index doing the reducing is unmapped. */
242+
List<DocValueFormat> reducedFormats = buckets.get(0).formats;
243+
return new InternalBucket(sourceNames, reducedFormats, buckets.get(0).key, reverseMuls, docCount, aggs);
223244
}
224245

225246
@Override
@@ -349,6 +370,13 @@ public Aggregations getAggregations() {
349370
return aggregations;
350371
}
351372

373+
/**
374+
* The formats used when writing the keys. Package private for testing.
375+
*/
376+
List<DocValueFormat> getFormats() {
377+
return formats;
378+
}
379+
352380
@Override
353381
public int compareKey(InternalBucket other) {
354382
for (int i = 0; i < key.size(); i++) {

server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/InternalCompositeTests.java

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
package org.elasticsearch.search.aggregations.bucket.composite;
2121

2222
import com.google.common.collect.Lists;
23+
2324
import org.apache.lucene.util.BytesRef;
2425
import org.elasticsearch.common.io.stream.Writeable;
2526
import org.elasticsearch.common.time.DateFormatter;
@@ -36,6 +37,7 @@
3637
import java.io.IOException;
3738
import java.time.ZoneOffset;
3839
import java.util.ArrayList;
40+
import java.util.Arrays;
3941
import java.util.Collections;
4042
import java.util.Comparator;
4143
import java.util.HashMap;
@@ -47,6 +49,9 @@
4749
import java.util.stream.IntStream;
4850

4951
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomAsciiLettersOfLengthBetween;
52+
import static java.util.Collections.emptyList;
53+
import static java.util.Collections.emptyMap;
54+
import static java.util.stream.Collectors.toList;
5055
import static org.hamcrest.Matchers.containsString;
5156
import static org.hamcrest.Matchers.equalTo;
5257
import static org.hamcrest.Matchers.greaterThan;
@@ -238,8 +243,7 @@ public void testReduceSame() throws IOException {
238243
for (int i = 0; i < numSame; i++) {
239244
toReduce.add(result);
240245
}
241-
InternalComposite finalReduce = (InternalComposite) result.reduce(toReduce,
242-
new InternalAggregation.ReduceContext(BigArrays.NON_RECYCLING_INSTANCE, null, true));
246+
InternalComposite finalReduce = (InternalComposite) result.reduce(toReduce, reduceContext());
243247
assertThat(finalReduce.getBuckets().size(), equalTo(result.getBuckets().size()));
244248
Iterator<InternalComposite.InternalBucket> expectedIt = result.getBuckets().iterator();
245249
for (InternalComposite.InternalBucket bucket : finalReduce.getBuckets()) {
@@ -249,6 +253,30 @@ public void testReduceSame() throws IOException {
249253
}
250254
}
251255

256+
/**
257+
* Check that reducing with an unmapped index produces useful formats.
258+
*/
259+
public void testReduceUnmapped() throws IOException {
260+
InternalComposite mapped = createTestInstance(randomAlphaOfLength(10), emptyList(), emptyMap(), InternalAggregations.EMPTY);
261+
List<DocValueFormat> rawFormats = formats.stream().map(f -> DocValueFormat.RAW).collect(toList());
262+
InternalComposite unmapped = new InternalComposite(mapped.getName(), mapped.getSize(), sourceNames,
263+
rawFormats, emptyList(), null, reverseMuls, true, emptyList(), emptyMap());
264+
List<InternalAggregation> toReduce = Arrays.asList(unmapped, mapped);
265+
Collections.shuffle(toReduce, random());
266+
InternalComposite finalReduce = (InternalComposite) unmapped.reduce(toReduce, reduceContext());
267+
assertThat(finalReduce.getBuckets().size(), equalTo(mapped.getBuckets().size()));
268+
if (false == mapped.getBuckets().isEmpty()) {
269+
assertThat(finalReduce.getFormats(), equalTo(mapped.getFormats()));
270+
}
271+
Iterator<InternalComposite.InternalBucket> expectedIt = mapped.getBuckets().iterator();
272+
for (InternalComposite.InternalBucket bucket : finalReduce.getBuckets()) {
273+
InternalComposite.InternalBucket expectedBucket = expectedIt.next();
274+
assertThat(bucket.getRawKey(), equalTo(expectedBucket.getRawKey()));
275+
assertThat(bucket.getDocCount(), equalTo(expectedBucket.getDocCount()));
276+
assertThat(bucket.getFormats(), equalTo(expectedBucket.getFormats()));
277+
}
278+
}
279+
252280
public void testCompareCompositeKeyBiggerFieldName() {
253281
InternalComposite.ArrayMap key1 = createMap(
254282
Lists.newArrayList("field1", "field2"),
@@ -381,4 +409,8 @@ private InternalComposite.ArrayMap createMap(List<String> fields, Comparable[] v
381409
values
382410
);
383411
}
412+
413+
private InternalAggregation.ReduceContext reduceContext() {
414+
return new InternalAggregation.ReduceContext(BigArrays.NON_RECYCLING_INSTANCE, null, true);
415+
}
384416
}

0 commit comments

Comments
 (0)