Skip to content

Commit 72b93ad

Browse files
author
Christoph Büscher
committed
Preserve date_histogram format when aggregating on unmapped fields (#35254)
Currently, when aggregating on an unmapped date field (e.g. using a date_histogram), we don't preserve the aggregation's `format` setting but instead use the default format. This can lead to losing the aggregation's `format` when aggregating over several indices where some of them contain unmapped date fields and are encountered first in the reduce phase. Related to #31760
1 parent 0bb2fa9 commit 72b93ad

File tree

2 files changed

+33
-4
lines changed

2 files changed

+33
-4
lines changed

server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.lucene.util.BytesRef;
2222
import org.elasticsearch.common.Nullable;
2323
import org.elasticsearch.common.geo.GeoPoint;
24+
import org.elasticsearch.common.joda.Joda;
2425
import org.elasticsearch.index.fielddata.IndexFieldData;
2526
import org.elasticsearch.index.fielddata.IndexGeoPointFieldData;
2627
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
@@ -56,7 +57,7 @@ public static <VS extends ValuesSource> ValuesSourceConfig<VS> resolve(
5657
if (script == null) {
5758
@SuppressWarnings("unchecked")
5859
ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(ValuesSourceType.ANY);
59-
config.format(resolveFormat(null, valueType));
60+
config.format(resolveFormat(null, valueType, timeZone));
6061
return config;
6162
}
6263
ValuesSourceType valuesSourceType = valueType != null ? valueType.getValuesSourceType() : ValuesSourceType.ANY;
@@ -70,7 +71,7 @@ public static <VS extends ValuesSource> ValuesSourceConfig<VS> resolve(
7071
ValuesSourceConfig<VS> config = new ValuesSourceConfig<VS>(valuesSourceType);
7172
config.missing(missing);
7273
config.timezone(timeZone);
73-
config.format(resolveFormat(format, valueType));
74+
config.format(resolveFormat(format, valueType, timeZone));
7475
config.script(createScript(script, context));
7576
config.scriptValueType(valueType);
7677
return config;
@@ -82,7 +83,7 @@ public static <VS extends ValuesSource> ValuesSourceConfig<VS> resolve(
8283
ValuesSourceConfig<VS> config = new ValuesSourceConfig<>(valuesSourceType);
8384
config.missing(missing);
8485
config.timezone(timeZone);
85-
config.format(resolveFormat(format, valueType));
86+
config.format(resolveFormat(format, valueType, timeZone));
8687
config.unmapped(true);
8788
if (valueType != null) {
8889
// todo do we really need this for unmapped?
@@ -123,14 +124,17 @@ private static SearchScript.LeafFactory createScript(Script script, QueryShardCo
123124
}
124125
}
125126

126-
private static DocValueFormat resolveFormat(@Nullable String format, @Nullable ValueType valueType) {
127+
private static DocValueFormat resolveFormat(@Nullable String format, @Nullable ValueType valueType, @Nullable DateTimeZone tz) {
127128
if (valueType == null) {
128129
return DocValueFormat.RAW; // we can't figure it out
129130
}
130131
DocValueFormat valueFormat = valueType.defaultFormat;
131132
if (valueFormat instanceof DocValueFormat.Decimal && format != null) {
132133
valueFormat = new DocValueFormat.Decimal(format);
133134
}
135+
if (valueFormat instanceof DocValueFormat.DateTime && format != null) {
136+
valueFormat = new DocValueFormat.DateTime(Joda.forPattern(format), tz != null ? tz : DateTimeZone.UTC);
137+
}
134138
return valueFormat;
135139
}
136140

server/src/test/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,6 +1341,31 @@ public void testExceptionOnNegativeInterval() {
13411341
}
13421342
}
13431343

1344+
/**
1345+
* https://github.com/elastic/elasticsearch/issues/31760 shows an edge case where an unmapped "date" field in two indices
1346+
* that are queried simultaneously can lead to the "format" parameter in the aggregation not being preserved correctly.
1347+
*
1348+
* The error happens when the bucket from the "unmapped" index is received first in the reduce phase, however the case can
1349+
* be recreated when aggregating over a single index with an unmapped date field and also getting "empty" buckets.
1350+
*/
1351+
public void testFormatIndexUnmapped() throws InterruptedException, ExecutionException {
1352+
String indexDateUnmapped = "test31760";
1353+
indexRandom(true, client().prepareIndex(indexDateUnmapped, "_doc").setSource("foo", "bar"));
1354+
ensureSearchable(indexDateUnmapped);
1355+
1356+
SearchResponse response = client().prepareSearch(indexDateUnmapped)
1357+
.addAggregation(
1358+
dateHistogram("histo").field("dateField").dateHistogramInterval(DateHistogramInterval.MONTH).format("YYYY-MM")
1359+
.minDocCount(0).extendedBounds(new ExtendedBounds("2018-01", "2018-01")))
1360+
.execute().actionGet();
1361+
assertSearchResponse(response);
1362+
Histogram histo = response.getAggregations().get("histo");
1363+
assertThat(histo.getBuckets().size(), equalTo(1));
1364+
assertThat(histo.getBuckets().get(0).getKeyAsString(), equalTo("2018-01"));
1365+
assertThat(histo.getBuckets().get(0).getDocCount(), equalTo(0L));
1366+
internalCluster().wipeIndices(indexDateUnmapped);
1367+
}
1368+
13441369
/**
13451370
* https://github.com/elastic/elasticsearch/issues/31392 demonstrates an edge case where a date field mapping with
13461371
* "format" = "epoch_millis" can cause the date histogram aggregation to throw an error if a non-UTC time zone

0 commit comments

Comments
 (0)