Skip to content

Commit 2c27591

Browse files
authored
[7.x] Week based parsing for ingest date processor (#58597) (#58802)
The date processor was incorrectly parsing week-based dates: when a week-based year was provided, the ingest module assumed that the date contained no year and tried to apply the logic intended for dd/MM-style dates. The date processor also allows users to specify a locale parameter. It should be taken into account when parsing dates; currently it is only used for formatting. If someone specifies the 'en-us' locale, then the calendar data rules for that locale should be used. The exception is the iso8601 format: if someone is using that format, then the locale should not override the calendar data rules. closes #58479
1 parent 4f1da31 commit 2c27591

File tree

9 files changed

+219
-42
lines changed

9 files changed

+219
-42
lines changed

modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateFormat.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.time.ZonedDateTime;
3030
import java.time.temporal.ChronoField;
3131
import java.time.temporal.TemporalAccessor;
32+
import java.time.temporal.WeekFields;
3233
import java.util.Arrays;
3334
import java.util.List;
3435
import java.util.Locale;
@@ -45,8 +46,12 @@ enum DateFormat {
4546
Iso8601 {
4647
@Override
4748
Function<String, ZonedDateTime> getFunction(String format, ZoneId timezone, Locale locale) {
48-
return (date) -> DateFormatters.from(DateFormatter.forPattern("iso8601").parse(date), timezone)
49-
.withZoneSameInstant(timezone);
49+
return (date) -> {
50+
TemporalAccessor accessor = DateFormatter.forPattern("iso8601").parse(date);
51+
//even though locale could be set to en-us, Locale.ROOT (following iso8601 calendar data rules) should be used
52+
return DateFormatters.from(accessor, Locale.ROOT, timezone)
53+
.withZoneSameInstant(timezone);
54+
};
5055

5156
}
5257
},
@@ -97,7 +102,9 @@ Function<String, ZonedDateTime> getFunction(String format, ZoneId zoneId, Locale
97102
TemporalAccessor accessor = formatter.parse(text);
98103
// if there is no year nor year-of-era, we fall back to the current one and
99104
// fill the rest of the date up with the parsed date
100-
if (accessor.isSupported(ChronoField.YEAR) == false && accessor.isSupported(ChronoField.YEAR_OF_ERA) == false ) {
105+
if (accessor.isSupported(ChronoField.YEAR) == false
106+
&& accessor.isSupported(ChronoField.YEAR_OF_ERA) == false
107+
&& accessor.isSupported(WeekFields.of(locale).weekOfWeekBasedYear()) == false) {
101108
int year = LocalDate.now(ZoneOffset.UTC).getYear();
102109
ZonedDateTime newTime = Instant.EPOCH.atZone(ZoneOffset.UTC).withYear(year);
103110
for (ChronoField field : FIELDS) {
@@ -110,9 +117,9 @@ Function<String, ZonedDateTime> getFunction(String format, ZoneId zoneId, Locale
110117
}
111118

112119
if (isUtc) {
113-
return DateFormatters.from(accessor).withZoneSameInstant(ZoneOffset.UTC);
120+
return DateFormatters.from(accessor, locale).withZoneSameInstant(ZoneOffset.UTC);
114121
} else {
115-
return DateFormatters.from(accessor);
122+
return DateFormatters.from(accessor, locale);
116123
}
117124
};
118125
}

modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateFormatTests.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
package org.elasticsearch.ingest.common;
2121

22+
import org.elasticsearch.bootstrap.JavaVersion;
2223
import org.elasticsearch.common.time.DateFormatter;
2324
import org.elasticsearch.common.time.DateUtils;
2425
import org.elasticsearch.test.ESTestCase;
@@ -69,6 +70,29 @@ public void testParseJavaDefaultYear() {
6970
assertThat(dateTime.getYear(), is(year));
7071
}
7172

73+
public void testParseWeekBased() {
74+
assumeFalse("won't work in jdk8 " +
75+
"because SPI mechanism is not looking at classpath - needs ISOCalendarDataProvider in jre's ext/libs",
76+
JavaVersion.current().equals(JavaVersion.parse("8")));
77+
String format = randomFrom("YYYY-ww");
78+
ZoneId timezone = DateUtils.of("Europe/Amsterdam");
79+
Function<String, ZonedDateTime> javaFunction = DateFormat.Java.getFunction(format, timezone, Locale.ROOT);
80+
ZonedDateTime dateTime = javaFunction.apply("2020-33");
81+
assertThat(dateTime, equalTo(ZonedDateTime.of(2020,8,10,0,0,0,0,timezone)));
82+
}
83+
84+
public void testParseWeekBasedWithLocale() {
85+
assumeFalse("won't work in jdk8 " +
86+
"because SPI mechanism is not looking at classpath - needs ISOCalendarDataProvider in jre's ext/libs",
87+
JavaVersion.current().equals(JavaVersion.parse("8")));
88+
String format = randomFrom("YYYY-ww");
89+
ZoneId timezone = DateUtils.of("Europe/Amsterdam");
90+
Function<String, ZonedDateTime> javaFunction = DateFormat.Java.getFunction(format, timezone, Locale.US);
91+
ZonedDateTime dateTime = javaFunction.apply("2020-33");
92+
//33rd week of 2020 starts on 9th August 2020 as per US locale
93+
assertThat(dateTime, equalTo(ZonedDateTime.of(2020,8,9,0,0,0,0,timezone)));
94+
}
95+
7296
public void testParseUnixMs() {
7397
assertThat(DateFormat.UnixMs.getFunction(null, ZoneOffset.UTC, null).apply("1000500").toInstant().toEpochMilli(),
7498
equalTo(1000500L));

modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/30_date_processor.yml

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,3 +183,138 @@ teardown:
183183
- match: { _source.date_source_7: "2018-02-05T13:44:56.657+0100" }
184184
- match: { _source.date_target_7: "2018-02-05T12:44:56.657Z" }
185185

186+
187+
---
188+
"Test week based date parsing":
189+
- skip:
190+
reason: "Week based calculations require JDK9"
191+
features: "spi_on_classpath_jdk9"
192+
- do:
193+
indices.create:
194+
index: test
195+
body:
196+
mappings:
197+
properties:
198+
date_source_field:
199+
type: date
200+
format: YYYY-ww
201+
202+
- do:
203+
ingest.put_pipeline:
204+
id: "my_pipeline"
205+
body: >
206+
{
207+
"description": "_description",
208+
"processors": [
209+
{
210+
"date" : {
211+
"field" : "date_source_field",
212+
"target_field" : "date_target_field",
213+
"formats" : ["YYYY-ww"]
214+
}
215+
}
216+
]
217+
}
218+
- match: { acknowledged: true }
219+
220+
- do:
221+
ingest.simulate:
222+
id: "my_pipeline"
223+
body: >
224+
{
225+
"docs": [
226+
{
227+
"_source": {
228+
"date_source_field": "2020-33"
229+
}
230+
}
231+
]
232+
}
233+
- length: { docs: 1 }
234+
- match: { docs.0.doc._source.date_source_field: "2020-33" }
235+
- match: { docs.0.doc._source.date_target_field: "2020-08-10T00:00:00.000Z" }
236+
- length: { docs.0.doc._ingest: 1 }
237+
- is_true: docs.0.doc._ingest.timestamp
238+
239+
- do:
240+
index:
241+
index: test
242+
id: 1
243+
pipeline: "my_pipeline"
244+
body: {date_source_field: "2020-33"}
245+
246+
- do:
247+
get:
248+
index: test
249+
id: 1
250+
- match: { _source.date_source_field: "2020-33" }
251+
- match: { _source.date_target_field: "2020-08-10T00:00:00.000Z" }
252+
253+
---
254+
"Test week based date parsing with locale":
255+
- skip:
256+
reason: "Week based calculations require JDK9"
257+
features: "spi_on_classpath_jdk9"
258+
#locale is used when parsing as well on a pipeline. As per US locale, start of the 33rd week 2020 is on 09August2020 (sunday)
259+
- do:
260+
indices.create:
261+
index: test
262+
body:
263+
mappings:
264+
properties:
265+
date_source_field:
266+
type: date
267+
format: YYYY-ww
268+
locale: en-US
269+
270+
- do:
271+
ingest.put_pipeline:
272+
id: "my_pipeline"
273+
body: >
274+
{
275+
"description": "_description",
276+
"processors": [
277+
{
278+
"date" : {
279+
"field" : "date_source_field",
280+
"target_field" : "date_target_field",
281+
"formats" : ["YYYY-ww"],
282+
"locale" : "en-US"
283+
}
284+
}
285+
]
286+
}
287+
- match: { acknowledged: true }
288+
289+
- do:
290+
ingest.simulate:
291+
id: "my_pipeline"
292+
body: >
293+
{
294+
"docs": [
295+
{
296+
"_source": {
297+
"date_source_field": "2020-33"
298+
}
299+
}
300+
]
301+
}
302+
- length: { docs: 1 }
303+
- match: { docs.0.doc._source.date_source_field: "2020-33" }
304+
- match: { docs.0.doc._source.date_target_field: "2020-08-09T00:00:00.000Z" }
305+
- length: { docs.0.doc._ingest: 1 }
306+
- is_true: docs.0.doc._ingest.timestamp
307+
308+
- do:
309+
index:
310+
index: test
311+
id: 1
312+
pipeline: "my_pipeline"
313+
body: {date_source_field: "2020-33"}
314+
315+
- do:
316+
get:
317+
index: test
318+
id: 1
319+
- match: { _source.date_source_field: "2020-33" }
320+
- match: { _source.date_target_field: "2020-08-09T00:00:00.000Z" }

server/src/main/java/org/elasticsearch/common/time/DateFormatters.java

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import java.time.temporal.ChronoField;
3838
import java.time.temporal.IsoFields;
3939
import java.time.temporal.TemporalAccessor;
40+
import java.time.temporal.TemporalAdjusters;
4041
import java.time.temporal.TemporalQueries;
4142
import java.time.temporal.TemporalQuery;
4243
import java.time.temporal.WeekFields;
@@ -52,7 +53,8 @@
5253
import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
5354

5455
public class DateFormatters {
55-
public static final WeekFields WEEK_FIELDS = WeekFields.of(DayOfWeek.MONDAY,4);
56+
// when run with JDK8, WeekFields for Locale.ROOT would return WeekFields.of(DayOfWeek.SUNDAY,1)
57+
public static final WeekFields WEEK_FIELDS_ROOT = WeekFields.of(DayOfWeek.MONDAY,4);
5658

5759
private static final DateTimeFormatter TIME_ZONE_FORMATTER_NO_COLON = new DateTimeFormatterBuilder()
5860
.appendOffset("+HHmm", "Z")
@@ -946,14 +948,14 @@ public class DateFormatters {
946948
* Returns a formatter for a four digit weekyear
947949
*/
948950
private static final DateFormatter STRICT_WEEKYEAR = new JavaDateFormatter("strict_weekyear", new DateTimeFormatterBuilder()
949-
.appendValue(WEEK_FIELDS.weekBasedYear(), 4, 10, SignStyle.EXCEEDS_PAD)
951+
.appendValue(WEEK_FIELDS_ROOT.weekBasedYear(), 4, 10, SignStyle.EXCEEDS_PAD)
950952
.toFormatter(Locale.ROOT)
951953
.withResolverStyle(ResolverStyle.STRICT));
952954

953955
private static final DateTimeFormatter STRICT_WEEKYEAR_WEEK_FORMATTER = new DateTimeFormatterBuilder()
954-
.appendValue(WEEK_FIELDS.weekBasedYear(), 4, 10, SignStyle.EXCEEDS_PAD)
956+
.appendValue(WEEK_FIELDS_ROOT.weekBasedYear(), 4, 10, SignStyle.EXCEEDS_PAD)
955957
.appendLiteral("-W")
956-
.appendValue(WEEK_FIELDS.weekOfWeekBasedYear(), 2, 2, SignStyle.NOT_NEGATIVE)
958+
.appendValue(WEEK_FIELDS_ROOT.weekOfWeekBasedYear(), 2, 2, SignStyle.NOT_NEGATIVE)
957959
.toFormatter(Locale.ROOT)
958960
.withResolverStyle(ResolverStyle.STRICT);
959961

@@ -972,7 +974,7 @@ public class DateFormatters {
972974
new DateTimeFormatterBuilder()
973975
.append(STRICT_WEEKYEAR_WEEK_FORMATTER)
974976
.appendLiteral("-")
975-
.appendValue(WEEK_FIELDS.dayOfWeek())
977+
.appendValue(WEEK_FIELDS_ROOT.dayOfWeek())
976978
.toFormatter(Locale.ROOT)
977979
.withResolverStyle(ResolverStyle.STRICT));
978980

@@ -1162,7 +1164,7 @@ public class DateFormatters {
11621164
* Returns a formatter for a four digit weekyear. (YYYY)
11631165
*/
11641166
private static final DateFormatter WEEK_YEAR = new JavaDateFormatter("week_year",
1165-
new DateTimeFormatterBuilder().appendValue(WEEK_FIELDS.weekBasedYear()).toFormatter(Locale.ROOT)
1167+
new DateTimeFormatterBuilder().appendValue(WEEK_FIELDS_ROOT.weekBasedYear()).toFormatter(Locale.ROOT)
11661168
.withResolverStyle(ResolverStyle.STRICT));
11671169

11681170
/*
@@ -1591,9 +1593,9 @@ public class DateFormatters {
15911593
*/
15921594
private static final DateFormatter WEEKYEAR_WEEK = new JavaDateFormatter("weekyear_week", STRICT_WEEKYEAR_WEEK_FORMATTER,
15931595
new DateTimeFormatterBuilder()
1594-
.appendValue(WEEK_FIELDS.weekBasedYear())
1596+
.appendValue(WEEK_FIELDS_ROOT.weekBasedYear())
15951597
.appendLiteral("-W")
1596-
.appendValue(WEEK_FIELDS.weekOfWeekBasedYear())
1598+
.appendValue(WEEK_FIELDS_ROOT.weekOfWeekBasedYear())
15971599
.toFormatter(Locale.ROOT)
15981600
.withResolverStyle(ResolverStyle.STRICT)
15991601
);
@@ -1606,15 +1608,15 @@ public class DateFormatters {
16061608
new DateTimeFormatterBuilder()
16071609
.append(STRICT_WEEKYEAR_WEEK_FORMATTER)
16081610
.appendLiteral("-")
1609-
.appendValue(WEEK_FIELDS.dayOfWeek())
1611+
.appendValue(WEEK_FIELDS_ROOT.dayOfWeek())
16101612
.toFormatter(Locale.ROOT)
16111613
.withResolverStyle(ResolverStyle.STRICT),
16121614
new DateTimeFormatterBuilder()
1613-
.appendValue(WEEK_FIELDS.weekBasedYear())
1615+
.appendValue(WEEK_FIELDS_ROOT.weekBasedYear())
16141616
.appendLiteral("-W")
1615-
.appendValue(WEEK_FIELDS.weekOfWeekBasedYear())
1617+
.appendValue(WEEK_FIELDS_ROOT.weekOfWeekBasedYear())
16161618
.appendLiteral("-")
1617-
.appendValue(WEEK_FIELDS.dayOfWeek())
1619+
.appendValue(WEEK_FIELDS_ROOT.dayOfWeek())
16181620
.toFormatter(Locale.ROOT)
16191621
.withResolverStyle(ResolverStyle.STRICT)
16201622
);
@@ -1836,10 +1838,14 @@ static DateFormatter forPattern(String input) {
18361838
* @return The converted zoned date time
18371839
*/
18381840
public static ZonedDateTime from(TemporalAccessor accessor) {
1839-
return from(accessor, ZoneOffset.UTC);
1841+
return from(accessor, Locale.ROOT, ZoneOffset.UTC);
18401842
}
18411843

1842-
public static ZonedDateTime from(TemporalAccessor accessor, ZoneId defaultZone) {
1844+
public static ZonedDateTime from(TemporalAccessor accessor, Locale locale) {
1845+
return from(accessor, locale, ZoneOffset.UTC);
1846+
}
1847+
1848+
public static ZonedDateTime from(TemporalAccessor accessor, Locale locale, ZoneId defaultZone) {
18431849
if (accessor instanceof ZonedDateTime) {
18441850
return (ZonedDateTime) accessor;
18451851
}
@@ -1862,7 +1868,7 @@ public static ZonedDateTime from(TemporalAccessor accessor, ZoneId defaultZone)
18621868
} else if (isLocalDateSet) {
18631869
return localDate.atStartOfDay(zoneId);
18641870
} else if (isLocalTimeSet) {
1865-
return of(getLocalDate(accessor), localTime, zoneId);
1871+
return of(getLocalDate(accessor, locale), localTime, zoneId);
18661872
} else if (accessor.isSupported(ChronoField.YEAR) || accessor.isSupported(ChronoField.YEAR_OF_ERA) ) {
18671873
if (accessor.isSupported(MONTH_OF_YEAR)) {
18681874
return getFirstOfMonth(accessor).atStartOfDay(zoneId);
@@ -1872,26 +1878,28 @@ public static ZonedDateTime from(TemporalAccessor accessor, ZoneId defaultZone)
18721878
}
18731879
} else if (accessor.isSupported(MONTH_OF_YEAR)) {
18741880
// missing year, falling back to the epoch and then filling
1875-
return getLocalDate(accessor).atStartOfDay(zoneId);
1876-
} else if (accessor.isSupported(WEEK_FIELDS.weekBasedYear())) {
1877-
return localDateFromWeekBasedDate(accessor).atStartOfDay(zoneId);
1881+
return getLocalDate(accessor, locale).atStartOfDay(zoneId);
1882+
} else if (accessor.isSupported(WeekFields.of(locale).weekBasedYear())) {
1883+
return localDateFromWeekBasedDate(accessor, locale).atStartOfDay(zoneId);
18781884
}
18791885

18801886
// we should not reach this piece of code, everything being parsed we should be able to
18811887
// convert to a zoned date time! If not, we have to extend the above methods
18821888
throw new IllegalArgumentException("temporal accessor [" + accessor + "] cannot be converted to zoned date time");
18831889
}
18841890

1885-
private static LocalDate localDateFromWeekBasedDate(TemporalAccessor accessor) {
1886-
if (accessor.isSupported(WEEK_FIELDS.weekOfWeekBasedYear())) {
1891+
private static LocalDate localDateFromWeekBasedDate(TemporalAccessor accessor, Locale locale) {
1892+
WeekFields weekFields = WeekFields.of(locale);
1893+
if (accessor.isSupported(weekFields.weekOfWeekBasedYear())) {
18871894
return LocalDate.ofEpochDay(0)
1888-
.with(WEEK_FIELDS.weekBasedYear(), accessor.get(WEEK_FIELDS.weekBasedYear()))
1889-
.with(WEEK_FIELDS.weekOfWeekBasedYear(), accessor.get(WEEK_FIELDS.weekOfWeekBasedYear()))
1890-
.with(ChronoField.DAY_OF_WEEK, WEEK_FIELDS.getFirstDayOfWeek().getValue());
1895+
.with(weekFields.weekBasedYear(), accessor.get(weekFields.weekBasedYear()))
1896+
.with(weekFields.weekOfWeekBasedYear(), accessor.get(weekFields.weekOfWeekBasedYear()))
1897+
.with(TemporalAdjusters.previousOrSame(weekFields.getFirstDayOfWeek()));
18911898
} else {
18921899
return LocalDate.ofEpochDay(0)
1893-
.with(WEEK_FIELDS.weekBasedYear(), accessor.get(WEEK_FIELDS.weekBasedYear()))
1894-
.with(ChronoField.DAY_OF_WEEK, WEEK_FIELDS.getFirstDayOfWeek().getValue());
1900+
.with(weekFields.weekBasedYear(), accessor.get(weekFields.weekBasedYear()))
1901+
.with(TemporalAdjusters.previousOrSame(weekFields.getFirstDayOfWeek()));
1902+
18951903
}
18961904
}
18971905

@@ -1922,9 +1930,9 @@ public String toString() {
19221930
}
19231931
};
19241932

1925-
private static LocalDate getLocalDate(TemporalAccessor accessor) {
1926-
if (accessor.isSupported(WEEK_FIELDS.weekBasedYear())) {
1927-
return localDateFromWeekBasedDate(accessor);
1933+
private static LocalDate getLocalDate(TemporalAccessor accessor, Locale locale) {
1934+
if (accessor.isSupported(WeekFields.of(locale).weekBasedYear())) {
1935+
return localDateFromWeekBasedDate(accessor, locale);
19281936
} else if (accessor.isSupported(MONTH_OF_YEAR)) {
19291937
int year = getYear(accessor);
19301938
if (accessor.isSupported(DAY_OF_MONTH)) {

server/src/main/java/org/elasticsearch/common/time/IsoCalendarDataProvider.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* not use this file except in compliance with the License.
88
* You may obtain a copy of the License at
99
*
10-
* http://www.apache.org/licenses/LICENSE-2.0
10+
* http://www.apache.org/licenses/LICENSE-2.0
1111
*
1212
* Unless required by applicable law or agreed to in writing,
1313
* software distributed under the License is distributed on an

0 commit comments

Comments
 (0)