From 4d44d26b2a3c0ed6ea9b344cbfec308e2a0c540e Mon Sep 17 00:00:00 2001 From: David Roberts Date: Thu, 10 Jan 2019 14:12:45 +0000 Subject: [PATCH 1/3] [ML] Adjust structure finder for Joda to Java time migration The ML file structure finder has always reported both Joda and Java time format strings. This change makes the Java time format strings the ones that are incorporated into mappings and ingest pipeline definitions. The BWC syntax of prepending "8" to these formats is used. This will need to be removed once Java time format strings become the default in Elasticsearch. This commit also removes direct imports of Joda classes in the structure finder unit tests. Instead the core Joda BWC class is used. --- .../ml/apis/find-file-structure.asciidoc | 64 ++++++++++--------- .../DelimitedFileStructureFinder.java | 2 +- .../FileStructureUtils.java | 17 ++++- .../NdJsonFileStructureFinder.java | 2 +- .../TextLogFileStructureFinder.java | 2 +- .../TimestampFormatFinder.java | 7 +- .../XmlFileStructureFinder.java | 2 +- .../FileStructureUtilsTests.java | 36 ++++++----- .../TextLogFileStructureFinderTests.java | 2 +- .../TimestampFormatFinderTests.java | 49 +++++++++----- 10 files changed, 112 insertions(+), 71 deletions(-) diff --git a/docs/reference/ml/apis/find-file-structure.asciidoc b/docs/reference/ml/apis/find-file-structure.asciidoc index ddc72b78d8e86..9650efff16189 100644 --- a/docs/reference/ml/apis/find-file-structure.asciidoc +++ b/docs/reference/ml/apis/find-file-structure.asciidoc @@ -164,37 +164,40 @@ format corresponds to the primary timestamp, but you do not want to specify the full `grok_pattern`. If this parameter is not specified, the structure finder chooses the best format from -the formats it knows, which are these Joda formats and their Java time equivalents: - -* `dd/MMM/YYYY:HH:mm:ss Z` -* `EEE MMM dd HH:mm zzz YYYY` -* `EEE MMM dd HH:mm:ss YYYY` -* `EEE MMM dd HH:mm:ss zzz YYYY` -* `EEE MMM dd YYYY HH:mm zzz` -* `EEE MMM dd YYYY HH:mm:ss zzz` -* `EEE, dd MMM YYYY HH:mm Z` -* `EEE, dd MMM YYYY HH:mm ZZ` -* `EEE, dd MMM YYYY HH:mm:ss Z` -* `EEE, dd MMM YYYY HH:mm:ss ZZ` +the formats it knows, which are these Java time formats and their Joda equivalents: + +* `dd/MMM/yyyy:HH:mm:ss XX` +* `EEE MMM dd HH:mm zzz yyyy` +* `EEE MMM dd HH:mm:ss yyyy` +* `EEE MMM dd HH:mm:ss zzz yyyy` +* `EEE MMM dd yyyy HH:mm zzz` +* `EEE MMM dd yyyy HH:mm:ss zzz` +* `EEE, dd MMM yyyy HH:mm XX` +* `EEE, dd MMM yyyy HH:mm XXX` +* `EEE, dd MMM yyyy HH:mm:ss XX` +* `EEE, dd MMM yyyy HH:mm:ss XXX` * `ISO8601` * `MMM d HH:mm:ss` * `MMM d HH:mm:ss,SSS` -* `MMM d YYYY HH:mm:ss` +* `MMM d yyyy HH:mm:ss` * `MMM dd HH:mm:ss` * `MMM dd HH:mm:ss,SSS` -* `MMM dd YYYY HH:mm:ss` -* `MMM dd, YYYY h:mm:ss a` +* `MMM dd yyyy HH:mm:ss` +* `MMM dd, yyyy h:mm:ss a` * `TAI64N` * `UNIX` * `UNIX_MS` -* `YYYY-MM-dd HH:mm:ss` -* `YYYY-MM-dd HH:mm:ss,SSS` -* `YYYY-MM-dd HH:mm:ss,SSS Z` -* `YYYY-MM-dd HH:mm:ss,SSSZ` -* `YYYY-MM-dd HH:mm:ss,SSSZZ` -* `YYYY-MM-dd HH:mm:ssZ` -* `YYYY-MM-dd HH:mm:ssZZ` -* `YYYYMMddHHmmss` +* `yyyy-MM-dd HH:mm:ss` +* `yyyy-MM-dd HH:mm:ss,SSS` +* `yyyy-MM-dd HH:mm:ss,SSS XX` +* `yyyy-MM-dd HH:mm:ss,SSSXX` +* `yyyy-MM-dd HH:mm:ss,SSSXXX` +* `yyyy-MM-dd HH:mm:ssXX` +* `yyyy-MM-dd HH:mm:ssXXX` +* `yyyy-MM-dd'T'HH:mm:ss,SSS` +* `yyyy-MM-dd'T'HH:mm:ss,SSSXX` +* `yyyy-MM-dd'T'HH:mm:ss,SSSXXX` +* `yyyyMMddHHmmss` -- @@ -603,11 +606,11 @@ If the request does not encounter errors, you receive the following result: }, "tpep_dropoff_datetime" : { "type" : "date", - "format" : "YYYY-MM-dd HH:mm:ss" + "format" : "8yyyy-MM-dd HH:mm:ss" }, "tpep_pickup_datetime" : { "type" : "date", - "format" : "YYYY-MM-dd HH:mm:ss" + "format" : "8yyyy-MM-dd HH:mm:ss" }, "trip_distance" : { "type" : "double" @@ -621,7 +624,7 @@ If the request does not encounter errors, you receive the following result: "field" : "tpep_pickup_datetime", "timezone" : "{{ beat.timezone }}", "formats" : [ - "YYYY-MM-dd HH:mm:ss" + "8yyyy-MM-dd HH:mm:ss" ] } } @@ -1287,10 +1290,9 @@ If the request does not encounter errors, you receive the following result: was chosen because it comes first in the column order. If you prefer `tpep_dropoff_datetime` then force it to be chosen using the `timestamp_field` query parameter. -<8> `joda_timestamp_formats` are used to tell Logstash and Ingest pipeline how - to parse timestamps. +<8> `joda_timestamp_formats` are used to tell Logstash how to parse timestamps. <9> `java_timestamp_formats` are the Java time formats recognized in the time - fields. In future Ingest pipeline will switch to use this format. + fields. Elasticsearch mappings and Ingest pipeline use this format. <10> The timestamp format in this sample doesn't specify a timezone, so to accurately convert them to UTC timestamps to store in Elasticsearch it's necessary to supply the timezone they relate to. `need_client_timezone` @@ -1396,7 +1398,7 @@ this: "field" : "timestamp", "timezone" : "{{ beat.timezone }}", "formats" : [ - "ISO8601" + "8yyyy-MM-dd'T'HH:mm:ss,SSS" ] } }, @@ -1556,7 +1558,7 @@ this: "field" : "timestamp", "timezone" : "{{ beat.timezone }}", "formats" : [ - "ISO8601" + "8yyyy-MM-dd'T'HH:mm:ss,SSS" ] } }, diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java index 93d440b79d434..dd30c0a1f94bc 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java @@ -149,7 +149,7 @@ static DelimitedFileStructureFinder makeDelimitedFileStructureFinder(List makeIngestPipelineDefinition(String grokPatter if (needClientTimezone) { dateProcessorSettings.put("timezone", "{{ " + BEAT_TIMEZONE_FIELD + " }}"); } - dateProcessorSettings.put("formats", timestampFormats); + dateProcessorSettings.put("formats", jodaBwcJavaTimestampFormatsForIngestPipeline(timestampFormats)); processors.add(Collections.singletonMap("date", dateProcessorSettings)); } @@ -365,4 +365,19 @@ public static Map makeIngestPipelineDefinition(String grokPatter pipeline.put(Pipeline.PROCESSORS_KEY, processors); return pipeline; } + + // TODO: remove this method when Java time formats are the default + private static List jodaBwcJavaTimestampFormatsForIngestPipeline(List javaTimestampFormats) { + return javaTimestampFormats.stream().map(format -> { + switch (format) { + case "ISO8601": + case "UNIX_MS": + case "UNIX": + case "TAI64N": + return format; + default: + return "8" + format; + } + }).collect(Collectors.toList()); + } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/NdJsonFileStructureFinder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/NdJsonFileStructureFinder.java index d7ba426d6a391..33d9ba56b3f53 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/NdJsonFileStructureFinder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/NdJsonFileStructureFinder.java @@ -63,7 +63,7 @@ static NdJsonFileStructureFinder makeNdJsonFileStructureFinder(List expl .setJavaTimestampFormats(timeField.v2().javaTimestampFormats) .setNeedClientTimezone(needClientTimeZone) .setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, timeField.v1(), - timeField.v2().jodaTimestampFormats, needClientTimeZone)); + timeField.v2().javaTimestampFormats, needClientTimeZone)); } Tuple, SortedMap> mappingsAndFieldStats = diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java index c61a48beb116f..b476e3e465463 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java @@ -123,7 +123,7 @@ static TextLogFileStructureFinder makeTextLogFileStructureFinder(List ex .setNeedClientTimezone(needClientTimeZone) .setGrokPattern(grokPattern) .setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(grokPattern, interimTimestampField, - bestTimestamp.v1().jodaTimestampFormats, needClientTimeZone)) + bestTimestamp.v1().javaTimestampFormats, needClientTimeZone)) .setMappings(mappings) .setFieldStats(fieldStats) .setExplanation(explanation) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java index 392e7b4e0be5e..07dba7dcb2c64 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinder.java @@ -457,13 +457,13 @@ public boolean hasTimezoneDependentParsing() { * and possibly also a "format" setting. */ public Map getEsDateMappingTypeWithFormat() { - if (jodaTimestampFormats.contains("TAI64N")) { + if (javaTimestampFormats.contains("TAI64N")) { // There's no format for TAI64N in the timestamp formats used in mappings return Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "keyword"); } Map mapping = new LinkedHashMap<>(); mapping.put(FileStructureUtils.MAPPING_TYPE_SETTING, "date"); - String formats = jodaTimestampFormats.stream().flatMap(format -> { + String formats = javaTimestampFormats.stream().flatMap(format -> { switch (format) { case "ISO8601": return Stream.empty(); @@ -472,7 +472,8 @@ public Map getEsDateMappingTypeWithFormat() { case "UNIX": return Stream.of("epoch_second"); default: - return Stream.of(format); + // TODO: remove the "8" prefix when Java time formats are the default + return Stream.of("8" + format); } }).collect(Collectors.joining("||")); if (formats.isEmpty() == false) { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinder.java index b9a805a14feeb..53550ebf18dd3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinder.java @@ -101,7 +101,7 @@ static XmlFileStructureFinder makeXmlFileStructureFinder(List explanatio .setJavaTimestampFormats(timeField.v2().javaTimestampFormats) .setNeedClientTimezone(needClientTimeZone) .setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, topLevelTag + "." + timeField.v1(), - timeField.v2().jodaTimestampFormats, needClientTimeZone)); + timeField.v2().javaTimestampFormats, needClientTimeZone)); } Tuple, SortedMap> mappingsAndFieldStats = diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java index 389a65da749a5..d8d251b361335 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java @@ -16,6 +16,7 @@ import java.util.List; import java.util.Map; import java.util.SortedMap; +import java.util.stream.Collectors; import static org.elasticsearch.xpack.ml.filestructurefinder.FileStructureOverrides.EMPTY_OVERRIDES; import static org.hamcrest.Matchers.contains; @@ -39,7 +40,7 @@ public void testGuessTimestampGivenSingleSampleSingleField() { EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER); assertNotNull(match); assertEquals("field1", match.v1()); - assertThat(match.v2().jodaTimestampFormats, contains("ISO8601")); + assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd'T'HH:mm:ss,SSS")); assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName); } @@ -52,7 +53,7 @@ public void testGuessTimestampGivenSingleSampleSingleFieldAndConsistentTimeField overrides, NOOP_TIMEOUT_CHECKER); assertNotNull(match); assertEquals("field1", match.v1()); - assertThat(match.v2().jodaTimestampFormats, contains("ISO8601")); + assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd'T'HH:mm:ss,SSS")); assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName); } @@ -77,20 +78,20 @@ public void testGuessTimestampGivenSingleSampleSingleFieldAndConsistentTimeForma overrides, NOOP_TIMEOUT_CHECKER); assertNotNull(match); assertEquals("field1", match.v1()); - assertThat(match.v2().jodaTimestampFormats, contains("ISO8601")); + assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd'T'HH:mm:ss,SSS")); assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName); } public void testGuessTimestampGivenSingleSampleSingleFieldAndImpossibleTimeFormatOverride() { - FileStructureOverrides overrides = FileStructureOverrides.builder().setTimestampFormat("EEE MMM dd HH:mm:ss YYYY").build(); + FileStructureOverrides overrides = FileStructureOverrides.builder().setTimestampFormat("EEE MMM dd HH:mm:ss yyyy").build(); Map sample = Collections.singletonMap("field1", "2018-05-24T17:28:31,735"); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> FileStructureUtils.guessTimestampField(explanation, Collections.singletonList(sample), overrides, NOOP_TIMEOUT_CHECKER)); - assertEquals("Specified timestamp format [EEE MMM dd HH:mm:ss YYYY] does not match for record [{field1=2018-05-24T17:28:31,735}]", + assertEquals("Specified timestamp format [EEE MMM dd HH:mm:ss yyyy] does not match for record [{field1=2018-05-24T17:28:31,735}]", e.getMessage()); } @@ -101,7 +102,7 @@ public void testGuessTimestampGivenSamplesWithSameSingleTimeField() { EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER); assertNotNull(match); assertEquals("field1", match.v1()); - assertThat(match.v2().jodaTimestampFormats, contains("ISO8601")); + assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd'T'HH:mm:ss,SSS")); assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName); } @@ -130,7 +131,7 @@ public void testGuessTimestampGivenSingleSampleManyFieldsOneTimeFormat() { EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER); assertNotNull(match); assertEquals("time", match.v1()); - assertThat(match.v2().jodaTimestampFormats, contains("YYYY-MM-dd HH:mm:ss,SSS")); + assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd HH:mm:ss,SSS")); assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName); } @@ -147,7 +148,7 @@ public void testGuessTimestampGivenSamplesWithManyFieldsSameSingleTimeFormat() { EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER); assertNotNull(match); assertEquals("time", match.v1()); - assertThat(match.v2().jodaTimestampFormats, contains("YYYY-MM-dd HH:mm:ss,SSS")); + assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd HH:mm:ss,SSS")); assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName); } @@ -178,7 +179,7 @@ public void testGuessTimestampGivenSamplesWithManyFieldsSameSingleTimeFormatDist EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER); assertNotNull(match); assertEquals("time", match.v1()); - assertThat(match.v2().jodaTimestampFormats, contains("YYYY-MM-dd HH:mm:ss,SSS")); + assertThat(match.v2().javaTimestampFormats, contains("yyyy-MM-dd HH:mm:ss,SSS")); assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName); } @@ -195,7 +196,7 @@ public void testGuessTimestampGivenSamplesWithManyFieldsSameSingleTimeFormatDist EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER); assertNotNull(match); assertEquals("time", match.v1()); - assertThat(match.v2().jodaTimestampFormats, contains("MMM dd YYYY HH:mm:ss", "MMM d YYYY HH:mm:ss")); + assertThat(match.v2().javaTimestampFormats, contains("MMM dd yyyy HH:mm:ss", "MMM d yyyy HH:mm:ss")); assertEquals("CISCOTIMESTAMP", match.v2().grokPatternName); } @@ -228,7 +229,7 @@ public void testGuessTimestampGivenSamplesWithManyFieldsInconsistentAndConsisten EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER); assertNotNull(match); assertEquals("time2", match.v1()); - assertThat(match.v2().jodaTimestampFormats, contains("MMM dd YYYY HH:mm:ss", "MMM d YYYY HH:mm:ss")); + assertThat(match.v2().javaTimestampFormats, contains("MMM dd yyyy HH:mm:ss", "MMM d yyyy HH:mm:ss")); assertEquals("CISCOTIMESTAMP", match.v2().grokPatternName); } @@ -331,7 +332,8 @@ public void testGuessMappingsAndCalculateFieldStats() { assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("foo")); Map expectedTimeMapping = new HashMap<>(); expectedTimeMapping.put(FileStructureUtils.MAPPING_TYPE_SETTING, "date"); - expectedTimeMapping.put(FileStructureUtils.MAPPING_FORMAT_SETTING, "YYYY-MM-dd HH:mm:ss,SSS"); + // TODO: remove the "8" prefix when Java time formats are the default + expectedTimeMapping.put(FileStructureUtils.MAPPING_FORMAT_SETTING, "8" + "yyyy-MM-dd HH:mm:ss,SSS"); assertEquals(expectedTimeMapping, mappings.get("time")); assertEquals(Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("bar")); assertNull(mappings.get("nothing")); @@ -354,7 +356,7 @@ public void testMakeIngestPipelineDefinitionGivenStructuredWithoutTimestamp() { public void testMakeIngestPipelineDefinitionGivenStructuredWithTimestamp() { String timestampField = randomAlphaOfLength(10); - List timestampFormats = randomFrom(TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS).jodaTimestampFormats; + List timestampFormats = randomFrom(TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS).javaTimestampFormats; boolean needClientTimezone = randomBoolean(); Map pipeline = @@ -371,7 +373,8 @@ public void testMakeIngestPipelineDefinitionGivenStructuredWithTimestamp() { assertNotNull(dateProcessor); assertEquals(timestampField, dateProcessor.get("field")); assertEquals(needClientTimezone, dateProcessor.containsKey("timezone")); - assertEquals(timestampFormats, dateProcessor.get("formats")); + // TODO: remove the "8" prefix when Java time formats are the default + assertEquals(timestampFormats.stream().map(format -> "8" + format).collect(Collectors.toList()), dateProcessor.get("formats")); // After removing the two expected fields there should be nothing left in the pipeline assertEquals(Collections.emptyMap(), pipeline); @@ -382,7 +385,7 @@ public void testMakeIngestPipelineDefinitionGivenSemiStructured() { String grokPattern = randomAlphaOfLength(100); String timestampField = randomAlphaOfLength(10); - List timestampFormats = randomFrom(TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS).jodaTimestampFormats; + List timestampFormats = randomFrom(TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS).javaTimestampFormats; boolean needClientTimezone = randomBoolean(); Map pipeline = @@ -404,7 +407,8 @@ public void testMakeIngestPipelineDefinitionGivenSemiStructured() { assertNotNull(dateProcessor); assertEquals(timestampField, dateProcessor.get("field")); assertEquals(needClientTimezone, dateProcessor.containsKey("timezone")); - assertEquals(timestampFormats, dateProcessor.get("formats")); + // TODO: remove the "8" prefix when Java time formats are the default + assertEquals(timestampFormats.stream().map(format -> "8" + format).collect(Collectors.toList()), dateProcessor.get("formats")); Map removeProcessor = (Map) processors.get(2).get("remove"); assertNotNull(removeProcessor); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java index de4244cd620a5..7ed5518c65077 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinderTests.java @@ -357,7 +357,7 @@ public void testMostLikelyTimestampGivenExceptionTrace() { public void testMostLikelyTimestampGivenExceptionTraceAndTimestampFormatOverride() { - FileStructureOverrides overrides = FileStructureOverrides.builder().setTimestampFormat("YYYY-MM-dd HH:mm:ss").build(); + FileStructureOverrides overrides = FileStructureOverrides.builder().setTimestampFormat("yyyy-MM-dd HH:mm:ss").build(); Tuple> mostLikelyMatch = TextLogFileStructureFinder.mostLikelyTimestamp(EXCEPTION_TRACE_SAMPLE.split("\n"), overrides, NOOP_TIMEOUT_CHECKER); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java index 6e256680eca55..d917def2503af 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java @@ -6,9 +6,17 @@ package org.elasticsearch.xpack.ml.filestructurefinder; import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.common.joda.Joda; import org.elasticsearch.common.time.DateFormatters; import org.elasticsearch.xpack.ml.filestructurefinder.TimestampFormatFinder.TimestampMatch; +import java.time.Instant; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.temporal.ChronoField; +import java.time.temporal.TemporalAccessor; +import java.time.temporal.TemporalQueries; import java.util.Arrays; import java.util.List; import java.util.Locale; @@ -269,32 +277,43 @@ private void validateTimestampMatch(TimestampMatch expected, String text, long e assertTrue(expected.simplePattern.matcher(text).find()); } + // This is because parsing timestamps using Joda formats generates warnings. + // Eventually we'll probably just remove the checks that the Joda formats + // are valid, and at that point this method can be removed too. + protected boolean enableWarningsCheck() { + return false; + } + + // This method is using the Joda BWC layer. When that's removed, this method + // can be deleted too - we'll just validate the Java time formats after that. private void validateJodaTimestampFormats(List jodaTimestampFormats, String text, long expectedEpochMs) { // All the test times are for Tue May 15 2018 16:14:56 UTC, which is 17:14:56 in London. // This is the timezone that will be used for any text representations that don't include it. - org.joda.time.DateTimeZone defaultZone = org.joda.time.DateTimeZone.forID("Europe/London"); - org.joda.time.DateTime parsed; + ZoneId defaultZone = ZoneId.of("Europe/London"); + long actualEpochMs; for (int i = 0; i < jodaTimestampFormats.size(); ++i) { try { String timestampFormat = jodaTimestampFormats.get(i); switch (timestampFormat) { case "ISO8601": - parsed = org.joda.time.format.ISODateTimeFormat.dateTimeParser() - .withZone(defaultZone).withDefaultYear(2018).parseDateTime(text); + actualEpochMs = Joda.forPattern("date_optional_time").withZone(defaultZone).parseMillis(text); break; default: - org.joda.time.format.DateTimeFormatter parser = - org.joda.time.format.DateTimeFormat.forPattern(timestampFormat).withZone(defaultZone).withLocale(Locale.ROOT); - parsed = parser.withDefaultYear(2018).parseDateTime(text); + // The Joda BWC layer doesn't support setting a default year, so we + // cannot validate patterns that don't explicitly include a year. + if (timestampFormat.contains("YYYY") == false) { + return; + } + actualEpochMs = Joda.forPattern(timestampFormat).withZone(defaultZone).parseMillis(text); break; } - if (expectedEpochMs == parsed.getMillis()) { + if (expectedEpochMs == actualEpochMs) { break; } // If the last one isn't right then propagate if (i == jodaTimestampFormats.size() - 1) { - assertEquals(expectedEpochMs, parsed.getMillis()); + assertEquals(expectedEpochMs, actualEpochMs); } } catch (RuntimeException e) { // If the last one throws then propagate @@ -309,8 +328,8 @@ private void validateJavaTimestampFormats(List javaTimestampFormats, Str // All the test times are for Tue May 15 2018 16:14:56 UTC, which is 17:14:56 in London. // This is the timezone that will be used for any text representations that don't include it. - java.time.ZoneId defaultZone = java.time.ZoneId.of("Europe/London"); - java.time.temporal.TemporalAccessor parsed; + ZoneId defaultZone = ZoneId.of("Europe/London"); + TemporalAccessor parsed; for (int i = 0; i < javaTimestampFormats.size(); ++i) { try { String timestampFormat = javaTimestampFormats.get(i); @@ -319,8 +338,8 @@ private void validateJavaTimestampFormats(List javaTimestampFormats, Str parsed = DateFormatters.forPattern("strict_date_optional_time_nanos").withZone(defaultZone).parse(text); break; default: - java.time.format.DateTimeFormatter parser = new java.time.format.DateTimeFormatterBuilder() - .appendPattern(timestampFormat).parseDefaulting(java.time.temporal.ChronoField.YEAR_OF_ERA, 2018) + DateTimeFormatter parser = new DateTimeFormatterBuilder() + .appendPattern(timestampFormat).parseDefaulting(ChronoField.YEAR_OF_ERA, 2018) .toFormatter(Locale.ROOT); // This next line parses the textual date without any default timezone, so if // the text doesn't contain the timezone then the resulting temporal accessor @@ -332,14 +351,14 @@ private void validateJavaTimestampFormats(List javaTimestampFormats, Str // timezone and then again with a default timezone if the first parse didn't // find one in the text. parsed = parser.parse(text); - if (parsed.query(java.time.temporal.TemporalQueries.zone()) == null) { + if (parsed.query(TemporalQueries.zone()) == null) { // TODO: when Java 8 is no longer supported remove the two // lines and comment above and the closing brace below parsed = parser.withZone(defaultZone).parse(text); } break; } - long actualEpochMs = java.time.Instant.from(parsed).toEpochMilli(); + long actualEpochMs = Instant.from(parsed).toEpochMilli(); if (expectedEpochMs == actualEpochMs) { break; } From fd16bcaa9652330c199626464f04660192c7fff1 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Fri, 25 Jan 2019 16:25:53 +0000 Subject: [PATCH 2/3] Adjust to changes of #37407 --- .../TimestampFormatFinderTests.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java index 7557fdb15b961..0374ed6f34175 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/TimestampFormatFinderTests.java @@ -285,7 +285,8 @@ protected boolean enableWarningsCheck() { } // This method is using the Joda BWC layer. When that's removed, this method - // can be deleted too - we'll just validate the Java time formats after that. + // can be deleted - we'll just validate the Java time formats after that. + // Also remove enableWarningsCheck() above if this method is removed. private void validateJodaTimestampFormats(List jodaTimestampFormats, String text, long expectedEpochMs) { // All the test times are for Tue May 15 2018 16:14:56 UTC, which is 17:14:56 in London. @@ -300,12 +301,7 @@ private void validateJodaTimestampFormats(List jodaTimestampFormats, Str actualEpochMs = Joda.forPattern("date_optional_time").withZone(defaultZone).parseMillis(text); break; default: - // The Joda BWC layer doesn't support setting a default year, so we - // cannot validate patterns that don't explicitly include a year. - if (timestampFormat.contains("YYYY") == false) { - return; - } - actualEpochMs = Joda.forPattern(timestampFormat).withZone(defaultZone).parseMillis(text); + actualEpochMs = Joda.forPattern(timestampFormat).withYear(2018).withZone(defaultZone).parseMillis(text); break; } if (expectedEpochMs == actualEpochMs) { From 20d080613d07725dcd8b28cadb3067259149a1f3 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Fri, 25 Jan 2019 16:32:20 +0000 Subject: [PATCH 3/3] Fixing tests --- .../xpack/ml/filestructurefinder/FileStructureUtils.java | 2 +- .../ml/filestructurefinder/FileStructureUtilsTests.java | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java index 7e61bb2d3e7ce..ba22b170ecea0 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java @@ -367,7 +367,7 @@ public static Map makeIngestPipelineDefinition(String grokPatter } // TODO: remove this method when Java time formats are the default - private static List jodaBwcJavaTimestampFormatsForIngestPipeline(List javaTimestampFormats) { + static List jodaBwcJavaTimestampFormatsForIngestPipeline(List javaTimestampFormats) { return javaTimestampFormats.stream().map(format -> { switch (format) { case "ISO8601": diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java index d8d251b361335..8140d2fa6034f 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java @@ -16,7 +16,6 @@ import java.util.List; import java.util.Map; import java.util.SortedMap; -import java.util.stream.Collectors; import static org.elasticsearch.xpack.ml.filestructurefinder.FileStructureOverrides.EMPTY_OVERRIDES; import static org.hamcrest.Matchers.contains; @@ -373,8 +372,8 @@ public void testMakeIngestPipelineDefinitionGivenStructuredWithTimestamp() { assertNotNull(dateProcessor); assertEquals(timestampField, dateProcessor.get("field")); assertEquals(needClientTimezone, dateProcessor.containsKey("timezone")); - // TODO: remove the "8" prefix when Java time formats are the default - assertEquals(timestampFormats.stream().map(format -> "8" + format).collect(Collectors.toList()), dateProcessor.get("formats")); + // TODO: remove the call to jodaBwcJavaTimestampFormatsForIngestPipeline() when Java time formats are the default + assertEquals(FileStructureUtils.jodaBwcJavaTimestampFormatsForIngestPipeline(timestampFormats), dateProcessor.get("formats")); // After removing the two expected fields there should be nothing left in the pipeline assertEquals(Collections.emptyMap(), pipeline); @@ -407,8 +406,8 @@ public void testMakeIngestPipelineDefinitionGivenSemiStructured() { assertNotNull(dateProcessor); assertEquals(timestampField, dateProcessor.get("field")); assertEquals(needClientTimezone, dateProcessor.containsKey("timezone")); - // TODO: remove the "8" prefix when Java time formats are the default - assertEquals(timestampFormats.stream().map(format -> "8" + format).collect(Collectors.toList()), dateProcessor.get("formats")); + // TODO: remove the call to jodaBwcJavaTimestampFormatsForIngestPipeline() when Java time formats are the default + assertEquals(FileStructureUtils.jodaBwcJavaTimestampFormatsForIngestPipeline(timestampFormats), dateProcessor.get("formats")); Map removeProcessor = (Map) processors.get(2).get("remove"); assertNotNull(removeProcessor);