From 1ffb83de821127e8644944915018238833d626ed Mon Sep 17 00:00:00 2001 From: David Roberts Date: Fri, 3 Jan 2020 14:50:31 +0000 Subject: [PATCH 1/4] Test for result doc ID uniqueness --- .../xpack/ml/job/results/ModelPlotTests.java | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/ModelPlotTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/ModelPlotTests.java index 37788bfa203d2..1bdcd5981b671 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/ModelPlotTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/ModelPlotTests.java @@ -15,7 +15,11 @@ import org.elasticsearch.xpack.core.ml.job.results.ModelPlot; import java.io.IOException; +import java.util.ArrayList; import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.Objects; import static org.hamcrest.Matchers.containsString; @@ -262,6 +266,128 @@ public void testLenientParser() throws IOException { } } + public void testIdUniqueness() { + ModelPlot modelPlot = new ModelPlot("foo", new Date(), 3600, 0); + + String[] calledNumbers = { + "730", + "132", + "358", + "552", + "888", + "236", + "224", + "674", + "438", + "128", + "722", + "560", + "228", + "628", + "226", + "656" }; + + String[] durationBins = { + "S000", + "S001", + "S002", + "S003", + "S004", + "S005", + "S006", + "S007", + "S008", + "S009", + "S010", + "S011", + "S012", + "S013", + "S014", + "S015", + "S016", + "S017", + "S018", + "S019", + "S020", + "S021", + "S022", + "S023", + "S024", + "S025", + "S026", + "S027", + "S028", + "S029", + "S057", + "S058", + "S059", + "M020", + "M021", + "M026", + "M027", + "M028", + "M029", + "M030", + "M031", + "M032", + "M033", + "M056", + "M057", + "M058", + "M059", + "M060", + "M061", + "M062", + "M063", + "M086", + "M087", + "M088", + "M089", + "M090", + "M091", + "M092", + "M093", + "M116", + "M117", + "M118", + "M119", + "L012", + "L013", + "L014", + "L017", + "L018", + "L019", + "L023", + "L024", + "L025", + "L029", + "L030", + "L031" + }; + + Map> uniqueIds = new HashMap<>(); + + for (String calledNumber : calledNumbers) { + modelPlot.setPartitionFieldValue(calledNumber); + for (String durationBin : durationBins) { + modelPlot.setByFieldValue(durationBin); + String id = modelPlot.getId(); + uniqueIds.compute(id, (k, v) -> { + if (v == null) { + v = new ArrayList<>(); + } + v.add(calledNumber + "/" + durationBin); + if (v.size() > 1) { + logger.error("Duplicates for ID [" + id + "]: " + v); + } + return v; + }); + } + } + + assertEquals(calledNumbers.length * durationBins.length, uniqueIds.size()); + } + private ModelPlot createFullyPopulated() { ModelPlot modelPlot = new ModelPlot("foo", new Date(12345678L), 360L, 22); modelPlot.setByFieldName("by"); From 1df3606b645530ae8fa819b88bfb5ecc6e9201d9 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Mon, 6 Jan 2020 09:51:02 +0000 Subject: [PATCH 2/4] [ML] Improve uniqueness of result document IDs Switch from a 32 bit Java hash to a 128 bit Murmur hash for creating document IDs from by/over/partition field values. The 32 bit Java hash was not sufficiently unique, and could produce identical numbers for relatively common combinations of by/partition field values such as L018/128 and L017/228. Fixes #50613 --- .../xpack/core/ml/MachineLearningField.java | 17 +++ .../core/ml/job/results/AnomalyRecord.java | 9 +- .../xpack/core/ml/job/results/Forecast.java | 6 +- .../xpack/core/ml/job/results/Influencer.java | 3 +- .../xpack/core/ml/job/results/ModelPlot.java | 7 +- .../xpack/ml/job/results/ForecastTests.java | 12 +- .../xpack/ml/job/results/ModelPlotTests.java | 130 +++--------------- 7 files changed, 50 insertions(+), 134 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MachineLearningField.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MachineLearningField.java index 5c3da41df7349..30dda7e066db2 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MachineLearningField.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MachineLearningField.java @@ -5,10 +5,18 @@ */ package org.elasticsearch.xpack.core.ml; +import org.elasticsearch.common.Numbers; +import org.elasticsearch.common.hash.MurmurHash3; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.TimeValue; +import java.math.BigInteger; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Objects; +import java.util.stream.Collectors; + public final class MachineLearningField { public static final Setting AUTODETECT_PROCESS = Setting.boolSetting("xpack.ml.autodetect_process", true, Setting.Property.NodeScope); @@ -19,4 +27,13 @@ public final class MachineLearningField { private MachineLearningField() {} + public static String valuesToId(String... values) { + String combined = Arrays.stream(values).filter(Objects::nonNull).collect(Collectors.joining()); + byte[] bytes = combined.getBytes(StandardCharsets.UTF_8); + MurmurHash3.Hash128 hash = MurmurHash3.hash128(bytes, 0, bytes.length, 0, new MurmurHash3.Hash128()); + byte[] hashedBytes = new byte[16]; + System.arraycopy(Numbers.longToBytes(hash.h1), 0, hashedBytes, 0, 8); + System.arraycopy(Numbers.longToBytes(hash.h2), 0, hashedBytes, 8, 8); + return new BigInteger(hashedBytes) + "_" + combined.length(); + } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecord.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecord.java index e95e4cadc3567..335d2b17fee7a 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecord.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecord.java @@ -15,6 +15,7 @@ import org.elasticsearch.common.xcontent.ObjectParser.ValueType; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.xpack.core.ml.MachineLearningField; import org.elasticsearch.xpack.core.ml.job.config.Detector; import org.elasticsearch.xpack.core.ml.job.config.Job; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; @@ -353,12 +354,8 @@ public String getJobId() { * Data store ID of this record. */ public String getId() { - int valuesHash = Objects.hash(byFieldValue, overFieldValue, partitionFieldValue); - int length = (byFieldValue == null ? 0 : byFieldValue.length()) + - (overFieldValue == null ? 0 : overFieldValue.length()) + - (partitionFieldValue == null ? 0 : partitionFieldValue.length()); - - return jobId + "_record_" + timestamp.getTime() + "_" + bucketSpan + "_" + detectorIndex + "_" + valuesHash + "_" + length; + return jobId + "_record_" + timestamp.getTime() + "_" + bucketSpan + "_" + detectorIndex + "_" + + MachineLearningField.valuesToId(byFieldValue, overFieldValue, partitionFieldValue); } public int getDetectorIndex() { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/Forecast.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/Forecast.java index 5f4e3c829c3d5..097ecdabe9c37 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/Forecast.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/Forecast.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.xcontent.ObjectParser.ValueType; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.xpack.core.ml.MachineLearningField; import org.elasticsearch.xpack.core.ml.job.config.Job; import org.elasticsearch.xpack.core.common.time.TimeUtils; @@ -165,12 +166,9 @@ public String getForecastId() { } public String getId() { - int valuesHash = Objects.hash(byFieldValue, partitionFieldValue); - int length = (byFieldValue == null ? 0 : byFieldValue.length()) + - (partitionFieldValue == null ? 0 : partitionFieldValue.length()); return jobId + "_model_forecast_" + forecastId + "_" + timestamp.getTime() + "_" + bucketSpan + "_" + detectorIndex + "_" - + valuesHash + "_" + length; + + MachineLearningField.valuesToId(byFieldValue, partitionFieldValue); } public Date getTimestamp() { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/Influencer.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/Influencer.java index d17b375459bb4..771f55cc96efd 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/Influencer.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/Influencer.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.xcontent.ObjectParser.ValueType; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.xpack.core.ml.MachineLearningField; import org.elasticsearch.xpack.core.ml.job.config.Job; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; import org.elasticsearch.xpack.core.common.time.TimeUtils; @@ -134,7 +135,7 @@ public String getJobId() { public String getId() { return jobId + "_influencer_" + timestamp.getTime() + "_" + bucketSpan + "_" + - influenceField + "_" + influenceValue.hashCode() + "_" + influenceValue.length(); + influenceField + "_" + MachineLearningField.valuesToId(influenceValue); } public double getProbability() { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/ModelPlot.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/ModelPlot.java index ab7235ca27ac4..deae8ff1e2e78 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/ModelPlot.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/ModelPlot.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.xcontent.ObjectParser.ValueType; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.xpack.core.ml.MachineLearningField; import org.elasticsearch.xpack.core.ml.job.config.Job; import org.elasticsearch.xpack.core.common.time.TimeUtils; @@ -183,12 +184,8 @@ public String getJobId() { } public String getId() { - int valuesHash = Objects.hash(byFieldValue, overFieldValue, partitionFieldValue); - int length = (byFieldValue == null ? 0 : byFieldValue.length()) + - (overFieldValue == null ? 0 : overFieldValue.length()) + - (partitionFieldValue == null ? 0 : partitionFieldValue.length()); return jobId + "_model_plot_" + timestamp.getTime() + "_" + bucketSpan - + "_" + detectorIndex + "_" + valuesHash + "_" + length; + + "_" + detectorIndex + "_" + MachineLearningField.valuesToId(byFieldValue, overFieldValue, partitionFieldValue); } public Date getTimestamp() { diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/ForecastTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/ForecastTests.java index a5c15716ea293..ba8cb203495e4 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/ForecastTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/ForecastTests.java @@ -9,11 +9,11 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.test.AbstractSerializingTestCase; +import org.elasticsearch.xpack.core.ml.MachineLearningField; import org.elasticsearch.xpack.core.ml.job.results.Forecast; import java.io.IOException; import java.util.Date; -import java.util.Objects; import static org.hamcrest.Matchers.containsString; @@ -72,23 +72,19 @@ public void testId() { String byFieldValue = null; String partitionFieldValue = null; - int valuesHash = Objects.hash(byFieldValue, partitionFieldValue); - assertEquals("job-foo_model_forecast_222_100_60_2_" + valuesHash + "_0", forecast.getId()); + assertEquals("job-foo_model_forecast_222_100_60_2_0_0", forecast.getId()); - int length = 0; if (randomBoolean()) { byFieldValue = randomAlphaOfLength(10); - length += byFieldValue.length(); forecast.setByFieldValue(byFieldValue); } if (randomBoolean()) { partitionFieldValue = randomAlphaOfLength(10); - length += partitionFieldValue.length(); forecast.setPartitionFieldValue(partitionFieldValue); } - valuesHash = Objects.hash(byFieldValue, partitionFieldValue); - assertEquals("job-foo_model_forecast_222_100_60_2_" + valuesHash + "_" + length, forecast.getId()); + String valuesPart = MachineLearningField.valuesToId(byFieldValue, partitionFieldValue); + assertEquals("job-foo_model_forecast_222_100_60_2_" + valuesPart, forecast.getId()); } public void testStrictParser() throws IOException { diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/ModelPlotTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/ModelPlotTests.java index 1bdcd5981b671..c7451f6cc045a 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/ModelPlotTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/results/ModelPlotTests.java @@ -12,6 +12,7 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.test.AbstractSerializingTestCase; +import org.elasticsearch.xpack.core.ml.MachineLearningField; import org.elasticsearch.xpack.core.ml.job.results.ModelPlot; import java.io.IOException; @@ -20,7 +21,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Objects; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.not; @@ -225,28 +225,23 @@ public void testId() { String overFieldValue = null; String partitionFieldValue = null; - int valuesHash = Objects.hash(byFieldValue, overFieldValue, partitionFieldValue); - assertEquals("job-foo_model_plot_100_60_33_" + valuesHash + "_0", plot.getId()); + assertEquals("job-foo_model_plot_100_60_33_0_0", plot.getId()); - int length = 0; if (randomBoolean()) { byFieldValue = randomAlphaOfLength(10); - length += byFieldValue.length(); plot.setByFieldValue(byFieldValue); } if (randomBoolean()) { overFieldValue = randomAlphaOfLength(10); - length += overFieldValue.length(); plot.setOverFieldValue(overFieldValue); } if (randomBoolean()) { partitionFieldValue = randomAlphaOfLength(10); - length += partitionFieldValue.length(); plot.setPartitionFieldValue(partitionFieldValue); } - valuesHash = Objects.hash(byFieldValue, overFieldValue, partitionFieldValue); - assertEquals("job-foo_model_plot_100_60_33_" + valuesHash + "_" + length, plot.getId()); + String valuesPart = MachineLearningField.valuesToId(byFieldValue, overFieldValue, partitionFieldValue); + assertEquals("job-foo_model_plot_100_60_33_" + valuesPart, plot.getId()); } public void testStrictParser() throws IOException { @@ -269,114 +264,29 @@ public void testLenientParser() throws IOException { public void testIdUniqueness() { ModelPlot modelPlot = new ModelPlot("foo", new Date(), 3600, 0); - String[] calledNumbers = { - "730", - "132", - "358", - "552", - "888", - "236", - "224", - "674", - "438", - "128", - "722", - "560", - "228", - "628", - "226", - "656" }; - - String[] durationBins = { - "S000", - "S001", - "S002", - "S003", - "S004", - "S005", - "S006", - "S007", - "S008", - "S009", - "S010", - "S011", - "S012", - "S013", - "S014", - "S015", - "S016", - "S017", - "S018", - "S019", - "S020", - "S021", - "S022", - "S023", - "S024", - "S025", - "S026", - "S027", - "S028", - "S029", - "S057", - "S058", - "S059", - "M020", - "M021", - "M026", - "M027", - "M028", - "M029", - "M030", - "M031", - "M032", - "M033", - "M056", - "M057", - "M058", - "M059", - "M060", - "M061", - "M062", - "M063", - "M086", - "M087", - "M088", - "M089", - "M090", - "M091", - "M092", - "M093", - "M116", - "M117", - "M118", - "M119", - "L012", - "L013", - "L014", - "L017", - "L018", - "L019", - "L023", - "L024", - "L025", - "L029", - "L030", - "L031" - }; + String[] partitionFieldValues = { "730", "132", "358", "552", "888", "236", "224", "674", + "438", "128", "722", "560", "228", "628", "226", "656" }; + String[] byFieldValues = { "S000", "S001", "S002", "S003", "S004", "S005", "S006", "S007", "S008", "S009", + "S010", "S011", "S012", "S013", "S014", "S015", "S016", "S017", "S018", "S019", + "S020", "S021", "S022", "S023", "S024", "S025", "S026", "S027", "S028", "S029", + "S057", "S058", "S059", "M020", "M021", "M026", "M027", "M028", "M029", "M030", + "M031", "M032", "M033", "M056", "M057", "M058", "M059", "M060", "M061", "M062", + "M063", "M086", "M087", "M088", "M089", "M090", "M091", "M092", "M093", "M116", + "M117", "M118", "M119", "L012", "L013", "L014", "L017", "L018", "L019", "L023", + "L024", "L025", "L029", "L030", "L031" }; Map> uniqueIds = new HashMap<>(); - for (String calledNumber : calledNumbers) { - modelPlot.setPartitionFieldValue(calledNumber); - for (String durationBin : durationBins) { - modelPlot.setByFieldValue(durationBin); + for (String partitionFieldValue : partitionFieldValues) { + modelPlot.setPartitionFieldValue(partitionFieldValue); + for (String byFieldValue : byFieldValues) { + modelPlot.setByFieldValue(byFieldValue); String id = modelPlot.getId(); uniqueIds.compute(id, (k, v) -> { if (v == null) { v = new ArrayList<>(); } - v.add(calledNumber + "/" + durationBin); + v.add(partitionFieldValue + "/" + byFieldValue); if (v.size() > 1) { logger.error("Duplicates for ID [" + id + "]: " + v); } @@ -385,7 +295,7 @@ public void testIdUniqueness() { } } - assertEquals(calledNumbers.length * durationBins.length, uniqueIds.size()); + assertEquals(partitionFieldValues.length * byFieldValues.length, uniqueIds.size()); } private ModelPlot createFullyPopulated() { From 5a3b1d8ab5f60b94492df1477e3653fa29d73f9e Mon Sep 17 00:00:00 2001 From: David Roberts Date: Tue, 7 Jan 2020 09:15:24 +0000 Subject: [PATCH 3/4] Fix more tests --- .../core/ml/job/results/AnomalyRecordTests.java | 13 ++++--------- .../xpack/core/ml/job/results/InfluencerTests.java | 6 +++--- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecordTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecordTests.java index 7c94e6721435c..9ee45fefba5b9 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecordTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecordTests.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.test.AbstractSerializingTestCase; +import org.elasticsearch.xpack.core.ml.MachineLearningField; import java.io.IOException; import java.util.ArrayList; @@ -22,7 +23,6 @@ import java.util.Date; import java.util.List; import java.util.Map; -import java.util.Objects; import static org.hamcrest.Matchers.containsString; @@ -174,28 +174,23 @@ public void testId() { String overFieldValue = null; String partitionFieldValue = null; - int valuesHash = Objects.hash(byFieldValue, overFieldValue, partitionFieldValue); - assertEquals("test-job_record_1000_60_0_" + valuesHash + "_0", record.getId()); + assertEquals("test-job_record_1000_60_0_0_0", record.getId()); - int length = 0; if (randomBoolean()) { byFieldValue = randomAlphaOfLength(10); - length += byFieldValue.length(); record.setByFieldValue(byFieldValue); } if (randomBoolean()) { overFieldValue = randomAlphaOfLength(10); - length += overFieldValue.length(); record.setOverFieldValue(overFieldValue); } if (randomBoolean()) { partitionFieldValue = randomAlphaOfLength(10); - length += partitionFieldValue.length(); record.setPartitionFieldValue(partitionFieldValue); } - valuesHash = Objects.hash(byFieldValue, overFieldValue, partitionFieldValue); - assertEquals("test-job_record_1000_60_0_" + valuesHash + "_" + length, record.getId()); + String valuesPart = MachineLearningField.valuesToId(byFieldValue, overFieldValue, partitionFieldValue); + assertEquals("test-job_record_1000_60_0_" + valuesPart, record.getId()); } public void testStrictParser_IsLenientOnTopLevelFields() throws IOException { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/results/InfluencerTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/results/InfluencerTests.java index 7b2da1d795539..7aee1829acc9d 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/results/InfluencerTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/results/InfluencerTests.java @@ -12,10 +12,10 @@ import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.test.AbstractSerializingTestCase; +import org.elasticsearch.xpack.core.ml.MachineLearningField; import java.io.IOException; import java.util.Date; -import java.util.Objects; public class InfluencerTests extends AbstractSerializingTestCase { @@ -64,8 +64,8 @@ public void testToXContentDoesNotIncludeNameValueFieldWhenReservedWord() throws public void testId() { String influencerFieldValue = "wopr"; Influencer influencer = new Influencer("job-foo", "host", influencerFieldValue, new Date(1000), 300L); - int valueHash = Objects.hashCode(influencerFieldValue); - assertEquals("job-foo_influencer_1000_300_host_" + valueHash + "_" + influencerFieldValue.length(), influencer.getId()); + String valuePart = MachineLearningField.valuesToId(influencerFieldValue); + assertEquals("job-foo_influencer_1000_300_host_" + valuePart, influencer.getId()); } public void testLenientParser() throws IOException { From e4366ae1afb3d57b47076894fe43c9ce86080adb Mon Sep 17 00:00:00 2001 From: David Roberts Date: Tue, 7 Jan 2020 09:34:43 +0000 Subject: [PATCH 4/4] Add a test of maximum length --- .../core/ml/job/results/AnomalyRecord.java | 5 +++++ .../core/ml/job/results/AnomalyRecordTests.java | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecord.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecord.java index 335d2b17fee7a..98a59f2ef74eb 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecord.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecord.java @@ -354,6 +354,11 @@ public String getJobId() { * Data store ID of this record. */ public String getId() { + return buildId(jobId, timestamp, bucketSpan, detectorIndex, byFieldValue, overFieldValue, partitionFieldValue); + } + + static String buildId(String jobId, Date timestamp, long bucketSpan, int detectorIndex, + String byFieldValue, String overFieldValue, String partitionFieldValue) { return jobId + "_record_" + timestamp.getTime() + "_" + bucketSpan + "_" + detectorIndex + "_" + MachineLearningField.valuesToId(byFieldValue, overFieldValue, partitionFieldValue); } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecordTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecordTests.java index 9ee45fefba5b9..c300d8d908f0e 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecordTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/results/AnomalyRecordTests.java @@ -15,8 +15,10 @@ import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.test.AbstractSerializingTestCase; import org.elasticsearch.xpack.core.ml.MachineLearningField; +import org.elasticsearch.xpack.core.ml.utils.MlStrings; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -25,6 +27,7 @@ import java.util.Map; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.lessThanOrEqualTo; public class AnomalyRecordTests extends AbstractSerializingTestCase { @@ -217,4 +220,18 @@ public void testLenientParser() throws IOException { AnomalyRecord.LENIENT_PARSER.apply(parser, null); } } + + public void testIdLength() { + String jobId = randomAlphaOfLength(MlStrings.ID_LENGTH_LIMIT); + Date timestamp = new Date(Long.MAX_VALUE); + long bucketSpan = Long.MAX_VALUE; + int detectorIndex = Integer.MAX_VALUE; + String byFieldValue = randomAlphaOfLength(randomIntBetween(100, 1000)); + String overFieldValue = randomAlphaOfLength(randomIntBetween(100, 1000)); + String partitionFieldValue = randomAlphaOfLength(randomIntBetween(100, 1000)); + + String id = AnomalyRecord.buildId(jobId, timestamp, bucketSpan, detectorIndex, byFieldValue, overFieldValue, partitionFieldValue); + // 512 comes from IndexRequest.validate() + assertThat(id.getBytes(StandardCharsets.UTF_8).length, lessThanOrEqualTo(512)); + } }