Skip to content

Commit c99021c

Browse files
author
David Roberts
authored
[ML] More advanced model snapshot retention options (#56125)
This PR implements the following changes to make ML model snapshot retention more flexible in advance of adding a UI for the feature in an upcoming release. - The default for `model_snapshot_retention_days` for new jobs is now 10 instead of 1 - There is a new job setting, `daily_model_snapshot_retention_after_days`, that defaults to 1 for new jobs and `model_snapshot_retention_days` for pre-7.8 jobs - For days that are older than `model_snapshot_retention_days`, all model snapshots are deleted as before - For days that are in between `daily_model_snapshot_retention_after_days` and `model_snapshot_retention_days` all but the first model snapshot for that day are deleted - The `retain` setting of model snapshots is still respected to allow selected model snapshots to be retained indefinitely Closes #52150
1 parent b82f659 commit c99021c

File tree

28 files changed

+593
-99
lines changed

28 files changed

+593
-99
lines changed

client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2519,10 +2519,12 @@ private static Job buildJobForExpiredDataTests(String jobId) {
25192519
.setFunction("count")
25202520
.setDetectorDescription(randomAlphaOfLength(10))
25212521
.build();
2522-
AnalysisConfig.Builder configBuilder = new AnalysisConfig.Builder(Arrays.asList(detector));
2522+
AnalysisConfig.Builder configBuilder = new AnalysisConfig.Builder(Collections.singletonList(detector));
25232523
//should not be random, see:https://github.com/elastic/ml-cpp/issues/208
25242524
configBuilder.setBucketSpan(new TimeValue(1, TimeUnit.HOURS));
25252525
builder.setAnalysisConfig(configBuilder);
2526+
builder.setModelSnapshotRetentionDays(1L);
2527+
builder.setDailyModelSnapshotRetentionAfterDays(1L);
25262528

25272529
DataDescription.Builder dataDescription = new DataDescription.Builder();
25282530
dataDescription.setTimeFormat(DataDescription.EPOCH_MS);

client/rest-high-level/src/test/java/org/elasticsearch/client/ml/job/config/JobTests.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,11 +141,17 @@ public static Job.Builder createRandomizedJobBuilder() {
141141
if (randomBoolean()) {
142142
builder.setBackgroundPersistInterval(TimeValue.timeValueHours(randomIntBetween(1, 24)));
143143
}
144+
Long modelSnapshotRetentionDays = null;
144145
if (randomBoolean()) {
145-
builder.setModelSnapshotRetentionDays(randomNonNegativeLong());
146+
modelSnapshotRetentionDays = randomNonNegativeLong();
147+
builder.setModelSnapshotRetentionDays(modelSnapshotRetentionDays);
146148
}
147149
if (randomBoolean()) {
148-
builder.setDailyModelSnapshotRetentionAfterDays(randomNonNegativeLong());
150+
if (modelSnapshotRetentionDays != null) {
151+
builder.setDailyModelSnapshotRetentionAfterDays(randomLongBetween(0, modelSnapshotRetentionDays));
152+
} else {
153+
builder.setDailyModelSnapshotRetentionAfterDays(randomNonNegativeLong());
154+
}
149155
}
150156
if (randomBoolean()) {
151157
builder.setResultsRetentionDays(randomNonNegativeLong());

docs/reference/ml/anomaly-detection/apis/get-job.asciidoc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,8 @@ The API returns the following results:
137137
"model_plot_config" : {
138138
"enabled" : true
139139
},
140-
"model_snapshot_retention_days" : 1,
140+
"model_snapshot_retention_days" : 10,
141+
"daily_model_snapshot_retention_after_days" : 1,
141142
"custom_settings" : {
142143
"created_by" : "ml-module-sample",
143144
...

docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,8 @@ This is a possible response:
102102
},
103103
"model_memory_limit" : "1gb",
104104
"categorization_examples_limit" : 4,
105-
"model_snapshot_retention_days" : 1
105+
"model_snapshot_retention_days" : 10,
106+
"daily_model_snapshot_retention_after_days" : 1
106107
},
107108
"datafeeds" : {
108109
"scroll_size" : 1000

docs/reference/ml/anomaly-detection/apis/put-job.asciidoc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,10 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=custom-settings]
224224
include::{docdir}/ml/ml-shared.asciidoc[tag=data-description]
225225
//End data_description
226226

227+
`daily_model_snapshot_retention_after_days`::
228+
(Optional, long)
229+
include::{docdir}/ml/ml-shared.asciidoc[tag=daily-model-snapshot-retention-after-days]
230+
227231
`description`::
228232
(Optional, string) A description of the job.
229233

@@ -320,7 +324,8 @@ When the job is created, you receive the following results:
320324
"time_field" : "timestamp",
321325
"time_format" : "epoch_ms"
322326
},
323-
"model_snapshot_retention_days" : 1,
327+
"model_snapshot_retention_days" : 10,
328+
"daily_model_snapshot_retention_after_days" : 1,
324329
"results_index_name" : "shared",
325330
"allow_lazy_open" : false
326331
}

docs/reference/ml/anomaly-detection/apis/update-job.asciidoc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ close the job, then reopen the job and restart the {dfeed} for the changes to ta
8282
(object)
8383
include::{docdir}/ml/ml-shared.asciidoc[tag=custom-settings]
8484

85+
`daily_model_snapshot_retention_after_days`::
86+
(long)
87+
include::{docdir}/ml/ml-shared.asciidoc[tag=daily-model-snapshot-retention-after-days]
88+
8589
`description`::
8690
(string) A description of the job.
8791

docs/reference/ml/ml-shared.asciidoc

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,18 @@ example, it can contain custom URL information as shown in
361361
{ml-docs}/ml-configuring-url.html[Adding custom URLs to {ml} results].
362362
end::custom-settings[]
363363

364+
tag::daily-model-snapshot-retention-after-days[]
365+
Advanced configuration option. Specifies a number of days between 0 and the
366+
value of `model_snapshot_retention_days`. After this period of time, only the first
367+
model snapshot per day is retained for this job. Age is calculated relative to
368+
the timestamp of the newest model snapshot. For new jobs, the default value is
369+
`1`, which means that all snapshots are retained for one day. Older snapshots
370+
are thinned out such that only one per day is retained. For jobs that were
371+
created before this setting was available, the default value matches the
372+
`model_snapshot_retention_days` value, which preserves the original behavior
373+
and no thinning out of model snapshots occurs.
374+
end::daily-model-snapshot-retention-after-days[]
375+
364376
tag::data-description[]
365377
The data description defines the format of the input data when you send data to
366378
the job by using the <<ml-post-data,post data>> API. Note that when you configure
@@ -997,8 +1009,8 @@ end::model-snapshot-id[]
9971009
tag::model-snapshot-retention-days[]
9981010
Advanced configuration option. The period of time (in days) that model snapshots
9991011
are retained. Age is calculated relative to the timestamp of the newest model
1000-
snapshot. The default value is `1`, which means snapshots that are one day
1001-
(twenty-four hours) older than the newest snapshot are deleted.
1012+
snapshot. The default value is `10`, which means snapshots that are ten days
1013+
older than the newest snapshot are deleted.
10021014
end::model-snapshot-retention-days[]
10031015

10041016
tag::model-timestamp[]

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/Job.java

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContentO
9898
*/
9999
public static final ByteSizeValue PROCESS_MEMORY_OVERHEAD = new ByteSizeValue(10, ByteSizeUnit.MB);
100100

101-
public static final long DEFAULT_MODEL_SNAPSHOT_RETENTION_DAYS = 1;
101+
public static final long DEFAULT_MODEL_SNAPSHOT_RETENTION_DAYS = 10;
102+
public static final long DEFAULT_DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS = 1;
102103

103104
private static ObjectParser<Builder, Void> createParser(boolean ignoreUnknownFields) {
104105
ObjectParser<Builder, Void> parser = new ObjectParser<>("job_details", ignoreUnknownFields, Builder::new);
@@ -808,6 +809,10 @@ public Builder setModelSnapshotRetentionDays(Long modelSnapshotRetentionDays) {
808809
return this;
809810
}
810811

812+
public Long getModelSnapshotRetentionDays() {
813+
return modelSnapshotRetentionDays;
814+
}
815+
811816
public Builder setDailyModelSnapshotRetentionAfterDays(Long dailyModelSnapshotRetentionAfterDays) {
812817
this.dailyModelSnapshotRetentionAfterDays = dailyModelSnapshotRetentionAfterDays;
813818
return this;
@@ -1043,9 +1048,6 @@ public void validateInputFields() {
10431048

10441049
checkValidBackgroundPersistInterval();
10451050
checkValueNotLessThan(0, RENORMALIZATION_WINDOW_DAYS.getPreferredName(), renormalizationWindowDays);
1046-
checkValueNotLessThan(0, MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(), modelSnapshotRetentionDays);
1047-
checkValueNotLessThan(0, DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS.getPreferredName(),
1048-
dailyModelSnapshotRetentionAfterDays);
10491051
checkValueNotLessThan(0, RESULTS_RETENTION_DAYS.getPreferredName(), resultsRetentionDays);
10501052

10511053
if (!MlStrings.isValidId(id)) {
@@ -1055,6 +1057,8 @@ public void validateInputFields() {
10551057
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_ID_TOO_LONG, MlStrings.ID_LENGTH_LIMIT));
10561058
}
10571059

1060+
validateModelSnapshotRetentionSettings();
1061+
10581062
validateGroups();
10591063

10601064
// Results index name not specified in user input means use the default, so is acceptable in this validation
@@ -1076,6 +1080,37 @@ public void validateAnalysisLimitsAndSetDefaults(@Nullable ByteSizeValue maxMode
10761080
AnalysisLimits.DEFAULT_MODEL_MEMORY_LIMIT_MB);
10771081
}
10781082

1083+
/**
1084+
* This is meant to be called when a new job is created.
1085+
* It sets {@link #dailyModelSnapshotRetentionAfterDays} to the default value if it is not set and {@link #modelSnapshotRetentionDays} is set to a value greater than that default.
1086+
*/
1087+
public void validateModelSnapshotRetentionSettingsAndSetDefaults() {
1088+
validateModelSnapshotRetentionSettings();
1089+
if (dailyModelSnapshotRetentionAfterDays == null &&
1090+
modelSnapshotRetentionDays != null &&
1091+
modelSnapshotRetentionDays > DEFAULT_DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS) {
1092+
dailyModelSnapshotRetentionAfterDays = DEFAULT_DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS;
1093+
}
1094+
}
1095+
1096+
/**
1097+
* Validates that {@link #modelSnapshotRetentionDays} and {@link #dailyModelSnapshotRetentionAfterDays} make sense,
1098+
* both individually and in combination.
1099+
*/
1100+
public void validateModelSnapshotRetentionSettings() {
1101+
1102+
checkValueNotLessThan(0, MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(), modelSnapshotRetentionDays);
1103+
checkValueNotLessThan(0, DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS.getPreferredName(),
1104+
dailyModelSnapshotRetentionAfterDays);
1105+
1106+
if (modelSnapshotRetentionDays != null &&
1107+
dailyModelSnapshotRetentionAfterDays != null &&
1108+
dailyModelSnapshotRetentionAfterDays > modelSnapshotRetentionDays) {
1109+
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_MODEL_SNAPSHOT_RETENTION_SETTINGS_INCONSISTENT,
1110+
dailyModelSnapshotRetentionAfterDays, modelSnapshotRetentionDays));
1111+
}
1112+
}
1113+
10791114
private void validateGroups() {
10801115
for (String group : this.groups) {
10811116
if (MlStrings.isValidId(group) == false) {

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package org.elasticsearch.xpack.core.ml.job.messages;
77

88
import org.elasticsearch.xpack.core.ml.MachineLearningField;
9+
import org.elasticsearch.xpack.core.ml.job.config.Job;
910

1011
import java.text.MessageFormat;
1112
import java.util.Locale;
@@ -212,6 +213,9 @@ public final class Messages {
212213
"This job would cause a mapping clash with existing field [{0}] - avoid the clash by assigning a dedicated results index";
213214
public static final String JOB_CONFIG_TIME_FIELD_NOT_ALLOWED_IN_ANALYSIS_CONFIG =
214215
"data_description.time_field may not be used in the analysis_config";
216+
public static final String JOB_CONFIG_MODEL_SNAPSHOT_RETENTION_SETTINGS_INCONSISTENT =
217+
"The value of '" + Job.DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS + "' [{0}] cannot be greater than '" +
218+
Job.MODEL_SNAPSHOT_RETENTION_DAYS + "' [{1}]";
215219

216220
public static final String JOB_AND_GROUP_NAMES_MUST_BE_UNIQUE =
217221
"job and group names must be unique but job [{0}] and group [{0}] have the same name";

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/results/ReservedFieldNames.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ public final class ReservedFieldNames {
231231
Job.RENORMALIZATION_WINDOW_DAYS.getPreferredName(),
232232
Job.BACKGROUND_PERSIST_INTERVAL.getPreferredName(),
233233
Job.MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(),
234+
Job.DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS.getPreferredName(),
234235
Job.RESULTS_RETENTION_DAYS.getPreferredName(),
235236
Job.MODEL_SNAPSHOT_ID.getPreferredName(),
236237
Job.MODEL_SNAPSHOT_MIN_VERSION.getPreferredName(),

0 commit comments

Comments
 (0)