Skip to content

Commit a842453

Browse files
committed
[ML] JIndex: Limit the size of bulk migrations (#36481)
1 parent cbe9099 commit a842453

File tree

2 files changed

+155
-8
lines changed

2 files changed

+155
-8
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlConfigMigrator.java

Lines changed: 75 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,13 @@
3838
import java.util.Collection;
3939
import java.util.HashMap;
4040
import java.util.HashSet;
41+
import java.util.Iterator;
4142
import java.util.List;
4243
import java.util.Map;
4344
import java.util.Set;
4445
import java.util.concurrent.atomic.AtomicBoolean;
4546
import java.util.concurrent.atomic.AtomicReference;
47+
import java.util.function.Function;
4648
import java.util.stream.Collectors;
4749

4850
import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN;
@@ -68,14 +70,19 @@
6870
* If there was an error in step 3 and the config is in both the clusterstate and
6971
* index then when the migrator retries it must not overwrite an existing job config
7072
* document as once the index document is present all update operations will function
71-
* on that rather than the clusterstate
73+
* on that rather than the clusterstate.
74+
*
75+
* The number of configs indexed in each bulk operation is limited by {@link #MAX_BULK_WRITE_SIZE};
76+
* pairs of datafeeds and jobs are migrated together.
7277
*/
7378
public class MlConfigMigrator {
7479

7580
private static final Logger logger = LogManager.getLogger(MlConfigMigrator.class);
7681

7782
public static final String MIGRATED_FROM_VERSION = "migrated from version";
7883

84+
static final int MAX_BULK_WRITE_SIZE = 100;
85+
7986
private final Client client;
8087
private final ClusterService clusterService;
8188

@@ -111,10 +118,12 @@ public void migrateConfigsWithoutTasks(ClusterState clusterState, ActionListener
111118
return;
112119
}
113120

114-
Collection<DatafeedConfig> datafeedsToMigrate = stoppedDatafeedConfigs(clusterState);
115-
List<Job> jobsToMigrate = nonDeletingJobs(closedJobConfigs(clusterState)).stream()
121+
Collection<DatafeedConfig> stoppedDatafeeds = stoppedDatafeedConfigs(clusterState);
122+
Map<String, Job> eligibleJobs = nonDeletingJobs(closedJobConfigs(clusterState)).stream()
116123
.map(MlConfigMigrator::updateJobForMigration)
117-
.collect(Collectors.toList());
124+
.collect(Collectors.toMap(Job::getId, Function.identity(), (a, b) -> a));
125+
126+
JobsAndDatafeeds jobsAndDatafeedsToMigrate = limitWrites(stoppedDatafeeds, eligibleJobs);
118127

119128
ActionListener<Boolean> unMarkMigrationInProgress = ActionListener.wrap(
120129
response -> {
@@ -127,16 +136,18 @@ public void migrateConfigsWithoutTasks(ClusterState clusterState, ActionListener
127136
}
128137
);
129138

130-
if (datafeedsToMigrate.isEmpty() && jobsToMigrate.isEmpty()) {
139+
if (jobsAndDatafeedsToMigrate.totalCount() == 0) {
131140
unMarkMigrationInProgress.onResponse(Boolean.FALSE);
132141
return;
133142
}
134143

135-
writeConfigToIndex(datafeedsToMigrate, jobsToMigrate, ActionListener.wrap(
144+
logger.debug("migrating ml configurations");
145+
146+
writeConfigToIndex(jobsAndDatafeedsToMigrate.datafeedConfigs, jobsAndDatafeedsToMigrate.jobs, ActionListener.wrap(
136147
failedDocumentIds -> {
137-
List<String> successfulJobWrites = filterFailedJobConfigWrites(failedDocumentIds, jobsToMigrate);
148+
List<String> successfulJobWrites = filterFailedJobConfigWrites(failedDocumentIds, jobsAndDatafeedsToMigrate.jobs);
138149
List<String> successfulDatafeedWrites =
139-
filterFailedDatafeedConfigWrites(failedDocumentIds, datafeedsToMigrate);
150+
filterFailedDatafeedConfigWrites(failedDocumentIds, jobsAndDatafeedsToMigrate.datafeedConfigs);
140151
removeFromClusterState(successfulJobWrites, successfulDatafeedWrites, unMarkMigrationInProgress);
141152
},
142153
unMarkMigrationInProgress::onFailure
@@ -341,6 +352,62 @@ public static List<DatafeedConfig> stoppedDatafeedConfigs(ClusterState clusterSt
341352
.collect(Collectors.toList());
342353
}
343354

355+
public static class JobsAndDatafeeds {
356+
List<Job> jobs;
357+
List<DatafeedConfig> datafeedConfigs;
358+
359+
private JobsAndDatafeeds() {
360+
jobs = new ArrayList<>();
361+
datafeedConfigs = new ArrayList<>();
362+
}
363+
364+
public int totalCount() {
365+
return jobs.size() + datafeedConfigs.size();
366+
}
367+
}
368+
369+
/**
370+
* Return at most {@link #MAX_BULK_WRITE_SIZE} configs favouring
371+
* datafeed and job pairs so if a datafeed is chosen so is its job.
372+
*
373+
* @param datafeedsToMigrate Datafeed configs
374+
* @param jobsToMigrate Job configs
375+
* @return Job and datafeed configs
376+
*/
377+
public static JobsAndDatafeeds limitWrites(Collection<DatafeedConfig> datafeedsToMigrate, Map<String, Job> jobsToMigrate) {
378+
JobsAndDatafeeds jobsAndDatafeeds = new JobsAndDatafeeds();
379+
380+
if (datafeedsToMigrate.size() + jobsToMigrate.size() <= MAX_BULK_WRITE_SIZE) {
381+
jobsAndDatafeeds.jobs.addAll(jobsToMigrate.values());
382+
jobsAndDatafeeds.datafeedConfigs.addAll(datafeedsToMigrate);
383+
return jobsAndDatafeeds;
384+
}
385+
386+
int count = 0;
387+
388+
// prioritise datafeed and job pairs
389+
for (DatafeedConfig datafeedConfig : datafeedsToMigrate) {
390+
if (count < MAX_BULK_WRITE_SIZE) {
391+
jobsAndDatafeeds.datafeedConfigs.add(datafeedConfig);
392+
count++;
393+
Job datafeedsJob = jobsToMigrate.remove(datafeedConfig.getJobId());
394+
if (datafeedsJob != null) {
395+
jobsAndDatafeeds.jobs.add(datafeedsJob);
396+
count++;
397+
}
398+
}
399+
}
400+
401+
// are there jobs without datafeeds to migrate
402+
Iterator<Job> iter = jobsToMigrate.values().iterator();
403+
while (iter.hasNext() && count < MAX_BULK_WRITE_SIZE) {
404+
jobsAndDatafeeds.jobs.add(iter.next());
405+
count++;
406+
}
407+
408+
return jobsAndDatafeeds;
409+
}
410+
344411
/**
345412
* Check for failures in the bulk response and return the
346413
* Ids of any documents not written to the index

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlConfigMigratorTests.java

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,11 @@
2424
import java.util.ArrayList;
2525
import java.util.Arrays;
2626
import java.util.Collections;
27+
import java.util.HashMap;
2728
import java.util.List;
29+
import java.util.Map;
2830
import java.util.Set;
31+
import java.util.stream.Collectors;
2932

3033
import static org.hamcrest.Matchers.contains;
3134
import static org.hamcrest.Matchers.containsInAnyOrder;
@@ -212,6 +215,83 @@ public void testRemoveJobsAndDatafeeds_removeSome() {
212215
}
213216

214217

218+
public void testLimitWrites_GivenBelowLimit() {
219+
MlConfigMigrator.JobsAndDatafeeds jobsAndDatafeeds = MlConfigMigrator.limitWrites(Collections.emptyList(), Collections.emptyMap());
220+
assertThat(jobsAndDatafeeds.datafeedConfigs, empty());
221+
assertThat(jobsAndDatafeeds.jobs, empty());
222+
223+
List<DatafeedConfig> datafeeds = new ArrayList<>();
224+
Map<String, Job> jobs = new HashMap<>();
225+
226+
int numDatafeeds = MlConfigMigrator.MAX_BULK_WRITE_SIZE / 2;
227+
for (int i=0; i<numDatafeeds; i++) {
228+
String jobId = "job" + i;
229+
jobs.put(jobId, JobTests.buildJobBuilder(jobId).build());
230+
datafeeds.add(createCompatibleDatafeed(jobId));
231+
}
232+
233+
jobsAndDatafeeds = MlConfigMigrator.limitWrites(datafeeds, jobs);
234+
assertThat(jobsAndDatafeeds.datafeedConfigs, hasSize(numDatafeeds));
235+
assertThat(jobsAndDatafeeds.jobs, hasSize(numDatafeeds));
236+
}
237+
238+
public void testLimitWrites_GivenAboveLimit() {
239+
List<DatafeedConfig> datafeeds = new ArrayList<>();
240+
Map<String, Job> jobs = new HashMap<>();
241+
242+
int numDatafeeds = MlConfigMigrator.MAX_BULK_WRITE_SIZE / 2 + 10;
243+
for (int i=0; i<numDatafeeds; i++) {
244+
String jobId = "job" + i;
245+
jobs.put(jobId, JobTests.buildJobBuilder(jobId).build());
246+
datafeeds.add(createCompatibleDatafeed(jobId));
247+
}
248+
249+
MlConfigMigrator.JobsAndDatafeeds jobsAndDatafeeds = MlConfigMigrator.limitWrites(datafeeds, jobs);
250+
assertEquals(MlConfigMigrator.MAX_BULK_WRITE_SIZE, jobsAndDatafeeds.totalCount());
251+
assertThat(jobsAndDatafeeds.datafeedConfigs, hasSize(MlConfigMigrator.MAX_BULK_WRITE_SIZE / 2));
252+
assertThat(jobsAndDatafeeds.jobs, hasSize(MlConfigMigrator.MAX_BULK_WRITE_SIZE / 2));
253+
254+
// assert that for each datafeed its corresponding job is selected
255+
Set<String> selectedJobIds = jobsAndDatafeeds.jobs.stream().map(Job::getId).collect(Collectors.toSet());
256+
Set<String> datafeedJobIds = jobsAndDatafeeds.datafeedConfigs.stream().map(DatafeedConfig::getJobId).collect(Collectors.toSet());
257+
assertEquals(selectedJobIds, datafeedJobIds);
258+
}
259+
260+
public void testLimitWrites_GivenMoreJobsThanDatafeeds() {
261+
List<DatafeedConfig> datafeeds = new ArrayList<>();
262+
Map<String, Job> jobs = new HashMap<>();
263+
264+
int numDatafeeds = MlConfigMigrator.MAX_BULK_WRITE_SIZE / 2 - 10;
265+
for (int i=0; i<numDatafeeds; i++) {
266+
String jobId = "job" + i;
267+
jobs.put(jobId, JobTests.buildJobBuilder(jobId).build());
268+
datafeeds.add(createCompatibleDatafeed(jobId));
269+
}
270+
271+
for (int i=numDatafeeds; i<numDatafeeds + 40; i++) {
272+
String jobId = "job" + i;
273+
jobs.put(jobId, JobTests.buildJobBuilder(jobId).build());
274+
}
275+
276+
MlConfigMigrator.JobsAndDatafeeds jobsAndDatafeeds = MlConfigMigrator.limitWrites(datafeeds, jobs);
277+
assertEquals(MlConfigMigrator.MAX_BULK_WRITE_SIZE, jobsAndDatafeeds.totalCount());
278+
assertThat(jobsAndDatafeeds.datafeedConfigs, hasSize(numDatafeeds));
279+
assertThat(jobsAndDatafeeds.jobs, hasSize(MlConfigMigrator.MAX_BULK_WRITE_SIZE - numDatafeeds));
280+
281+
// assert that for each datafeed its corresponding job is selected
282+
Set<String> selectedJobIds = jobsAndDatafeeds.jobs.stream().map(Job::getId).collect(Collectors.toSet());
283+
Set<String> datafeedJobIds = jobsAndDatafeeds.datafeedConfigs.stream().map(DatafeedConfig::getJobId).collect(Collectors.toSet());
284+
assertTrue(selectedJobIds.containsAll(datafeedJobIds));
285+
}
286+
287+
public void testLimitWrites_GivenNullJob() {
288+
List<DatafeedConfig> datafeeds = Collections.singletonList(createCompatibleDatafeed("no-job-for-this-datafeed"));
289+
MlConfigMigrator.JobsAndDatafeeds jobsAndDatafeeds = MlConfigMigrator.limitWrites(datafeeds, Collections.emptyMap());
290+
291+
assertThat(jobsAndDatafeeds.datafeedConfigs, hasSize(1));
292+
assertThat(jobsAndDatafeeds.jobs, empty());
293+
}
294+
215295
private DatafeedConfig createCompatibleDatafeed(String jobId) {
216296
// create a datafeed without aggregations or anything
217297
// else that may cause validation errors

0 commit comments

Comments
 (0)