Skip to content

Commit 4808c65

Browse files
committed
[ML] Snapshot ml configs before migrating (#36645)
1 parent 3449283 commit 4808c65

File tree

4 files changed, with 165 additions and 71 deletions.

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAssignmentNotifier.java

Lines changed: 18 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,7 @@
1010
import org.elasticsearch.action.ActionListener;
1111
import org.elasticsearch.client.Client;
1212
import org.elasticsearch.cluster.ClusterChangedEvent;
13-
import org.elasticsearch.cluster.ClusterState;
1413
import org.elasticsearch.cluster.ClusterStateListener;
15-
import org.elasticsearch.cluster.LocalNodeMasterListener;
1614
import org.elasticsearch.cluster.node.DiscoveryNode;
1715
import org.elasticsearch.cluster.service.ClusterService;
1816
import org.elasticsearch.common.settings.Settings;
@@ -26,75 +24,57 @@
2624
import org.elasticsearch.xpack.ml.notifications.Auditor;
2725

2826
import java.util.Objects;
29-
import java.util.concurrent.atomic.AtomicBoolean;
3027

31-
public class MlAssignmentNotifier implements ClusterStateListener, LocalNodeMasterListener {
3228

29+
public class MlAssignmentNotifier implements ClusterStateListener {
3330
private static final Logger logger = LogManager.getLogger(MlAssignmentNotifier.class);
3431

3532
private final Auditor auditor;
36-
private final ClusterService clusterService;
3733
private final MlConfigMigrator mlConfigMigrator;
3834
private final ThreadPool threadPool;
39-
private final AtomicBoolean enabled = new AtomicBoolean(false);
4035

4136
MlAssignmentNotifier(Settings settings, Auditor auditor, ThreadPool threadPool, Client client, ClusterService clusterService) {
4237
this.auditor = auditor;
43-
this.clusterService = clusterService;
4438
this.mlConfigMigrator = new MlConfigMigrator(settings, client, clusterService);
4539
this.threadPool = threadPool;
46-
clusterService.addLocalNodeMasterListener(this);
40+
clusterService.addListener(this);
4741
}
4842

4943
MlAssignmentNotifier(Auditor auditor, ThreadPool threadPool, MlConfigMigrator mlConfigMigrator, ClusterService clusterService) {
5044
this.auditor = auditor;
51-
this.clusterService = clusterService;
5245
this.mlConfigMigrator = mlConfigMigrator;
5346
this.threadPool = threadPool;
54-
clusterService.addLocalNodeMasterListener(this);
47+
clusterService.addListener(this);
5548
}
5649

57-
@Override
58-
public void onMaster() {
59-
if (enabled.compareAndSet(false, true)) {
60-
clusterService.addListener(this);
61-
}
62-
}
63-
64-
@Override
65-
public void offMaster() {
66-
if (enabled.compareAndSet(true, false)) {
67-
clusterService.removeListener(this);
68-
}
69-
}
70-
71-
@Override
72-
public String executorName() {
50+
private String executorName() {
7351
return ThreadPool.Names.GENERIC;
7452
}
7553

7654
@Override
7755
public void clusterChanged(ClusterChangedEvent event) {
78-
if (enabled.get() == false) {
79-
return;
80-
}
81-
if (event.metaDataChanged() == false) {
56+
57+
if (event.localNodeMaster() == false) {
8258
return;
8359
}
84-
PersistentTasksCustomMetaData previous = event.previousState().getMetaData().custom(PersistentTasksCustomMetaData.TYPE);
85-
PersistentTasksCustomMetaData current = event.state().getMetaData().custom(PersistentTasksCustomMetaData.TYPE);
8660

8761
mlConfigMigrator.migrateConfigsWithoutTasks(event.state(), ActionListener.wrap(
88-
response -> threadPool.executor(executorName()).execute(() -> auditChangesToMlTasks(current, previous, event.state())),
62+
response -> threadPool.executor(executorName()).execute(() -> auditChangesToMlTasks(event)),
8963
e -> {
9064
logger.error("error migrating ml configurations", e);
91-
threadPool.executor(executorName()).execute(() -> auditChangesToMlTasks(current, previous, event.state()));
65+
threadPool.executor(executorName()).execute(() -> auditChangesToMlTasks(event));
9266
}
9367
));
9468
}
9569

96-
private void auditChangesToMlTasks(PersistentTasksCustomMetaData current, PersistentTasksCustomMetaData previous,
97-
ClusterState state) {
70+
private void auditChangesToMlTasks(ClusterChangedEvent event) {
71+
72+
if (event.metaDataChanged() == false) {
73+
return;
74+
}
75+
76+
PersistentTasksCustomMetaData previous = event.previousState().getMetaData().custom(PersistentTasksCustomMetaData.TYPE);
77+
PersistentTasksCustomMetaData current = event.state().getMetaData().custom(PersistentTasksCustomMetaData.TYPE);
9878

9979
if (Objects.equals(previous, current)) {
10080
return;
@@ -112,7 +92,7 @@ private void auditChangesToMlTasks(PersistentTasksCustomMetaData current, Persis
11292
if (currentAssignment.getExecutorNode() == null) {
11393
auditor.warning(jobId, "No node found to open job. Reasons [" + currentAssignment.getExplanation() + "]");
11494
} else {
115-
DiscoveryNode node = state.nodes().get(currentAssignment.getExecutorNode());
95+
DiscoveryNode node = event.state().nodes().get(currentAssignment.getExecutorNode());
11696
auditor.info(jobId, "Opening job on node [" + node.toString() + "]");
11797
}
11898
} else if (MlTasks.DATAFEED_TASK_NAME.equals(currentTask.getTaskName())) {
@@ -126,7 +106,7 @@ private void auditChangesToMlTasks(PersistentTasksCustomMetaData current, Persis
126106
auditor.warning(jobId, msg);
127107
}
128108
} else {
129-
DiscoveryNode node = state.nodes().get(currentAssignment.getExecutorNode());
109+
DiscoveryNode node = event.state().nodes().get(currentAssignment.getExecutorNode());
130110
if (jobId != null) {
131111
auditor.info(jobId, "Starting datafeed [" + datafeedParams.getDatafeedId() + "] on node [" + node + "]");
132112
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlConfigMigrator.java

Lines changed: 65 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,14 @@
99
import org.apache.logging.log4j.Logger;
1010
import org.elasticsearch.Version;
1111
import org.elasticsearch.action.ActionListener;
12+
import org.elasticsearch.action.DocWriteRequest;
13+
import org.elasticsearch.action.DocWriteResponse;
1214
import org.elasticsearch.action.bulk.BulkItemResponse;
1315
import org.elasticsearch.action.bulk.BulkRequestBuilder;
1416
import org.elasticsearch.action.bulk.BulkResponse;
1517
import org.elasticsearch.action.index.IndexRequest;
18+
import org.elasticsearch.action.index.IndexRequestBuilder;
19+
import org.elasticsearch.action.index.IndexResponse;
1620
import org.elasticsearch.action.support.WriteRequest;
1721
import org.elasticsearch.client.Client;
1822
import org.elasticsearch.cluster.ClusterState;
@@ -31,12 +35,14 @@
3135
import org.elasticsearch.xpack.core.ml.job.config.Job;
3236
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
3337
import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings;
38+
import org.elasticsearch.xpack.core.ml.utils.ToXContentParams;
3439
import org.elasticsearch.xpack.ml.datafeed.persistence.DatafeedConfigProvider;
3540
import org.elasticsearch.xpack.ml.job.persistence.JobConfigProvider;
3641

3742
import java.io.IOException;
3843
import java.util.ArrayList;
3944
import java.util.Collection;
45+
import java.util.Collections;
4046
import java.util.HashMap;
4147
import java.util.HashSet;
4248
import java.util.Iterator;
@@ -90,12 +96,14 @@ public class MlConfigMigrator {
9096
private final MlConfigMigrationEligibilityCheck migrationEligibilityCheck;
9197

9298
private final AtomicBoolean migrationInProgress;
99+
private final AtomicBoolean firstTime;
93100

94101
// Builds a migrator around the given client and cluster service; both are rejected with a
// NullPointerException (via Objects.requireNonNull) if null.
public MlConfigMigrator(Settings settings, Client client, ClusterService clusterService) {
95102
this.client = Objects.requireNonNull(client);
96103
this.clusterService = Objects.requireNonNull(clusterService);
97104
this.migrationEligibilityCheck = new MlConfigMigrationEligibilityCheck(settings, clusterService);
98105
this.migrationInProgress = new AtomicBoolean(false);
106+
this.firstTime = new AtomicBoolean(true); // stays true until migrateConfigsWithoutTasks has seen snapshotMlMeta respond successfully
99107
}
100108

101109
/**
@@ -145,8 +153,23 @@ public void migrateConfigsWithoutTasks(ClusterState clusterState, ActionListener
145153
}
146154
);
147155

156+
if (firstTime.get()) {
157+
snapshotMlMeta(MlMetadata.getMlMetadata(clusterState), ActionListener.wrap(
158+
response -> {
159+
firstTime.set(false);
160+
migrate(jobsAndDatafeedsToMigrate, unMarkMigrationInProgress);
161+
},
162+
unMarkMigrationInProgress::onFailure
163+
));
164+
return;
165+
}
166+
167+
migrate(jobsAndDatafeedsToMigrate, unMarkMigrationInProgress);
168+
}
169+
170+
private void migrate(JobsAndDatafeeds jobsAndDatafeedsToMigrate, ActionListener<Boolean> listener) {
148171
if (jobsAndDatafeedsToMigrate.totalCount() == 0) {
149-
unMarkMigrationInProgress.onResponse(Boolean.FALSE);
172+
listener.onResponse(Boolean.FALSE);
150173
return;
151174
}
152175

@@ -157,9 +180,9 @@ public void migrateConfigsWithoutTasks(ClusterState clusterState, ActionListener
157180
List<String> successfulJobWrites = filterFailedJobConfigWrites(failedDocumentIds, jobsAndDatafeedsToMigrate.jobs);
158181
List<String> successfulDatafeedWrites =
159182
filterFailedDatafeedConfigWrites(failedDocumentIds, jobsAndDatafeedsToMigrate.datafeedConfigs);
160-
removeFromClusterState(successfulJobWrites, successfulDatafeedWrites, unMarkMigrationInProgress);
183+
removeFromClusterState(successfulJobWrites, successfulDatafeedWrites, listener);
161184
},
162-
unMarkMigrationInProgress::onFailure
185+
listener::onFailure
163186
));
164187
}
165188

@@ -299,6 +322,45 @@ private IndexRequest indexRequest(ToXContentObject source, String documentId, To
299322
return indexRequest;
300323
}
301324

325+
326+
// public for testing
327+
public void snapshotMlMeta(MlMetadata mlMetadata, ActionListener<Boolean> listener) {
328+
329+
if (mlMetadata.getJobs().isEmpty() && mlMetadata.getDatafeeds().isEmpty()) {
330+
listener.onResponse(Boolean.TRUE);
331+
return;
332+
}
333+
334+
logger.debug("taking a snapshot of mlmetadata");
335+
String documentId = "ml-config";
336+
IndexRequestBuilder indexRequest = client.prepareIndex(AnomalyDetectorsIndex.jobStateIndexName(),
337+
ElasticsearchMappings.DOC_TYPE, documentId)
338+
.setOpType(DocWriteRequest.OpType.CREATE);
339+
340+
ToXContent.MapParams params = new ToXContent.MapParams(Collections.singletonMap(ToXContentParams.FOR_INTERNAL_STORAGE, "true"));
341+
try (XContentBuilder builder = XContentFactory.jsonBuilder()) {
342+
builder.startObject();
343+
mlMetadata.toXContent(builder, params);
344+
builder.endObject();
345+
346+
indexRequest.setSource(builder);
347+
} catch (IOException e) {
348+
logger.error("failed to serialise mlmetadata", e);
349+
listener.onFailure(e);
350+
return;
351+
}
352+
353+
executeAsyncWithOrigin(client.threadPool().getThreadContext(), ML_ORIGIN, indexRequest.request(),
354+
ActionListener.<IndexResponse>wrap(
355+
indexResponse -> {
356+
listener.onResponse(indexResponse.getResult() == DocWriteResponse.Result.CREATED);
357+
},
358+
listener::onFailure),
359+
client::index
360+
);
361+
}
362+
363+
302364
public static Job updateJobForMigration(Job job) {
303365
Job.Builder builder = new Job.Builder(job);
304366
Map<String, Object> custom = job.getCustomSettings() == null ? new HashMap<>() : new HashMap<>(job.getCustomSettings());

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlAssignmentNotifierTests.java

Lines changed: 49 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,6 @@
3131
import static org.mockito.Matchers.eq;
3232
import static org.mockito.Mockito.doAnswer;
3333
import static org.mockito.Mockito.mock;
34-
import static org.mockito.Mockito.never;
3534
import static org.mockito.Mockito.times;
3635
import static org.mockito.Mockito.verify;
3736
import static org.mockito.Mockito.verifyNoMoreInteractions;
@@ -69,34 +68,39 @@ private void setupMocks() {
6968

7069
public void testClusterChanged_info() {
7170
MlAssignmentNotifier notifier = new MlAssignmentNotifier(auditor, threadPool, configMigrator, clusterService);
72-
notifier.onMaster();
7371

74-
DiscoveryNode node =
75-
new DiscoveryNode("node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), Version.CURRENT);
7672
ClusterState previous = ClusterState.builder(new ClusterName("_name"))
7773
.metaData(MetaData.builder().putCustom(PersistentTasksCustomMetaData.TYPE,
7874
new PersistentTasksCustomMetaData(0L, Collections.emptyMap())))
7975
.build();
8076

8177
PersistentTasksCustomMetaData.Builder tasksBuilder = PersistentTasksCustomMetaData.builder();
82-
addJobTask("job_id", "node_id", null, tasksBuilder);
78+
addJobTask("job_id", "_node_id", null, tasksBuilder);
8379
MetaData metaData = MetaData.builder().putCustom(PersistentTasksCustomMetaData.TYPE, tasksBuilder.build()).build();
84-
ClusterState state = ClusterState.builder(new ClusterName("_name"))
80+
ClusterState newState = ClusterState.builder(new ClusterName("_name"))
8581
.metaData(metaData)
86-
.nodes(DiscoveryNodes.builder().add(node))
82+
// set local node master
83+
.nodes(DiscoveryNodes.builder()
84+
.add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), Version.CURRENT))
85+
.localNodeId("_node_id")
86+
.masterNodeId("_node_id"))
8787
.build();
88-
notifier.clusterChanged(new ClusterChangedEvent("_test", state, previous));
88+
notifier.clusterChanged(new ClusterChangedEvent("_test", newState, previous));
8989
verify(auditor, times(1)).info(eq("job_id"), any());
90-
verify(configMigrator, times(1)).migrateConfigsWithoutTasks(eq(state), any());
90+
verify(configMigrator, times(1)).migrateConfigsWithoutTasks(eq(newState), any());
9191

92-
notifier.offMaster();
93-
notifier.clusterChanged(new ClusterChangedEvent("_test", state, previous));
92+
// no longer master
93+
newState = ClusterState.builder(new ClusterName("_name"))
94+
.metaData(metaData)
95+
.nodes(DiscoveryNodes.builder()
96+
.add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), Version.CURRENT)))
97+
.build();
98+
notifier.clusterChanged(new ClusterChangedEvent("_test", newState, previous));
9499
verifyNoMoreInteractions(auditor);
95100
}
96101

97102
public void testClusterChanged_warning() {
98103
MlAssignmentNotifier notifier = new MlAssignmentNotifier(auditor, threadPool, configMigrator, clusterService);
99-
notifier.onMaster();
100104

101105
ClusterState previous = ClusterState.builder(new ClusterName("_name"))
102106
.metaData(MetaData.builder().putCustom(PersistentTasksCustomMetaData.TYPE,
@@ -106,21 +110,31 @@ public void testClusterChanged_warning() {
106110
PersistentTasksCustomMetaData.Builder tasksBuilder = PersistentTasksCustomMetaData.builder();
107111
addJobTask("job_id", null, null, tasksBuilder);
108112
MetaData metaData = MetaData.builder().putCustom(PersistentTasksCustomMetaData.TYPE, tasksBuilder.build()).build();
109-
ClusterState state = ClusterState.builder(new ClusterName("_name"))
113+
ClusterState newState = ClusterState.builder(new ClusterName("_name"))
110114
.metaData(metaData)
115+
// set local node master
116+
.nodes(DiscoveryNodes.builder()
117+
.add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9200), Version.CURRENT))
118+
.localNodeId("_node_id")
119+
.masterNodeId("_node_id"))
111120
.build();
112-
notifier.clusterChanged(new ClusterChangedEvent("_test", state, previous));
121+
notifier.clusterChanged(new ClusterChangedEvent("_test", newState, previous));
113122
verify(auditor, times(1)).warning(eq("job_id"), any());
114-
verify(configMigrator, times(1)).migrateConfigsWithoutTasks(eq(state), any());
123+
verify(configMigrator, times(1)).migrateConfigsWithoutTasks(eq(newState), any());
124+
125+
// no longer master
126+
newState = ClusterState.builder(new ClusterName("_name"))
127+
.metaData(metaData)
128+
.nodes(DiscoveryNodes.builder()
129+
.add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9200), Version.CURRENT)))
130+
.build();
115131

116-
notifier.offMaster();
117-
notifier.clusterChanged(new ClusterChangedEvent("_test", state, previous));
132+
notifier.clusterChanged(new ClusterChangedEvent("_test", newState, previous));
118133
verifyNoMoreInteractions(auditor);
119134
}
120135

121136
public void testClusterChanged_noPersistentTaskChanges() {
122137
MlAssignmentNotifier notifier = new MlAssignmentNotifier(auditor, threadPool, configMigrator, clusterService);
123-
notifier.onMaster();
124138

125139
PersistentTasksCustomMetaData.Builder tasksBuilder = PersistentTasksCustomMetaData.builder();
126140
addJobTask("job_id", null, null, tasksBuilder);
@@ -129,14 +143,26 @@ public void testClusterChanged_noPersistentTaskChanges() {
129143
.metaData(metaData)
130144
.build();
131145

132-
ClusterState current = ClusterState.builder(new ClusterName("_name"))
146+
ClusterState newState = ClusterState.builder(new ClusterName("_name"))
133147
.metaData(metaData)
148+
// set local node master
149+
.nodes(DiscoveryNodes.builder()
150+
.add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9200), Version.CURRENT))
151+
.localNodeId("_node_id")
152+
.masterNodeId("_node_id"))
134153
.build();
135154

136-
notifier.clusterChanged(new ClusterChangedEvent("_test", current, previous));
137-
verify(configMigrator, never()).migrateConfigsWithoutTasks(any(), any());
155+
notifier.clusterChanged(new ClusterChangedEvent("_test", newState, previous));
156+
verify(configMigrator, times(1)).migrateConfigsWithoutTasks(any(), any());
157+
verifyNoMoreInteractions(auditor);
138158

139-
notifier.offMaster();
140-
verify(configMigrator, never()).migrateConfigsWithoutTasks(any(), any());
159+
// no longer master
160+
newState = ClusterState.builder(new ClusterName("_name"))
161+
.metaData(metaData)
162+
.nodes(DiscoveryNodes.builder()
163+
.add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9200), Version.CURRENT)))
164+
.build();
165+
notifier.clusterChanged(new ClusterChangedEvent("_test", newState, previous));
166+
verifyNoMoreInteractions(configMigrator);
141167
}
142168
}

0 commit comments

Comments
 (0)