Skip to content

Commit 88489a3

Browse files
authored
Backport rolling upgrade multi cluster module (#38859)
* Add rolling upgrade multi cluster test module (#38277) This test starts 2 clusters, each with 3 nodes. First the leader cluster is started and tests are run against it, and then the follower cluster is started and tests are run against these two clusters. Then the follower cluster is upgraded, one node at a time. After that the leader cluster is upgraded, one node at a time. Every time a node is upgraded, tests are run while both clusters are online (and either the leader cluster or the follower cluster has mixed node versions). This commit only tests CCR index following, but could be used for CCS tests as well. In particular for CCR, unidirectional index following is tested during a rolling upgrade. During the test several indices are created and followed in the leader cluster before or while the follower cluster is being upgraded. This test also verifies that attempting to follow an index in the upgraded cluster from the not yet upgraded cluster fails. After both clusters are upgraded, following the index that previously failed should succeed. Relates to #37231 and #38037 * Filter out upgraded version index settings when starting index following (#38838) The `index.version.upgraded` and `index.version.upgraded_string` settings are likely to be different between the leader and follower index, for example in the event that a follower index gets restored on an upgraded node while the leader index is still on non-upgraded nodes. Closes #38835
1 parent 60c1dcd commit 88489a3

File tree

5 files changed

+621
-2
lines changed

5 files changed

+621
-2
lines changed

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,8 @@ static void validate(
243243
Settings leaderSettings = filter(leaderIndex.getSettings());
244244
Settings followerSettings = filter(followIndex.getSettings());
245245
if (leaderSettings.equals(followerSettings) == false) {
246-
throw new IllegalArgumentException("the leader and follower index settings must be identical");
246+
throw new IllegalArgumentException("the leader index setting[" + leaderSettings + "] and follower index settings [" +
247+
followerSettings + "] must be identical");
247248
}
248249

249250
// Validates if the current follower mapping is mergeable with the leader mapping.
@@ -456,6 +457,11 @@ static Settings filter(Settings originalSettings) {
456457
settings.remove(IndexMetaData.SETTING_INDEX_PROVIDED_NAME);
457458
settings.remove(IndexMetaData.SETTING_CREATION_DATE);
458459

460+
// Follower index may be upgraded, while the leader index hasn't been upgraded, so it is expected
461+
// that these settings are different:
462+
settings.remove(IndexMetaData.SETTING_VERSION_UPGRADED);
463+
settings.remove(IndexMetaData.SETTING_VERSION_UPGRADED_STRING);
464+
459465
Iterator<String> iterator = settings.keys().iterator();
460466
while (iterator.hasNext()) {
461467
String key = iterator.next();

x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowActionTests.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,10 @@ public void testValidation() throws IOException {
150150
.put("index.analysis.analyzer.my_analyzer.type", "custom")
151151
.put("index.analysis.analyzer.my_analyzer.tokenizer", "standard").build(), customMetaData);
152152
Exception e = expectThrows(IllegalArgumentException.class, () -> validate(request, leaderIMD, followIMD, UUIDs, null));
153-
assertThat(e.getMessage(), equalTo("the leader and follower index settings must be identical"));
153+
assertThat(e.getMessage(), equalTo("the leader index setting[{\"index.analysis.analyzer.my_analyzer.tokenizer\"" +
154+
":\"whitespace\",\"index.analysis.analyzer.my_analyzer.type\":\"custom\",\"index.number_of_shards\":\"5\"}] " +
155+
"and follower index settings [{\"index.analysis.analyzer.my_analyzer.tokenizer\":\"standard\"," +
156+
"\"index.analysis.analyzer.my_analyzer.type\":\"custom\",\"index.number_of_shards\":\"5\"}] must be identical"));
154157
}
155158
{
156159
// should fail because the following index does not have the following_index settings
@@ -242,6 +245,21 @@ public void testDynamicIndexSettingsAreClassified() {
242245
}
243246
}
244247

248+
public void testFilter() {
249+
Settings.Builder settings = Settings.builder();
250+
settings.put(CcrSettings.CCR_FOLLOWING_INDEX_SETTING.getKey(), "");
251+
settings.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), "");
252+
settings.put(IndexMetaData.SETTING_INDEX_VERSION_CREATED.getKey(), "");
253+
settings.put(IndexMetaData.SETTING_INDEX_UUID, "");
254+
settings.put(IndexMetaData.SETTING_INDEX_PROVIDED_NAME, "");
255+
settings.put(IndexMetaData.SETTING_CREATION_DATE, "");
256+
settings.put(IndexMetaData.SETTING_VERSION_UPGRADED, "");
257+
settings.put(IndexMetaData.SETTING_VERSION_UPGRADED_STRING, "");
258+
259+
Settings result = TransportResumeFollowAction.filter(settings.build());
260+
assertThat(result.size(), equalTo(0));
261+
}
262+
245263
private static IndexMetaData createIMD(String index,
246264
int numberOfShards,
247265
Settings settings,
Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
import org.elasticsearch.gradle.Version
2+
import org.elasticsearch.gradle.test.RestIntegTestTask
3+
4+
apply plugin: 'elasticsearch.standalone-test'
5+
6+
dependencies {
7+
// "org.elasticsearch.plugin:x-pack-core:${version}" doesn't work with idea because the testArtifacts are also here
8+
testCompile project(path: xpackModule('core'), configuration: 'default')
9+
testCompile project(path: xpackModule('core'), configuration: 'testArtifacts') // to be moved in a later commit
10+
}
11+
12+
// This is a top level task which we will add dependencies to below.
13+
// It is a single task that can be used to run backcompat tests against all versions.
14+
task bwcTest {
15+
description = 'Runs backwards compatibility tests.'
16+
group = 'verification'
17+
}
18+
19+
for (Version version : bwcVersions.wireCompatible) {
20+
String taskPrefix = "v${version}"
21+
22+
// ============================================================================================
23+
// Create leader cluster
24+
// ============================================================================================
25+
26+
RestIntegTestTask leaderClusterTest = tasks.create(name: "${taskPrefix}#leader#clusterTest", type: RestIntegTestTask) {
27+
mustRunAfter(precommit)
28+
}
29+
30+
configure(extensions.findByName("${taskPrefix}#leader#clusterTestCluster")) {
31+
bwcVersion = version
32+
numBwcNodes = 3
33+
numNodes = 3
34+
clusterName = 'leader'
35+
setting 'xpack.security.enabled', 'false'
36+
setting 'xpack.monitoring.enabled', 'false'
37+
setting 'xpack.ml.enabled', 'false'
38+
setting 'xpack.watcher.enabled', 'false'
39+
setting 'xpack.license.self_generated.type', 'trial'
40+
}
41+
42+
Task leaderClusterTestRunner = tasks.getByName("${taskPrefix}#leader#clusterTestRunner")
43+
leaderClusterTestRunner.configure {
44+
systemProperty 'tests.rest.upgrade_state', 'none'
45+
systemProperty 'tests.rest.cluster_name', 'leader'
46+
47+
systemProperty 'tests.leader_host', "${-> leaderClusterTest.nodes.get(0).httpUri()}"
48+
systemProperty 'tests.leader_remote_cluster_seed', "${-> leaderClusterTest.nodes.get(0).transportUri()}"
49+
}
50+
51+
// ============================================================================================
52+
// Create follower cluster
53+
// ============================================================================================
54+
55+
RestIntegTestTask followerClusterTest = tasks.create(name: "${taskPrefix}#follower#clusterTest", type: RestIntegTestTask) {
56+
mustRunAfter(precommit)
57+
}
58+
59+
configure(extensions.findByName("${taskPrefix}#follower#clusterTestCluster")) {
60+
dependsOn leaderClusterTestRunner
61+
bwcVersion = version
62+
numBwcNodes = 3
63+
numNodes = 3
64+
clusterName = 'follower'
65+
setting 'xpack.security.enabled', 'false'
66+
setting 'xpack.monitoring.enabled', 'false'
67+
setting 'xpack.ml.enabled', 'false'
68+
setting 'xpack.watcher.enabled', 'false'
69+
setting 'xpack.license.self_generated.type', 'trial'
70+
}
71+
72+
Task followerClusterTestRunner = tasks.getByName("${taskPrefix}#follower#clusterTestRunner")
73+
followerClusterTestRunner.configure {
74+
systemProperty 'tests.rest.upgrade_state', 'none'
75+
systemProperty 'tests.rest.cluster_name', 'follower'
76+
77+
systemProperty 'tests.leader_host', "${-> leaderClusterTest.nodes.get(0).httpUri()}"
78+
systemProperty 'tests.leader_remote_cluster_seed', "${-> leaderClusterTest.nodes.get(0).transportUri()}"
79+
80+
systemProperty 'tests.follower_host', "${-> followerClusterTest.nodes.get(0).httpUri()}"
81+
systemProperty 'tests.follower_remote_cluster_seed', "${-> followerClusterTest.nodes.get(0).transportUri()}"
82+
}
83+
84+
// ============================================================================================
85+
// Upgrade follower cluster
86+
// ============================================================================================
87+
88+
Closure configureUpgradeCluster = {String prefix, String cluster, String name, Task lastRunner, int stopNode,
89+
RestIntegTestTask clusterTest, Closure getOtherUnicastHostAddresses ->
90+
configure(extensions.findByName("${prefix}#${cluster}#${name}")) {
91+
dependsOn lastRunner, "${prefix}#${cluster}#clusterTestCluster#node${stopNode}.stop"
92+
clusterName = cluster
93+
otherUnicastHostAddresses = { getOtherUnicastHostAddresses() }
94+
minimumMasterNodes = { 2 }
95+
autoSetInitialMasterNodes = false
96+
/* Override the data directory so the new node always gets the data
97+
* directory of the node we just stopped. */
98+
dataDir = { nodeNumber -> clusterTest.nodes[stopNode].dataDir }
99+
setting 'repositories.url.allowed_urls', 'http://snapshot.test*'
100+
setting 'xpack.security.enabled', 'false'
101+
setting 'xpack.monitoring.enabled', 'false'
102+
setting 'xpack.ml.enabled', 'false'
103+
setting 'xpack.watcher.enabled', 'false'
104+
setting 'xpack.license.self_generated.type', 'trial'
105+
setting 'node.name', "upgraded-node-${cluster}-${stopNode}"
106+
setting 'node.attr.upgraded', 'true'
107+
}
108+
}
109+
110+
Task followerOneThirdUpgradedTest = tasks.create(name: "${taskPrefix}#follower#oneThirdUpgradedTest", type: RestIntegTestTask)
111+
112+
configureUpgradeCluster(taskPrefix, 'follower', 'oneThirdUpgradedTestCluster', followerClusterTestRunner, 0, followerClusterTest,
113+
// Use all running nodes as seed nodes so there is no race between pinging and the tests
114+
{ [followerClusterTest.nodes.get(1).transportUri(), followerClusterTest.nodes.get(2).transportUri()] })
115+
116+
Task followerOneThirdUpgradedTestRunner = tasks.getByName("${taskPrefix}#follower#oneThirdUpgradedTestRunner")
117+
followerOneThirdUpgradedTestRunner.configure {
118+
systemProperty 'tests.rest.upgrade_state', 'one_third'
119+
systemProperty 'tests.rest.cluster_name', 'follower'
120+
121+
systemProperty 'tests.follower_host', "${-> followerClusterTest.nodes.get(1).httpUri()}"
122+
systemProperty 'tests.follower_remote_cluster_seed', "${-> followerClusterTest.nodes.get(1).transportUri()}"
123+
124+
systemProperty 'tests.leader_host', "${-> leaderClusterTest.nodes.get(0).httpUri()}"
125+
systemProperty 'tests.leader_remote_cluster_seed', "${-> leaderClusterTest.nodes.get(0).transportUri()}"
126+
127+
finalizedBy "${taskPrefix}#follower#clusterTestCluster#node1.stop"
128+
}
129+
130+
Task followerTwoThirdsUpgradedTest = tasks.create(name: "${taskPrefix}#follower#twoThirdsUpgradedTest", type: RestIntegTestTask)
131+
132+
configureUpgradeCluster(taskPrefix, 'follower', 'twoThirdsUpgradedTestCluster', followerOneThirdUpgradedTestRunner, 1, followerClusterTest,
133+
// Use all running nodes as seed nodes so there is no race between pinging and the tests
134+
{ [followerClusterTest.nodes.get(2).transportUri(), followerOneThirdUpgradedTest.nodes.get(0).transportUri()] })
135+
136+
Task followerTwoThirdsUpgradedTestRunner = tasks.getByName("${taskPrefix}#follower#twoThirdsUpgradedTestRunner")
137+
followerTwoThirdsUpgradedTestRunner.configure {
138+
systemProperty 'tests.rest.upgrade_state', 'two_third'
139+
systemProperty 'tests.rest.cluster_name', 'follower'
140+
141+
systemProperty 'tests.follower_host', "${-> followerClusterTest.nodes.get(2).httpUri()}"
142+
systemProperty 'tests.follower_remote_cluster_seed', "${-> followerClusterTest.nodes.get(2).transportUri()}"
143+
144+
systemProperty 'tests.leader_host', "${-> leaderClusterTest.nodes.get(0).httpUri()}"
145+
systemProperty 'tests.leader_remote_cluster_seed', "${-> leaderClusterTest.nodes.get(0).transportUri()}"
146+
147+
finalizedBy "${taskPrefix}#follower#clusterTestCluster#node2.stop"
148+
}
149+
150+
Task followerUpgradedClusterTest = tasks.create(name: "${taskPrefix}#follower#upgradedClusterTest", type: RestIntegTestTask)
151+
152+
configureUpgradeCluster(taskPrefix, 'follower', 'upgradedClusterTestCluster', followerTwoThirdsUpgradedTestRunner, 2, followerClusterTest,
153+
// Use all running nodes as seed nodes so there is no race between pinging and the tests
154+
{ [followerOneThirdUpgradedTest.nodes.get(0).transportUri(), followerTwoThirdsUpgradedTest.nodes.get(0).transportUri()] })
155+
156+
Task followerUpgradedClusterTestRunner = tasks.getByName("${taskPrefix}#follower#upgradedClusterTestRunner")
157+
followerUpgradedClusterTestRunner.configure {
158+
systemProperty 'tests.rest.upgrade_state', 'all'
159+
systemProperty 'tests.rest.cluster_name', 'follower'
160+
161+
systemProperty 'tests.follower_host', "${-> followerOneThirdUpgradedTest.nodes.get(0).httpUri()}"
162+
systemProperty 'tests.follower_remote_cluster_seed', "${-> followerOneThirdUpgradedTest.nodes.get(0).transportUri()}"
163+
164+
systemProperty 'tests.leader_host', "${-> leaderClusterTest.nodes.get(0).httpUri()}"
165+
systemProperty 'tests.leader_remote_cluster_seed', "${-> leaderClusterTest.nodes.get(0).transportUri()}"
166+
167+
// This is needed, otherwise leader node 0 will stop after the leaderClusterTestRunner task has run.
168+
// Here it is ok to stop, because in the next task, the leader node 0 gets upgraded.
169+
finalizedBy "v${version}#leader#clusterTestCluster#node0.stop"
170+
}
171+
172+
// ============================================================================================
173+
// Upgrade leader cluster
174+
// ============================================================================================
175+
176+
Task leaderOneThirdUpgradedTest = tasks.create(name: "${taskPrefix}#leader#oneThirdUpgradedTest", type: RestIntegTestTask)
177+
178+
configureUpgradeCluster(taskPrefix, 'leader', 'oneThirdUpgradedTestCluster', followerUpgradedClusterTestRunner, 0, leaderClusterTest,
179+
// Use all running nodes as seed nodes so there is no race between pinging and the tests
180+
{ [leaderClusterTest.nodes.get(1).transportUri(), leaderClusterTest.nodes.get(2).transportUri()] })
181+
182+
Task leaderOneThirdUpgradedTestRunner = tasks.getByName("${taskPrefix}#leader#oneThirdUpgradedTestRunner")
183+
leaderOneThirdUpgradedTestRunner.configure {
184+
systemProperty 'tests.rest.upgrade_state', 'one_third'
185+
systemProperty 'tests.rest.cluster_name', 'leader'
186+
187+
systemProperty 'tests.follower_host', "${-> followerUpgradedClusterTest.nodes.get(0).httpUri()}"
188+
systemProperty 'tests.follower_remote_cluster_seed', "${-> followerUpgradedClusterTest.nodes.get(0).transportUri()}"
189+
190+
systemProperty 'tests.leader_host', "${-> leaderClusterTest.nodes.get(2).httpUri()}"
191+
systemProperty 'tests.leader_remote_cluster_seed', "${-> leaderClusterTest.nodes.get(2).transportUri()}"
192+
193+
finalizedBy "${taskPrefix}#leader#clusterTestCluster#node1.stop"
194+
}
195+
196+
Task leaderTwoThirdsUpgradedTest = tasks.create(name: "${taskPrefix}#leader#twoThirdsUpgradedTest", type: RestIntegTestTask)
197+
198+
configureUpgradeCluster(taskPrefix, 'leader', 'twoThirdsUpgradedTestCluster', leaderOneThirdUpgradedTestRunner, 1, leaderClusterTest,
199+
// Use all running nodes as seed nodes so there is no race between pinging and the tests
200+
{ [leaderClusterTest.nodes.get(2).transportUri(), leaderOneThirdUpgradedTest.nodes.get(0).transportUri()] })
201+
202+
Task leaderTwoThirdsUpgradedTestRunner = tasks.getByName("${taskPrefix}#leader#twoThirdsUpgradedTestRunner")
203+
leaderTwoThirdsUpgradedTestRunner.configure {
204+
systemProperty 'tests.rest.upgrade_state', 'two_third'
205+
systemProperty 'tests.rest.cluster_name', 'leader'
206+
207+
systemProperty 'tests.follower_host', "${-> followerUpgradedClusterTest.nodes.get(0).httpUri()}"
208+
systemProperty 'tests.follower_remote_cluster_seed', "${-> followerUpgradedClusterTest.nodes.get(0).transportUri()}"
209+
210+
systemProperty 'tests.leader_host', "${-> leaderOneThirdUpgradedTest.nodes.get(0).httpUri()}"
211+
systemProperty 'tests.leader_remote_cluster_seed', "${-> leaderOneThirdUpgradedTest.nodes.get(0).transportUri()}"
212+
213+
finalizedBy "${taskPrefix}#leader#clusterTestCluster#node2.stop"
214+
}
215+
216+
Task leaderUpgradedClusterTest = tasks.create(name: "${taskPrefix}#leader#upgradedClusterTest", type: RestIntegTestTask)
217+
218+
configureUpgradeCluster(taskPrefix, 'leader', "upgradedClusterTestCluster", leaderTwoThirdsUpgradedTestRunner, 2, leaderClusterTest,
219+
// Use all running nodes as seed nodes so there is no race between pinging and the tests
220+
{ [leaderOneThirdUpgradedTest.nodes.get(0).transportUri(), leaderTwoThirdsUpgradedTest.nodes.get(0).transportUri()] })
221+
222+
Task leaderUpgradedClusterTestRunner = tasks.getByName("${taskPrefix}#leader#upgradedClusterTestRunner")
223+
leaderUpgradedClusterTestRunner.configure {
224+
systemProperty 'tests.rest.upgrade_state', 'all'
225+
systemProperty 'tests.rest.cluster_name', 'leader'
226+
227+
systemProperty 'tests.follower_host', "${-> followerUpgradedClusterTest.nodes.get(0).httpUri()}"
228+
systemProperty 'tests.follower_remote_cluster_seed', "${-> followerUpgradedClusterTest.nodes.get(0).transportUri()}"
229+
230+
systemProperty 'tests.leader_host', "${-> leaderTwoThirdsUpgradedTest.nodes.get(0).httpUri()}"
231+
systemProperty 'tests.leader_remote_cluster_seed', "${-> leaderTwoThirdsUpgradedTest.nodes.get(0).transportUri()}"
232+
233+
/*
234+
* Force stopping all the upgraded nodes after the test runner
235+
* so they are alive during the test.
236+
*/
237+
finalizedBy "${taskPrefix}#follower#oneThirdUpgradedTestCluster#stop"
238+
finalizedBy "${taskPrefix}#follower#twoThirdsUpgradedTestCluster#stop"
239+
finalizedBy "${taskPrefix}#follower#upgradedClusterTestCluster#stop"
240+
finalizedBy "${taskPrefix}#leader#oneThirdUpgradedTestCluster#stop"
241+
finalizedBy "${taskPrefix}#leader#twoThirdsUpgradedTestCluster#stop"
242+
}
243+
244+
if (project.bwc_tests_enabled) {
245+
Task versionBwcTest = tasks.create(name: "${taskPrefix}#bwcTest") {
246+
dependsOn = [leaderUpgradedClusterTest]
247+
}
248+
bwcTest.dependsOn(versionBwcTest)
249+
}
250+
}
251+
252+
unitTest.enabled = false // no unit tests for rolling upgrades, only the rest integration test
253+
254+
// basic integ tests includes testing bwc against the most recent version
255+
task integTest {
256+
if (project.bwc_tests_enabled) {
257+
for (final def version : bwcVersions.unreleasedWireCompatible) {
258+
dependsOn "v${version}#bwcTest"
259+
}
260+
}
261+
}
262+
check.dependsOn(integTest)

0 commit comments

Comments
 (0)