Skip to content

Commit 133e34d

Browse files
authored
Distinguish missing and invalid repositories (#85551)
The `RepositoryService` on each node maintains a repository instance for every repository defined in the cluster state. The master validates each repository definition before inserting it into the cluster state, but in some cases this validation is incomplete. For instance, there may be node-local configuration of which the master is unaware which prevents the repository from being instantiated on some other node in the cluster. Today if a repository cannot be instantiated then the node will log a warning and continue as if the repository doesn't exist. This results in a confusing `RepositoryMissingException` when trying to use the repository, and various other surprises (e.g. #85550). With this commit we create a placeholder `InvalidRepository` which reports a more accurate exception when it is used. Relates #82457 which did the same sort of thing for unknown plugins. Closes #85550 since the repository in question is no longer `null`.
1 parent 737eace commit 133e34d

File tree

8 files changed

+467
-6
lines changed

8 files changed

+467
-6
lines changed

docs/changelog/85551.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 85551
2+
summary: "Distinguish missing and invalid repositories"
3+
area: Snapshot/Restore
4+
type: bug
5+
issues: [85550]
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.repositories;
10+
11+
import org.elasticsearch.action.admin.cluster.repositories.get.GetRepositoriesResponse;
12+
import org.elasticsearch.action.admin.cluster.repositories.verify.VerifyRepositoryResponse;
13+
import org.elasticsearch.cluster.metadata.RepositoryMetadata;
14+
import org.elasticsearch.cluster.service.ClusterService;
15+
import org.elasticsearch.common.settings.Setting;
16+
import org.elasticsearch.common.settings.Settings;
17+
import org.elasticsearch.common.util.BigArrays;
18+
import org.elasticsearch.env.Environment;
19+
import org.elasticsearch.indices.recovery.RecoverySettings;
20+
import org.elasticsearch.plugins.Plugin;
21+
import org.elasticsearch.plugins.RepositoryPlugin;
22+
import org.elasticsearch.snapshots.mockstore.MockRepository;
23+
import org.elasticsearch.test.ESIntegTestCase;
24+
import org.elasticsearch.xcontent.NamedXContentRegistry;
25+
26+
import java.util.Arrays;
27+
import java.util.Collection;
28+
import java.util.Collections;
29+
import java.util.List;
30+
import java.util.Map;
31+
32+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
33+
import static org.hamcrest.Matchers.equalTo;
34+
import static org.hamcrest.Matchers.hasSize;
35+
import static org.hamcrest.Matchers.isA;
36+
37+
public class InvalidRepositoryIT extends ESIntegTestCase {
38+
@Override
39+
protected Collection<Class<? extends Plugin>> nodePlugins() {
40+
return Collections.singletonList(UnstableRepository.Plugin.class);
41+
}
42+
43+
public static class UnstableRepository extends MockRepository {
44+
public static final String TYPE = "unstable";
45+
public static final Setting<List<String>> UNSTABLE_NODES = Setting.stringListSetting(
46+
"repository.unstable_nodes",
47+
Setting.Property.NodeScope,
48+
Setting.Property.Dynamic
49+
);
50+
51+
public UnstableRepository(
52+
RepositoryMetadata metadata,
53+
Environment environment,
54+
NamedXContentRegistry namedXContentRegistry,
55+
ClusterService clusterService,
56+
BigArrays bigArrays,
57+
RecoverySettings recoverySettings
58+
) {
59+
super(metadata, environment, namedXContentRegistry, clusterService, bigArrays, recoverySettings);
60+
List<String> unstableNodes = UNSTABLE_NODES.get(metadata.settings());
61+
if (unstableNodes.contains(clusterService.getNodeName())) {
62+
throw new RepositoryException(metadata.name(), "Failed to create repository: current node is not stable");
63+
}
64+
}
65+
66+
public static class Plugin extends org.elasticsearch.plugins.Plugin implements RepositoryPlugin {
67+
@Override
68+
public Map<String, Factory> getRepositories(
69+
Environment env,
70+
NamedXContentRegistry namedXContentRegistry,
71+
ClusterService clusterService,
72+
BigArrays bigArrays,
73+
RecoverySettings recoverySettings
74+
) {
75+
return Collections.singletonMap(
76+
TYPE,
77+
(metadata) -> new UnstableRepository(metadata, env, namedXContentRegistry, clusterService, bigArrays, recoverySettings)
78+
);
79+
}
80+
81+
@Override
82+
public List<Setting<?>> getSettings() {
83+
return List.of(UNSTABLE_NODES);
84+
}
85+
}
86+
}
87+
88+
public void testCreateInvalidRepository() throws Exception {
89+
internalCluster().ensureAtLeastNumDataNodes(2);
90+
final String repositoryName = "test-duplicate-create-repo";
91+
92+
// put repository for the first time: only let master node create repository successfully
93+
createRepository(
94+
repositoryName,
95+
UnstableRepository.TYPE,
96+
Settings.builder()
97+
.put("location", randomRepoPath())
98+
.putList(
99+
UnstableRepository.UNSTABLE_NODES.getKey(),
100+
Arrays.stream(internalCluster().getNodeNames())
101+
.filter(name -> name.equals(internalCluster().getMasterName()) == false)
102+
.toList()
103+
)
104+
);
105+
// verification should fail with some node has InvalidRepository
106+
final var expectedException = expectThrows(
107+
RepositoryVerificationException.class,
108+
() -> client().admin().cluster().prepareVerifyRepository(repositoryName).get()
109+
);
110+
for (Throwable suppressed : expectedException.getSuppressed()) {
111+
Throwable outerCause = suppressed.getCause();
112+
assertThat(outerCause, isA(RepositoryException.class));
113+
assertThat(
114+
outerCause.getMessage(),
115+
equalTo("[" + repositoryName + "] repository type [" + UnstableRepository.TYPE + "] failed to create on current node")
116+
);
117+
Throwable innerCause = suppressed.getCause().getCause().getCause();
118+
assertThat(innerCause, isA(RepositoryException.class));
119+
assertThat(
120+
innerCause.getMessage(),
121+
equalTo("[" + repositoryName + "] Failed to create repository: current node is not stable")
122+
);
123+
}
124+
125+
// restart master
126+
internalCluster().restartNode(internalCluster().getMasterName());
127+
ensureGreen();
128+
129+
// put repository again: let all node can create repository successfully
130+
createRepository(repositoryName, UnstableRepository.TYPE, Settings.builder().put("location", randomRepoPath()));
131+
// verification should succeed with all node create repository successfully
132+
VerifyRepositoryResponse verifyRepositoryResponse = client().admin().cluster().prepareVerifyRepository(repositoryName).get();
133+
assertEquals(verifyRepositoryResponse.getNodes().size(), internalCluster().numDataAndMasterNodes());
134+
135+
}
136+
137+
private void createRepository(String name, String type, Settings.Builder settings) {
138+
// create
139+
assertAcked(client().admin().cluster().preparePutRepository(name).setType(type).setVerify(false).setSettings(settings).get());
140+
// get
141+
final GetRepositoriesResponse updatedGetRepositoriesResponse = client().admin().cluster().prepareGetRepositories(name).get();
142+
// assert
143+
assertThat(updatedGetRepositoriesResponse.repositories(), hasSize(1));
144+
final RepositoryMetadata updatedRepositoryMetadata = updatedGetRepositoriesResponse.repositories().get(0);
145+
assertThat(updatedRepositoryMetadata.type(), equalTo(type));
146+
}
147+
}
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.repositories;
10+
11+
import org.elasticsearch.Version;
12+
import org.elasticsearch.action.ActionListener;
13+
import org.elasticsearch.cluster.ClusterState;
14+
import org.elasticsearch.cluster.ClusterStateUpdateTask;
15+
import org.elasticsearch.cluster.metadata.IndexMetadata;
16+
import org.elasticsearch.cluster.metadata.Metadata;
17+
import org.elasticsearch.cluster.metadata.RepositoryMetadata;
18+
import org.elasticsearch.cluster.node.DiscoveryNode;
19+
import org.elasticsearch.common.component.AbstractLifecycleComponent;
20+
import org.elasticsearch.index.shard.ShardId;
21+
import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus;
22+
import org.elasticsearch.index.store.Store;
23+
import org.elasticsearch.indices.recovery.RecoveryState;
24+
import org.elasticsearch.snapshots.SnapshotId;
25+
26+
import java.io.IOException;
27+
import java.util.Collection;
28+
import java.util.function.Consumer;
29+
import java.util.function.Function;
30+
31+
/**
32+
* Represents a repository that exists in the cluster state but could not be instantiated on a node, typically due to invalid configuration.
33+
*/
34+
public class InvalidRepository extends AbstractLifecycleComponent implements Repository {
35+
36+
private final RepositoryMetadata repositoryMetadata;
37+
private final RepositoryException creationException;
38+
39+
public InvalidRepository(RepositoryMetadata repositoryMetadata, RepositoryException creationException) {
40+
this.repositoryMetadata = repositoryMetadata;
41+
this.creationException = creationException;
42+
}
43+
44+
private RepositoryException createCreationException() {
45+
return new RepositoryException(
46+
repositoryMetadata.name(),
47+
"repository type [" + repositoryMetadata.type() + "] failed to create on current node",
48+
creationException
49+
);
50+
}
51+
52+
@Override
53+
public RepositoryMetadata getMetadata() {
54+
return repositoryMetadata;
55+
}
56+
57+
@Override
58+
public void getSnapshotInfo(GetSnapshotInfoContext context) {
59+
throw createCreationException();
60+
}
61+
62+
@Override
63+
public Metadata getSnapshotGlobalMetadata(SnapshotId snapshotId) {
64+
throw createCreationException();
65+
}
66+
67+
@Override
68+
public IndexMetadata getSnapshotIndexMetaData(RepositoryData repositoryData, SnapshotId snapshotId, IndexId index) throws IOException {
69+
throw createCreationException();
70+
}
71+
72+
@Override
73+
public void getRepositoryData(ActionListener<RepositoryData> listener) {
74+
listener.onFailure(createCreationException());
75+
}
76+
77+
@Override
78+
public void finalizeSnapshot(FinalizeSnapshotContext finalizeSnapshotContext) {
79+
finalizeSnapshotContext.onFailure(createCreationException());
80+
}
81+
82+
@Override
83+
public void deleteSnapshots(
84+
Collection<SnapshotId> snapshotIds,
85+
long repositoryStateId,
86+
Version repositoryMetaVersion,
87+
ActionListener<RepositoryData> listener
88+
) {
89+
listener.onFailure(createCreationException());
90+
}
91+
92+
@Override
93+
public long getSnapshotThrottleTimeInNanos() {
94+
throw createCreationException();
95+
}
96+
97+
@Override
98+
public long getRestoreThrottleTimeInNanos() {
99+
throw createCreationException();
100+
}
101+
102+
@Override
103+
public String startVerification() {
104+
throw createCreationException();
105+
}
106+
107+
@Override
108+
public void endVerification(String verificationToken) {
109+
throw createCreationException();
110+
}
111+
112+
@Override
113+
public void verify(String verificationToken, DiscoveryNode localNode) {
114+
throw createCreationException();
115+
}
116+
117+
@Override
118+
public boolean isReadOnly() {
119+
// this repository is assumed writable to bypass read-only check and fail with exception produced by this class
120+
return false;
121+
}
122+
123+
@Override
124+
public void snapshotShard(SnapshotShardContext snapshotShardContext) {
125+
snapshotShardContext.onFailure(createCreationException());
126+
}
127+
128+
@Override
129+
public void restoreShard(
130+
Store store,
131+
SnapshotId snapshotId,
132+
IndexId indexId,
133+
ShardId snapshotShardId,
134+
RecoveryState recoveryState,
135+
ActionListener<Void> listener
136+
) {
137+
listener.onFailure(createCreationException());
138+
}
139+
140+
@Override
141+
public IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotId snapshotId, IndexId indexId, ShardId shardId) {
142+
throw createCreationException();
143+
}
144+
145+
@Override
146+
public void updateState(ClusterState state) {
147+
148+
}
149+
150+
@Override
151+
public void executeConsistentStateUpdate(
152+
Function<RepositoryData, ClusterStateUpdateTask> createUpdateTask,
153+
String source,
154+
Consumer<Exception> onFailure
155+
) {
156+
onFailure.accept(createCreationException());
157+
}
158+
159+
@Override
160+
public void cloneShardSnapshot(
161+
SnapshotId source,
162+
SnapshotId target,
163+
RepositoryShardId shardId,
164+
ShardGeneration shardGeneration,
165+
ActionListener<ShardSnapshotResult> listener
166+
) {
167+
listener.onFailure(createCreationException());
168+
}
169+
170+
@Override
171+
public void awaitIdle() {
172+
173+
}
174+
175+
@Override
176+
protected void doStart() {
177+
178+
}
179+
180+
@Override
181+
protected void doStop() {
182+
183+
}
184+
185+
@Override
186+
protected void doClose() throws IOException {
187+
188+
}
189+
}

server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -515,19 +515,20 @@ public void applyClusterState(ClusterChangedEvent event) {
515515
// TODO: this catch is bogus, it means the old repo is already closed,
516516
// but we have nothing to replace it
517517
logger.warn(() -> new ParameterizedMessage("failed to change repository [{}]", repositoryMetadata.name()), ex);
518+
repository = new InvalidRepository(repositoryMetadata, ex);
518519
}
519520
}
520521
} else {
521522
try {
522523
repository = createRepository(repositoryMetadata, typesRegistry, RepositoriesService::createUnknownTypeRepository);
523524
} catch (RepositoryException ex) {
524525
logger.warn(() -> new ParameterizedMessage("failed to create repository [{}]", repositoryMetadata.name()), ex);
526+
repository = new InvalidRepository(repositoryMetadata, ex);
525527
}
526528
}
527-
if (repository != null) {
528-
logger.debug("registering repository [{}]", repositoryMetadata.name());
529-
builder.put(repositoryMetadata.name(), repository);
530-
}
529+
assert repository != null : "repository should not be null here";
530+
logger.debug("registering repository [{}]", repositoryMetadata.name());
531+
builder.put(repositoryMetadata.name(), repository);
531532
}
532533
for (Repository repo : builder.values()) {
533534
repo.updateState(state);

server/src/main/java/org/elasticsearch/repositories/UnknownTypeRepository.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131
/**
3232
* This class represents a repository that could not be initialized due to unknown type.
33-
* This could happen whe a user creates a snapshot repository using a type from a plugin and then removes the plugin.
33+
* This could happen when a user creates a snapshot repository using a type from a plugin and then removes the plugin.
3434
*/
3535
public class UnknownTypeRepository extends AbstractLifecycleComponent implements Repository {
3636

0 commit comments

Comments
 (0)