Skip to content

Commit 3498a47

Browse files
authored
Use Lucene exclusively for metadata storage (#50144)
This moves metadata persistence to Lucene for all node types. It also reenables BWC and adds an interoperability layer for upgrades from prior versions. This commit disables a number of tests related to dangling indices and command-line tools. Those will be addressed in follow-ups. Relates #48701
1 parent 1606804 commit 3498a47

File tree

15 files changed

+134
-286
lines changed

15 files changed

+134
-286
lines changed

build.gradle

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,8 @@ task verifyVersions {
205205
* after the backport of the backcompat code is complete.
206206
*/
207207

208-
boolean bwc_tests_enabled = false
209-
final String bwc_tests_disabled_issue = "https://github.com/elastic/elasticsearch/issues/48701" /* place a PR link here when committing bwc changes */
208+
boolean bwc_tests_enabled = true
209+
final String bwc_tests_disabled_issue = "" /* place a PR link here when committing bwc changes */
210210
if (bwc_tests_enabled == false) {
211211
if (bwc_tests_disabled_issue.isEmpty()) {
212212
throw new GradleException("bwc_tests_disabled_issue must be set when bwc_tests_enabled == false")

qa/os/src/test/java/org/elasticsearch/packaging/test/ArchiveTests.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.elasticsearch.packaging.util.ServerUtils;
2828
import org.elasticsearch.packaging.util.Shell.Result;
2929
import org.junit.BeforeClass;
30+
import org.junit.Ignore;
3031

3132
import java.nio.file.Files;
3233
import java.nio.file.Path;
@@ -382,6 +383,7 @@ public void test92ElasticsearchNodeCliPackaging() throws Exception {
382383
}
383384
}
384385

386+
@Ignore("https://github.com/elastic/elasticsearch/issues/48701") // TODO unsafe bootstrapping
385387
public void test93ElasticsearchNodeCustomDataPathAndNotEsHomeWorkDir() throws Exception {
386388
Path relativeDataPath = installation.data.relativize(installation.home);
387389
append(installation.config("elasticsearch.yml"), "path.data: " + relativeDataPath);

server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java

Lines changed: 9 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -20,30 +20,22 @@
2020
package org.elasticsearch.gateway;
2121

2222
import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
23-
import org.apache.logging.log4j.LogManager;
24-
import org.apache.logging.log4j.Logger;
2523
import org.apache.lucene.util.SetOnce;
2624
import org.elasticsearch.ElasticsearchException;
2725
import org.elasticsearch.Version;
28-
import org.elasticsearch.cluster.ClusterChangedEvent;
2926
import org.elasticsearch.cluster.ClusterName;
3027
import org.elasticsearch.cluster.ClusterState;
31-
import org.elasticsearch.cluster.ClusterStateApplier;
3228
import org.elasticsearch.cluster.coordination.CoordinationState.PersistedState;
3329
import org.elasticsearch.cluster.coordination.InMemoryPersistedState;
3430
import org.elasticsearch.cluster.metadata.IndexMetaData;
3531
import org.elasticsearch.cluster.metadata.IndexTemplateMetaData;
36-
import org.elasticsearch.cluster.metadata.Manifest;
3732
import org.elasticsearch.cluster.metadata.MetaData;
3833
import org.elasticsearch.cluster.metadata.MetaDataIndexUpgradeService;
3934
import org.elasticsearch.cluster.node.DiscoveryNode;
4035
import org.elasticsearch.cluster.service.ClusterService;
4136
import org.elasticsearch.common.collect.ImmutableOpenMap;
42-
import org.elasticsearch.common.collect.Tuple;
4337
import org.elasticsearch.common.settings.Settings;
44-
import org.elasticsearch.common.unit.TimeValue;
4538
import org.elasticsearch.core.internal.io.IOUtils;
46-
import org.elasticsearch.index.Index;
4739
import org.elasticsearch.plugins.MetaDataUpgrader;
4840
import org.elasticsearch.transport.TransportService;
4941

@@ -65,7 +57,6 @@
6557
* non-stale state, and master-ineligible nodes receive the real cluster state from the elected master after joining the cluster.
6658
*/
6759
public class GatewayMetaState implements Closeable {
68-
private static final Logger logger = LogManager.getLogger(GatewayMetaState.class);
6960

7061
// Set by calling start()
7162
private final SetOnce<PersistedState> persistedState = new SetOnce<>();
@@ -81,45 +72,23 @@ public MetaData getMetaData() {
8172
}
8273

8374
public void start(Settings settings, TransportService transportService, ClusterService clusterService,
84-
MetaStateService metaStateService, MetaDataIndexUpgradeService metaDataIndexUpgradeService,
75+
MetaDataIndexUpgradeService metaDataIndexUpgradeService,
8576
MetaDataUpgrader metaDataUpgrader, LucenePersistedStateFactory lucenePersistedStateFactory) {
8677
assert persistedState.get() == null : "should only start once, but already have " + persistedState.get();
8778

88-
if (DiscoveryNode.isMasterNode(settings)) {
79+
if (DiscoveryNode.isMasterNode(settings) || DiscoveryNode.isDataNode(settings)) {
8980
try {
9081
persistedState.set(lucenePersistedStateFactory.loadPersistedState((version, metadata) ->
9182
prepareInitialClusterState(transportService, clusterService,
9283
ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings))
9384
.version(version)
94-
.metaData(upgradeMetaDataForMasterEligibleNode(metadata, metaDataIndexUpgradeService, metaDataUpgrader))
95-
.build())));
85+
.metaData(upgradeMetaDataForNode(metadata, metaDataIndexUpgradeService, metaDataUpgrader))
86+
.build()))
87+
);
9688
} catch (IOException e) {
9789
throw new ElasticsearchException("failed to load metadata", e);
9890
}
99-
}
100-
101-
if (DiscoveryNode.isDataNode(settings)) {
102-
final Tuple<Manifest, ClusterState> manifestClusterStateTuple;
103-
try {
104-
upgradeMetaData(settings, metaStateService, metaDataIndexUpgradeService, metaDataUpgrader);
105-
manifestClusterStateTuple = loadStateAndManifest(ClusterName.CLUSTER_NAME_SETTING.get(settings), metaStateService);
106-
} catch (IOException e) {
107-
throw new ElasticsearchException("failed to load metadata", e);
108-
}
109-
110-
final IncrementalClusterStateWriter incrementalClusterStateWriter
111-
= new IncrementalClusterStateWriter(settings, clusterService.getClusterSettings(), metaStateService,
112-
manifestClusterStateTuple.v1(),
113-
prepareInitialClusterState(transportService, clusterService, manifestClusterStateTuple.v2()),
114-
transportService.getThreadPool()::relativeTimeInMillis);
115-
116-
clusterService.addLowPriorityApplier(new GatewayClusterApplier(incrementalClusterStateWriter));
117-
118-
if (DiscoveryNode.isMasterNode(settings) == false) {
119-
persistedState.set(
120-
new InMemoryPersistedState(manifestClusterStateTuple.v1().getCurrentTerm(), manifestClusterStateTuple.v2()));
121-
}
122-
} else if (DiscoveryNode.isMasterNode(settings) == false) {
91+
} else {
12392
persistedState.set(
12493
new InMemoryPersistedState(0L, ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)).build()));
12594
}
@@ -138,76 +107,12 @@ ClusterState prepareInitialClusterState(TransportService transportService, Clust
138107
}
139108

140109
// exposed so it can be overridden by tests
141-
MetaData upgradeMetaDataForMasterEligibleNode(MetaData metaData,
142-
MetaDataIndexUpgradeService metaDataIndexUpgradeService,
143-
MetaDataUpgrader metaDataUpgrader) {
110+
MetaData upgradeMetaDataForNode(MetaData metaData,
111+
MetaDataIndexUpgradeService metaDataIndexUpgradeService,
112+
MetaDataUpgrader metaDataUpgrader) {
144113
return upgradeMetaData(metaData, metaDataIndexUpgradeService, metaDataUpgrader);
145114
}
146115

147-
// exposed so it can be overridden by tests
148-
void upgradeMetaData(Settings settings, MetaStateService metaStateService, MetaDataIndexUpgradeService metaDataIndexUpgradeService,
149-
MetaDataUpgrader metaDataUpgrader) throws IOException {
150-
if (isMasterOrDataNode(settings)) {
151-
try {
152-
final Tuple<Manifest, MetaData> metaStateAndData = metaStateService.loadFullState();
153-
final Manifest manifest = metaStateAndData.v1();
154-
final MetaData metaData = metaStateAndData.v2();
155-
156-
// We finished global state validation and successfully checked all indices for backward compatibility
157-
// and found no non-upgradable indices, which means the upgrade can continue.
158-
// Now it's safe to overwrite global and index metadata.
159-
// We don't re-write metadata if it's not upgraded by upgrade plugins, because
160-
// if there is manifest file, it means metadata is properly persisted to all data paths
161-
// if there is no manifest file (upgrade from 6.x to 7.x) metadata might be missing on some data paths,
162-
// but anyway we will re-write it as soon as we receive first ClusterState
163-
final IncrementalClusterStateWriter.AtomicClusterStateWriter writer
164-
= new IncrementalClusterStateWriter.AtomicClusterStateWriter(metaStateService, manifest);
165-
final MetaData upgradedMetaData = upgradeMetaData(metaData, metaDataIndexUpgradeService, metaDataUpgrader);
166-
167-
final long globalStateGeneration;
168-
if (MetaData.isGlobalStateEquals(metaData, upgradedMetaData) == false) {
169-
globalStateGeneration = writer.writeGlobalState("upgrade", upgradedMetaData);
170-
} else {
171-
globalStateGeneration = manifest.getGlobalGeneration();
172-
}
173-
174-
Map<Index, Long> indices = new HashMap<>(manifest.getIndexGenerations());
175-
for (IndexMetaData indexMetaData : upgradedMetaData) {
176-
if (metaData.hasIndexMetaData(indexMetaData) == false) {
177-
final long generation = writer.writeIndex("upgrade", indexMetaData);
178-
indices.put(indexMetaData.getIndex(), generation);
179-
}
180-
}
181-
182-
final Manifest newManifest = new Manifest(manifest.getCurrentTerm(), manifest.getClusterStateVersion(),
183-
globalStateGeneration, indices);
184-
writer.writeManifestAndCleanup("startup", newManifest);
185-
} catch (Exception e) {
186-
logger.error("failed to read or upgrade local state, exiting...", e);
187-
throw e;
188-
}
189-
}
190-
}
191-
192-
private static Tuple<Manifest,ClusterState> loadStateAndManifest(ClusterName clusterName,
193-
MetaStateService metaStateService) throws IOException {
194-
final long startNS = System.nanoTime();
195-
final Tuple<Manifest, MetaData> manifestAndMetaData = metaStateService.loadFullState();
196-
final Manifest manifest = manifestAndMetaData.v1();
197-
198-
final ClusterState clusterState = ClusterState.builder(clusterName)
199-
.version(manifest.getClusterStateVersion())
200-
.metaData(manifestAndMetaData.v2()).build();
201-
202-
logger.debug("took {} to load state", TimeValue.timeValueMillis(TimeValue.nsecToMSec(System.nanoTime() - startNS)));
203-
204-
return Tuple.tuple(manifest, clusterState);
205-
}
206-
207-
private static boolean isMasterOrDataNode(Settings settings) {
208-
return DiscoveryNode.isMasterNode(settings) || DiscoveryNode.isDataNode(settings);
209-
}
210-
211116
/**
212117
* Elasticsearch 2.0 removed several deprecated features as well as support for Lucene 3.x. This method calls
213118
* {@link MetaDataIndexUpgradeService} to make sure that indices are compatible with the current version. The
@@ -262,36 +167,4 @@ public void close() throws IOException {
262167
IOUtils.close(persistedState.get());
263168
}
264169

265-
private static class GatewayClusterApplier implements ClusterStateApplier {
266-
267-
private final IncrementalClusterStateWriter incrementalClusterStateWriter;
268-
269-
private GatewayClusterApplier(IncrementalClusterStateWriter incrementalClusterStateWriter) {
270-
this.incrementalClusterStateWriter = incrementalClusterStateWriter;
271-
}
272-
273-
@Override
274-
public void applyClusterState(ClusterChangedEvent event) {
275-
if (event.state().blocks().disableStatePersistence()) {
276-
incrementalClusterStateWriter.setIncrementalWrite(false);
277-
return;
278-
}
279-
280-
try {
281-
// Hack: This is to ensure that non-master-eligible Zen2 nodes always store a current term
282-
// that's higher than the last accepted term.
283-
// TODO: can we get rid of this hack?
284-
if (event.state().term() > incrementalClusterStateWriter.getPreviousManifest().getCurrentTerm()) {
285-
incrementalClusterStateWriter.setCurrentTerm(event.state().term());
286-
}
287-
288-
incrementalClusterStateWriter.updateClusterState(event.state());
289-
incrementalClusterStateWriter.setIncrementalWrite(true);
290-
} catch (WriteStateException e) {
291-
logger.warn("Exception occurred when storing new meta data", e);
292-
}
293-
}
294-
295-
}
296-
297170
}

server/src/main/java/org/elasticsearch/gateway/LucenePersistedStateFactory.java

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,10 @@
5050
import org.elasticsearch.cluster.ClusterState;
5151
import org.elasticsearch.cluster.coordination.CoordinationState;
5252
import org.elasticsearch.cluster.metadata.IndexMetaData;
53+
import org.elasticsearch.cluster.metadata.Manifest;
5354
import org.elasticsearch.cluster.metadata.MetaData;
5455
import org.elasticsearch.common.CheckedConsumer;
56+
import org.elasticsearch.common.collect.Tuple;
5557
import org.elasticsearch.common.io.stream.ReleasableBytesStreamOutput;
5658
import org.elasticsearch.common.lease.Releasable;
5759
import org.elasticsearch.common.logging.Loggers;
@@ -125,11 +127,43 @@ public class LucenePersistedStateFactory {
125127
private final NodeEnvironment nodeEnvironment;
126128
private final NamedXContentRegistry namedXContentRegistry;
127129
private final BigArrays bigArrays;
130+
private final LegacyLoader legacyLoader;
128131

129-
public LucenePersistedStateFactory(NodeEnvironment nodeEnvironment, NamedXContentRegistry namedXContentRegistry, BigArrays bigArrays) {
132+
/**
133+
* Allows interacting with legacy metadata
134+
*/
135+
public interface LegacyLoader {
136+
/**
137+
* Loads legacy state
138+
*/
139+
Tuple<Manifest, MetaData> loadClusterState() throws IOException;
140+
141+
/**
142+
* Cleans legacy state
143+
*/
144+
void clean() throws IOException;
145+
}
146+
147+
LucenePersistedStateFactory(NodeEnvironment nodeEnvironment, NamedXContentRegistry namedXContentRegistry, BigArrays bigArrays) {
148+
this(nodeEnvironment, namedXContentRegistry, bigArrays, new LegacyLoader() {
149+
@Override
150+
public Tuple<Manifest, MetaData> loadClusterState() {
151+
return new Tuple<>(Manifest.empty(), MetaData.EMPTY_META_DATA);
152+
}
153+
154+
@Override
155+
public void clean() {
156+
157+
}
158+
});
159+
}
160+
161+
public LucenePersistedStateFactory(NodeEnvironment nodeEnvironment, NamedXContentRegistry namedXContentRegistry, BigArrays bigArrays,
162+
LegacyLoader legacyLoader) {
130163
this.nodeEnvironment = nodeEnvironment;
131164
this.namedXContentRegistry = namedXContentRegistry;
132165
this.bigArrays = bigArrays;
166+
this.legacyLoader = legacyLoader;
133167
}
134168

135169
CoordinationState.PersistedState loadPersistedState(BiFunction<Long, MetaData, ClusterState> clusterStateFromMetaData)
@@ -172,6 +206,7 @@ CoordinationState.PersistedState loadPersistedState(BiFunction<Long, MetaData, C
172206
success = false;
173207
try {
174208
lucenePersistedState.persistInitialState();
209+
legacyLoader.clean();
175210
success = true;
176211
return lucenePersistedState;
177212
} finally {
@@ -205,10 +240,12 @@ private OnDiskState(String nodeId, Path dataPath, long currentTerm, long lastAcc
205240
}
206241
}
207242

243+
private static final OnDiskState NO_ON_DISK_STATE = new OnDiskState(null, null, 0L, 0L, MetaData.EMPTY_META_DATA);
244+
208245
private OnDiskState loadBestOnDiskState() throws IOException {
209246
String committedClusterUuid = null;
210247
Path committedClusterUuidPath = null;
211-
OnDiskState bestOnDiskState = new OnDiskState(null, null, 0L, 0L, MetaData.EMPTY_META_DATA);
248+
OnDiskState bestOnDiskState = NO_ON_DISK_STATE;
212249
OnDiskState maxCurrentTermOnDiskState = bestOnDiskState;
213250

214251
// We use a write-all-read-one strategy: metadata is written to every data path when accepting it, which means it is mostly
@@ -243,7 +280,8 @@ private OnDiskState loadBestOnDiskState() throws IOException {
243280

244281
long acceptedTerm = onDiskState.metaData.coordinationMetaData().term();
245282
long maxAcceptedTerm = bestOnDiskState.metaData.coordinationMetaData().term();
246-
if (acceptedTerm > maxAcceptedTerm
283+
if (bestOnDiskState == NO_ON_DISK_STATE
284+
|| acceptedTerm > maxAcceptedTerm
247285
|| (acceptedTerm == maxAcceptedTerm
248286
&& (onDiskState.lastAcceptedVersion > bestOnDiskState.lastAcceptedVersion
249287
|| (onDiskState.lastAcceptedVersion == bestOnDiskState.lastAcceptedVersion)
@@ -262,6 +300,15 @@ private OnDiskState loadBestOnDiskState() throws IOException {
262300
"] with greater term [" + maxCurrentTermOnDiskState.currentTerm + "]");
263301
}
264302

303+
if (bestOnDiskState == NO_ON_DISK_STATE) {
304+
assert Version.CURRENT.major <= Version.V_7_0_0.major + 1 : "legacy metadata loader is not needed anymore from v9 onwards";
305+
final Tuple<Manifest, MetaData> legacyState = legacyLoader.loadClusterState();
306+
if (legacyState.v1().isEmpty() == false) {
307+
return new OnDiskState(nodeEnvironment.nodeId(), null, legacyState.v1().getCurrentTerm(),
308+
legacyState.v1().getClusterStateVersion(), legacyState.v2());
309+
}
310+
}
311+
265312
return bestOnDiskState;
266313
}
267314

0 commit comments

Comments
 (0)