Skip to content

Commit eae6361

Browse files
authored
Always rebuild checkpoint tracker for old indices (#46340)
The max_seq_no of a Lucene commit in old indices (created before 6.6.2) can be smaller than the seq_no of some documents in that commit (see #38879). Although we fixed this bug in 6.6.2 and 7.0.0, a problematic index commit can still affect newer versions after a rolling upgrade or a full cluster restart. In particular, if a FollowingEngine (or an internal engine with MSU enabled) restores from a problematic commit, it can apply the MSU optimization to existing documents. The symptom we see is a violation of the local checkpoint tracker assertion. Closes #46311 Relates #38879
1 parent c7326d2 commit eae6361

File tree

2 files changed

+63
-3
lines changed

2 files changed

+63
-3
lines changed

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,10 +261,14 @@ private static LocalCheckpointTracker createLocalCheckpointTracker(EngineConfig
261261
// Thus, we need to restore the LocalCheckpointTracker bit by bit to ensure the consistency between LocalCheckpointTracker and
262262
// Lucene index. This is not the only solution since we can bootstrap max_seq_no_of_updates with max_seq_no of the commit to
263263
// disable the MSU optimization during recovery. Here we prefer to maintain the consistency of LocalCheckpointTracker.
264-
if (localCheckpoint < maxSeqNo && engineConfig.getIndexSettings().isSoftDeleteEnabled()) {
264+
// The max_seq_no of a Lucene commit in old indices might be smaller than the seq_no of some documents in that commit.
265+
// We have to rebuild the LocalCheckpointTracker for those indices. See https://github.com/elastic/elasticsearch/pull/38879.
266+
// Note that this bug affects only indices created between 6.5.0 and 6.6.1 with soft-deletes explicitly enabled.
267+
final boolean mustRebuild = engineConfig.getIndexSettings().getIndexVersionCreated().before(Version.V_6_6_2);
268+
if (engineConfig.getIndexSettings().isSoftDeleteEnabled() && (localCheckpoint < maxSeqNo || mustRebuild)) {
265269
try (Searcher searcher = searcherSupplier.get()) {
266-
Lucene.scanSeqNosInReader(searcher.getDirectoryReader(), localCheckpoint + 1, maxSeqNo,
267-
tracker::markSeqNoAsCompleted);
270+
final long toSeqNo = mustRebuild ? Long.MAX_VALUE : maxSeqNo;
271+
Lucene.scanSeqNosInReader(searcher.getDirectoryReader(), localCheckpoint + 1, toSeqNo, tracker::markSeqNoAsCompleted);
268272
}
269273
}
270274
return tracker;

server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@
132132
import org.elasticsearch.index.translog.TranslogConfig;
133133
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
134134
import org.elasticsearch.test.IndexSettingsModule;
135+
import org.elasticsearch.test.VersionUtils;
135136
import org.hamcrest.MatcherAssert;
136137
import org.hamcrest.Matchers;
137138

@@ -5632,6 +5633,61 @@ public void testRebuildLocalCheckpointTracker() throws Exception {
56325633
}
56335634
}
56345635

5636+
/**
5637+
* Simulate the bug described in https://github.com/elastic/elasticsearch/pull/38879 where the max_seq_no
5638+
* of the index commit can be smaller than seq_no of some documents in the commit.
5639+
*/
5640+
public void testAlwaysRebuildLocalCheckpointForOldIndex() throws Exception {
5641+
Settings.Builder settings = Settings.builder()
5642+
.put(defaultSettings.getSettings())
5643+
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_6_5_0, Version.V_6_6_1))
5644+
.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true);
5645+
final IndexMetaData indexMetaData = IndexMetaData.builder(defaultSettings.getIndexMetaData()).settings(settings).build();
5646+
final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(indexMetaData);
5647+
Path translogPath = createTempDir();
5648+
List<Engine.Operation> operations = generateHistoryOnReplica(between(1, 500), randomBoolean(), randomBoolean(), randomBoolean());
5649+
final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED);
5650+
try (Store store = createStore()) {
5651+
EngineConfig config = config(indexSettings, store, translogPath, NoMergePolicy.INSTANCE, null, null, globalCheckpoint::get);
5652+
final List<DocIdSeqNoAndTerm> docs;
5653+
try (InternalEngine engine = createEngine(config)) {
5654+
for (Engine.Operation op : operations) {
5655+
applyOperation(engine, op);
5656+
if (randomInt(100) < 10) {
5657+
engine.flush();
5658+
globalCheckpoint.set(randomLongBetween(globalCheckpoint.get(), engine.getLocalCheckpoint()));
5659+
}
5660+
}
5661+
globalCheckpoint.set(randomLongBetween(globalCheckpoint.get(), engine.getLocalCheckpoint()));
5662+
engine.syncTranslog();
5663+
docs = getDocIds(engine, true);
5664+
}
5665+
trimUnsafeCommits(config);
5666+
// Simulate a bug in https://github.com/elastic/elasticsearch/pull/38879 where max_seq_no is smaller than seq_no of some docs.
5667+
if (randomBoolean()) {
5668+
IndexWriterConfig iwc = new IndexWriterConfig(null)
5669+
.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD)
5670+
.setCommitOnClose(false)
5671+
.setMergePolicy(NoMergePolicy.INSTANCE)
5672+
.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
5673+
try (IndexWriter writer = new IndexWriter(config.getStore().directory(), iwc)) {
5674+
Map<String, String> userData = new HashMap<>();
5675+
writer.getLiveCommitData().forEach(e -> userData.put(e.getKey(), e.getValue()));
5676+
SequenceNumbers.CommitInfo commitInfo = SequenceNumbers.loadSeqNoInfoFromLuceneCommit(userData.entrySet());
5677+
long maxSeqNo = randomLongBetween(commitInfo.localCheckpoint, commitInfo.maxSeqNo);
5678+
userData.put(SequenceNumbers.MAX_SEQ_NO, Long.toString(maxSeqNo));
5679+
writer.setLiveCommitData(userData.entrySet());
5680+
writer.commit();
5681+
}
5682+
}
5683+
try (InternalEngine engine = new InternalEngine(config)) {
5684+
engine.reinitializeMaxSeqNoOfUpdatesOrDeletes();
5685+
engine.recoverFromTranslog(translogHandler, Long.MAX_VALUE);
5686+
assertThat(getDocIds(engine, true), equalTo(docs));
5687+
}
5688+
}
5689+
}
5690+
56355691
public void testOpenSoftDeletesIndexWithSoftDeletesDisabled() throws Exception {
56365692
try (Store store = createStore()) {
56375693
Path translogPath = createTempDir();

0 commit comments

Comments
 (0)