
Commit cec6678
Use peer recovery retention leases for indices without soft-deletes (#50351)
Today, the replica allocator uses peer recovery retention leases (PRRLs) to select the best-matched copies when allocating replicas of indices with soft-deletes. We can employ this mechanism for indices without soft-deletes because the retaining sequence number of a PRRL is the persisted global checkpoint (plus one) of that copy. If the primary and replica have the same retaining sequence number, then we should be able to perform a noop recovery: we must be retaining translog up to the local checkpoint of the safe commit, which is at most the global checkpoint of either copy. The only limitation is that we might not cancel ongoing file-based recoveries with PRRLs for noop recoveries, as we can't make the translog retention policy comply with PRRLs. We also have this problem with soft-deletes if a PRRL is about to expire. Relates #45136 Relates #46959
1 parent 40ef785 commit cec6678
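
The description's noop-recovery argument can be made concrete with a small worked example. The sketch below is illustrative only (the class and method names are hypothetical, not the actual ReplicaShardAllocator code): a PRRL retains operations from the copy's persisted global checkpoint plus one, so equal retaining sequence numbers on the primary's and replica's leases mean the replica needs no operations replayed.

    // Hypothetical sketch of the noop-recovery check; not Elasticsearch code.
    public class NoopRecoveryCheck {
        // A PRRL's retaining sequence number is the copy's persisted global checkpoint + 1.
        static long retainingSeqNo(long persistedGlobalCheckpoint) {
            return persistedGlobalCheckpoint + 1;
        }

        // Equal retaining sequence numbers imply the copies agree on all retained history,
        // so allocating the replica to this copy needs no file copying and no op replay.
        static boolean canPerformNoopRecovery(long primaryRetaining, long replicaRetaining) {
            return primaryRetaining == replicaRetaining;
        }

        public static void main(String[] args) {
            // Both copies persisted global checkpoint 41 -> retaining seq no 42 -> noop recovery.
            System.out.println(canPerformNoopRecovery(retainingSeqNo(41), retainingSeqNo(41))); // true
        }
    }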

File tree

11 files changed: +69 -100 lines

qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java

Lines changed: 3 additions & 1 deletion
@@ -1278,7 +1278,7 @@ public void testOperationBasedRecovery() throws Exception {
                 }
             }
             flush(index, true);
-            ensurePeerRecoveryRetentionLeasesRenewedAndSynced(index);
+            ensurePeerRecoveryRetentionLeasesRenewedAndSynced(index, false);
             // less than 10% of the committed docs (see IndexSetting#FILE_BASED_RECOVERY_THRESHOLD_SETTING).
             int uncommittedDocs = randomIntBetween(0, (int) (committedDocs * 0.1));
             for (int i = 0; i < uncommittedDocs; i++) {
@@ -1288,6 +1288,7 @@ public void testOperationBasedRecovery() throws Exception {
         } else {
             ensureGreen(index);
             assertNoFileBasedRecovery(index, n -> true);
+            ensurePeerRecoveryRetentionLeasesRenewedAndSynced(index, true);
         }
     }
@@ -1312,6 +1313,7 @@ public void testTurnOffTranslogRetentionAfterUpgraded() throws Exception {
             ensureGreen(index);
             flush(index, true);
             assertEmptyTranslog(index);
+            ensurePeerRecoveryRetentionLeasesRenewedAndSynced(index, true);
         }
     }
 }

qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java

Lines changed: 4 additions & 1 deletion
@@ -695,7 +695,7 @@ public void testOperationBasedRecovery() throws Exception {
             ensureGreen(index);
             indexDocs(index, 0, randomIntBetween(100, 200));
             flush(index, randomBoolean());
-            ensurePeerRecoveryRetentionLeasesRenewedAndSynced(index);
+            ensurePeerRecoveryRetentionLeasesRenewedAndSynced(index, false);
             // uncommitted docs must be less than 10% of committed docs (see IndexSetting#FILE_BASED_RECOVERY_THRESHOLD_SETTING).
             indexDocs(index, randomIntBetween(0, 100), randomIntBetween(0, 3));
         } else {
@@ -705,6 +705,9 @@ public void testOperationBasedRecovery() throws Exception {
                     || nodeName.startsWith(CLUSTER_NAME + "-0")
                     || (nodeName.startsWith(CLUSTER_NAME + "-1") && Booleans.parseBoolean(System.getProperty("tests.first_round")) == false));
             indexDocs(index, randomIntBetween(0, 100), randomIntBetween(0, 3));
+            if (CLUSTER_TYPE == ClusterType.UPGRADED) {
+                ensurePeerRecoveryRetentionLeasesRenewedAndSynced(index, true);
+            }
         }
     }

server/src/main/java/org/elasticsearch/index/IndexService.java

Lines changed: 1 addition & 3 deletions
@@ -820,9 +820,7 @@ private void maybeSyncGlobalCheckpoints() {
     }

     private void syncRetentionLeases() {
-        if (indexSettings.isSoftDeleteEnabled()) {
-            sync(IndexShard::syncRetentionLeases, "retention lease");
-        }
+        sync(IndexShard::syncRetentionLeases, "retention lease");
     }

     private void sync(final Consumer<IndexShard> sync, final String source) {

server/src/main/java/org/elasticsearch/index/seqno/ReplicationTracker.java

Lines changed: 8 additions & 9 deletions
@@ -895,10 +895,12 @@ public ReplicationTracker(
         this.pendingInSync = new HashSet<>();
         this.routingTable = null;
         this.replicationGroup = null;
-        this.hasAllPeerRecoveryRetentionLeases = indexSettings.isSoftDeleteEnabled() &&
-            (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_6_0) ||
-                (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_4_0) &&
-                    indexSettings.getIndexMetaData().getState() == IndexMetaData.State.OPEN));
+        this.hasAllPeerRecoveryRetentionLeases = indexSettings.getIndexVersionCreated().onOrAfter(Version.V_8_0_0)
+            || (indexSettings.isSoftDeleteEnabled() &&
+                (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_6_0) ||
+                    (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_7_4_0) &&
+                        indexSettings.getIndexMetaData().getState() == IndexMetaData.State.OPEN)));
+
         this.fileBasedRecoveryThreshold = IndexSettings.FILE_BASED_RECOVERY_THRESHOLD_SETTING.get(indexSettings.getSettings());
         this.safeCommitInfoSupplier = safeCommitInfoSupplier;
         assert Version.V_EMPTY.equals(indexSettings.getIndexVersionCreated()) == false;
@@ -994,10 +996,7 @@ public synchronized void activatePrimaryMode(final long localCheckpoint) {
         updateLocalCheckpoint(shardAllocationId, checkpoints.get(shardAllocationId), localCheckpoint);
         updateGlobalCheckpointOnPrimary();

-        if (indexSettings.isSoftDeleteEnabled()) {
-            addPeerRecoveryRetentionLeaseForSolePrimary();
-        }
-
+        addPeerRecoveryRetentionLeaseForSolePrimary();
         assert invariant();
     }

@@ -1358,7 +1357,7 @@ public synchronized boolean hasAllPeerRecoveryRetentionLeases() {
      * prior to {@link Version#V_7_4_0} that does not create peer-recovery retention leases.
      */
     public synchronized void createMissingPeerRecoveryRetentionLeases(ActionListener<Void> listener) {
-        if (indexSettings().isSoftDeleteEnabled() && hasAllPeerRecoveryRetentionLeases == false) {
+        if (hasAllPeerRecoveryRetentionLeases == false) {
             final List<ShardRouting> shardRoutings = routingTable.assignedShards();
             final GroupedActionListener<ReplicationResponse> groupedActionListener = new GroupedActionListener<>(ActionListener.wrap(vs -> {
                 setHasAllPeerRecoveryRetentionLeases();
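
The constructor hunk above widens the hasAllPeerRecoveryRetentionLeases guarantee: every index created on or after 8.0 has PRRLs regardless of soft-deletes, while on 7.x the guarantee still requires soft-deletes plus the existing version gates. A standalone sketch of that predicate, with versions modelled as plain integers purely for illustration (an assumption, not the real Version API):

    // Illustrative gating only; 74/76/80 stand in for Version.V_7_4_0, V_7_6_0, V_8_0_0.
    static boolean hasAllPeerRecoveryRetentionLeases(int createdVersion, boolean softDeletes, boolean indexOpen) {
        return createdVersion >= 80                           // 8.0+: always, even without soft-deletes
            || (softDeletes && (createdVersion >= 76          // 7.6+: whenever soft-deletes are on
                || (createdVersion >= 74 && indexOpen)));     // 7.4-7.5: only for open indices
    }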

server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 4 additions & 5 deletions
@@ -1892,10 +1892,10 @@ boolean shouldRollTranslogGeneration() {
     public void onSettingsChanged() {
         Engine engineOrNull = getEngineOrNull();
         if (engineOrNull != null) {
-            final boolean useRetentionLeasesInPeerRecovery = this.useRetentionLeasesInPeerRecovery;
+            final boolean disableTranslogRetention = indexSettings.isSoftDeleteEnabled() && useRetentionLeasesInPeerRecovery;
             engineOrNull.onSettingsChanged(
-                useRetentionLeasesInPeerRecovery ? TimeValue.MINUS_ONE : indexSettings.getTranslogRetentionAge(),
-                useRetentionLeasesInPeerRecovery ? new ByteSizeValue(-1) : indexSettings.getTranslogRetentionSize(),
+                disableTranslogRetention ? TimeValue.MINUS_ONE : indexSettings.getTranslogRetentionAge(),
+                disableTranslogRetention ? new ByteSizeValue(-1) : indexSettings.getTranslogRetentionSize(),
                 indexSettings.getSoftDeleteRetentionOperations()
             );
         }
@@ -2224,7 +2224,6 @@ public boolean assertRetentionLeasesPersisted() throws IOException {
     public void syncRetentionLeases() {
         assert assertPrimaryMode();
         verifyNotClosed();
-        ensureSoftDeletesEnabled("retention leases");
         replicationTracker.renewPeerRecoveryRetentionLeases();
         final Tuple<Boolean, RetentionLeases> retentionLeases = getRetentionLeases(true);
         if (retentionLeases.v1()) {
@@ -2619,7 +2618,7 @@ public RetentionLease addPeerRecoveryRetentionLease(String nodeId, long globalCh
                                                         ActionListener<ReplicationResponse> listener) {
         assert assertPrimaryMode();
         // only needed for BWC reasons involving rolling upgrades from versions that do not support PRRLs:
-        assert indexSettings.getIndexVersionCreated().before(Version.V_7_4_0);
+        assert indexSettings.getIndexVersionCreated().before(Version.V_7_4_0) || indexSettings.isSoftDeleteEnabled() == false;
         return replicationTracker.addPeerRecoveryRetentionLease(nodeId, globalCheckpoint, listener);
     }
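
The onSettingsChanged hunk encodes the limitation called out in the commit message: translog retention may be relaxed only when soft-deletes supply the recovery history, since the translog retention policy cannot be made to comply with PRRLs. A minimal standalone sketch of that decision (plain booleans instead of the real IndexSettings, as an illustrative assumption):

    // Sketch: disable translog retention (age -1) only when soft-deletes are on
    // AND all copies already use retention leases in peer recovery; otherwise the
    // translog is still the history source and the configured age must stand.
    static long effectiveTranslogRetentionAgeMillis(boolean softDeletesEnabled,
                                                    boolean useRetentionLeasesInPeerRecovery,
                                                    long configuredAgeMillis) {
        boolean disableTranslogRetention = softDeletesEnabled && useRetentionLeasesInPeerRecovery;
        return disableTranslogRetention ? -1L : configuredAgeMillis;
    }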

server/src/main/java/org/elasticsearch/indices/recovery/RecoverySourceHandler.java

Lines changed: 25 additions & 37 deletions
@@ -165,12 +165,12 @@ public void recoverToTarget(ActionListener<RecoveryResponse> listener) {
                 throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node");
             }
             assert targetShardRouting.initializing() : "expected recovery target to be initializing but was " + targetShardRouting;
-            retentionLeaseRef.set(softDeletesEnabled ? shard.getRetentionLeases().get(
-                ReplicationTracker.getPeerRecoveryRetentionLeaseId(targetShardRouting)) : null);
+            retentionLeaseRef.set(
+                shard.getRetentionLeases().get(ReplicationTracker.getPeerRecoveryRetentionLeaseId(targetShardRouting)));
         }, shardId + " validating recovery target ["+ request.targetAllocationId() + "] registered ",
             shard, cancellableThreads, logger);
         final Engine.HistorySource historySource;
-        if (shard.useRetentionLeasesInPeerRecovery() || retentionLeaseRef.get() != null) {
+        if (softDeletesEnabled && (shard.useRetentionLeasesInPeerRecovery() || retentionLeaseRef.get() != null)) {
             historySource = Engine.HistorySource.INDEX;
         } else {
             historySource = Engine.HistorySource.TRANSLOG;
@@ -190,7 +190,7 @@ && isTargetSameHistory()
         // Also it's pretty cheap when soft deletes are enabled, and it'd be a disaster if we tried a sequence-number-based recovery
         // without having a complete history.

-        if (isSequenceNumberBasedRecovery && retentionLeaseRef.get() != null) {
+        if (isSequenceNumberBasedRecovery && softDeletesEnabled && retentionLeaseRef.get() != null) {
             // all the history we need is retained by an existing retention lease, so we do not need a separate retention lock
             retentionLock.close();
             logger.trace("history is retained by {}", retentionLeaseRef.get());
@@ -209,7 +209,7 @@ && isTargetSameHistory()
         if (isSequenceNumberBasedRecovery) {
             logger.trace("performing sequence numbers based recovery. starting at [{}]", request.startingSeqNo());
             startingSeqNo = request.startingSeqNo();
-            if (softDeletesEnabled && retentionLeaseRef.get() == null) {
+            if (retentionLeaseRef.get() == null) {
                 createRetentionLease(startingSeqNo, ActionListener.map(sendFileStep, ignored -> SendFileResult.EMPTY));
             } else {
                 sendFileStep.onResponse(SendFileResult.EMPTY);
@@ -251,36 +251,24 @@ && isTargetSameHistory()
             });

             final StepListener<ReplicationResponse> deleteRetentionLeaseStep = new StepListener<>();
-            if (softDeletesEnabled) {
-                runUnderPrimaryPermit(() -> {
-                    try {
-                        // If the target previously had a copy of this shard then a file-based recovery might move its global
-                        // checkpoint backwards. We must therefore remove any existing retention lease so that we can create a
-                        // new one later on in the recovery.
-                        shard.removePeerRecoveryRetentionLease(request.targetNode().getId(),
-                            new ThreadedActionListener<>(logger, shard.getThreadPool(), ThreadPool.Names.GENERIC,
-                                deleteRetentionLeaseStep, false));
-                    } catch (RetentionLeaseNotFoundException e) {
-                        logger.debug("no peer-recovery retention lease for " + request.targetAllocationId());
-                        deleteRetentionLeaseStep.onResponse(null);
-                    }
-                }, shardId + " removing retention leaes for [" + request.targetAllocationId() + "]",
-                    shard, cancellableThreads, logger);
-            } else {
-                deleteRetentionLeaseStep.onResponse(null);
-            }
+            runUnderPrimaryPermit(() -> {
+                try {
+                    // If the target previously had a copy of this shard then a file-based recovery might move its global
+                    // checkpoint backwards. We must therefore remove any existing retention lease so that we can create a
+                    // new one later on in the recovery.
+                    shard.removePeerRecoveryRetentionLease(request.targetNode().getId(),
+                        new ThreadedActionListener<>(logger, shard.getThreadPool(), ThreadPool.Names.GENERIC,
+                            deleteRetentionLeaseStep, false));
+                } catch (RetentionLeaseNotFoundException e) {
+                    logger.debug("no peer-recovery retention lease for " + request.targetAllocationId());
+                    deleteRetentionLeaseStep.onResponse(null);
+                }
+            }, shardId + " removing retention lease for [" + request.targetAllocationId() + "]",
+                shard, cancellableThreads, logger);

             deleteRetentionLeaseStep.whenComplete(ignored -> {
                 assert Transports.assertNotTransportThread(RecoverySourceHandler.this + "[phase1]");
-
-                final Consumer<ActionListener<RetentionLease>> createRetentionLeaseAsync;
-                if (softDeletesEnabled) {
-                    createRetentionLeaseAsync = l -> createRetentionLease(startingSeqNo, l);
-                } else {
-                    createRetentionLeaseAsync = l -> l.onResponse(null);
-                }
-
-                phase1(safeCommitRef.getIndexCommit(), createRetentionLeaseAsync, () -> estimateNumOps, sendFileStep);
+                phase1(safeCommitRef.getIndexCommit(), startingSeqNo, () -> estimateNumOps, sendFileStep);
             }, onFailure);

         } catch (final Exception e) {
@@ -451,8 +439,7 @@ static final class SendFileResult {
      * segments that are missing. Only segments that have the same size and
      * checksum can be reused
      */
-    void phase1(IndexCommit snapshot, Consumer<ActionListener<RetentionLease>> createRetentionLease,
-                IntSupplier translogOps, ActionListener<SendFileResult> listener) {
+    void phase1(IndexCommit snapshot, long startingSeqNo, IntSupplier translogOps, ActionListener<SendFileResult> listener) {
         cancellableThreads.checkForCancel();
         final Store store = shard.store();
         try {
@@ -526,7 +513,7 @@ void phase1(IndexCommit snapshot, Consumer<ActionListener<RetentionLease>> creat
             sendFileInfoStep.whenComplete(r ->
                 sendFiles(store, phase1Files.toArray(new StoreFileMetaData[0]), translogOps, sendFilesStep), listener::onFailure);

-            sendFilesStep.whenComplete(r -> createRetentionLease.accept(createRetentionLeaseStep), listener::onFailure);
+            sendFilesStep.whenComplete(r -> createRetentionLease(startingSeqNo, createRetentionLeaseStep), listener::onFailure);

             createRetentionLeaseStep.whenComplete(retentionLease ->
                 {
@@ -554,7 +541,7 @@ void phase1(IndexCommit snapshot, Consumer<ActionListener<RetentionLease>> creat

             // but we must still create a retention lease
             final StepListener<RetentionLease> createRetentionLeaseStep = new StepListener<>();
-            createRetentionLease.accept(createRetentionLeaseStep);
+            createRetentionLease(startingSeqNo, createRetentionLeaseStep);
             createRetentionLeaseStep.whenComplete(retentionLease -> {
                 final TimeValue took = stopWatch.totalTime();
                 logger.trace("recovery [phase1]: took [{}]", took);
@@ -590,7 +577,8 @@ private void createRetentionLease(final long startingSeqNo, ActionListener<Reten
         // it's possible that the primary has no retention lease yet if we are doing a rolling upgrade from a version before
         // 7.4, and in that case we just create a lease using the local checkpoint of the safe commit which we're using for
         // recovery as a conservative estimate for the global checkpoint.
-        assert shard.indexSettings().getIndexVersionCreated().before(Version.V_7_4_0);
+        assert shard.indexSettings().getIndexVersionCreated().before(Version.V_7_4_0)
+            || shard.indexSettings().isSoftDeleteEnabled() == false;
         final StepListener<ReplicationResponse> addRetentionLeaseStep = new StepListener<>();
         final long estimatedGlobalCheckpoint = startingSeqNo - 1;
         final RetentionLease newLease = shard.addPeerRecoveryRetentionLease(request.targetNode().getId(),
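
In the last hunk, the lease created mid-recovery uses startingSeqNo - 1 as a conservative estimate of the target's global checkpoint, so the new lease retains operations from exactly startingSeqNo, the first operation the target still needs. A one-method worked check of that arithmetic (illustrative, not the production code path):

    // Sketch: retaining seq no = estimated global checkpoint + 1 = startingSeqNo.
    static long retainingSeqNoForRecovery(long startingSeqNo) {
        long estimatedGlobalCheckpoint = startingSeqNo - 1; // conservative estimate
        return estimatedGlobalCheckpoint + 1;               // == startingSeqNo
    }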

server/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorIT.java

Lines changed: 4 additions & 4 deletions
@@ -78,7 +78,7 @@ public void testPreferCopyCanPerformNoopRecovery() throws Exception {
         assertAcked(
             client().admin().indices().prepareCreate(indexName)
                 .setSettings(Settings.builder()
-                    .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
+                    .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), randomBoolean())
                     .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
                     .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)
                     .put(IndexSettings.FILE_BASED_RECOVERY_THRESHOLD_SETTING.getKey(), 1.0f)
@@ -211,7 +211,7 @@ public void testFullClusterRestartPerformNoopRecovery() throws Exception {
         assertAcked(
             client().admin().indices().prepareCreate(indexName)
                 .setSettings(Settings.builder()
-                    .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
+                    .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), randomBoolean())
                     .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
                     .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), randomIntBetween(10, 100) + "kb")
                     .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, numOfReplicas)
@@ -248,7 +248,7 @@ public void testPreferCopyWithHighestMatchingOperations() throws Exception {
         assertAcked(
             client().admin().indices().prepareCreate(indexName)
                 .setSettings(Settings.builder()
-                    .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
+                    .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), randomBoolean())
                     .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
                     .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), randomIntBetween(10, 100) + "kb")
                     .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)
@@ -329,7 +329,7 @@ public void testPeerRecoveryForClosedIndices() throws Exception {
         createIndex(indexName, Settings.builder()
             .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
             .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
-            .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
+            .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), randomBoolean())
             .put(IndexService.GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING.getKey(), "100ms")
             .put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), "100ms")
             .build());
