Skip to content

Commit a3bfd27

Browse files
committed
Roll translog generation on primary promotion
When a primary is promoted, rolling the translog generation here makes simpler reasoning about the relationship between primary terms and translog generation. Note that this is not strictly necessary for correctness (e.g., to avoid duplicate operations with the same sequence number within a single generation). Relates #27313
1 parent a6ee653 commit a3bfd27

File tree

2 files changed

+50
-1
lines changed

2 files changed

+50
-1
lines changed

core/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,8 +487,12 @@ public void updateShardState(final ShardRouting newRouting,
487487
* subsequently fails before the primary/replica re-sync completes successfully and we are now being
488488
* promoted, the local checkpoint tracker here could be left in a state where it would re-issue sequence
489489
* numbers. To ensure that this is not the case, we restore the state of the local checkpoint tracker by
490-
* replaying the translog and marking any operations there are completed.
490+
* replaying the translog and marking any operations there are completed. Rolling the translog generation is
491+
* not strictly needed here (as we will never have collisions between sequence numbers in a translog
492+
* generation in a new primary as it takes the last known sequence number as a starting point), but it
493+
* simplifies reasoning about the relationship between primary terms and translog generations.
491494
*/
495+
getEngine().rollTranslogGeneration();
492496
getEngine().restoreLocalCheckpointFromTranslog();
493497
getEngine().fillSeqNoGaps(newPrimaryTerm);
494498
getEngine().seqNoService().updateLocalCheckpointForShard(currentRouting.allocationId().getId(),

core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,51 @@ public void onFailure(Exception e) {
505505
closeShards(indexShard);
506506
}
507507

508+
public void testPrimaryPromotionRollsGeneration() throws Exception {
509+
final IndexShard indexShard = newStartedShard(false);
510+
511+
final long currentTranslogGeneration = indexShard.getTranslog().getGeneration().translogFileGeneration;
512+
513+
// promote the replica
514+
final ShardRouting replicaRouting = indexShard.routingEntry();
515+
final ShardRouting primaryRouting =
516+
newShardRouting(
517+
replicaRouting.shardId(),
518+
replicaRouting.currentNodeId(),
519+
null,
520+
true,
521+
ShardRoutingState.STARTED,
522+
replicaRouting.allocationId());
523+
indexShard.updateShardState(primaryRouting, indexShard.getPrimaryTerm() + 1, (shard, listener) -> {},
524+
0L, Collections.singleton(primaryRouting.allocationId().getId()),
525+
new IndexShardRoutingTable.Builder(primaryRouting.shardId()).addShard(primaryRouting).build(), Collections.emptySet());
526+
527+
/*
528+
* This operation completing means that the delay operation executed as part of increasing the primary term has completed and the
529+
* gaps are filled.
530+
*/
531+
final CountDownLatch latch = new CountDownLatch(1);
532+
indexShard.acquirePrimaryOperationPermit(
533+
new ActionListener<Releasable>() {
534+
@Override
535+
public void onResponse(Releasable releasable) {
536+
releasable.close();
537+
latch.countDown();
538+
}
539+
540+
@Override
541+
public void onFailure(Exception e) {
542+
throw new RuntimeException(e);
543+
}
544+
},
545+
ThreadPool.Names.GENERIC);
546+
547+
latch.await();
548+
assertThat(indexShard.getTranslog().getGeneration().translogFileGeneration, equalTo(currentTranslogGeneration + 1));
549+
550+
closeShards(indexShard);
551+
}
552+
508553
public void testOperationPermitsOnPrimaryShards() throws InterruptedException, ExecutionException, IOException {
509554
final ShardId shardId = new ShardId("test", "_na_", 0);
510555
final IndexShard indexShard;

0 commit comments

Comments
 (0)