Skip to content

Commit 2a46cf7

Browse files
committed
Roll translog generation on primary promotion
When a primary is promoted, rolling the translog generation here makes simpler reasoning about the relationship between primary terms and translog generation. Note that this is not strictly necessary for correctness (e.g., to avoid duplicate operations with the same sequence number within a single generation). Relates #27313
1 parent 875c9f3 commit 2a46cf7

File tree

2 files changed

+50
-1
lines changed

2 files changed

+50
-1
lines changed

core/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,8 +472,12 @@ public void updateShardState(final ShardRouting newRouting,
472472
* subsequently fails before the primary/replica re-sync completes successfully and we are now being
473473
* promoted, the local checkpoint tracker here could be left in a state where it would re-issue sequence
474474
* numbers. To ensure that this is not the case, we restore the state of the local checkpoint tracker by
475-
* replaying the translog and marking any operations there are completed.
475+
* replaying the translog and marking any operations there are completed. Rolling the translog generation is
476+
* not strictly needed here (as we will never have collisions between sequence numbers in a translog
477+
* generation in a new primary as it takes the last known sequence number as a starting point), but it
478+
* simplifies reasoning about the relationship between primary terms and translog generations.
476479
*/
480+
getEngine().rollTranslogGeneration();
477481
getEngine().restoreLocalCheckpointFromTranslog();
478482
getEngine().fillSeqNoGaps(newPrimaryTerm);
479483
getEngine().seqNoService().updateLocalCheckpointForShard(currentRouting.allocationId().getId(),

core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,51 @@ public void onFailure(Exception e) {
508508
closeShards(indexShard);
509509
}
510510

511+
public void testPrimaryPromotionRollsGeneration() throws Exception {
512+
final IndexShard indexShard = newStartedShard(false);
513+
514+
final long currentTranslogGeneration = indexShard.getTranslog().getGeneration().translogFileGeneration;
515+
516+
// promote the replica
517+
final ShardRouting replicaRouting = indexShard.routingEntry();
518+
final ShardRouting primaryRouting =
519+
newShardRouting(
520+
replicaRouting.shardId(),
521+
replicaRouting.currentNodeId(),
522+
null,
523+
true,
524+
ShardRoutingState.STARTED,
525+
replicaRouting.allocationId());
526+
indexShard.updateShardState(primaryRouting, indexShard.getPrimaryTerm() + 1, (shard, listener) -> {},
527+
0L, Collections.singleton(primaryRouting.allocationId().getId()),
528+
new IndexShardRoutingTable.Builder(primaryRouting.shardId()).addShard(primaryRouting).build(), Collections.emptySet());
529+
530+
/*
531+
* This operation completing means that the delay operation executed as part of increasing the primary term has completed and the
532+
* gaps are filled.
533+
*/
534+
final CountDownLatch latch = new CountDownLatch(1);
535+
indexShard.acquirePrimaryOperationPermit(
536+
new ActionListener<Releasable>() {
537+
@Override
538+
public void onResponse(Releasable releasable) {
539+
releasable.close();
540+
latch.countDown();
541+
}
542+
543+
@Override
544+
public void onFailure(Exception e) {
545+
throw new RuntimeException(e);
546+
}
547+
},
548+
ThreadPool.Names.GENERIC);
549+
550+
latch.await();
551+
assertThat(indexShard.getTranslog().getGeneration().translogFileGeneration, equalTo(currentTranslogGeneration + 1));
552+
553+
closeShards(indexShard);
554+
}
555+
511556
public void testOperationPermitsOnPrimaryShards() throws InterruptedException, ExecutionException, IOException {
512557
final ShardId shardId = new ShardId("test", "_na_", 0);
513558
final IndexShard indexShard;

0 commit comments

Comments
 (0)