Skip to content

Commit 45066b5

Browse files
authored
Verify primary mode usage with assertions (#32667)
Primary terms were introduced as part of the sequence-number effort (#10708) and added in ES 5.0. Subsequent work introduced the replication tracker which lets the primary own its replication group (#25692) to coordinate recovery and replication. The replication tracker explicitly exposes whether it is operating in primary mode or replica mode, independent of the ShardRouting object that's associated with a shard. During a primary relocation, for example, the primary mode is transferred between the primary relocation source and the primary relocation target. After transferring this so-called primary context, the old primary becomes a replication target and the new primary the replication source, reflected in the replication tracker on both nodes. With the most recent PR in this area (#32442), we finally have a clean transition between a shard that's operating as a primary and issuing sequence numbers and a shard that's serving as a replication target. The transition from one state to the other is enforced through the operation-permit system, where we block permit acquisition during such changes and perform the transition under this operation block, ensuring that there are no operations in progress while the transition is being performed. This finally allows us to turn the best-effort checks that were put in place to prevent shards from being used in the wrong way (i.e. primary as replica, or replica as primary) into hard assertions, making it easier to catch any bugs in this area.
1 parent 3ce984d commit 45066b5

File tree

2 files changed

+45
-41
lines changed

2 files changed

+45
-41
lines changed

server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 21 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1446,30 +1446,25 @@ private void ensureWriteAllowed(Engine.Operation.Origin origin) throws IllegalIn
14461446
}
14471447
} else {
14481448
if (origin == Engine.Operation.Origin.PRIMARY) {
1449-
verifyPrimary();
1449+
assert assertPrimaryMode();
14501450
} else {
14511451
assert origin == Engine.Operation.Origin.REPLICA;
1452-
verifyReplicationTarget();
1452+
assert assertReplicationTarget();
14531453
}
14541454
if (writeAllowedStates.contains(state) == false) {
14551455
throw new IllegalIndexShardStateException(shardId, state, "operation only allowed when shard state is one of " + writeAllowedStates + ", origin [" + origin + "]");
14561456
}
14571457
}
14581458
}
14591459

1460-
private void verifyPrimary() {
1461-
if (shardRouting.primary() == false) {
1462-
throw new IllegalStateException("shard " + shardRouting + " is not a primary");
1463-
}
1460+
private boolean assertPrimaryMode() {
1461+
assert shardRouting.primary() && replicationTracker.isPrimaryMode() : "shard " + shardRouting + " is not a primary shard in primary mode";
1462+
return true;
14641463
}
14651464

1466-
private void verifyReplicationTarget() {
1467-
final IndexShardState state = state();
1468-
if (shardRouting.primary() && shardRouting.active() && replicationTracker.isPrimaryMode()) {
1469-
// must use exception that is not ignored by replication logic. See TransportActions.isShardNotAvailableException
1470-
throw new IllegalStateException("active primary shard " + shardRouting + " cannot be a replication target before " +
1471-
"relocation hand off, state is [" + state + "]");
1472-
}
1465+
private boolean assertReplicationTarget() {
1466+
assert replicationTracker.isPrimaryMode() == false : "shard " + shardRouting + " in primary mode cannot be a replication target";
1467+
return true;
14731468
}
14741469

14751470
private void verifyNotClosed() throws IllegalIndexShardStateException {
@@ -1716,7 +1711,7 @@ public void writeIndexingBuffer() {
17161711
* @param checkpoint the local checkpoint for the shard
17171712
*/
17181713
public void updateLocalCheckpointForShard(final String allocationId, final long checkpoint) {
1719-
verifyPrimary();
1714+
assert assertPrimaryMode();
17201715
verifyNotClosed();
17211716
replicationTracker.updateLocalCheckpoint(allocationId, checkpoint);
17221717
}
@@ -1728,7 +1723,7 @@ public void updateLocalCheckpointForShard(final String allocationId, final long
17281723
* @param globalCheckpoint the global checkpoint
17291724
*/
17301725
public void updateGlobalCheckpointForShard(final String allocationId, final long globalCheckpoint) {
1731-
verifyPrimary();
1726+
assert assertPrimaryMode();
17321727
verifyNotClosed();
17331728
replicationTracker.updateGlobalCheckpointForShard(allocationId, globalCheckpoint);
17341729
}
@@ -1750,7 +1745,7 @@ public void waitForOpsToComplete(final long seqNo) throws InterruptedException {
17501745
* @param allocationId the allocation ID of the shard for which recovery was initiated
17511746
*/
17521747
public void initiateTracking(final String allocationId) {
1753-
verifyPrimary();
1748+
assert assertPrimaryMode();
17541749
replicationTracker.initiateTracking(allocationId);
17551750
}
17561751

@@ -1763,7 +1758,7 @@ public void initiateTracking(final String allocationId) {
17631758
* @param localCheckpoint the current local checkpoint on the shard
17641759
*/
17651760
public void markAllocationIdAsInSync(final String allocationId, final long localCheckpoint) throws InterruptedException {
1766-
verifyPrimary();
1761+
assert assertPrimaryMode();
17671762
replicationTracker.markAllocationIdAsInSync(allocationId, localCheckpoint);
17681763
}
17691764

@@ -1798,7 +1793,7 @@ public long getLastSyncedGlobalCheckpoint() {
17981793
* @return a map from allocation ID to the local knowledge of the global checkpoint for that allocation ID
17991794
*/
18001795
public ObjectLongMap<String> getInSyncGlobalCheckpoints() {
1801-
verifyPrimary();
1796+
assert assertPrimaryMode();
18021797
verifyNotClosed();
18031798
return replicationTracker.getInSyncGlobalCheckpoints();
18041799
}
@@ -1808,11 +1803,12 @@ public ObjectLongMap<String> getInSyncGlobalCheckpoints() {
18081803
* primary.
18091804
*/
18101805
public void maybeSyncGlobalCheckpoint(final String reason) {
1811-
verifyPrimary();
18121806
verifyNotClosed();
1807+
assert shardRouting.primary() : "only call maybeSyncGlobalCheckpoint on primary shard";
18131808
if (replicationTracker.isPrimaryMode() == false) {
18141809
return;
18151810
}
1811+
assert assertPrimaryMode();
18161812
// only sync if there are not operations in flight
18171813
final SeqNoStats stats = getEngine().getSeqNoStats(replicationTracker.getGlobalCheckpoint());
18181814
if (stats.getMaxSeqNo() == stats.getGlobalCheckpoint()) {
@@ -1838,7 +1834,7 @@ public void maybeSyncGlobalCheckpoint(final String reason) {
18381834
* @return the replication group
18391835
*/
18401836
public ReplicationGroup getReplicationGroup() {
1841-
verifyPrimary();
1837+
assert assertPrimaryMode();
18421838
verifyNotClosed();
18431839
return replicationTracker.getReplicationGroup();
18441840
}
@@ -1850,7 +1846,7 @@ public ReplicationGroup getReplicationGroup() {
18501846
* @param reason the reason the global checkpoint was updated
18511847
*/
18521848
public void updateGlobalCheckpointOnReplica(final long globalCheckpoint, final String reason) {
1853-
verifyReplicationTarget();
1849+
assert assertReplicationTarget();
18541850
final long localCheckpoint = getLocalCheckpoint();
18551851
if (globalCheckpoint > localCheckpoint) {
18561852
/*
@@ -1877,8 +1873,7 @@ assert state() != IndexShardState.POST_RECOVERY && state() != IndexShardState.ST
18771873
* @param primaryContext the sequence number context
18781874
*/
18791875
public void activateWithPrimaryContext(final ReplicationTracker.PrimaryContext primaryContext) {
1880-
verifyPrimary();
1881-
assert shardRouting.isRelocationTarget() : "only relocation target can update allocation IDs from primary context: " + shardRouting;
1876+
assert shardRouting.primary() && shardRouting.isRelocationTarget() : "only primary relocation target can update allocation IDs from primary context: " + shardRouting;
18821877
assert primaryContext.getCheckpointStates().containsKey(routingEntry().allocationId().getId()) &&
18831878
getLocalCheckpoint() == primaryContext.getCheckpointStates().get(routingEntry().allocationId().getId()).getLocalCheckpoint();
18841879
synchronized (mutex) {
@@ -1892,7 +1887,7 @@ public void activateWithPrimaryContext(final ReplicationTracker.PrimaryContext p
18921887
* @return {@code true} if there is at least one shard pending in-sync, otherwise false
18931888
*/
18941889
public boolean pendingInSync() {
1895-
verifyPrimary();
1890+
assert assertPrimaryMode();
18961891
return replicationTracker.pendingInSync();
18971892
}
18981893

@@ -2209,7 +2204,7 @@ private EngineConfig newEngineConfig() {
22092204
*/
22102205
public void acquirePrimaryOperationPermit(ActionListener<Releasable> onPermitAcquired, String executorOnDelay, Object debugInfo) {
22112206
verifyNotClosed();
2212-
verifyPrimary();
2207+
assert shardRouting.primary() : "acquirePrimaryOperationPermit should only be called on primary shard: " + shardRouting;
22132208

22142209
indexShardOperationPermits.acquire(onPermitAcquired, executorOnDelay, false, debugInfo);
22152210
}
@@ -2259,7 +2254,6 @@ public void acquireReplicaOperationPermit(final long opPrimaryTerm, final long g
22592254
final ActionListener<Releasable> onPermitAcquired, final String executorOnDelay,
22602255
final Object debugInfo) {
22612256
verifyNotClosed();
2262-
verifyReplicationTarget();
22632257
if (opPrimaryTerm > pendingPrimaryTerm) {
22642258
synchronized (mutex) {
22652259
if (opPrimaryTerm > pendingPrimaryTerm) {
@@ -2312,6 +2306,7 @@ public void onResponse(final Releasable releasable) {
23122306
operationPrimaryTerm);
23132307
onPermitAcquired.onFailure(new IllegalStateException(message));
23142308
} else {
2309+
assert assertReplicationTarget();
23152310
try {
23162311
updateGlobalCheckpointOnReplica(globalCheckpoint, "operation");
23172312
} catch (Exception e) {

server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.lucene.store.FilterDirectory;
3232
import org.apache.lucene.store.IOContext;
3333
import org.apache.lucene.util.Constants;
34+
import org.elasticsearch.Assertions;
3435
import org.elasticsearch.Version;
3536
import org.elasticsearch.action.ActionListener;
3637
import org.elasticsearch.action.admin.indices.flush.FlushRequest;
@@ -560,28 +561,20 @@ public void testOperationPermitsOnPrimaryShards() throws InterruptedException, E
560561
ShardRouting primaryRouting = newShardRouting(replicaRouting.shardId(), replicaRouting.currentNodeId(), null,
561562
true, ShardRoutingState.STARTED, replicaRouting.allocationId());
562563
final long newPrimaryTerm = indexShard.getPendingPrimaryTerm() + between(1, 1000);
564+
CountDownLatch latch = new CountDownLatch(1);
563565
indexShard.updateShardState(primaryRouting, newPrimaryTerm, (shard, listener) -> {
564566
assertThat(TestTranslog.getCurrentTerm(getTranslog(indexShard)), equalTo(newPrimaryTerm));
567+
latch.countDown();
565568
}, 0L,
566569
Collections.singleton(indexShard.routingEntry().allocationId().getId()),
567570
new IndexShardRoutingTable.Builder(indexShard.shardId()).addShard(primaryRouting).build(),
568571
Collections.emptySet());
572+
latch.await();
569573
} else {
570574
indexShard = newStartedShard(true);
571575
}
572576
final long primaryTerm = indexShard.getPendingPrimaryTerm();
573577
assertEquals(0, indexShard.getActiveOperationsCount());
574-
if (indexShard.routingEntry().isRelocationTarget() == false) {
575-
try {
576-
final PlainActionFuture<Releasable> permitAcquiredFuture = new PlainActionFuture<>();
577-
indexShard.acquireReplicaOperationPermit(primaryTerm, indexShard.getGlobalCheckpoint(), permitAcquiredFuture,
578-
ThreadPool.Names.WRITE, "");
579-
permitAcquiredFuture.actionGet();
580-
fail("shard shouldn't accept operations as replica");
581-
} catch (IllegalStateException ignored) {
582-
583-
}
584-
}
585578
Releasable operation1 = acquirePrimaryOperationPermitBlockingly(indexShard);
586579
assertEquals(1, indexShard.getActiveOperationsCount());
587580
Releasable operation2 = acquirePrimaryOperationPermitBlockingly(indexShard);
@@ -590,6 +583,22 @@ public void testOperationPermitsOnPrimaryShards() throws InterruptedException, E
590583
Releasables.close(operation1, operation2);
591584
assertEquals(0, indexShard.getActiveOperationsCount());
592585

586+
if (Assertions.ENABLED && indexShard.routingEntry().isRelocationTarget() == false) {
587+
assertThat(expectThrows(AssertionError.class, () -> indexShard.acquireReplicaOperationPermit(primaryTerm,
588+
indexShard.getGlobalCheckpoint(), new ActionListener<Releasable>() {
589+
@Override
590+
public void onResponse(Releasable releasable) {
591+
fail();
592+
}
593+
594+
@Override
595+
public void onFailure(Exception e) {
596+
fail();
597+
}
598+
},
599+
ThreadPool.Names.WRITE, "")).getMessage(), containsString("in primary mode cannot be a replication target"));
600+
}
601+
593602
closeShards(indexShard);
594603
}
595604

@@ -647,11 +656,11 @@ public void testOperationPermitOnReplicaShards() throws Exception {
647656
logger.info("shard routing to {}", shardRouting);
648657

649658
assertEquals(0, indexShard.getActiveOperationsCount());
650-
if (shardRouting.primary() == false) {
651-
final IllegalStateException e =
652-
expectThrows(IllegalStateException.class,
659+
if (shardRouting.primary() == false && Assertions.ENABLED) {
660+
final AssertionError e =
661+
expectThrows(AssertionError.class,
653662
() -> indexShard.acquirePrimaryOperationPermit(null, ThreadPool.Names.WRITE, ""));
654-
assertThat(e, hasToString(containsString("shard " + shardRouting + " is not a primary")));
663+
assertThat(e, hasToString(containsString("acquirePrimaryOperationPermit should only be called on primary shard")));
655664
}
656665

657666
final long primaryTerm = indexShard.getPendingPrimaryTerm();

0 commit comments

Comments
 (0)