Skip to content

Commit d060ac0

Browse files
committed
Add global checkpoint tracking on the primary
This commit adds local tracking of the global checkpoints on all shard copies when a global checkpoint tracker is operating in primary mode. With this, we relay the global checkpoint on a shard copy back to the primary shard during replication operations. This serves as another step towards adding a background sync of the global checkpoint to the shard copies. Relates #26666
1 parent 973e756 commit d060ac0

17 files changed

+544
-210
lines changed

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ task verifyVersions {
175175
* after the backport of the backcompat code is complete.
176176
*/
177177
allprojects {
178-
ext.bwc_tests_enabled = true
178+
ext.bwc_tests_enabled = false
179179
}
180180

181181
task verifyBwcTestsEnabled {

core/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ protected void sendReplicaRequest(
9393
if (node.getVersion().onOrAfter(Version.V_6_0_0_alpha1)) {
9494
super.sendReplicaRequest(replicaRequest, node, listener);
9595
} else {
96-
listener.onResponse(new ReplicaResponse(SequenceNumbersService.PRE_60_NODE_LOCAL_CHECKPOINT));
96+
final long pre60NodeCheckpoint = SequenceNumbersService.PRE_60_NODE_CHECKPOINT;
97+
listener.onResponse(new ReplicaResponse(pre60NodeCheckpoint, pre60NodeCheckpoint));
9798
}
9899
}
99100

core/src/main/java/org/elasticsearch/action/support/replication/ReplicationOperation.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.elasticsearch.common.Nullable;
3333
import org.elasticsearch.common.io.stream.StreamInput;
3434
import org.elasticsearch.common.util.set.Sets;
35+
import org.elasticsearch.index.seqno.SequenceNumbersService;
3536
import org.elasticsearch.index.shard.ReplicationGroup;
3637
import org.elasticsearch.index.shard.ShardId;
3738
import org.elasticsearch.rest.RestStatus;
@@ -173,6 +174,7 @@ public void onResponse(ReplicaResponse response) {
173174
successfulShards.incrementAndGet();
174175
try {
175176
primary.updateLocalCheckpointForShard(shard.allocationId().getId(), response.localCheckpoint());
177+
primary.updateGlobalCheckpointForShard(shard.allocationId().getId(), response.globalCheckpoint());
176178
} catch (final AlreadyClosedException e) {
177179
// okay, the index was deleted or this shard was never activated after a relocation; fall through and finish normally
178180
} catch (final Exception e) {
@@ -315,6 +317,14 @@ public interface Primary<
315317
*/
316318
void updateLocalCheckpointForShard(String allocationId, long checkpoint);
317319

320+
/**
321+
* Update the local knowledge of the global checkpoint for the specified allocation ID.
322+
*
323+
* @param allocationId the allocation ID to update the global checkpoint for
324+
* @param globalCheckpoint the global checkpoint
325+
*/
326+
void updateGlobalCheckpointForShard(String allocationId, long globalCheckpoint);
327+
318328
/**
319329
* Returns the local checkpoint on the primary shard.
320330
*
@@ -385,12 +395,24 @@ void markShardCopyAsStaleIfNeeded(ShardId shardId, String allocationId, Runnable
385395
}
386396

387397
/**
388-
* An interface to encapsulate the metadata needed from replica shards when they respond to operations performed on them
398+
* An interface to encapsulate the metadata needed from replica shards when they respond to operations performed on them.
389399
*/
390400
public interface ReplicaResponse {
391401

392-
/** the local check point for the shard. see {@link org.elasticsearch.index.seqno.SequenceNumbersService#getLocalCheckpoint()} */
402+
/**
403+
* The local checkpoint for the shard. See {@link SequenceNumbersService#getLocalCheckpoint()}.
404+
*
405+
* @return the local checkpoint
406+
**/
393407
long localCheckpoint();
408+
409+
/**
410+
* The global checkpoint for the shard. See {@link SequenceNumbersService#getGlobalCheckpoint()}.
411+
*
412+
* @return the global checkpoint
413+
**/
414+
long globalCheckpoint();
415+
394416
}
395417

396418
public static class RetryOnPrimaryException extends ElasticsearchException {

core/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,8 @@ public void onResponse(Releasable releasable) {
531531
try {
532532
final ReplicaResult replicaResult = shardOperationOnReplica(request, replica);
533533
releasable.close(); // release shard operation lock before responding to caller
534-
final TransportReplicationAction.ReplicaResponse response = new ReplicaResponse(replica.getLocalCheckpoint());
534+
final TransportReplicationAction.ReplicaResponse response =
535+
new ReplicaResponse(replica.getLocalCheckpoint(), replica.getGlobalCheckpoint());
535536
replicaResult.respond(new ResponseListener(response));
536537
} catch (final Exception e) {
537538
Releasables.closeWhileHandlingException(releasable); // release shard operation lock before responding to caller
@@ -1006,6 +1007,11 @@ public void updateLocalCheckpointForShard(String allocationId, long checkpoint)
10061007
indexShard.updateLocalCheckpointForShard(allocationId, checkpoint);
10071008
}
10081009

1010+
@Override
1011+
public void updateGlobalCheckpointForShard(final String allocationId, final long globalCheckpoint) {
1012+
indexShard.updateGlobalCheckpointForShard(allocationId, globalCheckpoint);
1013+
}
1014+
10091015
@Override
10101016
public long localCheckpoint() {
10111017
return indexShard.getLocalCheckpoint();
@@ -1025,47 +1031,60 @@ public ReplicationGroup getReplicationGroup() {
10251031

10261032
public static class ReplicaResponse extends ActionResponse implements ReplicationOperation.ReplicaResponse {
10271033
private long localCheckpoint;
1034+
private long globalCheckpoint;
10281035

10291036
ReplicaResponse() {
10301037

10311038
}
10321039

1033-
public ReplicaResponse(long localCheckpoint) {
1040+
public ReplicaResponse(long localCheckpoint, long globalCheckpoint) {
10341041
/*
1035-
* A replica should always know its own local checkpoint so this should always be a valid sequence number or the pre-6.0 local
1042+
* A replica should always know its own local checkpoint so this should always be a valid sequence number or the pre-6.0
10361043
* checkpoint value when simulating responses to replication actions that pre-6.0 nodes are not aware of (e.g., the global
10371044
* checkpoint background sync, and the primary/replica resync).
10381045
*/
10391046
assert localCheckpoint != SequenceNumbers.UNASSIGNED_SEQ_NO;
10401047
this.localCheckpoint = localCheckpoint;
1048+
this.globalCheckpoint = globalCheckpoint;
10411049
}
10421050

10431051
@Override
10441052
public void readFrom(StreamInput in) throws IOException {
1053+
super.readFrom(in);
10451054
if (in.getVersion().onOrAfter(Version.V_6_0_0_alpha1)) {
1046-
super.readFrom(in);
10471055
localCheckpoint = in.readZLong();
10481056
} else {
10491057
// 5.x used to read empty responses, which don't really read anything off the stream, so just do nothing.
1050-
localCheckpoint = SequenceNumbersService.PRE_60_NODE_LOCAL_CHECKPOINT;
1058+
localCheckpoint = SequenceNumbersService.PRE_60_NODE_CHECKPOINT;
1059+
}
1060+
if (in.getVersion().onOrAfter(Version.V_6_0_0_rc1)) {
1061+
globalCheckpoint = in.readZLong();
1062+
} else {
1063+
globalCheckpoint = SequenceNumbersService.PRE_60_NODE_CHECKPOINT;
10511064
}
10521065
}
10531066

10541067
@Override
10551068
public void writeTo(StreamOutput out) throws IOException {
1069+
super.writeTo(out);
10561070
if (out.getVersion().onOrAfter(Version.V_6_0_0_alpha1)) {
1057-
super.writeTo(out);
10581071
out.writeZLong(localCheckpoint);
1059-
} else {
1060-
// we used to write empty responses
1061-
Empty.INSTANCE.writeTo(out);
1072+
}
1073+
if (out.getVersion().onOrAfter(Version.V_6_0_0_rc1)) {
1074+
out.writeZLong(globalCheckpoint);
10621075
}
10631076
}
10641077

10651078
@Override
10661079
public long localCheckpoint() {
10671080
return localCheckpoint;
10681081
}
1082+
1083+
@Override
1084+
public long globalCheckpoint() {
1085+
return globalCheckpoint;
1086+
}
1087+
10691088
}
10701089

10711090
/**

core/src/main/java/org/elasticsearch/index/seqno/GlobalCheckpointSyncAction.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ protected void sendReplicaRequest(
8989
if (node.getVersion().onOrAfter(Version.V_6_0_0_alpha1)) {
9090
super.sendReplicaRequest(replicaRequest, node, listener);
9191
} else {
92-
listener.onResponse(new ReplicaResponse(SequenceNumbersService.PRE_60_NODE_LOCAL_CHECKPOINT));
92+
final long pre60NodeCheckpoint = SequenceNumbersService.PRE_60_NODE_CHECKPOINT;
93+
listener.onResponse(new ReplicaResponse(pre60NodeCheckpoint, pre60NodeCheckpoint));
9394
}
9495
}
9596

0 commit comments

Comments
 (0)