Skip to content

Commit ac99d1d

Browse files
authored
Fix bugs in fixLag() (#34346)
The hack to work around lag detection had some issues: - it always called runFor(), even if no lag was detected - it looked at the last-accepted state not the last-applied state, so missed some lag situations. This fixes these issues.
1 parent 03da4f6 commit ac99d1d

File tree

1 file changed

+6
-5
lines changed

1 file changed

+6
-5
lines changed

server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -614,21 +614,22 @@ void stabilise(long stabilisationDurationMillis) {
614614
// TODO remove this when lag detection is implemented
615615
void fixLag() {
616616
final ClusterNode leader = getAnyLeader();
617-
final long leaderVersion = leader.coordinator.getLastAcceptedState().version();
617+
final long leaderVersion = leader.coordinator.getApplierState().version();
618618
final long minVersion = clusterNodes.stream()
619619
.filter(n -> isConnectedPair(n, leader))
620-
.map(n -> n.coordinator.getLastAcceptedState().version()).min(Long::compare).orElse(Long.MIN_VALUE);
621-
620+
.map(n -> n.coordinator.getApplierState().version()).min(Long::compare).orElse(Long.MIN_VALUE);
622621
assert minVersion >= 0;
623622
if (minVersion < leaderVersion) {
624-
logger.info("--> publishing a value to fix lag, leaderVersion={}, minVersion={}", leaderVersion, minVersion);
623+
logger.info("--> fixLag publishing a value to fix lag, leaderVersion={}, minVersion={}", leaderVersion, minVersion);
625624
onNode(leader.getLocalNode(), () -> {
626625
synchronized (leader.coordinator.mutex) {
627626
leader.submitValue(randomLong());
628627
}
629628
}).run();
629+
runFor(DEFAULT_CLUSTER_STATE_UPDATE_DELAY, "re-stabilising after lag-fixing publication");
630+
} else {
631+
logger.info("--> fixLag found no lag, leader={}, leaderVersion={}, minVersion={}", leader, leaderVersion, minVersion);
630632
}
631-
runFor(DEFAULT_CLUSTER_STATE_UPDATE_DELAY, "re-stabilising after lag-fixing publication");
632633
}
633634

634635
void runFor(long runDurationMillis, String description) {

0 commit comments

Comments
 (0)