Skip to content

Commit 920eba2

Browse files
committed
Enable trace log in FollowerFailOverIT (#38148)
This suite still fails one per week sometimes with a worrying assertion. Sadly we are still unable to find the actual source. Expected: <SeqNoStats{maxSeqNo=229, localCheckpoint=86, globalCheckpoint=86}> but: was <SeqNoStats{maxSeqNo=229, localCheckpoint=-1, globalCheckpoint=86}> This change enables trace log in the suite so we will have a better picture if this fails again. Relates #3333
1 parent d16c667 commit 920eba2

File tree

4 files changed

+24
-7
lines changed

4 files changed

+24
-7
lines changed

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/bulk/TransportBulkShardOperationsAction.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ public TransportBulkShardOperationsAction(
6363
@Override
6464
protected WritePrimaryResult<BulkShardOperationsRequest, BulkShardOperationsResponse> shardOperationOnPrimary(
6565
final BulkShardOperationsRequest request, final IndexShard primary) throws Exception {
66+
if (logger.isTraceEnabled()) {
67+
logger.trace("index [{}] on the following primary shard {}", request.getOperations(), primary.routingEntry());
68+
}
6669
return shardOperationOnPrimary(request.shardId(), request.getHistoryUUID(), request.getOperations(),
6770
request.getMaxSeqNoOfUpdatesOrDeletes(), primary, logger);
6871
}
@@ -137,6 +140,10 @@ public static CcrWritePrimaryResult shardOperationOnPrimary(
137140
// replicated to replicas but with the existing primary term (not the current primary term) in order
138141
// to guarantee the consistency between the primary and replicas, and between translog and Lucene index.
139142
final AlreadyProcessedFollowingEngineException failure = (AlreadyProcessedFollowingEngineException) result.getFailure();
143+
if (logger.isTraceEnabled()) {
144+
logger.trace("operation [{}] was processed before on following primary shard {} with existing term {}",
145+
targetOp, primary.routingEntry(), failure.getExistingPrimaryTerm());
146+
}
140147
assert failure.getSeqNo() == targetOp.seqNo() : targetOp.seqNo() + " != " + failure.getSeqNo();
141148
if (failure.getExistingPrimaryTerm().isPresent()) {
142149
appliedOperations.add(rewriteOperationWithPrimaryTerm(sourceOp, failure.getExistingPrimaryTerm().getAsLong()));
@@ -159,6 +166,9 @@ public static CcrWritePrimaryResult shardOperationOnPrimary(
159166
@Override
160167
protected WriteReplicaResult<BulkShardOperationsRequest> shardOperationOnReplica(
161168
final BulkShardOperationsRequest request, final IndexShard replica) throws Exception {
169+
if (logger.isTraceEnabled()) {
170+
logger.trace("index [{}] on the following replica shard {}", request.getOperations(), replica.routingEntry());
171+
}
162172
return shardOperationOnReplica(request, replica, logger);
163173
}
164174

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngine.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ protected InternalEngine.IndexingStrategy indexingStrategyForOperation(final Ind
7474
final long maxSeqNoOfUpdatesOrDeletes = getMaxSeqNoOfUpdatesOrDeletes();
7575
assert maxSeqNoOfUpdatesOrDeletes != SequenceNumbers.UNASSIGNED_SEQ_NO : "max_seq_no_of_updates is not initialized";
7676
if (hasBeenProcessedBefore(index)) {
77+
if (logger.isTraceEnabled()) {
78+
logger.trace("index operation [id={} seq_no={} origin={}] was processed before", index.id(), index.seqNo(), index.origin());
79+
}
7780
if (index.origin() == Operation.Origin.PRIMARY) {
7881
/*
7982
* The existing operation in this engine was probably assigned the term of the previous primary shard which is different

x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/CcrIntegTestCase.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,13 @@ public static ResumeFollowAction.Request resumeFollow(String followerIndex) {
434434
* on the follower equal the leader's; then verifies the existing pairs of (docId, seqNo) on the follower also equal the leader.
435435
*/
436436
protected void assertIndexFullyReplicatedToFollower(String leaderIndex, String followerIndex) throws Exception {
437+
logger.info("--> asserting <<docId,seqNo>> between {} and {}", leaderIndex, followerIndex);
438+
assertBusy(() -> {
439+
Map<Integer, List<DocIdSeqNoAndTerm>> docsOnFollower = getDocIdAndSeqNos(clusterGroup.followerCluster, followerIndex);
440+
logger.info("--> docs on the follower {}", docsOnFollower);
441+
assertThat(docsOnFollower, equalTo(getDocIdAndSeqNos(clusterGroup.leaderCluster, leaderIndex)));
442+
}, 120, TimeUnit.SECONDS);
443+
437444
logger.info("--> asserting seq_no_stats between {} and {}", leaderIndex, followerIndex);
438445
assertBusy(() -> {
439446
Map<Integer, SeqNoStats> leaderStats = new HashMap<>();
@@ -450,13 +457,8 @@ protected void assertIndexFullyReplicatedToFollower(String leaderIndex, String f
450457
}
451458
followerStats.put(shardStat.getShardRouting().shardId().id(), shardStat.getSeqNoStats());
452459
}
453-
assertThat(leaderStats, equalTo(followerStats));
454-
}, 60, TimeUnit.SECONDS);
455-
logger.info("--> asserting <<docId,seqNo>> between {} and {}", leaderIndex, followerIndex);
456-
assertBusy(() -> {
457-
assertThat(getDocIdAndSeqNos(clusterGroup.leaderCluster, leaderIndex),
458-
equalTo(getDocIdAndSeqNos(clusterGroup.followerCluster, followerIndex)));
459-
}, 60, TimeUnit.SECONDS);
460+
assertThat(followerStats, equalTo(leaderStats));
461+
}, 120, TimeUnit.SECONDS);
460462
}
461463

462464
private Map<Integer, List<DocIdSeqNoAndTerm>> getDocIdAndSeqNos(InternalTestCluster cluster, String index) throws IOException {

x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/FollowerFailOverIT.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.elasticsearch.index.IndexSettings;
2222
import org.elasticsearch.index.shard.ShardId;
2323
import org.elasticsearch.test.InternalTestCluster;
24+
import org.elasticsearch.test.junit.annotations.TestLogging;
2425
import org.elasticsearch.xpack.CcrIntegTestCase;
2526
import org.elasticsearch.xpack.core.ccr.action.PutFollowAction;
2627

@@ -33,6 +34,7 @@
3334
import static java.util.Collections.singletonMap;
3435
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
3536

37+
@TestLogging("org.elasticsearch.xpack.ccr:TRACE,org.elasticsearch.index.shard:DEBUG")
3638
public class FollowerFailOverIT extends CcrIntegTestCase {
3739

3840
@Override

0 commit comments

Comments
 (0)