Skip to content

Commit 3bca1aa

Browse files
authored
Enable trace log in FollowerFailOverIT (#38148)
This suite still fails one per week sometimes with a worrying assertion. Sadly we are still unable to find the actual source. Expected: <SeqNoStats{maxSeqNo=229, localCheckpoint=86, globalCheckpoint=86}> but: was <SeqNoStats{maxSeqNo=229, localCheckpoint=-1, globalCheckpoint=86}> This change enables trace log in the suite so we will have a better picture if this fails again. Relates #3333
1 parent 4cf0391 commit 3bca1aa

File tree

4 files changed

+24
-7
lines changed

4 files changed

+24
-7
lines changed

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/bulk/TransportBulkShardOperationsAction.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ public TransportBulkShardOperationsAction(
6363
@Override
6464
protected WritePrimaryResult<BulkShardOperationsRequest, BulkShardOperationsResponse> shardOperationOnPrimary(
6565
final BulkShardOperationsRequest request, final IndexShard primary) throws Exception {
66+
if (logger.isTraceEnabled()) {
67+
logger.trace("index [{}] on the following primary shard {}", request.getOperations(), primary.routingEntry());
68+
}
6669
return shardOperationOnPrimary(request.shardId(), request.getHistoryUUID(), request.getOperations(),
6770
request.getMaxSeqNoOfUpdatesOrDeletes(), primary, logger);
6871
}
@@ -137,6 +140,10 @@ public static CcrWritePrimaryResult shardOperationOnPrimary(
137140
// replicated to replicas but with the existing primary term (not the current primary term) in order
138141
// to guarantee the consistency between the primary and replicas, and between translog and Lucene index.
139142
final AlreadyProcessedFollowingEngineException failure = (AlreadyProcessedFollowingEngineException) result.getFailure();
143+
if (logger.isTraceEnabled()) {
144+
logger.trace("operation [{}] was processed before on following primary shard {} with existing term {}",
145+
targetOp, primary.routingEntry(), failure.getExistingPrimaryTerm());
146+
}
140147
assert failure.getSeqNo() == targetOp.seqNo() : targetOp.seqNo() + " != " + failure.getSeqNo();
141148
if (failure.getExistingPrimaryTerm().isPresent()) {
142149
appliedOperations.add(rewriteOperationWithPrimaryTerm(sourceOp, failure.getExistingPrimaryTerm().getAsLong()));
@@ -159,6 +166,9 @@ public static CcrWritePrimaryResult shardOperationOnPrimary(
159166
@Override
160167
protected WriteReplicaResult<BulkShardOperationsRequest> shardOperationOnReplica(
161168
final BulkShardOperationsRequest request, final IndexShard replica) throws Exception {
169+
if (logger.isTraceEnabled()) {
170+
logger.trace("index [{}] on the following replica shard {}", request.getOperations(), replica.routingEntry());
171+
}
162172
return shardOperationOnReplica(request, replica, logger);
163173
}
164174

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngine.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ protected InternalEngine.IndexingStrategy indexingStrategyForOperation(final Ind
7272
final long maxSeqNoOfUpdatesOrDeletes = getMaxSeqNoOfUpdatesOrDeletes();
7373
assert maxSeqNoOfUpdatesOrDeletes != SequenceNumbers.UNASSIGNED_SEQ_NO : "max_seq_no_of_updates is not initialized";
7474
if (hasBeenProcessedBefore(index)) {
75+
if (logger.isTraceEnabled()) {
76+
logger.trace("index operation [id={} seq_no={} origin={}] was processed before", index.id(), index.seqNo(), index.origin());
77+
}
7578
if (index.origin() == Operation.Origin.PRIMARY) {
7679
/*
7780
* The existing operation in this engine was probably assigned the term of the previous primary shard which is different

x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/CcrIntegTestCase.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,13 @@ public static ResumeFollowAction.Request resumeFollow(String followerIndex) {
447447
* on the follower equal the leader's; then verifies the existing pairs of (docId, seqNo) on the follower also equal the leader.
448448
*/
449449
protected void assertIndexFullyReplicatedToFollower(String leaderIndex, String followerIndex) throws Exception {
450+
logger.info("--> asserting <<docId,seqNo>> between {} and {}", leaderIndex, followerIndex);
451+
assertBusy(() -> {
452+
Map<Integer, List<DocIdSeqNoAndTerm>> docsOnFollower = getDocIdAndSeqNos(clusterGroup.followerCluster, followerIndex);
453+
logger.info("--> docs on the follower {}", docsOnFollower);
454+
assertThat(docsOnFollower, equalTo(getDocIdAndSeqNos(clusterGroup.leaderCluster, leaderIndex)));
455+
}, 120, TimeUnit.SECONDS);
456+
450457
logger.info("--> asserting seq_no_stats between {} and {}", leaderIndex, followerIndex);
451458
assertBusy(() -> {
452459
Map<Integer, SeqNoStats> leaderStats = new HashMap<>();
@@ -463,13 +470,8 @@ protected void assertIndexFullyReplicatedToFollower(String leaderIndex, String f
463470
}
464471
followerStats.put(shardStat.getShardRouting().shardId().id(), shardStat.getSeqNoStats());
465472
}
466-
assertThat(leaderStats, equalTo(followerStats));
467-
}, 60, TimeUnit.SECONDS);
468-
logger.info("--> asserting <<docId,seqNo>> between {} and {}", leaderIndex, followerIndex);
469-
assertBusy(() -> {
470-
assertThat(getDocIdAndSeqNos(clusterGroup.leaderCluster, leaderIndex),
471-
equalTo(getDocIdAndSeqNos(clusterGroup.followerCluster, followerIndex)));
472-
}, 60, TimeUnit.SECONDS);
473+
assertThat(followerStats, equalTo(leaderStats));
474+
}, 120, TimeUnit.SECONDS);
473475
}
474476

475477
private Map<Integer, List<DocIdSeqNoAndTerm>> getDocIdAndSeqNos(InternalTestCluster cluster, String index) throws IOException {

x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/FollowerFailOverIT.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.elasticsearch.index.shard.ShardId;
2828
import org.elasticsearch.indices.IndicesService;
2929
import org.elasticsearch.test.InternalTestCluster;
30+
import org.elasticsearch.test.junit.annotations.TestLogging;
3031
import org.elasticsearch.xpack.CcrIntegTestCase;
3132
import org.elasticsearch.xpack.core.ccr.action.FollowStatsAction;
3233
import org.elasticsearch.xpack.core.ccr.action.PutFollowAction;
@@ -44,6 +45,7 @@
4445
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
4546
import static org.hamcrest.Matchers.equalTo;
4647

48+
@TestLogging("org.elasticsearch.xpack.ccr:TRACE,org.elasticsearch.index.shard:DEBUG")
4749
public class FollowerFailOverIT extends CcrIntegTestCase {
4850

4951
@Override

0 commit comments

Comments
 (0)