Skip to content

Commit 3ecdfe1

Browse files
authored
Enable trace log in FollowerFailOverIT (#38148)
This suite still fails one per week sometimes with a worrying assertion. Sadly we are still unable to find the actual source. Expected: <SeqNoStats{maxSeqNo=229, localCheckpoint=86, globalCheckpoint=86}> but: was <SeqNoStats{maxSeqNo=229, localCheckpoint=-1, globalCheckpoint=86}> This change enables trace log in the suite so we will have a better picture if this fails again. Relates #3333
1 parent 03a1d21 commit 3ecdfe1

File tree

4 files changed

+24
-7
lines changed

4 files changed

+24
-7
lines changed

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/bulk/TransportBulkShardOperationsAction.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ public TransportBulkShardOperationsAction(
6363
@Override
6464
protected WritePrimaryResult<BulkShardOperationsRequest, BulkShardOperationsResponse> shardOperationOnPrimary(
6565
final BulkShardOperationsRequest request, final IndexShard primary) throws Exception {
66+
if (logger.isTraceEnabled()) {
67+
logger.trace("index [{}] on the following primary shard {}", request.getOperations(), primary.routingEntry());
68+
}
6669
return shardOperationOnPrimary(request.shardId(), request.getHistoryUUID(), request.getOperations(),
6770
request.getMaxSeqNoOfUpdatesOrDeletes(), primary, logger);
6871
}
@@ -134,6 +137,10 @@ public static CcrWritePrimaryResult shardOperationOnPrimary(
134137
// replicated to replicas but with the existing primary term (not the current primary term) in order
135138
// to guarantee the consistency between the primary and replicas, and between translog and Lucene index.
136139
final AlreadyProcessedFollowingEngineException failure = (AlreadyProcessedFollowingEngineException) result.getFailure();
140+
if (logger.isTraceEnabled()) {
141+
logger.trace("operation [{}] was processed before on following primary shard {} with existing term {}",
142+
targetOp, primary.routingEntry(), failure.getExistingPrimaryTerm());
143+
}
137144
assert failure.getSeqNo() == targetOp.seqNo() : targetOp.seqNo() + " != " + failure.getSeqNo();
138145
if (failure.getExistingPrimaryTerm().isPresent()) {
139146
appliedOperations.add(rewriteOperationWithPrimaryTerm(sourceOp, failure.getExistingPrimaryTerm().getAsLong()));
@@ -156,6 +163,9 @@ public static CcrWritePrimaryResult shardOperationOnPrimary(
156163
@Override
157164
protected WriteReplicaResult<BulkShardOperationsRequest> shardOperationOnReplica(
158165
final BulkShardOperationsRequest request, final IndexShard replica) throws Exception {
166+
if (logger.isTraceEnabled()) {
167+
logger.trace("index [{}] on the following replica shard {}", request.getOperations(), replica.routingEntry());
168+
}
159169
return shardOperationOnReplica(request, replica, logger);
160170
}
161171

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngine.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ protected InternalEngine.IndexingStrategy indexingStrategyForOperation(final Ind
7272
final long maxSeqNoOfUpdatesOrDeletes = getMaxSeqNoOfUpdatesOrDeletes();
7373
assert maxSeqNoOfUpdatesOrDeletes != SequenceNumbers.UNASSIGNED_SEQ_NO : "max_seq_no_of_updates is not initialized";
7474
if (hasBeenProcessedBefore(index)) {
75+
if (logger.isTraceEnabled()) {
76+
logger.trace("index operation [id={} seq_no={} origin={}] was processed before", index.id(), index.seqNo(), index.origin());
77+
}
7578
if (index.origin() == Operation.Origin.PRIMARY) {
7679
/*
7780
* The existing operation in this engine was probably assigned the term of the previous primary shard which is different

x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/CcrIntegTestCase.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,13 @@ public static ResumeFollowAction.Request resumeFollow(String followerIndex) {
444444
* on the follower equal the leader's; then verifies the existing pairs of (docId, seqNo) on the follower also equal the leader.
445445
*/
446446
protected void assertIndexFullyReplicatedToFollower(String leaderIndex, String followerIndex) throws Exception {
447+
logger.info("--> asserting <<docId,seqNo>> between {} and {}", leaderIndex, followerIndex);
448+
assertBusy(() -> {
449+
Map<Integer, List<DocIdSeqNoAndTerm>> docsOnFollower = getDocIdAndSeqNos(clusterGroup.followerCluster, followerIndex);
450+
logger.info("--> docs on the follower {}", docsOnFollower);
451+
assertThat(docsOnFollower, equalTo(getDocIdAndSeqNos(clusterGroup.leaderCluster, leaderIndex)));
452+
}, 120, TimeUnit.SECONDS);
453+
447454
logger.info("--> asserting seq_no_stats between {} and {}", leaderIndex, followerIndex);
448455
assertBusy(() -> {
449456
Map<Integer, SeqNoStats> leaderStats = new HashMap<>();
@@ -460,13 +467,8 @@ protected void assertIndexFullyReplicatedToFollower(String leaderIndex, String f
460467
}
461468
followerStats.put(shardStat.getShardRouting().shardId().id(), shardStat.getSeqNoStats());
462469
}
463-
assertThat(leaderStats, equalTo(followerStats));
464-
}, 60, TimeUnit.SECONDS);
465-
logger.info("--> asserting <<docId,seqNo>> between {} and {}", leaderIndex, followerIndex);
466-
assertBusy(() -> {
467-
assertThat(getDocIdAndSeqNos(clusterGroup.leaderCluster, leaderIndex),
468-
equalTo(getDocIdAndSeqNos(clusterGroup.followerCluster, followerIndex)));
469-
}, 60, TimeUnit.SECONDS);
470+
assertThat(followerStats, equalTo(leaderStats));
471+
}, 120, TimeUnit.SECONDS);
470472
}
471473

472474
private Map<Integer, List<DocIdSeqNoAndTerm>> getDocIdAndSeqNos(InternalTestCluster cluster, String index) throws IOException {

x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/FollowerFailOverIT.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.elasticsearch.index.shard.ShardId;
2828
import org.elasticsearch.indices.IndicesService;
2929
import org.elasticsearch.test.InternalTestCluster;
30+
import org.elasticsearch.test.junit.annotations.TestLogging;
3031
import org.elasticsearch.xpack.CcrIntegTestCase;
3132
import org.elasticsearch.xpack.core.ccr.action.FollowStatsAction;
3233
import org.elasticsearch.xpack.core.ccr.action.PutFollowAction;
@@ -44,6 +45,7 @@
4445
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
4546
import static org.hamcrest.Matchers.equalTo;
4647

48+
@TestLogging("org.elasticsearch.xpack.ccr:TRACE,org.elasticsearch.index.shard:DEBUG")
4749
public class FollowerFailOverIT extends CcrIntegTestCase {
4850

4951
@Override

0 commit comments

Comments
 (0)