Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions op-conductor/conductor/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -751,10 +751,8 @@ func (oc *OpConductor) action() {
case status.leader && !status.healthy && status.active:
// There are two scenarios we need to handle here:
// 1. we're transitioned from case status.leader && !status.healthy && !status.active, see description above
// then we should continue to sequence blocks and try to bring ourselves back to healthy state.
// note: we need to also make sure that the health error is not due to ErrSequencerConnectionDown
// because in this case, we should stop sequencing and transfer leadership to other nodes.
if oc.prevState.leader && !oc.prevState.healthy && !oc.prevState.active && !errors.Is(oc.hcerr, health.ErrSequencerConnectionDown) {
// then we should continue to sequence blocks and try to bring ourselves back to healthy state (if possible)
if oc.shouldWaitForHealthRecovery() {
err = errors.New("waiting for sequencing to become healthy by itself")
break
}
Expand Down Expand Up @@ -936,3 +934,24 @@ func (oc *OpConductor) updateSequencerActiveStatus() error {
oc.seqActive.Store(active)
return nil
}

// shouldWaitForHealthRecovery reports whether the conductor should keep sequencing
// and let the sequencer heal on its own rather than transferring leadership.
func (oc *OpConductor) shouldWaitForHealthRecovery() bool {
	// Waiting is only an option when we just came from the
	// [leader, unhealthy, inactive] state.
	cameFromUnhealthyInactiveLeader := oc.prevState.leader && !oc.prevState.healthy && !oc.prevState.active
	if !cameFromUnhealthyInactiveLeader {
		return false
	}

	switch {
	case errors.Is(oc.hcerr, health.ErrSequencerConnectionDown):
		// A downed sequencer connection will not recover by itself here;
		// leadership should move to another node.
		return false
	case oc.cfg.RollupBoostEnabled && errors.Is(oc.hcerr, health.ErrRollupBoostPartiallyHealthy):
		// A partially healthy rollup boost likewise warrants a leadership
		// transfer instead of waiting.
		return false
	default:
		return true
	}
}
40 changes: 40 additions & 0 deletions op-conductor/conductor/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1183,3 +1183,43 @@ connected:
// Verify that the conductor is stopped
s.True(conductor.Stopped())
}

// TestRollupBoostPartialFailure covers the conductor's reaction to a rollup boost
// partial-health failure while it is the leader and actively sequencing.
// Starting state: [leader, unhealthy, active] with prevState [leader, unhealthy, inactive]
// and health error ErrRollupBoostPartiallyHealthy.
// Expected outcome: the conductor stops sequencing and transfers leadership rather
// than waiting for the sequencer to become healthy on its own.
func (s *OpConductorTestSuite) TestRollupBoostPartialFailure() {
	s.enableSynchronization()

	// Arrange: an unhealthy, actively-sequencing leader with rollup boost enabled.
	// The previous state [leader, unhealthy, inactive] simulates a leader that
	// began sequencing during a network stall while rollup boost is only
	// partially healthy.
	s.conductor.cfg.RollupBoostEnabled = true
	s.conductor.leader.Store(true)
	s.conductor.healthy.Store(false)
	s.conductor.seqActive.Store(true)
	s.conductor.prevState = &state{
		leader:  true,
		healthy: false,
		active:  false,
	}

	// With ErrRollupBoostPartiallyHealthy the conductor must NOT wait for
	// recovery; it should halt sequencing and hand leadership to another node.
	s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, nil).Times(1)
	s.cons.EXPECT().TransferLeader().Return(nil).Times(1)

	// Act: deliver the rollup boost partial failure through a health update.
	s.updateHealthStatusAndExecuteAction(health.ErrRollupBoostPartiallyHealthy)

	// Assert: leadership surrendered, sequencing halted, error recorded.
	s.False(s.conductor.leader.Load(), "Should transfer leadership to another node")
	s.False(s.conductor.healthy.Load(), "Should remain marked as unhealthy")
	s.False(s.conductor.seqActive.Load(), "Should stop sequencing")
	s.Equal(health.ErrRollupBoostPartiallyHealthy, s.conductor.hcerr, "Should store the rollup boost error")

	// Assert: each expected action happened exactly once.
	s.ctrl.AssertNumberOfCalls(s.T(), "StopSequencer", 1)
	s.cons.AssertNumberOfCalls(s.T(), "TransferLeader", 1)
}