diff --git a/cmd/node-termination-handler.go b/cmd/node-termination-handler.go index 9e5859c2..932809e7 100644 --- a/cmd/node-termination-handler.go +++ b/cmd/node-termination-handler.go @@ -331,7 +331,7 @@ func drainOrCordonIfNecessary(interruptionEventStore *interruptioneventstore.Sto if nthConfig.CordonOnly || (!nthConfig.EnableSQSTerminationDraining && drainEvent.IsRebalanceRecommendation() && !nthConfig.EnableRebalanceDraining) { err = cordonNode(node, nodeName, drainEvent, metrics, recorder) } else { - err = cordonAndDrainNode(node, nodeName, metrics, recorder, nthConfig.EnableSQSTerminationDraining) + err = cordonAndDrainNode(node, nodeName, drainEvent, metrics, recorder, nthConfig.EnableSQSTerminationDraining) } if nthConfig.WebhookURL != "" { @@ -362,7 +362,7 @@ func runPreDrainTask(node node.Node, nodeName string, drainEvent *monitor.Interr } func cordonNode(node node.Node, nodeName string, drainEvent *monitor.InterruptionEvent, metrics observability.Metrics, recorder observability.K8sEventRecorder) error { - err := node.Cordon(nodeName) + err := node.Cordon(nodeName, drainEvent.Description) if err != nil { if errors.IsNotFound(err) { log.Err(err).Msgf("node '%s' not found in the cluster", nodeName) @@ -373,15 +373,15 @@ func cordonNode(node node.Node, nodeName string, drainEvent *monitor.Interruptio } return err } else { - log.Info().Str("node_name", nodeName).Msg("Node successfully cordoned") + log.Info().Str("node_name", nodeName).Str("reason", drainEvent.Description).Msg("Node successfully cordoned") metrics.NodeActionsInc("cordon", nodeName, err) recorder.Emit(nodeName, observability.Normal, observability.CordonReason, observability.CordonMsg) } return nil } -func cordonAndDrainNode(node node.Node, nodeName string, metrics observability.Metrics, recorder observability.K8sEventRecorder, sqsTerminationDraining bool) error { - err := node.CordonAndDrain(nodeName) +func cordonAndDrainNode(node node.Node, nodeName string, drainEvent *monitor.InterruptionEvent, metrics observability.Metrics, recorder observability.K8sEventRecorder, sqsTerminationDraining bool) error { + err := node.CordonAndDrain(nodeName, drainEvent.Description) if err != nil { if errors.IsNotFound(err) { log.Err(err).Msgf("node '%s' not found in the cluster", nodeName) @@ -395,7 +395,7 @@ func cordonAndDrainNode(node node.Node, nodeName string, metrics observability.M } return err } else { - log.Info().Str("node_name", nodeName).Msg("Node successfully cordoned and drained") + log.Info().Str("node_name", nodeName).Str("reason", drainEvent.Description).Msg("Node successfully cordoned and drained") metrics.NodeActionsInc("cordon-and-drain", nodeName, err) recorder.Emit(nodeName, observability.Normal, observability.CordonAndDrainReason, observability.CordonAndDrainMsg) } diff --git a/pkg/node/node.go b/pkg/node/node.go index 6164b082..42e01afb 100644 --- a/pkg/node/node.go +++ b/pkg/node/node.go @@ -90,13 +90,12 @@ func NewWithValues(nthConfig config.Config, drainHelper *drain.Helper, uptime up } // CordonAndDrain will cordon the node and evict pods based on the config -func (n Node) CordonAndDrain(nodeName string) error { +func (n Node) CordonAndDrain(nodeName string, reason string) error { if n.nthConfig.DryRun { - log.Info().Str("node_name", nodeName).Msg("Node would have been cordoned and drained, but dry-run flag was set") + log.Info().Str("node_name", nodeName).Str("reason", reason).Msg("Node would have been cordoned and drained, but dry-run flag was set.") return nil } - log.Info().Msg("Cordoning the node") - err := n.Cordon(nodeName) + err := n.Cordon(nodeName, reason) if err != nil { return err } @@ -114,9 +113,9 @@ func (n Node) CordonAndDrain(nodeName string) error { } // Cordon will add a NoSchedule on the node -func (n Node) Cordon(nodeName string) error { +func (n Node) Cordon(nodeName string, reason string) error { if n.nthConfig.DryRun { - log.Info().Str("node_name", nodeName).Msg("Node would have been cordoned, but dry-run flag was set") + log.Info().Str("node_name", nodeName).Str("reason", reason).Msgf("Node would have been cordoned, but dry-run flag was set") return nil } node, err := n.fetchKubernetesNode(nodeName) diff --git a/pkg/node/node_test.go b/pkg/node/node_test.go index 06436899..3fd723cc 100644 --- a/pkg/node/node_test.go +++ b/pkg/node/node_test.go @@ -61,10 +61,10 @@ func TestDryRun(t *testing.T) { tNode, err := node.New(config.Config{DryRun: true}) h.Ok(t, err) - err = tNode.CordonAndDrain(nodeName) + err = tNode.CordonAndDrain(nodeName, "cordonReason") h.Ok(t, err) - err = tNode.Cordon(nodeName) + err = tNode.Cordon(nodeName, "cordonReason") h.Ok(t, err) err = tNode.Uncordon(nodeName) @@ -107,13 +107,13 @@ func TestDrainSuccess(t *testing.T) { metav1.CreateOptions{}) h.Ok(t, err) tNode := getNode(t, getDrainHelper(client)) - err = tNode.CordonAndDrain(nodeName) + err = tNode.CordonAndDrain(nodeName, "cordonReason") h.Ok(t, err) } func TestDrainCordonNodeFailure(t *testing.T) { tNode := getNode(t, getDrainHelper(fake.NewSimpleClientset())) - err := tNode.CordonAndDrain(nodeName) + err := tNode.CordonAndDrain(nodeName, "cordonReason") h.Assert(t, true, "Failed to return error on CordonAndDrain failing to cordon node", err != nil) }