From eb2b89a0a2902600f88a6d37c53b84d384fa6f34 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Wed, 2 Jun 2021 16:27:09 -0500 Subject: [PATCH 1/2] add pods to webhook event and log pods on node --- cmd/node-termination-handler.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cmd/node-termination-handler.go b/cmd/node-termination-handler.go index 7160f97b..468c4faf 100644 --- a/cmd/node-termination-handler.go +++ b/cmd/node-termination-handler.go @@ -313,6 +313,15 @@ func drainOrCordonIfNecessary(interruptionEventStore *interruptioneventstore.Sto } else { cordonAndDrainNode(node, nodeName, metrics, recorder, nthConfig.EnableSQSTerminationDraining) } + podNameList, err := node.FetchPodNameList(nodeName) + drainEvent.Pods = podNameList + if err != nil { + log.Err(err).Msgf("Unable to fetch running pods for node '%s' ", nodeName) + } + err = node.LogPods(podNameList, nodeName) + if err != nil { + log.Err(err).Msg("There was a problem while trying to log all pod names on the node") + } interruptionEventStore.MarkAllAsProcessed(nodeName) if nthConfig.WebhookURL != "" { @@ -348,15 +357,6 @@ func cordonNode(node node.Node, nodeName string, drainEvent *monitor.Interruptio } } else { log.Info().Str("node_name", nodeName).Msg("Node successfully cordoned") - podNameList, err := node.FetchPodNameList(nodeName) - drainEvent.Pods = podNameList - if err != nil { - log.Err(err).Msgf("Unable to fetch running pods for node '%s' ", nodeName) - } - err = node.LogPods(podNameList, nodeName) - if err != nil { - log.Err(err).Msg("There was a problem while trying to log all pod names on the node") - } metrics.NodeActionsInc("cordon", nodeName, err) recorder.Emit(nodeName, observability.Normal, observability.CordonReason, observability.CordonMsg) } From 2846ee82e664ef1d83aea4104990f628fc8a9ba8 Mon Sep 17 00:00:00 2001 From: Brandon Wagner Date: Fri, 4 Jun 2021 11:46:43 -0500 Subject: [PATCH 2/2] only add podlist when no errors when cordoning or 
draining --- cmd/node-termination-handler.go | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/cmd/node-termination-handler.go b/cmd/node-termination-handler.go index 468c4faf..519dcdd1 100644 --- a/cmd/node-termination-handler.go +++ b/cmd/node-termination-handler.go @@ -309,18 +309,21 @@ func drainOrCordonIfNecessary(interruptionEventStore *interruptioneventstore.Sto } if nthConfig.CordonOnly || (drainEvent.IsRebalanceRecommendation() && !nthConfig.EnableRebalanceDraining) { - cordonNode(node, nodeName, drainEvent, metrics, recorder) + err = cordonNode(node, nodeName, drainEvent, metrics, recorder) } else { - cordonAndDrainNode(node, nodeName, metrics, recorder, nthConfig.EnableSQSTerminationDraining) + err = cordonAndDrainNode(node, nodeName, metrics, recorder, nthConfig.EnableSQSTerminationDraining) } - podNameList, err := node.FetchPodNameList(nodeName) - drainEvent.Pods = podNameList - if err != nil { - log.Err(err).Msgf("Unable to fetch running pods for node '%s' ", nodeName) - } - err = node.LogPods(podNameList, nodeName) - if err != nil { - log.Err(err).Msg("There was a problem while trying to log all pod names on the node") + + if err == nil { + podNameList, err := node.FetchPodNameList(nodeName) + drainEvent.Pods = podNameList + if err != nil { + log.Err(err).Msgf("Unable to fetch running pods for node '%s' ", nodeName) + } + err = node.LogPods(podNameList, nodeName) + if err != nil { + log.Err(err).Msg("There was a problem while trying to log all pod names on the node") + } } interruptionEventStore.MarkAllAsProcessed(nodeName) @@ -345,7 +348,7 @@ func runPreDrainTask(node node.Node, nodeName string, drainEvent *monitor.Interr metrics.NodeActionsInc("pre-drain", nodeName, err) } -func cordonNode(node node.Node, nodeName string, drainEvent *monitor.InterruptionEvent, metrics observability.Metrics, recorder observability.K8sEventRecorder) { +func cordonNode(node node.Node, nodeName string, drainEvent *monitor.InterruptionEvent, 
metrics observability.Metrics, recorder observability.K8sEventRecorder) error { err := node.Cordon(nodeName) if err != nil { if errors.IsNotFound(err) { @@ -355,14 +358,16 @@ func cordonNode(node node.Node, nodeName string, drainEvent *monitor.Interruptio recorder.Emit(nodeName, observability.Warning, observability.CordonErrReason, observability.CordonErrMsgFmt, err.Error()) os.Exit(1) } + return err } else { log.Info().Str("node_name", nodeName).Msg("Node successfully cordoned") metrics.NodeActionsInc("cordon", nodeName, err) recorder.Emit(nodeName, observability.Normal, observability.CordonReason, observability.CordonMsg) } + return nil } -func cordonAndDrainNode(node node.Node, nodeName string, metrics observability.Metrics, recorder observability.K8sEventRecorder, sqsTerminationDraining bool) { +func cordonAndDrainNode(node node.Node, nodeName string, metrics observability.Metrics, recorder observability.K8sEventRecorder, sqsTerminationDraining bool) error { err := node.CordonAndDrain(nodeName) if err != nil { if errors.IsNotFound(err) { @@ -375,11 +380,13 @@ func cordonAndDrainNode(node node.Node, nodeName string, metrics observability.M os.Exit(1) } } + return err } else { log.Info().Str("node_name", nodeName).Msg("Node successfully cordoned and drained") metrics.NodeActionsInc("cordon-and-drain", nodeName, err) recorder.Emit(nodeName, observability.Normal, observability.CordonAndDrainReason, observability.CordonAndDrainMsg) } + return nil } func runPostDrainTask(node node.Node, nodeName string, drainEvent *monitor.InterruptionEvent, metrics observability.Metrics, recorder observability.K8sEventRecorder) {