From 53db54ce4990d73993f69201a70d81305fc55113 Mon Sep 17 00:00:00 2001 From: Mohammed Date: Fri, 12 Mar 2021 09:42:55 -0800 Subject: [PATCH] Add running pods to webhook notifications --- cmd/node-termination-handler.go | 7 ++++++- pkg/monitor/types.go | 1 + pkg/node/node.go | 23 ++++++++++++++++------- pkg/node/node_test.go | 27 ++++++++++++++++++++++++++- 4 files changed, 49 insertions(+), 9 deletions(-) diff --git a/cmd/node-termination-handler.go b/cmd/node-termination-handler.go index 26b62020..9ad5c7bc 100644 --- a/cmd/node-termination-handler.go +++ b/cmd/node-termination-handler.go @@ -296,7 +296,12 @@ func drainOrCordonIfNecessary(interruptionEventStore *interruptioneventstore.Sto } } else { log.Log().Str("node_name", nodeName).Msg("Node successfully cordoned") - err = node.LogPods(nodeName) + podNameList, err := node.FetchPodNameList(nodeName) + if err != nil { + log.Log().Err(err).Msgf("Unable to fetch running pods for node '%s' ", nodeName) + } + drainEvent.Pods = podNameList + err = node.LogPods(podNameList, nodeName) if err != nil { log.Log().Err(err).Msg("There was a problem while trying to log all pod names on the node") } diff --git a/pkg/monitor/types.go b/pkg/monitor/types.go index e84b8c07..5e52ff95 100644 --- a/pkg/monitor/types.go +++ b/pkg/monitor/types.go @@ -31,6 +31,7 @@ type InterruptionEvent struct { AutoScalingGroupName string NodeName string NodeLabels map[string]string + Pods []string InstanceID string StartTime time.Time EndTime time.Time diff --git a/pkg/node/node.go b/pkg/node/node.go index d1612be1..15023fbd 100644 --- a/pkg/node/node.go +++ b/pkg/node/node.go @@ -346,20 +346,29 @@ func (n Node) TaintRebalanceRecommendation(nodeName string, eventID string) erro } // LogPods logs all the pod names on a node -func (n Node) LogPods(nodeName string) error { - podList, err := n.fetchAllPods(nodeName) - if err != nil { - return fmt.Errorf("Unable to fetch all pods from API: %w", err) - } +func (n Node) LogPods(podList []string, nodeName 
string) error { podNamesArr := zerolog.Arr() - for _, pod := range podList.Items { - podNamesArr = podNamesArr.Str(pod.Name) + for _, pod := range podList { + podNamesArr = podNamesArr.Str(pod) } log.Log().Array("pod_names", podNamesArr).Str("node_name", nodeName).Msg("Pods on node") return nil } +// FetchPodNameList fetches the names of all the pods running on the given node +func (n Node) FetchPodNameList(nodeName string) ([]string, error) { + podList, err := n.fetchAllPods(nodeName) + if err != nil { + return nil, err + } + var podNamesList []string + for _, pod := range podList.Items { + podNamesList = append(podNamesList, pod.Name) + } + return podNamesList, nil +} + // TaintScheduledMaintenance adds the scheduled maintenance taint onto a node func (n Node) TaintScheduledMaintenance(nodeName string, eventID string) error { if !n.nthConfig.TaintNode { diff --git a/pkg/node/node_test.go b/pkg/node/node_test.go index 2cfd2aa5..50b115fc 100644 --- a/pkg/node/node_test.go +++ b/pkg/node/node_test.go @@ -240,6 +240,31 @@ func TestMarkForUncordonAfterRebootAddActionLabelFailure(t *testing.T) { h.Assert(t, err != nil, "Failed to return error on MarkForUncordonAfterReboot failing to add action Label") } +func TestFetchPodsNameList(t *testing.T) { + resetFlagsForTest() + + client := fake.NewSimpleClientset( + &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "myPod", + Labels: map[string]string{ + "spec.nodeName": nodeName, + }, + }, + }, + &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + }, + ) + + tNode := getNode(t, getDrainHelper(client)) + podList, err := tNode.FetchPodNameList(nodeName) + h.Ok(t, err) + h.Equals(t, []string{"myPod"}, podList) +} + func TestLogPods(t *testing.T) { resetFlagsForTest() @@ -260,7 +285,7 @@ func TestLogPods(t *testing.T) { ) tNode := getNode(t, getDrainHelper(client)) - err := tNode.LogPods(nodeName) + err := tNode.LogPods([]string{"myPod"}, nodeName) h.Ok(t, err) }