Skip to content

Commit 4a37023

Browse files
authored
fix undefined log levels (#402)
* fix undefined log levels
* fix flaky test
* fix wrong levels in some places
* change error-level logging to warn for ec2 metadata
1 parent 5896712 commit 4a37023

File tree

13 files changed

+116
-113
lines changed

13 files changed

+116
-113
lines changed

cmd/node-termination-handler.go

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ func main() {
125125
wait.PollImmediateUntil(2*time.Second, func() (done bool, err error) {
126126
err = handleRebootUncordon(nthConfig.NodeName, interruptionEventStore, *node)
127127
if err != nil {
128-
log.Log().Err(err).Msgf("Unable to complete the uncordon after reboot workflow on startup, retrying")
128+
log.Err(err).Msgf("Unable to complete the uncordon after reboot workflow on startup, retrying")
129129
}
130130
return false, nil
131131
}, stopCh)
@@ -152,7 +152,7 @@ func main() {
152152
if nthConfig.EnableSQSTerminationDraining {
153153
creds, err := nthConfig.AWSSession.Config.Credentials.Get()
154154
if err != nil {
155-
log.Warn().Err(err).Msg("Unable to get AWS credentials")
155+
log.Err(err).Msg("Unable to get AWS credentials")
156156
}
157157
log.Debug().Msgf("AWS Credentials retrieved from provider: %s", creds.ProviderName)
158158

@@ -171,13 +171,13 @@ func main() {
171171

172172
for _, fn := range monitoringFns {
173173
go func(monitor monitor.Monitor) {
174-
log.Log().Str("event_type", monitor.Kind()).Msg("Started monitoring for events")
174+
log.Info().Str("event_type", monitor.Kind()).Msg("Started monitoring for events")
175175
var previousErr error
176176
var duplicateErrCount int
177177
for range time.Tick(time.Second * 2) {
178178
err := monitor.Monitor()
179179
if err != nil {
180-
log.Log().Str("event_type", monitor.Kind()).Err(err).Msg("There was a problem monitoring for events")
180+
log.Warn().Str("event_type", monitor.Kind()).Err(err).Msg("There was a problem monitoring for events")
181181
metrics.ErrorEventsInc(monitor.Kind())
182182
if previousErr != nil && err.Error() == previousErr.Error() {
183183
duplicateErrCount++
@@ -186,7 +186,7 @@ func main() {
186186
previousErr = err
187187
}
188188
if duplicateErrCount >= duplicateErrThreshold {
189-
log.Log().Msg("Stopping NTH - Duplicate Error Threshold hit.")
189+
log.Warn().Msg("Stopping NTH - Duplicate Error Threshold hit.")
190190
panic(fmt.Sprintf("%v", err))
191191
}
192192
}
@@ -195,11 +195,11 @@ func main() {
195195
}
196196

197197
go watchForInterruptionEvents(interruptionChan, interruptionEventStore)
198-
log.Log().Msg("Started watching for interruption events")
199-
log.Log().Msg("Kubernetes AWS Node Termination Handler has started successfully!")
198+
log.Info().Msg("Started watching for interruption events")
199+
log.Info().Msg("Kubernetes AWS Node Termination Handler has started successfully!")
200200

201201
go watchForCancellationEvents(cancelChan, interruptionEventStore, node, metrics)
202-
log.Log().Msg("Started watching for event cancellations")
202+
log.Info().Msg("Started watching for event cancellations")
203203

204204
var wg sync.WaitGroup
205205

@@ -222,7 +222,7 @@ func main() {
222222
}
223223
}
224224
}
225-
log.Log().Msg("AWS Node Termination Handler is shutting down")
225+
log.Info().Msg("AWS Node Termination Handler is shutting down")
226226
wg.Wait()
227227
log.Debug().Msg("all event processors finished")
228228
}
@@ -260,17 +260,17 @@ func watchForCancellationEvents(cancelChan <-chan monitor.InterruptionEvent, int
260260
nodeName := interruptionEvent.NodeName
261261
interruptionEventStore.CancelInterruptionEvent(interruptionEvent.EventID)
262262
if interruptionEventStore.ShouldUncordonNode(nodeName) {
263-
log.Log().Msg("Uncordoning the node due to a cancellation event")
263+
log.Info().Msg("Uncordoning the node due to a cancellation event")
264264
err := node.Uncordon(nodeName)
265265
if err != nil {
266-
log.Log().Err(err).Msg("Uncordoning the node failed")
266+
log.Err(err).Msg("Uncordoning the node failed")
267267
}
268268
metrics.NodeActionsInc("uncordon", nodeName, err)
269269

270270
node.RemoveNTHLabels(nodeName)
271271
node.RemoveNTHTaints(nodeName)
272272
} else {
273-
log.Log().Msg("Another interruption event is active, not uncordoning the node")
273+
log.Info().Msg("Another interruption event is active, not uncordoning the node")
274274
}
275275
}
276276
}
@@ -280,13 +280,13 @@ func drainOrCordonIfNecessary(interruptionEventStore *interruptioneventstore.Sto
280280
nodeName := drainEvent.NodeName
281281
nodeLabels, err := node.GetNodeLabels(nodeName)
282282
if err != nil {
283-
log.Warn().Err(err).Msgf("Unable to fetch node labels for node '%s' ", nodeName)
283+
log.Err(err).Msgf("Unable to fetch node labels for node '%s' ", nodeName)
284284
}
285285
drainEvent.NodeLabels = nodeLabels
286286
if drainEvent.PreDrainTask != nil {
287287
err := drainEvent.PreDrainTask(*drainEvent, node)
288288
if err != nil {
289-
log.Log().Err(err).Msg("There was a problem executing the pre-drain task")
289+
log.Err(err).Msg("There was a problem executing the pre-drain task")
290290
}
291291
metrics.NodeActionsInc("pre-drain", nodeName, err)
292292
}
@@ -295,35 +295,35 @@ func drainOrCordonIfNecessary(interruptionEventStore *interruptioneventstore.Sto
295295
err := node.Cordon(nodeName)
296296
if err != nil {
297297
if errors.IsNotFound(err) {
298-
log.Warn().Err(err).Msgf("node '%s' not found in the cluster", nodeName)
298+
log.Err(err).Msgf("node '%s' not found in the cluster", nodeName)
299299
} else {
300-
log.Log().Err(err).Msg("There was a problem while trying to cordon the node")
300+
log.Err(err).Msg("There was a problem while trying to cordon the node")
301301
os.Exit(1)
302302
}
303303
} else {
304-
log.Log().Str("node_name", nodeName).Msg("Node successfully cordoned")
304+
log.Info().Str("node_name", nodeName).Msg("Node successfully cordoned")
305305
podNameList, err := node.FetchPodNameList(nodeName)
306306
if err != nil {
307-
log.Log().Err(err).Msgf("Unable to fetch running pods for node '%s' ", nodeName)
307+
log.Err(err).Msgf("Unable to fetch running pods for node '%s' ", nodeName)
308308
}
309309
drainEvent.Pods = podNameList
310310
err = node.LogPods(podNameList, nodeName)
311311
if err != nil {
312-
log.Log().Err(err).Msg("There was a problem while trying to log all pod names on the node")
312+
log.Err(err).Msg("There was a problem while trying to log all pod names on the node")
313313
}
314314
metrics.NodeActionsInc("cordon", nodeName, err)
315315
}
316316
} else {
317317
err := node.CordonAndDrain(nodeName)
318318
if err != nil {
319319
if errors.IsNotFound(err) {
320-
log.Warn().Err(err).Msgf("node '%s' not found in the cluster", nodeName)
320+
log.Err(err).Msgf("node '%s' not found in the cluster", nodeName)
321321
} else {
322-
log.Log().Err(err).Msg("There was a problem while trying to cordon and drain the node")
322+
log.Err(err).Msg("There was a problem while trying to cordon and drain the node")
323323
os.Exit(1)
324324
}
325325
} else {
326-
log.Log().Str("node_name", nodeName).Msg("Node successfully cordoned and drained")
326+
log.Info().Str("node_name", nodeName).Msg("Node successfully cordoned and drained")
327327
metrics.NodeActionsInc("cordon-and-drain", nodeName, err)
328328
}
329329
}

pkg/config/config.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,9 @@ func ParseCliArgs() (config Config, err error) {
205205
}
206206

207207
if isConfigProvided("pod-termination-grace-period", podTerminationGracePeriodConfigKey) && isConfigProvided("grace-period", gracePeriodConfigKey) {
208-
log.Log().Msg("Deprecated argument \"grace-period\" and the replacement argument \"pod-termination-grace-period\" was provided. Using the newer argument \"pod-termination-grace-period\"")
208+
log.Warn().Msg("Deprecated argument \"grace-period\" and the replacement argument \"pod-termination-grace-period\" was provided. Using the newer argument \"pod-termination-grace-period\"")
209209
} else if isConfigProvided("grace-period", gracePeriodConfigKey) {
210-
log.Log().Msg("Deprecated argument \"grace-period\" was provided. This argument will eventually be removed. Please switch to \"pod-termination-grace-period\" instead.")
210+
log.Warn().Msg("Deprecated argument \"grace-period\" was provided. This argument will eventually be removed. Please switch to \"pod-termination-grace-period\" instead.")
211211
config.PodTerminationGracePeriod = gracePeriod
212212
}
213213

@@ -243,7 +243,7 @@ func (c Config) Print() {
243243
func (c Config) PrintJsonConfigArgs() {
244244
// manually setting fields instead of using log.Log().Interface() to use snake_case instead of PascalCase
245245
// intentionally did not log webhook configuration as there may be secrets
246-
log.Log().
246+
log.Info().
247247
Bool("dry_run", c.DryRun).
248248
Str("node_name", c.NodeName).
249249
Str("metadata_url", c.MetadataURL).
@@ -282,7 +282,7 @@ func (c Config) PrintHumanConfigArgs() {
282282
webhookURLDisplay = "<provided-not-displayed>"
283283
}
284284
// intentionally did not log webhook configuration as there may be secrets
285-
log.Log().Msgf(
285+
log.Info().Msgf(
286286
"aws-node-termination-handler arguments: \n"+
287287
"\tdry-run: %t,\n"+
288288
"\tnode-name: %s,\n"+

pkg/ec2metadata/ec2metadata.go

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ func (e *Service) Request(contextPath string) (*http.Response, error) {
223223
if err != nil {
224224
e.v2Token = ""
225225
e.tokenTTL = -1
226-
log.Log().Err(err).Msg("Unable to retrieve an IMDSv2 token, continuing with IMDSv1")
226+
log.Warn().Msgf("Unable to retrieve an IMDSv2 token, continuing with IMDSv1, %v", err)
227227
} else {
228228
e.v2Token = token
229229
e.tokenTTL = ttl
@@ -267,7 +267,7 @@ func (e *Service) getV2Token() (string, int, error) {
267267
httpReq := func() (*http.Response, error) {
268268
return e.httpClient.Do(req)
269269
}
270-
log.Log().Msg("Trying to get token from IMDSv2")
270+
log.Debug().Msg("Trying to get token from IMDSv2")
271271
resp, err := retry(1, 2*time.Second, httpReq)
272272
if err != nil {
273273
return "", -1, err
@@ -284,7 +284,7 @@ func (e *Service) getV2Token() (string, int, error) {
284284
if err != nil {
285285
return "", -1, fmt.Errorf("IMDS v2 Token TTL header not sent in response: %w", err)
286286
}
287-
log.Log().Msg("Got token from IMDSv2")
287+
log.Debug().Msg("Got token from IMDSv2")
288288
return string(token), ttl, nil
289289
}
290290

@@ -307,8 +307,7 @@ func retry(attempts int, sleep time.Duration, httpReq func() (*http.Response, er
307307
jitter := time.Duration(rand.Int63n(int64(sleep)))
308308
sleep = sleep + jitter/2
309309

310-
log.Log().Msgf("Request failed. Attempts remaining: %d", attempts)
311-
log.Log().Msgf("Sleep for %s seconds", sleep)
310+
log.Warn().Msgf("Request failed. Attempts remaining: %d, sleeping for %s seconds", attempts, sleep)
312311
time.Sleep(sleep)
313312
return retry(attempts, 2*sleep, httpReq)
314313
}
@@ -322,12 +321,12 @@ func (e *Service) GetNodeMetadata() NodeMetadata {
322321
var metadata NodeMetadata
323322
identityDoc, err := e.GetMetadataInfo(IdentityDocPath)
324323
if err != nil {
325-
log.Log().Err(err).Msg("Unable to fetch metadata from IMDS")
324+
log.Err(err).Msg("Unable to fetch metadata from IMDS")
326325
return metadata
327326
}
328327
err = json.NewDecoder(strings.NewReader(identityDoc)).Decode(&metadata)
329328
if err != nil {
330-
log.Log().Msg("Unable to fetch instance identity document from ec2 metadata")
329+
log.Warn().Msg("Unable to fetch instance identity document from ec2 metadata")
331330
metadata.InstanceID, _ = e.GetMetadataInfo(InstanceIDPath)
332331
metadata.InstanceType, _ = e.GetMetadataInfo(InstanceTypePath)
333332
metadata.LocalIP, _ = e.GetMetadataInfo(LocalIPPath)
@@ -340,7 +339,7 @@ func (e *Service) GetNodeMetadata() NodeMetadata {
340339
metadata.PublicIP, _ = e.GetMetadataInfo(PublicIPPath)
341340
metadata.LocalHostname, _ = e.GetMetadataInfo(LocalHostnamePath)
342341

343-
log.Log().Interface("metadata", metadata).Msg("Startup Metadata Retrieved")
342+
log.Info().Interface("metadata", metadata).Msg("Startup Metadata Retrieved")
344343

345344
return metadata
346345
}

pkg/interruptioneventstore/interruption-event-store.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ func (s *Store) AddInterruptionEvent(interruptionEvent *monitor.InterruptionEven
6161

6262
s.Lock()
6363
defer s.Unlock()
64-
log.Log().Interface("event", interruptionEvent).Msg("Adding new event to the event store")
64+
log.Info().Interface("event", interruptionEvent).Msg("Adding new event to the event store")
6565
s.interruptionEventStore[interruptionEvent.EventID] = interruptionEvent
6666
if _, ignored := s.ignoredEvents[interruptionEvent.EventID]; !ignored {
6767
s.atLeastOneEvent = true

pkg/monitor/scheduledevent/scheduled-event-monitor.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ func (m ScheduledEventMonitor) checkForScheduledEvents() ([]monitor.Interruption
9696
notAfter, err = time.Parse(scheduledEventDateFormat, scheduledEvent.NotAfter)
9797
if err != nil {
9898
notAfter = notBefore
99-
log.Log().Err(err).Msg("Unable to parse scheduled event end time, continuing")
99+
log.Err(err).Msg("Unable to parse scheduled event end time, continuing")
100100
}
101101
}
102102
events = append(events, monitor.InterruptionEvent{
@@ -128,7 +128,7 @@ func uncordonAfterRebootPreDrain(interruptionEvent monitor.InterruptionEvent, n
128128
// if the node is already marked as unschedulable, then don't do anything
129129
unschedulable, err := n.IsUnschedulable(nodeName)
130130
if err == nil && unschedulable {
131-
log.Log().Msg("Node is already marked unschedulable, not taking any action to add uncordon label.")
131+
log.Debug().Msg("Node is already marked unschedulable, not taking any action to add uncordon label.")
132132
return nil
133133
} else if err != nil {
134134
return fmt.Errorf("Encountered an error while checking if the node is unschedulable. Not setting an uncordon label: %w", err)
@@ -137,7 +137,7 @@ func uncordonAfterRebootPreDrain(interruptionEvent monitor.InterruptionEvent, n
137137
if err != nil {
138138
return fmt.Errorf("Unable to mark the node for uncordon: %w", err)
139139
}
140-
log.Log().Msg("Successfully applied uncordon after reboot action label to node.")
140+
log.Info().Msg("Successfully applied uncordon after reboot action label to node.")
141141
return nil
142142
}
143143

pkg/monitor/sqsevent/asg-lifecycle-event.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ func (m SQSMonitor) asgTerminationToInterruptionEvent(event EventBridgeEvent, me
105105
interruptionEvent.PreDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error {
106106
err := n.TaintASGLifecycleTermination(interruptionEvent.NodeName, interruptionEvent.EventID)
107107
if err != nil {
108-
log.Warn().Err(err).Msgf("Unable to taint node with taint %s:%s", node.ASGLifecycleTerminationTaint, interruptionEvent.EventID)
108+
log.Err(err).Msgf("Unable to taint node with taint %s:%s", node.ASGLifecycleTerminationTaint, interruptionEvent.EventID)
109109
}
110110
return nil
111111
}

pkg/monitor/sqsevent/rebalance-recommendation-event.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ func (m SQSMonitor) rebalanceRecommendationToInterruptionEvent(event EventBridge
7878
interruptionEvent.PreDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error {
7979
err := n.TaintRebalanceRecommendation(interruptionEvent.NodeName, interruptionEvent.EventID)
8080
if err != nil {
81-
log.Warn().Err(err).Msgf("Unable to taint node with taint %s:%s", node.RebalanceRecommendationTaint, interruptionEvent.EventID)
81+
log.Err(err).Msgf("Unable to taint node with taint %s:%s", node.RebalanceRecommendationTaint, interruptionEvent.EventID)
8282
}
8383
return nil
8484
}

pkg/monitor/sqsevent/spot-itn-event.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func (m SQSMonitor) spotITNTerminationToInterruptionEvent(event EventBridgeEvent
7979
interruptionEvent.PreDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error {
8080
err := n.TaintSpotItn(interruptionEvent.NodeName, interruptionEvent.EventID)
8181
if err != nil {
82-
log.Warn().Err(err).Msgf("Unable to taint node with taint %s:%s", node.SpotInterruptionTaint, interruptionEvent.EventID)
82+
log.Err(err).Msgf("Unable to taint node with taint %s:%s", node.SpotInterruptionTaint, interruptionEvent.EventID)
8383
}
8484
return nil
8585
}

pkg/monitor/sqsevent/sqs-monitor.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,16 +68,16 @@ func (m SQSMonitor) Monitor() error {
6868
switch {
6969
case errors.Is(err, ErrNodeStateNotRunning):
7070
// If the node is no longer running, just log and delete the message. If message deletion fails, count it as an error.
71-
log.Warn().Err(err).Msg("dropping event for an already terminated node")
71+
log.Err(err).Msg("dropping event for an already terminated node")
7272
errs := m.deleteMessages([]*sqs.Message{message})
7373
if len(errs) > 0 {
74-
log.Warn().Err(errs[0]).Msg("error deleting event for already terminated node")
74+
log.Err(errs[0]).Msg("error deleting event for already terminated node")
7575
failedEvents++
7676
}
7777

7878
case err != nil:
7979
// Log errors and record as failed events
80-
log.Warn().Err(err).Msg("ignoring event due to error")
80+
log.Err(err).Msg("ignoring event due to error")
8181
failedEvents++
8282

8383
case err == nil && interruptionEvent != nil && interruptionEvent.Kind == SQSTerminateKind:
@@ -176,7 +176,7 @@ func (m SQSMonitor) deleteMessages(messages []*sqs.Message) []error {
176176
if err != nil {
177177
errs = append(errs, err)
178178
}
179-
log.Log().Msgf("SQS Deleted Message: %s", message)
179+
log.Debug().Msgf("SQS Deleted Message: %s", message)
180180
}
181181
return errs
182182
}

0 commit comments

Comments (0)