# fix undefined log levels #402
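This PR replaces zerolog's un-leveled `log.Log()` calls (and a few mis-leveled ones) with explicitly leveled calls. With `log.Log()`, the emitted JSON has no `level` field at all, so the entries cannot be filtered by severity. A minimal standalone sketch of the difference (my own illustration, not code from this repo):

```go
package main

import (
	"errors"

	"github.com/rs/zerolog/log"
)

func main() {
	// Un-leveled: the output JSON has no "level" field, e.g.
	// {"time":"...","message":"hello"} - severity filters can't classify it.
	log.Log().Msg("hello")

	// Leveled: {"level":"info","time":"...","message":"hello"}
	log.Info().Msg("hello")

	// log.Err(err) logs at error level when err is non-nil (and at info
	// level when err is nil), attaching err as a structured field.
	log.Err(errors.New("boom")).Msg("something failed")
}
```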
```diff
@@ -125,7 +125,7 @@ func main() {
 	wait.PollImmediateUntil(2*time.Second, func() (done bool, err error) {
 		err = handleRebootUncordon(nthConfig.NodeName, interruptionEventStore, *node)
 		if err != nil {
-			log.Log().Err(err).Msgf("Unable to complete the uncordon after reboot workflow on startup, retrying")
+			log.Err(err).Msgf("Unable to complete the uncordon after reboot workflow on startup, retrying")
 		}
 		return false, nil
 	}, stopCh)
```
```diff
@@ -152,7 +152,7 @@ func main() {
 	if nthConfig.EnableSQSTerminationDraining {
 		creds, err := nthConfig.AWSSession.Config.Credentials.Get()
 		if err != nil {
-			log.Warn().Err(err).Msg("Unable to get AWS credentials")
+			log.Err(err).Msg("Unable to get AWS credentials")
 		}
 		log.Debug().Msgf("AWS Credentials retrieved from provider: %s", creds.ProviderName)
```
```diff
@@ -171,13 +171,13 @@ func main() {

 	for _, fn := range monitoringFns {
 		go func(monitor monitor.Monitor) {
-			log.Log().Str("event_type", monitor.Kind()).Msg("Started monitoring for events")
+			log.Info().Str("event_type", monitor.Kind()).Msg("Started monitoring for events")
 			var previousErr error
 			var duplicateErrCount int
 			for range time.Tick(time.Second * 2) {
 				err := monitor.Monitor()
 				if err != nil {
-					log.Log().Str("event_type", monitor.Kind()).Err(err).Msg("There was a problem monitoring for events")
+					log.Warn().Str("event_type", monitor.Kind()).Err(err).Msg("There was a problem monitoring for events")
 					metrics.ErrorEventsInc(monitor.Kind())
 					if previousErr != nil && err.Error() == previousErr.Error() {
 						duplicateErrCount++
```
```diff
@@ -186,7 +186,7 @@ func main() {
 					previousErr = err
 				}
 				if duplicateErrCount >= duplicateErrThreshold {
-					log.Log().Msg("Stopping NTH - Duplicate Error Threshold hit.")
+					log.Warn().Msg("Stopping NTH - Duplicate Error Threshold hit.")
 					panic(fmt.Sprintf("%v", err))
 				}
 			}
```

> **Contributor:** `log.Panic()` since we're panicking?

> **Author:** We could, but IMHO it's clearer when they're separate. I don't think I'd expect a log call to panic if I was reading the code and didn't know about that weirdness.
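For context: zerolog does provide a `Panic()` level whose `Msg` call logs and then panics with the message, so the two styles under discussion look roughly like this (a standalone sketch, not the PR's code):

```go
package main

import (
	"fmt"

	"github.com/rs/zerolog/log"
)

func main() {
	err := fmt.Errorf("repeated monitor failure")

	// Reviewer's suggestion: one call that logs at panic level, then
	// panics with the message itself:
	//   log.Panic().Err(err).Msg("Stopping NTH - Duplicate Error Threshold hit.")

	// Style kept in the PR: log and panic as two explicit steps, so the
	// control flow stays visible at the call site.
	log.Warn().Msg("Stopping NTH - Duplicate Error Threshold hit.")
	panic(fmt.Sprintf("%v", err))
}
```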
```diff
@@ -195,11 +195,11 @@ func main() {
 	}

 	go watchForInterruptionEvents(interruptionChan, interruptionEventStore)
-	log.Log().Msg("Started watching for interruption events")
-	log.Log().Msg("Kubernetes AWS Node Termination Handler has started successfully!")
+	log.Info().Msg("Started watching for interruption events")
+	log.Info().Msg("Kubernetes AWS Node Termination Handler has started successfully!")

 	go watchForCancellationEvents(cancelChan, interruptionEventStore, node, metrics)
-	log.Log().Msg("Started watching for event cancellations")
+	log.Info().Msg("Started watching for event cancellations")

 	var wg sync.WaitGroup
```
```diff
@@ -222,7 +222,7 @@ func main() {
 			}
 		}
 	}
-	log.Log().Msg("AWS Node Termination Handler is shutting down")
+	log.Info().Msg("AWS Node Termination Handler is shutting down")
 	wg.Wait()
 	log.Debug().Msg("all event processors finished")
 }
```
```diff
@@ -260,17 +260,17 @@ func watchForCancellationEvents(cancelChan <-chan monitor.InterruptionEvent, int
 		nodeName := interruptionEvent.NodeName
 		interruptionEventStore.CancelInterruptionEvent(interruptionEvent.EventID)
 		if interruptionEventStore.ShouldUncordonNode(nodeName) {
-			log.Log().Msg("Uncordoning the node due to a cancellation event")
+			log.Info().Msg("Uncordoning the node due to a cancellation event")
 			err := node.Uncordon(nodeName)
 			if err != nil {
-				log.Log().Err(err).Msg("Uncordoning the node failed")
+				log.Err(err).Msg("Uncordoning the node failed")
 			}
 			metrics.NodeActionsInc("uncordon", nodeName, err)

 			node.RemoveNTHLabels(nodeName)
 			node.RemoveNTHTaints(nodeName)
 		} else {
-			log.Log().Msg("Another interruption event is active, not uncordoning the node")
+			log.Info().Msg("Another interruption event is active, not uncordoning the node")
 		}
 	}
 }
```
```diff
@@ -280,13 +280,13 @@ func drainOrCordonIfNecessary(interruptionEventStore *interruptioneventstore.Sto
 	nodeName := drainEvent.NodeName
 	nodeLabels, err := node.GetNodeLabels(nodeName)
 	if err != nil {
-		log.Warn().Err(err).Msgf("Unable to fetch node labels for node '%s' ", nodeName)
+		log.Err(err).Msgf("Unable to fetch node labels for node '%s' ", nodeName)
 	}
 	drainEvent.NodeLabels = nodeLabels
 	if drainEvent.PreDrainTask != nil {
 		err := drainEvent.PreDrainTask(*drainEvent, node)
 		if err != nil {
-			log.Log().Err(err).Msg("There was a problem executing the pre-drain task")
+			log.Err(err).Msg("There was a problem executing the pre-drain task")
 		}
 		metrics.NodeActionsInc("pre-drain", nodeName, err)
 	}
```
```diff
@@ -295,35 +295,35 @@ func drainOrCordonIfNecessary(interruptionEventStore *interruptioneventstore.Sto
 		err := node.Cordon(nodeName)
 		if err != nil {
 			if errors.IsNotFound(err) {
-				log.Warn().Err(err).Msgf("node '%s' not found in the cluster", nodeName)
+				log.Err(err).Msgf("node '%s' not found in the cluster", nodeName)
 			} else {
-				log.Log().Err(err).Msg("There was a problem while trying to cordon the node")
+				log.Err(err).Msg("There was a problem while trying to cordon the node")
 				os.Exit(1)
 			}
 		} else {
-			log.Log().Str("node_name", nodeName).Msg("Node successfully cordoned")
+			log.Info().Str("node_name", nodeName).Msg("Node successfully cordoned")
 			podNameList, err := node.FetchPodNameList(nodeName)
 			if err != nil {
-				log.Log().Err(err).Msgf("Unable to fetch running pods for node '%s' ", nodeName)
+				log.Err(err).Msgf("Unable to fetch running pods for node '%s' ", nodeName)
 			}
 			drainEvent.Pods = podNameList
 			err = node.LogPods(podNameList, nodeName)
 			if err != nil {
-				log.Log().Err(err).Msg("There was a problem while trying to log all pod names on the node")
+				log.Err(err).Msg("There was a problem while trying to log all pod names on the node")
 			}
 			metrics.NodeActionsInc("cordon", nodeName, err)
 		}
 	} else {
 		err := node.CordonAndDrain(nodeName)
 		if err != nil {
 			if errors.IsNotFound(err) {
-				log.Warn().Err(err).Msgf("node '%s' not found in the cluster", nodeName)
+				log.Err(err).Msgf("node '%s' not found in the cluster", nodeName)
 			} else {
-				log.Log().Err(err).Msg("There was a problem while trying to cordon and drain the node")
+				log.Err(err).Msg("There was a problem while trying to cordon and drain the node")
 				os.Exit(1)
 			}
 		} else {
-			log.Log().Str("node_name", nodeName).Msg("Node successfully cordoned and drained")
+			log.Info().Str("node_name", nodeName).Msg("Node successfully cordoned and drained")
 			metrics.NodeActionsInc("cordon-and-drain", nodeName, err)
 		}
 	}
```
---

```diff
@@ -223,7 +223,7 @@ func (e *Service) Request(contextPath string) (*http.Response, error) {
 	if err != nil {
 		e.v2Token = ""
 		e.tokenTTL = -1
-		log.Log().Err(err).Msg("Unable to retrieve an IMDSv2 token, continuing with IMDSv1")
+		log.Warn().Msgf("Unable to retrieve an IMDSv2 token, continuing with IMDSv1, %v", err)
 	} else {
 		e.v2Token = token
 		e.tokenTTL = ttl
```
```diff
@@ -267,7 +267,7 @@ func (e *Service) getV2Token() (string, int, error) {
 	httpReq := func() (*http.Response, error) {
 		return e.httpClient.Do(req)
 	}
-	log.Log().Msg("Trying to get token from IMDSv2")
+	log.Debug().Msg("Trying to get token from IMDSv2")
 	resp, err := retry(1, 2*time.Second, httpReq)
 	if err != nil {
 		return "", -1, err
```
```diff
@@ -284,7 +284,7 @@ func (e *Service) getV2Token() (string, int, error) {
 	if err != nil {
 		return "", -1, fmt.Errorf("IMDS v2 Token TTL header not sent in response: %w", err)
 	}
-	log.Log().Msg("Got token from IMDSv2")
+	log.Debug().Msg("Got token from IMDSv2")
 	return string(token), ttl, nil
 }
```
```diff
@@ -307,8 +307,7 @@ func retry(attempts int, sleep time.Duration, httpReq func() (*http.Response, er
 		jitter := time.Duration(rand.Int63n(int64(sleep)))
 		sleep = sleep + jitter/2

-		log.Log().Msgf("Request failed. Attempts remaining: %d", attempts)
-		log.Log().Msgf("Sleep for %s seconds", sleep)
+		log.Warn().Msgf("Request failed. Attempts remaining: %d, sleeping for %s seconds", attempts, sleep)
 		time.Sleep(sleep)
 		return retry(attempts, 2*sleep, httpReq)
 	}
```
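As an aside on the consolidated log line: the sleep it reports grows roughly geometrically, since each attempt adds up to half the current sleep as jitter and then doubles the base for the next call. A standalone sketch of that schedule (my own illustration of the arithmetic above, not the repo's code):

```go
package main

import (
	"fmt"
	"math/rand"
	"time"
)

func main() {
	// Mirror the arithmetic in retry(): jitter is uniform in [0, sleep),
	// half of it is added to the sleep, and the recursive call gets 2*sleep.
	sleep := 2 * time.Second
	for attempts := 3; attempts > 0; attempts-- {
		jitter := time.Duration(rand.Int63n(int64(sleep)))
		sleep = sleep + jitter/2
		fmt.Printf("attempts remaining: %d, sleeping for %s\n", attempts, sleep)
		sleep = 2 * sleep // base passed to the next attempt
	}
}
```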
```diff
@@ -322,12 +321,12 @@ func (e *Service) GetNodeMetadata() NodeMetadata {
 	var metadata NodeMetadata
 	identityDoc, err := e.GetMetadataInfo(IdentityDocPath)
 	if err != nil {
-		log.Log().Err(err).Msg("Unable to fetch metadata from IMDS")
+		log.Err(err).Msg("Unable to fetch metadata from IMDS")
 		return metadata
 	}
 	err = json.NewDecoder(strings.NewReader(identityDoc)).Decode(&metadata)
 	if err != nil {
-		log.Log().Msg("Unable to fetch instance identity document from ec2 metadata")
+		log.Warn().Msg("Unable to fetch instance identity document from ec2 metadata")
 	}
 	metadata.InstanceID, _ = e.GetMetadataInfo(InstanceIDPath)
 	metadata.InstanceType, _ = e.GetMetadataInfo(InstanceTypePath)
 	metadata.LocalIP, _ = e.GetMetadataInfo(LocalIPPath)
```

> **Author:** I don't think we can make a strict rule like that. We have a fallback for this error, so I think it can stay a Warn.

> **Contributor:** Not sure I agree that all logging within `err != nil` should be error level, but we should come up with a consistent understanding of what these levels mean.

> **Contributor:** Agreed the rule can't be that strict, but if having a fallback doesn't classify as an error, then what about the IMDSv2 token case? v1 is the fallback there. Are there any formal docs for log levels, similar to semantic versioning or something? Agreed we need some kind of consistency.

> **Author:** Yeah, I changed that one to WARN.

> **Author:** I don't think there's anything formal for log levels... I think we probably log way too much in NTH, so maybe we should consider what actually makes sense to log, so that someone can view the logs and be confident things are working and have enough information to debug certain error scenarios.
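The rule of thumb this thread converges on (my paraphrase, not a documented convention) is: warn when a fallback exists, error when there is no recovery path. Sketched against the two IMDS cases above, with hypothetical helper names standing in for the real calls:

```go
package main

import (
	"errors"

	"github.com/rs/zerolog/log"
)

// Hypothetical stand-ins for the IMDS calls above, purely to illustrate
// the level convention discussed in the thread.
func fetchIMDSv2Token() (string, error)      { return "", errors.New("403") }
func fetchIdentityDocument() (string, error) { return "", errors.New("timeout") }

func main() {
	// A fallback exists (IMDSv1), so the failure is survivable: Warn.
	if _, err := fetchIMDSv2Token(); err != nil {
		log.Warn().Msgf("Unable to retrieve an IMDSv2 token, continuing with IMDSv1, %v", err)
	}

	// No fallback: without the identity document we bail out early: Error.
	if _, err := fetchIdentityDocument(); err != nil {
		log.Err(err).Msg("Unable to fetch metadata from IMDS")
		return
	}
}
```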
```diff
@@ -340,7 +339,7 @@ func (e *Service) GetNodeMetadata() NodeMetadata {
 	metadata.PublicIP, _ = e.GetMetadataInfo(PublicIPPath)
 	metadata.LocalHostname, _ = e.GetMetadataInfo(LocalHostnamePath)

-	log.Log().Interface("metadata", metadata).Msg("Startup Metadata Retrieved")
+	log.Info().Interface("metadata", metadata).Msg("Startup Metadata Retrieved")

 	return metadata
 }
```