Skip to content

Commit 7fa3eb0

Browse files
committed
Replace and deprecate ASG-specific tags
1 parent 44f8012 commit 7fa3eb0

File tree

9 files changed

+75
-59
lines changed

9 files changed

+75
-59
lines changed

README.md

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -269,19 +269,26 @@ $ aws autoscaling put-lifecycle-hook \
269269
--role-arn <your SQS access role ARN here>
270270
```
271271

272-
#### 3. Tag the ASGs:
272+
#### 3. Tag the ASGs and Instances:
273273

274-
By default the aws-node-termination-handler will only manage terminations for ASGs tagged w/ `key=aws-node-termination-handler/managed`
274+
By default the aws-node-termination-handler will only manage terminations for instances tagged with `key=aws-node-termination-handler/managed`.
275+
The value of the key does not matter.
275276

277+
To tag ASGs and propagate the tags to your instances (recommended):
276278
```
277279
$ aws autoscaling create-or-update-tags \
278280
--tags ResourceId=my-auto-scaling-group,ResourceType=auto-scaling-group,Key=aws-node-termination-handler/managed,Value=,PropagateAtLaunch=true
279281
```
280282

281-
The value of the key does not matter.
283+
To tag an EC2 instance:
284+
```
285+
aws ec2 create-tags \
286+
--resources i-1234567890abcdef0 \
287+
--tags 'Key="aws-node-termination-handler/managed",Value='
288+
```
282289

283290
This functionality is helpful in accounts where there are ASGs that do not run Kubernetes nodes, or where you do not want aws-node-termination-handler to manage their termination lifecycle.
284-
However, if your account is dedicated to ASGs for your kubernetes cluster, then you can turn off the ASG tag check by setting the flag `--check-asg-tag-before-draining=false` or environment variable `CHECK_ASG_TAG_BEFORE_DRAINING=false`.
291+
However, if your account is dedicated to ASGs for your Kubernetes cluster, then you can turn off the tag check by setting the flag `--check-tag-before-draining=false` or environment variable `CHECK_TAG_BEFORE_DRAINING=false`.
285292

286293
You can also control what resources NTH manages by adding the resource ARNs to your Amazon EventBridge rules.
287294

cmd/node-termination-handler.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,8 @@ func main() {
176176
log.Debug().Msgf("AWS Credentials retrieved from provider: %s", creds.ProviderName)
177177

178178
sqsMonitor := sqsevent.SQSMonitor{
179-
CheckIfManaged: nthConfig.CheckASGTagBeforeDraining,
180-
ManagedAsgTag: nthConfig.ManagedAsgTag,
179+
CheckIfManaged: nthConfig.CheckTagBeforeDraining,
180+
ManagedTag: nthConfig.ManagedTag,
181181
QueueURL: nthConfig.QueueURL,
182182
InterruptionChan: interruptionChan,
183183
CancelChan: cancelChan,

config/helm/aws-node-termination-handler/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,9 @@ The configuration in this table applies to AWS Node Termination Handler in queue
110110
| `awsRegion` | If specified, use the AWS region for AWS API calls, else NTH will try to find the region through the `AWS_REGION` environment variable, IMDS, or the specified queue URL. | `""` |
111111
| `queueURL` | Listens for messages on the specified SQS queue URL. | `""` |
112112
| `workers` | The maximum amount of parallel event processors to handle concurrent events. | `10` |
113-
| `checkASGTagBeforeDraining` | If `true`, check that the instance is tagged with the `managedAsgTag` before draining the node. If `false`, disables calls ASG API. | `true` |
114-
| `managedAsgTag` | The node tag to check if `checkASGTagBeforeDraining` is `true`. | `aws-node-termination-handler/managed` |
115-
| `useProviderId` | If `true`, fetch node name through Kubernetes node spec ProviderID instead of AWS event PrivateDnsHostname. | `false` |
113+
| `checkTagBeforeDraining` | If `true`, check that the instance is tagged with the `managedTag` before draining the node. | `true` |
114+
| `managedTag` | The instance tag to look for when `checkTagBeforeDraining` is `true`. | `aws-node-termination-handler/managed` |
115+
| `useProviderId` | If `true`, fetch node name through Kubernetes node spec ProviderID instead of AWS event PrivateDnsHostname. | `false` |
116116

117117
### IMDS Mode Configuration
118118

config/helm/aws-node-termination-handler/templates/deployment.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,10 @@ spec:
8282
value: {{ .Values.enablePrometheusServer | quote }}
8383
- name: PROMETHEUS_SERVER_PORT
8484
value: {{ .Values.prometheusServerPort | quote }}
85-
- name: CHECK_ASG_TAG_BEFORE_DRAINING
86-
value: {{ .Values.checkASGTagBeforeDraining | quote }}
87-
- name: MANAGED_ASG_TAG
88-
value: {{ .Values.managedAsgTag | quote }}
85+
- name: CHECK_TAG_BEFORE_DRAINING
86+
value: {{ .Values.checkTagBeforeDraining | quote }}
87+
- name: MANAGED_TAG
88+
value: {{ .Values.managedTag | quote }}
8989
- name: USE_PROVIDER_ID
9090
value: {{ .Values.useProviderId | quote }}
9191
- name: DRY_RUN

config/helm/aws-node-termination-handler/values.yaml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -171,11 +171,10 @@ queueURL: ""
171171
workers: 10
172172

173173
# If true, check that the instance is tagged with "aws-node-termination-handler/managed" as the key before draining the node
174-
# If false, disables calls to ASG API.
175-
checkASGTagBeforeDraining: true
174+
checkTagBeforeDraining: true
176175

177-
# The tag to ensure is on a node if checkASGTagBeforeDraining is true
178-
managedAsgTag: "aws-node-termination-handler/managed"
176+
# The tag that must be present on a node when checkTagBeforeDraining is true
177+
managedTag: "aws-node-termination-handler/managed"
179178

180179
# If true, fetch node name through Kubernetes node spec ProviderID instead of AWS event PrivateDnsHostname.
181180
useProviderId: false

pkg/config/config.go

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,12 @@ const (
6060
enableRebalanceDrainingDefault = false
6161
checkASGTagBeforeDrainingConfigKey = "CHECK_ASG_TAG_BEFORE_DRAINING"
6262
checkASGTagBeforeDrainingDefault = true
63+
checkTagBeforeDrainingConfigKey = "CHECK_TAG_BEFORE_DRAINING"
64+
checkTagBeforeDrainingDefault = true
6365
managedAsgTagConfigKey = "MANAGED_ASG_TAG"
66+
managedTagConfigKey = "MANAGED_TAG"
6467
managedAsgTagDefault = "aws-node-termination-handler/managed"
68+
managedTagDefault = "aws-node-termination-handler/managed"
6569
useProviderIdConfigKey = "USE_PROVIDER_ID"
6670
useProviderIdDefault = false
6771
metadataTriesConfigKey = "METADATA_TRIES"
@@ -123,7 +127,9 @@ type Config struct {
123127
EnableRebalanceMonitoring bool
124128
EnableRebalanceDraining bool
125129
CheckASGTagBeforeDraining bool
130+
CheckTagBeforeDraining bool
126131
ManagedAsgTag string
132+
ManagedTag string
127133
MetadataTries int
128134
CordonOnly bool
129135
TaintNode bool
@@ -178,8 +184,10 @@ func ParseCliArgs() (config Config, err error) {
178184
flag.BoolVar(&config.EnableSQSTerminationDraining, "enable-sqs-termination-draining", getBoolEnv(enableSQSTerminationDrainingConfigKey, enableSQSTerminationDrainingDefault), "If true, drain nodes when an SQS termination event is received")
179185
flag.BoolVar(&config.EnableRebalanceMonitoring, "enable-rebalance-monitoring", getBoolEnv(enableRebalanceMonitoringConfigKey, enableRebalanceMonitoringDefault), "If true, cordon nodes when the rebalance recommendation notice is received. If you'd like to drain the node in addition to cordoning, then also set \"enableRebalanceDraining\".")
180186
flag.BoolVar(&config.EnableRebalanceDraining, "enable-rebalance-draining", getBoolEnv(enableRebalanceDrainingConfigKey, enableRebalanceDrainingDefault), "If true, drain nodes when the rebalance recommendation notice is received")
181-
flag.BoolVar(&config.CheckASGTagBeforeDraining, "check-asg-tag-before-draining", getBoolEnv(checkASGTagBeforeDrainingConfigKey, checkASGTagBeforeDrainingDefault), "If true, check that the instance is tagged with \"aws-node-termination-handler/managed\" as the key before draining the node. If false, disables calls to ASG API.")
182-
flag.StringVar(&config.ManagedAsgTag, "managed-asg-tag", getEnv(managedAsgTagConfigKey, managedAsgTagDefault), "Sets the tag to check for on instances that is propogated from the ASG before taking action, default to aws-node-termination-handler/managed")
187+
flag.BoolVar(&config.CheckASGTagBeforeDraining, "check-asg-tag-before-draining", getBoolEnv(checkASGTagBeforeDrainingConfigKey, checkASGTagBeforeDrainingDefault), "[DEPRECATED] * Use check-tag-before-draining instead * If true, check that the instance is tagged with \"aws-node-termination-handler/managed\" as the key before draining the node. If false, disables calls to ASG API.") // austin: mark as deprecated, same as grace-period
188+
flag.BoolVar(&config.CheckTagBeforeDraining, "check-tag-before-draining", getBoolEnv(checkTagBeforeDrainingConfigKey, checkTagBeforeDrainingDefault), "If true, check that the instance is tagged with \"aws-node-termination-handler/managed\" as the key before draining the node.") // austin: mark as deprecated, same as grace-period
189+
flag.StringVar(&config.ManagedAsgTag, "managed-asg-tag", getEnv(managedAsgTagConfigKey, managedAsgTagDefault), "[DEPRECATED] * Use managed-tag instead * Sets the tag to check instances for that is propogated from the ASG before taking action, default to aws-node-termination-handler/managed") // austin: mark as deprecated, same as grace-period
190+
flag.StringVar(&config.ManagedTag, "managed-tag", getEnv(managedTagConfigKey, managedTagDefault), "Sets the tag to check instances for before taking action, default to aws-node-termination-handler/managed")
183191
flag.IntVar(&config.MetadataTries, "metadata-tries", getIntEnv(metadataTriesConfigKey, metadataTriesDefault), "The number of times to try requesting metadata. If you would like 2 retries, set metadata-tries to 3.")
184192
flag.BoolVar(&config.CordonOnly, "cordon-only", getBoolEnv(cordonOnly, false), "If true, nodes will be cordoned but not drained when an interruption event occurs.")
185193
flag.BoolVar(&config.TaintNode, "taint-node", getBoolEnv(taintNode, false), "If true, nodes will be tainted when an interruption event occurs.")
@@ -209,12 +217,26 @@ func ParseCliArgs() (config Config, err error) {
209217
config.PodTerminationGracePeriod = gracePeriod
210218
}
211219

220+
if isConfigProvided("managed-asg-tag", managedAsgTagConfigKey) && isConfigProvided("managed-tag", managedTagConfigKey) {
221+
log.Warn().Msg("Deprecated argument \"managed-asg-tag\" and the replacement argument \"managed-tag\" was provided. Using the newer argument \"managed-tag\"") // austin: check that user is expected to provide these similar to grace period
222+
} else if isConfigProvided("managed-asg-tag", managedAsgTagConfigKey) {
223+
log.Warn().Msg("Deprecated argument \"managed-asg-tag\" was provided. This argument will eventually be removed. Please switch to \"managed-tag\" instead.")
224+
config.ManagedTag = config.ManagedAsgTag
225+
}
226+
227+
if isConfigProvided("check-asg-tag-before-draining", checkASGTagBeforeDrainingConfigKey) && isConfigProvided("check-tag-before-draining", checkTagBeforeDrainingConfigKey) {
228+
log.Warn().Msg("Deprecated argument \"check-asg-tag-before-draining\" and the replacement argument \"check-tag-before-draining\" was provided. Using the newer argument \"check-tag-before-draining\"") // austin: check that user is expected to provide these similar to grace period
229+
} else if isConfigProvided("check-asg-tag-before-draining", checkASGTagBeforeDrainingConfigKey) {
230+
log.Warn().Msg("Deprecated argument \"check-asg-tag-before-draining\" was provided. This argument will eventually be removed. Please switch to \"check-tag-before-draining\" instead.")
231+
config.CheckTagBeforeDraining = config.CheckASGTagBeforeDraining
232+
}
233+
212234
switch strings.ToLower(config.LogLevel) {
213235
case "info":
214236
case "debug":
215237
case "error":
216238
default:
217-
return config, fmt.Errorf("Invalid log-level passed: %s Should be one of: info, debug, error", config.LogLevel)
239+
return config, fmt.Errorf("invalid log-level passed: %s Should be one of: info, debug, error", config.LogLevel)
218240
}
219241

220242
if config.NodeName == "" {
@@ -273,8 +295,8 @@ func (c Config) PrintJsonConfigArgs() {
273295
Str("aws_region", c.AWSRegion).
274296
Str("aws_endpoint", c.AWSEndpoint).
275297
Str("queue_url", c.QueueURL).
276-
Bool("check_asg_tag_before_draining", c.CheckASGTagBeforeDraining).
277-
Str("ManagedAsgTag", c.ManagedAsgTag).
298+
Bool("check_tag_before_draining", c.CheckTagBeforeDraining).
299+
Str("ManagedTag", c.ManagedTag).
278300
Bool("use_provider_id", c.UseProviderId).
279301
Msg("aws-node-termination-handler arguments")
280302
}
@@ -321,8 +343,8 @@ func (c Config) PrintHumanConfigArgs() {
321343
"\tkubernetes-events-extra-annotations: %s,\n"+
322344
"\taws-region: %s,\n"+
323345
"\tqueue-url: %s,\n"+
324-
"\tcheck-asg-tag-before-draining: %t,\n"+
325-
"\tmanaged-asg-tag: %s,\n"+
346+
"\tcheck-tag-before-draining: %t,\n"+
347+
"\tmanaged-tag: %s,\n"+
326348
"\tuse-provider-id: %t,\n"+
327349
"\taws-endpoint: %s,\n",
328350
c.DryRun,
@@ -358,8 +380,8 @@ func (c Config) PrintHumanConfigArgs() {
358380
c.KubernetesEventsExtraAnnotations,
359381
c.AWSRegion,
360382
c.QueueURL,
361-
c.CheckASGTagBeforeDraining,
362-
c.ManagedAsgTag,
383+
c.CheckTagBeforeDraining,
384+
c.ManagedTag,
363385
c.UseProviderId,
364386
c.AWSEndpoint,
365387
)

pkg/monitor/sqsevent/sqs-monitor.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ type SQSMonitor struct {
4848
ASG autoscalingiface.AutoScalingAPI
4949
EC2 ec2iface.EC2API
5050
CheckIfManaged bool
51-
ManagedAsgTag string
51+
ManagedTag string
5252
}
5353

5454
// InterruptionEventWrapper is a convenience wrapper for associating an interruption event with its error, if any
@@ -213,7 +213,7 @@ func (m SQSMonitor) processInterruptionEvents(interruptionEventWrappers []Interr
213213
dropMessageSuggestionCount++
214214

215215
case m.CheckIfManaged && !eventWrapper.InterruptionEvent.IsManaged:
216-
// This event isn't for an instance that is managed by this process
216+
// This event is for an instance that is not managed by this process
217217
log.Debug().Str("instance-id", eventWrapper.InterruptionEvent.InstanceID).Msg("dropping interruption event for unmanaged node")
218218
dropMessageSuggestionCount++
219219

@@ -352,7 +352,7 @@ func (m SQSMonitor) getNodeInfo(instanceID string) (*NodeInfo, error) {
352352
}
353353
}
354354

355-
if m.CheckIfManaged && nodeInfo.Tags[m.ManagedAsgTag] == "" {
355+
if m.CheckIfManaged && nodeInfo.Tags[m.ManagedTag] == "" {
356356
nodeInfo.IsManaged = false
357357
}
358358

pkg/monitor/sqsevent/sqs-monitor_internal_test.go

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ func TestGetNodeInfo_BothTags_Managed(t *testing.T) {
7676
EC2: ec2Mock,
7777
ASG: h.MockedASG{},
7878
CheckIfManaged: true,
79-
ManagedAsgTag: "aws-nth/managed",
79+
ManagedTag: "aws-nth/managed",
8080
}
8181
nodeInfo, err := monitor.getNodeInfo("i-0123456789")
8282
h.Ok(t, err)
@@ -101,7 +101,7 @@ func TestGetNodeInfo_ASGTag_ASGNotManaged(t *testing.T) {
101101
EC2: ec2Mock,
102102
ASG: asgMock,
103103
CheckIfManaged: true,
104-
ManagedAsgTag: "aws-nth/managed",
104+
ManagedTag: "aws-nth/managed",
105105
}
106106
nodeInfo, err := monitor.getNodeInfo("i-0123456789")
107107
h.Ok(t, err)
@@ -129,26 +129,20 @@ func TestGetNodeInfo_ASGTag_ASGManaged(t *testing.T) {
129129
EC2: ec2Mock,
130130
ASG: asgMock,
131131
CheckIfManaged: true,
132-
ManagedAsgTag: "aws-nth/managed",
132+
ManagedTag: "aws-nth/managed",
133133
}
134134
nodeInfo, err := monitor.getNodeInfo("i-0123456789")
135135
h.Ok(t, err)
136136
h.Equals(t, "test-asg", nodeInfo.AsgName)
137137
h.Equals(t, true, nodeInfo.IsManaged)
138138
}
139139

140-
func TestGetNodeInfo_NoASG(t *testing.T) {
140+
func TestGetNodeInfo_NoASG_Managed(t *testing.T) {
141141
ec2Mock := h.MockedEC2{
142142
DescribeInstancesResp: getDescribeInstancesResp("i-beebeebe", "mydns.example.com", map[string]string{}),
143143
}
144-
asgMock := h.MockedASG{
145-
DescribeAutoScalingInstancesResp: autoscaling.DescribeAutoScalingInstancesOutput{
146-
AutoScalingInstances: []*autoscaling.InstanceDetails{},
147-
},
148-
}
149144
monitor := SQSMonitor{
150145
EC2: ec2Mock,
151-
ASG: asgMock,
152146
}
153147
nodeInfo, err := monitor.getNodeInfo("i-0123456789")
154148
h.Ok(t, err)
@@ -160,16 +154,10 @@ func TestGetNodeInfo_NoASG_NotManaged(t *testing.T) {
160154
ec2Mock := h.MockedEC2{
161155
DescribeInstancesResp: getDescribeInstancesResp("i-beebeebe", "mydns.example.com", map[string]string{}),
162156
}
163-
asgMock := h.MockedASG{
164-
DescribeAutoScalingInstancesResp: autoscaling.DescribeAutoScalingInstancesOutput{
165-
AutoScalingInstances: []*autoscaling.InstanceDetails{},
166-
},
167-
}
168157
monitor := SQSMonitor{
169158
EC2: ec2Mock,
170-
ASG: asgMock,
171159
CheckIfManaged: true,
172-
ManagedAsgTag: "aws-nth/managed",
160+
ManagedTag: "aws-nth/managed",
173161
}
174162
nodeInfo, err := monitor.getNodeInfo("i-0123456789")
175163
h.Ok(t, err)
@@ -221,7 +209,7 @@ func TestGetNodeInfo_ASG_ASGManaged(t *testing.T) {
221209
EC2: ec2Mock,
222210
ASG: asgMock,
223211
CheckIfManaged: true,
224-
ManagedAsgTag: "aws-nth/managed",
212+
ManagedTag: "aws-nth/managed",
225213
}
226214
nodeInfo, err := monitor.getNodeInfo("i-0123456789")
227215
h.Ok(t, err)
@@ -248,7 +236,7 @@ func TestGetNodeInfo_ASG_ASGNotManaged(t *testing.T) {
248236
EC2: ec2Mock,
249237
ASG: asgMock,
250238
CheckIfManaged: true,
251-
ManagedAsgTag: "aws-nth/managed",
239+
ManagedTag: "aws-nth/managed",
252240
}
253241
nodeInfo, err := monitor.getNodeInfo("i-0123456789")
254242
h.Ok(t, err)
@@ -300,7 +288,7 @@ func TestGetNodeInfo_ASGTagErr(t *testing.T) {
300288
EC2: ec2Mock,
301289
ASG: asgMock,
302290
CheckIfManaged: true,
303-
ManagedAsgTag: "aws-nth/managed",
291+
ManagedTag: "aws-nth/managed",
304292
}
305293
_, err := monitor.getNodeInfo("i-0123456789")
306294
h.Nok(t, err)

0 commit comments

Comments
 (0)