Skip to content

Commit c36027b

Browse files
committed
[Tests] Update cluster-api provider to use machineTemplate.status.nodeInfo for architecture-aware autoscale from zero
kubernetes-sigs/cluster-api#11962 introduced the nodeInfo field for MachineTemplates. Providers can reconcile this field in the status subresource to inform the autoscaler about the architecture and operating system that the MachineTemplate's nodes will run. Previously, the cluster autoscaler implemented this behavior by leveraging the labels capacity annotation and, as a fallback, default values set in environment variables at cluster-autoscaler deployment time. With this commit, the cluster autoscaler computes the future architecture of a node with the following priority order:
- Labels set on existing nodes, for cases that are not autoscale-from-zero.
- Labels set in the labels capacity annotation of the machine template, machine set, and machine deployment.
- Values in the status.nodeInfo of MachineTemplates.
- Generic/default labels set in the environment of the cluster autoscaler.
1 parent 5dd671f commit c36027b

File tree

2 files changed

+149
-8
lines changed

2 files changed

+149
-8
lines changed

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_test_framework.go

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ type testConfigBuilder struct {
6060
nodeCount int
6161
annotations map[string]string
6262
capacity map[string]string
63+
nodeInfo map[string]string
6364
}
6465

6566
// NewTestConfigBuilder returns a builder for dynamically constructing mock ClusterAPI resources for testing.
@@ -91,6 +92,7 @@ func (b *testConfigBuilder) Build() *TestConfig {
9192
isMachineDeployment,
9293
b.annotations,
9394
b.capacity,
95+
b.nodeInfo,
9496
)[0],
9597
)[0]
9698
}
@@ -111,6 +113,7 @@ func (b *testConfigBuilder) BuildMultiple(configCount int) []*TestConfig {
111113
isMachineDeployment,
112114
b.annotations,
113115
b.capacity,
116+
b.nodeInfo,
114117
)...,
115118
)
116119
}
@@ -171,6 +174,18 @@ func (b *testConfigBuilder) WithCapacity(c map[string]string) *testConfigBuilder
171174
return b
172175
}
173176

177+
func (b *testConfigBuilder) WithNodeInfo(n map[string]string) *testConfigBuilder {
178+
if n == nil {
179+
b.nodeInfo = nil
180+
} else {
181+
if b.nodeInfo == nil {
182+
b.nodeInfo = map[string]string{}
183+
}
184+
maps.Insert(b.nodeInfo, maps.All(n))
185+
}
186+
return b
187+
}
188+
174189
// TestConfig contains cluster-specific information about a single test configuration.
175190
type TestConfig struct {
176191
spec *TestSpec
@@ -290,8 +305,8 @@ func createTestConfigs(specs ...TestSpec) []*TestConfig {
290305
UID: config.machineSet.GetUID(),
291306
}
292307

293-
if spec.capacity != nil {
294-
klog.V(4).Infof("adding capacity to machine template")
308+
if spec.capacity != nil || spec.nodeInfo != nil {
309+
klog.V(4).Infof("creating machine template")
295310
config.machineTemplate = &unstructured.Unstructured{
296311
Object: map[string]interface{}{
297312
"apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1",
@@ -303,13 +318,25 @@ func createTestConfigs(specs ...TestSpec) []*TestConfig {
303318
},
304319
},
305320
}
321+
}
322+
if spec.capacity != nil {
323+
klog.V(4).Infof("adding capacity to machine template")
306324
if err := unstructured.SetNestedStringMap(config.machineTemplate.Object, spec.capacity, "status", "capacity"); err != nil {
307325
panic(err)
308326
}
309327
} else {
310328
klog.V(4).Infof("not adding capacity")
311329
}
312330

331+
if spec.nodeInfo != nil {
332+
klog.V(4).Infof("adding node info")
333+
if err := unstructured.SetNestedStringMap(config.machineTemplate.Object, spec.nodeInfo, "status", "nodeInfo"); err != nil {
334+
panic(err)
335+
}
336+
} else {
337+
klog.V(4).Infof("not adding node info")
338+
}
339+
313340
for j := 0; j < spec.nodeCount; j++ {
314341
config.nodes[j], config.machines[j] = makeLinkedNodeAndMachine(j, spec.namespace, spec.clusterName, machineOwner, machineSetLabels)
315342
}
@@ -324,6 +351,7 @@ func createTestConfigs(specs ...TestSpec) []*TestConfig {
324351
type TestSpec struct {
325352
annotations map[string]string
326353
capacity map[string]string
354+
nodeInfo map[string]string
327355
machineDeploymentName string
328356
machineSetName string
329357
machinePoolName string
@@ -333,17 +361,17 @@ type TestSpec struct {
333361
rootIsMachineDeployment bool
334362
}
335363

336-
func createTestSpecs(namespace, clusterName, namePrefix string, scalableResourceCount, nodeCount int, isMachineDeployment bool, annotations map[string]string, capacity map[string]string) []TestSpec {
364+
// createTestSpecs returns scalableResourceCount test specs built with
// createTestSpec. Every spec receives the same arguments except for its name,
// which is namePrefix followed by "-" and the spec's zero-based index. The
// result is nil when scalableResourceCount is not positive.
func createTestSpecs(namespace, clusterName, namePrefix string, scalableResourceCount, nodeCount int, isMachineDeployment bool, annotations map[string]string, capacity map[string]string, nodeInfo map[string]string) []TestSpec {
	var result []TestSpec

	for idx := 0; idx < scalableResourceCount; idx++ {
		specName := fmt.Sprintf("%s-%d", namePrefix, idx)
		spec := createTestSpec(namespace, clusterName, specName, nodeCount, isMachineDeployment, annotations, capacity, nodeInfo)
		result = append(result, spec)
	}

	return result
}
345373

346-
func createTestSpec(namespace, clusterName, name string, nodeCount int, isMachineDeployment bool, annotations map[string]string, capacity map[string]string) TestSpec {
374+
func createTestSpec(namespace, clusterName, name string, nodeCount int, isMachineDeployment bool, annotations map[string]string, capacity map[string]string, nodeInfo map[string]string) TestSpec {
347375
return TestSpec{
348376
annotations: annotations,
349377
capacity: capacity,
@@ -353,6 +381,7 @@ func createTestSpec(namespace, clusterName, name string, nodeCount int, isMachin
353381
namespace: namespace,
354382
nodeCount: nodeCount,
355383
rootIsMachineDeployment: isMachineDeployment,
384+
nodeInfo: nodeInfo,
356385
}
357386
}
358387

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go

Lines changed: 115 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,14 @@ import (
3333
)
3434

3535
const (
36-
cpuStatusKey = "cpu"
37-
memoryStatusKey = "memory"
38-
nvidiaGpuStatusKey = "nvidia.com/gpu"
36+
cpuStatusKey = "cpu"
37+
memoryStatusKey = "memory"
38+
nvidiaGpuStatusKey = "nvidia.com/gpu"
39+
architectureStatusKey = "architecture"
40+
operatingSystemStatusKey = "operatingSystem"
41+
42+
arm64 = "arm64"
43+
linux = "linux"
3944
)
4045

4146
func TestSetSize(t *testing.T) {
@@ -595,3 +600,110 @@ func TestCanScaleFromZero(t *testing.T) {
595600
})
596601
}
597602
}
603+
604+
func TestInstanceSystemInfo(t *testing.T) {
605+
// use a constant capacity as that's necessary for the business logic to consider the resource scalable
606+
capacity := map[string]string{
607+
cpuStatusKey: "1",
608+
memoryStatusKey: "4G",
609+
}
610+
testConfigs := []struct {
611+
name string
612+
nodeInfo map[string]string
613+
expectedArch string
614+
expectedOS string
615+
}{
616+
{
617+
"with no architecture or operating system in machine template's status' nodeInfo, the system info is empty",
618+
map[string]string{},
619+
"",
620+
"",
621+
},
622+
{
623+
"with architecture in machine template's status' nodeInfo, the system info is filled in the scalable resource",
624+
map[string]string{
625+
architectureStatusKey: arm64,
626+
},
627+
arm64,
628+
"",
629+
},
630+
{
631+
"with operating system in machine template's status' nodeInfo, the system info is filled in the scalable resource",
632+
map[string]string{
633+
operatingSystemStatusKey: linux,
634+
},
635+
"",
636+
linux,
637+
},
638+
{
639+
"with architecture and operating system in machine template's status' nodeInfo, the system info is filled in the scalable resource",
640+
map[string]string{
641+
architectureStatusKey: arm64,
642+
operatingSystemStatusKey: linux,
643+
},
644+
arm64,
645+
linux,
646+
},
647+
}
648+
649+
for _, tc := range testConfigs {
650+
testname := fmt.Sprintf("MachineSet %s", tc.name)
651+
t.Run(testname, func(t *testing.T) {
652+
mdTestConfig := NewTestConfigBuilder().
653+
ForMachineSet().
654+
WithNodeCount(1).
655+
WithCapacity(capacity).
656+
WithNodeInfo(tc.nodeInfo).
657+
Build()
658+
controller := NewTestMachineController(t)
659+
defer controller.Stop()
660+
controller.AddTestConfigs(mdTestConfig)
661+
662+
testResource := mdTestConfig.machineSet
663+
664+
sr, err := newUnstructuredScalableResource(controller.machineController, testResource)
665+
if err != nil {
666+
t.Fatal(err)
667+
}
668+
669+
sysInfo := sr.InstanceSystemInfo()
670+
if sysInfo.Architecture != tc.expectedArch {
671+
t.Errorf("expected architecture %s, got %s", tc.nodeInfo[architectureStatusKey], sysInfo.Architecture)
672+
}
673+
if sysInfo.OperatingSystem != tc.expectedOS {
674+
t.Errorf("expected operating system %s, got %s", tc.nodeInfo[operatingSystemStatusKey], sysInfo.OperatingSystem)
675+
}
676+
})
677+
}
678+
679+
for _, tc := range testConfigs {
680+
testname := fmt.Sprintf("MachineDeployment %s", tc.name)
681+
t.Run(testname, func(t *testing.T) {
682+
mdTestConfig := NewTestConfigBuilder().
683+
ForMachineDeployment().
684+
WithNodeCount(1).
685+
WithCapacity(capacity).
686+
WithNodeInfo(tc.nodeInfo).
687+
Build()
688+
controller := NewTestMachineController(t)
689+
defer controller.Stop()
690+
controller.AddTestConfigs(mdTestConfig)
691+
692+
testResource := mdTestConfig.machineDeployment
693+
694+
sr, err := newUnstructuredScalableResource(controller.machineController, testResource)
695+
if err != nil {
696+
t.Fatal(err)
697+
}
698+
699+
sysInfo := sr.InstanceSystemInfo()
700+
if sysInfo.Architecture != tc.expectedArch {
701+
t.Errorf("expected architecture %s, got %s", tc.nodeInfo[architectureStatusKey], sysInfo.Architecture)
702+
}
703+
704+
if sysInfo.OperatingSystem != tc.expectedOS {
705+
t.Errorf("expected operating system %s, got %s", tc.nodeInfo[operatingSystemStatusKey], sysInfo.OperatingSystem)
706+
}
707+
})
708+
}
709+
}

0 commit comments

Comments
 (0)