From 5dd671fd2442ee84d3a151ea6c735a0a89aaeee9 Mon Sep 17 00:00:00 2001 From: aleskandro Date: Wed, 23 Jul 2025 13:32:51 +0100 Subject: [PATCH 1/2] Update cluster-api provider to use machineTemplate.status.nodeInfo for architecture-aware autoscale from zero kubernetes-sigs/cluster-api#11962 introduced the nodeInfo field for MachineTemplates. Providers can reconcile this field in the status subresource to inform the autoscaler about the architecture and operating system that the MachineTemplate's nodes will run. Previously, we implemented this behavior in the cluster autoscaler by leveraging the labels capacity annotation and, as a fallback, default values set in environment variables at cluster-autoscaler deployment time. With this commit, the cluster autoscaler computes the architecture and operating system of a future node with the following priority order: - Labels set in existing nodes, when not scaling from zero - Labels set in the labels capacity annotation of machine template, machine set, and machine deployment.
- Values in the status.nodeInfo of MachineTemplates - Generic/default labels set in the environment of the cluster autoscaler # Conflicts: # cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go --- .../clusterapi/clusterapi_nodegroup.go | 22 +++++++-- .../clusterapi/clusterapi_unstructured.go | 46 +++++++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go index d68a48b89c3f..5e2a1100b77a 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go @@ -361,12 +361,17 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) { }, } + nsi := ng.scalableResource.InstanceSystemInfo() + if nsi != nil { + node.Status.NodeInfo = *nsi + } + node.Status.Capacity = capacity node.Status.Allocatable = capacity node.Status.Conditions = cloudprovider.BuildReadyConditions() node.Spec.Taints = ng.scalableResource.Taints() - node.Labels, err = ng.buildTemplateLabels(nodeName) + node.Labels, err = ng.buildTemplateLabels(nodeName, nsi) if err != nil { return nil, err } @@ -380,8 +385,19 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) { return nodeInfo, nil } -func (ng *nodegroup) buildTemplateLabels(nodeName string) (map[string]string, error) { - labels := cloudprovider.JoinStringMaps(buildGenericLabels(nodeName), ng.scalableResource.Labels()) +func (ng *nodegroup) buildTemplateLabels(nodeName string, nsi *corev1.NodeSystemInfo) (map[string]string, error) { + nsiLabels := make(map[string]string) + if nsi != nil { + nsiLabels[corev1.LabelArchStable] = nsi.Architecture + nsiLabels[corev1.LabelOSStable] = nsi.OperatingSystem + } + + // The order of priority is: + // - Labels set in existing nodes, when not scaling from zero + // - Labels set in the labels capacity
annotation of machine template, machine set, and machine deployment. + // - Values in the status.nodeInfo of MachineTemplates + // - Generic/default labels set in the environment of the cluster autoscaler + labels := cloudprovider.JoinStringMaps(buildGenericLabels(nodeName), nsiLabels, ng.scalableResource.Labels()) nodes, err := ng.Nodes() if err != nil { diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go index e8a6cf339c8a..bbbe0cfd0898 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go @@ -23,6 +23,7 @@ import ( "path" "strconv" "strings" + "sync" "time" "github.com/pkg/errors" @@ -42,6 +43,8 @@ import ( type unstructuredScalableResource struct { controller *machineController unstructured *unstructured.Unstructured + infraObj *unstructured.Unstructured + infraMutex sync.RWMutex maxSize int minSize int autoscalingOptions map[string]string @@ -321,6 +324,17 @@ func (r unstructuredScalableResource) InstanceCapacity() (map[corev1.ResourceNam return capacity, nil } +// InstanceSystemInfo returns the NodeSystemInfo read from the status.nodeInfo of the infrastructure reference resource. +// If the infrastructure reference resource cannot be read or is missing, it returns nil.
+func (r unstructuredScalableResource) InstanceSystemInfo() *apiv1.NodeSystemInfo { + infraObj, err := r.readInfrastructureReferenceResource() + if err != nil || infraObj == nil { + return nil + } + nsiObj := systemInfoFromInfrastructureObject(infraObj) + return &nsiObj +} + func (r unstructuredScalableResource) InstanceResourceSlices(nodeName string) ([]*resourceapi.ResourceSlice, error) { var result []*resourceapi.ResourceSlice driver := r.InstanceDRADriver() @@ -390,6 +404,17 @@ func (r unstructuredScalableResource) InstanceDRADriver() string { } func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*unstructured.Unstructured, error) { + // Cache w/ lazy loading of the infrastructure reference resource. + r.infraMutex.RLock() + if r.infraObj != nil { + defer r.infraMutex.RUnlock() + return r.infraObj, nil + } + r.infraMutex.RUnlock() + + r.infraMutex.Lock() + defer r.infraMutex.Unlock() + obKind := r.unstructured.GetKind() obName := r.unstructured.GetName() @@ -440,6 +465,8 @@ func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*un return nil, err } + r.infraObj = infra + return infra, nil } @@ -477,6 +504,25 @@ func resourceCapacityFromInfrastructureObject(infraobj *unstructured.Unstructure return capacity } +func systemInfoFromInfrastructureObject(infraobj *unstructured.Unstructured) apiv1.NodeSystemInfo { + nsi := apiv1.NodeSystemInfo{} + infransi, found, err := unstructured.NestedStringMap(infraobj.Object, "status", "nodeInfo") + if !found || err != nil { + return nsi + } + + for k, v := range infransi { + switch k { + case "architecture": + nsi.Architecture = v + case "operatingSystem": + nsi.OperatingSystem = v + } + } + + return nsi +} + // adapted from https://github.com/kubernetes/kubernetes/blob/release-1.25/pkg/util/taints/taints.go#L39 func parseTaint(st string) (apiv1.Taint, error) { var taint apiv1.Taint From c36027bfef7af2bc2d59e8e055be7177e8f4db23 Mon Sep 17 00:00:00 2001 From: aleskandro Date: Wed, 
23 Jul 2025 13:33:19 +0100 Subject: [PATCH 2/2] [Tests] Update cluster-api provider to use machineTemplate.status.nodeInfo for architecture-aware autoscale from zero kubernetes-sigs/cluster-api#11962 introduced the nodeInfo field for MachineTemplates. Providers can reconcile this field in the status subresource to inform the autoscaler about the architecture and operating system that the MachineTemplate's nodes will run. Previously, we implemented this behavior in the cluster autoscaler by leveraging the labels capacity annotation and, as a fallback, default values set in environment variables at cluster-autoscaler deployment time. With this commit, the cluster autoscaler computes the architecture and operating system of a future node with the following priority order: - Labels set in existing nodes, when not scaling from zero - Labels set in the labels capacity annotation of machine template, machine set, and machine deployment. - Values in the status.nodeInfo of MachineTemplates - Generic/default labels set in the environment of the cluster autoscaler --- .../clusterapi/clusterapi_test_framework.go | 39 +++++- .../clusterapi_unstructured_test.go | 118 +++++++++++++++++- 2 files changed, 149 insertions(+), 8 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_test_framework.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_test_framework.go index caa47b2641d8..588a592281b3 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_test_framework.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_test_framework.go @@ -60,6 +60,7 @@ type testConfigBuilder struct { nodeCount int annotations map[string]string capacity map[string]string + nodeInfo map[string]string } // NewTestConfigBuilder returns a builder for dynamically constructing mock ClusterAPI resources for testing.
@@ -91,6 +92,7 @@ func (b *testConfigBuilder) Build() *TestConfig { isMachineDeployment, b.annotations, b.capacity, + b.nodeInfo, )[0], )[0] } @@ -111,6 +113,7 @@ func (b *testConfigBuilder) BuildMultiple(configCount int) []*TestConfig { isMachineDeployment, b.annotations, b.capacity, + b.nodeInfo, )..., ) } @@ -171,6 +174,18 @@ func (b *testConfigBuilder) WithCapacity(c map[string]string) *testConfigBuilder return b } +func (b *testConfigBuilder) WithNodeInfo(n map[string]string) *testConfigBuilder { + if n == nil { + b.nodeInfo = nil + } else { + if b.nodeInfo == nil { + b.nodeInfo = map[string]string{} + } + maps.Insert(b.nodeInfo, maps.All(n)) + } + return b +} + // TestConfig contains clusterspecific information about a single test configuration. type TestConfig struct { spec *TestSpec @@ -290,8 +305,8 @@ func createTestConfigs(specs ...TestSpec) []*TestConfig { UID: config.machineSet.GetUID(), } - if spec.capacity != nil { - klog.V(4).Infof("adding capacity to machine template") + if spec.capacity != nil || spec.nodeInfo != nil { + klog.V(4).Infof("creating machine template") config.machineTemplate = &unstructured.Unstructured{ Object: map[string]interface{}{ "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", @@ -303,6 +318,9 @@ func createTestConfigs(specs ...TestSpec) []*TestConfig { }, }, } + } + if spec.capacity != nil { + klog.V(4).Infof("adding capacity to machine template") if err := unstructured.SetNestedStringMap(config.machineTemplate.Object, spec.capacity, "status", "capacity"); err != nil { panic(err) } @@ -310,6 +328,15 @@ func createTestConfigs(specs ...TestSpec) []*TestConfig { klog.V(4).Infof("not adding capacity") } + if spec.nodeInfo != nil { + klog.V(4).Infof("adding node info") + if err := unstructured.SetNestedStringMap(config.machineTemplate.Object, spec.nodeInfo, "status", "nodeInfo"); err != nil { + panic(err) + } + } else { + klog.V(4).Infof("not adding node info") + } + for j := 0; j < spec.nodeCount; j++ { config.nodes[j], 
config.machines[j] = makeLinkedNodeAndMachine(j, spec.namespace, spec.clusterName, machineOwner, machineSetLabels) } @@ -324,6 +351,7 @@ func createTestConfigs(specs ...TestSpec) []*TestConfig { type TestSpec struct { annotations map[string]string capacity map[string]string + nodeInfo map[string]string machineDeploymentName string machineSetName string machinePoolName string @@ -333,17 +361,17 @@ type TestSpec struct { rootIsMachineDeployment bool } -func createTestSpecs(namespace, clusterName, namePrefix string, scalableResourceCount, nodeCount int, isMachineDeployment bool, annotations map[string]string, capacity map[string]string) []TestSpec { +func createTestSpecs(namespace, clusterName, namePrefix string, scalableResourceCount, nodeCount int, isMachineDeployment bool, annotations map[string]string, capacity map[string]string, nodeInfo map[string]string) []TestSpec { var specs []TestSpec for i := 0; i < scalableResourceCount; i++ { - specs = append(specs, createTestSpec(namespace, clusterName, fmt.Sprintf("%s-%d", namePrefix, i), nodeCount, isMachineDeployment, annotations, capacity)) + specs = append(specs, createTestSpec(namespace, clusterName, fmt.Sprintf("%s-%d", namePrefix, i), nodeCount, isMachineDeployment, annotations, capacity, nodeInfo)) } return specs } -func createTestSpec(namespace, clusterName, name string, nodeCount int, isMachineDeployment bool, annotations map[string]string, capacity map[string]string) TestSpec { +func createTestSpec(namespace, clusterName, name string, nodeCount int, isMachineDeployment bool, annotations map[string]string, capacity map[string]string, nodeInfo map[string]string) TestSpec { return TestSpec{ annotations: annotations, capacity: capacity, @@ -353,6 +381,7 @@ func createTestSpec(namespace, clusterName, name string, nodeCount int, isMachin namespace: namespace, nodeCount: nodeCount, rootIsMachineDeployment: isMachineDeployment, + nodeInfo: nodeInfo, } } diff --git 
a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go index e2bd032a445a..0749358faab2 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go @@ -33,9 +33,14 @@ import ( ) const ( - cpuStatusKey = "cpu" - memoryStatusKey = "memory" - nvidiaGpuStatusKey = "nvidia.com/gpu" + cpuStatusKey = "cpu" + memoryStatusKey = "memory" + nvidiaGpuStatusKey = "nvidia.com/gpu" + architectureStatusKey = "architecture" + operatingSystemStatusKey = "operatingSystem" + + arm64 = "arm64" + linux = "linux" ) func TestSetSize(t *testing.T) { @@ -595,3 +600,110 @@ func TestCanScaleFromZero(t *testing.T) { }) } } + +func TestInstanceSystemInfo(t *testing.T) { + // use a constant capacity as that's necessary for the business logic to consider the resource scalable + capacity := map[string]string{ + cpuStatusKey: "1", + memoryStatusKey: "4G", + } + testConfigs := []struct { + name string + nodeInfo map[string]string + expectedArch string + expectedOS string + }{ + { + "with no architecture or operating system in machine template's status' nodeInfo, the system info is empty", + map[string]string{}, + "", + "", + }, + { + "with architecture in machine template's status' nodeInfo, the system info is filled in the scalable resource", + map[string]string{ + architectureStatusKey: arm64, + }, + arm64, + "", + }, + { + "with operating system in machine template's status' nodeInfo, the system info is filled in the scalable resource", + map[string]string{ + operatingSystemStatusKey: linux, + }, + "", + linux, + }, + { + "with architecture and operating system in machine template's status' nodeInfo, the system info is filled in the scalable resource", + map[string]string{ + architectureStatusKey: arm64, + operatingSystemStatusKey: linux, + }, + arm64, + linux, + }, + } + + for _, tc := 
range testConfigs { + testname := fmt.Sprintf("MachineSet %s", tc.name) + t.Run(testname, func(t *testing.T) { + mdTestConfig := NewTestConfigBuilder(). + ForMachineSet(). + WithNodeCount(1). + WithCapacity(capacity). + WithNodeInfo(tc.nodeInfo). + Build() + controller := NewTestMachineController(t) + defer controller.Stop() + controller.AddTestConfigs(mdTestConfig) + + testResource := mdTestConfig.machineSet + + sr, err := newUnstructuredScalableResource(controller.machineController, testResource) + if err != nil { + t.Fatal(err) + } + + sysInfo := sr.InstanceSystemInfo() + if sysInfo.Architecture != tc.expectedArch { + t.Errorf("expected architecture %s, got %s", tc.nodeInfo[architectureStatusKey], sysInfo.Architecture) + } + if sysInfo.OperatingSystem != tc.expectedOS { + t.Errorf("expected operating system %s, got %s", tc.nodeInfo[operatingSystemStatusKey], sysInfo.OperatingSystem) + } + }) + } + + for _, tc := range testConfigs { + testname := fmt.Sprintf("MachineDeployment %s", tc.name) + t.Run(testname, func(t *testing.T) { + mdTestConfig := NewTestConfigBuilder(). + ForMachineDeployment(). + WithNodeCount(1). + WithCapacity(capacity). + WithNodeInfo(tc.nodeInfo). + Build() + controller := NewTestMachineController(t) + defer controller.Stop() + controller.AddTestConfigs(mdTestConfig) + + testResource := mdTestConfig.machineDeployment + + sr, err := newUnstructuredScalableResource(controller.machineController, testResource) + if err != nil { + t.Fatal(err) + } + + sysInfo := sr.InstanceSystemInfo() + if sysInfo.Architecture != tc.expectedArch { + t.Errorf("expected architecture %s, got %s", tc.nodeInfo[architectureStatusKey], sysInfo.Architecture) + } + + if sysInfo.OperatingSystem != tc.expectedOS { + t.Errorf("expected operating system %s, got %s", tc.nodeInfo[operatingSystemStatusKey], sysInfo.OperatingSystem) + } + }) + } +}