From 0d5d817a1c2bdacabc9fb601ef02e4b5b705af0c Mon Sep 17 00:00:00 2001 From: Thomas Jungblut Date: Tue, 16 Sep 2025 15:41:59 +0200 Subject: [PATCH] OCPBUGS-57444: set appropriate rolling update settings Signed-off-by: Thomas Jungblut --- bindata/oauth-openshift/deployment.yaml | 3 +- .../deployment/deployment_controller.go | 25 ++++- .../deployment/deployment_controller_test.go | 101 ++++++++++++++++++ 3 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 pkg/controllers/deployment/deployment_controller_test.go diff --git a/bindata/oauth-openshift/deployment.yaml b/bindata/oauth-openshift/deployment.yaml index 3b8b9785d5..f2dda458c8 100644 --- a/bindata/oauth-openshift/deployment.yaml +++ b/bindata/oauth-openshift/deployment.yaml @@ -9,7 +9,8 @@ spec: strategy: type: RollingUpdate rollingUpdate: - maxUnavailable: 1 + # those are being adjusted for each control plane size by the deployment controller + maxUnavailable: 0 maxSurge: 0 selector: matchLabels: diff --git a/pkg/controllers/deployment/deployment_controller.go b/pkg/controllers/deployment/deployment_controller.go index 944c131e4b..2d7f576473 100644 --- a/pkg/controllers/deployment/deployment_controller.go +++ b/pkg/controllers/deployment/deployment_controller.go @@ -11,6 +11,7 @@ import ( "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/informers" coreinformers "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/kubernetes" @@ -253,12 +254,17 @@ func (c *oauthServerDeploymentSyncer) Sync(ctx context.Context, syncContext fact return nil, false, append(errs, fmt.Errorf("unable to ensure at most one pod per node: %v", err)) } - // Set the replica count to the number of master nodes. - masterNodeCount, err := c.countNodes(expectedDeployment.Spec.Template.Spec.NodeSelector) + // Set the replica count to the number of control plane nodes. + controlPlaneCount, err := c.countNodes(expectedDeployment.Spec.Template.Spec.NodeSelector) if err != nil { - return nil, false, append(errs, fmt.Errorf("failed to determine number of master nodes: %v", err)) + return nil, false, append(errs, fmt.Errorf("failed to determine number of control plane nodes: %v", err)) } - expectedDeployment.Spec.Replicas = masterNodeCount + if controlPlaneCount == nil { + return nil, false, append(errs, fmt.Errorf("found nil control plane nodes count")) + } + + expectedDeployment.Spec.Replicas = controlPlaneCount + setRollingUpdateParameters(*controlPlaneCount, expectedDeployment) deployment, _, err := resourceapply.ApplyDeployment(ctx, c.deployments, syncContext.Recorder(), @@ -311,3 +317,14 @@ func (c *oauthServerDeploymentSyncer) getConfigResourceVersions() ([]string, err return configRVs, nil } + +// Given the control plane sizes, we adjust the max unavailable and max surge values to mimic "MinAvailable". +// We always ensure it is controlPlaneCount - 1, as this allows us to keep have at least a single replica running. +// We also set MaxSurge to always be exactly the control plane count, as this allows us to more quickly replace failing +// deployments with a new replica set. This does not clash with the pod anti affinity set above. +func setRollingUpdateParameters(controlPlaneCount int32, deployment *appsv1.Deployment) { + maxUnavailable := intstr.FromInt32(max(controlPlaneCount-1, 1)) + maxSurge := intstr.FromInt32(controlPlaneCount) + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable + deployment.Spec.Strategy.RollingUpdate.MaxSurge = &maxSurge +} diff --git a/pkg/controllers/deployment/deployment_controller_test.go b/pkg/controllers/deployment/deployment_controller_test.go new file mode 100644 index 0000000000..b7385a9f3d --- /dev/null +++ b/pkg/controllers/deployment/deployment_controller_test.go @@ -0,0 +1,101 @@ +package deployment + +import ( + "testing" + + appsv1 "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +func TestSetRollingUpdateParameters(t *testing.T) { + testCases := []struct { + name string + controlPlaneCount int32 + expectedMaxUnavailable int32 + expectedMaxSurge int32 + }{ + { + name: "single control plane node", + controlPlaneCount: 1, + expectedMaxUnavailable: 1, // max(1-1, 1) = max(0, 1) = 1 + expectedMaxSurge: 1, + }, + { + name: "two control plane nodes", + controlPlaneCount: 2, + expectedMaxUnavailable: 1, // max(2-1, 1) = max(1, 1) = 1 + expectedMaxSurge: 2, + }, + { + name: "three control plane nodes", + controlPlaneCount: 3, + expectedMaxUnavailable: 2, // max(3-1, 1) = max(2, 1) = 2 + expectedMaxSurge: 3, + }, + { + name: "four control plane nodes", + controlPlaneCount: 4, + expectedMaxUnavailable: 3, // max(4-1, 1) = max(3, 1) = 3 + expectedMaxSurge: 4, + }, + { + name: "five control plane nodes", + controlPlaneCount: 5, + expectedMaxUnavailable: 4, // max(5-1, 1) = max(4, 1) = 4 + expectedMaxSurge: 5, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create a test deployment with rolling update strategy + deployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-deployment", + Namespace: "test-namespace", + }, + Spec: appsv1.DeploymentSpec{ + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + RollingUpdate: &appsv1.RollingUpdateDeployment{ + MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: 0}, + MaxSurge: &intstr.IntOrString{Type: intstr.Int, IntVal: 0}, + }, + }, + }, + } + + // Call the function under test + setRollingUpdateParameters(tc.controlPlaneCount, deployment) + + // Verify MaxUnavailable is set correctly + if deployment.Spec.Strategy.RollingUpdate.MaxUnavailable == nil { + t.Errorf("MaxUnavailable should not be nil") + } else { + actualMaxUnavailable := deployment.Spec.Strategy.RollingUpdate.MaxUnavailable.IntVal + if actualMaxUnavailable != tc.expectedMaxUnavailable { + t.Errorf("Expected MaxUnavailable to be %d, got %d", tc.expectedMaxUnavailable, actualMaxUnavailable) + } + } + + // Verify MaxSurge is set correctly + if deployment.Spec.Strategy.RollingUpdate.MaxSurge == nil { + t.Errorf("MaxSurge should not be nil") + } else { + actualMaxSurge := deployment.Spec.Strategy.RollingUpdate.MaxSurge.IntVal + if actualMaxSurge != tc.expectedMaxSurge { + t.Errorf("Expected MaxSurge to be %d, got %d", tc.expectedMaxSurge, actualMaxSurge) + } + } + + // Verify the values are of type Int (not String) + if deployment.Spec.Strategy.RollingUpdate.MaxUnavailable.Type != intstr.Int { + t.Errorf("Expected MaxUnavailable to be of type Int, got %v", deployment.Spec.Strategy.RollingUpdate.MaxUnavailable.Type) + } + if deployment.Spec.Strategy.RollingUpdate.MaxSurge.Type != intstr.Int { + t.Errorf("Expected MaxSurge to be of type Int, got %v", deployment.Spec.Strategy.RollingUpdate.MaxSurge.Type) + } + }) + } +}