From adb1bfcd8738aac12b6915e5864241f4facb4ac3 Mon Sep 17 00:00:00 2001
From: Arvinderpal Wander
Date: Wed, 10 Feb 2021 15:24:25 -0800
Subject: [PATCH] Add the Available, Progressing, and Ready conditions to
 MachineDeployment

---
 api/v1alpha3/zz_generated.conversion.go       |   6 +-
 api/v1alpha4/condition_consts.go              |  31 ++
 api/v1alpha4/machinedeployment_types.go       |  12 +
 api/v1alpha4/zz_generated.deepcopy.go         |   9 +-
 .../cluster.x-k8s.io_machinedeployments.yaml  |  29 ++
 .../machinedeployment_controller_test.go      |  18 ++
 controllers/machinedeployment_rolling.go      |   4 +-
 controllers/machinedeployment_sync.go         | 128 ++++++++-
 controllers/machinedeployment_sync_test.go    | 268 +++++++++++++++---
 controllers/mdutil/util.go                    |  81 ++++++
 10 files changed, 533 insertions(+), 53 deletions(-)

diff --git a/api/v1alpha3/zz_generated.conversion.go b/api/v1alpha3/zz_generated.conversion.go
index 0f00332f99b7..0a97b30da762 100644
--- a/api/v1alpha3/zz_generated.conversion.go
+++ b/api/v1alpha3/zz_generated.conversion.go
@@ -812,14 +812,10 @@ func autoConvert_v1alpha4_MachineDeploymentStatus_To_v1alpha3_MachineDeploymentS
 	out.AvailableReplicas = in.AvailableReplicas
 	out.UnavailableReplicas = in.UnavailableReplicas
 	out.Phase = in.Phase
+	// WARNING: in.Conditions requires manual conversion: does not exist in peer-type
 	return nil
 }
 
-// Convert_v1alpha4_MachineDeploymentStatus_To_v1alpha3_MachineDeploymentStatus is an autogenerated conversion function.
-func Convert_v1alpha4_MachineDeploymentStatus_To_v1alpha3_MachineDeploymentStatus(in *v1alpha4.MachineDeploymentStatus, out *MachineDeploymentStatus, s conversion.Scope) error {
-	return autoConvert_v1alpha4_MachineDeploymentStatus_To_v1alpha3_MachineDeploymentStatus(in, out, s)
-}
-
 func autoConvert_v1alpha3_MachineDeploymentStrategy_To_v1alpha4_MachineDeploymentStrategy(in *MachineDeploymentStrategy, out *v1alpha4.MachineDeploymentStrategy, s conversion.Scope) error {
 	out.Type = v1alpha4.MachineDeploymentStrategyType(in.Type)
 	if in.RollingUpdate != nil {
diff --git a/api/v1alpha4/condition_consts.go b/api/v1alpha4/condition_consts.go
index 7aca8adc6167..627e6daca03b 100644
--- a/api/v1alpha4/condition_consts.go
+++ b/api/v1alpha4/condition_consts.go
@@ -184,3 +184,34 @@ const (
 	// from making any further remediations.
 	TooManyUnhealthyReason = "TooManyUnhealthy"
 )
+
+// These are valid conditions of a MachineDeployment.
+const (
+	// MachineDeploymentReadyCondition reports an aggregate of the current status of the machines controlled by the MachineDeployment.
+	MachineDeploymentReadyCondition ConditionType = "MachineDeploymentReady"
+	// MachineDeploymentAvailable means the MachineDeployment is available, i.e. at least the minimum available
+	// machines required are up and running for at least minReadySeconds.
+	MachineDeploymentAvailable ConditionType = "MachineDeploymentAvailable"
+	// MachineDeploymentProgressing means the MachineDeployment is progressing. Progress for a MachineDeployment is
+	// estimated when a new machine set is created or adopted, and when new machines are scaled
+	// up or old machines are scaled down. Progress is not estimated for paused deployments or
+	// when progressDeadlineSeconds is not specified.
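+	// Progress is estimated with mdutil.DeploymentProgressing and mdutil.DeploymentTimedOut (added
+	// below), which compare the last observed status against the freshly calculated one on each sync.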
+	MachineDeploymentProgressing ConditionType = "MachineDeploymentProgressing"
+
+	// Reasons for deployment conditions.
+	//
+	// Progressing:
+
+	// NewMSAvailableReason is added in a deployment when its newest machine set is made available.
+	NewMSAvailableReason = "NewMachineSetAvailable"
+	// TimedOutReason is added in a deployment when its newest machine set fails to show any progress
+	// within the given deadline (progressDeadlineSeconds).
+	TimedOutReason = "ProgressDeadlineExceeded"
+	// MachineSetUpdatedReason is added in a deployment when one of its machine sets is updated as part
+	// of the rollout process.
+	MachineSetUpdatedReason = "MachineSetUpdated"
+
+	// Available:
+
+	// MinimumMachinesAvailable is added in a deployment when it has its minimum required machines available.
+	MinimumMachinesAvailable = "MinimumMachinesAvailable"
+	// MinimumMachinesUnavailable is added in a deployment when it does not have its minimum required
+	// machines available.
+	MinimumMachinesUnavailable = "MinimumMachinesUnavailable"
+)
diff --git a/api/v1alpha4/machinedeployment_types.go b/api/v1alpha4/machinedeployment_types.go
index 73eeae9aeada..40837099185c 100644
--- a/api/v1alpha4/machinedeployment_types.go
+++ b/api/v1alpha4/machinedeployment_types.go
@@ -204,6 +204,10 @@ type MachineDeploymentStatus struct {
 	// Phase represents the current phase of a MachineDeployment (ScalingUp, ScalingDown, Running, Failed, or Unknown).
 	// +optional
 	Phase string `json:"phase,omitempty"`
+
+	// Conditions defines current service state of the MachineDeployment.
+	// +optional
+	Conditions Conditions `json:"conditions,omitempty"`
 }
 
 // ANCHOR_END: MachineDeploymentStatus
@@ -280,3 +284,11 @@ type MachineDeploymentList struct {
 func init() {
 	SchemeBuilder.Register(&MachineDeployment{}, &MachineDeploymentList{})
 }
+
+// GetConditions returns the set of conditions for the MachineDeployment.
+func (in *MachineDeployment) GetConditions() Conditions {
+	return in.Status.Conditions
+}
+
+// SetConditions updates the set of conditions on the MachineDeployment.
+func (in *MachineDeployment) SetConditions(conditions Conditions) {
+	in.Status.Conditions = conditions
+}
diff --git a/api/v1alpha4/zz_generated.deepcopy.go b/api/v1alpha4/zz_generated.deepcopy.go
index 1dd174f68a4d..c6996d6079d3 100644
--- a/api/v1alpha4/zz_generated.deepcopy.go
+++ b/api/v1alpha4/zz_generated.deepcopy.go
@@ -374,7 +374,7 @@ func (in *MachineDeployment) DeepCopyInto(out *MachineDeployment) {
 	out.TypeMeta = in.TypeMeta
 	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
 	in.Spec.DeepCopyInto(&out.Spec)
-	out.Status = in.Status
+	in.Status.DeepCopyInto(&out.Status)
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeployment.
@@ -472,6 +472,13 @@ func (in *MachineDeploymentSpec) DeepCopy() *MachineDeploymentSpec {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *MachineDeploymentStatus) DeepCopyInto(out *MachineDeploymentStatus) {
 	*out = *in
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions
+		*out = make(Conditions, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineDeploymentStatus.
diff --git a/config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml b/config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml
index 7a938dfa9ac8..de328f620ed8 100644
--- a/config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml
+++ b/config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml
@@ -593,6 +593,35 @@ spec:
             description: Total number of available machines (ready for at least minReadySeconds) targeted by this deployment.
format: int32 type: integer + conditions: + description: Conditions defines current service state of the MachineDeployment. + items: + description: Condition defines an observation of a Cluster API resource operational state. + properties: + lastTransitionTime: + description: Last time the condition transitioned from one status to another. This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: A human readable message indicating details about the transition. This field may be empty. + type: string + reason: + description: The reason for the condition's last transition in CamelCase. The specific API may choose whether or not this field is considered a guaranteed API. This field may not be empty. + type: string + severity: + description: Severity provides an explicit classification of Reason code, so the users or machines can immediately understand the current situation and act accordingly. The Severity field MUST be set only when Status=False. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of condition in CamelCase or in foo.example.com/CamelCase. Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be useful (see .node.status.conditions), the ability to deconflict is important. + type: string + required: + - status + - type + type: object + type: array observedGeneration: description: The generation observed by the deployment controller. format: int64 diff --git a/controllers/machinedeployment_controller_test.go b/controllers/machinedeployment_controller_test.go index 91a764eeff30..9c1d5dea9e50 100644 --- a/controllers/machinedeployment_controller_test.go +++ b/controllers/machinedeployment_controller_test.go @@ -389,6 +389,24 @@ var _ = Describe("MachineDeployment Reconciler", func() { return len(machineSets.Items) }, timeout*5).Should(BeEquivalentTo(0)) + By("Verifying MachineDeployment has correct Conditions") + Eventually(func() bool { + key := client.ObjectKey{Name: deployment.Name, Namespace: deployment.Namespace} + if err := testEnv.Get(ctx, key, deployment); err != nil { + return false + } + var availableCond, progressingCond bool + for _, c := range deployment.Status.Conditions { + if c.Type == clusterv1.MachineDeploymentAvailable && c.Status == corev1.ConditionTrue && c.Reason == clusterv1.MinimumMachinesAvailable { + availableCond = true + } + if c.Type == clusterv1.MachineDeploymentProgressing && c.Status == corev1.ConditionTrue && c.Reason == clusterv1.NewMSAvailableReason { + progressingCond = true + } + } + return availableCond && progressingCond + }, timeout).Should(BeTrue()) + // Validate that the controller set the cluster name label in selector. 
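 		// The reconciler injects the cluster name label into the selector; status.selector reports
 		// the selector in string form, which (among other things) feeds the CRD's scale subresource.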
Expect(deployment.Status.Selector).To(ContainSubstring(testCluster.Name)) }) diff --git a/controllers/machinedeployment_rolling.go b/controllers/machinedeployment_rolling.go index dc96d383d4f6..555f520a4717 100644 --- a/controllers/machinedeployment_rolling.go +++ b/controllers/machinedeployment_rolling.go @@ -48,7 +48,7 @@ func (r *MachineDeploymentReconciler) rolloutRolling(ctx context.Context, d *clu return err } - if err := r.syncDeploymentStatus(allMSs, newMS, d); err != nil { + if err := r.syncDeploymentStatus(ctx, allMSs, newMS, d); err != nil { return err } @@ -57,7 +57,7 @@ func (r *MachineDeploymentReconciler) rolloutRolling(ctx context.Context, d *clu return err } - if err := r.syncDeploymentStatus(allMSs, newMS, d); err != nil { + if err := r.syncDeploymentStatus(ctx, allMSs, newMS, d); err != nil { return err } diff --git a/controllers/machinedeployment_sync.go b/controllers/machinedeployment_sync.go index 1a06dc7adebf..4ecd938f56fb 100644 --- a/controllers/machinedeployment_sync.go +++ b/controllers/machinedeployment_sync.go @@ -32,6 +32,7 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" "sigs.k8s.io/cluster-api/controllers/mdutil" "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -55,7 +56,7 @@ func (r *MachineDeploymentReconciler) sync(ctx context.Context, d *clusterv1.Mac // // TODO: Clean up the deployment when it's paused and no rollback is in flight. // allMSs := append(oldMSs, newMS) - return r.syncDeploymentStatus(allMSs, newMS, d) + return r.syncDeploymentStatus(ctx, allMSs, newMS, d) } // getAllMachineSetsAndSyncRevision returns all the machine sets for the provided deployment (new and all old), with new MS's and deployment's revision updated. @@ -351,8 +352,85 @@ func (r *MachineDeploymentReconciler) scale(ctx context.Context, deployment *clu } // syncDeploymentStatus checks if the status is up-to-date and sync it if necessary -func (r *MachineDeploymentReconciler) syncDeploymentStatus(allMSs []*clusterv1.MachineSet, newMS *clusterv1.MachineSet, d *clusterv1.MachineDeployment) error { - d.Status = calculateStatus(allMSs, newMS, d) +func (r *MachineDeploymentReconciler) syncDeploymentStatus(ctx context.Context, allMSs []*clusterv1.MachineSet, newMS *clusterv1.MachineSet, d *clusterv1.MachineDeployment) error { + + log := ctrl.LoggerFrom(ctx) + // If there is no progressDeadlineSeconds set, remove any Progressing condition. + if !mdutil.HasProgressDeadline(d) { + conditions.Delete(d, clusterv1.MachineDeploymentProgressing) + } + + newStatus := calculateStatus(allMSs, newMS, d) + + currentCond := conditions.Get(d, clusterv1.MachineDeploymentProgressing) + isCompleteDeployment := newStatus.Replicas == newStatus.UpdatedReplicas && currentCond != nil && currentCond.Reason == clusterv1.NewMSAvailableReason + var cond *clusterv1.Condition + if mdutil.HasProgressDeadline(d) && !isCompleteDeployment { + switch { + case mdutil.DeploymentComplete(d, &newStatus): + log.V(4).Info("MachineDeployment successfully progressed", "machinedeployment", d.Name) + // Update the deployment conditions with a message for the new machine set that + // was successfully deployed. If the condition already exists, we ignore this update. 
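+			// Note that conditions.Set leaves an identical condition untouched and only bumps
+			// LastTransitionTime when the condition's status actually changes.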
+ msg := fmt.Sprintf("MachineDeployment %q has successfully progressed.", d.Name) + if newMS != nil { + msg = fmt.Sprintf("MachineSet %q has successfully progressed.", newMS.Name) + } + cond = &clusterv1.Condition{ + Type: clusterv1.MachineDeploymentProgressing, + Status: corev1.ConditionTrue, + Reason: clusterv1.NewMSAvailableReason, + Severity: clusterv1.ConditionSeverityInfo, + Message: msg, + } + case mdutil.DeploymentProgressing(d, &newStatus): + log.V(4).Info("MachineDeployment is progressing", "machinedeployment", d.Name) + // If there is any progress made, continue by not checking if the deployment failed. This + // behavior emulates the rolling updater progressDeadline check. + msg := fmt.Sprintf("MachineDeployment %q is progressing.", d.Name) + if newMS != nil { + msg = fmt.Sprintf("MachineSet %q is progressing.", newMS.Name) + } + cond = &clusterv1.Condition{ + Type: clusterv1.MachineDeploymentProgressing, + Status: corev1.ConditionTrue, + Reason: clusterv1.MachineSetUpdatedReason, + Severity: clusterv1.ConditionSeverityInfo, + Message: msg, + } + case mdutil.DeploymentTimedOut(d, &newStatus): + log.V(4).Info("MachineDeployment has timed out progressing", "machinedeployment", d.Name) + // Update the deployment with a timeout condition. If the condition already exists, + // we ignore this update. + msg := fmt.Sprintf("MachineDeployment %q has timed out progressing.", d.Name) + if newMS != nil { + msg = fmt.Sprintf("MachineSet %q has timed out progressing.", newMS.Name) + } + cond = &clusterv1.Condition{ + Type: clusterv1.MachineDeploymentProgressing, + Status: corev1.ConditionFalse, + Reason: clusterv1.TimedOutReason, + Severity: clusterv1.ConditionSeverityError, + Message: msg, + } + } + } + d.Status = newStatus + if cond != nil { + conditions.Set(d, cond) + } + if failed, reason, msg := getMachineSetFailures(allMSs); failed { + conditions.Set(d, &clusterv1.Condition{ + Type: clusterv1.MachineDeploymentReadyCondition, + Status: corev1.ConditionFalse, + Reason: reason, + Message: msg, + }) + } else { + conditions.Set(d, &clusterv1.Condition{ + Type: clusterv1.MachineDeploymentReadyCondition, + Status: corev1.ConditionTrue, + }) + } return nil } @@ -382,6 +460,20 @@ func calculateStatus(allMSs []*clusterv1.MachineSet, newMS *clusterv1.MachineSet UnavailableReplicas: unavailableReplicas, } + // Copy conditions one by one so we won't mutate the original object. 
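+	// calculateStatus returns a brand new status struct, so carrying the existing conditions over
+	// keeps their LastTransitionTime values intact until conditions.Set reconciles them below.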
+	existingConditions := deployment.Status.Conditions
+	for i := range existingConditions {
+		status.Conditions = append(status.Conditions, existingConditions[i])
+	}
+	if availableReplicas >= *(deployment.Spec.Replicas)-mdutil.MaxUnavailable(*deployment) {
+		status.Conditions = append(status.Conditions, clusterv1.Condition{
+			Type:    clusterv1.MachineDeploymentAvailable,
+			Status:  corev1.ConditionTrue,
+			Reason:  clusterv1.MinimumMachinesAvailable,
+			Message: "MachineDeployment has minimum availability",
+		})
+	} else {
+		status.Conditions = append(status.Conditions, clusterv1.Condition{
+			Type:    clusterv1.MachineDeploymentAvailable,
+			Status:  corev1.ConditionFalse,
+			Reason:  clusterv1.MinimumMachinesUnavailable,
+			Message: "MachineDeployment does not have minimum availability",
+		})
+	}
+
 	if *deployment.Spec.Replicas == status.ReadyReplicas {
 		status.Phase = string(clusterv1.MachineDeploymentPhaseRunning)
 	}
@@ -393,14 +485,11 @@ func calculateStatus(allMSs []*clusterv1.MachineSet, newMS *clusterv1.MachineSet
 	if totalReplicas-availableReplicas < 0 {
 		status.Phase = string(clusterv1.MachineDeploymentPhaseScalingDown)
 	}
-	for _, ms := range allMSs {
-		if ms != nil {
-			if ms.Status.FailureReason != nil || ms.Status.FailureMessage != nil {
-				status.Phase = string(clusterv1.MachineDeploymentPhaseFailed)
-				break
-			}
-		}
+
+	if failed, _, _ := getMachineSetFailures(allMSs); failed {
+		status.Phase = string(clusterv1.MachineDeploymentPhaseFailed)
 	}
+
 	return status
 }
 
@@ -526,3 +615,22 @@ func updateMachineDeployment(ctx context.Context, c client.Client, d *clusterv1.
 		return patchHelper.Patch(ctx, d)
 	})
 }
+
+// getMachineSetFailures returns true, along with the failure reason and message, if any of the
+// MachineSets has failed.
+func getMachineSetFailures(allMSs []*clusterv1.MachineSet) (bool, string, string) {
+	for _, ms := range allMSs {
+		if ms != nil {
+			if ms.Status.FailureReason != nil || ms.Status.FailureMessage != nil {
+				var reason, msg string
+				if ms.Status.FailureReason != nil {
+					reason = string(*ms.Status.FailureReason)
+				}
+				if ms.Status.FailureMessage != nil {
+					msg = *ms.Status.FailureMessage
+				}
+				return true, reason, msg
+			}
+		}
+	}
+	return false, "", ""
+}
diff --git a/controllers/machinedeployment_sync_test.go b/controllers/machinedeployment_sync_test.go
index 8c9c4af59cb9..ea1918a0de18 100644
--- a/controllers/machinedeployment_sync_test.go
+++ b/controllers/machinedeployment_sync_test.go
@@ -17,23 +17,217 @@ limitations under the License.
 package controllers
 
 import (
+	"context"
 	"testing"
+	"time"
 
 	. "github.com/onsi/gomega"
+	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/tools/record"
 	"k8s.io/utils/pointer"
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
 	capierrors "sigs.k8s.io/cluster-api/errors"
+	"sigs.k8s.io/cluster-api/util/conditions"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 )
 
-func TestMachineDeploymentSyncStatus(t *testing.T) {
+func newDeploymentStatus(replicas, updatedReplicas, availableReplicas int32) clusterv1.MachineDeploymentStatus {
+	return clusterv1.MachineDeploymentStatus{
+		Replicas:          replicas,
+		UpdatedReplicas:   updatedReplicas,
+		AvailableReplicas: availableReplicas,
+	}
+}
+
+// currentDeployment assumes the returned deployment is always observed - observedGeneration does
+// not need to be tested here.
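+// The pds argument is the deployment's progressDeadlineSeconds; tests pass nil to disable
+// progress estimation for a case.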
+func currentDeployment(pds *int32, replicas, statusReplicas, updatedReplicas, availableReplicas int32, conditions clusterv1.Conditions) *clusterv1.MachineDeployment { + d := &clusterv1.MachineDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "progress-test", + }, + Spec: clusterv1.MachineDeploymentSpec{ + ProgressDeadlineSeconds: pds, + Replicas: &replicas, + Strategy: &clusterv1.MachineDeploymentStrategy{ + Type: clusterv1.RollingUpdateMachineDeploymentStrategyType, + RollingUpdate: &clusterv1.MachineRollingUpdateDeployment{ + MaxUnavailable: intOrStrPtr(0), + MaxSurge: intOrStrPtr(1), + DeletePolicy: pointer.StringPtr("Oldest"), + }, + }, + }, + Status: newDeploymentStatus(statusReplicas, updatedReplicas, availableReplicas), + } + d.Status.Conditions = conditions + return d +} + +// helper to create MS with given availableReplicas +func newMSWithAvailable(name string, specReplicas, statusReplicas, availableReplicas int32) *clusterv1.MachineSet { + return &clusterv1.MachineSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + CreationTimestamp: metav1.Time{}, + Namespace: metav1.NamespaceDefault, + }, + Spec: clusterv1.MachineSetSpec{ + Replicas: pointer.Int32Ptr(specReplicas), + }, + Status: clusterv1.MachineSetStatus{ + Selector: "", + AvailableReplicas: int32(availableReplicas), + Replicas: int32(statusReplicas), + }, + } +} + +func TestMachineDeploymentSyncDeploymentStatus(t *testing.T) { + pds := int32(60) + testTime := metav1.Date(2017, 2, 15, 18, 49, 00, 00, time.UTC) + failedTimedOut := clusterv1.Condition{ + Type: clusterv1.MachineDeploymentProgressing, + Status: corev1.ConditionFalse, + Reason: clusterv1.TimedOutReason, + } + newMSAvailable := clusterv1.Condition{ + Type: clusterv1.MachineDeploymentProgressing, + Status: corev1.ConditionTrue, + Reason: clusterv1.NewMSAvailableReason, + LastTransitionTime: testTime, + } + machineSetUpdated := clusterv1.Condition{ + Type: clusterv1.MachineDeploymentProgressing, + Status: corev1.ConditionTrue, + Reason: clusterv1.MachineSetUpdatedReason, + LastTransitionTime: testTime, + } + tests := []struct { + name string + d *clusterv1.MachineDeployment + allMSs []*clusterv1.MachineSet + newMS *clusterv1.MachineSet + expectCond bool + expectedCondType clusterv1.ConditionType + expectedCondStatus corev1.ConditionStatus + expectedCondReason string + expectedCondLastTransitionTime metav1.Time + }{ + { + name: "General: remove Progressing condition and do not estimate progress if machinedeployment has no Progress Deadline", + d: currentDeployment(nil, 3, 2, 2, 2, clusterv1.Conditions{machineSetUpdated}), + allMSs: []*clusterv1.MachineSet{newMSWithAvailable("bar", 0, 1, 1)}, + newMS: newMSWithAvailable("foo", 3, 2, 2), + expectCond: false, + expectedCondType: clusterv1.MachineDeploymentProgressing, + }, + { + name: "General: do not estimate progress of machinedeployment with only one active machineset", + d: currentDeployment(&pds, 3, 3, 3, 3, clusterv1.Conditions{newMSAvailable}), + allMSs: []*clusterv1.MachineSet{newMSWithAvailable("bar", 3, 3, 3)}, + expectCond: true, + expectedCondType: clusterv1.MachineDeploymentProgressing, + expectedCondStatus: corev1.ConditionTrue, + expectedCondReason: clusterv1.NewMSAvailableReason, + expectedCondLastTransitionTime: testTime, + }, + { + name: "DeploymentProgressing: create Progressing condition if it does not exist", + d: currentDeployment(&pds, 3, 2, 2, 2, clusterv1.Conditions{}), + allMSs: []*clusterv1.MachineSet{newMSWithAvailable("bar", 0, 1, 1)}, + newMS: newMSWithAvailable("foo", 3, 2, 2), + 
expectCond: true, + expectedCondType: clusterv1.MachineDeploymentProgressing, + expectedCondStatus: corev1.ConditionTrue, + expectedCondReason: clusterv1.MachineSetUpdatedReason, + }, + { + name: "DeploymentComplete: create Progressing condition if it does not exist", + d: currentDeployment(&pds, 3, 3, 3, 3, clusterv1.Conditions{}), + allMSs: []*clusterv1.MachineSet{}, + newMS: newMSWithAvailable("foo", 3, 3, 3), + expectCond: true, + expectedCondType: clusterv1.MachineDeploymentProgressing, + expectedCondStatus: corev1.ConditionTrue, + expectedCondReason: clusterv1.NewMSAvailableReason, + }, + { + name: "DeploymentTimedOut: update status if rollout exceeds Progress Deadline", + d: currentDeployment(&pds, 3, 2, 2, 2, clusterv1.Conditions{machineSetUpdated}), + allMSs: []*clusterv1.MachineSet{}, + newMS: newMSWithAvailable("foo", 3, 2, 2), + expectCond: true, + expectedCondType: clusterv1.MachineDeploymentProgressing, + expectedCondStatus: corev1.ConditionFalse, + expectedCondReason: clusterv1.TimedOutReason, + }, + { + name: "DeploymentTimedOut: do not update status if deployment has existing timedOut condition", + d: currentDeployment(&pds, 3, 2, 2, 2, clusterv1.Conditions{failedTimedOut}), + allMSs: []*clusterv1.MachineSet{}, + newMS: newMSWithAvailable("foo", 3, 2, 2), + expectCond: true, + expectedCondType: clusterv1.MachineDeploymentProgressing, + expectedCondStatus: corev1.ConditionFalse, + expectedCondReason: clusterv1.TimedOutReason, + expectedCondLastTransitionTime: testTime, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + g := NewWithT(t) + r := &MachineDeploymentReconciler{ + Client: fake.NewClientBuilder().Build(), + recorder: record.NewFakeRecorder(32), + } + + if test.newMS != nil { + test.allMSs = append(test.allMSs, test.newMS) + } + + err := r.syncDeploymentStatus(context.Background(), test.allMSs, test.newMS, test.d) + g.Expect(err).ToNot(HaveOccurred()) + + newCond := conditions.Get(test.d, test.expectedCondType) + if !test.expectCond { + g.Expect(newCond).To(BeNil()) + } else { + g.Expect(newCond.Type).To(Equal(test.expectedCondType)) + g.Expect(newCond.Status).To(Equal(test.expectedCondStatus)) + g.Expect(newCond.Reason).To(Equal(test.expectedCondReason)) + if !test.expectedCondLastTransitionTime.IsZero() && test.expectedCondLastTransitionTime != testTime { + g.Expect(newCond.LastTransitionTime).To(Equal(test.expectedCondLastTransitionTime)) + } + } + }) + } +} + +func TestMachineDeploymentCalculateStatus(t *testing.T) { msStatusError := capierrors.MachineSetStatusError("some failure") + testDeployment := clusterv1.MachineDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Generation: 2, + }, + Spec: clusterv1.MachineDeploymentSpec{ + Replicas: pointer.Int32Ptr(2), + Strategy: &clusterv1.MachineDeploymentStrategy{ + Type: clusterv1.RollingUpdateMachineDeploymentStrategyType, + RollingUpdate: &clusterv1.MachineRollingUpdateDeployment{ + MaxUnavailable: intOrStrPtr(0), + MaxSurge: intOrStrPtr(1), + DeletePolicy: pointer.StringPtr("Oldest"), + }, + }, + }, + } var tests = map[string]struct { machineSets []*clusterv1.MachineSet newMachineSet *clusterv1.MachineSet - deployment *clusterv1.MachineDeployment + deployment clusterv1.MachineDeployment expectedStatus clusterv1.MachineDeploymentStatus }{ "all machines are running": { @@ -61,14 +255,7 @@ func TestMachineDeploymentSyncStatus(t *testing.T) { ObservedGeneration: 1, }, }, - deployment: &clusterv1.MachineDeployment{ - ObjectMeta: metav1.ObjectMeta{ - Generation: 2, - }, - Spec: 
clusterv1.MachineDeploymentSpec{ - Replicas: pointer.Int32Ptr(2), - }, - }, + deployment: testDeployment, expectedStatus: clusterv1.MachineDeploymentStatus{ ObservedGeneration: 2, Replicas: 2, @@ -77,6 +264,14 @@ func TestMachineDeploymentSyncStatus(t *testing.T) { AvailableReplicas: 2, UnavailableReplicas: 0, Phase: "Running", + Conditions: clusterv1.Conditions{ + clusterv1.Condition{ + Type: "MachineDeploymentAvailable", + Status: "True", + Reason: "MinimumMachinesAvailable", + Message: "MachineDeployment has minimum availability", + }, + }, }, }, "scaling up": { @@ -104,14 +299,7 @@ func TestMachineDeploymentSyncStatus(t *testing.T) { ObservedGeneration: 1, }, }, - deployment: &clusterv1.MachineDeployment{ - ObjectMeta: metav1.ObjectMeta{ - Generation: 2, - }, - Spec: clusterv1.MachineDeploymentSpec{ - Replicas: pointer.Int32Ptr(2), - }, - }, + deployment: testDeployment, expectedStatus: clusterv1.MachineDeploymentStatus{ ObservedGeneration: 2, Replicas: 2, @@ -120,6 +308,14 @@ func TestMachineDeploymentSyncStatus(t *testing.T) { AvailableReplicas: 1, UnavailableReplicas: 1, Phase: "ScalingUp", + Conditions: clusterv1.Conditions{ + clusterv1.Condition{ + Type: "MachineDeploymentAvailable", + Status: "False", + Reason: "MinimumMachinesUnavailable", + Message: "MachineDeployment does not have minimum availability", + }, + }, }, }, "scaling down": { @@ -147,14 +343,7 @@ func TestMachineDeploymentSyncStatus(t *testing.T) { ObservedGeneration: 1, }, }, - deployment: &clusterv1.MachineDeployment{ - ObjectMeta: metav1.ObjectMeta{ - Generation: 2, - }, - Spec: clusterv1.MachineDeploymentSpec{ - Replicas: pointer.Int32Ptr(2), - }, - }, + deployment: testDeployment, expectedStatus: clusterv1.MachineDeploymentStatus{ ObservedGeneration: 2, Replicas: 2, @@ -163,6 +352,14 @@ func TestMachineDeploymentSyncStatus(t *testing.T) { AvailableReplicas: 3, UnavailableReplicas: 0, Phase: "ScalingDown", + Conditions: clusterv1.Conditions{ + clusterv1.Condition{ + Type: "MachineDeploymentAvailable", + Status: "True", + Reason: "MinimumMachinesAvailable", + Message: "MachineDeployment has minimum availability", + }, + }, }, }, "machine set failed": { @@ -191,14 +388,7 @@ func TestMachineDeploymentSyncStatus(t *testing.T) { ObservedGeneration: 1, }, }, - deployment: &clusterv1.MachineDeployment{ - ObjectMeta: metav1.ObjectMeta{ - Generation: 2, - }, - Spec: clusterv1.MachineDeploymentSpec{ - Replicas: pointer.Int32Ptr(2), - }, - }, + deployment: testDeployment, expectedStatus: clusterv1.MachineDeploymentStatus{ ObservedGeneration: 2, Replicas: 2, @@ -207,6 +397,14 @@ func TestMachineDeploymentSyncStatus(t *testing.T) { AvailableReplicas: 0, UnavailableReplicas: 2, Phase: "Failed", + Conditions: clusterv1.Conditions{ + clusterv1.Condition{ + Type: "MachineDeploymentAvailable", + Status: "False", + Reason: "MinimumMachinesUnavailable", + Message: "MachineDeployment does not have minimum availability", + }, + }, }, }, } @@ -215,7 +413,7 @@ func TestMachineDeploymentSyncStatus(t *testing.T) { t.Run(name, func(t *testing.T) { g := NewWithT(t) - actualStatus := calculateStatus(test.machineSets, test.newMachineSet, test.deployment) + actualStatus := calculateStatus(test.machineSets, test.newMachineSet, &test.deployment) g.Expect(actualStatus).To(Equal(test.expectedStatus)) }) } diff --git a/controllers/mdutil/util.go b/controllers/mdutil/util.go index 61c6af1f0bd5..a119b53dea4b 100644 --- a/controllers/mdutil/util.go +++ b/controllers/mdutil/util.go @@ -20,9 +20,11 @@ import ( "fmt" "hash" "hash/fnv" + "math" "sort" 
"strconv" "strings" + "time" "github.com/davecgh/go-spew/spew" "github.com/go-logr/logr" @@ -524,6 +526,85 @@ func DeploymentComplete(deployment *clusterv1.MachineDeployment, newStatus *clus newStatus.ObservedGeneration >= deployment.Generation } +// DeploymentProgressing reports progress for a deployment. Progress is estimated by comparing the +// current with the new status of the deployment that the controller is observing. More specifically, +// when new machines are scaled up or become ready or available, or old machines are scaled down, then we +// consider the deployment is progressing. +func DeploymentProgressing(deployment *clusterv1.MachineDeployment, newStatus *clusterv1.MachineDeploymentStatus) bool { + oldStatus := deployment.Status + + // Old replicas that need to be scaled down + oldStatusOldReplicas := oldStatus.Replicas - oldStatus.UpdatedReplicas + newStatusOldReplicas := newStatus.Replicas - newStatus.UpdatedReplicas + + return (newStatus.UpdatedReplicas > oldStatus.UpdatedReplicas) || + (newStatusOldReplicas < oldStatusOldReplicas) || + newStatus.ReadyReplicas > deployment.Status.ReadyReplicas || + newStatus.AvailableReplicas > deployment.Status.AvailableReplicas +} + +// DeploymentTimedOut considers a deployment to have timed out once its condition that reports progress +// is older than progressDeadlineSeconds or a Progressing condition with a TimedOutReason reason already +// exists. +func DeploymentTimedOut(deployment *clusterv1.MachineDeployment, newStatus *clusterv1.MachineDeploymentStatus) bool { + if !HasProgressDeadline(deployment) { + return false + } + + // Look for the Progressing condition. If it doesn't exist, we have no base to estimate progress. + // If it's already set with a TimedOutReason reason, we have already timed out, no need to check + // again. + condition := GetMachineDeploymentCondition(*newStatus, clusterv1.MachineDeploymentProgressing) + if condition == nil { + return false + } + // If the previous condition has been a successful rollout then we shouldn't try to + // estimate any progress. Scenario: + // + // * progressDeadlineSeconds is smaller than the difference between now and the time + // the last rollout finished in the past. + // * the creation of a new ReplicaSet triggers a resync of the MachineDeployment prior to the + // cached copy of the MachineDeployment getting updated with the status.condition that indicates + // the creation of the new ReplicaSet. + // + // The MachineDeployment will be resynced and eventually its Progressing condition will catch + // up with the state of the world. + if condition.Reason == clusterv1.NewMSAvailableReason { + return false + } + if condition.Reason == clusterv1.TimedOutReason { + return true + } + + // Look at the difference in seconds between now and the last time we reported any + // progress or tried to create a replica set, or resumed a paused MachineDeployment and + // compare against progressDeadlineSeconds. + from := condition.LastTransitionTime + now := time.Now() + delta := time.Duration(*deployment.Spec.ProgressDeadlineSeconds) * time.Second + timedOut := from.Add(delta).Before(now) + + // logger.V(4).Info(fmt.Sprintf("MachineDeployment %q timed out (%t) [last progress check: %v - now: %v]", deployment.Name, timedOut, from, now)) + return timedOut +} + +// HasProgressDeadline checks if the MachineDeployment d is expected to surface the reason +// "ProgressDeadlineExceeded" when the MachineDeployment progress takes longer than expected time. 
+func HasProgressDeadline(d *clusterv1.MachineDeployment) bool { + return d.Spec.ProgressDeadlineSeconds != nil && *d.Spec.ProgressDeadlineSeconds != math.MaxInt32 +} + +// GetMachineDeploymentCondition returns the condition with the provided type. +func GetMachineDeploymentCondition(status clusterv1.MachineDeploymentStatus, condType clusterv1.ConditionType) *clusterv1.Condition { + for i := range status.Conditions { + c := status.Conditions[i] + if c.Type == condType { + return &c + } + } + return nil +} + // NewMSNewReplicas calculates the number of replicas a deployment's new MS should have. // When one of the following is true, we're rolling out the deployment; otherwise, we're scaling it. // 1) The new MS is saturated: newMS's replicas == deployment's replicas
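
For context, here is a minimal sketch (not part of the patch) of how a client could consume the new
conditions once this change merges. It relies only on the condition types and reasons introduced
above plus the existing util/conditions helpers; the waitForAvailable helper itself is hypothetical.

	package main

	import (
		"context"
		"fmt"
		"time"

		clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
		"sigs.k8s.io/cluster-api/util/conditions"
		"sigs.k8s.io/controller-runtime/pkg/client"
	)

	// waitForAvailable polls a MachineDeployment until its Available condition is True,
	// failing fast when the Progressing condition reports ProgressDeadlineExceeded.
	func waitForAvailable(ctx context.Context, c client.Client, key client.ObjectKey) error {
		for {
			md := &clusterv1.MachineDeployment{}
			if err := c.Get(ctx, key, md); err != nil {
				return err
			}
			// Available is set by calculateStatus once at least replicas-maxUnavailable
			// machines have been available for minReadySeconds.
			if conditions.IsTrue(md, clusterv1.MachineDeploymentAvailable) {
				return nil
			}
			// Surface a timed-out rollout early instead of waiting forever.
			if cond := conditions.Get(md, clusterv1.MachineDeploymentProgressing); cond != nil &&
				cond.Reason == clusterv1.TimedOutReason {
				return fmt.Errorf("machinedeployment %s/%s: %s", key.Namespace, key.Name, cond.Message)
			}
			select {
			case <-ctx.Done():
				return ctx.Err()
			case <-time.After(5 * time.Second):
			}
		}
	}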