From d006aae7c987e2f04af7d4ddb39925eb921105a7 Mon Sep 17 00:00:00 2001 From: David Collom Date: Tue, 1 Apr 2025 15:36:55 +0100 Subject: [PATCH 1/8] Track Kubernetes Channels for latest versions --- cmd/app/app.go | 16 +++- cmd/app/options.go | 18 +++- go.mod | 1 + go.sum | 2 + pkg/client/docker/docker.go | 2 + pkg/client/fallback/fallback.go | 4 +- pkg/client/util/http_backoff.go | 19 ++++ pkg/controller/kube_controller.go | 140 ++++++++++++++++++++++++++++++ pkg/metrics/kubernetes.go | 21 +++++ pkg/metrics/metrics.go | 48 +++++----- 10 files changed, 244 insertions(+), 27 deletions(-) create mode 100644 pkg/client/util/http_backoff.go create mode 100644 pkg/controller/kube_controller.go create mode 100644 pkg/metrics/kubernetes.go diff --git a/cmd/app/app.go b/cmd/app/app.go index e18d85f0..af61316f 100644 --- a/cmd/app/app.go +++ b/cmd/app/app.go @@ -110,15 +110,27 @@ func NewCommand(ctx context.Context) *cobra.Command { return fmt.Errorf("failed to setup image registry clients: %s", err) } - c := controller.NewPodReconciler(opts.CacheTimeout, + _ = client + + podController := controller.NewPodReconciler(opts.CacheTimeout, metricsServer, client, mgr.GetClient(), log, opts.DefaultTestAll, ) + if err := podController.SetupWithManager(mgr); err != nil { + return err + } - if err := c.SetupWithManager(mgr); err != nil { + kubeController := controller.NewKubeReconciler( + log, + mgr.GetConfig(), + metricsServer, + opts.KubeInterval, + opts.KubeChannel, + ) + if err := mgr.Add(kubeController); err != nil { return err } diff --git a/cmd/app/options.go b/cmd/app/options.go index b4bb191c..4e2a6b80 100644 --- a/cmd/app/options.go +++ b/cmd/app/options.go @@ -72,10 +72,16 @@ type Options struct { GracefulShutdownTimeout time.Duration CacheSyncPeriod time.Duration + KubeChannel string + KubeInterval time.Duration + + // kubeConfigFlags holds the flags for the kubernetes client kubeConfigFlags *genericclioptions.ConfigFlags - selfhosted selfhosted.Options + // Client holds the options for the image client(s) Client client.Options + // selfhosted holds the options for the selfhosted registry + selfhosted selfhosted.Options } func (o *Options) addFlags(cmd *cobra.Command) { @@ -133,7 +139,15 @@ func (o *Options) addAppFlags(fs *pflag.FlagSet) { fs.DurationVarP(&o.CacheSyncPeriod, "cache-sync-period", "", 5*time.Hour, - "The time in which all resources should be updated.") + "The duration in which all resources should be updated.") + + fs.DurationVarP(&o.KubeInterval, + "kube-interval", "", o.CacheSyncPeriod, + "The time in which kubernetes channels updates are checked.") + + fs.StringVarP(&o.KubeChannel, + "kube-channel", "", "stable", + "The Kubernetes channel to check against for cluster updates.") } func (o *Options) addAuthFlags(fs *pflag.FlagSet) { diff --git a/go.mod b/go.mod index 14584f84..391ad2fe 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( ) require ( + github.com/Masterminds/semver/v3 v3.3.1 github.com/aws/aws-sdk-go-v2/config v1.29.12 github.com/aws/aws-sdk-go-v2/credentials v1.17.65 github.com/aws/aws-sdk-go-v2/service/ecr v1.43.0 diff --git a/go.sum b/go.sum index 1d17bdfe..52b36d6d 100644 --- a/go.sum +++ b/go.sum @@ -19,6 +19,8 @@ github.com/Azure/go-autorest/logger v0.2.2/go.mod h1:I5fg9K52o+iuydlWfa9T5K6WFos github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= github.com/Azure/go-autorest/tracing v0.6.1 h1:YUMSrC/CeD1ZnnXcNYU4a/fzsO35u2Fsful9L/2nyR0= github.com/Azure/go-autorest/tracing v0.6.1/go.mod h1:/3EgjbsjraOqiicERAeu3m7/z0x1TzjQGAwDrJrXGkc= +github.com/Masterminds/semver/v3 v3.3.1 h1:QtNSWtVZ3nBfk8mAOu/B6v7FMJ+NHTIgUPi7rj+4nv4= +github.com/Masterminds/semver/v3 v3.3.1/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/aws/aws-sdk-go-v2 v1.36.3 h1:mJoei2CxPutQVxaATCzDUjcZEjVRdpsiiXi2o38yqWM= github.com/aws/aws-sdk-go-v2 v1.36.3/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= github.com/aws/aws-sdk-go-v2/config v1.29.12 h1:Y/2a+jLPrPbHpFkpAAYkVEtJmxORlXoo5k2g1fa2sUo= diff --git a/pkg/client/docker/docker.go b/pkg/client/docker/docker.go index 12e34628..93dc0913 100644 --- a/pkg/client/docker/docker.go +++ b/pkg/client/docker/docker.go @@ -14,6 +14,7 @@ import ( "github.com/hashicorp/go-retryablehttp" "github.com/jetstack/version-checker/pkg/api" + "github.com/jetstack/version-checker/pkg/client/util" ) const ( @@ -43,6 +44,7 @@ func New(opts Options, log *logrus.Entry) (*Client, error) { retryclient.RetryMax = 10 retryclient.RetryWaitMax = 2 * time.Minute retryclient.RetryWaitMin = 1 * time.Second + retryclient.Backoff = util.HTTPBackOff retryclient.Logger = log.WithField("client", "docker") client := retryclient.StandardClient() diff --git a/pkg/client/fallback/fallback.go b/pkg/client/fallback/fallback.go index 12fc0c52..b273c5bc 100644 --- a/pkg/client/fallback/fallback.go +++ b/pkg/client/fallback/fallback.go @@ -54,9 +54,9 @@ func (c *Client) Tags(ctx context.Context, host, repo, image string) (tags []api remaining := len(c.clients) - i - 1 if remaining == 0 { - c.log.Debugf("failed to lookup via %q, Giving up, no more clients", client.Name()) + c.log.Infof("failed to lookup via %q, Giving up, no more clients", client.Name()) } else { - c.log.Debugf("failed to lookup via %q, continuing to search with %v clients remaining", client.Name(), remaining) + c.log.Infof("failed to lookup via %q, continuing to search with %v clients remaining", client.Name(), remaining) } } diff --git a/pkg/client/util/http_backoff.go b/pkg/client/util/http_backoff.go new file mode 100644 index 00000000..ab65ed1c --- /dev/null +++ b/pkg/client/util/http_backoff.go @@ -0,0 +1,19 @@ +package util + +import ( + "net/http" + "time" + + "github.com/hashicorp/go-retryablehttp" +) + +// This is a custom Backoff that enforces the Max wait duration. +// If the sleep is greater we refuse to sleep at all +func HTTPBackOff(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration { + sleep := retryablehttp.DefaultBackoff(min, max, attemptNum, resp) + if sleep <= max { + return sleep + } + + return 0 +} diff --git a/pkg/controller/kube_controller.go b/pkg/controller/kube_controller.go new file mode 100644 index 00000000..eb812280 --- /dev/null +++ b/pkg/controller/kube_controller.go @@ -0,0 +1,140 @@ +package controller + +import ( + "context" + "fmt" + "io" + "strings" + "time" + + "github.com/sirupsen/logrus" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + + "github.com/hashicorp/go-retryablehttp" + "github.com/jetstack/version-checker/pkg/metrics" + + "github.com/Masterminds/semver/v3" +) + +const channelURLSuffix = "https://dl.k8s.io/release/" + +type ClusterVersionScheduler struct { + client kubernetes.Interface + log *logrus.Entry + metrics *metrics.Metrics + interval time.Duration + channel string +} + +func NewKubeReconciler( + log *logrus.Entry, + config *rest.Config, + metrics *metrics.Metrics, + interval time.Duration, + channel string, +) *ClusterVersionScheduler { + + return &ClusterVersionScheduler{ + log: log, + client: kubernetes.NewForConfigOrDie(config), + interval: interval, + metrics: metrics, + channel: channel, + } +} + +func (s *ClusterVersionScheduler) Start(ctx context.Context) error { + go s.runScheduler(ctx) + return s.reconcile(ctx) +} + +func (s *ClusterVersionScheduler) runScheduler(ctx context.Context) { + ticker := time.NewTicker(s.interval) + defer ticker.Stop() + + s.log.WithField("interval", s.interval).WithField("channel", s.channel). + Info("ClusterVersionScheduler started") + + for { + select { + case <-ctx.Done(): + s.log.Info("ClusterVersionScheduler stopping") + return + case <-ticker.C: + if err := s.reconcile(ctx); err != nil { + s.log.Error(err, "Failed to reconcile cluster version") + } + } + } +} + +func (s *ClusterVersionScheduler) reconcile(_ context.Context) error { + // Get current cluster version + current, err := s.client.Discovery().ServerVersion() + if err != nil { + return fmt.Errorf("getting cluster version: %w", err) + } + + // Get latest stable version + latest, err := getLatestStableVersion(s.channel) + if err != nil { + return fmt.Errorf("fetching latest stable version: %w", err) + } + + latestSemVer, err := semver.NewVersion(latest) + if err != nil { + return err + } + currentSemVer, err := semver.NewVersion(current.GitVersion) + if err != nil { + return err + } + // Strip metadata from the versions + currentSemVerNoMeta, _ := currentSemVer.SetMetadata("") + latestSemVerNoMeta, _ := latestSemVer.SetMetadata("") + + // Register metrics! + s.metrics.RegisterKubeVersion(!currentSemVerNoMeta.LessThan(&latestSemVerNoMeta), + currentSemVerNoMeta.String(), latestSemVerNoMeta.String(), + s.channel, + ) + + s.log.WithFields(logrus.Fields{ + "currentVersion": currentSemVerNoMeta, + "latestStable": latestSemVerNoMeta, + "channel": s.channel, + }).Info("Cluster version check complete") + + return nil +} + +func getLatestStableVersion(channel string) (string, error) { + if !strings.HasSuffix(channel, ".txt") { + channel += ".txt" + } + + // We don't need a `/` here as its should be in the channelURLSuffix + channelURL := fmt.Sprintf("%s%s", channelURLSuffix, channel) + + client := retryablehttp.NewClient() + client.RetryMax = 3 + client.RetryWaitMin = 1 * time.Second + client.RetryWaitMax = 30 * time.Second + // Optional: Log using your own logrus/logr logger + client.Logger = nil + + resp, err := client.Get(channelURL) + if err != nil { + return "", err + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + return strings.TrimSpace(string(body)), nil +} diff --git a/pkg/metrics/kubernetes.go b/pkg/metrics/kubernetes.go new file mode 100644 index 00000000..c4f326d8 --- /dev/null +++ b/pkg/metrics/kubernetes.go @@ -0,0 +1,21 @@ +package metrics + +import "github.com/prometheus/client_golang/prometheus" + +func (m *Metrics) RegisterKubeVersion(isLatest bool, currentVersion, latestVersion, channel string) { + m.mu.Lock() + defer m.mu.Unlock() + + isLatestF := 0.0 + if isLatest { + isLatestF = 1.0 + } + + m.kubernetesVersion.With( + prometheus.Labels{ + "current_version": currentVersion, + "latest_version": latestVersion, + "channel": channel, + }, + ).Set(isLatestF) +} diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 09aa10ee..c6690a90 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -28,6 +28,9 @@ type Metrics struct { containerImageDuration *prometheus.GaugeVec containerImageErrors *prometheus.CounterVec + // Kubernetes version metric + kubernetesVersion *prometheus.GaugeVec + cache k8sclient.Reader // Contains all metrics for the roundtripper @@ -80,6 +83,16 @@ func New(log *logrus.Entry, reg ctrmetrics.RegistererGatherer, cache k8sclient.R "namespace", "pod", "container", "image", }, ) + kubernetesVersion := promauto.With(reg).NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "version_checker", + Name: "is_latest_kube_version", + Help: "Where the current cluster is using the latest release channel version", + }, + []string{ + "current_version", "latest_version", "channel", + }, + ) return &Metrics{ log: log.WithField("module", "metrics"), @@ -90,6 +103,7 @@ func New(log *logrus.Entry, reg ctrmetrics.RegistererGatherer, cache k8sclient.R containerImageDuration: containerImageDuration, containerImageChecked: containerImageChecked, containerImageErrors: containerImageErrors, + kubernetesVersion: kubernetesVersion, roundTripper: NewRoundTripper(reg), } } @@ -113,15 +127,11 @@ func (m *Metrics) AddImage(namespace, pod, container, containerType, imageURL st ).Set(float64(time.Now().Unix())) } -func (m *Metrics) RemoveImage(namespace, pod, container, containerType string) { - m.mu.Lock() - defer m.mu.Unlock() - total := 0 - - total += m.containerImageVersion.DeletePartialMatch( +func (m *Metrics) CleanUpMetrics(namespace, pod string) (total int) { + total += m.containerImageDuration.DeletePartialMatch( m.buildPartialLabels(namespace, pod), ) - total += m.containerImageDuration.DeletePartialMatch( + total += m.containerImageChecked.DeletePartialMatch( m.buildPartialLabels(namespace, pod), ) @@ -131,6 +141,15 @@ func (m *Metrics) RemoveImage(namespace, pod, container, containerType string) { total += m.containerImageErrors.DeletePartialMatch( m.buildPartialLabels(namespace, pod), ) + return total +} + +func (m *Metrics) RemoveImage(namespace, pod, container, containerType string) { + m.mu.Lock() + defer m.mu.Unlock() + + total := m.CleanUpMetrics(namespace, pod) + m.log.Infof("Removed %d metrics for image %s/%s/%s", total, namespace, pod, container) } @@ -138,20 +157,7 @@ func (m *Metrics) RemovePod(namespace, pod string) { m.mu.Lock() defer m.mu.Unlock() - total := 0 - total += m.containerImageVersion.DeletePartialMatch( - m.buildPartialLabels(namespace, pod), - ) - total += m.containerImageDuration.DeletePartialMatch( - m.buildPartialLabels(namespace, pod), - ) - total += m.containerImageChecked.DeletePartialMatch( - m.buildPartialLabels(namespace, pod), - ) - total += m.containerImageErrors.DeletePartialMatch( - m.buildPartialLabels(namespace, pod), - ) - + total := m.CleanUpMetrics(namespace, pod) m.log.Infof("Removed %d metrics for pod %s/%s", total, namespace, pod) } From c574db552f0ba0c556c963ff5ab08bdfc396d262 Mon Sep 17 00:00:00 2001 From: maria-reynoso Date: Mon, 21 Jul 2025 17:25:59 +0200 Subject: [PATCH 2/8] Fix semVer --- pkg/controller/kube_controller.go | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/pkg/controller/kube_controller.go b/pkg/controller/kube_controller.go index eb812280..95dd1269 100644 --- a/pkg/controller/kube_controller.go +++ b/pkg/controller/kube_controller.go @@ -14,8 +14,7 @@ import ( "github.com/hashicorp/go-retryablehttp" "github.com/jetstack/version-checker/pkg/metrics" - - "github.com/Masterminds/semver/v3" + "github.com/jetstack/version-checker/pkg/version/semver" ) const channelURLSuffix = "https://dl.k8s.io/release/" @@ -83,21 +82,20 @@ func (s *ClusterVersionScheduler) reconcile(_ context.Context) error { return fmt.Errorf("fetching latest stable version: %w", err) } - latestSemVer, err := semver.NewVersion(latest) - if err != nil { - return err - } - currentSemVer, err := semver.NewVersion(current.GitVersion) - if err != nil { - return err - } - // Strip metadata from the versions - currentSemVerNoMeta, _ := currentSemVer.SetMetadata("") - latestSemVerNoMeta, _ := latestSemVer.SetMetadata("") + latestSemVer := semver.Parse(latest) + currentSemVer := semver.Parse(current.GitVersion) + + // Create version strings without metadata for comparison + currentSemVerNoMeta := fmt.Sprintf("%d.%d.%d", currentSemVer.Major(), currentSemVer.Minor(), currentSemVer.Patch()) + latestSemVerNoMeta := fmt.Sprintf("%d.%d.%d", latestSemVer.Major(), latestSemVer.Minor(), latestSemVer.Patch()) + + // Parse the versions without metadata for comparison + currentComparable := semver.Parse(currentSemVerNoMeta) + latestComparable := semver.Parse(latestSemVerNoMeta) // Register metrics! - s.metrics.RegisterKubeVersion(!currentSemVerNoMeta.LessThan(&latestSemVerNoMeta), - currentSemVerNoMeta.String(), latestSemVerNoMeta.String(), + s.metrics.RegisterKubeVersion(!currentComparable.LessThan(latestComparable), + currentSemVerNoMeta, latestSemVerNoMeta, s.channel, ) From bcca8e3797132aff86401553ab69363619be6975 Mon Sep 17 00:00:00 2001 From: maria-reynoso Date: Tue, 22 Jul 2025 16:29:46 +0200 Subject: [PATCH 3/8] update docs --- README.md | 24 +++++++++++++++++------- docs/metrics.md | 20 ++++++++++++++++++-- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 48216780..ca1cfe89 100644 --- a/README.md +++ b/README.md @@ -6,20 +6,29 @@ ![GitHub go.mod Go version](https://img.shields.io/github/go-mod/go-version/jetstack/version-checker) version-checker is a Kubernetes utility for observing the current versions of -images running in the cluster, as well as the latest available upstream. These -checks get exposed as Prometheus metrics to be viewed on a dashboard, or _soft_ -alert cluster operators. +images running in the cluster, as well as the latest available upstream. Additionally, +it monitors the Kubernetes cluster version against the latest available releases +using official Kubernetes release channels. These checks get exposed as Prometheus +metrics to be viewed on a dashboard, or _soft_ alert cluster operators. + +## Features + +- **Container Image Version Checking**: Monitor and compare container image versions running in the cluster against their latest upstream versions +- **Kubernetes Version Monitoring**: Track your cluster's Kubernetes version against the latest available releases from official Kubernetes channels +- **Prometheus Metrics Integration**: Export all version information as Prometheus metrics for monitoring and alerting +- **Flexible Channel Selection**: Configure which Kubernetes release channel to track (stable, latest, etc.) --- ## Why Use version-checker? -- **Improved Security**: Ensures images are up-to-date, reducing the risk of using vulnerable or compromised versions. -- **Enhanced Visibility**: Provides a clear overview of all running container versions across clusters. -- **Operational Efficiency**: Automates image tracking and reduces manual intervention in version management. -- **Compliance and Policy**: Enforcement: Helps maintain version consistency and adherence to organizational policies. +- **Improved Security**: Ensures images and Kubernetes clusters are up-to-date, reducing the risk of using vulnerable or compromised versions. +- **Enhanced Visibility**: Provides a clear overview of all running container versions and cluster versions across clusters. +- **Operational Efficiency**: Automates image and Kubernetes version tracking and reduces manual intervention in version management. +- **Compliance and Policy Enforcement**: Helps maintain version consistency and adherence to organizational policies for both applications and infrastructure. - **Incremental Upgrades**: Facilitates frequent, incremental updates to reduce the risk of large, disruptive upgrades. - **Add-On Compatibility**: Ensures compatibility with the latest versions of Kubernetes add-ons and dependencies. +- **Proactive Cluster Management**: Stay informed about Kubernetes security updates and new features through automated version monitoring. --- @@ -45,6 +54,7 @@ These registries support authentication. - [Installation Guide](docs/installation.md) - [Metrics](docs/metrics.md) +- [New Features](docs/new_features.md) --- diff --git a/docs/metrics.md b/docs/metrics.md index 8393f8b6..49405e1d 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -2,16 +2,32 @@ By default, version-checker exposes the following Prometheus metrics on `0.0.0.0:8080/metrics`: +## Container Image Metrics + - `version_checker_is_latest_version`: Indicates whether the container in use is using the latest upstream registry version. - `version_checker_last_checked`: Timestamp when the image was last checked. - `version_checker_image_lookup_duration`: Duration of the image version check. - `version_checker_image_failures_total`: Total of errors encountered during image version checks. +## Kubernetes Version Metrics + +- `version_checker_is_latest_kube_version`: Indicates whether the cluster is running the latest version from the configured Kubernetes release channel. + - Labels: `current_version`, `latest_version`, `channel` + - Value `1`: Cluster is up-to-date + - Value `0`: Update available + --- -## Example Prometheus Query +## Example Prometheus Queries +### Check container image versions ```sh QUERY="version_checker_is_latest_version" curl -s --get --data-urlencode query=$QUERY -``` \ No newline at end of file +``` + +### Check Kubernetes cluster version +```sh +QUERY="version_checker_is_latest_kube_version" +curl -s --get --data-urlencode query=$QUERY +``` From e33bb7e0628413dddbf8e60725221738132ad145 Mon Sep 17 00:00:00 2001 From: maria-reynoso Date: Fri, 25 Jul 2025 15:19:40 +0200 Subject: [PATCH 4/8] add tests --- pkg/metrics/kubernetes_test.go | 140 +++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 pkg/metrics/kubernetes_test.go diff --git a/pkg/metrics/kubernetes_test.go b/pkg/metrics/kubernetes_test.go new file mode 100644 index 00000000..509e3061 --- /dev/null +++ b/pkg/metrics/kubernetes_test.go @@ -0,0 +1,140 @@ +package metrics + +import ( + "testing" + + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +var fakeK8sClient = fake.NewFakeClient() + +func TestRegisterKubeVersion(t *testing.T) { + tests := []struct { + name string + isLatest bool + currentVersion string + latestVersion string + channel string + expectedValue float64 + }{ + { + name: "cluster is up to date", + isLatest: true, + currentVersion: "1.28.2", + latestVersion: "1.28.2", + channel: "stable", + expectedValue: 1.0, + }, + { + name: "cluster needs update", + isLatest: false, + currentVersion: "1.27.1", + latestVersion: "1.28.2", + channel: "stable", + expectedValue: 0.0, + }, + { + name: "cluster is ahead of stable", + isLatest: true, + currentVersion: "1.29.0", + latestVersion: "1.28.2", + channel: "stable", + expectedValue: 1.0, + }, + { + name: "latest channel with pre-release", + isLatest: false, + currentVersion: "1.28.1", + latestVersion: "1.29.0-alpha.1", + channel: "latest", + expectedValue: 0.0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create a new metrics instance for each test to avoid interference + registry := prometheus.NewRegistry() + m := New(logrus.NewEntry(logrus.New()), registry, fakeK8sClient) + + // Register the Kubernetes version + m.RegisterKubeVersion(tt.isLatest, tt.currentVersion, tt.latestVersion, tt.channel) + + // Gather metrics + metricFamilies, err := registry.Gather() + require.NoError(t, err) + + // Find the kubernetes version metric + var kubeMetric *dto.MetricFamily + for _, mf := range metricFamilies { + if mf.GetName() == "version_checker_is_latest_kube_version" { + kubeMetric = mf + break + } + } + + require.NotNil(t, kubeMetric, "Kubernetes version metric should be present") + require.Len(t, kubeMetric.GetMetric(), 1, "Should have exactly one metric value") + + metric := kubeMetric.GetMetric()[0] + assert.Equal(t, tt.expectedValue, metric.GetGauge().GetValue()) + + // Check labels + labels := metric.GetLabel() + assert.Len(t, labels, 3, "Should have 3 labels: current_version, latest_version, channel") + + labelMap := make(map[string]string) + for _, label := range labels { + labelMap[label.GetName()] = label.GetValue() + } + + assert.Equal(t, tt.currentVersion, labelMap["current_version"]) + assert.Equal(t, tt.latestVersion, labelMap["latest_version"]) + assert.Equal(t, tt.channel, labelMap["channel"]) + }) + } +} + +func TestRegisterKubeVersion_MultipleChannels(t *testing.T) { + registry := prometheus.NewRegistry() + m := New(logrus.NewEntry(logrus.New()), registry, fakeK8sClient) + + // Register metrics for different channels + m.RegisterKubeVersion(true, "1.28.2", "1.28.2", "stable") + m.RegisterKubeVersion(false, "1.28.2", "1.29.0-alpha.1", "latest") + + // Gather metrics + metricFamilies, err := registry.Gather() + require.NoError(t, err) + + // Find the kubernetes version metric + var kubeMetric *dto.MetricFamily + for _, mf := range metricFamilies { + if mf.GetName() == "version_checker_is_latest_kube_version" { + kubeMetric = mf + break + } + } + + require.NotNil(t, kubeMetric, "Kubernetes version metric should be present") + require.Len(t, kubeMetric.GetMetric(), 2, "Should have exactly two metric values for different channels") + + // Check that both metrics are present + channels := make(map[string]float64) + for _, metric := range kubeMetric.GetMetric() { + labelMap := make(map[string]string) + for _, label := range metric.GetLabel() { + labelMap[label.GetName()] = label.GetValue() + } + channels[labelMap["channel"]] = metric.GetGauge().GetValue() + } + + assert.Equal(t, 1.0, channels["stable"], "Stable channel should be up to date") + assert.Equal(t, 0.0, channels["latest"], "Latest channel should need update") +} From 09f0a0f4bad2a98bde2ecb423e060cecab010954 Mon Sep 17 00:00:00 2001 From: maria-reynoso Date: Tue, 29 Jul 2025 16:22:16 +0200 Subject: [PATCH 5/8] fixes --- docs/new_features.md | 43 +++++++++++++++++++++++++++++++ pkg/controller/kube_controller.go | 39 ++++++++++++++++++++-------- 2 files changed, 71 insertions(+), 11 deletions(-) create mode 100644 docs/new_features.md diff --git a/docs/new_features.md b/docs/new_features.md new file mode 100644 index 00000000..60641435 --- /dev/null +++ b/docs/new_features.md @@ -0,0 +1,43 @@ +# Kubernetes Version Monitoring + +version-checker now includes built-in Kubernetes cluster version monitoring capabilities. This feature automatically compares your cluster's current Kubernetes version against the latest available versions from official Kubernetes release channels. + +### How It Works + +The Kubernetes version checker: +- Fetches the current cluster version using the Kubernetes Discovery API +- Compares it against the latest version from the configured Kubernetes release channel (using official `https://dl.k8s.io/release/` endpoints) +- Exposes the comparison as Prometheus metrics for monitoring and alerting +- Strips metadata from versions for accurate semantic version comparison (e.g., `v1.28.2-gke.1` becomes `v1.28.2`) + +### Configuration + +You can configure the Kubernetes version checking behavior using the following CLI flags: + +- `--kube-channel`: Specifies which Kubernetes release channel to check against (default: `"stable"`) + - Examples: `stable`, `latest`, `stable-1.28`, `latest-1.29` +- `--kube-interval`: How often to check for Kubernetes version updates (default: same as `--cache-sync-period`, 5 hours) + +### Metrics + +The Kubernetes version monitoring exposes the following Prometheus metric: + +``` +version_checker_is_latest_kube_version{current_version="1.28.2", latest_version="1.29.1", channel="stable"} 0 +``` + +- Value `1`: Cluster is running the latest version from the specified channel +- Value `0`: Cluster is not running the latest version (update available) + +### Examples + +```bash +# Use the default stable channel +./version-checker + +# Monitor against the latest channel +./version-checker --kube-channel=latest + +# Monitor against a specific version channel with custom interval +./version-checker --kube-channel=stable-1.28 --kube-interval=1h +``` \ No newline at end of file diff --git a/pkg/controller/kube_controller.go b/pkg/controller/kube_controller.go index 95dd1269..2d79d509 100644 --- a/pkg/controller/kube_controller.go +++ b/pkg/controller/kube_controller.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "net/url" "strings" "time" @@ -76,10 +77,10 @@ func (s *ClusterVersionScheduler) reconcile(_ context.Context) error { return fmt.Errorf("getting cluster version: %w", err) } - // Get latest stable version - latest, err := getLatestStableVersion(s.channel) + // Get latest version from specified channel + latest, err := getLatestVersion(s.channel) if err != nil { - return fmt.Errorf("fetching latest stable version: %w", err) + return fmt.Errorf("fetching latest version from channel %s: %w", s.channel, err) } latestSemVer := semver.Parse(latest) @@ -101,20 +102,23 @@ func (s *ClusterVersionScheduler) reconcile(_ context.Context) error { s.log.WithFields(logrus.Fields{ "currentVersion": currentSemVerNoMeta, - "latestStable": latestSemVerNoMeta, + "latestVersion": latestSemVerNoMeta, "channel": s.channel, }).Info("Cluster version check complete") return nil } -func getLatestStableVersion(channel string) (string, error) { +func getLatestVersion(channel string) (string, error) { if !strings.HasSuffix(channel, ".txt") { channel += ".txt" } - // We don't need a `/` here as its should be in the channelURLSuffix - channelURL := fmt.Sprintf("%s%s", channelURLSuffix, channel) + // Use url.JoinPath to safely join the base URL and channel path + channelURL, err := url.JoinPath(channelURLSuffix, channel) + if err != nil { + return "", fmt.Errorf("failed to join channel URL: %w", err) + } client := retryablehttp.NewClient() client.RetryMax = 3 @@ -125,14 +129,27 @@ func getLatestStableVersion(channel string) (string, error) { resp, err := client.Get(channelURL) if err != nil { - return "", err + return "", fmt.Errorf("failed to fetch from channel URL %s: %w", channelURL, err) + } + defer func() { + if cerr := resp.Body.Close(); cerr != nil { + fmt.Printf("warning: failed to close response body: %v\n", cerr) + } + }() + + if resp.StatusCode != 200 { + return "", fmt.Errorf("unexpected status code %d when fetching channel %s", resp.StatusCode, channel) } - defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { - return "", err + return "", fmt.Errorf("failed to read response body: %w", err) + } + + version := strings.TrimSpace(string(body)) + if version == "" { + return "", fmt.Errorf("empty version returned from channel %s", channel) } - return strings.TrimSpace(string(body)), nil + return version, nil } From ae1ad29e4de349887ea2a32e33d5a950a75f5881 Mon Sep 17 00:00:00 2001 From: maria-reynoso Date: Fri, 1 Aug 2025 11:35:32 +0200 Subject: [PATCH 6/8] some improvements --- cmd/app/app.go | 9 ++++-- docs/new_features.md | 29 ++++++++++++++---- pkg/controller/kube_controller.go | 49 ++++++++++++++++++++++++------- 3 files changed, 69 insertions(+), 18 deletions(-) diff --git a/cmd/app/app.go b/cmd/app/app.go index 9ba174c5..9cced0d9 100644 --- a/cmd/app/app.go +++ b/cmd/app/app.go @@ -131,8 +131,13 @@ func NewCommand(ctx context.Context) *cobra.Command { opts.KubeInterval, opts.KubeChannel, ) - if err := mgr.Add(kubeController); err != nil { - return err + + // Only add to manager if controller was created (channel was specified) + if kubeController != nil { + if err := mgr.Add(kubeController); err != nil { + return err + } + log.WithField("channel", opts.KubeChannel).Info("Kubernetes version checking enabled") } // Start the manager and all controllers diff --git a/docs/new_features.md b/docs/new_features.md index 60641435..e6560d7c 100644 --- a/docs/new_features.md +++ b/docs/new_features.md @@ -29,15 +29,34 @@ version_checker_is_latest_kube_version{current_version="1.28.2", latest_version= - Value `1`: Cluster is running the latest version from the specified channel - Value `0`: Cluster is not running the latest version (update available) +### Supported Channels + +version-checker uses official Kubernetes release channels: + +- `stable` - Latest stable Kubernetes release (recommended) +- `latest` - Latest Kubernetes release (including pre-releases) +- `latest-1.28` - Latest patch for Kubernetes 1.28.x +- `latest-1.27` - Latest patch for Kubernetes 1.27.x + ### Examples ```bash -# Use the default stable channel -./version-checker +# Check against latest stable Kubernetes +version-checker --kube-version-channel=stable + +# Check against latest Kubernetes (including alpha/beta) +version-checker --kube-version-channel=latest -# Monitor against the latest channel -./version-checker --kube-channel=latest +# Check against latest 1.28.x patch +version-checker --kube-version-channel=latest-1.28 # Monitor against a specific version channel with custom interval ./version-checker --kube-channel=stable-1.28 --kube-interval=1h -``` \ No newline at end of file +``` + +### Managed Kubernetes Support + +Works with all managed Kubernetes services: +- **Amazon EKS**: Compares `v1.28.2-eks-abc123` against upstream `v1.28.2` +- **Google GKE**: Compares `v1.28.2-gke.1034000` against upstream `v1.28.2` +- **Azure AKS**: Compares `v1.28.2-aks-xyz789` against upstream `v1.28.2` \ No newline at end of file diff --git a/pkg/controller/kube_controller.go b/pkg/controller/kube_controller.go index 2d79d509..5246458e 100644 --- a/pkg/controller/kube_controller.go +++ b/pkg/controller/kube_controller.go @@ -35,9 +35,14 @@ func NewKubeReconciler( interval time.Duration, channel string, ) *ClusterVersionScheduler { + // If no channel is specified, return nil to indicate disabled + if channel == "" { + log.Info("Kubernetes version checking disabled (no channel specified)") + return nil + } return &ClusterVersionScheduler{ - log: log, + log: log.WithField("channel", channel), client: kubernetes.NewForConfigOrDie(config), interval: interval, metrics: metrics, @@ -47,7 +52,7 @@ func NewKubeReconciler( func (s *ClusterVersionScheduler) Start(ctx context.Context) error { go s.runScheduler(ctx) - return s.reconcile(ctx) + return s.reconcile() } func (s *ClusterVersionScheduler) runScheduler(ctx context.Context) { @@ -63,14 +68,14 @@ func (s *ClusterVersionScheduler) runScheduler(ctx context.Context) { s.log.Info("ClusterVersionScheduler stopping") return case <-ticker.C: - if err := s.reconcile(ctx); err != nil { + if err := s.reconcile(); err != nil { s.log.Error(err, "Failed to reconcile cluster version") } } } } -func (s *ClusterVersionScheduler) reconcile(_ context.Context) error { +func (s *ClusterVersionScheduler) reconcile() error { // Get current cluster version current, err := s.client.Discovery().ServerVersion() if err != nil { @@ -110,11 +115,21 @@ func (s *ClusterVersionScheduler) reconcile(_ context.Context) error { } func getLatestVersion(channel string) (string, error) { + // Always use upstream Kubernetes channels - this is the authoritative source + // Platform detection is kept for logging purposes only + return getLatestVersionFromUpstream(channel) +} + +func getLatestVersionFromUpstream(channel string) (string, error) { + // Validate channel - only allow known Kubernetes channels + if !isValidKubernetesChannel(channel) { + return "", fmt.Errorf("unsupported channel: %s. Valid channels: stable, latest, latest-1.xx", channel) + } + if !strings.HasSuffix(channel, ".txt") { channel += ".txt" } - // Use url.JoinPath to safely join the base URL and channel path channelURL, err := url.JoinPath(channelURLSuffix, channel) if err != nil { return "", fmt.Errorf("failed to join channel URL: %w", err) @@ -124,18 +139,13 @@ func getLatestVersion(channel string) (string, error) { client.RetryMax = 3 client.RetryWaitMin = 1 * time.Second client.RetryWaitMax = 30 * time.Second - // Optional: Log using your own logrus/logr logger client.Logger = nil resp, err := client.Get(channelURL) if err != nil { return "", fmt.Errorf("failed to fetch from channel URL %s: %w", channelURL, err) } - defer func() { - if cerr := resp.Body.Close(); cerr != nil { - fmt.Printf("warning: failed to close response body: %v\n", cerr) - } - }() + defer resp.Body.Close() if resp.StatusCode != 200 { return "", fmt.Errorf("unexpected status code %d when fetching channel %s", resp.StatusCode, channel) @@ -153,3 +163,20 @@ func getLatestVersion(channel string) (string, error) { return version, nil } + +func isValidKubernetesChannel(channel string) bool { + // Only allow official Kubernetes channels + validChannels := []string{"stable", "latest"} + + // Allow latest-X.Y format + if strings.HasPrefix(channel, "latest-1.") { + return true + } + + for _, valid := range validChannels { + if channel == valid { + return true + } + } + return false +} From e297dce3cb2fe594df7462e612bce82c97172f49 Mon Sep 17 00:00:00 2001 From: Maria Reynoso Date: Tue, 28 Oct 2025 16:43:30 +0100 Subject: [PATCH 7/8] add unit test --- pkg/controller/kube_controller_test.go | 750 +++++++++++++++++++++++++ 1 file changed, 750 insertions(+) create mode 100644 pkg/controller/kube_controller_test.go diff --git a/pkg/controller/kube_controller_test.go b/pkg/controller/kube_controller_test.go new file mode 100644 index 00000000..cc5bfa39 --- /dev/null +++ b/pkg/controller/kube_controller_test.go @@ -0,0 +1,750 @@ +package controller + +import ( + "context" + "fmt" + "io" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/version" + fakediscovery "k8s.io/client-go/discovery/fake" + fakeclientset "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/rest" + clienttesting "k8s.io/client-go/testing" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/jetstack/version-checker/pkg/metrics" + "github.com/jetstack/version-checker/pkg/version/semver" +) + +func TestNewKubeReconciler(t *testing.T) { + logger := logrus.NewEntry(logrus.New()) + logger.Logger.SetOutput(io.Discard) + + kubeClient := fake.NewFakeClient() + metricsInstance := metrics.New(logger, prometheus.NewRegistry(), kubeClient) + + // Create a minimal valid REST config for testing + config := &rest.Config{ + Host: "https://localhost:8080", + } + + t.Run("with valid channel", func(t *testing.T) { + reconciler := NewKubeReconciler( + logger, + config, + metricsInstance, + 5*time.Minute, + "stable", + ) + + assert.NotNil(t, reconciler) + assert.Equal(t, 5*time.Minute, reconciler.interval) + assert.Equal(t, "stable", reconciler.channel) + assert.Equal(t, metricsInstance, reconciler.metrics) + assert.NotNil(t, reconciler.client) + // Note: We don't check the log field directly as it may have been modified with WithField + }) + + t.Run("with empty channel", func(t *testing.T) { + reconciler := NewKubeReconciler( + logger, + config, + metricsInstance, + 5*time.Minute, + "", // Empty channel + ) + + assert.Nil(t, reconciler, "Should return nil when channel is empty") + }) + + t.Run("with different valid channels", func(t *testing.T) { + // Only test official Kubernetes channels + channels := []string{"stable", "latest", "latest-1.28", "latest-1.27", "latest-1.26"} + + for _, channel := range channels { + t.Run(channel, func(t *testing.T) { + reconciler := NewKubeReconciler( + logger, + config, + metricsInstance, + 5*time.Minute, + channel, + ) + + assert.NotNil(t, reconciler) + assert.Equal(t, channel, reconciler.channel) + }) + } + }) + + t.Run("with invalid channels", func(t *testing.T) { + // These should be rejected if we add validation + invalidChannels := []string{"rapid", "regular", "extended", "invalid-channel"} + + for _, channel := range invalidChannels { + t.Run(channel, func(t *testing.T) { + // For now, they still create reconcilers but would fail at runtime + // In the future, we might want to validate channels in the constructor + reconciler := NewKubeReconciler( + logger, + config, + metricsInstance, + 5*time.Minute, + channel, + ) + + // Currently accepts any non-empty channel + assert.NotNil(t, reconciler) + assert.Equal(t, channel, reconciler.channel) + }) + } + }) +} + +func TestGetLatestVersion(t *testing.T) { + tests := []struct { + name string + channel string + serverResponse string + serverStatus int + expectedResult string + expectedError bool + }{ + { + name: "stable channel", + channel: "stable", + serverResponse: "v1.28.2\n", + serverStatus: http.StatusOK, + expectedResult: "v1.28.2", + expectedError: false, + }, + { + name: "latest channel", + channel: "latest", + serverResponse: "v1.29.0-alpha.1\n", + serverStatus: http.StatusOK, + expectedResult: "v1.29.0-alpha.1", + expectedError: false, + }, + { + name: "channel already has .txt extension", + channel: "stable.txt", + serverResponse: "v1.28.2\n", + serverStatus: http.StatusOK, + expectedResult: "v1.28.2", + expectedError: false, + }, + { + name: "server error", + channel: "stable", + serverResponse: "Not Found", + serverStatus: http.StatusNotFound, + expectedResult: "", + expectedError: true, + }, + { + name: "empty response", + channel: "stable", + serverResponse: "", + serverStatus: http.StatusOK, + expectedResult: "", + expectedError: true, + }, + { + name: "response with whitespace", + channel: "stable", + serverResponse: " v1.28.2 \n\n", + serverStatus: http.StatusOK, + expectedResult: "v1.28.2", + expectedError: false, + }, + { + name: "latest-1.28 channel", + channel: "latest-1.28", + serverResponse: "v1.28.5\n", + serverStatus: http.StatusOK, + expectedResult: "v1.28.5", + expectedError: false, + }, + { + name: "latest-1.27 channel", + channel: "latest-1.27", + serverResponse: "v1.27.8\n", + serverStatus: http.StatusOK, + expectedResult: "v1.27.8", + expectedError: false, + }, + { + name: "invalid channel should error", + channel: "rapid", + serverResponse: "Not Found", + serverStatus: http.StatusNotFound, + expectedResult: "", + expectedError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create test server that matches the expected behavior + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + expectedPath := "/" + tt.channel + if !strings.HasSuffix(expectedPath, ".txt") { + expectedPath += ".txt" + } + assert.Equal(t, expectedPath, r.URL.Path) + + w.WriteHeader(tt.serverStatus) + w.Write([]byte(tt.serverResponse)) + })) + defer server.Close() + + // Create a test version of getLatestVersion that uses our test server + testGetLatestVersion := func(channel string) (string, error) { + if !strings.HasSuffix(channel, ".txt") { + channel += ".txt" + } + + resp, err := http.Get(server.URL + "/" + channel) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", assert.AnError + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + version := strings.TrimSpace(string(body)) + if version == "" { + return "", assert.AnError + } + + return version, nil + } + + result, err := testGetLatestVersion(tt.channel) + + if tt.expectedError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedResult, result) + } + }) + } +} + +// TestClusterVersionScheduler_reconcile_Integration tests the reconcile method +// by mocking the Kubernetes discovery and HTTP calls +func TestClusterVersionScheduler_reconcile_Integration(t *testing.T) { + tests := []struct { + name string + currentVersion string + latestVersion string + channel string + expectedUpToDate bool + kubernetesAPIError bool + channelServerError bool + expectedError bool + }{ + { + name: "cluster is up to date", + currentVersion: "v1.28.2", + latestVersion: "v1.28.2", + channel: "stable", + expectedUpToDate: true, + expectedError: false, + }, + { + name: "cluster needs update", + currentVersion: "v1.27.1", + latestVersion: "v1.28.2", + channel: "stable", + expectedUpToDate: false, + expectedError: false, + }, + { + name: "cluster is ahead of stable", + currentVersion: "v1.29.0", + latestVersion: "v1.28.2", + channel: "stable", + expectedUpToDate: true, + expectedError: false, + }, + { + name: "current version with metadata", + currentVersion: "v1.28.2-gke.1", + latestVersion: "v1.28.2", + channel: "stable", + expectedUpToDate: true, + expectedError: false, + }, + { + name: "kubernetes api error", + currentVersion: "v1.28.2", + latestVersion: "v1.28.2", + channel: "stable", + kubernetesAPIError: true, + expectedError: true, + }, + { + name: "channel server error", + currentVersion: "v1.28.2", + latestVersion: "v1.28.2", + channel: "stable", + channelServerError: true, + expectedError: true, + }, + // Platform-specific version format tests (metadata handling) + { + name: "EKS version format - up to date", + currentVersion: "v1.28.2-eks-a5565ad", + latestVersion: "v1.28.2", + channel: "stable", // Always compare against upstream Kubernetes + expectedUpToDate: true, + expectedError: false, + }, + { + name: "EKS version format - needs update", + currentVersion: "v1.27.9-eks-2f008fe", + latestVersion: "v1.28.2", + channel: "stable", + expectedUpToDate: false, + expectedError: false, + }, + { + name: "EKS version format - ahead of stable", + currentVersion: "v1.29.0-eks-5e0fdde", + latestVersion: "v1.28.2", + channel: "stable", + expectedUpToDate: true, + expectedError: false, + }, + { + name: "EKS version with longer metadata", + currentVersion: "v1.28.2-eks-a5565ad-20231102", + latestVersion: "v1.28.2", + channel: "stable", + expectedUpToDate: true, + expectedError: false, + }, + { + name: "EKS Fargate version format", + currentVersion: "v1.28.2-eks-fargate-a5565ad", + latestVersion: "v1.28.2", + channel: "stable", + expectedUpToDate: true, + expectedError: false, + }, + { + name: "GKE version format - up to date", + currentVersion: "v1.28.2-gke.1034000", + latestVersion: "v1.28.2", + channel: "stable", // Always compare against upstream Kubernetes + expectedUpToDate: true, + expectedError: false, + }, + { + name: "GKE version format - needs update", + currentVersion: "v1.27.9-gke.1034000", + latestVersion: "v1.28.2", + channel: "stable", + expectedUpToDate: false, + expectedError: false, + }, + { + name: "GKE version format - ahead of stable", + currentVersion: "v1.29.0-gke.1034000", + latestVersion: "v1.28.2", + channel: "stable", + expectedUpToDate: true, + expectedError: false, + }, + { + name: "GKE version format with newer patch", + currentVersion: "v1.29.0-gke.1234567", + latestVersion: "v1.28.2", + channel: "stable", // Comparing against upstream stable + expectedUpToDate: true, + expectedError: false, + }, + { + name: "GKE version format - stable", + currentVersion: "v1.28.2-gke.1034000", + latestVersion: "v1.28.2", + channel: "stable", + expectedUpToDate: true, + expectedError: false, + }, + { + name: "GKE version format - extended support", + currentVersion: "v1.26.15-gke.4901000", + latestVersion: "v1.28.2", + channel: "stable", + expectedUpToDate: false, + expectedError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create fake Kubernetes client + fakeClient := fakeclientset.NewSimpleClientset() + fakeDiscovery := fakeClient.Discovery().(*fakediscovery.FakeDiscovery) + + if tt.kubernetesAPIError { + fakeDiscovery.PrependReactor("*", "*", func(action clienttesting.Action) (handled bool, ret runtime.Object, err error) { + return true, nil, assert.AnError + }) + } else { + fakeDiscovery.FakedServerVersion = &version.Info{ + GitVersion: tt.currentVersion, + } + } + + // Create test server for Kubernetes release channel + var server *httptest.Server + if tt.channelServerError { + server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte("Internal Server Error")) + })) + } else { + server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + expectedPath := "/" + tt.channel + ".txt" + assert.Equal(t, expectedPath, r.URL.Path) + + // Simulate that invalid channels return 404 + if !isValidKubernetesChannel(tt.channel) { + w.WriteHeader(http.StatusNotFound) + w.Write([]byte("Not Found")) + return + } + + w.WriteHeader(http.StatusOK) + w.Write([]byte(tt.latestVersion)) + })) + } + defer server.Close() + + // Create logger + logger := logrus.NewEntry(logrus.New()) + logger.Logger.SetOutput(io.Discard) + + // Create metrics registry + registry := prometheus.NewRegistry() + kubeClient := fake.NewFakeClient() + metricsInstance := metrics.New(logger, registry, kubeClient) + + // Create reconciler with a custom getLatestStableVersion function + reconciler := &testableClusterVersionScheduler{ + ClusterVersionScheduler: ClusterVersionScheduler{ + client: fakeClient, + log: logger, + metrics: metricsInstance, + interval: 5 * time.Minute, + channel: tt.channel, + }, + testServerURL: server.URL, + } + + // Execute reconcile + ctx := context.Background() + var _ context.Context = ctx + err := reconciler.reconcile() + + if tt.expectedError { + assert.Error(t, err) + return + } + + assert.NoError(t, err) + + // Check metrics were registered correctly + metricFamilies, err := registry.Gather() + require.NoError(t, err) + + // Find the kubernetes version metric + var kubeMetric *dto.MetricFamily + for _, mf := range metricFamilies { + if mf.GetName() == "version_checker_is_latest_kube_version" { + kubeMetric = mf + break + } + } + + require.NotNil(t, kubeMetric, "Kubernetes version metric should be present") + require.Len(t, kubeMetric.GetMetric(), 1, "Should have exactly one metric value") + + metric := kubeMetric.GetMetric()[0] + expectedValue := 0.0 + if tt.expectedUpToDate { + expectedValue = 1.0 + } + + assert.Equal(t, expectedValue, metric.GetGauge().GetValue()) + + // Check labels + labels := metric.GetLabel() + assert.Len(t, labels, 3, "Should have 3 labels: current_version, latest_version, channel") + + labelMap := make(map[string]string) + for _, label := range labels { + labelMap[label.GetName()] = label.GetValue() + } + + assert.Equal(t, tt.channel, labelMap["channel"]) + }) + } +} + +// testableClusterVersionScheduler is a wrapper that allows us to override the HTTP calls for testing +type testableClusterVersionScheduler struct { + ClusterVersionScheduler + testServerURL string +} + +func (s *testableClusterVersionScheduler) reconcile() error { + // Get current cluster version + current, err := s.client.Discovery().ServerVersion() + if err != nil { + return err + } + + // Get latest version using our test server + latest, err := s.getLatestVersionFromTestServer(s.channel) + if err != nil { + return err + } + + // Use the same logic as the main code + latestSemVer := semver.Parse(latest) + currentSemVer := semver.Parse(current.GitVersion) + + // Create version strings without metadata for comparison (same as main code) + currentSemVerNoMeta := fmt.Sprintf("%d.%d.%d", currentSemVer.Major(), currentSemVer.Minor(), currentSemVer.Patch()) + latestSemVerNoMeta := fmt.Sprintf("%d.%d.%d", latestSemVer.Major(), latestSemVer.Minor(), latestSemVer.Patch()) + + // Parse the versions without metadata for comparison + currentComparable := semver.Parse(currentSemVerNoMeta) + latestComparable := semver.Parse(latestSemVerNoMeta) + + s.metrics.RegisterKubeVersion(!currentComparable.LessThan(latestComparable), + currentSemVerNoMeta, latestSemVerNoMeta, + s.channel, + ) + + return nil +} + +func (s *testableClusterVersionScheduler) getLatestVersionFromTestServer(channel string) (string, error) { + if !strings.HasSuffix(channel, ".txt") { + channel += ".txt" + } + + resp, err := http.Get(s.testServerURL + "/" + channel) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", assert.AnError + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + version := strings.TrimSpace(string(body)) + if version == "" { + return "", assert.AnError + } + + return version, nil +} + +func (s *testableClusterVersionScheduler) Start(ctx context.Context) error { + s.log.Info("Starting Kubernetes version checker") + s.runScheduler(ctx) + return nil +} + +func (s *testableClusterVersionScheduler) runScheduler(ctx context.Context) { + ticker := time.NewTicker(s.interval) + defer ticker.Stop() + + // Run once immediately, then on interval + _ = s.reconcile() + + for { + select { + case <-ctx.Done(): + s.log.Info("Kubernetes version checker stopped") + return + case <-ticker.C: + if err := s.reconcile(); err != nil { + s.log.WithError(err).Error("Failed to reconcile cluster version") + } + } + } +} + +func TestClusterVersionScheduler_Start(t *testing.T) { + t.Run("context cancellation stops scheduler", func(t *testing.T) { + // Create logger + logger := logrus.NewEntry(logrus.New()) + logger.Logger.SetOutput(io.Discard) + + // Create metrics registry + registry := prometheus.NewRegistry() + kubeClient := fake.NewFakeClient() + metricsInstance := metrics.New(logger, registry, kubeClient) + + // Create fake Kubernetes client that won't fail + fakeClient := fakeclientset.NewSimpleClientset() + fakeDiscovery := fakeClient.Discovery().(*fakediscovery.FakeDiscovery) + fakeDiscovery.FakedServerVersion = &version.Info{ + GitVersion: "v1.28.2", + } + + // Create a simple test server for the channel + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("v1.28.2")) + })) + defer server.Close() + + // Create a testable reconciler + reconciler := &testableClusterVersionScheduler{ + ClusterVersionScheduler: ClusterVersionScheduler{ + client: fakeClient, + log: logger, + metrics: metricsInstance, + interval: 50 * time.Millisecond, // Short interval for testing + channel: "stable", + }, + testServerURL: server.URL, + } + + // Start the reconciler with a short timeout + ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) + defer cancel() + + // This should run for the timeout duration and then stop gracefully + err := reconciler.Start(ctx) + assert.NoError(t, err, "Start should complete without error when context is cancelled") + }) + + t.Run("nil reconciler handling", func(t *testing.T) { + logger := logrus.NewEntry(logrus.New()) + logger.Logger.SetOutput(io.Discard) + + kubeClient := fake.NewFakeClient() + metricsInstance := metrics.New(logger, prometheus.NewRegistry(), kubeClient) + + config := &rest.Config{ + Host: "https://localhost:8080", + } + + // Create reconciler with empty channel (should return nil) + reconciler := NewKubeReconciler( + logger, + config, + metricsInstance, + 5*time.Minute, + "", // Empty channel + ) + + // Verify it's nil and we don't try to start it + assert.Nil(t, reconciler) + }) +} + +func TestClusterVersionScheduler_runScheduler(t *testing.T) { + // Create logger + logger := logrus.NewEntry(logrus.New()) + logger.Logger.SetOutput(io.Discard) + + // Create metrics registry + registry := prometheus.NewRegistry() + kubeClient := fake.NewFakeClient() + metricsInstance := metrics.New(logger, registry, kubeClient) + + // Create fake Kubernetes client + fakeClient := fakeclientset.NewSimpleClientset() + fakeDiscovery := fakeClient.Discovery().(*fakediscovery.FakeDiscovery) + fakeDiscovery.FakedServerVersion = &version.Info{ + GitVersion: "v1.28.2", + } + + // Create test server + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("v1.28.2")) + })) + defer server.Close() + + // Create testable reconciler + reconciler := &testableClusterVersionScheduler{ + ClusterVersionScheduler: ClusterVersionScheduler{ + client: fakeClient, + log: logger, + metrics: metricsInstance, + interval: 30 * time.Millisecond, // Very short interval for testing + channel: "stable", + }, + testServerURL: server.URL, + } + + // Run the scheduler for a short time + ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) + defer cancel() + + // This should complete without panicking and run at least one + // reconciliation loop + assert.NotPanics(t, func() { + reconciler.runScheduler(ctx) + }, "runScheduler should not panic") +} + +func TestChannelValidation(t *testing.T) { + tests := []struct { + name string + channel string + isValid bool + }{ + {"stable channel", "stable", true}, + {"latest channel", "latest", true}, + {"latest-1.28", "latest-1.28", true}, + {"latest-1.27", "latest-1.27", true}, + {"latest-1.26", "latest-1.26", true}, + {"completely invalid", "invalid-channel", false}, + {"empty channel", "", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + isValid := isValidKubernetesChannel(tt.channel) + assert.Equal(t, tt.isValid, isValid) + }) + } +} From 75915d689a74da002ebcf57fcc5066b37dc9ae7d Mon Sep 17 00:00:00 2001 From: Maria Reynoso Date: Tue, 28 Oct 2025 16:48:17 +0100 Subject: [PATCH 8/8] tidy --- pkg/controller/kube_controller_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/controller/kube_controller_test.go b/pkg/controller/kube_controller_test.go index cc5bfa39..4b4b125f 100644 --- a/pkg/controller/kube_controller_test.go +++ b/pkg/controller/kube_controller_test.go @@ -313,11 +313,12 @@ func TestClusterVersionScheduler_reconcile_Integration(t *testing.T) { expectedError: true, }, // Platform-specific version format tests (metadata handling) + // EKS and GKE platform versions { name: "EKS version format - up to date", currentVersion: "v1.28.2-eks-a5565ad", latestVersion: "v1.28.2", - channel: "stable", // Always compare against upstream Kubernetes + channel: "stable", expectedUpToDate: true, expectedError: false, }, @@ -709,13 +710,12 @@ func TestClusterVersionScheduler_runScheduler(t *testing.T) { client: fakeClient, log: logger, metrics: metricsInstance, - interval: 30 * time.Millisecond, // Very short interval for testing + interval: 30 * time.Millisecond, channel: "stable", }, testServerURL: server.URL, } - // Run the scheduler for a short time ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond) defer cancel()