diff --git a/Makefile b/Makefile index 48e926929..ecfd5f292 100644 --- a/Makefile +++ b/Makefile @@ -229,7 +229,7 @@ extension-developer-e2e: $(OPERATOR_SDK) $(KUSTOMIZE) #EXHELP Run extension crea test/extension-developer-e2e/setup.sh $(OPERATOR_SDK) $(CONTAINER_RUNTIME) $(KUSTOMIZE) ${LOCAL_REGISTRY_HOST} ${CLUSTER_REGISTRY_HOST} go test -count=1 -v ./test/extension-developer-e2e/... -UNIT_TEST_DIRS := $(shell go list ./... | grep -v /test/) +UNIT_TEST_DIRS := $(shell go list ./... | grep -vE "/test/|/testutils") COVERAGE_UNIT_DIR := $(ROOT_DIR)/coverage/unit .PHONY: envtest-k8s-bins #HELP Uses setup-envtest to download and install the binaries required to run ENVTEST-test based locally at the project/bin directory. diff --git a/commitchecker.yaml b/commitchecker.yaml index c6d5b4d5d..edb6c9cc0 100644 --- a/commitchecker.yaml +++ b/commitchecker.yaml @@ -1,4 +1,4 @@ -expectedMergeBase: 3d6a33b60dab6aedec2b676eba3a7631d3961340 +expectedMergeBase: 3ad622560d7872e336943a22b8e70923d2dc9c07 upstreamBranch: main upstreamOrg: operator-framework upstreamRepo: operator-controller diff --git a/config/overlays/prometheus/prometheus_rule.yaml b/config/overlays/prometheus/prometheus_rule.yaml index 5bd7e120b..b7e3fcdaf 100644 --- a/config/overlays/prometheus/prometheus_rule.yaml +++ b/config/overlays/prometheus/prometheus_rule.yaml @@ -57,3 +57,15 @@ spec: keep_firing_for: 1d annotations: description: "catalogd using high cpu resources for 5 minutes: {{ $value | printf \"%.2f\" }}%" + - alert: operator-controller-api-call-rate + expr: sum(rate(rest_client_requests_total{job=~"operator-controller-service"}[5m])) > 10 + for: 5m + keep_firing_for: 1d + annotations: + description: "operator-controller making excessive API calls for 5 minutes: {{ $value | printf \"%.2f\" }}/sec" + - alert: catalogd-api-call-rate + expr: sum(rate(rest_client_requests_total{job=~"catalogd-service"}[5m])) > 5 + for: 5m + keep_firing_for: 1d + annotations: + description: "catalogd making excessive API calls for 5 minutes: {{ $value | printf \"%.2f\" }}/sec" diff --git a/test/utils/artifacts.go b/internal/shared/util/testutils/artifacts.go similarity index 99% rename from test/utils/artifacts.go rename to internal/shared/util/testutils/artifacts.go index acb523ade..485128c83 100644 --- a/test/utils/artifacts.go +++ b/internal/shared/util/testutils/artifacts.go @@ -1,4 +1,4 @@ -package utils +package testutils import ( "context" diff --git a/test/utils/summary.go b/internal/shared/util/testutils/summary.go similarity index 79% rename from test/utils/summary.go rename to internal/shared/util/testutils/summary.go index d91ae3239..79328f9ef 100644 --- a/test/utils/summary.go +++ b/internal/shared/util/testutils/summary.go @@ -1,4 +1,4 @@ -package utils +package testutils import ( "context" @@ -42,14 +42,16 @@ type xychart struct { } type githubSummary struct { - client api.Client - Pods []string + client api.Client + Pods []string + alertsFiring bool } func NewSummary(c api.Client, pods ...string) githubSummary { return githubSummary{ - client: c, - Pods: pods, + client: c, + Pods: pods, + alertsFiring: false, } } @@ -60,7 +62,7 @@ func NewSummary(c api.Client, pods ...string) githubSummary { // yLabel - Label of the Y axis i.e. "KB/s", "MB", etc. // scaler - Constant by which to scale the results. For instance, cpu usage is more human-readable // as "mCPU" vs "CPU", so we scale the results by a factor of 1,000. -func (s githubSummary) PerformanceQuery(title, pod, query string, yLabel string, scaler float64) (string, error) { +func (s *githubSummary) PerformanceQuery(title, pod, query, yLabel string, scaler float64) (string, error) { v1api := v1.NewAPI(s.client) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() @@ -90,8 +92,9 @@ func (s githubSummary) PerformanceQuery(title, pod, query string, yLabel string, formattedData := make([]string, 0) // matrix does not allow [] access, so we just do one iteration for the single result for _, metric := range matrix { - if len(metric.Values) < 1 { - return "", fmt.Errorf("expected at least one data point; got: %d", len(metric.Values)) + if len(metric.Values) < 2 { + // A graph with one data point means something with the collection was wrong + return "", fmt.Errorf("expected at least two data points; got: %d", len(metric.Values)) } for _, sample := range metric.Values { floatSample := float64(sample.Value) * scaler @@ -115,7 +118,7 @@ func (s githubSummary) PerformanceQuery(title, pod, query string, yLabel string, // Alerts queries the prometheus server for alerts and generates markdown output for anything found. // If no alerts are found, the alerts section will contain only "None." in the final output. -func (s githubSummary) Alerts() (string, error) { +func (s *githubSummary) Alerts() (string, error) { v1api := v1.NewAPI(s.client) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() @@ -136,6 +139,7 @@ func (s githubSummary) Alerts() (string, error) { switch a.State { case v1.AlertStateFiring: firingAlerts = append(firingAlerts, aConv) + s.alertsFiring = true case v1.AlertStatePending: pendingAlerts = append(pendingAlerts, aConv) // Ignore AlertStateInactive; the alerts endpoint doesn't return them @@ -172,28 +176,34 @@ func executeTemplate(templateFile string, obj any) (string, error) { // The markdown is template-driven; the summary methods are called from within the // template. This allows us to add or change queries (hopefully) without needing to // touch code. The summary will be output to a file supplied by the env target. -func PrintSummary(envTarget string) error { +func PrintSummary(path string) error { + if path == "" { + fmt.Printf("No summary output path specified; skipping") + return nil + } + client, err := api.NewClient(api.Config{ Address: defaultPromUrl, }) if err != nil { - fmt.Printf("Error creating prometheus client: %v\n", err) - os.Exit(1) + fmt.Printf("warning: failed to initialize promQL client: %v", err) + return nil } summary := NewSummary(client, "operator-controller", "catalogd") - summaryMarkdown, err := executeTemplate(summaryTemplate, summary) + summaryMarkdown, err := executeTemplate(summaryTemplate, &summary) if err != nil { - return err + fmt.Printf("warning: failed to generate e2e test summary: %v", err) + return nil } - if path := os.Getenv(envTarget); path != "" { - err = os.WriteFile(path, []byte(summaryMarkdown), 0o600) - if err != nil { - return err - } - fmt.Printf("Test summary output to %s successful\n", envTarget) - } else { - fmt.Printf("No summary output specified; skipping") + err = os.WriteFile(path, []byte(summaryMarkdown), 0o600) + if err != nil { + fmt.Printf("warning: failed to write e2e test summary output to %s: %v", path, err) + return nil + } + fmt.Printf("Test summary output to %s successful\n", path) + if summary.alertsFiring { + return fmt.Errorf("performance alerts encountered during test run; please check e2e test summary for details") } return nil } diff --git a/test/utils/templates/alert.md.tmpl b/internal/shared/util/testutils/templates/alert.md.tmpl similarity index 100% rename from test/utils/templates/alert.md.tmpl rename to internal/shared/util/testutils/templates/alert.md.tmpl diff --git a/test/utils/templates/mermaid_chart.md.tmpl b/internal/shared/util/testutils/templates/mermaid_chart.md.tmpl similarity index 100% rename from test/utils/templates/mermaid_chart.md.tmpl rename to internal/shared/util/testutils/templates/mermaid_chart.md.tmpl diff --git a/test/utils/templates/summary.md.tmpl b/internal/shared/util/testutils/templates/summary.md.tmpl similarity index 74% rename from test/utils/templates/summary.md.tmpl rename to internal/shared/util/testutils/templates/summary.md.tmpl index c094d49f3..b1372b874 100644 --- a/test/utils/templates/summary.md.tmpl +++ b/internal/shared/util/testutils/templates/summary.md.tmpl @@ -11,6 +11,13 @@ #### CPU Usage {{$.PerformanceQuery "CPU Usage" $pod `rate(container_cpu_usage_seconds_total{pod=~"%s.*",container="manager"}[5m])[5m:]` "mCPU" 1000}} + +#### API Queries Total +{{$.PerformanceQuery "API Queries Total" $pod `sum(rest_client_requests_total{job=~"%s.*"})[5m:]` "# queries" 1}} + +#### API Query Rate +{{$.PerformanceQuery "API Queries/sec" $pod `sum(rate(rest_client_requests_total{job=~"%s.*"}[5m]))[5m:]` "per sec" 1}} + {{end}} {{- end}} diff --git a/test/utils/utils.go b/internal/shared/util/testutils/utils.go similarity index 97% rename from test/utils/utils.go rename to internal/shared/util/testutils/utils.go index db6d25a7f..94eb2d5b3 100644 --- a/test/utils/utils.go +++ b/internal/shared/util/testutils/utils.go @@ -1,4 +1,4 @@ -package utils +package testutils import ( "os/exec" diff --git a/requirements.txt b/requirements.txt index 327eebf35..241ff2c8a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ Babel==2.17.0 beautifulsoup4==4.13.4 certifi==2025.8.3 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click==8.1.8 colorama==0.4.6 cssselect==1.3.0 diff --git a/test/e2e/cluster_extension_install_test.go b/test/e2e/cluster_extension_install_test.go index 3c9dcbc2a..7c070cb44 100644 --- a/test/e2e/cluster_extension_install_test.go +++ b/test/e2e/cluster_extension_install_test.go @@ -25,7 +25,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ocv1 "github.com/operator-framework/operator-controller/api/v1" - "github.com/operator-framework/operator-controller/test/utils" + utils "github.com/operator-framework/operator-controller/internal/shared/util/testutils" ) const ( diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index cf4f474eb..0bf84bec8 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -16,7 +16,7 @@ import ( ocv1 "github.com/operator-framework/operator-controller/api/v1" "github.com/operator-framework/operator-controller/internal/operator-controller/scheme" - utils "github.com/operator-framework/operator-controller/test/utils" + utils "github.com/operator-framework/operator-controller/internal/shared/util/testutils" ) var ( @@ -25,7 +25,7 @@ var ( ) const ( - testSummaryOutputEnvVar = "GITHUB_STEP_SUMMARY" + testSummaryOutputEnvVar = "E2E_SUMMARY_OUTPUT" testCatalogRefEnvVar = "CATALOG_IMG" testCatalogName = "test-catalog" latestImageTag = "latest" @@ -40,9 +40,16 @@ func TestMain(m *testing.M) { utilruntime.Must(err) res := m.Run() - err = utils.PrintSummary(testSummaryOutputEnvVar) - if err != nil { - fmt.Println("PrintSummary error", err) + path := os.Getenv(testSummaryOutputEnvVar) + if path == "" { + fmt.Printf("Note: E2E_SUMMARY_OUTPUT is unset; skipping summary generation") + } else { + err = utils.PrintSummary(path) + if err != nil { + // Fail the run if alerts are found + fmt.Printf("%v", err) + os.Exit(1) + } } os.Exit(res) } diff --git a/test/e2e/metrics_test.go b/test/e2e/metrics_test.go index 85908f4d5..a95f16c2c 100644 --- a/test/e2e/metrics_test.go +++ b/test/e2e/metrics_test.go @@ -25,7 +25,7 @@ import ( "github.com/stretchr/testify/require" "k8s.io/apimachinery/pkg/util/rand" - "github.com/operator-framework/operator-controller/test/utils" + utils "github.com/operator-framework/operator-controller/internal/shared/util/testutils" ) // TestOperatorControllerMetricsExportedEndpoint verifies that the metrics endpoint for the operator controller diff --git a/test/e2e/network_policy_test.go b/test/e2e/network_policy_test.go index 0f3979d23..00143df41 100644 --- a/test/e2e/network_policy_test.go +++ b/test/e2e/network_policy_test.go @@ -15,7 +15,7 @@ import ( "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/operator-framework/operator-controller/test/utils" + utils "github.com/operator-framework/operator-controller/internal/shared/util/testutils" ) const ( diff --git a/test/experimental-e2e/experimental_e2e_test.go b/test/experimental-e2e/experimental_e2e_test.go index 8ead64e45..39c16e97f 100644 --- a/test/experimental-e2e/experimental_e2e_test.go +++ b/test/experimental-e2e/experimental_e2e_test.go @@ -27,7 +27,7 @@ import ( ocv1 "github.com/operator-framework/operator-controller/api/v1" "github.com/operator-framework/operator-controller/internal/operator-controller/scheme" - "github.com/operator-framework/operator-controller/test/utils" + utils "github.com/operator-framework/operator-controller/internal/shared/util/testutils" ) const ( diff --git a/test/upgrade-e2e/post_upgrade_test.go b/test/upgrade-e2e/post_upgrade_test.go index 221182bb6..b196db356 100644 --- a/test/upgrade-e2e/post_upgrade_test.go +++ b/test/upgrade-e2e/post_upgrade_test.go @@ -19,7 +19,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ocv1 "github.com/operator-framework/operator-controller/api/v1" - "github.com/operator-framework/operator-controller/test/utils" + utils "github.com/operator-framework/operator-controller/internal/shared/util/testutils" ) const (