From 175036b7fee99ef683431b1e7c7ea615a85ebb80 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 29 Mar 2024 10:07:38 -0700 Subject: [PATCH 01/36] Add https traffic support for automated tests --- tests/framework/request.go | 14 +++- tests/suite/graceful_recovery_test.go | 58 +++++++++++++++++ .../graceful-recovery/cafe-routes.yaml | 37 +++++++++++ .../graceful-recovery/cafe-secret.yaml | 8 +++ .../manifests/graceful-recovery/cafe.yaml | 65 +++++++++++++++++++ .../manifests/graceful-recovery/gateway.yaml | 20 ++++++ tests/suite/system_suite_test.go | 3 +- 7 files changed, 203 insertions(+), 2 deletions(-) create mode 100644 tests/suite/graceful_recovery_test.go create mode 100644 tests/suite/manifests/graceful-recovery/cafe-routes.yaml create mode 100644 tests/suite/manifests/graceful-recovery/cafe-secret.yaml create mode 100644 tests/suite/manifests/graceful-recovery/cafe.yaml create mode 100644 tests/suite/manifests/graceful-recovery/gateway.yaml diff --git a/tests/framework/request.go b/tests/framework/request.go index 674a35ed45..a63f4c96e3 100644 --- a/tests/framework/request.go +++ b/tests/framework/request.go @@ -3,6 +3,7 @@ package framework import ( "bytes" "context" + "crypto/tls" "fmt" "net" "net/http" @@ -34,7 +35,18 @@ func Get(url, address string, timeout time.Duration) (int, string, error) { return 0, "", err } - resp, err := http.DefaultClient.Do(req) + var resp *http.Response + if strings.HasPrefix(url, "https") { + customTransport := http.DefaultTransport.(*http.Transport).Clone() + // similar to how in our examples with https requests we run our curl command + // we turn off verification of the certificate, we do the same here + customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: false} // #nosec G402 + client := &http.Client{Transport: customTransport} + resp, err = client.Do(req) + } else { + resp, err = http.DefaultClient.Do(req) + } + if err != nil { return 0, "", err } diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go new file mode 100644 index 0000000000..d3d20ad125 --- /dev/null +++ b/tests/suite/graceful_recovery_test.go @@ -0,0 +1,58 @@ +package suite + +import ( + "fmt" + "net/http" + "strconv" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + core "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/nginxinc/nginx-gateway-fabric/tests/framework" +) + +var _ = Describe("Graceful Recovery test", Label("nfr", "graceful-recovery"), func() { + files := []string{ + "graceful-recovery/cafe.yaml", + "graceful-recovery/cafe-secret.yaml", + "graceful-recovery/gateway.yaml", + "graceful-recovery/cafe-routes.yaml", + } + ns := &core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "graceful-recovery", + }, + } + + BeforeEach(func() { + Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) + }) + + AfterEach(func() { + Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) + Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + }) + + It("sends traffic", func() { + teaURL := "https://cafe.example.com/tea" + coffeeURL := "http://cafe.example.com/coffee" + if portFwdPort != 0 { + teaURL = fmt.Sprintf("https://cafe.example.com:%s/tea", strconv.Itoa(portFwdPort)) + coffeeURL = fmt.Sprintf("http://cafe.example.com:%s/coffee", strconv.Itoa(portFwdPort)) + } + status, body, err := framework.Get(teaURL, address, timeoutConfig.RequestTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(status).To(Equal(http.StatusOK)) + Expect(body).To(ContainSubstring("URI: /tea")) + + status, body, err = framework.Get(coffeeURL, address, timeoutConfig.RequestTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(status).To(Equal(http.StatusOK)) + Expect(body).To(ContainSubstring("URI: /coffee")) + }) +}) diff --git a/tests/suite/manifests/graceful-recovery/cafe-routes.yaml b/tests/suite/manifests/graceful-recovery/cafe-routes.yaml new file mode 100644 index 0000000000..5d63141a97 --- /dev/null +++ b/tests/suite/manifests/graceful-recovery/cafe-routes.yaml @@ -0,0 +1,37 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: coffee +spec: + parentRefs: + - name: gateway + sectionName: http + hostnames: + - "cafe.example.com" + rules: + - matches: + - path: + type: PathPrefix + value: /coffee + backendRefs: + - name: coffee + port: 80 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: tea +spec: + parentRefs: + - name: gateway + sectionName: https + hostnames: + - "cafe.example.com" + rules: + - matches: + - path: + type: PathPrefix + value: /tea + backendRefs: + - name: tea + port: 80 diff --git a/tests/suite/manifests/graceful-recovery/cafe-secret.yaml b/tests/suite/manifests/graceful-recovery/cafe-secret.yaml new file mode 100644 index 0000000000..4510460bba --- /dev/null +++ b/tests/suite/manifests/graceful-recovery/cafe-secret.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: cafe-secret +type: kubernetes.io/tls +data: + tls.crt: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUNzakNDQVpvQ0NRQzdCdVdXdWRtRkNEQU5CZ2txaGtpRzl3MEJBUXNGQURBYk1Sa3dGd1lEVlFRRERCQmoKWVdabExtVjRZVzF3YkdVdVkyOXRNQjRYRFRJeU1EY3hOREl4TlRJek9Wb1hEVEl6TURjeE5ESXhOVEl6T1ZvdwpHekVaTUJjR0ExVUVBd3dRWTJGbVpTNWxlR0Z0Y0d4bExtTnZiVENDQVNJd0RRWUpLb1pJaHZjTkFRRUJCUUFECmdnRVBBRENDQVFvQ2dnRUJBTHFZMnRHNFc5aStFYzJhdnV4Q2prb2tnUUx1ek10U1Rnc1RNaEhuK3ZRUmxIam8KVzFLRnMvQVdlS25UUStyTWVKVWNseis4M3QwRGtyRThwUisxR2NKSE50WlNMb0NEYUlRN0Nhck5nY1daS0o4Qgo1WDNnVS9YeVJHZjI2c1REd2xzU3NkSEQ1U2U3K2Vab3NPcTdHTVF3K25HR2NVZ0VtL1Q1UEMvY05PWE0zZWxGClRPL051MStoMzROVG9BbDNQdTF2QlpMcDNQVERtQ0thaEROV0NWbUJQUWpNNFI4VERsbFhhMHQ5Z1o1MTRSRzUKWHlZWTNtdzZpUzIrR1dYVXllMjFuWVV4UEhZbDV4RHY0c0FXaGRXbElweHlZQlNCRURjczN6QlI2bFF1OWkxZAp0R1k4dGJ3blVmcUVUR3NZdWxzc05qcU95V1VEcFdJelhibHhJZVVDQXdFQUFUQU5CZ2txaGtpRzl3MEJBUXNGCkFBT0NBUUVBcjkrZWJ0U1dzSnhLTGtLZlRkek1ISFhOd2Y5ZXFVbHNtTXZmMGdBdWVKTUpUR215dG1iWjlpbXQKL2RnWlpYVE9hTElHUG9oZ3BpS0l5eVVRZVdGQ2F0NHRxWkNPVWRhbUloOGk0Q1h6QVJYVHNvcUNOenNNLzZMRQphM25XbFZyS2lmZHYrWkxyRi8vblc0VVNvOEoxaCtQeDljY0tpRDZZU0RVUERDRGh1RUtFWXcvbHpoUDJVOXNmCnl6cEJKVGQ4enFyM3paTjNGWWlITmgzYlRhQS82di9jU2lyamNTK1EwQXg4RWpzQzYxRjRVMTc4QzdWNWRCKzQKcmtPTy9QNlA0UFlWNTRZZHMvRjE2WkZJTHFBNENCYnExRExuYWRxamxyN3NPbzl2ZzNnWFNMYXBVVkdtZ2todAp6VlZPWG1mU0Z4OS90MDBHUi95bUdPbERJbWlXMGc9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== + tls.key: LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCk1JSUV2UUlCQURBTkJna3Foa2lHOXcwQkFRRUZBQVNDQktjd2dnU2pBZ0VBQW9JQkFRQzZtTnJSdUZ2WXZoSE4KbXI3c1FvNUtKSUVDN3N6TFVrNExFeklSNS9yMEVaUjQ2RnRTaGJQd0ZuaXAwMFBxekhpVkhKYy92TjdkQTVLeApQS1VmdFJuQ1J6YldVaTZBZzJpRU93bXF6WUhGbVNpZkFlVjk0RlAxOGtSbjl1ckV3OEpiRXJIUncrVW51L25tCmFMRHF1eGpFTVBweGhuRklCSnYwK1R3djNEVGx6TjNwUlV6dnpidGZvZCtEVTZBSmR6N3Rid1dTNmR6MHc1Z2kKbW9RelZnbFpnVDBJek9FZkV3NVpWMnRMZllHZWRlRVJ1VjhtR041c09va3R2aGxsMU1udHRaMkZNVHgySmVjUQo3K0xBRm9YVnBTS2NjbUFVZ1JBM0xOOHdVZXBVTHZZdFhiUm1QTFc4SjFINmhFeHJHTHBiTERZNmpzbGxBNlZpCk0xMjVjU0hsQWdNQkFBRUNnZ0VBQnpaRE50bmVTdWxGdk9HZlFYaHRFWGFKdWZoSzJBenRVVVpEcUNlRUxvekQKWlV6dHdxbkNRNlJLczUyandWNTN4cU9kUU94bTNMbjNvSHdNa2NZcEliWW82MjJ2dUczYnkwaVEzaFlsVHVMVgpqQmZCcS9UUXFlL2NMdngvSkczQWhFNmJxdFRjZFlXeGFmTmY2eUtpR1dzZk11WVVXTWs4MGVJVUxuRmZaZ1pOCklYNTlSOHlqdE9CVm9Sa3hjYTVoMW1ZTDFsSlJNM3ZqVHNHTHFybmpOTjNBdWZ3ZGRpK1VDbGZVL2l0K1EvZkUKV216aFFoTlRpNVFkRWJLVStOTnYvNnYvb2JvandNb25HVVBCdEFTUE05cmxFemIralQ1WHdWQjgvLzRGY3VoSwoyVzNpcjhtNHVlQ1JHSVlrbGxlLzhuQmZ0eVhiVkNocVRyZFBlaGlPM1FLQmdRRGlrR3JTOTc3cjg3Y1JPOCtQClpoeXltNXo4NVIzTHVVbFNTazJiOTI1QlhvakpZL2RRZDVTdFVsSWE4OUZKZnNWc1JRcEhHaTFCYzBMaTY1YjIKazR0cE5xcVFoUmZ1UVh0UG9GYXRuQzlPRnJVTXJXbDVJN0ZFejZnNkNQMVBXMEg5d2hPemFKZUdpZVpNYjlYTQoybDdSSFZOcC9jTDlYbmhNMnN0Q1lua2Iwd0tCZ1FEUzF4K0crakEyUVNtRVFWNXA1RnRONGcyamsyZEFjMEhNClRIQ2tTazFDRjhkR0Z2UWtsWm5ZbUt0dXFYeXNtekJGcnZKdmt2eUhqbUNYYTducXlpajBEdDZtODViN3BGcVAKQWxtajdtbXI3Z1pUeG1ZMXBhRWFLMXY4SDNINGtRNVl3MWdrTWRybVJHcVAvaTBGaDVpaGtSZS9DOUtGTFVkSQpDcnJjTzhkUVp3S0JnSHA1MzRXVWNCMVZibzFlYStIMUxXWlFRUmxsTWlwRFM2TzBqeWZWSmtFb1BZSEJESnp2ClIrdzZLREJ4eFoyWmJsZ05LblV0YlhHSVFZd3lGelhNcFB5SGxNVHpiZkJhYmJLcDFyR2JVT2RCMXpXM09PRkgKcmppb21TUm1YNmxhaDk0SjRHU0lFZ0drNGw1SHhxZ3JGRDZ2UDd4NGRjUktJWFpLZ0w2dVJSSUpBb0dCQU1CVApaL2p5WStRNTBLdEtEZHUrYU9ORW4zaGxUN3hrNXRKN3NBek5rbWdGMU10RXlQUk9Xd1pQVGFJbWpRbk9qbHdpCldCZ2JGcXg0M2ZlQ1Z4ZXJ6V3ZEM0txaWJVbWpCTkNMTGtYeGh3ZEVteFQwVit2NzZGYzgwaTNNYVdSNnZZR08KditwVVovL0F6UXdJcWZ6dlVmV2ZxdStrMHlhVXhQOGNlcFBIRyt0bEFvR0FmQUtVVWhqeFU0Ym5vVzVwVUhKegpwWWZXZXZ5TW54NWZyT2VsSmRmNzlvNGMvMHhVSjh1eFBFWDFkRmNrZW96dHNpaVFTNkN6MENRY09XVWxtSkRwCnVrdERvVzM3VmNSQU1BVjY3NlgxQVZlM0UwNm5aL2g2Tkd4Z28rT042Q3pwL0lkMkJPUm9IMFAxa2RjY1NLT3kKMUtFZlNnb1B0c1N1eEpBZXdUZmxDMXc9Ci0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K diff --git a/tests/suite/manifests/graceful-recovery/cafe.yaml b/tests/suite/manifests/graceful-recovery/cafe.yaml new file mode 100644 index 0000000000..2d03ae59ff --- /dev/null +++ b/tests/suite/manifests/graceful-recovery/cafe.yaml @@ -0,0 +1,65 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: coffee +spec: + replicas: 1 + selector: + matchLabels: + app: coffee + template: + metadata: + labels: + app: coffee + spec: + containers: + - name: coffee + image: nginxdemos/nginx-hello:plain-text + ports: + - containerPort: 8080 +--- +apiVersion: v1 +kind: Service +metadata: + name: coffee +spec: + ports: + - port: 80 + targetPort: 8080 + protocol: TCP + name: http + selector: + app: coffee +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: tea +spec: + replicas: 1 + selector: + matchLabels: + app: tea + template: + metadata: + labels: + app: tea + spec: + containers: + - name: tea + image: nginxdemos/nginx-hello:plain-text + ports: + - containerPort: 8080 +--- +apiVersion: v1 +kind: Service +metadata: + name: tea +spec: + ports: + - port: 80 + targetPort: 8080 + protocol: TCP + name: http + selector: + app: tea diff --git a/tests/suite/manifests/graceful-recovery/gateway.yaml b/tests/suite/manifests/graceful-recovery/gateway.yaml new file mode 100644 index 0000000000..6789002abb --- /dev/null +++ b/tests/suite/manifests/graceful-recovery/gateway.yaml @@ -0,0 +1,20 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: gateway +spec: + gatewayClassName: nginx + listeners: + - name: http + port: 80 + protocol: HTTP + hostname: "*.example.com" + - name: https + port: 443 + protocol: HTTPS + hostname: "*.example.com" + tls: + mode: Terminate + certificateRefs: + - kind: Secret + name: cafe-secret diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index d7786aaa3d..8b9050a94a 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -278,5 +278,6 @@ func isNFR(labelFilter string) bool { return strings.Contains(labelFilter, "nfr") || strings.Contains(labelFilter, "longevity") || strings.Contains(labelFilter, "performance") || - strings.Contains(labelFilter, "upgrade") + strings.Contains(labelFilter, "upgrade") || + strings.Contains(labelFilter, "graceful-recovery") } From f0f41563e9578e201c23eb1fd77988768649eed5 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 16 Apr 2024 16:06:24 -0700 Subject: [PATCH 02/36] Add NGF and NGINX container restart tests --- .../templates/deployment.yaml | 2 +- tests/framework/request.go | 2 +- tests/suite/graceful_recovery_test.go | 178 ++++++++++++++++-- .../manifests/graceful-recovery/cafe.yaml | 8 + tests/suite/system_suite_test.go | 5 +- 5 files changed, 177 insertions(+), 18 deletions(-) diff --git a/charts/nginx-gateway-fabric/templates/deployment.yaml b/charts/nginx-gateway-fabric/templates/deployment.yaml index 83aeafde21..b826740fee 100644 --- a/charts/nginx-gateway-fabric/templates/deployment.yaml +++ b/charts/nginx-gateway-fabric/templates/deployment.yaml @@ -173,7 +173,7 @@ spec: shareProcessNamespace: true securityContext: fsGroup: 1001 - runAsNonRoot: true + runAsNonRoot: {{ ne .Values.nginxGateway.securityContext.runAsNonRoot false }} {{- if .Values.tolerations }} tolerations: {{- toYaml .Values.tolerations | nindent 6 }} diff --git a/tests/framework/request.go b/tests/framework/request.go index a63f4c96e3..2fa78b35c3 100644 --- a/tests/framework/request.go +++ b/tests/framework/request.go @@ -40,7 +40,7 @@ func Get(url, address string, timeout time.Duration) (int, string, error) { customTransport := http.DefaultTransport.(*http.Transport).Clone() // similar to how in our examples with https requests we run our curl command // we turn off verification of the certificate, we do the same here - customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: false} // #nosec G402 + customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} // #nosec G402 client := &http.Client{Transport: customTransport} resp, err = client.Do(req) } else { diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index d3d20ad125..7d58ca1ccb 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -3,7 +3,9 @@ package suite import ( "fmt" "net/http" + "os/exec" "strconv" + "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -14,7 +16,7 @@ import ( "github.com/nginxinc/nginx-gateway-fabric/tests/framework" ) -var _ = Describe("Graceful Recovery test", Label("nfr", "graceful-recovery"), func() { +var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recovery"), func() { files := []string{ "graceful-recovery/cafe.yaml", "graceful-recovery/cafe-secret.yaml", @@ -27,32 +29,178 @@ var _ = Describe("Graceful Recovery test", Label("nfr", "graceful-recovery"), fu }, } + var teaURL, coffeeURL string + + BeforeAll(func() { + teaURL = "https://cafe.example.com/tea" + coffeeURL = "http://cafe.example.com/coffee" + if portFwdPort != 0 { + teaURL = fmt.Sprintf("https://cafe.example.com:%s/tea", strconv.Itoa(portFwdPort)) + coffeeURL = fmt.Sprintf("http://cafe.example.com:%s/coffee", strconv.Itoa(portFwdPort)) + } + }) + BeforeEach(func() { + // this test is unique in that NGF + teardown(releaseName) + + cfg := getDefaultSetupCfg() + cfg.nfr = true + + setup(cfg, "--set", "nginxGateway.securityContext.runAsNonRoot=false") + Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) + // Sometimes the traffic would error with code 502, after implementing this sleep it stopped. + time.Sleep(2 * time.Second) + + expectWorkingTraffic(teaURL, coffeeURL) }) - AfterEach(func() { + AfterAll(func() { Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) }) - It("sends traffic", func() { - teaURL := "https://cafe.example.com/tea" - coffeeURL := "http://cafe.example.com/coffee" - if portFwdPort != 0 { - teaURL = fmt.Sprintf("https://cafe.example.com:%s/tea", strconv.Itoa(portFwdPort)) - coffeeURL = fmt.Sprintf("http://cafe.example.com:%s/coffee", strconv.Itoa(portFwdPort)) - } - status, body, err := framework.Get(teaURL, address, timeoutConfig.RequestTimeout) + It("recovers when NGF container is restarted", func() { + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) - Expect(status).To(Equal(http.StatusOK)) - Expect(body).To(ContainSubstring("URI: /tea")) + Expect(podNames).ToNot(BeEmpty()) + + output, err := restartNGFProcess() + Expect(err).ToNot(HaveOccurred(), string(output)) + // Wait for NGF to restart. + time.Sleep(2 * time.Second) + + expectWorkingTraffic(teaURL, coffeeURL) + + checkContainerLogsForErrors(podNames[0]) + + // I tried just deleting the routes and ran into a bunch of issues, deleting all the files was better + Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) + // Wait for files to be deleted. + time.Sleep(2 * time.Second) + + expectFailingTraffic(teaURL, coffeeURL) + + Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - status, body, err = framework.Get(coffeeURL, address, timeoutConfig.RequestTimeout) + expectWorkingTraffic(teaURL, coffeeURL) + }) + + It("recovers when nginx container is restarted", func() { + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) - Expect(status).To(Equal(http.StatusOK)) - Expect(body).To(ContainSubstring("URI: /coffee")) + Expect(podNames).ToNot(BeEmpty()) + + // If we correctly restart the nginx container, it should give us an error 137 code + output, err := restartNginxContainer() + Expect(err).To(HaveOccurred(), string(output)) + time.Sleep(2 * time.Second) + + checkContainerLogsForErrors(podNames[0]) + + Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) + // Wait for files to be deleted. + time.Sleep(2 * time.Second) + + expectFailingTraffic(teaURL, coffeeURL) + + Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) + + expectWorkingTraffic(teaURL, coffeeURL) }) }) + +func restartNginxContainer() ([]byte, error) { + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(podNames).ToNot(BeEmpty()) + + // This should error since we are in the nginx container, killing the nginx master process. + // Exit Code 137 is the code we want to see as that is the kill -9 signal + output, err := exec.Command( // nolint:gosec + "kubectl", + "exec", + "-n", + ngfNamespace, podNames[0], + "--container", + "nginx", + "--", + "sh", + "-c", + "$(PID=$(pgrep -f \"nginx: master process\") && kill -9 $PID)").CombinedOutput() + if err != nil { + return output, err + } + return nil, nil +} + +func restartNGFProcess() ([]byte, error) { + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(podNames).ToNot(BeEmpty()) + + output, err := exec.Command( // nolint:gosec + "kubectl", + "debug", + "-n", + ngfNamespace, + podNames[0], + "--image=busybox:1.28", + "--target=nginx-gateway", + "--", + "sh", + "-c", + "$(PID=$(pgrep -f \"/[u]sr/bin/gateway\") && kill -9 $PID)").CombinedOutput() + if err != nil { + return output, err + } + return nil, nil +} + +func expectWorkingTraffic(teaURL, coffeeURL string) { + status, body, err := framework.Get(teaURL, address, timeoutConfig.RequestTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(status).To(Equal(http.StatusOK)) + Expect(body).To(ContainSubstring("URI: /tea")) + + status, body, err = framework.Get(coffeeURL, address, timeoutConfig.RequestTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(status).To(Equal(http.StatusOK), coffeeURL+" "+address) + Expect(body).To(ContainSubstring("URI: /coffee")) +} + +func expectFailingTraffic(teaURL, coffeeURL string) { + status, body, err := framework.Get(teaURL, address, timeoutConfig.RequestTimeout) + Expect(err).To(HaveOccurred()) + Expect(status).ToNot(Equal(http.StatusOK)) + Expect(body).ToNot(ContainSubstring("URI: /tea")) + + status, body, err = framework.Get(coffeeURL, address, timeoutConfig.RequestTimeout) + Expect(err).To(HaveOccurred()) + Expect(status).ToNot(Equal(http.StatusOK)) + Expect(body).ToNot(ContainSubstring("URI: /coffee")) +} + +func checkContainerLogsForErrors(ngfPodName string) { + sinceSeconds := int64(15) + logs, err := resourceManager.GetPodLogs( + ngfNamespace, + ngfPodName, + &core.PodLogOptions{Container: "nginx", SinceSeconds: &sinceSeconds}, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(logs).ToNot(ContainSubstring("emerg"), logs) + + logs, err = resourceManager.GetPodLogs( + ngfNamespace, + ngfPodName, + &core.PodLogOptions{Container: "nginx-gateway", SinceSeconds: &sinceSeconds}, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(logs).ToNot(ContainSubstring("error"), logs) +} diff --git a/tests/suite/manifests/graceful-recovery/cafe.yaml b/tests/suite/manifests/graceful-recovery/cafe.yaml index 2d03ae59ff..be4ad51a99 100644 --- a/tests/suite/manifests/graceful-recovery/cafe.yaml +++ b/tests/suite/manifests/graceful-recovery/cafe.yaml @@ -17,6 +17,10 @@ spec: image: nginxdemos/nginx-hello:plain-text ports: - containerPort: 8080 + readinessProbe: + httpGet: + path: / + port: 8080 --- apiVersion: v1 kind: Service @@ -50,6 +54,10 @@ spec: image: nginxdemos/nginx-hello:plain-text ports: - containerPort: 8080 + readinessProbe: + httpGet: + path: / + port: 8080 --- apiVersion: v1 kind: Service diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index 8b9050a94a..9ed21716ed 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -243,9 +243,12 @@ var _ = BeforeSuite(func() { // - running upgrade test (this test will deploy its own version) // - running longevity teardown (deployment will already exist) // - running telemetry test (NGF will be deployed as part of the test) + // - running graceful-recovery (NGF will be deployed as part of the test) if strings.Contains(labelFilter, "upgrade") || strings.Contains(labelFilter, "longevity-teardown") || - strings.Contains(labelFilter, "telemetry") { + strings.Contains(labelFilter, "telemetry") || + strings.Contains(labelFilter, "graceful-recovery") { + cfg.deploy = false } From 8150af17ee99772566956ab2dd1329262ddbb2ba Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 17 Apr 2024 15:40:06 -0700 Subject: [PATCH 03/36] Add restart count to check container restart --- tests/suite/graceful_recovery_test.go | 83 +++++++++++++++++++-------- 1 file changed, 60 insertions(+), 23 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 7d58ca1ccb..ae739885ad 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -1,16 +1,16 @@ package suite import ( - "fmt" + "context" "net/http" "os/exec" - "strconv" "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" core "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/nginxinc/nginx-gateway-fabric/tests/framework" @@ -29,26 +29,18 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov }, } - var teaURL, coffeeURL string + nginxContainerName := "nginx" + ngfContainerName := "nginx-gateway" + teaURL := "https://cafe.example.com/tea" + coffeeURL := "http://cafe.example.com/coffee" BeforeAll(func() { - teaURL = "https://cafe.example.com/tea" - coffeeURL = "http://cafe.example.com/coffee" - if portFwdPort != 0 { - teaURL = fmt.Sprintf("https://cafe.example.com:%s/tea", strconv.Itoa(portFwdPort)) - coffeeURL = fmt.Sprintf("http://cafe.example.com:%s/coffee", strconv.Itoa(portFwdPort)) - } - }) - - BeforeEach(func() { - // this test is unique in that NGF - teardown(releaseName) - cfg := getDefaultSetupCfg() cfg.nfr = true - setup(cfg, "--set", "nginxGateway.securityContext.runAsNonRoot=false") + }) + BeforeEach(func() { Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) @@ -68,10 +60,33 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + var ngfPod core.Pod + err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) + Expect(err).ToNot(HaveOccurred()) + + var restartCount int + for _, containerStatus := range ngfPod.Status.ContainerStatuses { + if containerStatus.Name == ngfContainerName { + restartCount = int(containerStatus.RestartCount) + } + } + output, err := restartNGFProcess() Expect(err).ToNot(HaveOccurred(), string(output)) // Wait for NGF to restart. - time.Sleep(2 * time.Second) + time.Sleep(6 * time.Second) + + err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) + Expect(err).ToNot(HaveOccurred()) + + for _, containerStatus := range ngfPod.Status.ContainerStatuses { + if containerStatus.Name == ngfContainerName { + Expect(int(containerStatus.RestartCount)).To(Equal(restartCount + 1)) + } + } expectWorkingTraffic(teaURL, coffeeURL) @@ -95,11 +110,34 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) - // If we correctly restart the nginx container, it should give us an error 137 code + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + var ngfPod core.Pod + err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) + Expect(err).ToNot(HaveOccurred()) + + var restartCount int + for _, containerStatus := range ngfPod.Status.ContainerStatuses { + if containerStatus.Name == nginxContainerName { + restartCount = int(containerStatus.RestartCount) + } + } + output, err := restartNginxContainer() - Expect(err).To(HaveOccurred(), string(output)) + Expect(err).ToNot(HaveOccurred(), string(output)) + // Wait for NGF to restart. time.Sleep(2 * time.Second) + err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) + Expect(err).ToNot(HaveOccurred()) + + for _, containerStatus := range ngfPod.Status.ContainerStatuses { + if containerStatus.Name == nginxContainerName { + Expect(int(containerStatus.RestartCount)).To(Equal(restartCount + 1)) + } + } + checkContainerLogsForErrors(podNames[0]) Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) @@ -120,19 +158,18 @@ func restartNginxContainer() ([]byte, error) { Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) - // This should error since we are in the nginx container, killing the nginx master process. - // Exit Code 137 is the code we want to see as that is the kill -9 signal output, err := exec.Command( // nolint:gosec "kubectl", "exec", "-n", - ngfNamespace, podNames[0], + ngfNamespace, + podNames[0], "--container", "nginx", "--", "sh", "-c", - "$(PID=$(pgrep -f \"nginx: master process\") && kill -9 $PID)").CombinedOutput() + "$(PID=$(pgrep -f \"[n]ginx: master process\") && kill -9 $PID)").CombinedOutput() if err != nil { return output, err } From e6b90f5dbb725de6afb51115711cc89c9a4bdd80 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 17 Apr 2024 16:16:01 -0700 Subject: [PATCH 04/36] Move restart count checks to helper function --- tests/suite/graceful_recovery_test.go | 110 +++++++++++++------------- 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index ae739885ad..456294c2ca 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -60,33 +60,8 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) - defer cancel() - - var ngfPod core.Pod - err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) - Expect(err).ToNot(HaveOccurred()) - - var restartCount int - for _, containerStatus := range ngfPod.Status.ContainerStatuses { - if containerStatus.Name == ngfContainerName { - restartCount = int(containerStatus.RestartCount) - } - } - - output, err := restartNGFProcess() + output, err := restartNGFProcess(ngfContainerName) Expect(err).ToNot(HaveOccurred(), string(output)) - // Wait for NGF to restart. - time.Sleep(6 * time.Second) - - err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) - Expect(err).ToNot(HaveOccurred()) - - for _, containerStatus := range ngfPod.Status.ContainerStatuses { - if containerStatus.Name == ngfContainerName { - Expect(int(containerStatus.RestartCount)).To(Equal(restartCount + 1)) - } - } expectWorkingTraffic(teaURL, coffeeURL) @@ -110,33 +85,8 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) - defer cancel() - - var ngfPod core.Pod - err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) - Expect(err).ToNot(HaveOccurred()) - - var restartCount int - for _, containerStatus := range ngfPod.Status.ContainerStatuses { - if containerStatus.Name == nginxContainerName { - restartCount = int(containerStatus.RestartCount) - } - } - - output, err := restartNginxContainer() + output, err := restartNginxContainer(nginxContainerName) Expect(err).ToNot(HaveOccurred(), string(output)) - // Wait for NGF to restart. - time.Sleep(2 * time.Second) - - err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) - Expect(err).ToNot(HaveOccurred()) - - for _, containerStatus := range ngfPod.Status.ContainerStatuses { - if containerStatus.Name == nginxContainerName { - Expect(int(containerStatus.RestartCount)).To(Equal(restartCount + 1)) - } - } checkContainerLogsForErrors(podNames[0]) @@ -153,11 +103,25 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov }) }) -func restartNginxContainer() ([]byte, error) { +func restartNginxContainer(nginxContainerName string) ([]byte, error) { podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + var ngfPod core.Pod + err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) + Expect(err).ToNot(HaveOccurred()) + + var restartCount int + for _, containerStatus := range ngfPod.Status.ContainerStatuses { + if containerStatus.Name == nginxContainerName { + restartCount = int(containerStatus.RestartCount) + } + } + output, err := exec.Command( // nolint:gosec "kubectl", "exec", @@ -173,14 +137,40 @@ func restartNginxContainer() ([]byte, error) { if err != nil { return output, err } + + // Wait for NGF to restart. + time.Sleep(2 * time.Second) + + err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) + Expect(err).ToNot(HaveOccurred()) + + for _, containerStatus := range ngfPod.Status.ContainerStatuses { + if containerStatus.Name == nginxContainerName { + Expect(int(containerStatus.RestartCount)).To(Equal(restartCount + 1)) + } + } return nil, nil } -func restartNGFProcess() ([]byte, error) { +func restartNGFProcess(ngfContainerName string) ([]byte, error) { podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + var ngfPod core.Pod + err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) + Expect(err).ToNot(HaveOccurred()) + + var restartCount int + for _, containerStatus := range ngfPod.Status.ContainerStatuses { + if containerStatus.Name == ngfContainerName { + restartCount = int(containerStatus.RestartCount) + } + } + output, err := exec.Command( // nolint:gosec "kubectl", "debug", @@ -196,6 +186,18 @@ func restartNGFProcess() ([]byte, error) { if err != nil { return output, err } + + // Wait for NGF to restart. + time.Sleep(6 * time.Second) + + err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) + Expect(err).ToNot(HaveOccurred()) + + for _, containerStatus := range ngfPod.Status.ContainerStatuses { + if containerStatus.Name == ngfContainerName { + Expect(int(containerStatus.RestartCount)).To(Equal(restartCount + 1)) + } + } return nil, nil } From a31141a4000e6d19daec8676249f43aa53eaf56a Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 18 Apr 2024 12:05:03 -0700 Subject: [PATCH 05/36] Add polling wait functions to replace sleep workarounds --- tests/suite/graceful_recovery_test.go | 168 +++++++++++++++++--------- 1 file changed, 111 insertions(+), 57 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 456294c2ca..e362b0fc0a 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -2,8 +2,10 @@ package suite import ( "context" + "errors" "net/http" "os/exec" + "strings" "time" . "github.com/onsi/ginkgo/v2" @@ -11,11 +13,22 @@ import ( core "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/nginxinc/nginx-gateway-fabric/tests/framework" ) +const ( + // FIXME(bjee19): Find an automated way to keep the version updated here similar to dependabot. + // https://github.com/nginxinc/nginx-gateway-fabric/issues/1665 + debugImage = "busybox:1.28" + teaURL = "https://cafe.example.com/tea" + coffeeURL = "http://cafe.example.com/coffee" + nginxContainerName = "nginx" + ngfContainerName = "nginx-gateway" +) + var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recovery"), func() { files := []string{ "graceful-recovery/cafe.yaml", @@ -29,11 +42,6 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov }, } - nginxContainerName := "nginx" - ngfContainerName := "nginx-gateway" - teaURL := "https://cafe.example.com/tea" - coffeeURL := "http://cafe.example.com/coffee" - BeforeAll(func() { cfg := getDefaultSetupCfg() cfg.nfr = true @@ -44,10 +52,9 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - // Sometimes the traffic would error with code 502, after implementing this sleep it stopped. - time.Sleep(2 * time.Second) - expectWorkingTraffic(teaURL, coffeeURL) + err := waitForWorkingTraffic() + Expect(err).ToNot(HaveOccurred()) }) AfterAll(func() { @@ -63,21 +70,22 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov output, err := restartNGFProcess(ngfContainerName) Expect(err).ToNot(HaveOccurred(), string(output)) - expectWorkingTraffic(teaURL, coffeeURL) - checkContainerLogsForErrors(podNames[0]) + err = waitForWorkingTraffic() + Expect(err).ToNot(HaveOccurred()) + // I tried just deleting the routes and ran into a bunch of issues, deleting all the files was better Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - // Wait for files to be deleted. - time.Sleep(2 * time.Second) - expectFailingTraffic(teaURL, coffeeURL) + err = waitForFailingTraffic() + Expect(err).ToNot(HaveOccurred()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - expectWorkingTraffic(teaURL, coffeeURL) + err = waitForWorkingTraffic() + Expect(err).ToNot(HaveOccurred()) }) It("recovers when nginx container is restarted", func() { @@ -90,16 +98,19 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov checkContainerLogsForErrors(podNames[0]) + err = waitForWorkingTraffic() + Expect(err).ToNot(HaveOccurred()) + Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - // Wait for files to be deleted. - time.Sleep(2 * time.Second) - expectFailingTraffic(teaURL, coffeeURL) + err = waitForFailingTraffic() + Expect(err).ToNot(HaveOccurred()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - expectWorkingTraffic(teaURL, coffeeURL) + err = waitForWorkingTraffic() + Expect(err).ToNot(HaveOccurred()) }) }) @@ -138,17 +149,9 @@ func restartNginxContainer(nginxContainerName string) ([]byte, error) { return output, err } - // Wait for NGF to restart. - time.Sleep(2 * time.Second) - - err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) + err = waitForContainerRestart(podNames[0], nginxContainerName, restartCount) Expect(err).ToNot(HaveOccurred()) - for _, containerStatus := range ngfPod.Status.ContainerStatuses { - if containerStatus.Name == nginxContainerName { - Expect(int(containerStatus.RestartCount)).To(Equal(restartCount + 1)) - } - } return nil, nil } @@ -177,7 +180,7 @@ func restartNGFProcess(ngfContainerName string) ([]byte, error) { "-n", ngfNamespace, podNames[0], - "--image=busybox:1.28", + "--image="+debugImage, "--target=nginx-gateway", "--", "sh", @@ -187,42 +190,93 @@ func restartNGFProcess(ngfContainerName string) ([]byte, error) { return output, err } - // Wait for NGF to restart. - time.Sleep(6 * time.Second) - - err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) + err = waitForContainerRestart(podNames[0], ngfContainerName, restartCount) Expect(err).ToNot(HaveOccurred()) - for _, containerStatus := range ngfPod.Status.ContainerStatuses { - if containerStatus.Name == ngfContainerName { - Expect(int(containerStatus.RestartCount)).To(Equal(restartCount + 1)) - } - } return nil, nil } -func expectWorkingTraffic(teaURL, coffeeURL string) { - status, body, err := framework.Get(teaURL, address, timeoutConfig.RequestTimeout) - Expect(err).ToNot(HaveOccurred()) - Expect(status).To(Equal(http.StatusOK)) - Expect(body).To(ContainSubstring("URI: /tea")) +func waitForContainerRestart(ngfPodName string, containerName string, currentRestartCount int) error { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.RequestTimeout) + defer cancel() - status, body, err = framework.Get(coffeeURL, address, timeoutConfig.RequestTimeout) - Expect(err).ToNot(HaveOccurred()) - Expect(status).To(Equal(http.StatusOK), coffeeURL+" "+address) - Expect(body).To(ContainSubstring("URI: /coffee")) + //nolint:nilerr + return wait.PollUntilContextCancel( + ctx, + 500*time.Millisecond, + true, /* poll immediately */ + func(ctx context.Context) (bool, error) { + var ngfPod core.Pod + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: ngfPodName}, &ngfPod); err != nil { + return false, nil + } + + for _, containerStatus := range ngfPod.Status.ContainerStatuses { + if containerStatus.Name == containerName { + return int(containerStatus.RestartCount) == currentRestartCount+1, nil + } + } + return false, nil + }, + ) +} + +func waitForWorkingTraffic() error { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.RequestTimeout) + defer cancel() + + //nolint:nilerr + return wait.PollUntilContextCancel( + ctx, + 500*time.Millisecond, + true, /* poll immediately */ + func(_ context.Context) (bool, error) { + if err := expectRequest(teaURL, address, http.StatusOK, "URI: /tea"); err != nil { + return false, nil + } + if err := expectRequest(coffeeURL, address, http.StatusOK, "URI: /coffee"); err != nil { + return false, nil + } + return true, nil + }, + ) } -func expectFailingTraffic(teaURL, coffeeURL string) { - status, body, err := framework.Get(teaURL, address, timeoutConfig.RequestTimeout) - Expect(err).To(HaveOccurred()) - Expect(status).ToNot(Equal(http.StatusOK)) - Expect(body).ToNot(ContainSubstring("URI: /tea")) +func waitForFailingTraffic() error { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.RequestTimeout) + defer cancel() - status, body, err = framework.Get(coffeeURL, address, timeoutConfig.RequestTimeout) - Expect(err).To(HaveOccurred()) - Expect(status).ToNot(Equal(http.StatusOK)) - Expect(body).ToNot(ContainSubstring("URI: /coffee")) + return wait.PollUntilContextCancel( + ctx, + 500*time.Millisecond, + true, /* poll immediately */ + func(_ context.Context) (bool, error) { + if err := expectRequest(teaURL, address, 0, "URI: /tea"); err == nil { + return false, nil + } + if err := expectRequest(coffeeURL, address, 0, "URI: /coffee"); err == nil { + return false, nil + } + return true, nil + }, + ) +} + +func expectRequest(appURL string, address string, httpStatus int, responseBodyMessage string) error { + status, body, err := framework.Get(appURL, address, timeoutConfig.RequestTimeout) + if status != httpStatus { + return errors.New("http statuses were not equal") + } + if httpStatus == http.StatusOK { + if !strings.Contains(body, responseBodyMessage) { + return errors.New("expected response body to contain body message") + } + } else { + if strings.Contains(body, responseBodyMessage) { + return errors.New("expected response body to not contain body message") + } + } + return err } func checkContainerLogsForErrors(ngfPodName string) { @@ -230,7 +284,7 @@ func checkContainerLogsForErrors(ngfPodName string) { logs, err := resourceManager.GetPodLogs( ngfNamespace, ngfPodName, - &core.PodLogOptions{Container: "nginx", SinceSeconds: &sinceSeconds}, + &core.PodLogOptions{Container: nginxContainerName, SinceSeconds: &sinceSeconds}, ) Expect(err).ToNot(HaveOccurred()) Expect(logs).ToNot(ContainSubstring("emerg"), logs) @@ -238,7 +292,7 @@ func checkContainerLogsForErrors(ngfPodName string) { logs, err = resourceManager.GetPodLogs( ngfNamespace, ngfPodName, - &core.PodLogOptions{Container: "nginx-gateway", SinceSeconds: &sinceSeconds}, + &core.PodLogOptions{Container: ngfContainerName, SinceSeconds: &sinceSeconds}, ) Expect(err).ToNot(HaveOccurred()) Expect(logs).ToNot(ContainSubstring("error"), logs) From b3cf4a5a3d1c56361ef622d77c98621347a376a6 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 19 Apr 2024 13:27:22 -0700 Subject: [PATCH 06/36] Add polling wait for leader election lease to change and refactored expect Request --- tests/suite/graceful_recovery_test.go | 98 ++++++++++++++++++++++----- 1 file changed, 81 insertions(+), 17 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index e362b0fc0a..9ecdf82567 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -10,6 +10,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + coordination "k8s.io/api/coordination/v1" core "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -22,7 +23,8 @@ import ( const ( // FIXME(bjee19): Find an automated way to keep the version updated here similar to dependabot. // https://github.com/nginxinc/nginx-gateway-fabric/issues/1665 - debugImage = "busybox:1.28" + debugImage = "busybox:1.28" + teaURL = "https://cafe.example.com/tea" coffeeURL = "http://cafe.example.com/coffee" nginxContainerName = "nginx" @@ -67,15 +69,20 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) + leaseName, err := getLeaderElectionLeaseHolderName() + Expect(err).ToNot(HaveOccurred()) + output, err := restartNGFProcess(ngfContainerName) Expect(err).ToNot(HaveOccurred(), string(output)) checkContainerLogsForErrors(podNames[0]) + err = waitForLeaderLeaseToChange(leaseName) + Expect(err).ToNot(HaveOccurred()) + err = waitForWorkingTraffic() Expect(err).ToNot(HaveOccurred()) - // I tried just deleting the routes and ran into a bunch of issues, deleting all the files was better Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) err = waitForFailingTraffic() @@ -93,11 +100,17 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) + leaseName, err := getLeaderElectionLeaseHolderName() + Expect(err).ToNot(HaveOccurred()) + output, err := restartNginxContainer(nginxContainerName) Expect(err).ToNot(HaveOccurred(), string(output)) checkContainerLogsForErrors(podNames[0]) + err = waitForLeaderLeaseToChange(leaseName) + Expect(err).ToNot(HaveOccurred()) + err = waitForWorkingTraffic() Expect(err).ToNot(HaveOccurred()) @@ -231,10 +244,10 @@ func waitForWorkingTraffic() error { 500*time.Millisecond, true, /* poll immediately */ func(_ context.Context) (bool, error) { - if err := expectRequest(teaURL, address, http.StatusOK, "URI: /tea"); err != nil { + if err := expectRequestToSucceed(teaURL, address, "URI: /tea"); err != nil { return false, nil } - if err := expectRequest(coffeeURL, address, http.StatusOK, "URI: /coffee"); err != nil { + if err := expectRequestToSucceed(coffeeURL, address, "URI: /coffee"); err != nil { return false, nil } return true, nil @@ -246,15 +259,16 @@ func waitForFailingTraffic() error { ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.RequestTimeout) defer cancel() + //nolint:nilerr return wait.PollUntilContextCancel( ctx, 500*time.Millisecond, true, /* poll immediately */ func(_ context.Context) (bool, error) { - if err := expectRequest(teaURL, address, 0, "URI: /tea"); err == nil { + if err := expectRequestToFail(teaURL, address, "URI: /tea"); err != nil { return false, nil } - if err := expectRequest(coffeeURL, address, 0, "URI: /coffee"); err == nil { + if err := expectRequestToFail(coffeeURL, address, "URI: /coffee"); err != nil { return false, nil } return true, nil @@ -262,23 +276,36 @@ func waitForFailingTraffic() error { ) } -func expectRequest(appURL string, address string, httpStatus int, responseBodyMessage string) error { +func expectRequestToSucceed(appURL string, address string, responseBodyMessage string) error { status, body, err := framework.Get(appURL, address, timeoutConfig.RequestTimeout) - if status != httpStatus { - return errors.New("http statuses were not equal") + if status != http.StatusOK { + return errors.New("http status was not 200") } - if httpStatus == http.StatusOK { - if !strings.Contains(body, responseBodyMessage) { - return errors.New("expected response body to contain body message") - } - } else { - if strings.Contains(body, responseBodyMessage) { - return errors.New("expected response body to not contain body message") - } + + if !strings.Contains(body, responseBodyMessage) { + return errors.New("expected response body to contain correct body message") } + return err } +func expectRequestToFail(appURL string, address string, responseBodyMessage string) error { + status, body, err := framework.Get(appURL, address, timeoutConfig.RequestTimeout) + if status != 0 { + return errors.New("expected http status to be 0") + } + + if strings.Contains(body, responseBodyMessage) { + return errors.New("expected response body to not contain correct body message") + } + + if err == nil { + return errors.New("expected request to error") + } + + return nil +} + func checkContainerLogsForErrors(ngfPodName string) { sinceSeconds := int64(15) logs, err := resourceManager.GetPodLogs( @@ -297,3 +324,40 @@ func checkContainerLogsForErrors(ngfPodName string) { Expect(err).ToNot(HaveOccurred()) Expect(logs).ToNot(ContainSubstring("error"), logs) } + +func waitForLeaderLeaseToChange(originalLeaseName string) error { + leaseCtx, leaseCancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer leaseCancel() + + //nolint:nilerr + return wait.PollUntilContextCancel( + leaseCtx, + 500*time.Millisecond, + true, /* poll immediately */ + func(_ context.Context) (bool, error) { + leaseName, err := getLeaderElectionLeaseHolderName() + if err != nil { + return false, nil + } + + if originalLeaseName != leaseName { + return true, nil + } + + return false, nil + }, + ) +} + +func getLeaderElectionLeaseHolderName() (string, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + var lease coordination.Lease + key := types.NamespacedName{Name: "ngf-test-nginx-gateway-fabric-leader-election", Namespace: ngfNamespace} + + if err := k8sClient.Get(ctx, key, &lease); err != nil { + return "", errors.New("could not retrieve leader election lease") + } + return *lease.Spec.HolderIdentity, nil +} From 48b9cc1672793cbe1bde37f54e20d0e33404bb8c Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 19 Apr 2024 13:57:33 -0700 Subject: [PATCH 07/36] Refactor container restart functions --- tests/suite/graceful_recovery_test.go | 68 ++++++++++++--------------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 9ecdf82567..053ab10023 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -72,7 +72,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov leaseName, err := getLeaderElectionLeaseHolderName() Expect(err).ToNot(HaveOccurred()) - output, err := restartNGFProcess(ngfContainerName) + output, err := restartNGFProcess() Expect(err).ToNot(HaveOccurred(), string(output)) checkContainerLogsForErrors(podNames[0]) @@ -89,6 +89,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(err).ToNot(HaveOccurred()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) + time.Sleep(2 * time.Second) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) err = waitForWorkingTraffic() @@ -103,7 +104,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov leaseName, err := getLeaderElectionLeaseHolderName() Expect(err).ToNot(HaveOccurred()) - output, err := restartNginxContainer(nginxContainerName) + output, err := restartNginxContainer() Expect(err).ToNot(HaveOccurred(), string(output)) checkContainerLogsForErrors(podNames[0]) @@ -120,6 +121,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(err).ToNot(HaveOccurred()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) + time.Sleep(2 * time.Second) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) err = waitForWorkingTraffic() @@ -127,25 +129,14 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov }) }) -func restartNginxContainer(nginxContainerName string) ([]byte, error) { +func restartNginxContainer() ([]byte, error) { podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) - defer cancel() - - var ngfPod core.Pod - err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) + restartCount, err := getContainerRestartCount(nginxContainerName, podNames[0]) Expect(err).ToNot(HaveOccurred()) - var restartCount int - for _, containerStatus := range ngfPod.Status.ContainerStatuses { - if containerStatus.Name == nginxContainerName { - restartCount = int(containerStatus.RestartCount) - } - } - output, err := exec.Command( // nolint:gosec "kubectl", "exec", @@ -168,25 +159,14 @@ func restartNginxContainer(nginxContainerName string) ([]byte, error) { return nil, nil } -func restartNGFProcess(ngfContainerName string) ([]byte, error) { +func restartNGFProcess() ([]byte, error) { podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) - defer cancel() - - var ngfPod core.Pod - err = k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: podNames[0]}, &ngfPod) + restartCount, err := getContainerRestartCount(ngfContainerName, podNames[0]) Expect(err).ToNot(HaveOccurred()) - var restartCount int - for _, containerStatus := range ngfPod.Status.ContainerStatuses { - if containerStatus.Name == ngfContainerName { - restartCount = int(containerStatus.RestartCount) - } - } - output, err := exec.Command( // nolint:gosec "kubectl", "debug", @@ -218,18 +198,13 @@ func waitForContainerRestart(ngfPodName string, containerName string, currentRes ctx, 500*time.Millisecond, true, /* poll immediately */ - func(ctx context.Context) (bool, error) { - var ngfPod core.Pod - if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: ngfPodName}, &ngfPod); err != nil { + func(_ context.Context) (bool, error) { + restartCount, err := getContainerRestartCount(containerName, ngfPodName) + if err != nil { return false, nil } - for _, containerStatus := range ngfPod.Status.ContainerStatuses { - if containerStatus.Name == containerName { - return int(containerStatus.RestartCount) == currentRestartCount+1, nil - } - } - return false, nil + return restartCount == currentRestartCount+1, nil }, ) } @@ -361,3 +336,22 @@ func getLeaderElectionLeaseHolderName() (string, error) { } return *lease.Spec.HolderIdentity, nil } + +func getContainerRestartCount(containerName, ngfPodName string) (int, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + var ngfPod core.Pod + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: ngfPodName}, &ngfPod); err != nil { + return 0, errors.New("could not retrieve ngfPod") + } + + var restartCount int + for _, containerStatus := range ngfPod.Status.ContainerStatuses { + if containerStatus.Name == containerName { + restartCount = int(containerStatus.RestartCount) + } + } + + return restartCount, nil +} From e5301980b5db338d595eb4b8146ed5f0c72a954c Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 25 Apr 2024 13:11:32 -0700 Subject: [PATCH 08/36] Refactor container restart command to use job --- tests/framework/resourcemanager.go | 8 +- tests/suite/graceful_recovery_test.go | 89 ++++++++++--------- .../graceful-recovery/node-debugger-job.yaml | 27 ++++++ 3 files changed, 77 insertions(+), 47 deletions(-) create mode 100644 tests/suite/manifests/graceful-recovery/node-debugger-job.yaml diff --git a/tests/framework/resourcemanager.go b/tests/framework/resourcemanager.go index e62cb5db87..d847d4ac1c 100644 --- a/tests/framework/resourcemanager.go +++ b/tests/framework/resourcemanager.go @@ -159,7 +159,7 @@ func (rm *ResourceManager) readAndHandleObjects( files []string, ) error { for _, file := range files { - data, err := rm.getFileContents(file) + data, err := rm.GetFileContents(file) if err != nil { return err } @@ -187,9 +187,9 @@ func (rm *ResourceManager) readAndHandleObjects( return nil } -// getFileContents takes a string that can either be a local file +// GetFileContents takes a string that can either be a local file // path or an https:// URL to YAML manifests and provides the contents. -func (rm *ResourceManager) getFileContents(file string) (*bytes.Buffer, error) { +func (rm *ResourceManager) GetFileContents(file string) (*bytes.Buffer, error) { if strings.HasPrefix(file, "http://") { return nil, fmt.Errorf("data can't be retrieved from %s: http is not supported, use https", file) } else if strings.HasPrefix(file, "https://") { @@ -314,7 +314,7 @@ func (rm *ResourceManager) waitForRoutesToBeReady(ctx context.Context, namespace var numParents, readyCount int for _, route := range routeList.Items { - numParents += len(route.Status.Parents) + numParents += len(route.Status.Parents) // extract from the parentref not the status. for _, parent := range route.Status.Parents { for _, cond := range parent.Conditions { if cond.Type == string(v1.RouteConditionAccepted) && cond.Status == metav1.ConditionTrue { diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 053ab10023..a08b068ad1 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -3,28 +3,26 @@ package suite import ( "context" "errors" + "fmt" "net/http" - "os/exec" "strings" "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + v1 "k8s.io/api/batch/v1" coordination "k8s.io/api/coordination/v1" core "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" "github.com/nginxinc/nginx-gateway-fabric/tests/framework" ) const ( - // FIXME(bjee19): Find an automated way to keep the version updated here similar to dependabot. - // https://github.com/nginxinc/nginx-gateway-fabric/issues/1665 - debugImage = "busybox:1.28" - teaURL = "https://cafe.example.com/tea" coffeeURL = "http://cafe.example.com/coffee" nginxContainerName = "nginx" @@ -72,8 +70,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov leaseName, err := getLeaderElectionLeaseHolderName() Expect(err).ToNot(HaveOccurred()) - output, err := restartNGFProcess() - Expect(err).ToNot(HaveOccurred(), string(output)) + restartNGFProcess() checkContainerLogsForErrors(podNames[0]) @@ -104,8 +101,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov leaseName, err := getLeaderElectionLeaseHolderName() Expect(err).ToNot(HaveOccurred()) - output, err := restartNginxContainer() - Expect(err).ToNot(HaveOccurred(), string(output)) + restartNginxContainer() checkContainerLogsForErrors(podNames[0]) @@ -129,7 +125,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov }) }) -func restartNginxContainer() ([]byte, error) { +func restartNginxContainer() { podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) @@ -137,29 +133,17 @@ func restartNginxContainer() ([]byte, error) { restartCount, err := getContainerRestartCount(nginxContainerName, podNames[0]) Expect(err).ToNot(HaveOccurred()) - output, err := exec.Command( // nolint:gosec - "kubectl", - "exec", - "-n", - ngfNamespace, - podNames[0], - "--container", - "nginx", - "--", - "sh", - "-c", - "$(PID=$(pgrep -f \"[n]ginx: master process\") && kill -9 $PID)").CombinedOutput() - if err != nil { - return output, err - } + job, err := runNodeDebuggerJob(podNames[0], "PID=$(pgrep -f \"[n]ginx: master process\") && kill -9 $PID") + Expect(err).ToNot(HaveOccurred()) err = waitForContainerRestart(podNames[0], nginxContainerName, restartCount) Expect(err).ToNot(HaveOccurred()) - return nil, nil + err = resourceManager.Delete([]client.Object{job}) + Expect(err).ToNot(HaveOccurred()) } -func restartNGFProcess() ([]byte, error) { +func restartNGFProcess() { podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) @@ -167,26 +151,14 @@ func restartNGFProcess() ([]byte, error) { restartCount, err := getContainerRestartCount(ngfContainerName, podNames[0]) Expect(err).ToNot(HaveOccurred()) - output, err := exec.Command( // nolint:gosec - "kubectl", - "debug", - "-n", - ngfNamespace, - podNames[0], - "--image="+debugImage, - "--target=nginx-gateway", - "--", - "sh", - "-c", - "$(PID=$(pgrep -f \"/[u]sr/bin/gateway\") && kill -9 $PID)").CombinedOutput() - if err != nil { - return output, err - } + job, err := runNodeDebuggerJob(podNames[0], "PID=$(pgrep -f \"/[u]sr/bin/gateway\") && kill -9 $PID") + Expect(err).ToNot(HaveOccurred()) err = waitForContainerRestart(podNames[0], ngfContainerName, restartCount) Expect(err).ToNot(HaveOccurred()) - return nil, nil + err = resourceManager.Delete([]client.Object{job}) + Expect(err).ToNot(HaveOccurred()) } func waitForContainerRestart(ngfPodName string, containerName string, currentRestartCount int) error { @@ -355,3 +327,34 @@ func getContainerRestartCount(containerName, ngfPodName string) (int, error) { return restartCount, nil } + +func runNodeDebuggerJob(ngfPodName, jobScript string) (*v1.Job, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + var ngfPod core.Pod + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: ngfPodName}, &ngfPod); err != nil { + return nil, errors.New("could not retrieve ngfPod") + } + + b, err := resourceManager.GetFileContents("graceful-recovery/node-debugger-job.yaml") + if err != nil { + return nil, errors.New("error processing node debugger job file") + } + + job := &v1.Job{} + _ = v1.AddToScheme(resourceManager.K8sClient.Scheme()) + if err = yaml.Unmarshal(b.Bytes(), job); err != nil { + return nil, errors.New("error with yaml unmarshal") + } + + job.Spec.Template.Spec.NodeSelector["kubernetes.io/hostname"] = ngfPod.Spec.NodeName + job.Spec.Template.Spec.Containers[0].Args = []string{jobScript} + job.Namespace = ngfNamespace + + if err = resourceManager.Apply([]client.Object{job}); err != nil { + return nil, fmt.Errorf("errored in applying job: %w", err) + } + + return job, nil +} diff --git a/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml b/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml new file mode 100644 index 0000000000..12d7241d3d --- /dev/null +++ b/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml @@ -0,0 +1,27 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: node-debugger-job +spec: + template: + spec: + hostPID: true + hostIPC: true + nodeSelector: + kubernetes.io/hostname: "" + containers: + - name: node-debugger-container + image: ubuntu + command: ["/bin/bash", "-c"] + args: [] + securityContext: + privileged: true + volumeMounts: + - name: host-fs + mountPath: /mnt/host + volumes: + - name: host-fs + hostPath: + path: / + type: Directory + restartPolicy: Never From 1eeafd2413a332275250c8cfa5eb8b51e45bf37c Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 25 Apr 2024 13:28:58 -0700 Subject: [PATCH 09/36] Refactor waitForRoutesToBeReady to use parentrefs instead of status --- tests/framework/resourcemanager.go | 2 +- tests/suite/graceful_recovery_test.go | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/framework/resourcemanager.go b/tests/framework/resourcemanager.go index d847d4ac1c..500fafa8aa 100644 --- a/tests/framework/resourcemanager.go +++ b/tests/framework/resourcemanager.go @@ -314,7 +314,7 @@ func (rm *ResourceManager) waitForRoutesToBeReady(ctx context.Context, namespace var numParents, readyCount int for _, route := range routeList.Items { - numParents += len(route.Status.Parents) // extract from the parentref not the status. + numParents += len(route.Spec.ParentRefs) for _, parent := range route.Status.Parents { for _, cond := range parent.Conditions { if cond.Type == string(v1.RouteConditionAccepted) && cond.Status == metav1.ConditionTrue { diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index a08b068ad1..a86d61b408 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -86,7 +86,6 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(err).ToNot(HaveOccurred()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) - time.Sleep(2 * time.Second) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) err = waitForWorkingTraffic() @@ -117,7 +116,6 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(err).ToNot(HaveOccurred()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) - time.Sleep(2 * time.Second) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) err = waitForWorkingTraffic() From 801a18798938900a81cbcd0e601815c3e678264c Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 25 Apr 2024 13:36:42 -0700 Subject: [PATCH 10/36] Revert runAsNonRoot to true --- charts/nginx-gateway-fabric/templates/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/nginx-gateway-fabric/templates/deployment.yaml b/charts/nginx-gateway-fabric/templates/deployment.yaml index b826740fee..83aeafde21 100644 --- a/charts/nginx-gateway-fabric/templates/deployment.yaml +++ b/charts/nginx-gateway-fabric/templates/deployment.yaml @@ -173,7 +173,7 @@ spec: shareProcessNamespace: true securityContext: fsGroup: 1001 - runAsNonRoot: {{ ne .Values.nginxGateway.securityContext.runAsNonRoot false }} + runAsNonRoot: true {{- if .Values.tolerations }} tolerations: {{- toYaml .Values.tolerations | nindent 6 }} From 8134a947a4dbfadd5e509253109018e7fdc4bdc8 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 25 Apr 2024 14:04:08 -0700 Subject: [PATCH 11/36] Change test to allow system suite to deploy NGF --- tests/suite/graceful_recovery_test.go | 6 ------ tests/suite/system_suite_test.go | 4 +--- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index a86d61b408..e52d370505 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -42,12 +42,6 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov }, } - BeforeAll(func() { - cfg := getDefaultSetupCfg() - cfg.nfr = true - setup(cfg, "--set", "nginxGateway.securityContext.runAsNonRoot=false") - }) - BeforeEach(func() { Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index 9ed21716ed..870281e71b 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -243,11 +243,9 @@ var _ = BeforeSuite(func() { // - running upgrade test (this test will deploy its own version) // - running longevity teardown (deployment will already exist) // - running telemetry test (NGF will be deployed as part of the test) - // - running graceful-recovery (NGF will be deployed as part of the test) if strings.Contains(labelFilter, "upgrade") || strings.Contains(labelFilter, "longevity-teardown") || - strings.Contains(labelFilter, "telemetry") || - strings.Contains(labelFilter, "graceful-recovery") { + strings.Contains(labelFilter, "telemetry") { cfg.deploy = false } From 2df9b80c4ef309c24a1544152eb536a18ee429eb Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 25 Apr 2024 15:06:36 -0700 Subject: [PATCH 12/36] Add propagation policy when deleting Job --- tests/framework/resourcemanager.go | 4 ++-- tests/suite/graceful_recovery_test.go | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/framework/resourcemanager.go b/tests/framework/resourcemanager.go index 500fafa8aa..fdc996c5d6 100644 --- a/tests/framework/resourcemanager.go +++ b/tests/framework/resourcemanager.go @@ -124,12 +124,12 @@ func (rm *ResourceManager) ApplyFromFiles(files []string, namespace string) erro } // Delete deletes Kubernetes resources defined as Go objects. -func (rm *ResourceManager) Delete(resources []client.Object) error { +func (rm *ResourceManager) Delete(resources []client.Object, opts ...client.DeleteOption) error { for _, resource := range resources { ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.DeleteTimeout) defer cancel() - if err := rm.K8sClient.Delete(ctx, resource); err != nil && !apierrors.IsNotFound(err) { + if err := rm.K8sClient.Delete(ctx, resource, opts...); err != nil && !apierrors.IsNotFound(err) { return fmt.Errorf("error deleting resource: %w", err) } } diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index e52d370505..d261e3d1f5 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -131,7 +131,8 @@ func restartNginxContainer() { err = waitForContainerRestart(podNames[0], nginxContainerName, restartCount) Expect(err).ToNot(HaveOccurred()) - err = resourceManager.Delete([]client.Object{job}) + // propagation policy is set to delete underlying pod created through job + err = resourceManager.Delete([]client.Object{job}, client.PropagationPolicy(metav1.DeletePropagationBackground)) Expect(err).ToNot(HaveOccurred()) } @@ -149,7 +150,8 @@ func restartNGFProcess() { err = waitForContainerRestart(podNames[0], ngfContainerName, restartCount) Expect(err).ToNot(HaveOccurred()) - err = resourceManager.Delete([]client.Object{job}) + // propagation policy is set to delete underlying pod created through job + err = resourceManager.Delete([]client.Object{job}, client.PropagationPolicy(metav1.DeletePropagationBackground)) Expect(err).ToNot(HaveOccurred()) } From 3f4d37caa7a74793f6a510fe65367884f10e89f3 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 26 Apr 2024 10:44:38 -0700 Subject: [PATCH 13/36] Refactor multiple functions into restartContainer function --- tests/suite/graceful_recovery_test.go | 76 ++++++++++----------------- 1 file changed, 28 insertions(+), 48 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index d261e3d1f5..448ef1dd79 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -47,8 +47,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - err := waitForWorkingTraffic() - Expect(err).ToNot(HaveOccurred()) + Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) }) AfterAll(func() { @@ -64,26 +63,22 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov leaseName, err := getLeaderElectionLeaseHolderName() Expect(err).ToNot(HaveOccurred()) - restartNGFProcess() + restartContainer(ngfContainerName) checkContainerLogsForErrors(podNames[0]) - err = waitForLeaderLeaseToChange(leaseName) - Expect(err).ToNot(HaveOccurred()) + Expect(waitForLeaderLeaseToChange(leaseName)).ToNot(HaveOccurred()) - err = waitForWorkingTraffic() - Expect(err).ToNot(HaveOccurred()) + Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - err = waitForFailingTraffic() - Expect(err).ToNot(HaveOccurred()) + Expect(waitForFailingTraffic()).ToNot(HaveOccurred()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - err = waitForWorkingTraffic() - Expect(err).ToNot(HaveOccurred()) + Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) }) It("recovers when nginx container is restarted", func() { @@ -94,68 +89,53 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov leaseName, err := getLeaderElectionLeaseHolderName() Expect(err).ToNot(HaveOccurred()) - restartNginxContainer() + restartContainer(nginxContainerName) checkContainerLogsForErrors(podNames[0]) - err = waitForLeaderLeaseToChange(leaseName) - Expect(err).ToNot(HaveOccurred()) + Expect(waitForLeaderLeaseToChange(leaseName)).ToNot(HaveOccurred()) - err = waitForWorkingTraffic() - Expect(err).ToNot(HaveOccurred()) + Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - err = waitForFailingTraffic() - Expect(err).ToNot(HaveOccurred()) + Expect(waitForFailingTraffic()).ToNot(HaveOccurred()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - err = waitForWorkingTraffic() - Expect(err).ToNot(HaveOccurred()) + Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) }) }) -func restartNginxContainer() { - podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) - Expect(err).ToNot(HaveOccurred()) - Expect(podNames).ToNot(BeEmpty()) - - restartCount, err := getContainerRestartCount(nginxContainerName, podNames[0]) - Expect(err).ToNot(HaveOccurred()) - - job, err := runNodeDebuggerJob(podNames[0], "PID=$(pgrep -f \"[n]ginx: master process\") && kill -9 $PID") - Expect(err).ToNot(HaveOccurred()) - - err = waitForContainerRestart(podNames[0], nginxContainerName, restartCount) - Expect(err).ToNot(HaveOccurred()) - - // propagation policy is set to delete underlying pod created through job - err = resourceManager.Delete([]client.Object{job}, client.PropagationPolicy(metav1.DeletePropagationBackground)) - Expect(err).ToNot(HaveOccurred()) -} +func restartContainer(containerName string) { + var jobScript string + if containerName == "nginx" { + jobScript = "PID=$(pgrep -f \"[n]ginx: master process\") && kill -9 $PID" + } else { + jobScript = "PID=$(pgrep -f \"/[u]sr/bin/gateway\") && kill -9 $PID" + } -func restartNGFProcess() { podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) - restartCount, err := getContainerRestartCount(ngfContainerName, podNames[0]) + restartCount, err := getContainerRestartCount(containerName, podNames[0]) Expect(err).ToNot(HaveOccurred()) - job, err := runNodeDebuggerJob(podNames[0], "PID=$(pgrep -f \"/[u]sr/bin/gateway\") && kill -9 $PID") + job, err := runNodeDebuggerJob(podNames[0], jobScript) Expect(err).ToNot(HaveOccurred()) - err = waitForContainerRestart(podNames[0], ngfContainerName, restartCount) - Expect(err).ToNot(HaveOccurred()) + Expect(waitForContainerRestart(podNames[0], containerName, restartCount)).ToNot(HaveOccurred()) // propagation policy is set to delete underlying pod created through job - err = resourceManager.Delete([]client.Object{job}, client.PropagationPolicy(metav1.DeletePropagationBackground)) - Expect(err).ToNot(HaveOccurred()) + Expect(resourceManager.Delete( + []client.Object{job}, + client.PropagationPolicy(metav1.DeletePropagationBackground), + )).ToNot(HaveOccurred()) } -func waitForContainerRestart(ngfPodName string, containerName string, currentRestartCount int) error { +func waitForContainerRestart(ngfPodName, containerName string, currentRestartCount int) error { ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.RequestTimeout) defer cancel() @@ -217,7 +197,7 @@ func waitForFailingTraffic() error { ) } -func expectRequestToSucceed(appURL string, address string, responseBodyMessage string) error { +func expectRequestToSucceed(appURL, address string, responseBodyMessage string) error { status, body, err := framework.Get(appURL, address, timeoutConfig.RequestTimeout) if status != http.StatusOK { return errors.New("http status was not 200") @@ -230,7 +210,7 @@ func expectRequestToSucceed(appURL string, address string, responseBodyMessage s return err } -func expectRequestToFail(appURL string, address string, responseBodyMessage string) error { +func expectRequestToFail(appURL, address string, responseBodyMessage string) error { status, body, err := framework.Get(appURL, address, timeoutConfig.RequestTimeout) if status != 0 { return errors.New("expected http status to be 0") From 24eec0864ff486c569fe4d8fb45544637e907911 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 26 Apr 2024 12:18:42 -0700 Subject: [PATCH 14/36] Refactor test cases using shared runTest function --- tests/suite/graceful_recovery_test.go | 60 ++++++++++----------------- 1 file changed, 21 insertions(+), 39 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 448ef1dd79..304f2c0736 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -56,57 +56,39 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov }) It("recovers when NGF container is restarted", func() { - podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) - Expect(err).ToNot(HaveOccurred()) - Expect(podNames).ToNot(BeEmpty()) - - leaseName, err := getLeaderElectionLeaseHolderName() - Expect(err).ToNot(HaveOccurred()) - - restartContainer(ngfContainerName) - - checkContainerLogsForErrors(podNames[0]) - - Expect(waitForLeaderLeaseToChange(leaseName)).ToNot(HaveOccurred()) - - Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) - - Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - - Expect(waitForFailingTraffic()).ToNot(HaveOccurred()) - - Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) - Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - - Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) + runTest(ngfContainerName, files, ns) }) It("recovers when nginx container is restarted", func() { - podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) - Expect(err).ToNot(HaveOccurred()) - Expect(podNames).ToNot(BeEmpty()) + runTest(nginxContainerName, files, ns) + }) +}) - leaseName, err := getLeaderElectionLeaseHolderName() - Expect(err).ToNot(HaveOccurred()) +func runTest(containerName string, files []string, ns *core.Namespace) { + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(podNames).ToNot(BeEmpty()) - restartContainer(nginxContainerName) + leaseName, err := getLeaderElectionLeaseHolderName() + Expect(err).ToNot(HaveOccurred()) - checkContainerLogsForErrors(podNames[0]) + restartContainer(containerName) - Expect(waitForLeaderLeaseToChange(leaseName)).ToNot(HaveOccurred()) + checkContainerLogsForErrors(podNames[0]) - Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) + Expect(waitForLeaderLeaseToChange(leaseName)).ToNot(HaveOccurred()) - Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) + Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) - Expect(waitForFailingTraffic()).ToNot(HaveOccurred()) + Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) - Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) + Expect(waitForFailingTraffic()).ToNot(HaveOccurred()) - Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) - }) -}) + Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) + + Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) +} func restartContainer(containerName string) { var jobScript string From 8ae6c60462bfea56e68037da156fbe73005cae5d Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 26 Apr 2024 14:17:37 -0700 Subject: [PATCH 15/36] Change runTest name to runRecoveryTest --- tests/suite/graceful_recovery_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 304f2c0736..a0f8506606 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -56,15 +56,15 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov }) It("recovers when NGF container is restarted", func() { - runTest(ngfContainerName, files, ns) + runRecoveryTest(ngfContainerName, files, ns) }) It("recovers when nginx container is restarted", func() { - runTest(nginxContainerName, files, ns) + runRecoveryTest(nginxContainerName, files, ns) }) }) -func runTest(containerName string, files []string, ns *core.Namespace) { +func runRecoveryTest(containerName string, files []string, ns *core.Namespace) { podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) From f47df826abc11d5cf324a769488c563c2c948814 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 29 Apr 2024 13:26:03 -0700 Subject: [PATCH 16/36] Add noling:gosec to satisfy CodeQL --- tests/Makefile | 2 +- tests/framework/request.go | 2 +- tests/results/ngf-upgrade/b.jee/b.jee.md | 45 ++++++++++++++++++ tests/results/ngf-upgrade/b.jee/http.png | Bin 0 -> 3964 bytes tests/results/ngf-upgrade/b.jee/https.png | Bin 0 -> 3964 bytes tests/results/ngf-upgrade/s.berman/http.png | Bin 0 -> 4308 bytes tests/results/ngf-upgrade/s.berman/https.png | Bin 0 -> 4308 bytes .../results/ngf-upgrade/s.berman/s.berman.md | 30 ++++++++++++ 8 files changed, 77 insertions(+), 2 deletions(-) create mode 100755 tests/results/ngf-upgrade/b.jee/b.jee.md create mode 100644 tests/results/ngf-upgrade/b.jee/http.png create mode 100644 tests/results/ngf-upgrade/b.jee/https.png create mode 100644 tests/results/ngf-upgrade/s.berman/http.png create mode 100644 tests/results/ngf-upgrade/s.berman/https.png create mode 100755 tests/results/ngf-upgrade/s.berman/s.berman.md diff --git a/tests/Makefile b/tests/Makefile index 4feede75a7..0776da47c7 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -14,7 +14,7 @@ GINKGO_FLAGS= NGF_VERSION= CI=false TELEMETRY_ENDPOINT= -TELEMETRY_ENDPOINT_INSECURE= +TELEMETRY_ENDPOINT_INSECURE=false ifneq ($(GINKGO_LABEL),) override GINKGO_FLAGS += --label-filter "$(GINKGO_LABEL)" diff --git a/tests/framework/request.go b/tests/framework/request.go index 2fa78b35c3..f5a03c32a6 100644 --- a/tests/framework/request.go +++ b/tests/framework/request.go @@ -40,7 +40,7 @@ func Get(url, address string, timeout time.Duration) (int, string, error) { customTransport := http.DefaultTransport.(*http.Transport).Clone() // similar to how in our examples with https requests we run our curl command // we turn off verification of the certificate, we do the same here - customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} // #nosec G402 + customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} //nolint:gosec // for https test traffic client := &http.Client{Transport: customTransport} resp, err = client.Do(req) } else { diff --git a/tests/results/ngf-upgrade/b.jee/b.jee.md b/tests/results/ngf-upgrade/b.jee/b.jee.md new file mode 100755 index 0000000000..6e9202a7bd --- /dev/null +++ b/tests/results/ngf-upgrade/b.jee/b.jee.md @@ -0,0 +1,45 @@ +# Results + +## Test environment + +NGINX Plus: false + +GKE Cluster: + +- Node count: 3 +- k8s version: v1.28.7-gke.1026000 +- vCPUs per node: 2 +- RAM per node: 4019188Ki +- Max pods per node: 110 +- Zone: us-central1-c +- Instance Type: e2-medium +# Results + +## Test environment + +NGINX Plus: false + +GKE Cluster: + +- Node count: 3 +- k8s version: v1.28.7-gke.1026000 +- vCPUs per node: 2 +- RAM per node: 4019188Ki +- Max pods per node: 110 +- Zone: us-central1-c +- Instance Type: e2-medium +# Results + +## Test environment + +NGINX Plus: false + +GKE Cluster: + +- Node count: 3 +- k8s version: v1.28.7-gke.1026000 +- vCPUs per node: 2 +- RAM per node: 4019188Ki +- Max pods per node: 110 +- Zone: us-central1-c +- Instance Type: e2-medium diff --git a/tests/results/ngf-upgrade/b.jee/http.png b/tests/results/ngf-upgrade/b.jee/http.png new file mode 100644 index 0000000000000000000000000000000000000000..d1c379a77d50493b20def82f0cfe0170c004da3b GIT binary patch literal 3964 zcmeHJYfw{X8a{%Eh6U^ts)B^*j;)G-cmV}Mf)y`|aubOZA%I{T5W_75F*l$XD}>tnGv7JieDBQj-oN)e zKlu3^HHBHh001yOe(Z1n02m?wz~IbUL%pQx--(9+VAUSK6Ma*8vxKW0HQ%u=*^>@JM~cXNUt7eWo5N@@7|Lq zPo6)2o=T+_6cjvt`n08`rL(hhe0*GQLbYd49++44_Py$~Y6OBnQ3lk|S`C7b5YSr# zqV!+t*6ghg*_e;#>`j~E^{xOw7rk2nthfjP-LD~wn-JAcMWaDpzyd8%mY=42fBiWt zmVL!!c6O+QhJYRcU2jKyMYZEaB~6obLwa=G`cf-3bf zuZuqxoCE-7FIL`Fx9!ZV^{G}JKkONpdQT}bNqO+W>WaS%cYtv{GrN-t>+&)>DOcWF z4lGHVtaqgcj+wlM{C5W{?)m6-{r-W*155{|2a^_cMqdk)l zUGq=C@^VEP^#VQT;+czGbz|*S;}wLxq}e^--s0ZQQ+MUgRaj**r>&09jzx8E+ZDnR~Z7u*90=i0SPg)uE*)MfL3fZhQ zplC%$1Fj4oT#eV&jS17;DVfn5=I#49XZjA^>(TTm<0{B&qqH%DVRD>sTb}EhM5=N4 z@Wu@aU6!ovp{-E2p^w3zn^wHyrF(=*+*ePjO!GrWSl6o&guT0(7v?-9%%c9nkjHlg)oH(rThoV z671LE2Kew=uuujI55}{It?#pE7JaxBqf37qtB^6;&3MJ{$!>Xl-uQDo`#ef&OjvjC zo!C5uH(Ddpk)~z;3M(_1xjgt7qPV_8<_i>frlY`!GIcC|t4VtSouMi%{U`)>H$=q!4m zJWh^NkjnW7Kx?CRp2xGz9gQBdaUte~((*~T%do8;+z(^Yh1WeXr#TO3uig5Nc##vfKAJu{su zdRhuz+55jbbEM7kEMsuxx<18KfH;LnUu#F^2vg}x z9=uT)hcSq3}2n})VpI4?{ zn^hmo%cc61QFW|4kcT5b!gG1diA+GF2)hIm0*~$_TlH3vUB|EzTz6ToBnBZJ9Bh=u zg{4o3zu6EOO+%XF}Ig#)`6Wqg!p7L^{*cP)R(tp48|;MkjuN;c??XV5i1nAF95$E zzWgl1ZazRDX_-DExzEn!U_}o`U>66*C2J)2YjTaT;!^#Qqn2nR9-!Y=<>ulo0`}yc z*QFNw)H!c7_d;sage?Z6RC$l_lwe2y#7qzBD@cp?g~;H!?NQ{y!2$bH+?`wv`ehFN z!ScM_Ro110Kce7Qu@;HjsSBZ%r$m9LF?eVA%LN|SSevd^w>?|S5{Nh{=b=&_(M^6S?<=b&9mSIO ze^_c%6upGU6Axl@8$S$v96MhyE+5qJcww`Tq&`p0>$f_8vz|z}Tmas9HDeekRiLUp z$O|u7yNE~MHC7bC%sq;j>9>m9MC@t@>&=sF->0oZ=c=xnZg^2CE9Ge4oajlM=JS0S z%~STTME)sYOxe$wlj#-=w_T0uC-1^R+|5S*H=kKMTva4!rbVw)BxO?hLc9Zm=|Vj> zp?t%c7!vqo8H){fco0czI2W9V9Fo{>`z-bcuJSU`UxRBL>Nh2nmOLj8v0 zfF#zR8g2DPJd&D8cPZB-aYlb+iNnFxpchw6BKIMUks=#kv1V4?!w-%GF`A?I4VNvB z-EZUtP|kKddf&Z4mL-M9d0k*5N)6#*iwvxvRJ|>%q#bF}vn02y?_EW8ZQHqzRRE4< z4zoM?#uB{3eOqZ}Bz_I6<_xvsS^?Pgl0i}=Qz^W^SX z9nP;ng6vYnRj_i+YqvU63pL5l-SV-eCs1vL-19gGmUnG!lF-c(PC^~Ech%Io?`oSI zX}YZQinzZxnp|o5M3lkiEJb*pCP=Pd(k=DTyC$v7zSWv!XXsJ4ZQL1Wk{?MMriV&r qhLqsQ>wgBl2r@DFAJrYPfedb0y<={}>tnGv7JieDBQj-oN)e zKlu3^HHBHh001yOe(Z1n02m?wz~IbUL%pQx--(9+VAUSK6Ma*8vxKW0HQ%u=*^>@JM~cXNUt7eWo5N@@7|Lq zPo6)2o=T+_6cjvt`n08`rL(hhe0*GQLbYd49++44_Py$~Y6OBnQ3lk|S`C7b5YSr# zqV!+t*6ghg*_e;#>`j~E^{xOw7rk2nthfjP-LD~wn-JAcMWaDpzyd8%mY=42fBiWt zmVL!!c6O+QhJYRcU2jKyMYZEaB~6obLwa=G`cf-3bf zuZuqxoCE-7FIL`Fx9!ZV^{G}JKkONpdQT}bNqO+W>WaS%cYtv{GrN-t>+&)>DOcWF z4lGHVtaqgcj+wlM{C5W{?)m6-{r-W*155{|2a^_cMqdk)l zUGq=C@^VEP^#VQT;+czGbz|*S;}wLxq}e^--s0ZQQ+MUgRaj**r>&09jzx8E+ZDnR~Z7u*90=i0SPg)uE*)MfL3fZhQ zplC%$1Fj4oT#eV&jS17;DVfn5=I#49XZjA^>(TTm<0{B&qqH%DVRD>sTb}EhM5=N4 z@Wu@aU6!ovp{-E2p^w3zn^wHyrF(=*+*ePjO!GrWSl6o&guT0(7v?-9%%c9nkjHlg)oH(rThoV z671LE2Kew=uuujI55}{It?#pE7JaxBqf37qtB^6;&3MJ{$!>Xl-uQDo`#ef&OjvjC zo!C5uH(Ddpk)~z;3M(_1xjgt7qPV_8<_i>frlY`!GIcC|t4VtSouMi%{U`)>H$=q!4m zJWh^NkjnW7Kx?CRp2xGz9gQBdaUte~((*~T%do8;+z(^Yh1WeXr#TO3uig5Nc##vfKAJu{su zdRhuz+55jbbEM7kEMsuxx<18KfH;LnUu#F^2vg}x z9=uT)hcSq3}2n})VpI4?{ zn^hmo%cc61QFW|4kcT5b!gG1diA+GF2)hIm0*~$_TlH3vUB|EzTz6ToBnBZJ9Bh=u zg{4o3zu6EOO+%XF}Ig#)`6Wqg!p7L^{*cP)R(tp48|;MkjuN;c??XV5i1nAF95$E zzWgl1ZazRDX_-DExzEn!U_}o`U>66*C2J)2YjTaT;!^#Qqn2nR9-!Y=<>ulo0`}yc z*QFNw)H!c7_d;sage?Z6RC$l_lwe2y#7qzBD@cp?g~;H!?NQ{y!2$bH+?`wv`ehFN z!ScM_Ro110Kce7Qu@;HjsSBZ%r$m9LF?eVA%LN|SSevd^w>?|S5{Nh{=b=&_(M^6S?<=b&9mSIO ze^_c%6upGU6Axl@8$S$v96MhyE+5qJcww`Tq&`p0>$f_8vz|z}Tmas9HDeekRiLUp z$O|u7yNE~MHC7bC%sq;j>9>m9MC@t@>&=sF->0oZ=c=xnZg^2CE9Ge4oajlM=JS0S z%~STTME)sYOxe$wlj#-=w_T0uC-1^R+|5S*H=kKMTva4!rbVw)BxO?hLc9Zm=|Vj> zp?t%c7!vqo8H){fco0czI2W9V9Fo{>`z-bcuJSU`UxRBL>Nh2nmOLj8v0 zfF#zR8g2DPJd&D8cPZB-aYlb+iNnFxpchw6BKIMUks=#kv1V4?!w-%GF`A?I4VNvB z-EZUtP|kKddf&Z4mL-M9d0k*5N)6#*iwvxvRJ|>%q#bF}vn02y?_EW8ZQHqzRRE4< z4zoM?#uB{3eOqZ}Bz_I6<_xvsS^?Pgl0i}=Qz^W^SX z9nP;ng6vYnRj_i+YqvU63pL5l-SV-eCs1vL-19gGmUnG!lF-c(PC^~Ech%Io?`oSI zX}YZQinzZxnp|o5M3lkiEJb*pCP=Pd(k=DTyC$v7zSWv!XXsJ4ZQL1Wk{?MMriV&r qhLqsQ>wgBl2r@DFAJrYPfedb0y<={C1&epM1V#R_KLZZ}1M|o}KAt)G7EDw1a-XQ@OvI1+^ z(iSZUbXf@k0W1(AL=s2?3>XnZAP^GrC}0xuz<`7#sq8bAEHr z@9}-#@6Ne*f&-5 zp<#Ps0016j03e`JF;KwhC~z`C3^H5+Jc@w_k6y)>0{{vSfOrrGPCfkSBWNp1^a9$q zfB$|D508L=fcW@$91eH?{{615E(U|~?Af!$#YHHD=Gd`HRVA6XqzToCKu{$p1}TFb zDit^bK+%9}&=dDT`3LUh;cwUFj*e+iDgeMHJXZkQDT2WBNzkqe#06{ccaPg8@SIfDCqTod(o2P}qauAe+%y0HPW|*MWh7g@uKfm>3rq7bFr{UtdqB z(`)yKwLvxSOgnce698;~+P>fU*3H%l0AT07`26I>oLZ&8I!r$Hm(q|Q`%mHmeO|k6 zsfXU*S!u>hEpB-AeP+et9jBI$CiOr6kUcN3g?#=!%S&c3#IRmbA+6%pi6e~ajyNUKL>=a{A9Wq!58+A z_^U4DPuN-cPTHOGC}`_%k?U8Hl#$yis^I(dahtPRGA5I@ezd}|?!|?!e+wF2Bi9dWJ|wG_pF z6HN49M#=Y$E*`|ouJYPAHqzT?kt>@ev#BH{X{5UKy{EZ<%CsNlnrZj&uH{%fs*x_W zPN=J#iNp1h@Sw*_Q6!Rbr15=-o+)yq0!yUx~Smo0xhZXhHOs|3$_KdTh@ zr^Ye8D^0q_+`>{n`VMb|#VwaQI?P8Uy#RYEvt?7Eg?2y(r&~G_<+4NI=Crs2*~e+UBmdEVoWAqEWxxj2 z_?y4z5Tj{y@5+a>$iVz)WyUhM$ZlrGgg9ZRg;^2G^`E2*07k66F$HdWZA)C*U#PfD z7ukU&PlO`77_r0UH!dewICQ($30Ao7-RdpNz}1+03Y)-Hd*>R{V3qzk%d~RXG;3`T zvZ8&q;JRJL*)NS<4j=AW_-nn>qMgf{L?&jvjkw3^-=9R22j__($e#-jfT_i+D0z4w zOnuUIPXVHm;)FyzvkbcDEM!WHx(#gVbSo&q#x}yjhfnYv6(?4)iAr%kE`xUo4 zf}ri^eW-qZ@dq%WTy?u`od?25?NEwjJGno6hUn-!;G2-AO9-<1S=SercfHU%XL?P; zsngggpfz%Ugz?UKxu%gi3kKO@;|EwIP4#L{BHGDe=I7k9oMFpu3JVMD9Cg4gQ9 zgT9F312Nevt$n_4Nf>Y*EzGf#LHJ+dFo}i?S7uE=GwEvx`)-+dWb=O)`|UpcFHrv9 z3Ojh)hbrvd?p!n{Ou(KD-)386tC}mE=~brjveqA0=;g0kIn0`zG)pxBhQ;udwxNg% zU2x)<475y@IGH!6<552D+6irE! zDk=AWY%Xs+?_wrq7sR<<;adM8X31cdP4~cXixQ;rFjBr`37_Rb>Aa*T;lJsx-YBh zO~oj*ArQCe;$~W_ws`K$AA~QaKGq5T{cqv_l{WsPo#2~kITH|tSs!mAs5hP+oQIV6 zXMHG#xsfli^}D!!RBSrUVZw4GwkdO)QtKvyi{>ZjCFaN$x57-EogeSY)}$l-Ci4c@ zH0Bm&gRS+k0a@TyuXchSv#zFP*Ec0B6RS~mUI-(Qsy!ugDB0t?5fsqU(i-E5C_v)c ztbYA!g~uu_gXBIAhAxus27D#oAz!&h7rAL}1g%a)-$NBt6{dXiWkGO{DUErBj%klc zD+7lvr>2#4Xsq20uv!WI=!{dX#LZYdZZaq7;Lp}~kaWACLqms+#V;ob9beznB42&= zBJ}1=b?>L49;Q@5cW8L(0WOjLB02m>m@s=QmepLsS_)x(JO8mJo%|`2@^RRFrTdDi z*(Ql=pd{9?9m00=GtjKwSY`>k@%0<3BPFWWL47weGWp?v&k9KfGnlA(p)2eD`H^I$ zT$;<}pIHz6L{x=d#LmFOD)dyIZwu|~ns@!$dJ=i1tTCsd5`jw(n%u6;#`4`eele!9 z^ww@?Sju#d`wl$aZlzFB_Wz8}MT8saoP7qo#)rgg^hDAxQ31NMCP6<>MPw#&t6nwR{3(X;bjf&{py z(wM&0{Ap1w!G1%~$&;hpmv413B$4$nerH6w|3GJcBZZ8ae@_~IW4y$kNpYw2(|M#u zzgcH5W|)eV-&nVIbWMaYC9acX{FFXEB?S8^@^_}>x2f4T?scf~nw8ZKhp^1bldyit z%vhGq>X7R09A#Sb-kZ+Vp)$0SgAal`n}zNl*G_dKQcB75<=JPCz$w$$H!veJpKM*% zQ@oKi2{o<#yO*lH#=?01@mbezXVwQ z;&guYHYSlPMlvr8T^*8}KfgQb$u?cl$>dg-iRy4h+;WoH6R*TRnMx}Xu#*h0Tf*G4 z`iGRUq!6#=ZqG^s)AM6Wl*8y&2!HxB72S~kM!TIc){Sz7$Mw<(w-u1FO%aV38QvNI zLo<_^ajHI2#CT(S>K*1-&jM0P`2<-tcAQPS_z#!IeC1&epM1V#R_KLZZ}1M|o}KAt)G7EDw1a-XQ@OvI1+^ z(iSZUbXf@k0W1(AL=s2?3>XnZAP^GrC}0xuz<`7#sq8bAEHr z@9}-#@6Ne*f&-5 zp<#Ps0016j03e`JF;KwhC~z`C3^H5+Jc@w_k6y)>0{{vSfOrrGPCfkSBWNp1^a9$q zfB$|D508L=fcW@$91eH?{{615E(U|~?Af!$#YHHD=Gd`HRVA6XqzToCKu{$p1}TFb zDit^bK+%9}&=dDT`3LUh;cwUFj*e+iDgeMHJXZkQDT2WBNzkqe#06{ccaPg8@SIfDCqTod(o2P}qauAe+%y0HPW|*MWh7g@uKfm>3rq7bFr{UtdqB z(`)yKwLvxSOgnce698;~+P>fU*3H%l0AT07`26I>oLZ&8I!r$Hm(q|Q`%mHmeO|k6 zsfXU*S!u>hEpB-AeP+et9jBI$CiOr6kUcN3g?#=!%S&c3#IRmbA+6%pi6e~ajyNUKL>=a{A9Wq!58+A z_^U4DPuN-cPTHOGC}`_%k?U8Hl#$yis^I(dahtPRGA5I@ezd}|?!|?!e+wF2Bi9dWJ|wG_pF z6HN49M#=Y$E*`|ouJYPAHqzT?kt>@ev#BH{X{5UKy{EZ<%CsNlnrZj&uH{%fs*x_W zPN=J#iNp1h@Sw*_Q6!Rbr15=-o+)yq0!yUx~Smo0xhZXhHOs|3$_KdTh@ zr^Ye8D^0q_+`>{n`VMb|#VwaQI?P8Uy#RYEvt?7Eg?2y(r&~G_<+4NI=Crs2*~e+UBmdEVoWAqEWxxj2 z_?y4z5Tj{y@5+a>$iVz)WyUhM$ZlrGgg9ZRg;^2G^`E2*07k66F$HdWZA)C*U#PfD z7ukU&PlO`77_r0UH!dewICQ($30Ao7-RdpNz}1+03Y)-Hd*>R{V3qzk%d~RXG;3`T zvZ8&q;JRJL*)NS<4j=AW_-nn>qMgf{L?&jvjkw3^-=9R22j__($e#-jfT_i+D0z4w zOnuUIPXVHm;)FyzvkbcDEM!WHx(#gVbSo&q#x}yjhfnYv6(?4)iAr%kE`xUo4 zf}ri^eW-qZ@dq%WTy?u`od?25?NEwjJGno6hUn-!;G2-AO9-<1S=SercfHU%XL?P; zsngggpfz%Ugz?UKxu%gi3kKO@;|EwIP4#L{BHGDe=I7k9oMFpu3JVMD9Cg4gQ9 zgT9F312Nevt$n_4Nf>Y*EzGf#LHJ+dFo}i?S7uE=GwEvx`)-+dWb=O)`|UpcFHrv9 z3Ojh)hbrvd?p!n{Ou(KD-)386tC}mE=~brjveqA0=;g0kIn0`zG)pxBhQ;udwxNg% zU2x)<475y@IGH!6<552D+6irE! zDk=AWY%Xs+?_wrq7sR<<;adM8X31cdP4~cXixQ;rFjBr`37_Rb>Aa*T;lJsx-YBh zO~oj*ArQCe;$~W_ws`K$AA~QaKGq5T{cqv_l{WsPo#2~kITH|tSs!mAs5hP+oQIV6 zXMHG#xsfli^}D!!RBSrUVZw4GwkdO)QtKvyi{>ZjCFaN$x57-EogeSY)}$l-Ci4c@ zH0Bm&gRS+k0a@TyuXchSv#zFP*Ec0B6RS~mUI-(Qsy!ugDB0t?5fsqU(i-E5C_v)c ztbYA!g~uu_gXBIAhAxus27D#oAz!&h7rAL}1g%a)-$NBt6{dXiWkGO{DUErBj%klc zD+7lvr>2#4Xsq20uv!WI=!{dX#LZYdZZaq7;Lp}~kaWACLqms+#V;ob9beznB42&= zBJ}1=b?>L49;Q@5cW8L(0WOjLB02m>m@s=QmepLsS_)x(JO8mJo%|`2@^RRFrTdDi z*(Ql=pd{9?9m00=GtjKwSY`>k@%0<3BPFWWL47weGWp?v&k9KfGnlA(p)2eD`H^I$ zT$;<}pIHz6L{x=d#LmFOD)dyIZwu|~ns@!$dJ=i1tTCsd5`jw(n%u6;#`4`eele!9 z^ww@?Sju#d`wl$aZlzFB_Wz8}MT8saoP7qo#)rgg^hDAxQ31NMCP6<>MPw#&t6nwR{3(X;bjf&{py z(wM&0{Ap1w!G1%~$&;hpmv413B$4$nerH6w|3GJcBZZ8ae@_~IW4y$kNpYw2(|M#u zzgcH5W|)eV-&nVIbWMaYC9acX{FFXEB?S8^@^_}>x2f4T?scf~nw8ZKhp^1bldyit z%vhGq>X7R09A#Sb-kZ+Vp)$0SgAal`n}zNl*G_dKQcB75<=JPCz$w$$H!veJpKM*% zQ@oKi2{o<#yO*lH#=?01@mbezXVwQ z;&guYHYSlPMlvr8T^*8}KfgQb$u?cl$>dg-iRy4h+;WoH6R*TRnMx}Xu#*h0Tf*G4 z`iGRUq!6#=ZqG^s)AM6Wl*8y&2!HxB72S~kM!TIc){Sz7$Mw<(w-u1FO%aV38QvNI zLo<_^ajHI2#CT(S>K*1-&jM0P`2<-tcAQPS_z#!Ie Date: Mon, 29 Apr 2024 13:27:43 -0700 Subject: [PATCH 17/36] Remove accidentally added files --- tests/results/ngf-upgrade/b.jee/b.jee.md | 45 ------------------ tests/results/ngf-upgrade/b.jee/http.png | Bin 3964 -> 0 bytes tests/results/ngf-upgrade/b.jee/https.png | Bin 3964 -> 0 bytes tests/results/ngf-upgrade/s.berman/http.png | Bin 4308 -> 0 bytes tests/results/ngf-upgrade/s.berman/https.png | Bin 4308 -> 0 bytes .../results/ngf-upgrade/s.berman/s.berman.md | 30 ------------ 6 files changed, 75 deletions(-) delete mode 100755 tests/results/ngf-upgrade/b.jee/b.jee.md delete mode 100644 tests/results/ngf-upgrade/b.jee/http.png delete mode 100644 tests/results/ngf-upgrade/b.jee/https.png delete mode 100644 tests/results/ngf-upgrade/s.berman/http.png delete mode 100644 tests/results/ngf-upgrade/s.berman/https.png delete mode 100755 tests/results/ngf-upgrade/s.berman/s.berman.md diff --git a/tests/results/ngf-upgrade/b.jee/b.jee.md b/tests/results/ngf-upgrade/b.jee/b.jee.md deleted file mode 100755 index 6e9202a7bd..0000000000 --- a/tests/results/ngf-upgrade/b.jee/b.jee.md +++ /dev/null @@ -1,45 +0,0 @@ -# Results - -## Test environment - -NGINX Plus: false - -GKE Cluster: - -- Node count: 3 -- k8s version: v1.28.7-gke.1026000 -- vCPUs per node: 2 -- RAM per node: 4019188Ki -- Max pods per node: 110 -- Zone: us-central1-c -- Instance Type: e2-medium -# Results - -## Test environment - -NGINX Plus: false - -GKE Cluster: - -- Node count: 3 -- k8s version: v1.28.7-gke.1026000 -- vCPUs per node: 2 -- RAM per node: 4019188Ki -- Max pods per node: 110 -- Zone: us-central1-c -- Instance Type: e2-medium -# Results - -## Test environment - -NGINX Plus: false - -GKE Cluster: - -- Node count: 3 -- k8s version: v1.28.7-gke.1026000 -- vCPUs per node: 2 -- RAM per node: 4019188Ki -- Max pods per node: 110 -- Zone: us-central1-c -- Instance Type: e2-medium diff --git a/tests/results/ngf-upgrade/b.jee/http.png b/tests/results/ngf-upgrade/b.jee/http.png deleted file mode 100644 index d1c379a77d50493b20def82f0cfe0170c004da3b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3964 zcmeHJYfw{X8a{%Eh6U^ts)B^*j;)G-cmV}Mf)y`|aubOZA%I{T5W_75F*l$XD}>tnGv7JieDBQj-oN)e zKlu3^HHBHh001yOe(Z1n02m?wz~IbUL%pQx--(9+VAUSK6Ma*8vxKW0HQ%u=*^>@JM~cXNUt7eWo5N@@7|Lq zPo6)2o=T+_6cjvt`n08`rL(hhe0*GQLbYd49++44_Py$~Y6OBnQ3lk|S`C7b5YSr# zqV!+t*6ghg*_e;#>`j~E^{xOw7rk2nthfjP-LD~wn-JAcMWaDpzyd8%mY=42fBiWt zmVL!!c6O+QhJYRcU2jKyMYZEaB~6obLwa=G`cf-3bf zuZuqxoCE-7FIL`Fx9!ZV^{G}JKkONpdQT}bNqO+W>WaS%cYtv{GrN-t>+&)>DOcWF z4lGHVtaqgcj+wlM{C5W{?)m6-{r-W*155{|2a^_cMqdk)l zUGq=C@^VEP^#VQT;+czGbz|*S;}wLxq}e^--s0ZQQ+MUgRaj**r>&09jzx8E+ZDnR~Z7u*90=i0SPg)uE*)MfL3fZhQ zplC%$1Fj4oT#eV&jS17;DVfn5=I#49XZjA^>(TTm<0{B&qqH%DVRD>sTb}EhM5=N4 z@Wu@aU6!ovp{-E2p^w3zn^wHyrF(=*+*ePjO!GrWSl6o&guT0(7v?-9%%c9nkjHlg)oH(rThoV z671LE2Kew=uuujI55}{It?#pE7JaxBqf37qtB^6;&3MJ{$!>Xl-uQDo`#ef&OjvjC zo!C5uH(Ddpk)~z;3M(_1xjgt7qPV_8<_i>frlY`!GIcC|t4VtSouMi%{U`)>H$=q!4m zJWh^NkjnW7Kx?CRp2xGz9gQBdaUte~((*~T%do8;+z(^Yh1WeXr#TO3uig5Nc##vfKAJu{su zdRhuz+55jbbEM7kEMsuxx<18KfH;LnUu#F^2vg}x z9=uT)hcSq3}2n})VpI4?{ zn^hmo%cc61QFW|4kcT5b!gG1diA+GF2)hIm0*~$_TlH3vUB|EzTz6ToBnBZJ9Bh=u zg{4o3zu6EOO+%XF}Ig#)`6Wqg!p7L^{*cP)R(tp48|;MkjuN;c??XV5i1nAF95$E zzWgl1ZazRDX_-DExzEn!U_}o`U>66*C2J)2YjTaT;!^#Qqn2nR9-!Y=<>ulo0`}yc z*QFNw)H!c7_d;sage?Z6RC$l_lwe2y#7qzBD@cp?g~;H!?NQ{y!2$bH+?`wv`ehFN z!ScM_Ro110Kce7Qu@;HjsSBZ%r$m9LF?eVA%LN|SSevd^w>?|S5{Nh{=b=&_(M^6S?<=b&9mSIO ze^_c%6upGU6Axl@8$S$v96MhyE+5qJcww`Tq&`p0>$f_8vz|z}Tmas9HDeekRiLUp z$O|u7yNE~MHC7bC%sq;j>9>m9MC@t@>&=sF->0oZ=c=xnZg^2CE9Ge4oajlM=JS0S z%~STTME)sYOxe$wlj#-=w_T0uC-1^R+|5S*H=kKMTva4!rbVw)BxO?hLc9Zm=|Vj> zp?t%c7!vqo8H){fco0czI2W9V9Fo{>`z-bcuJSU`UxRBL>Nh2nmOLj8v0 zfF#zR8g2DPJd&D8cPZB-aYlb+iNnFxpchw6BKIMUks=#kv1V4?!w-%GF`A?I4VNvB z-EZUtP|kKddf&Z4mL-M9d0k*5N)6#*iwvxvRJ|>%q#bF}vn02y?_EW8ZQHqzRRE4< z4zoM?#uB{3eOqZ}Bz_I6<_xvsS^?Pgl0i}=Qz^W^SX z9nP;ng6vYnRj_i+YqvU63pL5l-SV-eCs1vL-19gGmUnG!lF-c(PC^~Ech%Io?`oSI zX}YZQinzZxnp|o5M3lkiEJb*pCP=Pd(k=DTyC$v7zSWv!XXsJ4ZQL1Wk{?MMriV&r qhLqsQ>wgBl2r@DFAJrYPfedb0y<={}>tnGv7JieDBQj-oN)e zKlu3^HHBHh001yOe(Z1n02m?wz~IbUL%pQx--(9+VAUSK6Ma*8vxKW0HQ%u=*^>@JM~cXNUt7eWo5N@@7|Lq zPo6)2o=T+_6cjvt`n08`rL(hhe0*GQLbYd49++44_Py$~Y6OBnQ3lk|S`C7b5YSr# zqV!+t*6ghg*_e;#>`j~E^{xOw7rk2nthfjP-LD~wn-JAcMWaDpzyd8%mY=42fBiWt zmVL!!c6O+QhJYRcU2jKyMYZEaB~6obLwa=G`cf-3bf zuZuqxoCE-7FIL`Fx9!ZV^{G}JKkONpdQT}bNqO+W>WaS%cYtv{GrN-t>+&)>DOcWF z4lGHVtaqgcj+wlM{C5W{?)m6-{r-W*155{|2a^_cMqdk)l zUGq=C@^VEP^#VQT;+czGbz|*S;}wLxq}e^--s0ZQQ+MUgRaj**r>&09jzx8E+ZDnR~Z7u*90=i0SPg)uE*)MfL3fZhQ zplC%$1Fj4oT#eV&jS17;DVfn5=I#49XZjA^>(TTm<0{B&qqH%DVRD>sTb}EhM5=N4 z@Wu@aU6!ovp{-E2p^w3zn^wHyrF(=*+*ePjO!GrWSl6o&guT0(7v?-9%%c9nkjHlg)oH(rThoV z671LE2Kew=uuujI55}{It?#pE7JaxBqf37qtB^6;&3MJ{$!>Xl-uQDo`#ef&OjvjC zo!C5uH(Ddpk)~z;3M(_1xjgt7qPV_8<_i>frlY`!GIcC|t4VtSouMi%{U`)>H$=q!4m zJWh^NkjnW7Kx?CRp2xGz9gQBdaUte~((*~T%do8;+z(^Yh1WeXr#TO3uig5Nc##vfKAJu{su zdRhuz+55jbbEM7kEMsuxx<18KfH;LnUu#F^2vg}x z9=uT)hcSq3}2n})VpI4?{ zn^hmo%cc61QFW|4kcT5b!gG1diA+GF2)hIm0*~$_TlH3vUB|EzTz6ToBnBZJ9Bh=u zg{4o3zu6EOO+%XF}Ig#)`6Wqg!p7L^{*cP)R(tp48|;MkjuN;c??XV5i1nAF95$E zzWgl1ZazRDX_-DExzEn!U_}o`U>66*C2J)2YjTaT;!^#Qqn2nR9-!Y=<>ulo0`}yc z*QFNw)H!c7_d;sage?Z6RC$l_lwe2y#7qzBD@cp?g~;H!?NQ{y!2$bH+?`wv`ehFN z!ScM_Ro110Kce7Qu@;HjsSBZ%r$m9LF?eVA%LN|SSevd^w>?|S5{Nh{=b=&_(M^6S?<=b&9mSIO ze^_c%6upGU6Axl@8$S$v96MhyE+5qJcww`Tq&`p0>$f_8vz|z}Tmas9HDeekRiLUp z$O|u7yNE~MHC7bC%sq;j>9>m9MC@t@>&=sF->0oZ=c=xnZg^2CE9Ge4oajlM=JS0S z%~STTME)sYOxe$wlj#-=w_T0uC-1^R+|5S*H=kKMTva4!rbVw)BxO?hLc9Zm=|Vj> zp?t%c7!vqo8H){fco0czI2W9V9Fo{>`z-bcuJSU`UxRBL>Nh2nmOLj8v0 zfF#zR8g2DPJd&D8cPZB-aYlb+iNnFxpchw6BKIMUks=#kv1V4?!w-%GF`A?I4VNvB z-EZUtP|kKddf&Z4mL-M9d0k*5N)6#*iwvxvRJ|>%q#bF}vn02y?_EW8ZQHqzRRE4< z4zoM?#uB{3eOqZ}Bz_I6<_xvsS^?Pgl0i}=Qz^W^SX z9nP;ng6vYnRj_i+YqvU63pL5l-SV-eCs1vL-19gGmUnG!lF-c(PC^~Ech%Io?`oSI zX}YZQinzZxnp|o5M3lkiEJb*pCP=Pd(k=DTyC$v7zSWv!XXsJ4ZQL1Wk{?MMriV&r qhLqsQ>wgBl2r@DFAJrYPfedb0y<={C1&epM1V#R_KLZZ}1M|o}KAt)G7EDw1a-XQ@OvI1+^ z(iSZUbXf@k0W1(AL=s2?3>XnZAP^GrC}0xuz<`7#sq8bAEHr z@9}-#@6Ne*f&-5 zp<#Ps0016j03e`JF;KwhC~z`C3^H5+Jc@w_k6y)>0{{vSfOrrGPCfkSBWNp1^a9$q zfB$|D508L=fcW@$91eH?{{615E(U|~?Af!$#YHHD=Gd`HRVA6XqzToCKu{$p1}TFb zDit^bK+%9}&=dDT`3LUh;cwUFj*e+iDgeMHJXZkQDT2WBNzkqe#06{ccaPg8@SIfDCqTod(o2P}qauAe+%y0HPW|*MWh7g@uKfm>3rq7bFr{UtdqB z(`)yKwLvxSOgnce698;~+P>fU*3H%l0AT07`26I>oLZ&8I!r$Hm(q|Q`%mHmeO|k6 zsfXU*S!u>hEpB-AeP+et9jBI$CiOr6kUcN3g?#=!%S&c3#IRmbA+6%pi6e~ajyNUKL>=a{A9Wq!58+A z_^U4DPuN-cPTHOGC}`_%k?U8Hl#$yis^I(dahtPRGA5I@ezd}|?!|?!e+wF2Bi9dWJ|wG_pF z6HN49M#=Y$E*`|ouJYPAHqzT?kt>@ev#BH{X{5UKy{EZ<%CsNlnrZj&uH{%fs*x_W zPN=J#iNp1h@Sw*_Q6!Rbr15=-o+)yq0!yUx~Smo0xhZXhHOs|3$_KdTh@ zr^Ye8D^0q_+`>{n`VMb|#VwaQI?P8Uy#RYEvt?7Eg?2y(r&~G_<+4NI=Crs2*~e+UBmdEVoWAqEWxxj2 z_?y4z5Tj{y@5+a>$iVz)WyUhM$ZlrGgg9ZRg;^2G^`E2*07k66F$HdWZA)C*U#PfD z7ukU&PlO`77_r0UH!dewICQ($30Ao7-RdpNz}1+03Y)-Hd*>R{V3qzk%d~RXG;3`T zvZ8&q;JRJL*)NS<4j=AW_-nn>qMgf{L?&jvjkw3^-=9R22j__($e#-jfT_i+D0z4w zOnuUIPXVHm;)FyzvkbcDEM!WHx(#gVbSo&q#x}yjhfnYv6(?4)iAr%kE`xUo4 zf}ri^eW-qZ@dq%WTy?u`od?25?NEwjJGno6hUn-!;G2-AO9-<1S=SercfHU%XL?P; zsngggpfz%Ugz?UKxu%gi3kKO@;|EwIP4#L{BHGDe=I7k9oMFpu3JVMD9Cg4gQ9 zgT9F312Nevt$n_4Nf>Y*EzGf#LHJ+dFo}i?S7uE=GwEvx`)-+dWb=O)`|UpcFHrv9 z3Ojh)hbrvd?p!n{Ou(KD-)386tC}mE=~brjveqA0=;g0kIn0`zG)pxBhQ;udwxNg% zU2x)<475y@IGH!6<552D+6irE! zDk=AWY%Xs+?_wrq7sR<<;adM8X31cdP4~cXixQ;rFjBr`37_Rb>Aa*T;lJsx-YBh zO~oj*ArQCe;$~W_ws`K$AA~QaKGq5T{cqv_l{WsPo#2~kITH|tSs!mAs5hP+oQIV6 zXMHG#xsfli^}D!!RBSrUVZw4GwkdO)QtKvyi{>ZjCFaN$x57-EogeSY)}$l-Ci4c@ zH0Bm&gRS+k0a@TyuXchSv#zFP*Ec0B6RS~mUI-(Qsy!ugDB0t?5fsqU(i-E5C_v)c ztbYA!g~uu_gXBIAhAxus27D#oAz!&h7rAL}1g%a)-$NBt6{dXiWkGO{DUErBj%klc zD+7lvr>2#4Xsq20uv!WI=!{dX#LZYdZZaq7;Lp}~kaWACLqms+#V;ob9beznB42&= zBJ}1=b?>L49;Q@5cW8L(0WOjLB02m>m@s=QmepLsS_)x(JO8mJo%|`2@^RRFrTdDi z*(Ql=pd{9?9m00=GtjKwSY`>k@%0<3BPFWWL47weGWp?v&k9KfGnlA(p)2eD`H^I$ zT$;<}pIHz6L{x=d#LmFOD)dyIZwu|~ns@!$dJ=i1tTCsd5`jw(n%u6;#`4`eele!9 z^ww@?Sju#d`wl$aZlzFB_Wz8}MT8saoP7qo#)rgg^hDAxQ31NMCP6<>MPw#&t6nwR{3(X;bjf&{py z(wM&0{Ap1w!G1%~$&;hpmv413B$4$nerH6w|3GJcBZZ8ae@_~IW4y$kNpYw2(|M#u zzgcH5W|)eV-&nVIbWMaYC9acX{FFXEB?S8^@^_}>x2f4T?scf~nw8ZKhp^1bldyit z%vhGq>X7R09A#Sb-kZ+Vp)$0SgAal`n}zNl*G_dKQcB75<=JPCz$w$$H!veJpKM*% zQ@oKi2{o<#yO*lH#=?01@mbezXVwQ z;&guYHYSlPMlvr8T^*8}KfgQb$u?cl$>dg-iRy4h+;WoH6R*TRnMx}Xu#*h0Tf*G4 z`iGRUq!6#=ZqG^s)AM6Wl*8y&2!HxB72S~kM!TIc){Sz7$Mw<(w-u1FO%aV38QvNI zLo<_^ajHI2#CT(S>K*1-&jM0P`2<-tcAQPS_z#!IeC1&epM1V#R_KLZZ}1M|o}KAt)G7EDw1a-XQ@OvI1+^ z(iSZUbXf@k0W1(AL=s2?3>XnZAP^GrC}0xuz<`7#sq8bAEHr z@9}-#@6Ne*f&-5 zp<#Ps0016j03e`JF;KwhC~z`C3^H5+Jc@w_k6y)>0{{vSfOrrGPCfkSBWNp1^a9$q zfB$|D508L=fcW@$91eH?{{615E(U|~?Af!$#YHHD=Gd`HRVA6XqzToCKu{$p1}TFb zDit^bK+%9}&=dDT`3LUh;cwUFj*e+iDgeMHJXZkQDT2WBNzkqe#06{ccaPg8@SIfDCqTod(o2P}qauAe+%y0HPW|*MWh7g@uKfm>3rq7bFr{UtdqB z(`)yKwLvxSOgnce698;~+P>fU*3H%l0AT07`26I>oLZ&8I!r$Hm(q|Q`%mHmeO|k6 zsfXU*S!u>hEpB-AeP+et9jBI$CiOr6kUcN3g?#=!%S&c3#IRmbA+6%pi6e~ajyNUKL>=a{A9Wq!58+A z_^U4DPuN-cPTHOGC}`_%k?U8Hl#$yis^I(dahtPRGA5I@ezd}|?!|?!e+wF2Bi9dWJ|wG_pF z6HN49M#=Y$E*`|ouJYPAHqzT?kt>@ev#BH{X{5UKy{EZ<%CsNlnrZj&uH{%fs*x_W zPN=J#iNp1h@Sw*_Q6!Rbr15=-o+)yq0!yUx~Smo0xhZXhHOs|3$_KdTh@ zr^Ye8D^0q_+`>{n`VMb|#VwaQI?P8Uy#RYEvt?7Eg?2y(r&~G_<+4NI=Crs2*~e+UBmdEVoWAqEWxxj2 z_?y4z5Tj{y@5+a>$iVz)WyUhM$ZlrGgg9ZRg;^2G^`E2*07k66F$HdWZA)C*U#PfD z7ukU&PlO`77_r0UH!dewICQ($30Ao7-RdpNz}1+03Y)-Hd*>R{V3qzk%d~RXG;3`T zvZ8&q;JRJL*)NS<4j=AW_-nn>qMgf{L?&jvjkw3^-=9R22j__($e#-jfT_i+D0z4w zOnuUIPXVHm;)FyzvkbcDEM!WHx(#gVbSo&q#x}yjhfnYv6(?4)iAr%kE`xUo4 zf}ri^eW-qZ@dq%WTy?u`od?25?NEwjJGno6hUn-!;G2-AO9-<1S=SercfHU%XL?P; zsngggpfz%Ugz?UKxu%gi3kKO@;|EwIP4#L{BHGDe=I7k9oMFpu3JVMD9Cg4gQ9 zgT9F312Nevt$n_4Nf>Y*EzGf#LHJ+dFo}i?S7uE=GwEvx`)-+dWb=O)`|UpcFHrv9 z3Ojh)hbrvd?p!n{Ou(KD-)386tC}mE=~brjveqA0=;g0kIn0`zG)pxBhQ;udwxNg% zU2x)<475y@IGH!6<552D+6irE! zDk=AWY%Xs+?_wrq7sR<<;adM8X31cdP4~cXixQ;rFjBr`37_Rb>Aa*T;lJsx-YBh zO~oj*ArQCe;$~W_ws`K$AA~QaKGq5T{cqv_l{WsPo#2~kITH|tSs!mAs5hP+oQIV6 zXMHG#xsfli^}D!!RBSrUVZw4GwkdO)QtKvyi{>ZjCFaN$x57-EogeSY)}$l-Ci4c@ zH0Bm&gRS+k0a@TyuXchSv#zFP*Ec0B6RS~mUI-(Qsy!ugDB0t?5fsqU(i-E5C_v)c ztbYA!g~uu_gXBIAhAxus27D#oAz!&h7rAL}1g%a)-$NBt6{dXiWkGO{DUErBj%klc zD+7lvr>2#4Xsq20uv!WI=!{dX#LZYdZZaq7;Lp}~kaWACLqms+#V;ob9beznB42&= zBJ}1=b?>L49;Q@5cW8L(0WOjLB02m>m@s=QmepLsS_)x(JO8mJo%|`2@^RRFrTdDi z*(Ql=pd{9?9m00=GtjKwSY`>k@%0<3BPFWWL47weGWp?v&k9KfGnlA(p)2eD`H^I$ zT$;<}pIHz6L{x=d#LmFOD)dyIZwu|~ns@!$dJ=i1tTCsd5`jw(n%u6;#`4`eele!9 z^ww@?Sju#d`wl$aZlzFB_Wz8}MT8saoP7qo#)rgg^hDAxQ31NMCP6<>MPw#&t6nwR{3(X;bjf&{py z(wM&0{Ap1w!G1%~$&;hpmv413B$4$nerH6w|3GJcBZZ8ae@_~IW4y$kNpYw2(|M#u zzgcH5W|)eV-&nVIbWMaYC9acX{FFXEB?S8^@^_}>x2f4T?scf~nw8ZKhp^1bldyit z%vhGq>X7R09A#Sb-kZ+Vp)$0SgAal`n}zNl*G_dKQcB75<=JPCz$w$$H!veJpKM*% zQ@oKi2{o<#yO*lH#=?01@mbezXVwQ z;&guYHYSlPMlvr8T^*8}KfgQb$u?cl$>dg-iRy4h+;WoH6R*TRnMx}Xu#*h0Tf*G4 z`iGRUq!6#=ZqG^s)AM6Wl*8y&2!HxB72S~kM!TIc){Sz7$Mw<(w-u1FO%aV38QvNI zLo<_^ajHI2#CT(S>K*1-&jM0P`2<-tcAQPS_z#!Ie Date: Mon, 29 Apr 2024 14:47:31 -0700 Subject: [PATCH 18/36] Add placeholder values in node debugger job --- .../suite/manifests/graceful-recovery/node-debugger-job.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml b/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml index 12d7241d3d..5e1aa37695 100644 --- a/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml +++ b/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml @@ -8,12 +8,12 @@ spec: hostPID: true hostIPC: true nodeSelector: - kubernetes.io/hostname: "" + kubernetes.io/hostname: "to be replaced by the test" containers: - name: node-debugger-container image: ubuntu command: ["/bin/bash", "-c"] - args: [] + args: ["to be replaced by the test"] securityContext: privileged: true volumeMounts: From 851de6b37d8eaadcba954c264d330d8c27f1b3c6 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 29 Apr 2024 15:04:39 -0700 Subject: [PATCH 19/36] Add check for job spec container count --- tests/suite/graceful_recovery_test.go | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index a0f8506606..b9ff77de15 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -271,7 +271,7 @@ func getContainerRestartCount(containerName, ngfPodName string) (int, error) { var ngfPod core.Pod if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: ngfPodName}, &ngfPod); err != nil { - return 0, errors.New("could not retrieve ngfPod") + return 0, fmt.Errorf("error retriving NGF Pod: %w", err) } var restartCount int @@ -290,26 +290,32 @@ func runNodeDebuggerJob(ngfPodName, jobScript string) (*v1.Job, error) { var ngfPod core.Pod if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: ngfPodName}, &ngfPod); err != nil { - return nil, errors.New("could not retrieve ngfPod") + return nil, fmt.Errorf("error retriving NGF Pod: %w", err) } b, err := resourceManager.GetFileContents("graceful-recovery/node-debugger-job.yaml") if err != nil { - return nil, errors.New("error processing node debugger job file") + return nil, fmt.Errorf("error processing node debugger job file: %w", err) } job := &v1.Job{} _ = v1.AddToScheme(resourceManager.K8sClient.Scheme()) if err = yaml.Unmarshal(b.Bytes(), job); err != nil { - return nil, errors.New("error with yaml unmarshal") + return nil, fmt.Errorf("error with yaml unmarshal: %w", err) } job.Spec.Template.Spec.NodeSelector["kubernetes.io/hostname"] = ngfPod.Spec.NodeName + if len(job.Spec.Template.Spec.Containers) != 1 { + return nil, fmt.Errorf( + "expected node debugger job to contain one container, actual number: %d", + len(job.Spec.Template.Spec.Containers), + ) + } job.Spec.Template.Spec.Containers[0].Args = []string{jobScript} job.Namespace = ngfNamespace if err = resourceManager.Apply([]client.Object{job}); err != nil { - return nil, fmt.Errorf("errored in applying job: %w", err) + return nil, fmt.Errorf("error in applying job: %w", err) } return job, nil From cf6bd61767857a63f0659e8758af7bd982012afa Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 29 Apr 2024 15:30:03 -0700 Subject: [PATCH 20/36] Remove regex from pgrep commands --- tests/suite/graceful_recovery_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index b9ff77de15..ac4d731b5b 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -93,9 +93,9 @@ func runRecoveryTest(containerName string, files []string, ns *core.Namespace) { func restartContainer(containerName string) { var jobScript string if containerName == "nginx" { - jobScript = "PID=$(pgrep -f \"[n]ginx: master process\") && kill -9 $PID" + jobScript = "PID=$(pgrep -f \"nginx: master process\") && kill -9 $PID" } else { - jobScript = "PID=$(pgrep -f \"/[u]sr/bin/gateway\") && kill -9 $PID" + jobScript = "PID=$(pgrep -f \"/usr/bin/gateway\") && kill -9 $PID" } podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) From afb3af90438eeb04ca0bad91ebd82525f3660ce6 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 29 Apr 2024 15:49:24 -0700 Subject: [PATCH 21/36] Remove leader lease checking from nginx container restart --- tests/suite/graceful_recovery_test.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index ac4d731b5b..a9b2d04fad 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -69,14 +69,19 @@ func runRecoveryTest(containerName string, files []string, ns *core.Namespace) { Expect(err).ToNot(HaveOccurred()) Expect(podNames).ToNot(BeEmpty()) - leaseName, err := getLeaderElectionLeaseHolderName() - Expect(err).ToNot(HaveOccurred()) + var leaseName string + if containerName != nginxContainerName { + leaseName, err = getLeaderElectionLeaseHolderName() + Expect(err).ToNot(HaveOccurred()) + } restartContainer(containerName) checkContainerLogsForErrors(podNames[0]) - Expect(waitForLeaderLeaseToChange(leaseName)).ToNot(HaveOccurred()) + if containerName != nginxContainerName { + Expect(waitForLeaderLeaseToChange(leaseName)).ToNot(HaveOccurred()) + } Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) From 1ad52715616efb2ff245990b40f4de68f88ad9a7 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 29 Apr 2024 15:54:20 -0700 Subject: [PATCH 22/36] Add ngfPodName to BeforeAll --- tests/suite/graceful_recovery_test.go | 43 ++++++++++++++++----------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index a9b2d04fad..43a3ab94d7 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -36,12 +36,23 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov "graceful-recovery/gateway.yaml", "graceful-recovery/cafe-routes.yaml", } + ns := &core.Namespace{ ObjectMeta: metav1.ObjectMeta{ Name: "graceful-recovery", }, } + var ngfPodName string + + BeforeAll(func() { + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(podNames).ToNot(BeEmpty()) + + ngfPodName = podNames[0] + }) + BeforeEach(func() { Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) @@ -56,28 +67,28 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov }) It("recovers when NGF container is restarted", func() { - runRecoveryTest(ngfContainerName, files, ns) + runRecoveryTest(ngfPodName, ngfContainerName, files, ns) }) It("recovers when nginx container is restarted", func() { - runRecoveryTest(nginxContainerName, files, ns) + runRecoveryTest(ngfPodName, nginxContainerName, files, ns) }) }) -func runRecoveryTest(containerName string, files []string, ns *core.Namespace) { - podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) - Expect(err).ToNot(HaveOccurred()) - Expect(podNames).ToNot(BeEmpty()) +func runRecoveryTest(ngfPodName, containerName string, files []string, ns *core.Namespace) { + var ( + err error + leaseName string + ) - var leaseName string if containerName != nginxContainerName { leaseName, err = getLeaderElectionLeaseHolderName() Expect(err).ToNot(HaveOccurred()) } - restartContainer(containerName) + restartContainer(ngfPodName, containerName) - checkContainerLogsForErrors(podNames[0]) + checkContainerLogsForErrors(ngfPodName) if containerName != nginxContainerName { Expect(waitForLeaderLeaseToChange(leaseName)).ToNot(HaveOccurred()) @@ -95,7 +106,7 @@ func runRecoveryTest(containerName string, files []string, ns *core.Namespace) { Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) } -func restartContainer(containerName string) { +func restartContainer(ngfPodName, containerName string) { var jobScript string if containerName == "nginx" { jobScript = "PID=$(pgrep -f \"nginx: master process\") && kill -9 $PID" @@ -103,17 +114,13 @@ func restartContainer(containerName string) { jobScript = "PID=$(pgrep -f \"/usr/bin/gateway\") && kill -9 $PID" } - podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) - Expect(err).ToNot(HaveOccurred()) - Expect(podNames).ToNot(BeEmpty()) - - restartCount, err := getContainerRestartCount(containerName, podNames[0]) + restartCount, err := getContainerRestartCount(containerName, ngfPodName) Expect(err).ToNot(HaveOccurred()) - job, err := runNodeDebuggerJob(podNames[0], jobScript) + job, err := runNodeDebuggerJob(ngfPodName, jobScript) Expect(err).ToNot(HaveOccurred()) - Expect(waitForContainerRestart(podNames[0], containerName, restartCount)).ToNot(HaveOccurred()) + Expect(waitForContainerRestart(ngfPodName, containerName, restartCount)).ToNot(HaveOccurred()) // propagation policy is set to delete underlying pod created through job Expect(resourceManager.Delete( @@ -270,7 +277,7 @@ func getLeaderElectionLeaseHolderName() (string, error) { return *lease.Spec.HolderIdentity, nil } -func getContainerRestartCount(containerName, ngfPodName string) (int, error) { +func getContainerRestartCount(ngfPodName, containerName string) (int, error) { ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) defer cancel() From 7eaad5bcadd5cccc611e9d62dd51895e4524362d Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 29 Apr 2024 16:01:39 -0700 Subject: [PATCH 23/36] Fix argument refactor mistake --- tests/suite/graceful_recovery_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 43a3ab94d7..c011c39633 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -114,7 +114,7 @@ func restartContainer(ngfPodName, containerName string) { jobScript = "PID=$(pgrep -f \"/usr/bin/gateway\") && kill -9 $PID" } - restartCount, err := getContainerRestartCount(containerName, ngfPodName) + restartCount, err := getContainerRestartCount(ngfPodName, containerName) Expect(err).ToNot(HaveOccurred()) job, err := runNodeDebuggerJob(ngfPodName, jobScript) @@ -139,7 +139,7 @@ func waitForContainerRestart(ngfPodName, containerName string, currentRestartCou 500*time.Millisecond, true, /* poll immediately */ func(_ context.Context) (bool, error) { - restartCount, err := getContainerRestartCount(containerName, ngfPodName) + restartCount, err := getContainerRestartCount(ngfPodName, containerName) if err != nil { return false, nil } From d513eeb79161e88c906b9e2aa3b280349471cf34 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 30 Apr 2024 11:10:03 -0700 Subject: [PATCH 24/36] Add ContainerRestartTimeout --- tests/framework/timeout.go | 14 +++++++++----- tests/suite/graceful_recovery_test.go | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/framework/timeout.go b/tests/framework/timeout.go index d49e988a50..af18a77b9e 100644 --- a/tests/framework/timeout.go +++ b/tests/framework/timeout.go @@ -17,15 +17,19 @@ type TimeoutConfig struct { // RequestTimeout represents the maximum time for making an HTTP Request with the roundtripper. RequestTimeout time.Duration + + // ContainerRestartTimeout represents the maximum time for a Kubernetes Container to restart. + ContainerRestartTimeout time.Duration } // DefaultTimeoutConfig populates a TimeoutConfig with the default values. func DefaultTimeoutConfig() TimeoutConfig { return TimeoutConfig{ - CreateTimeout: 60 * time.Second, - DeleteTimeout: 10 * time.Second, - GetTimeout: 10 * time.Second, - ManifestFetchTimeout: 10 * time.Second, - RequestTimeout: 10 * time.Second, + CreateTimeout: 60 * time.Second, + DeleteTimeout: 10 * time.Second, + GetTimeout: 10 * time.Second, + ManifestFetchTimeout: 10 * time.Second, + RequestTimeout: 10 * time.Second, + ContainerRestartTimeout: 10 * time.Second, } } diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index c011c39633..884a54b85d 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -130,7 +130,7 @@ func restartContainer(ngfPodName, containerName string) { } func waitForContainerRestart(ngfPodName, containerName string, currentRestartCount int) error { - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.RequestTimeout) + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.ContainerRestartTimeout) defer cancel() //nolint:nilerr From b4c762823b5a4d0816d6b1750afb91e515fdb0bc Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 30 Apr 2024 11:26:33 -0700 Subject: [PATCH 25/36] Move batchv1 scheme to system suite test --- tests/suite/graceful_recovery_test.go | 1 - tests/suite/system_suite_test.go | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 884a54b85d..72976ac4d0 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -311,7 +311,6 @@ func runNodeDebuggerJob(ngfPodName, jobScript string) (*v1.Job, error) { } job := &v1.Job{} - _ = v1.AddToScheme(resourceManager.K8sClient.Scheme()) if err = yaml.Unmarshal(b.Bytes(), job); err != nil { return nil, fmt.Errorf("error with yaml unmarshal: %w", err) } diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index 870281e71b..6d987a5454 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -14,6 +14,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" apps "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" coordination "k8s.io/api/coordination/v1" core "k8s.io/api/core/v1" apiext "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" @@ -96,6 +97,7 @@ func setup(cfg setupConfig, extraInstallArgs ...string) { Expect(apiext.AddToScheme(scheme)).To(Succeed()) Expect(coordination.AddToScheme(scheme)).To(Succeed()) Expect(v1.AddToScheme(scheme)).To(Succeed()) + Expect(batchv1.AddToScheme(scheme)).To(Succeed()) options := client.Options{ Scheme: scheme, From bbd7448cce8bcc1093060a389875fabf84ee0147 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 30 Apr 2024 11:41:11 -0700 Subject: [PATCH 26/36] Refactor constants to local variables --- tests/suite/graceful_recovery_test.go | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 72976ac4d0..6df8971362 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -23,8 +23,6 @@ import ( ) const ( - teaURL = "https://cafe.example.com/tea" - coffeeURL = "http://cafe.example.com/coffee" nginxContainerName = "nginx" ngfContainerName = "nginx-gateway" ) @@ -43,6 +41,9 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov }, } + teaURL := "https://cafe.example.com/tea" + coffeeURL := "http://cafe.example.com/coffee" + var ngfPodName string BeforeAll(func() { @@ -58,7 +59,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) + Expect(waitForWorkingTraffic(teaURL, coffeeURL)).ToNot(HaveOccurred()) }) AfterAll(func() { @@ -67,15 +68,15 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov }) It("recovers when NGF container is restarted", func() { - runRecoveryTest(ngfPodName, ngfContainerName, files, ns) + runRecoveryTest(teaURL, coffeeURL, ngfPodName, ngfContainerName, files, ns) }) It("recovers when nginx container is restarted", func() { - runRecoveryTest(ngfPodName, nginxContainerName, files, ns) + runRecoveryTest(teaURL, coffeeURL, ngfPodName, nginxContainerName, files, ns) }) }) -func runRecoveryTest(ngfPodName, containerName string, files []string, ns *core.Namespace) { +func runRecoveryTest(teaURL, coffeeURL, ngfPodName, containerName string, files []string, ns *core.Namespace) { var ( err error leaseName string @@ -94,16 +95,16 @@ func runRecoveryTest(ngfPodName, containerName string, files []string, ns *core. Expect(waitForLeaderLeaseToChange(leaseName)).ToNot(HaveOccurred()) } - Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) + Expect(waitForWorkingTraffic(teaURL, coffeeURL)).ToNot(HaveOccurred()) Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - Expect(waitForFailingTraffic()).ToNot(HaveOccurred()) + Expect(waitForFailingTraffic(teaURL, coffeeURL)).ToNot(HaveOccurred()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - Expect(waitForWorkingTraffic()).ToNot(HaveOccurred()) + Expect(waitForWorkingTraffic(teaURL, coffeeURL)).ToNot(HaveOccurred()) } func restartContainer(ngfPodName, containerName string) { @@ -149,7 +150,7 @@ func waitForContainerRestart(ngfPodName, containerName string, currentRestartCou ) } -func waitForWorkingTraffic() error { +func waitForWorkingTraffic(teaURL, coffeeURL string) error { ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.RequestTimeout) defer cancel() @@ -170,7 +171,7 @@ func waitForWorkingTraffic() error { ) } -func waitForFailingTraffic() error { +func waitForFailingTraffic(teaURL, coffeeURL string) error { ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.RequestTimeout) defer cancel() From 4544fb5eec1ed389ddb03dce76ea0b3eb0feee51 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 30 Apr 2024 14:31:18 -0700 Subject: [PATCH 27/36] Add additional comments --- tests/suite/graceful_recovery_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 6df8971362..6a9c180a4f 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -83,6 +83,10 @@ func runRecoveryTest(teaURL, coffeeURL, ngfPodName, containerName string, files ) if containerName != nginxContainerName { + // Since we have already deployed resources and ran resourceManager.WaitForAppsToBeReady(ns.Name) earlier, + // we know that the applications are ready at this point. This could only be the case if NGF has written + // statuses, which could only be the case if NGF has the leader lease. Since there is only one instance + // of NGF in this test, we can be certain that this is the correct leaseholder name. leaseName, err = getLeaderElectionLeaseHolderName() Expect(err).ToNot(HaveOccurred()) } @@ -123,7 +127,9 @@ func restartContainer(ngfPodName, containerName string) { Expect(waitForContainerRestart(ngfPodName, containerName, restartCount)).ToNot(HaveOccurred()) - // propagation policy is set to delete underlying pod created through job + // default propagation policy is metav1.DeletePropagationOrphan which does not delete the underlying + // pod created through the job after the job is deleted. Setting it to metav1.DeletePropagationBackground + // deletes the underlying pod after the job is deleted. Expect(resourceManager.Delete( []client.Object{job}, client.PropagationPolicy(metav1.DeletePropagationBackground), From 1c3eceecff4f09fa70532ba1e0a32190c286c3d4 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 30 Apr 2024 14:48:32 -0700 Subject: [PATCH 28/36] Refactor error checking --- tests/suite/graceful_recovery_test.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 6a9c180a4f..32d0663d94 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -217,8 +217,8 @@ func expectRequestToFail(appURL, address string, responseBodyMessage string) err return errors.New("expected http status to be 0") } - if strings.Contains(body, responseBodyMessage) { - return errors.New("expected response body to not contain correct body message") + if body != "" { + return fmt.Errorf("expected response body to be empty, instead received: %s", body) } if err == nil { @@ -236,7 +236,10 @@ func checkContainerLogsForErrors(ngfPodName string) { &core.PodLogOptions{Container: nginxContainerName, SinceSeconds: &sinceSeconds}, ) Expect(err).ToNot(HaveOccurred()) - Expect(logs).ToNot(ContainSubstring("emerg"), logs) + Expect(logs).ToNot(ContainSubstring("[error]"), logs) + Expect(logs).ToNot(ContainSubstring("[crit]"), logs) + Expect(logs).ToNot(ContainSubstring("[alert]"), logs) + Expect(logs).ToNot(ContainSubstring("[emerg]"), logs) logs, err = resourceManager.GetPodLogs( ngfNamespace, @@ -244,7 +247,7 @@ func checkContainerLogsForErrors(ngfPodName string) { &core.PodLogOptions{Container: ngfContainerName, SinceSeconds: &sinceSeconds}, ) Expect(err).ToNot(HaveOccurred()) - Expect(logs).ToNot(ContainSubstring("error"), logs) + Expect(logs).ToNot(ContainSubstring("\"level\":\"error\""), logs) } func waitForLeaderLeaseToChange(originalLeaseName string) error { From 6508425a2b673bcb5d55de24b3ec6648811b6371 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 30 Apr 2024 15:03:52 -0700 Subject: [PATCH 29/36] Remove sinceSeconds when checking container logs for errors --- tests/suite/graceful_recovery_test.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 32d0663d94..7014c1e787 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -27,6 +27,8 @@ const ( ngfContainerName = "nginx-gateway" ) +// Since checkContainerLogsForErrors may experience interference from previous tests (as explained in the function +// documentation), this test is recommended to be run separate from other nfr tests. var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recovery"), func() { files := []string{ "graceful-recovery/cafe.yaml", @@ -228,12 +230,15 @@ func expectRequestToFail(appURL, address string, responseBodyMessage string) err return nil } +// checkContainerLogsForErrors checks both nginx and ngf container's logs for any possible errors. +// Since this function retrieves all the logs from both containers and the NGF pod may be shared between tests, +// the logs retrieved may contain log messages from previous tests, thus any errors in the logs from previous tests +// may cause an interference with this test and cause this test to fail. func checkContainerLogsForErrors(ngfPodName string) { - sinceSeconds := int64(15) logs, err := resourceManager.GetPodLogs( ngfNamespace, ngfPodName, - &core.PodLogOptions{Container: nginxContainerName, SinceSeconds: &sinceSeconds}, + &core.PodLogOptions{Container: nginxContainerName}, ) Expect(err).ToNot(HaveOccurred()) Expect(logs).ToNot(ContainSubstring("[error]"), logs) @@ -244,7 +249,7 @@ func checkContainerLogsForErrors(ngfPodName string) { logs, err = resourceManager.GetPodLogs( ngfNamespace, ngfPodName, - &core.PodLogOptions{Container: ngfContainerName, SinceSeconds: &sinceSeconds}, + &core.PodLogOptions{Container: ngfContainerName}, ) Expect(err).ToNot(HaveOccurred()) Expect(logs).ToNot(ContainSubstring("\"level\":\"error\""), logs) From cefead3c719cae9562f607f391c88689b7186201 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 1 May 2024 08:13:30 -0700 Subject: [PATCH 30/36] Refactor waiting functions using gomega Eventually assertions --- tests/framework/timeout.go | 4 + tests/suite/graceful_recovery_test.go | 160 ++++++++++++-------------- 2 files changed, 80 insertions(+), 84 deletions(-) diff --git a/tests/framework/timeout.go b/tests/framework/timeout.go index af18a77b9e..2aee2e5a21 100644 --- a/tests/framework/timeout.go +++ b/tests/framework/timeout.go @@ -20,6 +20,9 @@ type TimeoutConfig struct { // ContainerRestartTimeout represents the maximum time for a Kubernetes Container to restart. ContainerRestartTimeout time.Duration + + // GetLeaderLeaseTimeout represents the maximum time for NGF to retrieve the leader lease. + GetLeaderLeaseTimeout time.Duration } // DefaultTimeoutConfig populates a TimeoutConfig with the default values. @@ -31,5 +34,6 @@ func DefaultTimeoutConfig() TimeoutConfig { ManifestFetchTimeout: 10 * time.Second, RequestTimeout: 10 * time.Second, ContainerRestartTimeout: 10 * time.Second, + GetLeaderLeaseTimeout: 60 * time.Second, } } diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 7014c1e787..d33d397693 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -15,7 +15,6 @@ import ( core "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/wait" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/yaml" @@ -61,7 +60,13 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - Expect(waitForWorkingTraffic(teaURL, coffeeURL)).ToNot(HaveOccurred()) + Eventually( + func() bool { + return checkForWorkingTraffic(teaURL, coffeeURL) + }). + WithTimeout(timeoutConfig.RequestTimeout). + WithPolling(500 * time.Millisecond). + Should(BeTrue()) }) AfterAll(func() { @@ -98,19 +103,43 @@ func runRecoveryTest(teaURL, coffeeURL, ngfPodName, containerName string, files checkContainerLogsForErrors(ngfPodName) if containerName != nginxContainerName { - Expect(waitForLeaderLeaseToChange(leaseName)).ToNot(HaveOccurred()) + Eventually( + func() bool { + return checkLeaderLeaseChange(leaseName) + }). + WithTimeout(timeoutConfig.GetLeaderLeaseTimeout). + WithPolling(500 * time.Millisecond). + Should(BeTrue()) } - Expect(waitForWorkingTraffic(teaURL, coffeeURL)).ToNot(HaveOccurred()) + Eventually( + func() bool { + return checkForWorkingTraffic(teaURL, coffeeURL) + }). + WithTimeout(timeoutConfig.RequestTimeout). + WithPolling(500 * time.Millisecond). + Should(BeTrue()) Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - Expect(waitForFailingTraffic(teaURL, coffeeURL)).ToNot(HaveOccurred()) + Eventually( + func() bool { + return checkForFailingTraffic(teaURL, coffeeURL) + }). + WithTimeout(timeoutConfig.RequestTimeout). + WithPolling(500 * time.Millisecond). + Should(BeTrue()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - Expect(waitForWorkingTraffic(teaURL, coffeeURL)).ToNot(HaveOccurred()) + Eventually( + func() bool { + return checkForWorkingTraffic(teaURL, coffeeURL) + }). + WithTimeout(timeoutConfig.RequestTimeout). + WithPolling(500 * time.Millisecond). + Should(BeTrue()) } func restartContainer(ngfPodName, containerName string) { @@ -127,7 +156,13 @@ func restartContainer(ngfPodName, containerName string) { job, err := runNodeDebuggerJob(ngfPodName, jobScript) Expect(err).ToNot(HaveOccurred()) - Expect(waitForContainerRestart(ngfPodName, containerName, restartCount)).ToNot(HaveOccurred()) + Eventually( + func() bool { + return checkContainerRestart(ngfPodName, containerName, restartCount) + }). + WithTimeout(timeoutConfig.ContainerRestartTimeout). + WithPolling(500 * time.Millisecond). + Should(BeTrue()) // default propagation policy is metav1.DeletePropagationOrphan which does not delete the underlying // pod created through the job after the job is deleted. Setting it to metav1.DeletePropagationBackground @@ -138,66 +173,33 @@ func restartContainer(ngfPodName, containerName string) { )).ToNot(HaveOccurred()) } -func waitForContainerRestart(ngfPodName, containerName string, currentRestartCount int) error { - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.ContainerRestartTimeout) - defer cancel() +func checkContainerRestart(ngfPodName, containerName string, currentRestartCount int) bool { + restartCount, err := getContainerRestartCount(ngfPodName, containerName) + if err != nil { + return false + } - //nolint:nilerr - return wait.PollUntilContextCancel( - ctx, - 500*time.Millisecond, - true, /* poll immediately */ - func(_ context.Context) (bool, error) { - restartCount, err := getContainerRestartCount(ngfPodName, containerName) - if err != nil { - return false, nil - } - - return restartCount == currentRestartCount+1, nil - }, - ) + return restartCount == currentRestartCount+1 } -func waitForWorkingTraffic(teaURL, coffeeURL string) error { - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.RequestTimeout) - defer cancel() - - //nolint:nilerr - return wait.PollUntilContextCancel( - ctx, - 500*time.Millisecond, - true, /* poll immediately */ - func(_ context.Context) (bool, error) { - if err := expectRequestToSucceed(teaURL, address, "URI: /tea"); err != nil { - return false, nil - } - if err := expectRequestToSucceed(coffeeURL, address, "URI: /coffee"); err != nil { - return false, nil - } - return true, nil - }, - ) +func checkForWorkingTraffic(teaURL, coffeeURL string) bool { + if err := expectRequestToSucceed(teaURL, address, "URI: /tea"); err != nil { + return false + } + if err := expectRequestToSucceed(coffeeURL, address, "URI: /coffee"); err != nil { + return false + } + return true } -func waitForFailingTraffic(teaURL, coffeeURL string) error { - ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.RequestTimeout) - defer cancel() - - //nolint:nilerr - return wait.PollUntilContextCancel( - ctx, - 500*time.Millisecond, - true, /* poll immediately */ - func(_ context.Context) (bool, error) { - if err := expectRequestToFail(teaURL, address, "URI: /tea"); err != nil { - return false, nil - } - if err := expectRequestToFail(coffeeURL, address, "URI: /coffee"); err != nil { - return false, nil - } - return true, nil - }, - ) +func checkForFailingTraffic(teaURL, coffeeURL string) bool { + if err := expectRequestToFail(teaURL, address, "URI: /tea"); err != nil { + return false + } + if err := expectRequestToFail(coffeeURL, address, "URI: /coffee"); err != nil { + return false + } + return true } func expectRequestToSucceed(appURL, address string, responseBodyMessage string) error { @@ -255,28 +257,18 @@ func checkContainerLogsForErrors(ngfPodName string) { Expect(logs).ToNot(ContainSubstring("\"level\":\"error\""), logs) } -func waitForLeaderLeaseToChange(originalLeaseName string) error { - leaseCtx, leaseCancel := context.WithTimeout(context.Background(), 1*time.Minute) - defer leaseCancel() - - //nolint:nilerr - return wait.PollUntilContextCancel( - leaseCtx, - 500*time.Millisecond, - true, /* poll immediately */ - func(_ context.Context) (bool, error) { - leaseName, err := getLeaderElectionLeaseHolderName() - if err != nil { - return false, nil - } - - if originalLeaseName != leaseName { - return true, nil - } - - return false, nil - }, - ) +func checkLeaderLeaseChange(originalLeaseName string) bool { + leaseName, err := getLeaderElectionLeaseHolderName() + + if err != nil { + return false + } + + if originalLeaseName != leaseName { + return true + } + + return false } func getLeaderElectionLeaseHolderName() (string, error) { From 587b237c84e73f6948bcec18c59b488c15ea10fc Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 1 May 2024 09:01:45 -0700 Subject: [PATCH 31/36] Add Skip of failing test --- tests/suite/graceful_recovery_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index d33d397693..56f0d5126a 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -79,6 +79,8 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov }) It("recovers when nginx container is restarted", func() { + // FIXME(bjee19) remove Skip() when https://github.com/nginxinc/nginx-gateway-fabric/issues/1108 is completed. + Skip("Test currently fails due to this issue: https://github.com/nginxinc/nginx-gateway-fabric/issues/1108") runRecoveryTest(teaURL, coffeeURL, ngfPodName, nginxContainerName, files, ns) }) }) From dc07841b258e9865e542a0c2377671ccaa6c7a77 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 1 May 2024 14:11:45 -0700 Subject: [PATCH 32/36] Refactored Eventually functions to return errors instead of booleans --- tests/suite/graceful_recovery_test.go | 65 ++++++++++++++------------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 56f0d5126a..6d8d84f47d 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -61,12 +61,12 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) Eventually( - func() bool { + func() error { return checkForWorkingTraffic(teaURL, coffeeURL) }). WithTimeout(timeoutConfig.RequestTimeout). WithPolling(500 * time.Millisecond). - Should(BeTrue()) + Should(Succeed()) }) AfterAll(func() { @@ -102,46 +102,46 @@ func runRecoveryTest(teaURL, coffeeURL, ngfPodName, containerName string, files restartContainer(ngfPodName, containerName) - checkContainerLogsForErrors(ngfPodName) - if containerName != nginxContainerName { Eventually( - func() bool { + func() error { return checkLeaderLeaseChange(leaseName) }). WithTimeout(timeoutConfig.GetLeaderLeaseTimeout). WithPolling(500 * time.Millisecond). - Should(BeTrue()) + Should(Succeed()) } Eventually( - func() bool { + func() error { return checkForWorkingTraffic(teaURL, coffeeURL) }). WithTimeout(timeoutConfig.RequestTimeout). WithPolling(500 * time.Millisecond). - Should(BeTrue()) + Should(Succeed()) Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) Eventually( - func() bool { + func() error { return checkForFailingTraffic(teaURL, coffeeURL) }). WithTimeout(timeoutConfig.RequestTimeout). WithPolling(500 * time.Millisecond). - Should(BeTrue()) + Should(Succeed()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) Eventually( - func() bool { + func() error { return checkForWorkingTraffic(teaURL, coffeeURL) }). WithTimeout(timeoutConfig.RequestTimeout). WithPolling(500 * time.Millisecond). - Should(BeTrue()) + Should(Succeed()) + + checkContainerLogsForErrors(ngfPodName) } func restartContainer(ngfPodName, containerName string) { @@ -159,12 +159,12 @@ func restartContainer(ngfPodName, containerName string) { Expect(err).ToNot(HaveOccurred()) Eventually( - func() bool { + func() error { return checkContainerRestart(ngfPodName, containerName, restartCount) }). WithTimeout(timeoutConfig.ContainerRestartTimeout). WithPolling(500 * time.Millisecond). - Should(BeTrue()) + Should(Succeed()) // default propagation policy is metav1.DeletePropagationOrphan which does not delete the underlying // pod created through the job after the job is deleted. Setting it to metav1.DeletePropagationBackground @@ -172,36 +172,40 @@ func restartContainer(ngfPodName, containerName string) { Expect(resourceManager.Delete( []client.Object{job}, client.PropagationPolicy(metav1.DeletePropagationBackground), - )).ToNot(HaveOccurred()) + )).To(Succeed()) } -func checkContainerRestart(ngfPodName, containerName string, currentRestartCount int) bool { +func checkContainerRestart(ngfPodName, containerName string, currentRestartCount int) error { restartCount, err := getContainerRestartCount(ngfPodName, containerName) if err != nil { - return false + return err + } + + if restartCount != currentRestartCount+1 { + return fmt.Errorf("expected current restart count: %d to match incremented restart count: %d", restartCount, currentRestartCount+1) } - return restartCount == currentRestartCount+1 + return nil } -func checkForWorkingTraffic(teaURL, coffeeURL string) bool { +func checkForWorkingTraffic(teaURL, coffeeURL string) error { if err := expectRequestToSucceed(teaURL, address, "URI: /tea"); err != nil { - return false + return err } if err := expectRequestToSucceed(coffeeURL, address, "URI: /coffee"); err != nil { - return false + return err } - return true + return nil } -func checkForFailingTraffic(teaURL, coffeeURL string) bool { +func checkForFailingTraffic(teaURL, coffeeURL string) error { if err := expectRequestToFail(teaURL, address, "URI: /tea"); err != nil { - return false + return err } if err := expectRequestToFail(coffeeURL, address, "URI: /coffee"); err != nil { - return false + return err } - return true + return nil } func expectRequestToSucceed(appURL, address string, responseBodyMessage string) error { @@ -259,18 +263,17 @@ func checkContainerLogsForErrors(ngfPodName string) { Expect(logs).ToNot(ContainSubstring("\"level\":\"error\""), logs) } -func checkLeaderLeaseChange(originalLeaseName string) bool { +func checkLeaderLeaseChange(originalLeaseName string) error { leaseName, err := getLeaderElectionLeaseHolderName() - if err != nil { - return false + return err } if originalLeaseName != leaseName { - return true + return fmt.Errorf("originalLeaseName: %s, does not match current leaseName: %s", originalLeaseName, leaseName) } - return false + return nil } func getLeaderElectionLeaseHolderName() (string, error) { From d311b9d1dcecb0945fcc05f6b6db74f67dbff4b2 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 1 May 2024 14:42:21 -0700 Subject: [PATCH 33/36] Add check for lease holder identity being empty --- tests/suite/graceful_recovery_test.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 6d8d84f47d..a91e15c269 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -269,8 +269,8 @@ func checkLeaderLeaseChange(originalLeaseName string) error { return err } - if originalLeaseName != leaseName { - return fmt.Errorf("originalLeaseName: %s, does not match current leaseName: %s", originalLeaseName, leaseName) + if originalLeaseName == leaseName { + return fmt.Errorf("expected originalLeaseName: %s, to not match current leaseName: %s", originalLeaseName, leaseName) } return nil @@ -286,6 +286,11 @@ func getLeaderElectionLeaseHolderName() (string, error) { if err := k8sClient.Get(ctx, key, &lease); err != nil { return "", errors.New("could not retrieve leader election lease") } + + if *lease.Spec.HolderIdentity == "" { + return "", errors.New("leader election lease holder identity is empty") + } + return *lease.Spec.HolderIdentity, nil } From dbba3498e3b196e664eebbd98071b2f7d664878e Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 2 May 2024 15:17:26 -0700 Subject: [PATCH 34/36] Add filter of error messages in nginx logs to allow expected errors --- tests/suite/graceful_recovery_test.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index a91e15c269..62fe4d1331 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -249,10 +249,15 @@ func checkContainerLogsForErrors(ngfPodName string) { &core.PodLogOptions{Container: nginxContainerName}, ) Expect(err).ToNot(HaveOccurred()) - Expect(logs).ToNot(ContainSubstring("[error]"), logs) - Expect(logs).ToNot(ContainSubstring("[crit]"), logs) - Expect(logs).ToNot(ContainSubstring("[alert]"), logs) - Expect(logs).ToNot(ContainSubstring("[emerg]"), logs) + + for _, line := range strings.Split(logs, "\n") { + Expect(line).ToNot(ContainSubstring("[crit]"), line) + Expect(line).ToNot(ContainSubstring("[alert]"), line) + Expect(line).ToNot(ContainSubstring("[emerg]"), line) + if strings.Contains(line, "[error]") { + Expect(line).To(ContainSubstring("connect() failed (111: Connection refused)"), line) + } + } logs, err = resourceManager.GetPodLogs( ngfNamespace, From b5dc80e9b17dcb77b5090387a57cb2bf590c5f9e Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 2 May 2024 17:06:03 -0700 Subject: [PATCH 35/36] Add ubuntu image version --- tests/suite/manifests/graceful-recovery/node-debugger-job.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml b/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml index 5e1aa37695..dcc3d5ef18 100644 --- a/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml +++ b/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml @@ -11,7 +11,7 @@ spec: kubernetes.io/hostname: "to be replaced by the test" containers: - name: node-debugger-container - image: ubuntu + image: ubuntu:22.04 command: ["/bin/bash", "-c"] args: ["to be replaced by the test"] securityContext: From e326e9633e66064de86db52971545a4c0721aae4 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 3 May 2024 08:37:27 -0700 Subject: [PATCH 36/36] Add teardown and setup of NGF and check NGF pod length is one --- tests/suite/graceful_recovery_test.go | 9 ++++++++- tests/suite/system_suite_test.go | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 62fe4d1331..bf6c61295b 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -48,9 +48,16 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov var ngfPodName string BeforeAll(func() { + // this test is unique in that it will check the entire log of both ngf and nginx containers + // for any errors, so in order to avoid errors generated in previous tests we will uninstall + // NGF installed at the suite level, then re-deploy our own + teardown(releaseName) + + setup(getDefaultSetupCfg()) + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) - Expect(podNames).ToNot(BeEmpty()) + Expect(podNames).To(HaveLen(1)) ngfPodName = podNames[0] }) diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index 6d987a5454..7c1218d15f 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -247,7 +247,8 @@ var _ = BeforeSuite(func() { // - running telemetry test (NGF will be deployed as part of the test) if strings.Contains(labelFilter, "upgrade") || strings.Contains(labelFilter, "longevity-teardown") || - strings.Contains(labelFilter, "telemetry") { + strings.Contains(labelFilter, "telemetry") || + strings.Contains(labelFilter, "graceful-recovery") { cfg.deploy = false }