From ba537f78322ed8efaf58e5283eb1b94539e1d524 Mon Sep 17 00:00:00 2001 From: Murphy Chen Date: Tue, 23 Sep 2025 17:48:38 +0800 Subject: [PATCH 01/10] Adding a flag to control whether auth is added to the EPP metrics server --- cmd/epp/runner/runner.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 5a798906a..a4ee6a1d4 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -36,6 +36,7 @@ import ( healthPb "google.golang.org/grpc/health/grpc_health_v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/rest" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" @@ -94,6 +95,7 @@ var ( grpcPort = flag.Int("grpc-port", runserver.DefaultGrpcPort, "The gRPC port used for communicating with Envoy proxy") grpcHealthPort = flag.Int("grpc-health-port", runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes") metricsPort = flag.Int("metrics-port", runserver.DefaultMetricsPort, "The metrics port") + metricsAuth = flag.Bool("metrics-auth", true, "Enables secure of EPP metrics endpoint") enablePprof = flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. 
Set to false to disable pprof handlers.") poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.") poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.") @@ -211,8 +213,14 @@ func (r *Runner) Run(ctx context.Context) error { // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.19.1/pkg/metrics/server // - https://book.kubebuilder.io/reference/metrics.html metricsServerOptions := metricsserver.Options{ - BindAddress: fmt.Sprintf(":%d", *metricsPort), - FilterProvider: filters.WithAuthenticationAndAuthorization, + BindAddress: fmt.Sprintf(":%d", *metricsPort), + FilterProvider: func() func(c *rest.Config, httpClient *http.Client) (metricsserver.Filter, error) { + if *metricsAuth { + return filters.WithAuthenticationAndAuthorization + } + + return nil + }(), } // Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default From d7080f8ffd9047d9ab59f487b4af93e05a244101 Mon Sep 17 00:00:00 2001 From: Murphy Chen Date: Wed, 24 Sep 2025 00:26:13 +0800 Subject: [PATCH 02/10] Update cmd/epp/runner/runner.go Co-authored-by: Cong Liu --- cmd/epp/runner/runner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index a4ee6a1d4..ae4321a89 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -95,7 +95,7 @@ var ( grpcPort = flag.Int("grpc-port", runserver.DefaultGrpcPort, "The gRPC port used for communicating with Envoy proxy") grpcHealthPort = flag.Int("grpc-health-port", runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes") metricsPort = flag.Int("metrics-port", runserver.DefaultMetricsPort, "The metrics port") - metricsAuth = flag.Bool("metrics-auth", true, "Enables secure of EPP metrics endpoint") + metricsEndpointAuth = 
flag.Bool("metrics-endpoint-auth", true, "Enables authentication and authorization of the metrics endpoint") enablePprof = flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.") poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.") poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.") From 320b5b7c52d1fd419c52666d170811c0d904692b Mon Sep 17 00:00:00 2001 From: Murphy Chen Date: Wed, 24 Sep 2025 00:27:09 +0800 Subject: [PATCH 03/10] update --- cmd/epp/runner/runner.go | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index ae4321a89..f96e23308 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -92,18 +92,18 @@ var flowControlConfig = flowcontrol.Config{ } var ( - grpcPort = flag.Int("grpc-port", runserver.DefaultGrpcPort, "The gRPC port used for communicating with Envoy proxy") - grpcHealthPort = flag.Int("grpc-health-port", runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes") - metricsPort = flag.Int("metrics-port", runserver.DefaultMetricsPort, "The metrics port") - metricsEndpointAuth = flag.Bool("metrics-endpoint-auth", true, "Enables authentication and authorization of the metrics endpoint") - enablePprof = flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. 
Set to false to disable pprof handlers.") - poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.") - poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.") - poolNamespace = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.") - logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity") - secureServing = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.") - healthChecking = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking") - certPath = flag.String("cert-path", runserver.DefaultCertPath, "The path to the certificate for secure serving. The certificate and private key files "+ + grpcPort = flag.Int("grpc-port", runserver.DefaultGrpcPort, "The gRPC port used for communicating with Envoy proxy") + grpcHealthPort = flag.Int("grpc-health-port", runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes") + metricsPort = flag.Int("metrics-port", runserver.DefaultMetricsPort, "The metrics port") + metricsEndpointAuth = flag.Bool("metrics-endpoint-auth", true, "Enables authentication and authorization of the metrics endpoint") + enablePprof = flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. 
Set to false to disable pprof handlers.") + poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.") + poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.") + poolNamespace = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.") + logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity") + secureServing = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.") + healthChecking = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking") + certPath = flag.String("cert-path", runserver.DefaultCertPath, "The path to the certificate for secure serving. The certificate and private key files "+ "are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+ "then a self-signed certificate is used.") // metric flags @@ -215,7 +215,7 @@ func (r *Runner) Run(ctx context.Context) error { metricsServerOptions := metricsserver.Options{ BindAddress: fmt.Sprintf(":%d", *metricsPort), FilterProvider: func() func(c *rest.Config, httpClient *http.Client) (metricsserver.Filter, error) { - if *metricsAuth { + if *metricsEndpointAuth { return filters.WithAuthenticationAndAuthorization } From 73c04345f8a6e2f4ae05f273b41ea5b8de5cb6c2 Mon Sep 17 00:00:00 2001 From: Murphy Chen Date: Fri, 10 Oct 2025 14:42:29 +0800 Subject: [PATCH 04/10] apply review's suggestion --- .../templates/epp-deployment.yaml | 3 +++ .../templates/epp-sa-token-secret.yaml | 4 ++-- .../templates/epp-servicemonitor.yaml | 11 +++++++--- .../charts/inferencepool/templates/gke.yaml | 4 ++-- .../charts/inferencepool/templates/rbac.yaml | 2 +- config/charts/inferencepool/values.yaml | 21 ++++++++----------- 6 files changed, 25 insertions(+), 20 deletions(-) diff --git 
a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml index 120240217..c2dfc9627 100644 --- a/config/charts/inferencepool/templates/epp-deployment.yaml +++ b/config/charts/inferencepool/templates/epp-deployment.yaml @@ -68,6 +68,9 @@ spec: {{- else }} - "false" {{- end }} + {{- if not .Values.inferenceExtension.serviceMonitor.auth.enabled }} + - --metrics-endpoint-auth=false + {{- end }} ports: - name: grpc containerPort: 9002 diff --git a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml index df54b3475..838208dc1 100644 --- a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml +++ b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml @@ -1,8 +1,8 @@ -{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }} +{{- if and .Values.inferenceExtension.serviceMonitor.enabled .Values.inferenceExtension.serviceMonitor.auth.enabled (ne (lower .Values.provider.name) "gke") }} apiVersion: v1 kind: Secret metadata: - name: {{ .Values.inferenceExtension.monitoring.secret.name }} + name: {{ .Values.inferenceExtension.serviceMonitor.auth.secretName }} namespace: {{ .Release.Namespace }} labels: {{- include "gateway-api-inference-extension.labels" . 
| nindent 4 }} diff --git a/config/charts/inferencepool/templates/epp-servicemonitor.yaml b/config/charts/inferencepool/templates/epp-servicemonitor.yaml index e4788ba83..90eddb1cd 100644 --- a/config/charts/inferencepool/templates/epp-servicemonitor.yaml +++ b/config/charts/inferencepool/templates/epp-servicemonitor.yaml @@ -1,4 +1,4 @@ -{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }} +{{- if and .Values.inferenceExtension.serviceMonitor.enabled (ne (lower .Values.provider.name) "gke") }} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: @@ -6,15 +6,20 @@ metadata: namespace: {{ .Release.Namespace }} labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} + {{- with .Values.inferenceExtension.serviceMonitor.extraLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} spec: endpoints: - - interval: {{ .Values.inferenceExtension.monitoring.interval }} + - interval: {{ .Values.inferenceExtension.serviceMonitor.interval }} port: "http-metrics" path: "/metrics" + {{- if .Values.inferenceExtension.serviceMonitor.auth.enabled }} authorization: credentials: key: token - name: {{ .Values.inferenceExtension.monitoring.secret.name }} + name: {{ .Values.inferenceExtension.serviceMonitor.auth.secretName }} + {{- end }} jobLabel: {{ include "gateway-api-inference-extension.name" . 
}} namespaceSelector: matchNames: diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml index 77855c35a..f64d70b6c 100644 --- a/config/charts/inferencepool/templates/gke.yaml +++ b/config/charts/inferencepool/templates/gke.yaml @@ -40,7 +40,7 @@ spec: logging: enabled: true # log all requests by default --- -{{- if .Values.inferenceExtension.monitoring.gke.enabled }} +{{- if and .Values.inferenceExtension.serviceMonitor.enabled .Values.inferenceExtension.serviceMonitor.auth.enabled }} {{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}} {{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}} {{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}} @@ -83,7 +83,7 @@ spec: endpoints: - port: metrics scheme: http - interval: {{ .Values.inferenceExtension.monitoring.interval }} + interval: {{ .Values.inferenceExtension.serviceMonitor.interval }} path: /metrics authorization: type: Bearer diff --git a/config/charts/inferencepool/templates/rbac.yaml b/config/charts/inferencepool/templates/rbac.yaml index ebe68c3ea..0eb154fb9 100644 --- a/config/charts/inferencepool/templates/rbac.yaml +++ b/config/charts/inferencepool/templates/rbac.yaml @@ -17,7 +17,7 @@ rules: - subjectaccessreviews verbs: - create -{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }} +{{- if .Values.inferenceExtension.serviceMonitor.enabled }} - nonResourceURLs: - "/metrics" verbs: diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index f901f7f0f..7b08bdfbe 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -40,19 +40,16 @@ inferenceExtension: tolerations: [] - # Monitoring configuration for EPP - monitoring: + # Prometheus ServiceMonitor will be created when enabled for EPP metrics collection + serviceMonitor: + enabled: true interval: "10s" - # Service 
account token secret for authentication - secret: - name: inference-gateway-sa-metrics-reader-secret - - # Prometheus ServiceMonitor will be created when enabled for EPP metrics collection - prometheus: - enabled: false - - gke: - enabled: false + auth: + enabled: true + # Service account token secret for authentication + secretName: inference-gateway-sa-metrics-reader-secret + # additional labels for the ServiceMonitor + extraLabels: {} tracing: enabled: false otelExporterEndpoint: "http://localhost:4317" From 56a60d3e66a6e2fb7dae09cc672ad9103f65df85 Mon Sep 17 00:00:00 2001 From: Murphy Chen Date: Mon, 29 Sep 2025 21:35:23 +0800 Subject: [PATCH 05/10] apply reviewer's suggestion --- .../templates/epp-deployment.yaml | 2 +- .../templates/epp-sa-token-secret.yaml | 4 ++-- .../templates/epp-servicemonitor.yaml | 10 ++++----- .../charts/inferencepool/templates/gke.yaml | 4 ++-- .../charts/inferencepool/templates/rbac.yaml | 2 +- config/charts/inferencepool/values.yaml | 22 ++++++++++--------- 6 files changed, 23 insertions(+), 21 deletions(-) diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml index c2dfc9627..10eb2907a 100644 --- a/config/charts/inferencepool/templates/epp-deployment.yaml +++ b/config/charts/inferencepool/templates/epp-deployment.yaml @@ -68,7 +68,7 @@ spec: {{- else }} - "false" {{- end }} - {{- if not .Values.inferenceExtension.serviceMonitor.auth.enabled }} + {{- if not .Values.inferenceExtension.monitoring.prometheus.enabled }} - --metrics-endpoint-auth=false {{- end }} ports: diff --git a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml index 838208dc1..16d935f96 100644 --- a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml +++ b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml @@ -1,8 +1,8 @@ -{{- if and .Values.inferenceExtension.serviceMonitor.enabled 
.Values.inferenceExtension.serviceMonitor.auth.enabled (ne (lower .Values.provider.name) "gke") }} +{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled (ne (lower .Values.provider.name) "gke") }} apiVersion: v1 kind: Secret metadata: - name: {{ .Values.inferenceExtension.serviceMonitor.auth.secretName }} + name: {{ .Values.inferenceExtension.monitoring.prometheus.auth.secretName }} namespace: {{ .Release.Namespace }} labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} diff --git a/config/charts/inferencepool/templates/epp-servicemonitor.yaml b/config/charts/inferencepool/templates/epp-servicemonitor.yaml index 90eddb1cd..15071340b 100644 --- a/config/charts/inferencepool/templates/epp-servicemonitor.yaml +++ b/config/charts/inferencepool/templates/epp-servicemonitor.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.inferenceExtension.serviceMonitor.enabled (ne (lower .Values.provider.name) "gke") }} +{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled (ne (lower .Values.provider.name) "gke") }} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: @@ -6,19 +6,19 @@ metadata: namespace: {{ .Release.Namespace }} labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} - {{- with .Values.inferenceExtension.serviceMonitor.extraLabels }} + {{- with .Values.inferenceExtension.monitoring.prometheus.extraLabels }} {{- toYaml . 
| nindent 4 }} {{- end }} spec: endpoints: - - interval: {{ .Values.inferenceExtension.serviceMonitor.interval }} + - interval: {{ .Values.inferenceExtension.monitoring.prometheus.interval }} port: "http-metrics" path: "/metrics" - {{- if .Values.inferenceExtension.serviceMonitor.auth.enabled }} + {{- if .Values.inferenceExtension.monitoring.prometheus.auth.enabled }} authorization: credentials: key: token - name: {{ .Values.inferenceExtension.serviceMonitor.auth.secretName }} + name: {{ .Values.inferenceExtension.monitoring.prometheus.auth.secretName }} {{- end }} jobLabel: {{ include "gateway-api-inference-extension.name" . }} namespaceSelector: diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml index f64d70b6c..f219a78b9 100644 --- a/config/charts/inferencepool/templates/gke.yaml +++ b/config/charts/inferencepool/templates/gke.yaml @@ -40,7 +40,7 @@ spec: logging: enabled: true # log all requests by default --- -{{- if and .Values.inferenceExtension.serviceMonitor.enabled .Values.inferenceExtension.serviceMonitor.auth.enabled }} +{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled }} {{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}} {{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}} {{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}} @@ -83,7 +83,7 @@ spec: endpoints: - port: metrics scheme: http - interval: {{ .Values.inferenceExtension.serviceMonitor.interval }} + interval: {{ .Values.inferenceExtension.monitoring.prometheus.interval }} path: /metrics authorization: type: Bearer diff --git a/config/charts/inferencepool/templates/rbac.yaml b/config/charts/inferencepool/templates/rbac.yaml index 0eb154fb9..ebe68c3ea 100644 --- a/config/charts/inferencepool/templates/rbac.yaml +++ b/config/charts/inferencepool/templates/rbac.yaml @@ 
-17,7 +17,7 @@ rules: - subjectaccessreviews verbs: - create -{{- if .Values.inferenceExtension.serviceMonitor.enabled }} +{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }} - nonResourceURLs: - "/metrics" verbs: diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 7b08bdfbe..5f92659a4 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -40,16 +40,18 @@ inferenceExtension: tolerations: [] - # Prometheus ServiceMonitor will be created when enabled for EPP metrics collection - serviceMonitor: - enabled: true - interval: "10s" - auth: - enabled: true - # Service account token secret for authentication - secretName: inference-gateway-sa-metrics-reader-secret - # additional labels for the ServiceMonitor - extraLabels: {} + # Monitoring configuration for EPP + monitoring: + # Prometheus ServiceMonitor will be created when enabled for EPP metrics collection + prometheus: + enabled: false + interval: "10s" + auth: + enabled: true + # Service account token secret for authentication + secretName: inference-gateway-sa-metrics-reader-secret + # additional labels for the ServiceMonitor + extraLabels: {} tracing: enabled: false otelExporterEndpoint: "http://localhost:4317" From 946268385039e4a3a35a623134c6fdfb9083df98 Mon Sep 17 00:00:00 2001 From: Murphy Chen Date: Tue, 30 Sep 2025 09:52:27 +0800 Subject: [PATCH 06/10] rollback interval --- config/charts/inferencepool/templates/epp-servicemonitor.yaml | 2 +- config/charts/inferencepool/templates/gke.yaml | 2 +- config/charts/inferencepool/values.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/charts/inferencepool/templates/epp-servicemonitor.yaml b/config/charts/inferencepool/templates/epp-servicemonitor.yaml index 15071340b..220be76dc 100644 --- a/config/charts/inferencepool/templates/epp-servicemonitor.yaml +++ b/config/charts/inferencepool/templates/epp-servicemonitor.yaml @@ 
-11,7 +11,7 @@ metadata: {{- end }} spec: endpoints: - - interval: {{ .Values.inferenceExtension.monitoring.prometheus.interval }} + - interval: {{ .Values.inferenceExtension.monitoring.interval }} port: "http-metrics" path: "/metrics" {{- if .Values.inferenceExtension.monitoring.prometheus.auth.enabled }} diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml index f219a78b9..2ee2e13fc 100644 --- a/config/charts/inferencepool/templates/gke.yaml +++ b/config/charts/inferencepool/templates/gke.yaml @@ -83,7 +83,7 @@ spec: endpoints: - port: metrics scheme: http - interval: {{ .Values.inferenceExtension.monitoring.prometheus.interval }} + interval: {{ .Values.inferenceExtension.monitoring.interval }} path: /metrics authorization: type: Bearer diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 5f92659a4..3cf88ff61 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -42,10 +42,10 @@ inferenceExtension: # Monitoring configuration for EPP monitoring: + interval: "10s" # Prometheus ServiceMonitor will be created when enabled for EPP metrics collection prometheus: enabled: false - interval: "10s" auth: enabled: true # Service account token secret for authentication From 73a24345039c9c6c02e78d8d38a5abc5b88dd3b9 Mon Sep 17 00:00:00 2001 From: Murphy Chen Date: Tue, 30 Sep 2025 11:44:02 +0800 Subject: [PATCH 07/10] update --- config/charts/inferencepool/README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index f6354bfee..82d8eef39 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -137,14 +137,16 @@ inferenceExtension: monitoring: interval: "10s" prometheus: - enabled: true - secret: - name: inference-gateway-sa-metrics-reader-secret + enabled: false + auth: + 
enabled: true + secretName: inference-gateway-sa-metrics-reader-secret + extraLabels: {} ``` **Note:** Prometheus monitoring requires the Prometheus Operator and ServiceMonitor CRD to be installed in the cluster. -For GKE environments, monitoring is enabled by setting `provider.name` to `gke` and `inferenceExtension.monitoring.gke.enabled` to `true`. This will create the necessary `PodMonitoring` and RBAC resources for metrics collection. +For GKE environments, you first need to set `provider.name` to `gke`. This will create the necessary `PodMonitoring` and RBAC resources for metrics collection. If you are using a GKE Autopilot cluster, you also need to set `provider.gke.autopilot` to `true`. @@ -186,7 +188,6 @@ The following table list the configurable parameters of the chart. | `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. | | `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. | | `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. | -| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. | | `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. | | `inferenceExtension.tracing.enabled` | Enables or disables OpenTelemetry tracing globally for the EndpointPicker. | | `inferenceExtension.tracing.otelExporterEndpoint` | OpenTelemetry collector endpoint. 
| From 626cc34922c36eb1fa7d134150fbd9462a6b2bb5 Mon Sep 17 00:00:00 2001 From: Murphy Chen Date: Fri, 10 Oct 2025 14:52:09 +0800 Subject: [PATCH 08/10] update README.md --- config/charts/inferencepool/README.md | 56 ++++++++++++++------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index 82d8eef39..a46769a36 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -168,33 +168,35 @@ $ helm uninstall pool-1 The following table list the configurable parameters of the chart. -| **Parameter Name** | **Description** | -|----------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `inferencePool.apiVersion` | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions. | -| `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. | -| `inferencePool.modelServerType` | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm. | -| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. | -| `inferenceExtension.replicas` | Number of replicas for the endpoint picker extension service. If More than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`. | -| `inferenceExtension.image.name` | Name of the container image used for the endpoint picker. 
| -| `inferenceExtension.image.hub` | Registry URL where the endpoint picker image is hosted. | -| `inferenceExtension.image.tag` | Image tag of the endpoint picker. | -| `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. | -| `inferenceExtension.env` | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`. | -| `inferenceExtension.extraContainerPorts` | List of additional container ports to expose. Defaults to `[]`. | -| `inferenceExtension.extraServicePorts` | List of additional service ports to expose. Defaults to `[]`. | -| `inferenceExtension.flags` | List of flags which are passed through to endpoint picker. Example flags, enable-pprof, grpc-port etc. Refer [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for complete list. | -| `inferenceExtension.affinity` | Affinity for the endpoint picker. Defaults to `{}`. | -| `inferenceExtension.tolerations` | Tolerations for the endpoint picker. Defaults to `[]`. | -| `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. | -| `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. | -| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. | -| `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. | -| `inferenceExtension.tracing.enabled` | Enables or disables OpenTelemetry tracing globally for the EndpointPicker. | -| `inferenceExtension.tracing.otelExporterEndpoint` | OpenTelemetry collector endpoint. | -| `inferenceExtension.tracing.sampling.sampler` | The trace sampler to use. 
Currently, only `parentbased_traceidratio` is supported. This sampler respects the parent span’s sampling decision when present, and applies the configured ratio for root spans. | -| `inferenceExtension.tracing.sampling.samplerArg` | Sampler-specific argument. For `parentbased_traceidratio`, this defines the base sampling rate for new traces (root spans), as a float string in the range [0.0, 1.0]. For example, "0.1" enables 10% sampling. | -| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`. | -| `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. | +| **Parameter Name** | **Description** | +|------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `inferencePool.apiVersion` | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions. | +| `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. | +| `inferencePool.modelServerType` | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm. | +| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. | +| `inferenceExtension.replicas` | Number of replicas for the endpoint picker extension service. If More than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`. 
| +| `inferenceExtension.image.name` | Name of the container image used for the endpoint picker. | +| `inferenceExtension.image.hub` | Registry URL where the endpoint picker image is hosted. | +| `inferenceExtension.image.tag` | Image tag of the endpoint picker. | +| `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. | +| `inferenceExtension.env` | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`. | +| `inferenceExtension.extraContainerPorts` | List of additional container ports to expose. Defaults to `[]`. | +| `inferenceExtension.extraServicePorts` | List of additional service ports to expose. Defaults to `[]`. | +| `inferenceExtension.flags` | List of flags which are passed through to endpoint picker. Example flags: enable-pprof, grpc-port, etc. Refer to [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for the complete list. | +| `inferenceExtension.affinity` | Affinity for the endpoint picker. Defaults to `{}`. | +| `inferenceExtension.tolerations` | Tolerations for the endpoint picker. Defaults to `[]`. | +| `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. | +| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. | +| `inferenceExtension.monitoring.prometheus.auth.enabled` | Enable auth for Prometheus metrics endpoint. Defaults to `true`. | +| `inferenceExtension.monitoring.prometheus.auth.secretName` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. | +| `inferenceExtension.monitoring.prometheus.extraLabels` | Extra labels added to ServiceMonitor. 
| +| `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. | +| `inferenceExtension.tracing.enabled` | Enables or disables OpenTelemetry tracing globally for the EndpointPicker. | +| `inferenceExtension.tracing.otelExporterEndpoint` | OpenTelemetry collector endpoint. | +| `inferenceExtension.tracing.sampling.sampler` | The trace sampler to use. Currently, only `parentbased_traceidratio` is supported. This sampler respects the parent span’s sampling decision when present, and applies the configured ratio for root spans. | +| `inferenceExtension.tracing.sampling.samplerArg` | Sampler-specific argument. For `parentbased_traceidratio`, this defines the base sampling rate for new traces (root spans), as a float string in the range [0.0, 1.0]. For example, "0.1" enables 10% sampling. | +| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`. | +| `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. | ### Provider Specific Configuration From d5450ab72e6788f09bad313b4ab969885c0df935 Mon Sep 17 00:00:00 2001 From: Murphy Chen Date: Wed, 15 Oct 2025 18:12:22 +0800 Subject: [PATCH 09/10] revert gke --- config/charts/inferencepool/README.md | 1 + config/charts/inferencepool/templates/gke.yaml | 2 +- config/charts/inferencepool/values.yaml | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index a46769a36..9fbbfb9bf 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -187,6 +187,7 @@ The following table list the configurable parameters of the chart. | `inferenceExtension.tolerations` | Tolerations for the endpoint picker. Defaults to `[]`. 
| | `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. | | `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. | +| `inferenceExtension.monitoring.gke.enabled` | **DEPRECATED**: This field is deprecated and will be removed in the next release. Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. | | `inferenceExtension.monitoring.prometheus.auth.enabled` | Enable auth for Prometheus metrics endpoint. Defaults to `true`. | | `inferenceExtension.monitoring.prometheus.auth.secretName` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. | | `inferenceExtension.monitoring.prometheus.extraLabels` | Extra labels added to ServiceMonitor. | diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml index 2ee2e13fc..77855c35a 100644 --- a/config/charts/inferencepool/templates/gke.yaml +++ b/config/charts/inferencepool/templates/gke.yaml @@ -40,7 +40,7 @@ spec: logging: enabled: true # log all requests by default --- -{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled }} +{{- if .Values.inferenceExtension.monitoring.gke.enabled }} {{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}} {{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}} {{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 3cf88ff61..8b3385ab1 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -52,6 +52,10 @@ inferenceExtension: secretName: inference-gateway-sa-metrics-reader-secret # additional labels for 
the ServiceMonitor extraLabels: {} + + # DEPRECATED: The 'gke' configuration will be removed in the next release. + gke: + enabled: false tracing: enabled: false otelExporterEndpoint: "http://localhost:4317" From dce33272b3403ed9f86c83f3c45165ac9b0ffd45 Mon Sep 17 00:00:00 2001 From: Murphy Chen Date: Thu, 16 Oct 2025 10:13:58 +0800 Subject: [PATCH 10/10] update --- config/charts/inferencepool/templates/gke.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml index 77855c35a..a2d8bbc87 100644 --- a/config/charts/inferencepool/templates/gke.yaml +++ b/config/charts/inferencepool/templates/gke.yaml @@ -40,7 +40,7 @@ spec: logging: enabled: true # log all requests by default --- -{{- if .Values.inferenceExtension.monitoring.gke.enabled }} +{{- if or .Values.inferenceExtension.monitoring.gke.enabled (and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled) }} {{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}} {{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}} {{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}}