diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go
index 5a798906a..f96e23308 100644
--- a/cmd/epp/runner/runner.go
+++ b/cmd/epp/runner/runner.go
@@ -36,6 +36,7 @@ import (
 	healthPb "google.golang.org/grpc/health/grpc_health_v1"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/rest"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
@@ -91,17 +92,18 @@ var flowControlConfig = flowcontrol.Config{
 }
 
 var (
-	grpcPort       = flag.Int("grpc-port", runserver.DefaultGrpcPort, "The gRPC port used for communicating with Envoy proxy")
-	grpcHealthPort = flag.Int("grpc-health-port", runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes")
-	metricsPort    = flag.Int("metrics-port", runserver.DefaultMetricsPort, "The metrics port")
-	enablePprof    = flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.")
-	poolName       = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.")
-	poolGroup      = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.")
-	poolNamespace  = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.")
-	logVerbosity   = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
-	secureServing  = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.")
-	healthChecking = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking")
-	certPath       = flag.String("cert-path", runserver.DefaultCertPath, "The path to the certificate for secure serving. The certificate and private key files "+
+	grpcPort            = flag.Int("grpc-port", runserver.DefaultGrpcPort, "The gRPC port used for communicating with Envoy proxy")
+	grpcHealthPort      = flag.Int("grpc-health-port", runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes")
+	metricsPort         = flag.Int("metrics-port", runserver.DefaultMetricsPort, "The metrics port")
+	metricsEndpointAuth = flag.Bool("metrics-endpoint-auth", true, "Enables authentication and authorization of the metrics endpoint")
+	enablePprof         = flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.")
+	poolName            = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.")
+	poolGroup           = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.")
+	poolNamespace       = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.")
+	logVerbosity        = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
+	secureServing       = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.")
+	healthChecking      = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking")
+	certPath            = flag.String("cert-path", runserver.DefaultCertPath, "The path to the certificate for secure serving. The certificate and private key files "+
 		"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+
 		"then a self-signed certificate is used.")
 	// metric flags
@@ -211,8 +213,14 @@ func (r *Runner) Run(ctx context.Context) error {
 	// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.19.1/pkg/metrics/server
 	// - https://book.kubebuilder.io/reference/metrics.html
 	metricsServerOptions := metricsserver.Options{
-		BindAddress:    fmt.Sprintf(":%d", *metricsPort),
-		FilterProvider: filters.WithAuthenticationAndAuthorization,
+		BindAddress: fmt.Sprintf(":%d", *metricsPort),
+		FilterProvider: func() func(c *rest.Config, httpClient *http.Client) (metricsserver.Filter, error) {
+			if *metricsEndpointAuth {
+				return filters.WithAuthenticationAndAuthorization
+			}
+
+			return nil
+		}(),
 	}
 
 	// Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default
diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md
index f6354bfee..9fbbfb9bf 100644
--- a/config/charts/inferencepool/README.md
+++ b/config/charts/inferencepool/README.md
@@ -137,14 +137,16 @@ inferenceExtension:
   monitoring:
     interval: "10s"
     prometheus:
-      enabled: true
-    secret:
-      name: inference-gateway-sa-metrics-reader-secret
+      enabled: true
+      auth:
+        enabled: true
+        secretName: inference-gateway-sa-metrics-reader-secret
+      extraLabels: {}
 ```
 
 **Note:** Prometheus monitoring requires the Prometheus Operator and ServiceMonitor CRD to be installed in the cluster.
 
-For GKE environments, monitoring is enabled by setting `provider.name` to `gke` and `inferenceExtension.monitoring.gke.enabled` to `true`. This will create the necessary `PodMonitoring` and RBAC resources for metrics collection.
+For GKE environments, you must first set `provider.name` to `gke`. This will create the necessary `PodMonitoring` and RBAC resources for metrics collection.
 
 If you are using a GKE Autopilot cluster, you also need to set `provider.gke.autopilot` to `true`.
 
@@ -166,34 +168,36 @@ $ helm uninstall pool-1
 
 The following table list the configurable parameters of the chart.
 
-| **Parameter Name**                                 | **Description**                                                                                                                                                                                                                                                                                                                       |
-|----------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `inferencePool.apiVersion`                         | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions.                                                                                                                                           |
-| `inferencePool.targetPortNumber`                   | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000.                                                                                                                                                                                                                |
-| `inferencePool.modelServerType`                    | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm.                                                                                                                                                                                                                                |
-| `inferencePool.modelServers.matchLabels`           | Label selector to match vllm backends managed by the inference pool.                                                                                                                                                                                                                                                                  |
-| `inferenceExtension.replicas`                      | Number of replicas for the endpoint picker extension service. If More than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`.                                                                                                                                                                              |
-| `inferenceExtension.image.name`                    | Name of the container image used for the endpoint picker.                                                                                                                                                                                                                                                                             |
-| `inferenceExtension.image.hub`                     | Registry URL where the endpoint picker image is hosted.                                                                                                                                                                                                                                                                               |
-| `inferenceExtension.image.tag`                     | Image tag of the endpoint picker.                                                                                                                                                                                                                                                                                                     |
-| `inferenceExtension.image.pullPolicy`              | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`.                                                                                                                                                                                                                     |
-| `inferenceExtension.env`                           | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`.                                                                                                                                                                                                                            |
-| `inferenceExtension.extraContainerPorts`           | List of additional container ports to expose. Defaults to `[]`.                                                                                                                                                                                                                                                                       |
-| `inferenceExtension.extraServicePorts`             | List of additional service ports to expose. Defaults to `[]`.                                                                                                                                                                                                                                                                         |
-| `inferenceExtension.flags`                         | List of flags which are passed through to endpoint picker. Example flags, enable-pprof, grpc-port etc. Refer [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for complete list.                                                                                                                                                                                      |
-| `inferenceExtension.affinity`                      | Affinity for the endpoint picker. Defaults to `{}`.                                                                                                                                                                                                                                                                                   |
-| `inferenceExtension.tolerations`                   | Tolerations for the endpoint picker. Defaults to `[]`.                                                                                                                                                                                                                                                                                |
-| `inferenceExtension.monitoring.interval`           | Metrics scraping interval for monitoring. Defaults to `10s`.                                                                                                                                                                                                                                                                          |
-| `inferenceExtension.monitoring.secret.name`        | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`.                                                                                                                                                                                                        |
-| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`.                                                                                                                                                                                                                                            |
-| `inferenceExtension.monitoring.gke.enabled`        | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`.                                                                                                                                                                                                                                                      |
-| `inferenceExtension.pluginsCustomConfig`           | Custom config that is passed to EPP as inline yaml.                                                                                                                                                                                                                                                                                   |
-| `inferenceExtension.tracing.enabled`               | Enables or disables OpenTelemetry tracing globally for the EndpointPicker.                                                                                                                                                                                                                                                            |
-| `inferenceExtension.tracing.otelExporterEndpoint`  | OpenTelemetry collector endpoint.                                                                                                                                                                                                                                                                                                     |
-| `inferenceExtension.tracing.sampling.sampler`      | The trace sampler to use. Currently, only `parentbased_traceidratio` is supported. This sampler respects the parent span’s sampling decision when present, and applies the configured ratio for root spans.                                                                                                                           |
-| `inferenceExtension.tracing.sampling.samplerArg`   | Sampler-specific argument. For `parentbased_traceidratio`, this defines the base sampling rate for new traces (root spans), as a float string in the range [0.0, 1.0]. For example, "0.1" enables 10% sampling.                                                                                                                       |
-| `provider.name`                                    | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`.                                                                                                                                                                                                            |
-| `provider.gke.autopilot`                           | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`.                                                                                                                                                                                                          |
+| **Parameter Name**                                         | **Description**                                                                                                                                                                                                                                    |
+|------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `inferencePool.apiVersion`                                 | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions.                                                        |
+| `inferencePool.targetPortNumber`                           | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000.                                                                                                                             |
+| `inferencePool.modelServerType`                            | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm.                                                                                                                                             |
+| `inferencePool.modelServers.matchLabels`                   | Label selector to match vllm backends managed by the inference pool.                                                                                                                                                                               |
+| `inferenceExtension.replicas`                              | Number of replicas for the endpoint picker extension service. If More than one replica is used, EPP will run in HA active-passive mode. Defaults to `1`.                                                                                           |
+| `inferenceExtension.image.name`                            | Name of the container image used for the endpoint picker.                                                                                                                                                                                          |
+| `inferenceExtension.image.hub`                             | Registry URL where the endpoint picker image is hosted.                                                                                                                                                                                            |
+| `inferenceExtension.image.tag`                             | Image tag of the endpoint picker.                                                                                                                                                                                                                  |
+| `inferenceExtension.image.pullPolicy`                      | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`.                                                                                                                                  |
+| `inferenceExtension.env`                                   | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`.                                                                                                                                         |
+| `inferenceExtension.extraContainerPorts`                   | List of additional container ports to expose. Defaults to `[]`.                                                                                                                                                                                    |
+| `inferenceExtension.extraServicePorts`                     | List of additional service ports to expose. Defaults to `[]`.                                                                                                                                                                                      |
+| `inferenceExtension.flags`                                 | List of flags which are passed through to endpoint picker. Example flags, enable-pprof, grpc-port etc. Refer [runner.go](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/cmd/epp/runner/runner.go) for complete list. |
+| `inferenceExtension.affinity`                              | Affinity for the endpoint picker. Defaults to `{}`.                                                                                                                                                                                                |
+| `inferenceExtension.tolerations`                           | Tolerations for the endpoint picker. Defaults to `[]`.                                                                                                                                                                                             |
+| `inferenceExtension.monitoring.interval`                   | Metrics scraping interval for monitoring. Defaults to `10s`.                                                                                                                                                                                       |
+| `inferenceExtension.monitoring.prometheus.enabled`         | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`.                                                                                                                                                         |
+| `inferenceExtension.monitoring.gke.enabled`                | **DEPRECATED**: This field is deprecated and will be removed in the next release.  Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`.                                                                                |
+| `inferenceExtension.monitoring.prometheus.auth.enabled`    | Enable auth for Prometheus metrics endpoint. Defaults is `true`                                                                                                                                                                                    |
+| `inferenceExtension.monitoring.prometheus.auth.secretName` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`.                                                                                                                     |
+| `inferenceExtension.monitoring.prometheus.extraLabels`     | Extra labels added to ServiceMonitor.                                                                                                                                                                                                              |
+| `inferenceExtension.pluginsCustomConfig`                   | Custom config that is passed to EPP as inline yaml.                                                                                                                                                                                                |
+| `inferenceExtension.tracing.enabled`                       | Enables or disables OpenTelemetry tracing globally for the EndpointPicker.                                                                                                                                                                         |
+| `inferenceExtension.tracing.otelExporterEndpoint`          | OpenTelemetry collector endpoint.                                                                                                                                                                                                                  |
+| `inferenceExtension.tracing.sampling.sampler`              | The trace sampler to use. Currently, only `parentbased_traceidratio` is supported. This sampler respects the parent span’s sampling decision when present, and applies the configured ratio for root spans.                                        |
+| `inferenceExtension.tracing.sampling.samplerArg`           | Sampler-specific argument. For `parentbased_traceidratio`, this defines the base sampling rate for new traces (root spans), as a float string in the range [0.0, 1.0]. For example, "0.1" enables 10% sampling.                                    |
+| `provider.name`                                            | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`.                                                                                                                         |
+| `provider.gke.autopilot`                                   | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`.                                                                                                                       |
 
 ### Provider Specific Configuration
 
diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml
index 120240217..10eb2907a 100644
--- a/config/charts/inferencepool/templates/epp-deployment.yaml
+++ b/config/charts/inferencepool/templates/epp-deployment.yaml
@@ -68,6 +68,10 @@ spec:
         {{- else }}
         - "false"
         {{- end }}
+        {{- /* EPP enforces authn/authz on /metrics by default. Turn it off unless the chart also configures scrape credentials, i.e. both prometheus.enabled and prometheus.auth.enabled are set; otherwise unauthenticated scrapes would be rejected. */}}
+        {{- if not (and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled) }}
+        - --metrics-endpoint-auth=false
+        {{- end }}
         ports:
         - name: grpc
           containerPort: 9002
diff --git a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml
index df54b3475..16d935f96 100644
--- a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml
+++ b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml
@@ -1,8 +1,8 @@
-{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }}
+{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled (ne (lower .Values.provider.name) "gke") }}
 apiVersion: v1
 kind: Secret
 metadata:
-  name: {{ .Values.inferenceExtension.monitoring.secret.name }}
+  name: {{ .Values.inferenceExtension.monitoring.prometheus.auth.secretName }}
   namespace: {{ .Release.Namespace }}
   labels:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
diff --git a/config/charts/inferencepool/templates/epp-servicemonitor.yaml b/config/charts/inferencepool/templates/epp-servicemonitor.yaml
index e4788ba83..220be76dc 100644
--- a/config/charts/inferencepool/templates/epp-servicemonitor.yaml
+++ b/config/charts/inferencepool/templates/epp-servicemonitor.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }}
+{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled (ne (lower .Values.provider.name) "gke") }}
 apiVersion: monitoring.coreos.com/v1
 kind: ServiceMonitor
 metadata:
@@ -6,15 +6,20 @@ metadata:
   namespace: {{ .Release.Namespace }}
   labels:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+    {{- with .Values.inferenceExtension.monitoring.prometheus.extraLabels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
 spec:
   endpoints:
   - interval: {{ .Values.inferenceExtension.monitoring.interval }}
     port: "http-metrics"
     path: "/metrics"
+    {{- if .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
     authorization:
       credentials:
         key: token
-        name: {{ .Values.inferenceExtension.monitoring.secret.name }}
+        name: {{ .Values.inferenceExtension.monitoring.prometheus.auth.secretName }}
+    {{- end }}
   jobLabel: {{ include "gateway-api-inference-extension.name" . }}
   namespaceSelector:
     matchNames:
diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml
index 77855c35a..a2d8bbc87 100644
--- a/config/charts/inferencepool/templates/gke.yaml
+++ b/config/charts/inferencepool/templates/gke.yaml
@@ -40,7 +40,7 @@ spec:
     logging:
       enabled: true    # log all requests by default
 ---
-{{- if .Values.inferenceExtension.monitoring.gke.enabled }}
+{{- if or .Values.inferenceExtension.monitoring.gke.enabled (and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled) }}
 {{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}}
 {{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}}
 {{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}}
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
index f901f7f0f..8b3385ab1 100644
--- a/config/charts/inferencepool/values.yaml
+++ b/config/charts/inferencepool/values.yaml
@@ -43,14 +43,17 @@ inferenceExtension:
   # Monitoring configuration for EPP
   monitoring:
     interval: "10s"
-    # Service account token secret for authentication
-    secret:
-      name: inference-gateway-sa-metrics-reader-secret
-
     # Prometheus ServiceMonitor will be created when enabled for EPP metrics collection
     prometheus:
       enabled: false
-    
+      auth:
+        enabled: true
+        # Service account token secret for authentication
+        secretName: inference-gateway-sa-metrics-reader-secret
+      # additional labels for the ServiceMonitor
+      extraLabels: {}
+
+    # DEPRECATED: The 'gke' configuration will be removed in the next release; use 'prometheus.enabled' together with 'prometheus.auth.enabled' instead.
     gke:
       enabled: false
   tracing: