From efa938baa79017da92f071242bec9329be437159 Mon Sep 17 00:00:00 2001
From: X1aoZEOuO <nizefeng2002@outlook.com>
Date: Wed, 16 Jul 2025 11:10:00 +0800
Subject: [PATCH 1/7] feat: add service anno for activator.

Signed-off-by: X1aoZEOuO <nizefeng2002@outlook.com>
---
 api/core/v1alpha1/model_types.go               |  5 +++++
 pkg/controller/inference/service_controller.go | 10 +++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/api/core/v1alpha1/model_types.go b/api/core/v1alpha1/model_types.go
index 7f4a997f..284b0b59 100644
--- a/api/core/v1alpha1/model_types.go
+++ b/api/core/v1alpha1/model_types.go
@@ -35,6 +35,11 @@ const (
 	// Once either of them qualified, we'll expose this as a field in Model.
 	ModelPreheatAnnoKey = "llmaz.io/model-preheat"
 
+	// ModelActivatorAnnotationKey is used to indicate whether the model is activated by the activator.
+	ModelActivatorAnnoKey = "activator.llmaz.io/playground"
+	// CachedModelActivatorAnnotationKey is used to cache the activator info of the model.
+	CachedModelActivatorAnnoKey = "cached.activator.llmaz.io"
+
 	HUGGING_FACE = "Huggingface"
 	MODEL_SCOPE  = "ModelScope"
 
diff --git a/pkg/controller/inference/service_controller.go b/pkg/controller/inference/service_controller.go
index 1f62cabd..eb2bddd6 100644
--- a/pkg/controller/inference/service_controller.go
+++ b/pkg/controller/inference/service_controller.go
@@ -357,6 +357,12 @@ func modelAnnotations(service *inferenceapi.Service) map[string]string {
 	return nil
 }
 
+func activatorAnnotations(model *coreapi.OpenModel) map[string]string {
+	return map[string]string{
+		coreapi.ModelActivatorAnnoKey: model.Name,
+	}
+}
+
 func setServiceCondition(service *inferenceapi.Service, workload *lws.LeaderWorkerSet) {
 	defer func() {
 		if service.Status.Selector != workload.Status.HPAPodSelector {
@@ -419,7 +425,7 @@ func setControllerReferenceForWorkload(owner metav1.Object, lws *applyconfigurat
 	return nil
 }
 
-func CreateServiceIfNotExists(ctx context.Context, k8sClient client.Client, Scheme *runtime.Scheme, service *inferenceapi.Service) error {
+func CreateServiceIfNotExists(ctx context.Context, k8sClient client.Client, Scheme *runtime.Scheme, service *inferenceapi.Service, model []*coreapi.OpenModel) error {
 	log := ctrl.LoggerFrom(ctx)
 	// The load balancing service name.
 	svcName := service.Name + "-lb"
@@ -433,6 +439,8 @@ func CreateServiceIfNotExists(ctx context.Context, k8sClient client.Client, Sche
 			ObjectMeta: metav1.ObjectMeta{
 				Name:      svcName,
 				Namespace: service.Namespace,
+				// The activator annotation is always set; it is only acted on by the activator controller, which is registered only when serverless is enabled.
+				Annotations: activatorAnnotations(model[0]),
 			},
 			Spec: corev1.ServiceSpec{
 				Ports: []corev1.ServicePort{

From e79d0dc53cd76286a32b97ed24de85a6fcf3e14f Mon Sep 17 00:00:00 2001
From: X1aoZEOuO <nizefeng2002@outlook.com>
Date: Mon, 21 Jul 2025 11:10:00 +0800
Subject: [PATCH 2/7] feat: add port manager.

Signed-off-by: X1aoZEOuO <nizefeng2002@outlook.com>
---
 .../inference/activator_controller.go         | 240 ++++++++++++++++++
 1 file changed, 240 insertions(+)
 create mode 100644 pkg/controller/inference/activator_controller.go

diff --git a/pkg/controller/inference/activator_controller.go b/pkg/controller/inference/activator_controller.go
new file mode 100644
index 00000000..43957e9a
--- /dev/null
+++ b/pkg/controller/inference/activator_controller.go
@@ -0,0 +1,240 @@
+/*
+Copyright 2024 The InftyAI Team.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package inference
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net"
+	"sync"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/dynamic"
+	"k8s.io/client-go/util/retry"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/builder"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/event"
+	"sigs.k8s.io/controller-runtime/pkg/handler"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+
+	llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
+	llmazcorev1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1"
+)
+
+var (
+	activatorControllerLog = ctrl.Log.WithName("activator-controller")
+)
+
+const (
+	playgroundsResource     = "playgrounds"
+	activatorControllerName = "activator-controller"
+)
+
+func tunnel(a, b net.Conn) {
+	go io.Copy(a, b)
+	go io.Copy(b, a)
+}
+
+type Listener interface {
+	net.Listener
+	// Port returns the local TCP port the listener is bound to.
+	Port() int
+}
+
+// listener is a net.Listener that remembers the TCP port it is bound to.
+type listener struct {
+	net.Listener
+	port int
+}
+
+// NewListener opens a TCP listener on ":0", i.e. on a free port chosen by the
+// operating system, and records that port so it can be reported via Port.
+func NewListener() (Listener, error) {
+	l, err := net.Listen("tcp", ":0")
+	if err != nil {
+		return nil, err
+	}
+	addr, ok := l.Addr().(*net.TCPAddr)
+	if !ok {
+		// Not expected for a "tcp" listener; release the socket rather than panic.
+		_ = l.Close()
+		return nil, fmt.Errorf("unexpected listener address type %T", l.Addr())
+	}
+	return &listener{Listener: l, port: addr.Port}, nil
+}
+
+// Port returns the port recorded when the listener was created.
+func (l *listener) Port() int {
+	return l.port
+}
+
+// Target identifies what a listener stands in for: a name, a namespace and a
+// port number.
+type Target struct {
+	Name      string
+	Namespace string
+	Port      int
+}
+
+// PortInformation couples a Target with its local Listener and the
+// connections accepted on that listener so far.
+//
+// Connections is only appended to under the PortManager lock while the entry
+// is registered; after the entry has been removed from the manager it is no
+// longer modified, so whoever removed it may read it without locking.
+type PortInformation struct {
+	Target      Target
+	Listener    Listener
+	Connections []net.Conn
+}
+
+// PortManager keeps one listener per Target and starts the callback once per
+// listener, when its first connection is accepted.
+type PortManager struct {
+	portMap        map[int]*PortInformation
+	reversePortMap map[Target]int
+	mut            sync.Mutex
+
+	cb func(*PortInformation)
+}
+
+// NewPortManager returns an empty PortManager that will run cb in its own
+// goroutine on the first connection accepted for each target.
+func NewPortManager(cb func(*PortInformation)) *PortManager {
+	return &PortManager{
+		portMap:        map[int]*PortInformation{},
+		reversePortMap: map[Target]int{},
+		cb:             cb,
+	}
+}
+
+// AddTarget returns the entry registered for (name, namespace, port), creating
+// it on first use by opening a new listener and starting its accept loop.
+func (pm *PortManager) AddTarget(name string, namespace string, port int) (*PortInformation, error) {
+	pm.mut.Lock()
+	defer pm.mut.Unlock()
+
+	target := Target{
+		Name:      name,
+		Namespace: namespace,
+		Port:      port,
+	}
+
+	// Kept separate from the port parameter: this is the local listener port.
+	if listenPort, ok := pm.reversePortMap[target]; ok {
+		return pm.portMap[listenPort], nil
+	}
+
+	listener, err := NewListener()
+	if err != nil {
+		return nil, err
+	}
+	downstream := &PortInformation{
+		Target:   target,
+		Listener: listener,
+	}
+	pm.portMap[listener.Port()] = downstream
+	pm.reversePortMap[target] = listener.Port()
+
+	go pm.startListener(downstream)
+	return downstream, nil
+}
+
+// RemoveTarget unregisters the entry for (name, namespace, port) and returns
+// it, or nil if there is none. The listener and connections are left open for
+// the caller to deal with.
+func (pm *PortManager) RemoveTarget(name string, namespace string, port int) *PortInformation {
+	pm.mut.Lock()
+	defer pm.mut.Unlock()
+
+	target := Target{
+		Name:      name,
+		Namespace: namespace,
+		Port:      port,
+	}
+
+	listenPort, ok := pm.reversePortMap[target]
+	if !ok {
+		return nil
+	}
+	downstream := pm.portMap[listenPort]
+	delete(pm.portMap, listenPort)
+	delete(pm.reversePortMap, target)
+	return downstream
+}
+
+// RemoveTargetForAllPorts unregisters every entry whose target matches name
+// and namespace, whatever its port, and returns the removed entries.
+func (pm *PortManager) RemoveTargetForAllPorts(name string, namespace string) []*PortInformation {
+	pm.mut.Lock()
+	defer pm.mut.Unlock()
+
+	var downstreams []*PortInformation
+	for port, downstream := range pm.portMap {
+		if downstream.Target.Name == name && downstream.Target.Namespace == namespace {
+			delete(pm.portMap, port)
+			delete(pm.reversePortMap, downstream.Target)
+			downstreams = append(downstreams, downstream)
+		}
+	}
+	return downstreams
+}
+
+// startListener runs the accept loop for downstream until its listener is
+// closed. Accepted connections are parked in downstream.Connections, and cb is
+// started in its own goroutine when the first one is parked.
+func (pm *PortManager) startListener(downstream *PortInformation) {
+	started := false
+	for {
+		conn, err := downstream.Listener.Accept()
+		if err != nil {
+			return
+		}
+
+		// Park the connection under the lock and only while the entry is still
+		// registered: once it has been removed, its new owner reads Connections
+		// without locking, so it must not be appended to any more.
+		pm.mut.Lock()
+		registered := pm.portMap[downstream.Listener.Port()] == downstream
+		if registered {
+			downstream.Connections = append(downstream.Connections, conn)
+		}
+		pm.mut.Unlock()
+
+		if !registered {
+			// Nobody is left to forward this connection; close it instead of
+			// leaving the client hanging on a socket that is never served.
+			_ = conn.Close()
+			continue
+		}
+		if !started {
+			go pm.cb(downstream)
+			started = true
+		}
+	}
+}

From 632cb3e46a4970ae7c192b7a22622d9fe5db9522 Mon Sep 17 00:00:00 2001
From: X1aoZEOuO <nizefeng2002@outlook.com>
Date: Wed, 30 Jul 2025 11:10:00 +0800
Subject: [PATCH 3/7] feat: add activator controller manager for activator.

Signed-off-by: X1aoZEOuO <nizefeng2002@outlook.com>
---
 .../inference/activator_controller.go         | 347 ++++++++++++++++++
 1 file changed, 347 insertions(+)

diff --git a/pkg/controller/inference/activator_controller.go b/pkg/controller/inference/activator_controller.go
index 43957e9a..c873a561 100644
--- a/pkg/controller/inference/activator_controller.go
+++ b/pkg/controller/inference/activator_controller.go
@@ -55,6 +55,353 @@ const (
 	activatorControllerName = "activator-controller"
 )
 
+type ActivatorReconciler struct {
+	client.Client
+	dynamicClient dynamic.Interface
+	portManager   *PortManager
+	ip            string
+}
+
+func NewActivatorReconciler(mgr ctrl.Manager, dynamicClient dynamic.Interface, ip string) *ActivatorReconciler {
+	reconciler := &ActivatorReconciler{
+		Client:        mgr.GetClient(),
+		dynamicClient: dynamicClient,
+		ip:            ip,
+	}
+	reconciler.portManager = NewPortManager(reconciler.scaleUp)
+	return reconciler
+}
+
+// +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;update;patch;delete
+// +kubebuilder:rbac:groups="",resources=endpoints,verbs=get;list;watch;update;patch
+
+// Reconcile is part of the main kubernetes reconciliation loop which aims to
+// move the current state of the cluster closer to the desired state.
+// For more details, check Reconcile and its Result here:
+// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.16.3/pkg/reconcile
+func (r *ActivatorReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	svc := &corev1.Service{}
+	if err := r.Get(ctx, req.NamespacedName, svc); err != nil {
+		if errors.IsNotFound(err) {
+			r.handleServiceDeletion(req.Namespace, req.Name)
+			return ctrl.Result{}, nil
+		}
+		return ctrl.Result{}, err
+	}
+
+	if err := r.restoreSelectorIfNeeded(ctx, svc); err != nil {
+		return ctrl.Result{}, err
+	}
+
+	ep := &corev1.Endpoints{}
+	if err := r.Get(ctx, req.NamespacedName, ep); err != nil {
+		if errors.IsNotFound(err) {
+			activatorControllerLog.Info("Endpoints not found, waiting for creation", "service", svc.Name)
+			return ctrl.Result{}, nil
+		}
+		activatorControllerLog.Error(err, "Failed to get endpoints", "service", svc.Name)
+		return ctrl.Result{}, err
+	}
+
+	// Check if the service has the activator annotation
+	ports, ok := r.needInject(svc)
+	if !ok {
+		activatorControllerLog.Info("Activator annotation not found, skipping", "service", svc.Name)
+		return ctrl.Result{}, nil
+	}
+
+	if len(ep.Subsets) == 0 {
+		// If the endpoints are empty, inject the activator IP
+		return ctrl.Result{}, r.injectEndpoint(ctx, ep, svc, ports)
+	} else if ep.Subsets[0].Addresses != nil &&
+		len(ep.Subsets[0].Addresses) > 0 &&
+		ep.Subsets[0].Addresses[0].IP != r.ip {
+		// If the endpoints are not empty and not the activator IP, forward the traffic
+		return ctrl.Result{}, r.forwardEndpoint(ctx, ep, ports)
+	}
+
+	return ctrl.Result{}, nil
+}
+
+func (r *ActivatorReconciler) needInject(svc *corev1.Service) ([]corev1.ServicePort, bool) {
+	if svc == nil || svc.Annotations == nil {
+		return nil, false
+	}
+	if _, ok := svc.Annotations[llmazcoreapi.ModelActivatorAnnoKey]; !ok {
+		return nil, false
+	}
+	if len(svc.Spec.Ports) == 0 || svc.Spec.Type != corev1.ServiceTypeClusterIP {
+		return nil, false
+	}
+
+	validPorts := make([]corev1.ServicePort, 0, len(svc.Spec.Ports))
+	for _, port := range svc.Spec.Ports {
+		if port.Port == 0 || port.Protocol != corev1.ProtocolTCP {
+			continue
+		}
+		validPorts = append(validPorts, port)
+	}
+	if len(validPorts) == 0 {
+		return nil, false
+	}
+	return validPorts, true
+}
+
+func (r *ActivatorReconciler) restoreSelectorIfNeeded(ctx context.Context, svc *corev1.Service) error {
+	selectorStr := svc.Annotations[llmazcoreapi.CachedModelActivatorAnnoKey]
+	if selectorStr == "" {
+		return nil
+	}
+
+	sel := map[string]string{}
+	if err := json.Unmarshal([]byte(selectorStr), &sel); err != nil {
+		activatorControllerLog.Error(err, "Failed to unmarshal selector")
+		return err
+	}
+
+	updatedSvc := svc.DeepCopy()
+	delete(updatedSvc.Annotations, llmazcoreapi.CachedModelActivatorAnnoKey)
+	updatedSvc.Spec.Selector = sel
+
+	if err := r.Update(ctx, updatedSvc); err != nil {
+		activatorControllerLog.Error(err, "Failed to restore service selector")
+		return err
+	}
+
+	activatorControllerLog.Info("Restored service selector", "selector", sel)
+	return nil
+}
+
+func (r *ActivatorReconciler) injectEndpoint(ctx context.Context, ep *corev1.Endpoints, svc *corev1.Service, ports []corev1.ServicePort) error {
+	subsets := make([]corev1.EndpointSubset, 0, len(ports))
+	for _, port := range ports {
+		ds, err := r.portManager.AddTarget(ep.Name, ep.Namespace, int(port.Port))
+		if err != nil {
+			return err
+		}
+
+		activatorControllerLog.Info("Injecting endpoint",
+			"port", port.Port,
+			"listenerPort", ds.Listener.Port(),
+		)
+
+		subsets = append(subsets, corev1.EndpointSubset{
+			Addresses: []corev1.EndpointAddress{{IP: r.ip}},
+			Ports: []corev1.EndpointPort{{
+				Name: port.Name,
+				Port: int32(ds.Listener.Port()),
+			}},
+		})
+	}
+
+	updatedEp := ep.DeepCopy()
+	updatedEp.Subsets = subsets
+	if err := r.Update(ctx, updatedEp); err != nil {
+		activatorControllerLog.Error(err, "Failed to update endpoints")
+		return err
+	}
+
+	// Save the original selector to annotation and clear the selector
+	selectorBytes, _ := json.Marshal(svc.Spec.Selector)
+	updatedSvc := svc.DeepCopy()
+	if updatedSvc.Annotations == nil {
+		updatedSvc.Annotations = make(map[string]string)
+	}
+	updatedSvc.Annotations[llmazcoreapi.CachedModelActivatorAnnoKey] = string(selectorBytes)
+	updatedSvc.Spec.Selector = nil
+	return r.Update(ctx, updatedSvc)
+}
+
+func (r *ActivatorReconciler) handleServiceDeletion(namespace, name string) {
+	pis := r.portManager.RemoveTargetForAllPorts(name, namespace)
+	for _, pi := range pis {
+		activatorControllerLog.Info("Cleaning up endpoints after service deletion",
+			"port", pi.Target.Port,
+			"listenerPort", pi.Listener.Port(),
+		)
+		_ = pi.Listener.Close()
+		for _, conn := range pi.Connections {
+			_ = conn.Close()
+		}
+	}
+}
+
+func (r *ActivatorReconciler) forwardEndpoint(ctx context.Context, ep *corev1.Endpoints, ports []corev1.ServicePort) error {
+	for _, port := range ports {
+		ds := r.portManager.RemoveTarget(ep.Name, ep.Namespace, int(port.Port))
+		if ds == nil {
+			continue
+		}
+
+		address, err := r.getEndpointAddress(ep, ports, &ds.Target)
+		if err != nil {
+			activatorControllerLog.Error(err, "Failed to get endpoint address")
+			continue
+		}
+
+		activatorControllerLog.Info("Forwarding traffic to real endpoint",
+			"port", port.Port,
+			"address", address,
+			"connections", len(ds.Connections),
+		)
+
+		for _, conn := range ds.Connections {
+			targetConn, err := net.Dial("tcp", address)
+			if err != nil {
+				activatorControllerLog.Error(err, "Failed to dial target")
+				continue
+			}
+			tunnel(conn, targetConn)
+		}
+		err = ds.Listener.Close()
+		if err != nil {
+			activatorControllerLog.Error(err, "Failed to close listener")
+			continue
+		}
+	}
+	return nil
+}
+
+func (r *ActivatorReconciler) getEndpointAddress(ep *corev1.Endpoints, ports []corev1.ServicePort, target *Target) (string, error) {
+	for _, port := range ports {
+		if int(port.Port) != target.Port {
+			continue
+		}
+
+		for _, subset := range ep.Subsets {
+			if len(subset.Addresses) == 0 {
+				continue
+			}
+			for _, p := range subset.Ports {
+				if port.TargetPort.Type == intstr.Int && int(p.Port) == int(port.TargetPort.IntVal) {
+					return fmt.Sprintf("%s:%d", subset.Addresses[0].IP, p.Port), nil
+				}
+			}
+		}
+	}
+	return "", fmt.Errorf("address not found for port %d", target.Port)
+}
+
+func (r *ActivatorReconciler) scaleUp(pi *PortInformation) {
+	ctx := context.Background()
+	activatorControllerLog.Info("Scaling up target Playground", "service", pi.Target.Name)
+
+	svc := &corev1.Service{}
+	key := types.NamespacedName{Namespace: pi.Target.Namespace, Name: pi.Target.Name}
+	if err := r.Get(ctx, key, svc); err != nil {
+		activatorControllerLog.Error(err, "Failed to get service")
+		return
+	}
+
+	name := svc.Annotations[llmazcoreapi.ModelActivatorAnnoKey]
+	if name == "" {
+		activatorControllerLog.Error(nil, "Scale annotation not found")
+		return
+	}
+
+	gvr := llmazcorev1alpha1.GroupVersion.WithResource(playgroundsResource)
+
+	activatorControllerLog.Info("Scaling up Playground", "playground", name)
+	retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
+		playground, err := r.dynamicClient.Resource(gvr).Namespace(pi.Target.Namespace).Get(ctx, name, metav1.GetOptions{})
+		if err != nil {
+			return err
+		}
+		if err := unstructured.SetNestedField(playground.Object, int64(1), "spec", "replicas"); err != nil {
+			return err
+		}
+		_, err = r.dynamicClient.Resource(gvr).Namespace(pi.Target.Namespace).Update(ctx, playground, metav1.UpdateOptions{})
+		return err
+	})
+
+	if retryErr != nil {
+		activatorControllerLog.Error(retryErr, "Failed to scale Playground")
+		return
+	}
+
+	if err := r.waitUntilPlaygroundPodIsReady(ctx, name, pi.Target.Namespace); err != nil {
+		activatorControllerLog.Error(err, "Failed waiting for Playground pod")
+		return
+	}
+
+	// Restore the service selector
+	restoreSelectorIfNeededErr := r.restoreSelectorIfNeeded(ctx, svc)
+	if restoreSelectorIfNeededErr != nil {
+		activatorControllerLog.Error(restoreSelectorIfNeededErr, "Failed to restore service selector")
+		return
+	}
+}
+
+func (r *ActivatorReconciler) waitUntilPlaygroundPodIsReady(ctx context.Context, name, namespace string) error {
+	// The pod name is always playground name + "-0"
+	podName := name + "-0"
+	return wait.PollUntilContextTimeout(ctx, time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
+		pod := &corev1.Pod{}
+		if err := r.Get(ctx, types.NamespacedName{Namespace: namespace, Name: podName}, pod); err != nil {
+			if errors.IsNotFound(err) {
+				return false, nil
+			}
+			return false, err
+		}
+		for _, cond := range pod.Status.Conditions {
+			if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue {
+				return true, nil
+			}
+		}
+		return false, nil
+	})
+}
+
+func (r *ActivatorReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	hasActivatorAnnotation := func(obj client.Object) bool {
+		// Make sure the object has the activator annotation
+		annotations := obj.GetAnnotations()
+		_, ok := annotations[llmazcoreapi.ModelActivatorAnnoKey]
+		if ok {
+			activatorControllerLog.V(4).Info("Object has activator annotation", "object", obj.GetName())
+		}
+
+		return ok
+	}
+
+	return ctrl.NewControllerManagedBy(mgr).
+		Named(activatorControllerName).
+		For(&corev1.Service{}, builder.WithPredicates(predicate.Funcs{
+			CreateFunc: func(e event.CreateEvent) bool {
+				return hasActivatorAnnotation(e.Object)
+			},
+			UpdateFunc: func(e event.UpdateEvent) bool {
+				return hasActivatorAnnotation(e.ObjectNew) || hasActivatorAnnotation(e.ObjectOld)
+			},
+			DeleteFunc: func(e event.DeleteEvent) bool {
+				return hasActivatorAnnotation(e.Object)
+			},
+		})).
+		Watches(
+			&corev1.Endpoints{},
+			handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request {
+				return []reconcile.Request{
+					{NamespacedName: types.NamespacedName{
+						Namespace: obj.GetNamespace(),
+						Name:      obj.GetName(),
+					}},
+				}
+			}),
+			builder.WithPredicates(predicate.Funcs{
+				CreateFunc: func(e event.CreateEvent) bool {
+					return hasActivatorAnnotation(e.Object)
+				},
+				UpdateFunc: func(e event.UpdateEvent) bool {
+					return hasActivatorAnnotation(e.ObjectNew)
+				},
+				DeleteFunc: func(e event.DeleteEvent) bool {
+					return hasActivatorAnnotation(e.Object)
+				},
+			}),
+		).
+		Complete(r)
+}
+
 func tunnel(a, b net.Conn) {
 	go io.Copy(a, b)
 	go io.Copy(b, a)

From 19c57b73d080d68a351db95b1417d7d9d634eb5a Mon Sep 17 00:00:00 2001
From: X1aoZEOuO <nizefeng2002@outlook.com>
Date: Sun, 3 Aug 2025 11:10:00 +0800
Subject: [PATCH 4/7] feat: add entrypoint for activator.

Signed-off-by: X1aoZEOuO <nizefeng2002@outlook.com>
---
 cmd/main.go | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/cmd/main.go b/cmd/main.go
index 5372335f..9a8c4d4d 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -63,10 +63,14 @@ func main() {
 	var enableLeaderElection bool
 	var probeAddr string
 	var namespace string
+	var enableServerless bool
+	var podIP string
 
 	flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
 	flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
 	flag.StringVar(&namespace, "namespace", "llmaz-system", "The namespace of the llmaz to deploy")
+	flag.BoolVar(&enableServerless, "enable-serverless", false, "Enable the serverless feature")
+	flag.StringVar(&podIP, "pod-ip", "", "The pod IP of the llmaz controller manager")
 	flag.BoolVar(&enableLeaderElection, "leader-elect", false,
 		"Enable leader election for controller manager. "+
 			"Enabling this will ensure there is only one active controller manager.")
@@ -120,7 +124,7 @@ func main() {
 	// Cert won't be ready until manager starts, so start a goroutine here which
 	// will block until the cert is ready before setting up the controllers.
 	// Controllers who register after manager starts will start directly.
-	go setupControllers(mgr, certsReady)
+	go setupControllers(mgr, certsReady, enableServerless, podIP)
 
 	//+kubebuilder:scaffold:builder
 
@@ -140,7 +144,7 @@ func main() {
 	}
 }
 
-func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
+func setupControllers(mgr ctrl.Manager, certsReady chan struct{}, enableServerless bool, podIP string) {
 	// The controllers won't work until the webhooks are operating,
 	// and the webhook won't work until the certs are all in places.
 	setupLog.Info("waiting for the cert generation to complete")
@@ -176,6 +180,21 @@ func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
 		os.Exit(1)
 	}
 
+
+	if enableServerless {
+		dynamicClient, err := dynamic.NewForConfig(mgr.GetConfig())
+		if err != nil {
+			setupLog.Error(err, "unable to create dynamic client")
+			os.Exit(1)
+		}
+
+		activatorReconciler := inferencecontroller.NewActivatorReconciler(mgr, dynamicClient, podIP)
+		if err := activatorReconciler.SetupWithManager(mgr); err != nil {
+			setupLog.Error(err, "unable to create controller", "controller", "Activator")
+			os.Exit(1)
+		}
+	}
+
 	if os.Getenv("ENABLE_WEBHOOKS") != "false" {
 		if err := webhook.SetupOpenModelWebhook(mgr); err != nil {
 			setupLog.Error(err, "unable to create webhook", "webhook", "Model")

From ef39b503cc385090f22f37d01a72a6f731210a4b Mon Sep 17 00:00:00 2001
From: X1aoZEOuO <nizefeng2002@outlook.com>
Date: Sun, 24 Aug 2025 11:10:00 +0800
Subject: [PATCH 5/7] feat: add chart for activator.

Signed-off-by: X1aoZEOuO <nizefeng2002@outlook.com>
---
 chart/templates/deployment.yaml |  4 ++++
 chart/values.yaml               |  2 ++
 config/rbac/role.yaml           | 10 ++++++++++
 3 files changed, 16 insertions(+)

diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml
index 0be4fea7..aa58f15b 100644
--- a/chart/templates/deployment.yaml
+++ b/chart/templates/deployment.yaml
@@ -29,6 +29,10 @@ spec:
         env:
         - name: KUBERNETES_CLUSTER_DOMAIN
           value: {{ quote .Values.kubernetesClusterDomain }}
+        - name: POD_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.podIP
         image: {{ .Values.controllerManager.manager.image.repository }}:{{ .Values.controllerManager.manager.image.tag
           | default .Chart.AppVersion }}
         livenessProbe:
diff --git a/chart/values.yaml b/chart/values.yaml
index ddf55284..25f375c7 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -5,6 +5,8 @@ controllerManager:
     - --metrics-bind-address=:8443
     - --leader-elect
     - --namespace=llmaz-system
+    - --enable-serverless
+    - --pod-ip=$(POD_IP)
     containerSecurityContext:
       allowPrivilegeEscalation: false
       capabilities:
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index 61c42763..ea9b95cc 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -11,6 +11,16 @@ rules:
   verbs:
   - get
   - list
+- apiGroups:
+  - ""
+  resources:
+  - endpoints
+  verbs:
+  - get
+  - list
+  - patch
+  - update
+  - watch
 - apiGroups:
   - ""
   resources:

From 67d5b73cd09b88624f83aecddb96581b9170e8fd Mon Sep 17 00:00:00 2001
From: X1aoZEOuO <nizefeng2002@outlook.com>
Date: Thu, 11 Sep 2025 11:10:00 +0800
Subject: [PATCH 6/7] fix: fix param in create service.

Signed-off-by: X1aoZEOuO <nizefeng2002@outlook.com>
---
 cmd/main.go                                   |  2 +-
 .../inference/activator_controller.go         | 40 +++++++++++++------
 .../inference/service_controller.go           |  2 +-
 3 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/cmd/main.go b/cmd/main.go
index 9a8c4d4d..ddbb9fdd 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -26,6 +26,7 @@ import (
 
 	"k8s.io/apimachinery/pkg/runtime"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+	"k8s.io/client-go/dynamic"
 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/healthz"
@@ -180,7 +181,6 @@ func setupControllers(mgr ctrl.Manager, certsReady chan struct{}, enableServerle
 		os.Exit(1)
 	}
 
-
 	if enableServerless {
 		dynamicClient, err := dynamic.NewForConfig(mgr.GetConfig())
 		if err != nil {
diff --git a/pkg/controller/inference/activator_controller.go b/pkg/controller/inference/activator_controller.go
index c873a561..9516c21d 100644
--- a/pkg/controller/inference/activator_controller.go
+++ b/pkg/controller/inference/activator_controller.go
@@ -42,7 +42,6 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/predicate"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 
-	llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
 	llmazcorev1alpha1 "github.com/inftyai/llmaz/api/core/v1alpha1"
 )
 
@@ -93,6 +92,7 @@ func (r *ActivatorReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
 		return ctrl.Result{}, err
 	}
 
+	// nolint:staticcheck
 	ep := &corev1.Endpoints{}
 	if err := r.Get(ctx, req.NamespacedName, ep); err != nil {
 		if errors.IsNotFound(err) {
@@ -113,8 +113,7 @@ func (r *ActivatorReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
 	if len(ep.Subsets) == 0 {
 		// If the endpoints are empty, inject the activator IP
 		return ctrl.Result{}, r.injectEndpoint(ctx, ep, svc, ports)
-	} else if ep.Subsets[0].Addresses != nil &&
-		len(ep.Subsets[0].Addresses) > 0 &&
+	} else if len(ep.Subsets[0].Addresses) > 0 &&
 		ep.Subsets[0].Addresses[0].IP != r.ip {
 		// If the endpoints are not empty and not the activator IP, forward the traffic
 		return ctrl.Result{}, r.forwardEndpoint(ctx, ep, ports)
@@ -127,7 +126,7 @@ func (r *ActivatorReconciler) needInject(svc *corev1.Service) ([]corev1.ServiceP
 	if svc == nil || svc.Annotations == nil {
 		return nil, false
 	}
-	if _, ok := svc.Annotations[llmazcoreapi.ModelActivatorAnnoKey]; !ok {
+	if _, ok := svc.Annotations[llmazcorev1alpha1.ModelActivatorAnnoKey]; !ok {
 		return nil, false
 	}
 	if len(svc.Spec.Ports) == 0 || svc.Spec.Type != corev1.ServiceTypeClusterIP {
@@ -148,7 +147,7 @@ func (r *ActivatorReconciler) needInject(svc *corev1.Service) ([]corev1.ServiceP
 }
 
 func (r *ActivatorReconciler) restoreSelectorIfNeeded(ctx context.Context, svc *corev1.Service) error {
-	selectorStr := svc.Annotations[llmazcoreapi.CachedModelActivatorAnnoKey]
+	selectorStr := svc.Annotations[llmazcorev1alpha1.CachedModelActivatorAnnoKey]
 	if selectorStr == "" {
 		return nil
 	}
@@ -160,7 +159,7 @@ func (r *ActivatorReconciler) restoreSelectorIfNeeded(ctx context.Context, svc *
 	}
 
 	updatedSvc := svc.DeepCopy()
-	delete(updatedSvc.Annotations, llmazcoreapi.CachedModelActivatorAnnoKey)
+	delete(updatedSvc.Annotations, llmazcorev1alpha1.CachedModelActivatorAnnoKey)
 	updatedSvc.Spec.Selector = sel
 
 	if err := r.Update(ctx, updatedSvc); err != nil {
@@ -172,7 +171,9 @@ func (r *ActivatorReconciler) restoreSelectorIfNeeded(ctx context.Context, svc *
 	return nil
 }
 
+// nolint:staticcheck
 func (r *ActivatorReconciler) injectEndpoint(ctx context.Context, ep *corev1.Endpoints, svc *corev1.Service, ports []corev1.ServicePort) error {
+	// nolint:staticcheck
 	subsets := make([]corev1.EndpointSubset, 0, len(ports))
 	for _, port := range ports {
 		ds, err := r.portManager.AddTarget(ep.Name, ep.Namespace, int(port.Port))
@@ -185,6 +186,7 @@ func (r *ActivatorReconciler) injectEndpoint(ctx context.Context, ep *corev1.End
 			"listenerPort", ds.Listener.Port(),
 		)
 
+		// nolint:staticcheck
 		subsets = append(subsets, corev1.EndpointSubset{
 			Addresses: []corev1.EndpointAddress{{IP: r.ip}},
 			Ports: []corev1.EndpointPort{{
@@ -207,7 +209,7 @@ func (r *ActivatorReconciler) injectEndpoint(ctx context.Context, ep *corev1.End
 	if updatedSvc.Annotations == nil {
 		updatedSvc.Annotations = make(map[string]string)
 	}
-	updatedSvc.Annotations[llmazcoreapi.CachedModelActivatorAnnoKey] = string(selectorBytes)
+	updatedSvc.Annotations[llmazcorev1alpha1.CachedModelActivatorAnnoKey] = string(selectorBytes)
 	updatedSvc.Spec.Selector = nil
 	return r.Update(ctx, updatedSvc)
 }
@@ -226,7 +228,8 @@ func (r *ActivatorReconciler) handleServiceDeletion(namespace, name string) {
 	}
 }
 
-func (r *ActivatorReconciler) forwardEndpoint(ctx context.Context, ep *corev1.Endpoints, ports []corev1.ServicePort) error {
+// nolint:staticcheck
+func (r *ActivatorReconciler) forwardEndpoint(_ context.Context, ep *corev1.Endpoints, ports []corev1.ServicePort) error {
 	for _, port := range ports {
 		ds := r.portManager.RemoveTarget(ep.Name, ep.Namespace, int(port.Port))
 		if ds == nil {
@@ -256,12 +259,13 @@ func (r *ActivatorReconciler) forwardEndpoint(ctx context.Context, ep *corev1.En
 		err = ds.Listener.Close()
 		if err != nil {
 			activatorControllerLog.Error(err, "Failed to close listener")
-			continue
+			return err
 		}
 	}
 	return nil
 }
 
+// nolint:staticcheck
 func (r *ActivatorReconciler) getEndpointAddress(ep *corev1.Endpoints, ports []corev1.ServicePort, target *Target) (string, error) {
 	for _, port := range ports {
 		if int(port.Port) != target.Port {
@@ -293,7 +297,7 @@ func (r *ActivatorReconciler) scaleUp(pi *PortInformation) {
 		return
 	}
 
-	name := svc.Annotations[llmazcoreapi.ModelActivatorAnnoKey]
+	name := svc.Annotations[llmazcorev1alpha1.ModelActivatorAnnoKey]
 	if name == "" {
 		activatorControllerLog.Error(nil, "Scale annotation not found")
 		return
@@ -356,7 +360,7 @@ func (r *ActivatorReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	hasActivatorAnnotation := func(obj client.Object) bool {
 		// Make sure the object has the activator annotation
 		annotations := obj.GetAnnotations()
-		_, ok := annotations[llmazcoreapi.ModelActivatorAnnoKey]
+		_, ok := annotations[llmazcorev1alpha1.ModelActivatorAnnoKey]
 		if ok {
 			activatorControllerLog.V(4).Info("Object has activator annotation", "object", obj.GetName())
 		}
@@ -378,6 +382,7 @@ func (r *ActivatorReconciler) SetupWithManager(mgr ctrl.Manager) error {
 			},
 		})).
 		Watches(
+			// nolint:staticcheck
 			&corev1.Endpoints{},
 			handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request {
 				return []reconcile.Request{
@@ -403,8 +408,17 @@ func (r *ActivatorReconciler) SetupWithManager(mgr ctrl.Manager) error {
 }
 
 func tunnel(a, b net.Conn) {
-	go io.Copy(a, b)
-	go io.Copy(b, a)
+	go func() {
+		if _, err := io.Copy(a, b); err != nil {
+			activatorControllerLog.Error(err, "Failed to copy")
+		}
+	}()
+
+	go func() {
+		if _, err := io.Copy(b, a); err != nil {
+			activatorControllerLog.Error(err, "Failed to copy")
+		}
+	}()
 }
 
 type Listener interface {
diff --git a/pkg/controller/inference/service_controller.go b/pkg/controller/inference/service_controller.go
index eb2bddd6..87126770 100644
--- a/pkg/controller/inference/service_controller.go
+++ b/pkg/controller/inference/service_controller.go
@@ -131,7 +131,7 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
 	}
 
 	// Create a service for the leader pods of the lws for loadbalancing.
-	if err := CreateServiceIfNotExists(ctx, r.Client, r.Scheme, service); err != nil {
+	if err := CreateServiceIfNotExists(ctx, r.Client, r.Scheme, service, models); err != nil {
 		return ctrl.Result{}, err
 	}
 

From 43f79d3c906d9fa2c5007c1d3307994178d403e8 Mon Sep 17 00:00:00 2001
From: X1aoZEOuO <nizefeng2002@outlook.com>
Date: Tue, 28 Oct 2025 19:06:00 +0800
Subject: [PATCH 7/7] fix: update activator label and var name.

Signed-off-by: X1aoZEOuO <nizefeng2002@outlook.com>
---
 api/core/v1alpha1/model_types.go              |   8 +-
 chart/values.yaml                             |   2 +-
 cmd/main.go                                   |  12 +-
 .../inference.llmaz.io_backendruntimes.yaml   |  50 +++++++-
 .../bases/inference.llmaz.io_playgrounds.yaml |  44 ++++++-
 .../bases/inference.llmaz.io_services.yaml    | 112 ++++++++++--------
 .../inference/activator_controller.go         |   2 +-
 7 files changed, 166 insertions(+), 64 deletions(-)

diff --git a/api/core/v1alpha1/model_types.go b/api/core/v1alpha1/model_types.go
index 284b0b59..982959da 100644
--- a/api/core/v1alpha1/model_types.go
+++ b/api/core/v1alpha1/model_types.go
@@ -35,10 +35,10 @@ const (
 	// Once either of them qualified, we'll expose this as a field in Model.
 	ModelPreheatAnnoKey = "llmaz.io/model-preheat"
 
-	// ModelActivatorAnnotationKey is used to indicate whether the model is activated by the activator.
-	ModelActivatorAnnoKey = "activator.llmaz.io/playground"
-	// CachedModelActivatorAnnotationKey is used to cache the activator info of the model.
-	CachedModelActivatorAnnoKey = "cached.activator.llmaz.io"
+	// ModelActivatorAnnoKey is used to indicate the model name activated by the activator.
+	ModelActivatorAnnoKey = "activator.llmaz.io/model-name"
+	// CachedModelActivatorAnnoKey is used to cache the activator state of the model.
+	CachedModelActivatorAnnoKey = "activator.llmaz.io/cached-state"
 
 	HUGGING_FACE = "Huggingface"
 	MODEL_SCOPE  = "ModelScope"
diff --git a/chart/values.yaml b/chart/values.yaml
index 25f375c7..60f76554 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -5,7 +5,7 @@ controllerManager:
     - --metrics-bind-address=:8443
     - --leader-elect
     - --namespace=llmaz-system
-    - --enable-serverless
+    - --enable-service-activator
     - --pod-ip=$(POD_IP)
     containerSecurityContext:
       allowPrivilegeEscalation: false
diff --git a/cmd/main.go b/cmd/main.go
index ddbb9fdd..17c86008 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -64,14 +64,14 @@ func main() {
 	var enableLeaderElection bool
 	var probeAddr string
 	var namespace string
-	var enableServerless bool
+	var enableServiceActivator bool
 	var podIP string
 
 	flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
 	flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
 	flag.StringVar(&namespace, "namespace", "llmaz-system", "The namespace of the llmaz to deploy")
-	flag.BoolVar(&enableServerless, "enable-serverless", false, "Enable the serverless feature")
-	flag.StringVar(&podIP, "pod-ip", "", "The pod IP of the llmaz controller manager")
+	flag.BoolVar(&enableServiceActivator, "enable-service-activator", false, "Enable the service activator feature. This is an experimental feature.")
+	flag.StringVar(&podIP, "pod-ip", "", "The pod IP of the llmaz controller manager. Only used when service activator is enabled.")
 	flag.BoolVar(&enableLeaderElection, "leader-elect", false,
 		"Enable leader election for controller manager. "+
 			"Enabling this will ensure there is only one active controller manager.")
@@ -125,7 +125,7 @@ func main() {
 	// Cert won't be ready until manager starts, so start a goroutine here which
 	// will block until the cert is ready before setting up the controllers.
 	// Controllers who register after manager starts will start directly.
-	go setupControllers(mgr, certsReady, enableServerless, podIP)
+	go setupControllers(mgr, certsReady, enableServiceActivator, podIP)
 
 	//+kubebuilder:scaffold:builder
 
@@ -145,7 +145,7 @@ func main() {
 	}
 }
 
-func setupControllers(mgr ctrl.Manager, certsReady chan struct{}, enableServerless bool, podIP string) {
+func setupControllers(mgr ctrl.Manager, certsReady chan struct{}, enableServiceActivator bool, podIP string) {
 	// The controllers won't work until the webhooks are operating,
 	// and the webhook won't work until the certs are all in places.
 	setupLog.Info("waiting for the cert generation to complete")
@@ -181,7 +181,7 @@ func setupControllers(mgr ctrl.Manager, certsReady chan struct{}, enableServerle
 		os.Exit(1)
 	}
 
-	if enableServerless {
+	if enableServiceActivator {
 		dynamicClient, err := dynamic.NewForConfig(mgr.GetConfig())
 		if err != nil {
 			setupLog.Error(err, "unable to create dynamic client")
diff --git a/config/crd/bases/inference.llmaz.io_backendruntimes.yaml b/config/crd/bases/inference.llmaz.io_backendruntimes.yaml
index a626c0af..ea03fd13 100644
--- a/config/crd/bases/inference.llmaz.io_backendruntimes.yaml
+++ b/config/crd/bases/inference.llmaz.io_backendruntimes.yaml
@@ -388,6 +388,12 @@ spec:
                         - port
                         type: object
                     type: object
+                  stopSignal:
+                    description: |-
+                      StopSignal defines which signal will be sent to a container when it is being stopped.
+                      If not specified, the default is defined by the container runtime in use.
+                      StopSignal can only be set for Pods with a non-empty .spec.os.name
+                    type: string
                 type: object
               livenessProbe:
                 description: |-
@@ -770,7 +776,9 @@ spec:
                                     policies:
                                       description: |-
                                         policies is a list of potential scaling polices which can be used during scaling.
-                                        At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid
+                                        If not set, use the default values:
+                                        - For scale up: allow doubling the number of pods, or an absolute change of 4 pods in a 15s window.
+                                        - For scale down: allow all pods to be removed in a 15s window.
                                       items:
                                         description: HPAScalingPolicy is a single
                                           policy which must hold true for a specified
@@ -814,6 +822,24 @@ spec:
                                         - For scale down: 300 (i.e. the stabilization window is 300 seconds long).
                                       format: int32
                                       type: integer
+                                    tolerance:
+                                      anyOf:
+                                      - type: integer
+                                      - type: string
+                                      description: |-
+                                        tolerance is the tolerance on the ratio between the current and desired
+                                        metric value under which no updates are made to the desired number of
+                                        replicas (e.g. 0.01 for 1%). Must be greater than or equal to zero. If not
+                                        set, the default cluster-wide tolerance is applied (by default 10%).
+
+                                        For example, if autoscaling is configured with a memory consumption target of 100Mi,
+                                        and scale-down and scale-up tolerances of 5% and 1% respectively, scaling will be
+                                        triggered when the actual consumption falls below 95Mi or exceeds 101Mi.
+
+                                        This is an alpha field and requires enabling the HPAConfigurableTolerance
+                                        feature gate.
+                                      pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                      x-kubernetes-int-or-string: true
                                   type: object
                                 scaleUp:
                                   description: |-
@@ -826,7 +852,9 @@ spec:
                                     policies:
                                       description: |-
                                         policies is a list of potential scaling polices which can be used during scaling.
-                                        At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid
+                                        If not set, use the default values:
+                                        - For scale up: allow doubling the number of pods, or an absolute change of 4 pods in a 15s window.
+                                        - For scale down: allow all pods to be removed in a 15s window.
                                       items:
                                         description: HPAScalingPolicy is a single
                                           policy which must hold true for a specified
@@ -870,6 +898,24 @@ spec:
                                         - For scale down: 300 (i.e. the stabilization window is 300 seconds long).
                                       format: int32
                                       type: integer
+                                    tolerance:
+                                      anyOf:
+                                      - type: integer
+                                      - type: string
+                                      description: |-
+                                        tolerance is the tolerance on the ratio between the current and desired
+                                        metric value under which no updates are made to the desired number of
+                                        replicas (e.g. 0.01 for 1%). Must be greater than or equal to zero. If not
+                                        set, the default cluster-wide tolerance is applied (by default 10%).
+
+                                        For example, if autoscaling is configured with a memory consumption target of 100Mi,
+                                        and scale-down and scale-up tolerances of 5% and 1% respectively, scaling will be
+                                        triggered when the actual consumption falls below 95Mi or exceeds 101Mi.
+
+                                        This is an alpha field and requires enabling the HPAConfigurableTolerance
+                                        feature gate.
+                                      pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                      x-kubernetes-int-or-string: true
                                   type: object
                               type: object
                             metrics:
diff --git a/config/crd/bases/inference.llmaz.io_playgrounds.yaml b/config/crd/bases/inference.llmaz.io_playgrounds.yaml
index d64ae9d9..44b38958 100644
--- a/config/crd/bases/inference.llmaz.io_playgrounds.yaml
+++ b/config/crd/bases/inference.llmaz.io_playgrounds.yaml
@@ -295,7 +295,9 @@ spec:
                                   policies:
                                     description: |-
                                       policies is a list of potential scaling polices which can be used during scaling.
-                                      At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid
+                                      If not set, use the default values:
+                                      - For scale up: allow doubling the number of pods, or an absolute change of 4 pods in a 15s window.
+                                      - For scale down: allow all pods to be removed in a 15s window.
                                     items:
                                       description: HPAScalingPolicy is a single policy
                                         which must hold true for a specified past
@@ -339,6 +341,24 @@ spec:
                                       - For scale down: 300 (i.e. the stabilization window is 300 seconds long).
                                     format: int32
                                     type: integer
+                                  tolerance:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    description: |-
+                                      tolerance is the tolerance on the ratio between the current and desired
+                                      metric value under which no updates are made to the desired number of
+                                      replicas (e.g. 0.01 for 1%). Must be greater than or equal to zero. If not
+                                      set, the default cluster-wide tolerance is applied (by default 10%).
+
+                                      For example, if autoscaling is configured with a memory consumption target of 100Mi,
+                                      and scale-down and scale-up tolerances of 5% and 1% respectively, scaling will be
+                                      triggered when the actual consumption falls below 95Mi or exceeds 101Mi.
+
+                                      This is an alpha field and requires enabling the HPAConfigurableTolerance
+                                      feature gate.
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
                                 type: object
                               scaleUp:
                                 description: |-
@@ -351,7 +371,9 @@ spec:
                                   policies:
                                     description: |-
                                       policies is a list of potential scaling polices which can be used during scaling.
-                                      At least one policy must be specified, otherwise the HPAScalingRules will be discarded as invalid
+                                      If not set, use the default values:
+                                      - For scale up: allow doubling the number of pods, or an absolute change of 4 pods in a 15s window.
+                                      - For scale down: allow all pods to be removed in a 15s window.
                                     items:
                                       description: HPAScalingPolicy is a single policy
                                         which must hold true for a specified past
@@ -395,6 +417,24 @@ spec:
                                       - For scale down: 300 (i.e. the stabilization window is 300 seconds long).
                                     format: int32
                                     type: integer
+                                  tolerance:
+                                    anyOf:
+                                    - type: integer
+                                    - type: string
+                                    description: |-
+                                      tolerance is the tolerance on the ratio between the current and desired
+                                      metric value under which no updates are made to the desired number of
+                                      replicas (e.g. 0.01 for 1%). Must be greater than or equal to zero. If not
+                                      set, the default cluster-wide tolerance is applied (by default 10%).
+
+                                      For example, if autoscaling is configured with a memory consumption target of 100Mi,
+                                      and scale-down and scale-up tolerances of 5% and 1% respectively, scaling will be
+                                      triggered when the actual consumption falls below 95Mi or exceeds 101Mi.
+
+                                      This is an alpha field and requires enabling the HPAConfigurableTolerance
+                                      feature gate.
+                                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                                    x-kubernetes-int-or-string: true
                                 type: object
                             type: object
                           metrics:
diff --git a/config/crd/bases/inference.llmaz.io_services.yaml b/config/crd/bases/inference.llmaz.io_services.yaml
index 61fb763a..13926b0b 100644
--- a/config/crd/bases/inference.llmaz.io_services.yaml
+++ b/config/crd/bases/inference.llmaz.io_services.yaml
@@ -498,7 +498,6 @@ spec:
                                                 pod labels will be ignored. The default value is empty.
                                                 The same key is forbidden to exist in both matchLabelKeys and labelSelector.
                                                 Also, matchLabelKeys cannot be set when labelSelector isn't set.
-                                                This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                               items:
                                                 type: string
                                               type: array
@@ -513,7 +512,6 @@ spec:
                                                 pod labels will be ignored. The default value is empty.
                                                 The same key is forbidden to exist in both mismatchLabelKeys and labelSelector.
                                                 Also, mismatchLabelKeys cannot be set when labelSelector isn't set.
-                                                This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                               items:
                                                 type: string
                                               type: array
@@ -682,7 +680,6 @@ spec:
                                             pod labels will be ignored. The default value is empty.
                                             The same key is forbidden to exist in both matchLabelKeys and labelSelector.
                                             Also, matchLabelKeys cannot be set when labelSelector isn't set.
-                                            This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                           items:
                                             type: string
                                           type: array
@@ -697,7 +694,6 @@ spec:
                                             pod labels will be ignored. The default value is empty.
                                             The same key is forbidden to exist in both mismatchLabelKeys and labelSelector.
                                             Also, mismatchLabelKeys cannot be set when labelSelector isn't set.
-                                            This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                           items:
                                             type: string
                                           type: array
@@ -864,7 +860,6 @@ spec:
                                                 pod labels will be ignored. The default value is empty.
                                                 The same key is forbidden to exist in both matchLabelKeys and labelSelector.
                                                 Also, matchLabelKeys cannot be set when labelSelector isn't set.
-                                                This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                               items:
                                                 type: string
                                               type: array
@@ -879,7 +874,6 @@ spec:
                                                 pod labels will be ignored. The default value is empty.
                                                 The same key is forbidden to exist in both mismatchLabelKeys and labelSelector.
                                                 Also, mismatchLabelKeys cannot be set when labelSelector isn't set.
-                                                This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                               items:
                                                 type: string
                                               type: array
@@ -1048,7 +1042,6 @@ spec:
                                             pod labels will be ignored. The default value is empty.
                                             The same key is forbidden to exist in both matchLabelKeys and labelSelector.
                                             Also, matchLabelKeys cannot be set when labelSelector isn't set.
-                                            This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                           items:
                                             type: string
                                           type: array
@@ -1063,7 +1056,6 @@ spec:
                                             pod labels will be ignored. The default value is empty.
                                             The same key is forbidden to exist in both mismatchLabelKeys and labelSelector.
                                             Also, mismatchLabelKeys cannot be set when labelSelector isn't set.
-                                            This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                           items:
                                             type: string
                                           type: array
@@ -1327,7 +1319,7 @@ spec:
                                     Cannot be updated.
                                   items:
                                     description: EnvFromSource represents the source
-                                      of a set of ConfigMaps
+                                      of a set of ConfigMaps or Secrets
                                     properties:
                                       configMapRef:
                                         description: The ConfigMap to select from
@@ -1348,9 +1340,9 @@ spec:
                                         type: object
                                         x-kubernetes-map-type: atomic
                                       prefix:
-                                        description: An optional identifier to prepend
-                                          to each key in the ConfigMap. Must be a
-                                          C_IDENTIFIER.
+                                        description: Optional text to prepend to the
+                                          name of each environment variable. Must
+                                          be a C_IDENTIFIER.
                                         type: string
                                       secretRef:
                                         description: The Secret to select from
@@ -1622,6 +1614,12 @@ spec:
                                           - port
                                           type: object
                                       type: object
+                                    stopSignal:
+                                      description: |-
+                                        StopSignal defines which signal will be sent to a container when it is being stopped.
+                                        If not specified, the default is defined by the container runtime in use.
+                                        StopSignal can only be set for Pods with a non-empty .spec.os.name
+                                      type: string
                                   type: object
                                 livenessProbe:
                                   description: |-
@@ -2853,7 +2851,7 @@ spec:
                                     Cannot be updated.
                                   items:
                                     description: EnvFromSource represents the source
-                                      of a set of ConfigMaps
+                                      of a set of ConfigMaps or Secrets
                                     properties:
                                       configMapRef:
                                         description: The ConfigMap to select from
@@ -2874,9 +2872,9 @@ spec:
                                         type: object
                                         x-kubernetes-map-type: atomic
                                       prefix:
-                                        description: An optional identifier to prepend
-                                          to each key in the ConfigMap. Must be a
-                                          C_IDENTIFIER.
+                                        description: Optional text to prepend to the
+                                          name of each environment variable. Must
+                                          be a C_IDENTIFIER.
                                         type: string
                                       secretRef:
                                         description: The Secret to select from
@@ -3145,6 +3143,12 @@ spec:
                                           - port
                                           type: object
                                       type: object
+                                    stopSignal:
+                                      description: |-
+                                        StopSignal defines which signal will be sent to a container when it is being stopped.
+                                        If not specified, the default is defined by the container runtime in use.
+                                        StopSignal can only be set for Pods with a non-empty .spec.os.name
+                                      type: string
                                   type: object
                                 livenessProbe:
                                   description: Probes are not allowed for ephemeral
@@ -4200,7 +4204,7 @@ spec:
                               Init containers may not have Lifecycle actions, Readiness probes, Liveness probes, or Startup probes.
                               The resourceRequirements of an init container are taken into account during scheduling
                               by finding the highest request/limit for each resource type, and then using the max of
-                              of that value or the sum of the normal containers. Limits are applied to init containers
+                              that value or the sum of the normal containers. Limits are applied to init containers
                               in a similar fashion.
                               Init containers cannot currently be added or removed.
                               Cannot be updated.
@@ -4376,7 +4380,7 @@ spec:
                                     Cannot be updated.
                                   items:
                                     description: EnvFromSource represents the source
-                                      of a set of ConfigMaps
+                                      of a set of ConfigMaps or Secrets
                                     properties:
                                       configMapRef:
                                         description: The ConfigMap to select from
@@ -4397,9 +4401,9 @@ spec:
                                         type: object
                                         x-kubernetes-map-type: atomic
                                       prefix:
-                                        description: An optional identifier to prepend
-                                          to each key in the ConfigMap. Must be a
-                                          C_IDENTIFIER.
+                                        description: Optional text to prepend to the
+                                          name of each environment variable. Must
+                                          be a C_IDENTIFIER.
                                         type: string
                                       secretRef:
                                         description: The Secret to select from
@@ -4671,6 +4675,12 @@ spec:
                                           - port
                                           type: object
                                       type: object
+                                    stopSignal:
+                                      description: |-
+                                        StopSignal defines which signal will be sent to a container when it is being stopped.
+                                        If not specified, the default is defined by the container runtime in use.
+                                        StopSignal can only be set for Pods with a non-empty .spec.os.name
+                                      type: string
                                   type: object
                                 livenessProbe:
                                   description: |-
@@ -6391,7 +6401,6 @@ spec:
                                     - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations.
 
                                     If this value is nil, the behavior is equivalent to the Honor policy.
-                                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
                                   type: string
                                 nodeTaintsPolicy:
                                   description: |-
@@ -6402,7 +6411,6 @@ spec:
                                     - Ignore: node taints are ignored. All nodes are included.
 
                                     If this value is nil, the behavior is equivalent to the Ignore policy.
-                                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
                                   type: string
                                 topologyKey:
                                   description: |-
@@ -7404,7 +7412,7 @@ spec:
                                     The types of objects that may be mounted by this volume are defined by the container runtime implementation on a host machine and at minimum must include all valid types supported by the container image field.
                                     The OCI object gets mounted in a single directory (spec.containers[*].volumeMounts.mountPath) by merging the manifest layers in the same way as for container images.
                                     The volume will be mounted read-only (ro) and non-executable files (noexec).
-                                    Sub path mounts for containers are not supported (spec.containers[*].volumeMounts.subpath).
+                                    Sub path mounts for containers are not supported (spec.containers[*].volumeMounts.subpath) before 1.33.
                                     The field spec.securityContext.fsGroupChangePolicy has no effect on this volume type.
                                   properties:
                                     pullPolicy:
@@ -8664,7 +8672,6 @@ spec:
                                                 pod labels will be ignored. The default value is empty.
                                                 The same key is forbidden to exist in both matchLabelKeys and labelSelector.
                                                 Also, matchLabelKeys cannot be set when labelSelector isn't set.
-                                                This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                               items:
                                                 type: string
                                               type: array
@@ -8679,7 +8686,6 @@ spec:
                                                 pod labels will be ignored. The default value is empty.
                                                 The same key is forbidden to exist in both mismatchLabelKeys and labelSelector.
                                                 Also, mismatchLabelKeys cannot be set when labelSelector isn't set.
-                                                This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                               items:
                                                 type: string
                                               type: array
@@ -8848,7 +8854,6 @@ spec:
                                             pod labels will be ignored. The default value is empty.
                                             The same key is forbidden to exist in both matchLabelKeys and labelSelector.
                                             Also, matchLabelKeys cannot be set when labelSelector isn't set.
-                                            This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                           items:
                                             type: string
                                           type: array
@@ -8863,7 +8868,6 @@ spec:
                                             pod labels will be ignored. The default value is empty.
                                             The same key is forbidden to exist in both mismatchLabelKeys and labelSelector.
                                             Also, mismatchLabelKeys cannot be set when labelSelector isn't set.
-                                            This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                           items:
                                             type: string
                                           type: array
@@ -9030,7 +9034,6 @@ spec:
                                                 pod labels will be ignored. The default value is empty.
                                                 The same key is forbidden to exist in both matchLabelKeys and labelSelector.
                                                 Also, matchLabelKeys cannot be set when labelSelector isn't set.
-                                                This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                               items:
                                                 type: string
                                               type: array
@@ -9045,7 +9048,6 @@ spec:
                                                 pod labels will be ignored. The default value is empty.
                                                 The same key is forbidden to exist in both mismatchLabelKeys and labelSelector.
                                                 Also, mismatchLabelKeys cannot be set when labelSelector isn't set.
-                                                This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                               items:
                                                 type: string
                                               type: array
@@ -9214,7 +9216,6 @@ spec:
                                             pod labels will be ignored. The default value is empty.
                                             The same key is forbidden to exist in both matchLabelKeys and labelSelector.
                                             Also, matchLabelKeys cannot be set when labelSelector isn't set.
-                                            This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                           items:
                                             type: string
                                           type: array
@@ -9229,7 +9230,6 @@ spec:
                                             pod labels will be ignored. The default value is empty.
                                             The same key is forbidden to exist in both mismatchLabelKeys and labelSelector.
                                             Also, mismatchLabelKeys cannot be set when labelSelector isn't set.
-                                            This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default).
                                           items:
                                             type: string
                                           type: array
@@ -9493,7 +9493,7 @@ spec:
                                     Cannot be updated.
                                   items:
                                     description: EnvFromSource represents the source
-                                      of a set of ConfigMaps
+                                      of a set of ConfigMaps or Secrets
                                     properties:
                                       configMapRef:
                                         description: The ConfigMap to select from
@@ -9514,9 +9514,9 @@ spec:
                                         type: object
                                         x-kubernetes-map-type: atomic
                                       prefix:
-                                        description: An optional identifier to prepend
-                                          to each key in the ConfigMap. Must be a
-                                          C_IDENTIFIER.
+                                        description: Optional text to prepend to the
+                                          name of each environment variable. Must
+                                          be a C_IDENTIFIER.
                                         type: string
                                       secretRef:
                                         description: The Secret to select from
@@ -9788,6 +9788,12 @@ spec:
                                           - port
                                           type: object
                                       type: object
+                                    stopSignal:
+                                      description: |-
+                                        StopSignal defines which signal will be sent to a container when it is being stopped.
+                                        If not specified, the default is defined by the container runtime in use.
+                                        StopSignal can only be set for Pods with a non-empty .spec.os.name
+                                      type: string
                                   type: object
                                 livenessProbe:
                                   description: |-
@@ -11019,7 +11025,7 @@ spec:
                                     Cannot be updated.
                                   items:
                                     description: EnvFromSource represents the source
-                                      of a set of ConfigMaps
+                                      of a set of ConfigMaps or Secrets
                                     properties:
                                       configMapRef:
                                         description: The ConfigMap to select from
@@ -11040,9 +11046,9 @@ spec:
                                         type: object
                                         x-kubernetes-map-type: atomic
                                       prefix:
-                                        description: An optional identifier to prepend
-                                          to each key in the ConfigMap. Must be a
-                                          C_IDENTIFIER.
+                                        description: Optional text to prepend to the
+                                          name of each environment variable. Must
+                                          be a C_IDENTIFIER.
                                         type: string
                                       secretRef:
                                         description: The Secret to select from
@@ -11311,6 +11317,12 @@ spec:
                                           - port
                                           type: object
                                       type: object
+                                    stopSignal:
+                                      description: |-
+                                        StopSignal defines which signal will be sent to a container when it is being stopped.
+                                        If not specified, the default is defined by the container runtime in use.
+                                        StopSignal can only be set for Pods with a non-empty .spec.os.name
+                                      type: string
                                   type: object
                                 livenessProbe:
                                   description: Probes are not allowed for ephemeral
@@ -12366,7 +12378,7 @@ spec:
                               Init containers may not have Lifecycle actions, Readiness probes, Liveness probes, or Startup probes.
                               The resourceRequirements of an init container are taken into account during scheduling
                               by finding the highest request/limit for each resource type, and then using the max of
-                              of that value or the sum of the normal containers. Limits are applied to init containers
+                              that value or the sum of the normal containers. Limits are applied to init containers
                               in a similar fashion.
                               Init containers cannot currently be added or removed.
                               Cannot be updated.
@@ -12542,7 +12554,7 @@ spec:
                                     Cannot be updated.
                                   items:
                                     description: EnvFromSource represents the source
-                                      of a set of ConfigMaps
+                                      of a set of ConfigMaps or Secrets
                                     properties:
                                       configMapRef:
                                         description: The ConfigMap to select from
@@ -12563,9 +12575,9 @@ spec:
                                         type: object
                                         x-kubernetes-map-type: atomic
                                       prefix:
-                                        description: An optional identifier to prepend
-                                          to each key in the ConfigMap. Must be a
-                                          C_IDENTIFIER.
+                                        description: Optional text to prepend to the
+                                          name of each environment variable. Must
+                                          be a C_IDENTIFIER.
                                         type: string
                                       secretRef:
                                         description: The Secret to select from
@@ -12837,6 +12849,12 @@ spec:
                                           - port
                                           type: object
                                       type: object
+                                    stopSignal:
+                                      description: |-
+                                        StopSignal defines which signal will be sent to a container when it is being stopped.
+                                        If not specified, the default is defined by the container runtime in use.
+                                        StopSignal can only be set for Pods with a non-empty .spec.os.name
+                                      type: string
                                   type: object
                                 livenessProbe:
                                   description: |-
@@ -14557,7 +14575,6 @@ spec:
                                     - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations.
 
                                     If this value is nil, the behavior is equivalent to the Honor policy.
-                                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
                                   type: string
                                 nodeTaintsPolicy:
                                   description: |-
@@ -14568,7 +14585,6 @@ spec:
                                     - Ignore: node taints are ignored. All nodes are included.
 
                                     If this value is nil, the behavior is equivalent to the Ignore policy.
-                                    This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
                                   type: string
                                 topologyKey:
                                   description: |-
@@ -15570,7 +15586,7 @@ spec:
                                     The types of objects that may be mounted by this volume are defined by the container runtime implementation on a host machine and at minimum must include all valid types supported by the container image field.
                                     The OCI object gets mounted in a single directory (spec.containers[*].volumeMounts.mountPath) by merging the manifest layers in the same way as for container images.
                                     The volume will be mounted read-only (ro) and non-executable files (noexec).
-                                    Sub path mounts for containers are not supported (spec.containers[*].volumeMounts.subpath).
+                                    Sub path mounts for containers are not supported (spec.containers[*].volumeMounts.subpath) before 1.33.
                                     The field spec.securityContext.fsGroupChangePolicy has no effect on this volume type.
                                   properties:
                                     pullPolicy:
diff --git a/pkg/controller/inference/activator_controller.go b/pkg/controller/inference/activator_controller.go
index 9516c21d..78bcc212 100644
--- a/pkg/controller/inference/activator_controller.go
+++ b/pkg/controller/inference/activator_controller.go
@@ -1,5 +1,5 @@
 /*
-Copyright 2024 The InftyAI Team.
+Copyright 2025 The InftyAI Team.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.