#!/usr/bin/env python3
"""Dynamic Memory Enforcement Service.

This service is used to protect the cluster from memory exhaustion.
It sets the ``memory.max`` value for ``kubepods-burstable.slice`` and
``kubepods-besteffort.slice`` based on the ``kubepods.slice`` limit, the
limits of the pod slices found directly under ``kubepods.slice`` and the
current memory usage of the burstable slice.
"""

from datetime import datetime
import os
import os.path
import sys
from typing import Optional, Tuple

ROOT = "/sys/fs/cgroup/kubepods.slice"
ROOT_MAX = os.path.join(ROOT, "memory.max")

BESTEFFORT_MAX = os.path.join(ROOT, "kubepods-besteffort.slice", "memory.max")

BURSTABLE_CURRENT = os.path.join(ROOT, "kubepods-burstable.slice", "memory.current")
BURSTABLE_MAX = os.path.join(ROOT, "kubepods-burstable.slice", "memory.max")

# Literal the cgroup v2 memory controller uses in memory.max for "no limit".
_UNLIMITED = "max"


def _read_text(path: str) -> str:
    """Return the stripped content of *path*, closing the file afterwards."""
    with open(path) as f:
        return f.read().strip()


def _read_limit(path: str) -> Optional[int]:
    """Read a ``memory.max`` style file.

    Returns:
        The limit in bytes, or ``None`` when the file contains ``max``
        (no limit configured).

    Raises:
        OSError: the file cannot be opened or read.
        ValueError: the content is neither an integer nor ``max``.
    """
    raw = _read_text(path)
    return None if raw == _UNLIMITED else int(raw)


def _write_limit(path: str, value: int) -> None:
    """Write *value* as a decimal string to *path*."""
    with open(path, "w") as f:
        f.write(str(value))


def guaranteed_max(root: Optional[str] = None) -> int:
    """Sum the ``memory.max`` limits of the pod slices directly under *root*.

    Only directory entries whose name starts with ``kubepods-pod`` are
    considered. Entries whose ``memory.max`` cannot be read are skipped
    (best effort, e.g. the slice may disappear between listing and reading),
    and so are entries without a limit (``max``), since they reserve no
    definite amount.

    Args:
        root: Directory to scan. Defaults to the module-level ``ROOT``.

    Returns:
        The sum of the readable, finite limits in bytes.

    Raises:
        OSError: *root* itself cannot be listed.
        ValueError: a ``memory.max`` file holds unexpected content.
    """
    base = ROOT if root is None else root
    total = 0

    for entry in os.listdir(base):
        if not entry.startswith("kubepods-pod"):
            continue
        try:
            limit = _read_limit(os.path.join(base, entry, "memory.max"))
        except OSError:
            continue
        if limit is not None:
            total += limit

    return total


def compute_limits(root_max: int, guaranteed: int, burstable_current: int) -> Tuple[int, int]:
    """Derive the burstable and best-effort limits.

    The burstable limit is what remains of *root_max* after subtracting
    *guaranteed*; the best-effort limit is what remains of the burstable
    limit after subtracting *burstable_current*. Both are clamped at zero
    because a negative value cannot be written to ``memory.max``.

    Returns:
        ``(burstable_max, besteffort_max)`` in bytes.
    """
    burstable_max = max(0, root_max - guaranteed)
    besteffort_max = max(0, burstable_max - burstable_current)
    return burstable_max, besteffort_max


def main() -> int:
    """Recompute and apply the limits once; return the process exit code."""
    root_max = _read_limit(ROOT_MAX)
    if root_max is None:
        # Without a finite budget on the parent slice there is nothing to split.
        print(f"{ROOT_MAX} is unlimited; nothing to enforce", file=sys.stderr)
        return 0

    burstable_current = int(_read_text(BURSTABLE_CURRENT))
    guaranteed_current = guaranteed_max()

    burstable_max, besteffort_max = compute_limits(
        root_max, guaranteed_current, burstable_current
    )

    now = datetime.now().isoformat(' ', timespec='microseconds')
    print(f"\n{now}\n")
    print(f"root: {root_max}\nburstable: {burstable_current}\nguaranteed: {guaranteed_current}\n\nburstable max: {burstable_max}\nbest-effort max: {besteffort_max}\n")

    _write_limit(BURSTABLE_MAX, burstable_max)
    _write_limit(BESTEFFORT_MAX, besteffort_max)
    return 0


if __name__ == "__main__":
    sys.exit(main())
d95a855f0c..821c6935ea 100644 --- a/pkg/apis/performanceprofile/v2/performanceprofile_types.go +++ b/pkg/apis/performanceprofile/v2/performanceprofile_types.go @@ -34,6 +34,10 @@ const PerformanceProfileEnablePhysicalRpsAnnotation = "performance.openshift.io/ // that ignores the removal of all RPS settings when realtime workload hint is explicitly set to false. const PerformanceProfileEnableRpsAnnotation = "performance.openshift.io/enable-rps" +// PerformanceProfileEnforceReservedMemoryAnnotation enables dynamic memory enforcement +// service that manages memory limits for kubepods slices. +const PerformanceProfileEnforceReservedMemoryAnnotation = "enforce-reserved-memory.experimental" + // PerformanceProfileSpec defines the desired state of PerformanceProfile. type PerformanceProfileSpec struct { // CPU defines a set of CPU related parameters. diff --git a/pkg/performanceprofile/controller/performanceprofile/components/machineconfig/machineconfig.go b/pkg/performanceprofile/controller/performanceprofile/components/machineconfig/machineconfig.go index d5b2559ac2..a6d116ff85 100644 --- a/pkg/performanceprofile/controller/performanceprofile/components/machineconfig/machineconfig.go +++ b/pkg/performanceprofile/controller/performanceprofile/components/machineconfig/machineconfig.go @@ -75,7 +75,8 @@ const ( ovsDynamicPinningTriggerFile = "ovs-enable-dynamic-cpu-affinity" ovsDynamicPinningTriggerHostFile = "/var/lib/ovn-ic/etc/enable_dynamic_cpu_affinity" - cpusetConfigure = "cpuset-configure" + cpusetConfigure = "cpuset-configure" + dynamicMemoryEnforcement = "dynamic-memory-enforcement" ) const ( @@ -98,6 +99,7 @@ const ( systemdServiceCrio = "crio.service" systemdServiceTunedOneShot = "ocp-tuned-one-shot.service" systemdServiceTypeOneshot = "oneshot" + systemdServiceTypeSimple = "simple" systemdTargetMultiUser = "multi-user.target" systemdTrue = "true" ) @@ -314,6 +316,30 @@ func getIgnitionConfig(profile *performancev2.PerformanceProfile, opts *componen 
addContent(ignitionConfig, content, dst, &mode) } + // Add dynamic memory enforcement service only if annotation is enabled and workload partitioning is enabled + clusterHasWorkloadPartitioning := opts.PinningMode != nil && *opts.PinningMode == apiconfigv1.CPUPartitioningAllNodes + if profilecomponent.IsEnforceReservedMemoryEnabled(profile) && clusterHasWorkloadPartitioning { + dynamicMemoryEnforcementService, err := getSystemdContent(getDynamicMemoryEnforcementUnitOptions()) + if err != nil { + return nil, err + } + + ignitionConfig.Systemd.Units = append(ignitionConfig.Systemd.Units, igntypes.Unit{ + Contents: &dynamicMemoryEnforcementService, + Enabled: ptr.To(true), + Name: getSystemdService(dynamicMemoryEnforcement), + }) + + // Add the Python script for dynamic memory enforcement + pythonScriptContent, err := getDynamicMemoryEnforcementPythonScript() + if err != nil { + return nil, err + } + pythonScriptPath := getPythonScriptPath(dynamicMemoryEnforcement) + pythonMode := 0755 // Make it executable + addContent(ignitionConfig, pythonScriptContent, pythonScriptPath, &pythonMode) + } + if profile.Spec.CPU.Offlined != nil { offlinedCPUSList, err := cpuset.Parse(string(*profile.Spec.CPU.Offlined)) if err != nil { @@ -412,6 +438,14 @@ func getBashScriptPath(scriptName string) string { return fmt.Sprintf("%s/%s.sh", bashScriptsDir, scriptName) } +func getPythonScriptPath(scriptName string) string { + return fmt.Sprintf("%s/%s.py", bashScriptsDir, scriptName) +} + +func getDynamicMemoryEnforcementPythonScript() ([]byte, error) { + return assets.Scripts.ReadFile(fmt.Sprintf("scripts/%s.py", dynamicMemoryEnforcement)) +} + func getSystemdEnvironment(key string, value string) string { return fmt.Sprintf("%s=%s", key, value) } @@ -565,6 +599,26 @@ func getRPSUnitOptions(rpsMask string) []*unit.UnitOption { } } +func getDynamicMemoryEnforcementUnitOptions() []*unit.UnitOption { + return []*unit.UnitOption{ + // [Unit] + // Description + 
unit.NewUnitOption(systemdSectionUnit, systemdDescription, "Dynamic memory enforcement service"), + // After + unit.NewUnitOption(systemdSectionUnit, systemdAfter, systemdServiceKubelet), + // [Service] + // Type + unit.NewUnitOption(systemdSectionService, systemdType, systemdServiceTypeSimple), + // RemainAfterExit + unit.NewUnitOption(systemdSectionService, systemdRemainAfterExit, systemdTrue), + // ExecStart + unit.NewUnitOption(systemdSectionService, systemdExecStart, fmt.Sprintf("bash -c 'while true; do /usr/bin/python3 %s; sleep 2; done'", getPythonScriptPath(dynamicMemoryEnforcement))), + // [Install] + // WantedBy + unit.NewUnitOption(systemdSectionInstall, systemdWantedBy, systemdTargetMultiUser), + } +} + func addContent(ignitionConfig *igntypes.Config, content []byte, dst string, mode *int) { contentBase64 := base64.StdEncoding.EncodeToString(content) ignitionConfig.Storage.Files = append(ignitionConfig.Storage.Files, igntypes.File{ diff --git a/pkg/performanceprofile/controller/performanceprofile/components/profile/profile.go b/pkg/performanceprofile/controller/performanceprofile/components/profile/profile.go index d31c2d097a..8406790b41 100644 --- a/pkg/performanceprofile/controller/performanceprofile/components/profile/profile.go +++ b/pkg/performanceprofile/controller/performanceprofile/components/profile/profile.go @@ -95,3 +95,17 @@ func IsMixedCPUsEnabled(profile *performancev2.PerformanceProfile) bool { } return *profile.Spec.WorkloadHints.MixedCpus } + +// IsEnforceReservedMemoryEnabled reports whether the profile carries the PerformanceProfileEnforceReservedMemoryAnnotation annotation set to "enable" +func IsEnforceReservedMemoryEnabled(profile *performancev2.PerformanceProfile) bool { + if profile.Annotations == nil { + return false + } + + isEnabled, ok := profile.Annotations[performancev2.PerformanceProfileEnforceReservedMemoryAnnotation] + if ok && isEnabled == "enable" { + return true + } + + return false +}