From 93595790643edb7f7929ba93ef8553a098995596 Mon Sep 17 00:00:00 2001 From: Severin Gehwolf Date: Tue, 22 Aug 2023 16:35:58 +0200 Subject: [PATCH 01/12] 8261242: [Linux] OSContainer::is_containerized() returns true --- .../os/linux/cgroupSubsystem_linux.cpp | 66 ++++++++++++++----- .../os/linux/cgroupSubsystem_linux.hpp | 8 ++- .../os/linux/cgroupV1Subsystem_linux.cpp | 11 ++++ .../os/linux/cgroupV1Subsystem_linux.hpp | 8 ++- .../os/linux/cgroupV2Subsystem_linux.cpp | 4 ++ .../os/linux/cgroupV2Subsystem_linux.hpp | 6 +- src/hotspot/os/linux/osContainer_linux.cpp | 3 +- .../os/linux/test_cgroupSubsystem_linux.cpp | 3 + .../gtest/runtime/test_os_linux_cgroups.cpp | 6 +- 9 files changed, 91 insertions(+), 24 deletions(-) diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp index 9053e8eac302a..dd31456e8a1e4 100644 --- a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp +++ b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp @@ -63,7 +63,9 @@ CgroupSubsystem* CgroupSubsystemFactory::create() { // Construct the subsystem, free resources and return // Note: any index in cg_infos will do as the path is the same for // all controllers. - CgroupController* unified = new CgroupV2Controller(cg_infos[MEMORY_IDX]._mount_path, cg_infos[MEMORY_IDX]._cgroup_path); + CgroupController* unified = new CgroupV2Controller(cg_infos[MEMORY_IDX]._mount_path, + cg_infos[MEMORY_IDX]._cgroup_path, + cg_infos[MEMORY_IDX]._is_ro); log_debug(os, container)("Detected cgroups v2 unified hierarchy"); cleanup(cg_infos); return new CgroupV2Subsystem(unified); @@ -100,19 +102,19 @@ CgroupSubsystem* CgroupSubsystemFactory::create() { CgroupInfo info = cg_infos[i]; if (info._data_complete) { // pids controller might have incomplete data if (strcmp(info._name, "memory") == 0) { - memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path); + memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path, info._is_ro); memory->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "cpuset") == 0) { - cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path); + cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._is_ro); cpuset->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "cpu") == 0) { - cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path); + cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._is_ro); cpu->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "cpuacct") == 0) { - cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path); + cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._is_ro); cpuacct->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "pids") == 0) { - pids = new CgroupV1Controller(info._root_mount_path, info._mount_path); + pids = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._is_ro); pids->set_subsystem_path(info._cgroup_path); } } else { @@ -127,7 +129,8 @@ void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos, int controller, const char* name, char* mount_path, - char* root_path) { + char* root_path, + bool is_read_only) { if (cg_infos[controller]._mount_path != nullptr) { // On some systems duplicate controllers get mounted in addition to // the main cgroup controllers most likely under /sys/fs/cgroup. In that @@ -139,6 +142,7 @@ void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos, os::free(cg_infos[controller]._root_mount_path); cg_infos[controller]._mount_path = os::strdup(mount_path); cg_infos[controller]._root_mount_path = os::strdup(root_path); + cg_infos[controller]._is_ro = is_read_only; } else { log_debug(os, container)("Duplicate %s controllers detected. Picking %s, skipping %s.", name, cg_infos[controller]._mount_path, mount_path); @@ -146,6 +150,7 @@ void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos, } else { cg_infos[controller]._mount_path = os::strdup(mount_path); cg_infos[controller]._root_mount_path = os::strdup(root_path); + cg_infos[controller]._is_ro = is_read_only; } } @@ -318,26 +323,34 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, char tmproot[MAXPATHLEN+1]; char tmpmount[MAXPATHLEN+1]; char tmpcgroups[MAXPATHLEN+1]; + char mount_opts[MAXPATHLEN+1]; char *cptr = tmpcgroups; char *token; // Cgroup v2 relevant info. We only look for the _mount_path iff is_cgroupsV2 so // as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1 // block in the hybrid case. - if (is_cgroupsV2 && sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %*s", tmproot, tmpmount, tmp_fs_type) == 3) { + // + // We collect the read only mount option in the cgroup infos so as to have that + // info ready when determining is_containerized(). + if (is_cgroupsV2 && sscanf(p, "%*d %*d %*d:%*d %s %s %s%*[^-]- %s %*s %*s", tmproot, tmpmount, mount_opts, tmp_fs_type) == 4) { // we likely have an early match return (e.g. cgroup fs match), be sure we have cgroup2 as fstype if (strcmp("cgroup2", tmp_fs_type) == 0) { cgroupv2_mount_point_found = true; any_cgroup_mounts_found = true; + // For unified we only have a single line with cgroup2 fs type. + // Therefore use that option for all CG info structs. + bool ro_opt = find_ro_opt(mount_opts); for (int i = 0; i < CG_INFO_LENGTH; i++) { - set_controller_paths(cg_infos, i, "(cg2, unified)", tmpmount, tmproot); + set_controller_paths(cg_infos, i, "(cg2, unified)", tmpmount, tmproot, ro_opt); } } } /* Cgroup v1 relevant info * - * Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids + * Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids. For each controller + * determine whether or not they show up as mounted read only or not. * * Example for docker: * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory @@ -347,7 +360,7 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, * * 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids */ - if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %s", tmproot, tmpmount, tmp_fs_type, tmpcgroups) == 4) { + if (sscanf(p, "%*d %*d %*d:%*d %s %s %s%*[^-]- %s %*s %s", tmproot, tmpmount, mount_opts, tmp_fs_type, tmpcgroups) == 5) { if (strcmp("cgroup", tmp_fs_type) != 0) { // Skip cgroup2 fs lines on hybrid or unified hierarchy. continue; @@ -355,23 +368,28 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, while ((token = strsep(&cptr, ",")) != nullptr) { if (strcmp(token, "memory") == 0) { any_cgroup_mounts_found = true; - set_controller_paths(cg_infos, MEMORY_IDX, token, tmpmount, tmproot); + bool ro_opt = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, MEMORY_IDX, token, tmpmount, tmproot, ro_opt); cg_infos[MEMORY_IDX]._data_complete = true; } else if (strcmp(token, "cpuset") == 0) { any_cgroup_mounts_found = true; - set_controller_paths(cg_infos, CPUSET_IDX, token, tmpmount, tmproot); + bool ro_opt = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, CPUSET_IDX, token, tmpmount, tmproot, ro_opt); cg_infos[CPUSET_IDX]._data_complete = true; } else if (strcmp(token, "cpu") == 0) { any_cgroup_mounts_found = true; - set_controller_paths(cg_infos, CPU_IDX, token, tmpmount, tmproot); + bool ro_opt = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, CPU_IDX, token, tmpmount, tmproot, ro_opt); cg_infos[CPU_IDX]._data_complete = true; } else if (strcmp(token, "cpuacct") == 0) { any_cgroup_mounts_found = true; - set_controller_paths(cg_infos, CPUACCT_IDX, token, tmpmount, tmproot); + bool ro_opt = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, CPUACCT_IDX, token, tmpmount, tmproot, ro_opt); cg_infos[CPUACCT_IDX]._data_complete = true; } else if (strcmp(token, "pids") == 0) { any_cgroup_mounts_found = true; - set_controller_paths(cg_infos, PIDS_IDX, token, tmpmount, tmproot); + bool ro_opt = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, PIDS_IDX, token, tmpmount, tmproot, ro_opt); cg_infos[PIDS_IDX]._data_complete = true; } } @@ -436,6 +454,22 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, return true; }; +/* + * Determine whether or not the mount options, which are comma separated, + * contain the 'ro' string. + */ +bool CgroupSubsystemFactory::find_ro_opt(char* mount_opts) { + char* token; + char* mo_ptr = mount_opts; + // mount options are comma-separated (man proc). + while ((token = strsep(&mo_ptr, ",")) != NULL) { + if (strcmp(token, "ro") == 0) { + return true; + } + } + return false; +} + void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) { assert(cg_infos != nullptr, "Invariant"); for (int i = 0; i < CG_INFO_LENGTH; i++) { diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.hpp b/src/hotspot/os/linux/cgroupSubsystem_linux.hpp index 7943d2fe9de45..6c1a4d621a37d 100644 --- a/src/hotspot/os/linux/cgroupSubsystem_linux.hpp +++ b/src/hotspot/os/linux/cgroupSubsystem_linux.hpp @@ -74,6 +74,7 @@ typedef char * cptr; class CgroupController: public CHeapObj { public: virtual char *subsystem_path() = 0; + virtual bool is_read_only() = 0; }; PRAGMA_DIAG_PUSH @@ -269,6 +270,7 @@ class CgroupSubsystem: public CHeapObj { virtual jlong memory_max_usage_in_bytes() = 0; virtual jlong rss_usage_in_bytes() = 0; virtual jlong cache_usage_in_bytes() = 0; + virtual bool is_containerized() = 0; virtual char * cpu_cpuset_cpus() = 0; virtual char * cpu_cpuset_memory_nodes() = 0; @@ -291,6 +293,7 @@ class CgroupInfo : public StackObj { char* _name; int _hierarchy_id; bool _enabled; + bool _is_ro; // whether or not the mount path is mounted read-only bool _data_complete; // indicating cgroup v1 data is complete for this controller char* _cgroup_path; // cgroup controller path from /proc/self/cgroup char* _root_mount_path; // root mount path from /proc/self/mountinfo. Unused for cgroup v2 @@ -301,6 +304,7 @@ class CgroupInfo : public StackObj { _name = nullptr; _hierarchy_id = -1; _enabled = false; + _is_ro = false; _data_complete = false; _cgroup_path = nullptr; _root_mount_path = nullptr; @@ -328,11 +332,13 @@ class CgroupSubsystemFactory: AllStatic { } #endif + static bool find_ro_opt(char* mount_opts); static void set_controller_paths(CgroupInfo* cg_infos, int controller, const char* name, char* mount_path, - char* root_path); + char* root_path, + bool read_only); // Determine the cgroup type (version 1 or version 2), given // relevant paths to files. Sets 'flags' accordingly. static bool determine_type(CgroupInfo* cg_infos, diff --git a/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp b/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp index b83da9099f80d..c15aef570786a 100644 --- a/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp +++ b/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp @@ -199,6 +199,17 @@ jlong CgroupV1Subsystem::memory_soft_limit_in_bytes() { } } +bool CgroupV1Subsystem::is_containerized() { + // containerized iff all required controllers are mounted + // read-only (for now). + // + // TODO: Implement fallback of looking at cpu/mem limits + return _memory->controller()->is_read_only() && + _cpu->controller()->is_read_only() && + _cpuacct->is_read_only() && + _cpuset->is_read_only(); +} + /* memory_usage_in_bytes * * Return the amount of used memory for this process. diff --git a/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp b/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp index 3205973961a92..ed12913905e76 100644 --- a/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp +++ b/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp @@ -36,19 +36,22 @@ class CgroupV1Controller: public CgroupController { /* mountinfo contents */ char *_root; char *_mount_point; + bool _read_only; /* Constructed subsystem directory */ char *_path; public: - CgroupV1Controller(char *root, char *mountpoint) { + CgroupV1Controller(char *root, char *mountpoint, bool ro) { _root = os::strdup(root); _mount_point = os::strdup(mountpoint); _path = nullptr; + _read_only = ro; } virtual void set_subsystem_path(char *cgroup_path); char *subsystem_path() { return _path; } + bool is_read_only() { return _read_only; } }; class CgroupV1MemoryController: public CgroupV1Controller { @@ -65,7 +68,7 @@ class CgroupV1MemoryController: public CgroupV1Controller { void set_hierarchical(bool value) { _uses_mem_hierarchy = value; } public: - CgroupV1MemoryController(char *root, char *mountpoint) : CgroupV1Controller(root, mountpoint) { + CgroupV1MemoryController(char *root, char *mountpoint, bool ro) : CgroupV1Controller(root, mountpoint, ro) { _uses_mem_hierarchy = false; } @@ -97,6 +100,7 @@ class CgroupV1Subsystem: public CgroupSubsystem { jlong pids_max(); jlong pids_current(); + bool is_containerized(); void print_version_specific_info(outputStream* st); diff --git a/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp b/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp index d7c6464caa0fa..3d77eeec341e2 100644 --- a/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp +++ b/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp @@ -89,6 +89,10 @@ int CgroupV2Subsystem::cpu_quota() { return limit; } +bool CgroupV2Subsystem::is_containerized() { + return _unified->is_read_only(); +} + char * CgroupV2Subsystem::cpu_cpuset_cpus() { GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpuset.cpus", "cpuset.cpus is: %s", "%1023s", cpus, 1024); diff --git a/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp b/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp index 978c193c6027e..7f04551d16250 100644 --- a/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp +++ b/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp @@ -33,19 +33,22 @@ class CgroupV2Controller: public CgroupController { char *_mount_path; /* The cgroup path for the controller */ char *_cgroup_path; + bool _read_only; /* Constructed full path to the subsystem directory */ char *_path; static char* construct_path(char* mount_path, char *cgroup_path); public: - CgroupV2Controller(char * mount_path, char *cgroup_path) { + CgroupV2Controller(char * mount_path, char *cgroup_path, bool ro) { _mount_path = mount_path; _cgroup_path = os::strdup(cgroup_path); _path = construct_path(mount_path, cgroup_path); + _read_only = ro; } char *subsystem_path() { return _path; } + bool is_read_only() { return _read_only; } }; class CgroupV2Subsystem: public CgroupSubsystem { @@ -87,6 +90,7 @@ class CgroupV2Subsystem: public CgroupSubsystem { jlong pids_max(); jlong pids_current(); + bool is_containerized(); void print_version_specific_info(outputStream* st); const char * container_type() { diff --git a/src/hotspot/os/linux/osContainer_linux.cpp b/src/hotspot/os/linux/osContainer_linux.cpp index fdb138c864faa..6eeff44c2e7f7 100644 --- a/src/hotspot/os/linux/osContainer_linux.cpp +++ b/src/hotspot/os/linux/osContainer_linux.cpp @@ -58,8 +58,7 @@ void OSContainer::init() { if (cgroup_subsystem == nullptr) { return; // Required subsystem files not found or other error } - - _is_containerized = true; + _is_containerized = cgroup_subsystem->is_containerized(); } const char * OSContainer::container_type() { diff --git a/test/hotspot/gtest/os/linux/test_cgroupSubsystem_linux.cpp b/test/hotspot/gtest/os/linux/test_cgroupSubsystem_linux.cpp index 463955fd4012f..1539269b7c05a 100644 --- a/test/hotspot/gtest/os/linux/test_cgroupSubsystem_linux.cpp +++ b/test/hotspot/gtest/os/linux/test_cgroupSubsystem_linux.cpp @@ -64,6 +64,9 @@ class TestController : public CgroupController { // The real subsystem is in /tmp/, generated by temp_file() return (char*)"/"; }; + bool is_read_only() override { + return true; // doesn't matter + } }; static void fill_file(const char* path, const char* content) { diff --git a/test/hotspot/gtest/runtime/test_os_linux_cgroups.cpp b/test/hotspot/gtest/runtime/test_os_linux_cgroups.cpp index 21e0152a43d49..2339ee57d96f3 100644 --- a/test/hotspot/gtest/runtime/test_os_linux_cgroups.cpp +++ b/test/hotspot/gtest/runtime/test_os_linux_cgroups.cpp @@ -54,7 +54,8 @@ TEST(cgroupTest, set_cgroupv1_subsystem_path) { &container_engine }; for (int i = 0; i < length; i++) { CgroupV1Controller* ctrl = new CgroupV1Controller( (char*)testCases[i]->root_path, - (char*)testCases[i]->mount_path); + (char*)testCases[i]->mount_path, + true /* read-only mount */); ctrl->set_subsystem_path((char*)testCases[i]->cgroup_path); ASSERT_STREQ(testCases[i]->expected_path, ctrl->subsystem_path()); } @@ -78,7 +79,8 @@ TEST(cgroupTest, set_cgroupv2_subsystem_path) { &sub_path }; for (int i = 0; i < length; i++) { CgroupV2Controller* ctrl = new CgroupV2Controller( (char*)testCases[i]->mount_path, - (char*)testCases[i]->cgroup_path); + (char*)testCases[i]->cgroup_path, + true /* read-only mount */); ASSERT_STREQ(testCases[i]->expected_path, ctrl->subsystem_path()); } } From 819311502e34c1ea540d0e62f7ce4b7e95ec010b Mon Sep 17 00:00:00 2001 From: Severin Gehwolf Date: Thu, 7 Mar 2024 17:20:10 +0100 Subject: [PATCH 02/12] Make find_ro static and local to compilation unit --- .../os/linux/cgroupSubsystem_linux.cpp | 32 +++++++++---------- .../os/linux/cgroupSubsystem_linux.hpp | 1 - src/hotspot/os/linux/osContainer_linux.cpp | 1 + .../jtreg/containers/cgroup/PlainRead.java | 2 ++ 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp index dd31456e8a1e4..b31b9d9321a61 100644 --- a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp +++ b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp @@ -154,6 +154,22 @@ void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos, } } +/* + * Determine whether or not the mount options, which are comma separated, + * contain the 'ro' string. + */ +static bool find_ro_opt(char* mount_opts) { + char* token; + char* mo_ptr = mount_opts; + // mount options are comma-separated (man proc). + while ((token = strsep(&mo_ptr, ",")) != NULL) { + if (strcmp(token, "ro") == 0) { + return true; + } + } + return false; +} + bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, const char* proc_cgroups, const char* proc_self_cgroup, @@ -454,22 +470,6 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, return true; }; -/* - * Determine whether or not the mount options, which are comma separated, - * contain the 'ro' string. - */ -bool CgroupSubsystemFactory::find_ro_opt(char* mount_opts) { - char* token; - char* mo_ptr = mount_opts; - // mount options are comma-separated (man proc). - while ((token = strsep(&mo_ptr, ",")) != NULL) { - if (strcmp(token, "ro") == 0) { - return true; - } - } - return false; -} - void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) { assert(cg_infos != nullptr, "Invariant"); for (int i = 0; i < CG_INFO_LENGTH; i++) { diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.hpp b/src/hotspot/os/linux/cgroupSubsystem_linux.hpp index 6c1a4d621a37d..433739a1c3192 100644 --- a/src/hotspot/os/linux/cgroupSubsystem_linux.hpp +++ b/src/hotspot/os/linux/cgroupSubsystem_linux.hpp @@ -332,7 +332,6 @@ class CgroupSubsystemFactory: AllStatic { } #endif - static bool find_ro_opt(char* mount_opts); static void set_controller_paths(CgroupInfo* cg_infos, int controller, const char* name, diff --git a/src/hotspot/os/linux/osContainer_linux.cpp b/src/hotspot/os/linux/osContainer_linux.cpp index 6eeff44c2e7f7..851b91f86456c 100644 --- a/src/hotspot/os/linux/osContainer_linux.cpp +++ b/src/hotspot/os/linux/osContainer_linux.cpp @@ -59,6 +59,7 @@ void OSContainer::init() { return; // Required subsystem files not found or other error } _is_containerized = cgroup_subsystem->is_containerized(); + log_trace(os, container)("OSContainer::init: is_containerized() = %s", _is_containerized ? "true" : "false"); } const char * OSContainer::container_type() { diff --git a/test/hotspot/jtreg/containers/cgroup/PlainRead.java b/test/hotspot/jtreg/containers/cgroup/PlainRead.java index 21eccd79835d4..4476e8aba78da 100644 --- a/test/hotspot/jtreg/containers/cgroup/PlainRead.java +++ b/test/hotspot/jtreg/containers/cgroup/PlainRead.java @@ -73,6 +73,8 @@ public static void main(String[] args) throws Exception { if (wb.isContainerized()) { System.out.println("Inside a cgroup, testing..."); isContainer(output); + } else { + System.out.println("Not containerized. Skipping..."); } } } From fd2f24225d9111955b92e33924ff4f83aca6d5a4 Mon Sep 17 00:00:00 2001 From: Severin Gehwolf Date: Thu, 7 Mar 2024 18:11:54 +0100 Subject: [PATCH 03/12] Implement fall-back logic for non-ro controller mounts --- .../os/linux/cgroupV1Subsystem_linux.cpp | 4 ++-- src/hotspot/os/linux/osContainer_linux.cpp | 22 +++++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp b/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp index c15aef570786a..bdd5e82d590cb 100644 --- a/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp +++ b/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp @@ -201,9 +201,9 @@ jlong CgroupV1Subsystem::memory_soft_limit_in_bytes() { bool CgroupV1Subsystem::is_containerized() { // containerized iff all required controllers are mounted - // read-only (for now). + // read-only. See OSContainer::is_containerized() for + // the full logic. // - // TODO: Implement fallback of looking at cpu/mem limits return _memory->controller()->is_read_only() && _cpu->controller()->is_read_only() && _cpuacct->is_read_only() && diff --git a/src/hotspot/os/linux/osContainer_linux.cpp b/src/hotspot/os/linux/osContainer_linux.cpp index 851b91f86456c..47f41d8e86233 100644 --- a/src/hotspot/os/linux/osContainer_linux.cpp +++ b/src/hotspot/os/linux/osContainer_linux.cpp @@ -58,8 +58,26 @@ void OSContainer::init() { if (cgroup_subsystem == nullptr) { return; // Required subsystem files not found or other error } - _is_containerized = cgroup_subsystem->is_containerized(); - log_trace(os, container)("OSContainer::init: is_containerized() = %s", _is_containerized ? "true" : "false"); + bool any_mem_cpu_limit_present = false; + bool ctrl_ro = cgroup_subsystem->is_containerized(); + if (ctrl_ro) { + // If all controllers are mounted read-only, we are containerized. + log_trace(os, container)("is_containerized()=true because all controllers mounted read-only"); + } else { + // We can be in one of two cases: + // 1.) On a physical Linux system without any limit + // 2.) On a physical Linux system with a limit enforced by other means (like systemd slice) + // In order to ensure we cover case 2) we query for cpu/memory limit. If either is present + // we are containerized. Otherwise we are not. + any_mem_cpu_limit_present = cgroup_subsystem->memory_limit_in_bytes() > 0 || + os::Linux::active_processor_count() != cgroup_subsystem->active_processor_count(); + if (any_mem_cpu_limit_present) { + log_trace(os, container)("is_containerized()=true because either a cpu or a memory limit in place"); + } + } + _is_containerized = ctrl_ro || any_mem_cpu_limit_present; + log_debug(os, container)("OSContainer::init: is_containerized() = %s", _is_containerized ? "true" : + "false, because no limit detected"); } const char * OSContainer::container_type() { From 419bd1f7414ba99eb3d52a941e54a113c88735a7 Mon Sep 17 00:00:00 2001 From: Severin Gehwolf Date: Thu, 7 Mar 2024 19:30:45 +0100 Subject: [PATCH 04/12] Drop cgroups testing on plain Linux --- .../jtreg/containers/cgroup/PlainRead.java | 37 ++----------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/test/hotspot/jtreg/containers/cgroup/PlainRead.java b/test/hotspot/jtreg/containers/cgroup/PlainRead.java index 4476e8aba78da..43def3d0023b5 100644 --- a/test/hotspot/jtreg/containers/cgroup/PlainRead.java +++ b/test/hotspot/jtreg/containers/cgroup/PlainRead.java @@ -23,6 +23,7 @@ /* * @test PlainRead + * @bug 8261242 * @key cgroups * @requires os.family == "linux" * @requires vm.flagless @@ -33,48 +34,18 @@ */ import jdk.test.lib.process.ProcessTools; -import jdk.test.lib.process.OutputAnalyzer; -import jdk.test.lib.Platform; import jdk.test.whitebox.WhiteBox; public class PlainRead { - static public void match(OutputAnalyzer oa, String what, String value) { - oa.shouldMatch("^.*" + what + " *" + value + ".*$"); - } - - static public void noMatch(OutputAnalyzer oa, String what, String value) { - oa.shouldNotMatch("^.*" + what + " *" + value + ".*$"); - } - - static final String good_value = "(\\d+|-1|-2|Unlimited)"; - static final String bad_value = "(failed)"; - - static final String[] variables = {"Memory Limit is:", "CPU Quota is:", "CPU Period is:", "active_processor_count:"}; - - static public void isContainer(OutputAnalyzer oa) { - for (String v: variables) { - match(oa, v, good_value); - } - for (String v: variables) { - noMatch(oa, v, bad_value); - } - } - - static public void isNotContainer(OutputAnalyzer oa) { - oa.shouldMatch("^.*Can't open /proc/self/mountinfo.*$"); - } - public static void main(String[] args) throws Exception { WhiteBox wb = WhiteBox.getWhiteBox(); ProcessBuilder pb = ProcessTools.createLimitedTestJavaProcessBuilder("-Xlog:os+container=trace", "-version"); - OutputAnalyzer output = new OutputAnalyzer(pb.start()); + pb.start(); if (wb.isContainerized()) { - System.out.println("Inside a cgroup, testing..."); - isContainer(output); - } else { - System.out.println("Not containerized. Skipping..."); + throw new RuntimeException("Test failed! Expected not containerized on plain Linux."); } + System.out.println("Plain linux, no limits. Passed!"); } } From daebab23b344b15236370412ed04455f4ccc8118 Mon Sep 17 00:00:00 2001 From: Severin Gehwolf Date: Fri, 8 Mar 2024 14:22:58 +0100 Subject: [PATCH 05/12] Some clean-up --- src/hotspot/os/linux/osContainer_linux.cpp | 31 ++++++++++++++----- ...{PlainRead.java => TestContainerized.java} | 9 ++---- 2 files changed, 27 insertions(+), 13 deletions(-) rename test/hotspot/jtreg/containers/cgroup/{PlainRead.java => TestContainerized.java} (89%) diff --git a/src/hotspot/os/linux/osContainer_linux.cpp b/src/hotspot/os/linux/osContainer_linux.cpp index 47f41d8e86233..a29449ad181d1 100644 --- a/src/hotspot/os/linux/osContainer_linux.cpp +++ b/src/hotspot/os/linux/osContainer_linux.cpp @@ -58,26 +58,43 @@ void OSContainer::init() { if (cgroup_subsystem == nullptr) { return; // Required subsystem files not found or other error } + /* + * In order to avoid a false positive on is_containerized() on + * Linux systems outside a container *and* to ensure compatibility + * with in-container usage, we detemine is_containerized() by two + * steps: + * 1.) Determine if all the cgroup controllers are mounted read only. + * If yes, is_containerized() == true. Otherwise, do the fallback + * in 2.) + * 2.) Query for memory and cpu limits. If any limit is set, we set + * is_containerized() == true. + * + * Step 1.) covers the basic in container use-cases. Step 2.) ensures + * that limits enforced by other means (e.g. systemd slice) are properly + * detected. + */ + const char *reason; bool any_mem_cpu_limit_present = false; bool ctrl_ro = cgroup_subsystem->is_containerized(); if (ctrl_ro) { - // If all controllers are mounted read-only, we are containerized. - log_trace(os, container)("is_containerized()=true because all controllers mounted read-only"); + // in-container case + reason = " because all controllers are mounted read-only (container case)"; } else { // We can be in one of two cases: // 1.) On a physical Linux system without any limit // 2.) On a physical Linux system with a limit enforced by other means (like systemd slice) - // In order to ensure we cover case 2) we query for cpu/memory limit. If either is present - // we are containerized. Otherwise we are not. any_mem_cpu_limit_present = cgroup_subsystem->memory_limit_in_bytes() > 0 || os::Linux::active_processor_count() != cgroup_subsystem->active_processor_count(); if (any_mem_cpu_limit_present) { - log_trace(os, container)("is_containerized()=true because either a cpu or a memory limit in place"); + reason = " because either a cpu or a memory limit is present"; + } else { + reason = " because no cpu or memory limit is present"; } } _is_containerized = ctrl_ro || any_mem_cpu_limit_present; - log_debug(os, container)("OSContainer::init: is_containerized() = %s", _is_containerized ? "true" : - "false, because no limit detected"); + log_debug(os, container)("OSContainer::init: is_containerized() = %s%s", + _is_containerized ? "true" : "false", + reason); } const char * OSContainer::container_type() { diff --git a/test/hotspot/jtreg/containers/cgroup/PlainRead.java b/test/hotspot/jtreg/containers/cgroup/TestContainerized.java similarity index 89% rename from test/hotspot/jtreg/containers/cgroup/PlainRead.java rename to test/hotspot/jtreg/containers/cgroup/TestContainerized.java index 43def3d0023b5..52cf5451a8d5c 100644 --- a/test/hotspot/jtreg/containers/cgroup/PlainRead.java +++ b/test/hotspot/jtreg/containers/cgroup/TestContainerized.java @@ -22,7 +22,7 @@ */ /* - * @test PlainRead + * @test * @bug 8261242 * @key cgroups * @requires os.family == "linux" @@ -30,19 +30,16 @@ * @library /testlibrary /test/lib * @build jdk.test.whitebox.WhiteBox * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI PlainRead + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI TestContainerized */ import jdk.test.lib.process.ProcessTools; import jdk.test.whitebox.WhiteBox; -public class PlainRead { +public class TestContainerized { public static void main(String[] args) throws Exception { WhiteBox wb = WhiteBox.getWhiteBox(); - ProcessBuilder pb = ProcessTools.createLimitedTestJavaProcessBuilder("-Xlog:os+container=trace", "-version"); - pb.start(); - if (wb.isContainerized()) { throw new RuntimeException("Test failed! Expected not containerized on plain Linux."); } From 1d39d30984d4e3b1eeb922f6af0147121598d9fe Mon Sep 17 00:00:00 2001 From: Severin Gehwolf Date: Fri, 8 Mar 2024 18:06:44 +0100 Subject: [PATCH 06/12] Implement Metrics.isContainerized() --- src/hotspot/share/include/jvm.h | 3 +++ src/hotspot/share/prims/jvm.cpp | 12 ++++++++++++ .../jdk/internal/platform/CgroupMetrics.java | 6 ++++++ .../jdk/internal/platform/CgroupSubsystem.java | 5 +++++ .../linux/native/libjava/CgroupMetrics.c | 6 ++++++ .../classes/jdk/internal/platform/Metrics.java | 17 +++++++++++++++++ .../classes/sun/launcher/LauncherHelper.java | 4 ++++ .../lib/containers/cgroup/MetricsTester.java | 11 +++++++++++ 8 files changed, 64 insertions(+) diff --git a/src/hotspot/share/include/jvm.h b/src/hotspot/share/include/jvm.h index b49c15d73b6d5..1b43ab23ed7a5 100644 --- a/src/hotspot/share/include/jvm.h +++ b/src/hotspot/share/include/jvm.h @@ -150,6 +150,9 @@ JVM_ActiveProcessorCount(void); JNIEXPORT jboolean JNICALL JVM_IsUseContainerSupport(void); +JNIEXPORT jboolean JNICALL +JVM_IsContainerized(void); + JNIEXPORT void * JNICALL JVM_LoadZipLibrary(); diff --git a/src/hotspot/share/prims/jvm.cpp b/src/hotspot/share/prims/jvm.cpp index 4ec0dfa34cc7e..69305c4f38c6c 100644 --- a/src/hotspot/share/prims/jvm.cpp +++ b/src/hotspot/share/prims/jvm.cpp @@ -113,6 +113,9 @@ #if INCLUDE_MANAGEMENT #include "services/finalizerService.hpp" #endif +#ifdef LINUX +#include "osContainer_linux.hpp" +#endif #include @@ -496,6 +499,15 @@ JVM_LEAF(jboolean, JVM_IsUseContainerSupport(void)) return JNI_FALSE; JVM_END +JVM_LEAF(jboolean, JVM_IsContainerized(void)) +#ifdef LINUX + if (OSContainer::is_containerized()) { + return JNI_TRUE; + } +#endif + return JNI_FALSE; +JVM_END + // java.lang.Throwable ////////////////////////////////////////////////////// JVM_ENTRY(void, JVM_FillInStackTrace(JNIEnv *env, jobject receiver)) diff --git a/src/java.base/linux/classes/jdk/internal/platform/CgroupMetrics.java b/src/java.base/linux/classes/jdk/internal/platform/CgroupMetrics.java index 8797711bf4b91..af551a07b1e71 100644 --- a/src/java.base/linux/classes/jdk/internal/platform/CgroupMetrics.java +++ b/src/java.base/linux/classes/jdk/internal/platform/CgroupMetrics.java @@ -35,6 +35,11 @@ public class CgroupMetrics implements Metrics { this.subsystem = Objects.requireNonNull(subsystem); } + @Override + public boolean isContainerized() { + return isContainerized0(); + } + @Override public String getProvider() { return subsystem.getProvider(); @@ -194,6 +199,7 @@ public static Metrics getInstance() { } private static native boolean isUseContainerSupport(); + private static native boolean isContainerized0(); private static native long getTotalMemorySize0(); private static native long getTotalSwapSize0(); diff --git a/src/java.base/linux/classes/jdk/internal/platform/CgroupSubsystem.java b/src/java.base/linux/classes/jdk/internal/platform/CgroupSubsystem.java index 952de13e9f25b..7df86d03ff4e3 100644 --- a/src/java.base/linux/classes/jdk/internal/platform/CgroupSubsystem.java +++ b/src/java.base/linux/classes/jdk/internal/platform/CgroupSubsystem.java @@ -31,6 +31,11 @@ */ public interface CgroupSubsystem extends Metrics { + + default boolean isContainerized() { + return false; // This default impl is never used + } + /** * Returned for metrics of type long if the underlying implementation * has determined that no limit is being imposed. diff --git a/src/java.base/linux/native/libjava/CgroupMetrics.c b/src/java.base/linux/native/libjava/CgroupMetrics.c index a5e41167bc319..e2f98633459be 100644 --- a/src/java.base/linux/native/libjava/CgroupMetrics.c +++ b/src/java.base/linux/native/libjava/CgroupMetrics.c @@ -36,6 +36,12 @@ Java_jdk_internal_platform_CgroupMetrics_isUseContainerSupport(JNIEnv *env, jcla return JVM_IsUseContainerSupport(); } +JNIEXPORT jboolean JNICALL +Java_jdk_internal_platform_CgroupMetrics_isContainerized0(JNIEnv *env, jclass ignored) +{ + return JVM_IsContainerized(); +} + JNIEXPORT jlong JNICALL Java_jdk_internal_platform_CgroupMetrics_getTotalMemorySize0 (JNIEnv *env, jclass ignored) diff --git a/src/java.base/share/classes/jdk/internal/platform/Metrics.java b/src/java.base/share/classes/jdk/internal/platform/Metrics.java index c45e3e5225721..b14dbd7738edf 100644 --- a/src/java.base/share/classes/jdk/internal/platform/Metrics.java +++ b/src/java.base/share/classes/jdk/internal/platform/Metrics.java @@ -71,6 +71,23 @@ public static Metrics systemMetrics() { */ public String getProvider(); + /** + * Determines whether or not the underlying system + * has useful metrics (a.k.a. is containerized). + * + * @implNote + * Note that Metrics on some systems aren't useful. + * For example on a regular Linux system with cgroups + * present, with no limits enforced and not running in + * a container, Metric aren't useful. This can be used + * in order to determine if the system is containerized. + * + * @return true when the JVM runs in containerized mode. + * false otherwise. + * + */ + public boolean isContainerized(); + /***************************************************************** * CPU Accounting Subsystem diff --git a/src/java.base/share/classes/sun/launcher/LauncherHelper.java b/src/java.base/share/classes/sun/launcher/LauncherHelper.java index 3e65949e59f34..029fbd83db73e 100644 --- a/src/java.base/share/classes/sun/launcher/LauncherHelper.java +++ b/src/java.base/share/classes/sun/launcher/LauncherHelper.java @@ -370,6 +370,10 @@ private static void printSystemMetrics() { final long longRetvalNotSupported = -2; ostream.println(INDENT + "Provider: " + c.getProvider()); + if (!c.isContainerized()) { + ostream.println(INDENT + "System not containerized."); + return; + } ostream.println(INDENT + "Effective CPU Count: " + c.getEffectiveCpuCount()); ostream.println(formatCpuVal(c.getCpuPeriod(), INDENT + "CPU Period: ", longRetvalNotSupported)); ostream.println(formatCpuVal(c.getCpuQuota(), INDENT + "CPU Quota: ", longRetvalNotSupported)); diff --git a/test/lib/jdk/test/lib/containers/cgroup/MetricsTester.java b/test/lib/jdk/test/lib/containers/cgroup/MetricsTester.java index a6eff3d237aa4..7c285d50cb8d6 100644 --- a/test/lib/jdk/test/lib/containers/cgroup/MetricsTester.java +++ b/test/lib/jdk/test/lib/containers/cgroup/MetricsTester.java @@ -69,6 +69,17 @@ private void testAll(Metrics m, boolean inContainer) throws Exception { tester.testMemoryUsage(); } tester.testMisc(); + testContainerized(m, inContainer); + } + + private void testContainerized(m, inContainer) { + if (m.isContainerized() != inContainer) { + throw new RuntimeException("containerized test failed. " + + "Expected isContainerized()==" + inContainer + + " but got '" + m.isContainerized() + "'"); + } + } + System.out.println("testContainerized() PASSED!"); } public static void main(String[] args) throws Exception { From b8267cae7546587d58251431a46cfe12396f5f62 Mon Sep 17 00:00:00 2001 From: Severin Gehwolf Date: Fri, 8 Mar 2024 18:46:09 +0100 Subject: [PATCH 07/12] Fix tests --- .../platform/cgroup/TestSystemSettings.java | 45 +++++++++++++++++++ .../lib/containers/cgroup/MetricsTester.java | 3 +- 2 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 test/jdk/jdk/internal/platform/cgroup/TestSystemSettings.java diff --git a/test/jdk/jdk/internal/platform/cgroup/TestSystemSettings.java b/test/jdk/jdk/internal/platform/cgroup/TestSystemSettings.java new file mode 100644 index 0000000000000..8e869b0c8618d --- /dev/null +++ b/test/jdk/jdk/internal/platform/cgroup/TestSystemSettings.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2024, Red Hat, Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @key cgroups + * @requires os.family == "linux" + * @requires vm.flagless + * @library /test/lib + * @build TestSystemSettings + * @run main/othervm TestSystemSettings + */ + +import jdk.test.lib.process.ProcessTools; +import jdk.test.lib.process.OutputAnalyzer; + +public class TestSystemSettings { + + public static void main(String[] args) throws Exception { + ProcessBuilder pb = ProcessTools.createLimitedTestJavaProcessBuilder("-XshowSettings:system", "-version"); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + + output.shouldContain("System not containerized."); + } +} diff --git a/test/lib/jdk/test/lib/containers/cgroup/MetricsTester.java b/test/lib/jdk/test/lib/containers/cgroup/MetricsTester.java index 7c285d50cb8d6..0b29d288cd92d 100644 --- a/test/lib/jdk/test/lib/containers/cgroup/MetricsTester.java +++ b/test/lib/jdk/test/lib/containers/cgroup/MetricsTester.java @@ -72,12 +72,11 @@ private void testAll(Metrics m, boolean inContainer) throws Exception { testContainerized(m, inContainer); } - private void testContainerized(m, inContainer) { + private void testContainerized(Metrics m, boolean inContainer) { if (m.isContainerized() != inContainer) { throw new RuntimeException("containerized test failed. " + "Expected isContainerized()==" + inContainer + " but got '" + m.isContainerized() + "'"); - } } System.out.println("testContainerized() PASSED!"); } From 98325f1830cc08578a62d9099d11564cd1a23590 Mon Sep 17 00:00:00 2001 From: Severin Gehwolf Date: Mon, 11 Mar 2024 18:39:56 +0100 Subject: [PATCH 08/12] jcheck fixes --- src/hotspot/os/linux/cgroupSubsystem_linux.hpp | 2 +- src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp | 6 +++--- .../jdk/internal/platform/cgroup/TestSystemSettings.java | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.hpp b/src/hotspot/os/linux/cgroupSubsystem_linux.hpp index 433739a1c3192..bbed07a65bbc1 100644 --- a/src/hotspot/os/linux/cgroupSubsystem_linux.hpp +++ b/src/hotspot/os/linux/cgroupSubsystem_linux.hpp @@ -337,7 +337,7 @@ class CgroupSubsystemFactory: AllStatic { const char* name, char* mount_path, char* root_path, - bool read_only); + bool read_only); // Determine the cgroup type (version 1 or version 2), given // relevant paths to files. Sets 'flags' accordingly. static bool determine_type(CgroupInfo* cg_infos, diff --git a/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp b/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp index bdd5e82d590cb..f52ea5322450c 100644 --- a/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp +++ b/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp @@ -205,9 +205,9 @@ bool CgroupV1Subsystem::is_containerized() { // the full logic. // return _memory->controller()->is_read_only() && - _cpu->controller()->is_read_only() && - _cpuacct->is_read_only() && - _cpuset->is_read_only(); + _cpu->controller()->is_read_only() && + _cpuacct->is_read_only() && + _cpuset->is_read_only(); } /* memory_usage_in_bytes diff --git a/test/jdk/jdk/internal/platform/cgroup/TestSystemSettings.java b/test/jdk/jdk/internal/platform/cgroup/TestSystemSettings.java index 8e869b0c8618d..8d9279e1603c1 100644 --- a/test/jdk/jdk/internal/platform/cgroup/TestSystemSettings.java +++ b/test/jdk/jdk/internal/platform/cgroup/TestSystemSettings.java @@ -40,6 +40,6 @@ public static void main(String[] args) throws Exception { ProcessBuilder pb = ProcessTools.createLimitedTestJavaProcessBuilder("-XshowSettings:system", "-version"); OutputAnalyzer output = new OutputAnalyzer(pb.start()); - output.shouldContain("System not containerized."); + output.shouldContain("System not containerized."); } } From 8092c10db89538c63e406e61ff90369210ef9b54 Mon Sep 17 00:00:00 2001 From: Severin Gehwolf Date: Fri, 3 May 2024 15:44:02 +0200 Subject: [PATCH 09/12] Unify naming of variables --- .../os/linux/cgroupSubsystem_linux.cpp | 42 +++++++++---------- .../os/linux/cgroupSubsystem_linux.hpp | 4 +- src/hotspot/os/linux/osContainer_linux.cpp | 6 +-- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp index b31b9d9321a61..d7d058322f370 100644 --- a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp +++ b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp @@ -65,7 +65,7 @@ CgroupSubsystem* CgroupSubsystemFactory::create() { // all controllers. CgroupController* unified = new CgroupV2Controller(cg_infos[MEMORY_IDX]._mount_path, cg_infos[MEMORY_IDX]._cgroup_path, - cg_infos[MEMORY_IDX]._is_ro); + cg_infos[MEMORY_IDX]._read_only); log_debug(os, container)("Detected cgroups v2 unified hierarchy"); cleanup(cg_infos); return new CgroupV2Subsystem(unified); @@ -102,19 +102,19 @@ CgroupSubsystem* CgroupSubsystemFactory::create() { CgroupInfo info = cg_infos[i]; if (info._data_complete) { // pids controller might have incomplete data if (strcmp(info._name, "memory") == 0) { - memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path, info._is_ro); + memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path, info._read_only); memory->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "cpuset") == 0) { - cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._is_ro); + cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only); cpuset->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "cpu") == 0) { - cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._is_ro); + cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only); cpu->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "cpuacct") == 0) { - cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._is_ro); + cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only); cpuacct->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "pids") == 0) { - pids = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._is_ro); + pids = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only); pids->set_subsystem_path(info._cgroup_path); } } else { @@ -130,7 +130,7 @@ void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos, const char* name, char* mount_path, char* root_path, - bool is_read_only) { + bool read_only) { if (cg_infos[controller]._mount_path != nullptr) { // On some systems duplicate controllers get mounted in addition to // the main cgroup controllers most likely under /sys/fs/cgroup. In that @@ -142,7 +142,7 @@ void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos, os::free(cg_infos[controller]._root_mount_path); cg_infos[controller]._mount_path = os::strdup(mount_path); cg_infos[controller]._root_mount_path = os::strdup(root_path); - cg_infos[controller]._is_ro = is_read_only; + cg_infos[controller]._read_only = read_only; } else { log_debug(os, container)("Duplicate %s controllers detected. Picking %s, skipping %s.", name, cg_infos[controller]._mount_path, mount_path); @@ -150,7 +150,7 @@ void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos, } else { cg_infos[controller]._mount_path = os::strdup(mount_path); cg_infos[controller]._root_mount_path = os::strdup(root_path); - cg_infos[controller]._is_ro = is_read_only; + cg_infos[controller]._read_only = read_only; } } @@ -356,9 +356,9 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, any_cgroup_mounts_found = true; // For unified we only have a single line with cgroup2 fs type. // Therefore use that option for all CG info structs. - bool ro_opt = find_ro_opt(mount_opts); + bool ro_option = find_ro_opt(mount_opts); for (int i = 0; i < CG_INFO_LENGTH; i++) { - set_controller_paths(cg_infos, i, "(cg2, unified)", tmpmount, tmproot, ro_opt); + set_controller_paths(cg_infos, i, "(cg2, unified)", tmpmount, tmproot, ro_option); } } } @@ -384,28 +384,28 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, while ((token = strsep(&cptr, ",")) != nullptr) { if (strcmp(token, "memory") == 0) { any_cgroup_mounts_found = true; - bool ro_opt = find_ro_opt(mount_opts); - set_controller_paths(cg_infos, MEMORY_IDX, token, tmpmount, tmproot, ro_opt); + bool ro_option = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, MEMORY_IDX, token, tmpmount, tmproot, ro_option); cg_infos[MEMORY_IDX]._data_complete = true; } else if (strcmp(token, "cpuset") == 0) { any_cgroup_mounts_found = true; - bool ro_opt = find_ro_opt(mount_opts); - set_controller_paths(cg_infos, CPUSET_IDX, token, tmpmount, tmproot, ro_opt); + bool ro_option = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, CPUSET_IDX, token, tmpmount, tmproot, ro_option); cg_infos[CPUSET_IDX]._data_complete = true; } else if (strcmp(token, "cpu") == 0) { any_cgroup_mounts_found = true; - bool ro_opt = find_ro_opt(mount_opts); - set_controller_paths(cg_infos, CPU_IDX, token, tmpmount, tmproot, ro_opt); + bool ro_option = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, CPU_IDX, token, tmpmount, tmproot, ro_option); cg_infos[CPU_IDX]._data_complete = true; } else if (strcmp(token, "cpuacct") == 0) { any_cgroup_mounts_found = true; - bool ro_opt = find_ro_opt(mount_opts); - set_controller_paths(cg_infos, CPUACCT_IDX, token, tmpmount, tmproot, ro_opt); + bool ro_option = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, CPUACCT_IDX, token, tmpmount, tmproot, ro_option); cg_infos[CPUACCT_IDX]._data_complete = true; } else if (strcmp(token, "pids") == 0) { any_cgroup_mounts_found = true; - bool ro_opt = find_ro_opt(mount_opts); - set_controller_paths(cg_infos, PIDS_IDX, token, tmpmount, tmproot, ro_opt); + bool ro_option = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, PIDS_IDX, token, tmpmount, tmproot, ro_option); cg_infos[PIDS_IDX]._data_complete = true; } } diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.hpp b/src/hotspot/os/linux/cgroupSubsystem_linux.hpp index bbed07a65bbc1..984260402ff57 100644 --- a/src/hotspot/os/linux/cgroupSubsystem_linux.hpp +++ b/src/hotspot/os/linux/cgroupSubsystem_linux.hpp @@ -293,7 +293,7 @@ class CgroupInfo : public StackObj { char* _name; int _hierarchy_id; bool _enabled; - bool _is_ro; // whether or not the mount path is mounted read-only + bool _read_only; // whether or not the mount path is mounted read-only bool _data_complete; // indicating cgroup v1 data is complete for this controller char* _cgroup_path; // cgroup controller path from /proc/self/cgroup char* _root_mount_path; // root mount path from /proc/self/mountinfo. Unused for cgroup v2 @@ -304,7 +304,7 @@ class CgroupInfo : public StackObj { _name = nullptr; _hierarchy_id = -1; _enabled = false; - _is_ro = false; + _read_only = false; _data_complete = false; _cgroup_path = nullptr; _root_mount_path = nullptr; diff --git a/src/hotspot/os/linux/osContainer_linux.cpp b/src/hotspot/os/linux/osContainer_linux.cpp index a29449ad181d1..25e8eb464108d 100644 --- a/src/hotspot/os/linux/osContainer_linux.cpp +++ b/src/hotspot/os/linux/osContainer_linux.cpp @@ -75,8 +75,8 @@ void OSContainer::init() { */ const char *reason; bool any_mem_cpu_limit_present = false; - bool ctrl_ro = cgroup_subsystem->is_containerized(); - if (ctrl_ro) { + bool controllers_read_only = cgroup_subsystem->is_containerized(); + if (controllers_read_only) { // in-container case reason = " because all controllers are mounted read-only (container case)"; } else { @@ -91,7 +91,7 @@ void OSContainer::init() { reason = " because no cpu or memory limit is present"; } } - _is_containerized = ctrl_ro || any_mem_cpu_limit_present; + _is_containerized = controllers_read_only || any_mem_cpu_limit_present; log_debug(os, container)("OSContainer::init: is_containerized() = %s%s", _is_containerized ? "true" : "false", reason); From 434430ca8360a61ffdf02745350c39d56b0b740f Mon Sep 17 00:00:00 2001 From: Severin Gehwolf Date: Fri, 3 May 2024 16:23:21 +0200 Subject: [PATCH 10/12] Add doc for mountinfo scanning. --- .../os/linux/cgroupSubsystem_linux.cpp | 55 +++++++++++++++++-- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp index d7d058322f370..7e74c834af547 100644 --- a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp +++ b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp @@ -343,12 +343,34 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, char *cptr = tmpcgroups; char *token; - // Cgroup v2 relevant info. We only look for the _mount_path iff is_cgroupsV2 so - // as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1 - // block in the hybrid case. - // - // We collect the read only mount option in the cgroup infos so as to have that - // info ready when determining is_containerized(). + /* Cgroup v2 relevant info. We only look for the _mount_path iff is_cgroupsV2 so + * as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1 + * block in the hybrid case. + * + * We collect the read only mount option in the cgroup infos so as to have that + * info ready when determining is_containerized(). + * + * The scanning of a single mountinfo line entry is as follows: + * + * 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue + * (1) (2) (3):(4) (5) (6) (7) (8) (9) (10) (11) (12) + * + * The numbers in parentheses are labels for the descriptions below: + * + * (1) mount ID: matched with '%*d' and discarded + * (2) parent ID: matched with '%*d' and discarded + * (3) major: ---,---> major, minor separated by ':'. matched with '%*d:%*d' and discarded + * (4) minor: ---' + * (5) root: matched with '%s' and captured in 'tmproot'. Must be non-empty. + * (6) mount point: matched with '%s' and captured in 'tmpmount'. Must be non-empty. + * (7) mount options: matched with '%s' and captured in 'mount_opts'. Must be non-empty. + * (8) optional fields: ---,---> matched with '%*[^-]-'. Anything not a hyphen, followed by a hyphen + * (9) separator: ---' and discarded. Note: The discarded match is space characters if there + * are no optionals. Otherwise it includes the optional fields as well. + * (10) filesystem type: matched with '%s' and captured in 'tmp_fs_type' + * (11) mount source: matched with '%*s' and discarded + * (12) super options: matched with '%*s' and discarded + */ if (is_cgroupsV2 && sscanf(p, "%*d %*d %*d:%*d %s %s %s%*[^-]- %s %*s %*s", tmproot, tmpmount, mount_opts, tmp_fs_type) == 4) { // we likely have an early match return (e.g. cgroup fs match), be sure we have cgroup2 as fstype if (strcmp("cgroup2", tmp_fs_type) == 0) { @@ -375,6 +397,27 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory * * 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids + * + * The scanning of a single mountinfo line entry is as follows: + * + * 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue + * (1) (2) (3):(4) (5) (6) (7) (8) (9) (10) (11) (12) + * + * The numbers in parentheses are labels for the descriptions below: + * + * (1) mount ID: matched with '%*d' and discarded + * (2) parent ID: matched with '%*d' and discarded + * (3) major: ---,---> major, minor separated by ':'. matched with '%*d:%*d' and discarded + * (4) minor: ---' + * (5) root: matched with '%s' and captured in 'tmproot'. Must be non-empty. + * (6) mount point: matched with '%s' and captured in 'tmpmount'. Must be non-empty. + * (7) mount options: matched with '%s' and captured in 'mount_opts'. Must be non-empty. + * (8) optional fields: ---,---> matched with '%*[^-]-'. Anything not a hyphen, followed by a hyphen + * (9) separator: ---' and discarded. Note: The discarded match is space characters if there + * are no optionals. Otherwise it includes the optional fields as well. + * (10) filesystem type: matched with '%s' and captured in 'tmp_fs_type' + * (11) mount source: matched with '%*s' and discarded + * (12) super options: matched with '%s' and captured in 'tmpcgroups' */ if (sscanf(p, "%*d %*d %*d:%*d %s %s %s%*[^-]- %s %*s %s", tmproot, tmpmount, mount_opts, tmp_fs_type, tmpcgroups) == 5) { if (strcmp("cgroup", tmp_fs_type) != 0) { From 3d98cbc2284c84e18728b9e8f67dc5e167d3e294 Mon Sep 17 00:00:00 2001 From: Severin Gehwolf Date: Thu, 20 Jun 2024 13:59:59 +0200 Subject: [PATCH 11/12] Remove problem listing of PlainRead which is reworked here --- test/hotspot/jtreg/ProblemList.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/test/hotspot/jtreg/ProblemList.txt b/test/hotspot/jtreg/ProblemList.txt index c7c76c8fa3a2e..717f600317553 100644 --- a/test/hotspot/jtreg/ProblemList.txt +++ b/test/hotspot/jtreg/ProblemList.txt @@ -105,7 +105,6 @@ runtime/ClassInitErrors/TestStackOverflowDuringInit.java 8334545 generic-all applications/jcstress/copy.java 8229852 linux-all -containers/cgroup/PlainRead.java 8333967,8261242 linux-all containers/docker/TestJcmd.java 8278102 linux-all containers/docker/TestMemoryAwareness.java 8303470 linux-all containers/docker/TestJFREvents.java 8327723 linux-x64 From 532ea33b25cc90b4e3fbd451183334d4369cfb46 Mon Sep 17 00:00:00 2001 From: Severin Gehwolf Date: Tue, 25 Jun 2024 15:08:50 +0200 Subject: [PATCH 12/12] Refactor mount info matching to helper function --- .../os/linux/cgroupSubsystem_linux.cpp | 90 ++++++++++--------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp index a897025c67a0c..0dbd6ffd52be7 100644 --- a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp +++ b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp @@ -170,6 +170,46 @@ static bool find_ro_opt(char* mount_opts) { return false; } +/* + * Read values of a /proc/self/mountinfo line into variables. For cgroups v1 + * super options are needed. On cgroups v2 super options are not used. + * + * The scanning of a single mountinfo line entry is as follows: + * + * 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue + * (1) (2) (3):(4) (5) (6) (7) (8) (9) (10) (11) (12) + * + * The numbers in parentheses are labels for the descriptions below: + * + * (1) mount ID: matched with '%*d' and discarded + * (2) parent ID: matched with '%*d' and discarded + * (3) major: ---,---> major, minor separated by ':'. matched with '%*d:%*d' and discarded + * (4) minor: ---' + * (5) root: matched with '%s' and captured in 'tmproot'. Must be non-empty. + * (6) mount point: matched with '%s' and captured in 'tmpmount'. Must be non-empty. + * (7) mount options: matched with '%s' and captured in 'mount_opts'. Must be non-empty. + * (8) optional fields: ---,---> matched with '%*[^-]-'. Anything not a hyphen, followed by a hyphen + * (9) separator: ---' and discarded. Note: The discarded match is space characters if there + * are no optionals. Otherwise it includes the optional fields as well. + * (10) filesystem type: matched with '%s' and captured in 'tmp_fs_type' + * (11) mount source: matched with '%*s' and discarded + * (12) super options: matched with '%s' and captured in 'tmpcgroups' + */ +static inline bool match_mount_info_line(char* line, + char* tmproot, + char* tmpmount, + char* mount_opts, + char* tmp_fs_type, + char* tmpcgroups) { + return sscanf(line, + "%*d %*d %*d:%*d %s %s %s%*[^-]- %s %*s %s", + tmproot, + tmpmount, + mount_opts, + tmp_fs_type, + tmpcgroups) == 5; +} + bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, const char* proc_cgroups, const char* proc_self_cgroup, @@ -349,29 +389,13 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, * * We collect the read only mount option in the cgroup infos so as to have that * info ready when determining is_containerized(). - * - * The scanning of a single mountinfo line entry is as follows: - * - * 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue - * (1) (2) (3):(4) (5) (6) (7) (8) (9) (10) (11) (12) - * - * The numbers in parentheses are labels for the descriptions below: - * - * (1) mount ID: matched with '%*d' and discarded - * (2) parent ID: matched with '%*d' and discarded - * (3) major: ---,---> major, minor separated by ':'. matched with '%*d:%*d' and discarded - * (4) minor: ---' - * (5) root: matched with '%s' and captured in 'tmproot'. Must be non-empty. - * (6) mount point: matched with '%s' and captured in 'tmpmount'. Must be non-empty. - * (7) mount options: matched with '%s' and captured in 'mount_opts'. Must be non-empty. - * (8) optional fields: ---,---> matched with '%*[^-]-'. Anything not a hyphen, followed by a hyphen - * (9) separator: ---' and discarded. Note: The discarded match is space characters if there - * are no optionals. Otherwise it includes the optional fields as well. - * (10) filesystem type: matched with '%s' and captured in 'tmp_fs_type' - * (11) mount source: matched with '%*s' and discarded - * (12) super options: matched with '%*s' and discarded */ - if (is_cgroupsV2 && sscanf(p, "%*d %*d %*d:%*d %s %s %s%*[^-]- %s %*s %*s", tmproot, tmpmount, mount_opts, tmp_fs_type) == 4) { + if (is_cgroupsV2 && match_mount_info_line(p, + tmproot, + tmpmount, + mount_opts, + tmp_fs_type, + tmpcgroups /* unused */)) { // we likely have an early match return (e.g. cgroup fs match), be sure we have cgroup2 as fstype if (strcmp("cgroup2", tmp_fs_type) == 0) { cgroupv2_mount_point_found = true; @@ -398,28 +422,8 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, * * 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids * - * The scanning of a single mountinfo line entry is as follows: - * - * 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue - * (1) (2) (3):(4) (5) (6) (7) (8) (9) (10) (11) (12) - * - * The numbers in parentheses are labels for the descriptions below: - * - * (1) mount ID: matched with '%*d' and discarded - * (2) parent ID: matched with '%*d' and discarded - * (3) major: ---,---> major, minor separated by ':'. matched with '%*d:%*d' and discarded - * (4) minor: ---' - * (5) root: matched with '%s' and captured in 'tmproot'. Must be non-empty. - * (6) mount point: matched with '%s' and captured in 'tmpmount'. Must be non-empty. - * (7) mount options: matched with '%s' and captured in 'mount_opts'. Must be non-empty. - * (8) optional fields: ---,---> matched with '%*[^-]-'. Anything not a hyphen, followed by a hyphen - * (9) separator: ---' and discarded. Note: The discarded match is space characters if there - * are no optionals. Otherwise it includes the optional fields as well. - * (10) filesystem type: matched with '%s' and captured in 'tmp_fs_type' - * (11) mount source: matched with '%*s' and discarded - * (12) super options: matched with '%s' and captured in 'tmpcgroups' */ - if (sscanf(p, "%*d %*d %*d:%*d %s %s %s%*[^-]- %s %*s %s", tmproot, tmpmount, mount_opts, tmp_fs_type, tmpcgroups) == 5) { + if (match_mount_info_line(p, tmproot, tmpmount, mount_opts, tmp_fs_type, tmpcgroups)) { if (strcmp("cgroup", tmp_fs_type) != 0) { // Skip cgroup2 fs lines on hybrid or unified hierarchy. continue;