From 08fe8626038697155fcbd98a53c4de092c8fd849 Mon Sep 17 00:00:00 2001 From: Josef Eisl Date: Wed, 5 Jun 2024 10:38:07 +0200 Subject: [PATCH 1/6] svm: add mx check_libcontainer_annotations --- substratevm/ci/ci.jsonnet | 2 +- substratevm/mx.substratevm/mx_substratevm.py | 47 ++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/substratevm/ci/ci.jsonnet b/substratevm/ci/ci.jsonnet index efa1f45f4565..57b02b05a143 100644 --- a/substratevm/ci/ci.jsonnet +++ b/substratevm/ci/ci.jsonnet @@ -117,7 +117,7 @@ // START MAIN BUILD DEFINITION local task_dict = { - "style-fullbuild": mxgate("fullbuild,style,nativeimagehelp") + eclipse + jdt + maven + mx_build_exploded + gdb("10.2") + platform_spec(no_jobs) + platform_spec({ + "style-fullbuild": mxgate("fullbuild,style,nativeimagehelp,check_libcontainer_annotations") + eclipse + jdt + maven + mx_build_exploded + gdb("10.2") + platform_spec(no_jobs) + platform_spec({ // We could run the style gate on JDK 22 as well, and use old JDKs for running tools like StopBugs etc., // but since we support JDK 21 anyways, there is not good reason to do so. 
"linux:amd64:jdk21": gate + t("30:00"), diff --git a/substratevm/mx.substratevm/mx_substratevm.py b/substratevm/mx.substratevm/mx_substratevm.py index 3715e92ddc4d..0103c511f3fe 100644 --- a/substratevm/mx.substratevm/mx_substratevm.py +++ b/substratevm/mx.substratevm/mx_substratevm.py @@ -24,6 +24,7 @@ # import os +import pathlib import re import tempfile from glob import glob @@ -206,6 +207,7 @@ def __getattr__(self, name): 'hellomodule', 'condconfig', 'truffle_unittests', + 'check_libcontainer_annotations' ]) def vm_native_image_path(config=None): @@ -449,6 +451,10 @@ def help_stdout_check(output): mx.log('mx native-image --help output check detected no errors.') + with Task('Check ContainerLibrary annotations', tasks, tags=[GraalTags.check_libcontainer_annotations]) as t: + if t: + mx.command_function("check-libcontainer-annotations")([]) + with Task('module build demo', tasks, tags=[GraalTags.hellomodule]) as t: if t: hellomodule(args.extra_image_builder_arguments) @@ -2213,3 +2219,44 @@ def javac_image(args): def musl_helloworld(args, config=None): final_args = ['--static', '--libc=musl'] + args run_helloworld_command(final_args, config, 'muslhelloworld') + +@mx.command(suite, 'check-libcontainer-annotations') +def check_libcontainer_annotations(args): + """Verifies that files from libcontainer that are copied from hotspot have a @BasedOnJDKFile annotation in ContainerLibrary.""" + + # collect paths to check + + paths = [] + + libcontainer_project = mx.project("com.oracle.svm.native.libcontainer") + libcontainer_dir = libcontainer_project.dir + for src_dir in libcontainer_project.source_dirs(): + for path, _, files in os.walk(src_dir): + for name in files: + abs_path = pathlib.PurePath(path, name) + rel_path = abs_path.relative_to(libcontainer_dir) + src_svm = pathlib.PurePath("src", "svm") + if src_svm in rel_path.parents: + # replace "svm" with "hotspot" + stripped_path = rel_path.relative_to(src_svm) + if not 
stripped_path.as_posix().startswith("svm_container"): + hotspot_path = pathlib.PurePath("src", "hotspot") / stripped_path + paths.append(hotspot_path.as_posix()) + else: + paths.append(rel_path.as_posix()) + + + java_project = mx.project("com.oracle.svm.core") + container_library = pathlib.Path(java_project.dir, "src/com/oracle/svm/core/container/ContainerLibrary.java") + with open(container_library, "r") as fp: + annotation_lines = [x for x in fp.readlines() if "@BasedOnJDKFile" in x] + + # check all files are in an annotation + for f in paths: + if not any((a for a in annotation_lines if f in a)): + mx.abort(f"file {f} not found in any annotation in {container_library}") + + # check all annotations refer to a file + for a in annotation_lines: + if not any((f for f in paths if f in a)): + mx.abort(f"annotation {a} does not match any files in {libcontainer_dir}") From 1f62dbe29e692d90b3efd9d55b0c0c4e35cfaac5 Mon Sep 17 00:00:00 2001 From: Josef Eisl Date: Tue, 11 Jun 2024 14:28:47 +0200 Subject: [PATCH 2/6] svm: add mx reimport_libcontainer_files command --- substratevm/mx.substratevm/mx_substratevm.py | 48 ++++++++++++++++---- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/substratevm/mx.substratevm/mx_substratevm.py b/substratevm/mx.substratevm/mx_substratevm.py index 0103c511f3fe..22064412c2f4 100644 --- a/substratevm/mx.substratevm/mx_substratevm.py +++ b/substratevm/mx.substratevm/mx_substratevm.py @@ -26,6 +26,7 @@ import os import pathlib import re +import shutil import tempfile from glob import glob from contextlib import contextmanager @@ -54,8 +55,6 @@ import sys - - suite = mx.suite('substratevm') svmSuites = [suite] @@ -2220,14 +2219,9 @@ def musl_helloworld(args, config=None): final_args = ['--static', '--libc=musl'] + args run_helloworld_command(final_args, config, 'muslhelloworld') -@mx.command(suite, 'check-libcontainer-annotations') -def check_libcontainer_annotations(args): - """Verifies that files from libcontainer that are copied 
from hotspot have a @BasedOnJDKFile annotation in ContainerLibrary.""" - - # collect paths to check +def _get_libcontainer_files(): paths = [] - libcontainer_project = mx.project("com.oracle.svm.native.libcontainer") libcontainer_dir = libcontainer_project.dir for src_dir in libcontainer_project.source_dirs(): @@ -2244,7 +2238,16 @@ def check_libcontainer_annotations(args): paths.append(hotspot_path.as_posix()) else: paths.append(rel_path.as_posix()) + return libcontainer_dir, paths + + +@mx.command(suite, 'check-libcontainer-annotations') +def check_libcontainer_annotations(args): + """Verifies that files from libcontainer that are copied from hotspot have a @BasedOnJDKFile annotation in ContainerLibrary.""" + + # collect paths to check + libcontainer_dir, paths = _get_libcontainer_files() java_project = mx.project("com.oracle.svm.core") container_library = pathlib.Path(java_project.dir, "src/com/oracle/svm/core/container/ContainerLibrary.java") @@ -2260,3 +2263,32 @@ def check_libcontainer_annotations(args): for a in annotation_lines: if not any((f for f in paths if f in a)): mx.abort(f"annotation {a} does not match any files in {libcontainer_dir}") + + +reimport_libcontainer_files_cmd = "reimport-libcontainer-files" + + +@mx.command(suite, reimport_libcontainer_files_cmd) +def reimport_libcontainer_files(args): + parser = ArgumentParser(prog=f"mx {reimport_libcontainer_files_cmd}") + parser.add_argument("--jdk-repo", required=True, help="Path to the OpenJDK repo to import the files from.") + parsed_args = parser.parse_args(args) + + libcontainer_dir, paths = _get_libcontainer_files() + + libcontainer_path = pathlib.Path(libcontainer_dir) + jdk_path = pathlib.Path(parsed_args.jdk_repo) + + missing = [] + + for path in paths: + jdk_file = jdk_path / path + svm_file = libcontainer_path / path + if jdk_file.is_file(): + if mx.ask_yes_no(f"Should I update {path}"): + shutil.copyfile(jdk_file, svm_file) + else: + missing.append(jdk_file) + mx.warn(f"File not found: 
{jdk_file}") + if mx.ask_yes_no(f"Should I delete {path}"): + svm_file.unlink() From 3d710fc64ac528a1d1290c2bc761b98c69788260 Mon Sep 17 00:00:00 2001 From: Josef Eisl Date: Tue, 4 Jun 2024 12:31:22 +0200 Subject: [PATCH 3/6] svm: add @BasedOnJDKFile annotations to ContainerLibrary --- .../svm/core/container/ContainerLibrary.java | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/container/ContainerLibrary.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/container/ContainerLibrary.java index 13733f37d2e7..7cd840c6ed88 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/container/ContainerLibrary.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/container/ContainerLibrary.java @@ -33,9 +33,53 @@ import org.graalvm.word.UnsignedWord; import com.oracle.svm.core.SubstrateOptions; +import com.oracle.svm.core.util.BasedOnJDKFile; +/** + * Provides Java-level access to the native {@code libsvm_container} implementation. + * + * The native code is based on the container implementation in the JDK. The {@link BasedOnJDKFile} + * annotations below allow us to track upstream changes. Note that the referenced revisions/tags do + * not necessarily denote the date when the file was last imported (although often that is the + * case), but rather the last time upstream changes were reviewed. If there are changes that are + * irrelevant for SVM, we might omit updating our copies. That said, full updates are done + * regularly. See also the README file in + * {@code substratevm/src/com.oracle.svm.native.libcontainer/README.md}. 
+ */ @CContext(ContainerLibraryDirectives.class) @CLibrary(value = "svm_container", requireStatic = true, dependsOn = "m") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/java.base/share/native/include/jni.h") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/java.base/unix/native/include/jni_md.h") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/cgroupSubsystem_linux.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/cgroupSubsystem_linux.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/osContainer_linux.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/osContainer_linux.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/os_linux.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/os_linux.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/os_linux.inline.hpp") 
+@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/posix/include/jvm_md.h") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/posix/os_posix.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/posix/os_posix.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/posix/os_posix.inline.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/memory/allocation.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/memory/allocation.inline.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/memory/allStatic.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/runtime/os.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/runtime/os.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/runtime/os.inline.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/attributeNoreturn.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/checkedCast.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/compilerWarnings_gcc.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/compilerWarnings.hpp") 
+@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/globalDefinitions_gcc.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/globalDefinitions.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/macros.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/ostream.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/ostream.hpp") class ContainerLibrary { static final int VERSION = 240100; From ea024b9b524ae28ab3651114b9fea1bfe77d325b Mon Sep 17 00:00:00 2001 From: Josef Eisl Date: Thu, 4 Jul 2024 09:47:46 +0200 Subject: [PATCH 4/6] svm/libcontainer: update README --- .../README.md | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/README.md b/substratevm/src/com.oracle.svm.native.libcontainer/README.md index 824c436f2c68..dc014ad7af60 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/README.md +++ b/substratevm/src/com.oracle.svm.native.libcontainer/README.md @@ -1,8 +1,7 @@ # Native cgroup support for SVM This contains `libsvm_container`, the native cgroup support for SVM (libsvm_container). -The C code is ported from the OpenJDK and currently based on: -https://github.com/openjdk/jdk/tree/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot +The C code is ported from the OpenJDK and updated regularly (see "Updating" below). ## Building @@ -28,12 +27,25 @@ custom. 
They only provide the minimal required functionality and are specific to ## Updating -While the code in here is completely independent and does not need to be in sync with the OpenJDK, -it should be updated regularly to profit from upstream fixes and improvements. To do so, replace -the files in [`src/hotspot`](./src/hotspot) with those from the OpenJDK. Then reapply all the -changes (`#ifdef` guards) using the diff tool of your choice. Finally, adopt the files in -[`src/svm`](./src/svm) to provide new functionality, if needed. Don't forget to update the import -revision mention in this file. +While the code in `libsvm_container` is completely independent and does not need to be in sync with +the OpenJDK, it should be updated regularly to profit from upstream fixes and improvements. To keep +track of this, `ContainerLibrary.java` contains `@BasedOnJDKFile` annotations for each imported file, +which link to the source version in the JDK. With this information, all upstream changes can be +detected. Note that strictly speaking, the referenced version in the annotation does not necessarily +mean that the file was imported from that revision. Rather, it means that all upstream changes up to +that revision have been reviewed. If +there are changes that are irrelevant for `libsvm_container`, we might keep the file as is and still +bump the version. That said, we plan to do full reimports regularly, at least once for every +release. + +To help keep the `@BasedOnJDKFile` annotations up to date, the +`mx gate --tags check_libcontainer_annotations` command ensures that the actual files and +annotations are in sync. + +To do a full reimport, replace the files in [`src/hotspot`](./src/hotspot) with those from the OpenJDK. +The `mx reimport-libcontainer-files --jdk-repo path/to/jdk` command can help with that. Then reapply all the +changes (`#ifdef` guards) using the diff tool of your choice. Then, adapt the files in +[`src/svm`](./src/svm) to provide new functionality, if needed. 
Finally, update the `@BasedOnJDKFile` +annotations in `ContainerLibrary.java` to reflect the import revision. ## Local Testing From 318ca3411844c34aca58d0b0b1e8989c7ed5f069 Mon Sep 17 00:00:00 2001 From: Josef Eisl Date: Tue, 11 Jun 2024 15:50:51 +0200 Subject: [PATCH 5/6] svm/libcontainer: update to JDK 24+5 --- .../svm/core/container/ContainerLibrary.java | 73 +-- .../os/linux/cgroupSubsystem_linux.cpp | 383 +++++++++++++--- .../os/linux/cgroupSubsystem_linux.hpp | 283 ++++++------ .../linux/cgroupUtil_linux.cpp} | 45 +- .../src/hotspot/os/linux/cgroupUtil_linux.hpp | 39 ++ .../os/linux/cgroupV1Subsystem_linux.cpp | 266 ++++++----- .../os/linux/cgroupV1Subsystem_linux.hpp | 127 ++++-- .../os/linux/cgroupV2Subsystem_linux.cpp | 204 +++++---- .../os/linux/cgroupV2Subsystem_linux.hpp | 115 +++-- .../hotspot/os/linux/osContainer_linux.cpp | 46 +- .../hotspot/os/linux/osContainer_linux.hpp | 1 + .../src/hotspot/os/linux/os_linux.cpp | 414 ++++++++++-------- .../src/hotspot/os/linux/os_linux.hpp | 2 + .../src/hotspot/os/posix/os_posix.cpp | 209 ++++++++- .../src/hotspot/os/posix/os_posix.hpp | 2 +- .../src/hotspot/share/memory/allocation.hpp | 60 +-- .../src/hotspot/share/nmt/memflags.hpp | 83 ++++ .../src/hotspot/share/runtime/os.cpp | 244 ++++++++--- .../src/hotspot/share/runtime/os.hpp | 64 ++- .../share/utilities/globalDefinitions.hpp | 68 +-- .../share/utilities/globalDefinitions_gcc.hpp | 40 +- .../src/hotspot/share/utilities/macros.hpp | 6 + .../src/hotspot/share/utilities/ostream.cpp | 109 +++-- .../src/hotspot/share/utilities/ostream.hpp | 89 +++- .../src/svm/share/utilities/debug.hpp | 5 +- 25 files changed, 1939 insertions(+), 1038 deletions(-) rename substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/{share/utilities/attributeNoreturn.hpp => os/linux/cgroupUtil_linux.cpp} (54%) create mode 100644 substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupUtil_linux.hpp create mode 100644 
substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/nmt/memflags.hpp diff --git a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/container/ContainerLibrary.java b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/container/ContainerLibrary.java index 7cd840c6ed88..cb166993d734 100644 --- a/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/container/ContainerLibrary.java +++ b/substratevm/src/com.oracle.svm.core/src/com/oracle/svm/core/container/ContainerLibrary.java @@ -48,38 +48,47 @@ */ @CContext(ContainerLibraryDirectives.class) @CLibrary(value = "svm_container", requireStatic = true, dependsOn = "m") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/java.base/share/native/include/jni.h") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/java.base/unix/native/include/jni_md.h") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/cgroupSubsystem_linux.cpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/cgroupSubsystem_linux.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/osContainer_linux.cpp") 
-@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/osContainer_linux.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/os_linux.cpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/os_linux.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/linux/os_linux.inline.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/posix/include/jvm_md.h") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/posix/os_posix.cpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/posix/os_posix.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/os/posix/os_posix.inline.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/memory/allocation.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/memory/allocation.inline.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/memory/allStatic.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/runtime/os.cpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/runtime/os.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/runtime/os.inline.hpp") 
-@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/attributeNoreturn.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/checkedCast.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/compilerWarnings_gcc.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/compilerWarnings.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/globalDefinitions_gcc.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/globalDefinitions.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/macros.hpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/ostream.cpp") -@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/9049402a1b9394095b04287eef1f2d46c4da60e9/src/hotspot/share/utilities/ostream.hpp") +// The following annotations are for files in `src/hotspot`, which are copied from the JDK +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/java.base/share/native/include/jni.h") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/java.base/unix/native/include/jni_md.h") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/linux/cgroupSubsystem_linux.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/linux/cgroupSubsystem_linux.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/linux/cgroupUtil_linux.cpp") 
+@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/linux/cgroupUtil_linux.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/linux/osContainer_linux.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/linux/osContainer_linux.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/linux/os_linux.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/linux/os_linux.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/linux/os_linux.inline.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/posix/include/jvm_md.h") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/posix/os_posix.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/posix/os_posix.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/os/posix/os_posix.inline.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/memory/allocation.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/memory/allocation.inline.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/memory/allStatic.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/nmt/memflags.hpp") 
+@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/runtime/os.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/runtime/os.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/runtime/os.inline.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/utilities/checkedCast.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/utilities/compilerWarnings_gcc.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/utilities/compilerWarnings.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/utilities/globalDefinitions_gcc.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/utilities/globalDefinitions.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/utilities/macros.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/utilities/ostream.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/utilities/ostream.hpp") +// The following annotations are for files in `src/svm`, which are completely customized for SVM +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/logging/log.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/memory/allocation.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/runtime/globals.hpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/utilities/debug.cpp") +@BasedOnJDKFile("https://github.com/openjdk/jdk/blob/jdk-24+5/src/hotspot/share/utilities/debug.hpp") class ContainerLibrary { static final int VERSION = 240100; diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupSubsystem_linux.cpp 
b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupSubsystem_linux.cpp index bc3c943bf674..98b8bfde3e64 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupSubsystem_linux.cpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupSubsystem_linux.cpp @@ -29,6 +29,7 @@ #include "cgroupSubsystem_linux.hpp" #include "cgroupV1Subsystem_linux.hpp" #include "cgroupV2Subsystem_linux.hpp" +#include "cgroupUtil_linux.hpp" #include "logging/log.hpp" #include "memory/allocation.hpp" #include "os_linux.hpp" @@ -42,7 +43,7 @@ static const char* cg_controller_name[] = { "cpu", "cpuset", "cpuacct", "memory" CgroupSubsystem* CgroupSubsystemFactory::create() { CgroupV1MemoryController* memory = nullptr; CgroupV1Controller* cpuset = nullptr; - CgroupV1Controller* cpu = nullptr; + CgroupV1CpuController* cpu = nullptr; CgroupV1Controller* cpuacct = nullptr; CgroupV1Controller* pids = nullptr; CgroupInfo cg_infos[CG_INFO_LENGTH]; @@ -62,12 +63,18 @@ CgroupSubsystem* CgroupSubsystemFactory::create() { if (is_cgroup_v2(&cg_type_flags)) { // Cgroups v2 case, we have all the info we need. // Construct the subsystem, free resources and return - // Note: any index in cg_infos will do as the path is the same for - // all controllers. - CgroupController* unified = new CgroupV2Controller(cg_infos[MEMORY_IDX]._mount_path, cg_infos[MEMORY_IDX]._cgroup_path); + // Note: We use the memory for non-cpu non-memory controller look-ups. + // Perhaps we ought to have separate controllers for all. 
+ CgroupV2Controller mem_other = CgroupV2Controller(cg_infos[MEMORY_IDX]._mount_path, + cg_infos[MEMORY_IDX]._cgroup_path, + cg_infos[MEMORY_IDX]._read_only); + CgroupV2MemoryController* memory = new CgroupV2MemoryController(mem_other); + CgroupV2CpuController* cpu = new CgroupV2CpuController(CgroupV2Controller(cg_infos[CPU_IDX]._mount_path, + cg_infos[CPU_IDX]._cgroup_path, + cg_infos[CPU_IDX]._read_only)); log_debug(os, container)("Detected cgroups v2 unified hierarchy"); cleanup(cg_infos); - return new CgroupV2Subsystem(unified); + return new CgroupV2Subsystem(memory, cpu, mem_other); } /* @@ -101,19 +108,19 @@ CgroupSubsystem* CgroupSubsystemFactory::create() { CgroupInfo info = cg_infos[i]; if (info._data_complete) { // pids controller might have incomplete data if (strcmp(info._name, "memory") == 0) { - memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path); + memory = new CgroupV1MemoryController(CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only)); memory->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "cpuset") == 0) { - cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path); + cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only); cpuset->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "cpu") == 0) { - cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path); + cpu = new CgroupV1CpuController(CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only)); cpu->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "cpuacct") == 0) { - cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path); + cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only); cpuacct->set_subsystem_path(info._cgroup_path); } else if (strcmp(info._name, "pids") == 0) { - pids = new CgroupV1Controller(info._root_mount_path, info._mount_path); + 
pids = new CgroupV1Controller(info._root_mount_path, info._mount_path, info._read_only); pids->set_subsystem_path(info._cgroup_path); } } else { @@ -128,7 +135,8 @@ void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos, int controller, const char* name, char* mount_path, - char* root_path) { + char* root_path, + bool read_only) { if (cg_infos[controller]._mount_path != nullptr) { // On some systems duplicate controllers get mounted in addition to // the main cgroup controllers most likely under /sys/fs/cgroup. In that @@ -140,6 +148,7 @@ void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos, os::free(cg_infos[controller]._root_mount_path); cg_infos[controller]._mount_path = os::strdup(mount_path); cg_infos[controller]._root_mount_path = os::strdup(root_path); + cg_infos[controller]._read_only = read_only; } else { log_debug(os, container)("Duplicate %s controllers detected. Picking %s, skipping %s.", name, cg_infos[controller]._mount_path, mount_path); @@ -147,9 +156,66 @@ void CgroupSubsystemFactory::set_controller_paths(CgroupInfo* cg_infos, } else { cg_infos[controller]._mount_path = os::strdup(mount_path); cg_infos[controller]._root_mount_path = os::strdup(root_path); + cg_infos[controller]._read_only = read_only; } } +/* + * Determine whether or not the mount options, which are comma separated, + * contain the 'ro' string. + */ +static bool find_ro_opt(char* mount_opts) { + char* token; + char* mo_ptr = mount_opts; + // mount options are comma-separated (man proc). + while ((token = strsep(&mo_ptr, ",")) != NULL) { + if (strcmp(token, "ro") == 0) { + return true; + } + } + return false; +} + +/* + * Read values of a /proc/self/mountinfo line into variables. For cgroups v1 + * super options are needed. On cgroups v2 super options are not used. 
+ * + * The scanning of a single mountinfo line entry is as follows: + * + * 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue + * (1) (2) (3):(4) (5) (6) (7) (8) (9) (10) (11) (12) + * + * The numbers in parentheses are labels for the descriptions below: + * + * (1) mount ID: matched with '%*d' and discarded + * (2) parent ID: matched with '%*d' and discarded + * (3) major: ---,---> major, minor separated by ':'. matched with '%*d:%*d' and discarded + * (4) minor: ---' + * (5) root: matched with '%s' and captured in 'tmproot'. Must be non-empty. + * (6) mount point: matched with '%s' and captured in 'tmpmount'. Must be non-empty. + * (7) mount options: matched with '%s' and captured in 'mount_opts'. Must be non-empty. + * (8) optional fields: ---,---> matched with '%*[^-]-'. Anything not a hyphen, followed by a hyphen + * (9) separator: ---' and discarded. Note: The discarded match is space characters if there + * are no optionals. Otherwise it includes the optional fields as well. + * (10) filesystem type: matched with '%s' and captured in 'tmp_fs_type' + * (11) mount source: matched with '%*s' and discarded + * (12) super options: matched with '%s' and captured in 'tmpcgroups' + */ +static inline bool match_mount_info_line(char* line, + char* tmproot, + char* tmpmount, + char* mount_opts, + char* tmp_fs_type, + char* tmpcgroups) { + return sscanf(line, + "%*d %*d %*d:%*d %s %s %s%*[^-]- %s %*s %s", + tmproot, + tmpmount, + mount_opts, + tmp_fs_type, + tmpcgroups) == 5; +} + bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, const char* proc_cgroups, const char* proc_self_cgroup, @@ -319,26 +385,40 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, char tmproot[MAXPATHLEN+1]; char tmpmount[MAXPATHLEN+1]; char tmpcgroups[MAXPATHLEN+1]; + char mount_opts[MAXPATHLEN+1]; char *cptr = tmpcgroups; char *token; - // Cgroup v2 relevant info. 
We only look for the _mount_path iff is_cgroupsV2 so - // as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1 - // block in the hybrid case. - if (is_cgroupsV2 && sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %*s", tmproot, tmpmount, tmp_fs_type) == 3) { + /* Cgroup v2 relevant info. We only look for the _mount_path iff is_cgroupsV2 so + * as to avoid memory stomping of the _mount_path pointer later on in the cgroup v1 + * block in the hybrid case. + * + * We collect the read only mount option in the cgroup infos so as to have that + * info ready when determining is_containerized(). + */ + if (is_cgroupsV2 && match_mount_info_line(p, + tmproot, + tmpmount, + mount_opts, + tmp_fs_type, + tmpcgroups /* unused */)) { // we likely have an early match return (e.g. cgroup fs match), be sure we have cgroup2 as fstype if (strcmp("cgroup2", tmp_fs_type) == 0) { cgroupv2_mount_point_found = true; any_cgroup_mounts_found = true; + // For unified we only have a single line with cgroup2 fs type. + // Therefore use that option for all CG info structs. + bool ro_option = find_ro_opt(mount_opts); for (int i = 0; i < CG_INFO_LENGTH; i++) { - set_controller_paths(cg_infos, i, "(cg2, unified)", tmpmount, tmproot); + set_controller_paths(cg_infos, i, "(cg2, unified)", tmpmount, tmproot, ro_option); } } } /* Cgroup v1 relevant info * - * Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids + * Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids. For each controller + * determine whether or not it shows up as mounted read only.
* * Example for docker: * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory @@ -347,8 +427,9 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory * * 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids + * */ - if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %s", tmproot, tmpmount, tmp_fs_type, tmpcgroups) == 4) { + if (match_mount_info_line(p, tmproot, tmpmount, mount_opts, tmp_fs_type, tmpcgroups)) { if (strcmp("cgroup", tmp_fs_type) != 0) { // Skip cgroup2 fs lines on hybrid or unified hierarchy. continue; @@ -356,23 +437,28 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos, while ((token = strsep(&cptr, ",")) != nullptr) { if (strcmp(token, "memory") == 0) { any_cgroup_mounts_found = true; - set_controller_paths(cg_infos, MEMORY_IDX, token, tmpmount, tmproot); + bool ro_option = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, MEMORY_IDX, token, tmpmount, tmproot, ro_option); cg_infos[MEMORY_IDX]._data_complete = true; } else if (strcmp(token, "cpuset") == 0) { any_cgroup_mounts_found = true; - set_controller_paths(cg_infos, CPUSET_IDX, token, tmpmount, tmproot); + bool ro_option = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, CPUSET_IDX, token, tmpmount, tmproot, ro_option); cg_infos[CPUSET_IDX]._data_complete = true; } else if (strcmp(token, "cpu") == 0) { any_cgroup_mounts_found = true; - set_controller_paths(cg_infos, CPU_IDX, token, tmpmount, tmproot); + bool ro_option = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, CPU_IDX, token, tmpmount, tmproot, ro_option); cg_infos[CPU_IDX]._data_complete = true; } else if (strcmp(token, "cpuacct") == 0) { any_cgroup_mounts_found = true; - set_controller_paths(cg_infos, CPUACCT_IDX, 
token, tmpmount, tmproot); + bool ro_option = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, CPUACCT_IDX, token, tmpmount, tmproot, ro_option); cg_infos[CPUACCT_IDX]._data_complete = true; } else if (strcmp(token, "pids") == 0) { any_cgroup_mounts_found = true; - set_controller_paths(cg_infos, PIDS_IDX, token, tmpmount, tmproot); + bool ro_option = find_ro_opt(mount_opts); + set_controller_paths(cg_infos, PIDS_IDX, token, tmpmount, tmproot, ro_option); cg_infos[PIDS_IDX]._data_complete = true; } } @@ -476,13 +562,13 @@ void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) { */ int CgroupSubsystem::active_processor_count() { int quota_count = 0; - int cpu_count, limit_count; + int cpu_count; int result; // We use a cache with a timeout to avoid performing expensive // computations in the event this function is called frequently. // [See 8227006]. - CachingCgroupController* contrl = cpu_controller(); + CachingCgroupController* contrl = cpu_controller(); CachedMetric* cpu_limit = contrl->metrics_cache(); if (!cpu_limit->should_check_metric()) { int val = (int)cpu_limit->value(); @@ -490,23 +576,8 @@ int CgroupSubsystem::active_processor_count() { return val; } - cpu_count = limit_count = os::Linux::active_processor_count(); - int quota = cpu_quota(); - int period = cpu_period(); - - if (quota > -1 && period > 0) { - quota_count = ceilf((float)quota / (float)period); - log_trace(os, container)("CPU Quota count based on quota/period: %d", quota_count); - } - - // Use quotas - if (quota_count != 0) { - limit_count = quota_count; - } - - result = MIN2(cpu_count, limit_count); - log_trace(os, container)("OSContainer::active_processor_count: %d", result); - + cpu_count = os::Linux::active_processor_count(); + result = CgroupUtil::processor_count(contrl->controller(), cpu_count); // Update cached metric to avoid re-reading container settings too often cpu_limit->set_value(result, OSCONTAINER_CACHE_TIMEOUT); @@ -523,55 +594,229 @@ int 
CgroupSubsystem::active_processor_count() { * OSCONTAINER_ERROR for not supported */ jlong CgroupSubsystem::memory_limit_in_bytes() { - CachingCgroupController* contrl = memory_controller(); + CachingCgroupController* contrl = memory_controller(); CachedMetric* memory_limit = contrl->metrics_cache(); if (!memory_limit->should_check_metric()) { return memory_limit->value(); } jlong phys_mem = os::Linux::physical_memory(); log_trace(os, container)("total physical memory: " JLONG_FORMAT, phys_mem); - jlong mem_limit = read_memory_limit_in_bytes(); - - if (mem_limit <= 0 || mem_limit >= phys_mem) { - jlong read_mem_limit = mem_limit; - const char *reason; - if (mem_limit >= phys_mem) { - // Exceeding physical memory is treated as unlimited. Cg v1's implementation - // of read_memory_limit_in_bytes() caps this at phys_mem since Cg v1 has no - // value to represent 'max'. Cg v2 may return a value >= phys_mem if e.g. the - // container engine was started with a memory flag exceeding it. - reason = "ignored"; - mem_limit = -1; - } else if (OSCONTAINER_ERROR == mem_limit) { - reason = "failed"; - } else { - assert(mem_limit == -1, "Expected unlimited"); - reason = "unlimited"; - } - log_debug(os, container)("container memory limit %s: " JLONG_FORMAT ", using host value " JLONG_FORMAT, - reason, read_mem_limit, phys_mem); - } - + jlong mem_limit = contrl->controller()->read_memory_limit_in_bytes(phys_mem); // Update cached metric to avoid re-reading container settings too often memory_limit->set_value(mem_limit, OSCONTAINER_CACHE_TIMEOUT); return mem_limit; } -jlong CgroupSubsystem::limit_from_str(char* limit_str) { +bool CgroupController::read_string(const char* filename, char* buf, size_t buf_size) { + assert(buf != nullptr, "buffer must not be null"); + assert(filename != nullptr, "filename must be given"); + char* s_path = subsystem_path(); + if (s_path == nullptr) { + log_debug(os, container)("read_string: subsystem path is null"); + return false; + } + + stringStream 
file_path; + file_path.print_raw(s_path); + file_path.print_raw(filename); + + if (file_path.size() > MAXPATHLEN) { + log_debug(os, container)("File path too long %s, %s", file_path.base(), filename); + return false; + } + const char* absolute_path = file_path.freeze(); + log_trace(os, container)("Path to %s is %s", filename, absolute_path); + + FILE* fp = os::fopen(absolute_path, "r"); + if (fp == nullptr) { + log_debug(os, container)("Open of file %s failed, %s", absolute_path, os::strerror(errno)); + return false; + } + + // Read a single line into the provided buffer. + // At most buf_size - 1 characters. + char* line = fgets(buf, buf_size, fp); + fclose(fp); + if (line == nullptr) { + log_debug(os, container)("Empty file %s", absolute_path); + return false; + } + size_t len = strlen(line); + assert(len <= buf_size - 1, "At most buf_size - 1 bytes can be read"); + if (line[len - 1] == '\n') { + line[len - 1] = '\0'; // trim trailing new line + } + return true; +} + +bool CgroupController::read_number(const char* filename, julong* result) { + char buf[1024]; + bool is_ok = read_string(filename, buf, 1024); + if (!is_ok) { + return false; + } + int matched = sscanf(buf, JULONG_FORMAT, result); + if (matched == 1) { + return true; + } + return false; +} + +bool CgroupController::read_number_handle_max(const char* filename, jlong* result) { + char buf[1024]; + bool is_ok = read_string(filename, buf, 1024); + if (!is_ok) { + return false; + } + jlong val = limit_from_str(buf); + if (val == OSCONTAINER_ERROR) { + return false; + } + *result = val; + return true; +} + +bool CgroupController::read_numerical_key_value(const char* filename, const char* key, julong* result) { + assert(key != nullptr, "key must be given"); + assert(result != nullptr, "result pointer must not be null"); + assert(filename != nullptr, "file to search in must be given"); + char* s_path = subsystem_path(); + if (s_path == nullptr) { + log_debug(os, container)("read_numerical_key_value: 
subsystem path is null"); + return false; + } + + stringStream file_path; + file_path.print_raw(s_path); + file_path.print_raw(filename); + + if (file_path.size() > MAXPATHLEN) { + log_debug(os, container)("File path too long %s, %s", file_path.base(), filename); + return false; + } + const char* absolute_path = file_path.freeze(); + log_trace(os, container)("Path to %s is %s", filename, absolute_path); + FILE* fp = os::fopen(absolute_path, "r"); + if (fp == nullptr) { + log_debug(os, container)("Open of file %s failed, %s", absolute_path, os::strerror(errno)); + return false; + } + + const int buf_len = MAXPATHLEN+1; + char buf[buf_len]; + char* line = fgets(buf, buf_len, fp); + bool found_match = false; + // File consists of multiple lines in a "key value" + // fashion, we have to find the key. + const size_t key_len = strlen(key); + for (; line != nullptr; line = fgets(buf, buf_len, fp)) { + char after_key = line[key_len]; + if (strncmp(line, key, key_len) == 0 + && isspace((unsigned char) after_key) != 0 + && after_key != '\n') { + // Skip key, skip space + const char* value_substr = line + key_len + 1; + int matched = sscanf(value_substr, JULONG_FORMAT, result); + found_match = matched == 1; + if (found_match) { + break; + } + } + } + fclose(fp); + if (found_match) { + return true; + } + log_debug(os, container)("Type %s (key == %s) not found in file %s", JULONG_FORMAT, + key, absolute_path); + return false; +} + +bool CgroupController::read_numerical_tuple_value(const char* filename, bool use_first, jlong* result) { + char buf[1024]; + bool is_ok = read_string(filename, buf, 1024); + if (!is_ok) { + return false; + } + char token[1024]; + const int matched = sscanf(buf, (use_first ? 
"%1023s %*s" : "%*s %1023s"), token); + if (matched != 1) { + return false; + } + jlong val = limit_from_str(token); + if (val == OSCONTAINER_ERROR) { + return false; + } + *result = val; + return true; +} + +jlong CgroupController::limit_from_str(char* limit_str) { if (limit_str == nullptr) { return OSCONTAINER_ERROR; } // Unlimited memory in cgroups is the literal string 'max' for // some controllers, for example the pids controller. if (strcmp("max", limit_str) == 0) { - os::free(limit_str); return (jlong)-1; } julong limit; if (sscanf(limit_str, JULONG_FORMAT, &limit) != 1) { - os::free(limit_str); return OSCONTAINER_ERROR; } - os::free(limit_str); return (jlong)limit; } + +// CgroupSubsystem implementations + +jlong CgroupSubsystem::memory_and_swap_limit_in_bytes() { + julong phys_mem = os::Linux::physical_memory(); + julong host_swap = os::Linux::host_swap(); + return memory_controller()->controller()->memory_and_swap_limit_in_bytes(phys_mem, host_swap); +} + +jlong CgroupSubsystem::memory_and_swap_usage_in_bytes() { + julong phys_mem = os::Linux::physical_memory(); + julong host_swap = os::Linux::host_swap(); + return memory_controller()->controller()->memory_and_swap_usage_in_bytes(phys_mem, host_swap); +} + +jlong CgroupSubsystem::memory_soft_limit_in_bytes() { + julong phys_mem = os::Linux::physical_memory(); + return memory_controller()->controller()->memory_soft_limit_in_bytes(phys_mem); +} + +jlong CgroupSubsystem::memory_usage_in_bytes() { + return memory_controller()->controller()->memory_usage_in_bytes(); +} + +jlong CgroupSubsystem::memory_max_usage_in_bytes() { + return memory_controller()->controller()->memory_max_usage_in_bytes(); +} + +jlong CgroupSubsystem::rss_usage_in_bytes() { + return memory_controller()->controller()->rss_usage_in_bytes(); +} + +jlong CgroupSubsystem::cache_usage_in_bytes() { + return memory_controller()->controller()->cache_usage_in_bytes(); +} + +int CgroupSubsystem::cpu_quota() { + return 
cpu_controller()->controller()->cpu_quota(); +} + +int CgroupSubsystem::cpu_period() { + return cpu_controller()->controller()->cpu_period(); +} + +int CgroupSubsystem::cpu_shares() { + return cpu_controller()->controller()->cpu_shares(); +} + +#ifndef NATIVE_IMAGE +void CgroupSubsystem::print_version_specific_info(outputStream* st) { + julong phys_mem = os::Linux::physical_memory(); + memory_controller()->controller()->print_version_specific_info(st, phys_mem); +} +#endif // !NATIVE_IMAGE diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupSubsystem_linux.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupSubsystem_linux.hpp index 8827a39e8f14..f660e1f6b1f2 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupSubsystem_linux.hpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupSubsystem_linux.hpp @@ -70,148 +70,92 @@ #define MEMORY_IDX 3 #define PIDS_IDX 4 -typedef char * cptr; - -class CgroupController: public CHeapObj { - public: - virtual char *subsystem_path() = 0; -}; - -PRAGMA_DIAG_PUSH -PRAGMA_FORMAT_NONLITERAL_IGNORED -// Parses a subsystem's file, looking for a matching line. -// If key is null, then the first line will be matched with scan_fmt. -// If key isn't null, then each line will be matched, looking for something that matches "$key $scan_fmt". -// The matching value will be assigned to returnval. -// scan_fmt uses scanf() syntax. -// Return value: 0 on match, OSCONTAINER_ERROR on error. 
-template int subsystem_file_line_contents(CgroupController* c, - const char *filename, - const char *key, - const char *scan_fmt, - T returnval) { - if (c == nullptr) { - log_debug(os, container)("subsystem_file_line_contents: CgroupController* is null"); - return OSCONTAINER_ERROR; - } - if (c->subsystem_path() == nullptr) { - log_debug(os, container)("subsystem_file_line_contents: subsystem path is null"); - return OSCONTAINER_ERROR; - } - - stringStream file_path; - file_path.print_raw(c->subsystem_path()); - file_path.print_raw(filename); - - if (file_path.size() > (MAXPATHLEN-1)) { - log_debug(os, container)("File path too long %s, %s", file_path.base(), filename); - return OSCONTAINER_ERROR; - } - const char* absolute_path = file_path.freeze(); - log_trace(os, container)("Path to %s is %s", filename, absolute_path); - - FILE* fp = os::fopen(absolute_path, "r"); - if (fp == nullptr) { - log_debug(os, container)("Open of file %s failed, %s", absolute_path, os::strerror(errno)); - return OSCONTAINER_ERROR; - } - - const int buf_len = MAXPATHLEN+1; - char buf[buf_len]; - char* line = fgets(buf, buf_len, fp); - if (line == nullptr) { - log_debug(os, container)("Empty file %s", absolute_path); - fclose(fp); - return OSCONTAINER_ERROR; - } - - bool found_match = false; - if (key == nullptr) { - // File consists of a single line according to caller, with only a value - int matched = sscanf(line, scan_fmt, returnval); - found_match = matched == 1; - } else { - // File consists of multiple lines in a "key value" - // fashion, we have to find the key. 
- const int key_len = (int)strlen(key); - for (; line != nullptr; line = fgets(buf, buf_len, fp)) { - char* key_substr = strstr(line, key); - char after_key = line[key_len]; - if (key_substr == line - && isspace(after_key) != 0 - && after_key != '\n') { - // Skip key, skip space - const char* value_substr = line + key_len + 1; - int matched = sscanf(value_substr, scan_fmt, returnval); - found_match = matched == 1; - if (found_match) { - break; - } - } - } - } - fclose(fp); - if (found_match) { - return 0; - } - log_debug(os, container)("Type %s (key == %s) not found in file %s", scan_fmt, - (key == nullptr ? "null" : key), absolute_path); - return OSCONTAINER_ERROR; -} -PRAGMA_DIAG_POP - -// log_fmt can be different than scan_fmt. For example -// cpu_period() for cgv2 uses log_fmt='%d' and scan_fmt='%*s %d' -#define GET_CONTAINER_INFO(return_type, subsystem, filename, \ - logstring, log_fmt, scan_fmt, variable) \ - return_type variable; \ -{ \ - int err; \ - err = subsystem_file_line_contents(subsystem, \ - filename, \ - nullptr, \ - scan_fmt, \ - &variable); \ - if (err != 0) { \ - log_trace(os, container)(logstring "%d", OSCONTAINER_ERROR); \ - return (return_type) OSCONTAINER_ERROR; \ - } \ - \ - log_trace(os, container)(logstring log_fmt, variable); \ +#define CONTAINER_READ_NUMBER_CHECKED(controller, filename, log_string, retval) \ +{ \ + bool is_ok; \ + is_ok = controller->read_number(filename, &retval); \ + if (!is_ok) { \ + log_trace(os, container)(log_string " failed: %d", OSCONTAINER_ERROR); \ + return OSCONTAINER_ERROR; \ + } \ + log_trace(os, container)(log_string " is: " JULONG_FORMAT, retval); \ } -#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename, \ - logstring, scan_fmt, variable, bufsize) \ - char variable[bufsize]; \ -{ \ - int err; \ - err = subsystem_file_line_contents(subsystem, \ - filename, \ - nullptr, \ - scan_fmt, \ - variable); \ - if (err != 0) \ - return (return_type) nullptr; \ - \ - log_trace(os, container)(logstring, 
variable); \ +#define CONTAINER_READ_NUMBER_CHECKED_MAX(controller, filename, log_string, retval) \ +{ \ + bool is_ok; \ + is_ok = controller->read_number_handle_max(filename, &retval); \ + if (!is_ok) { \ + log_trace(os, container)(log_string " failed: %d", OSCONTAINER_ERROR); \ + return OSCONTAINER_ERROR; \ + } \ + log_trace(os, container)(log_string " is: " JLONG_FORMAT, retval); \ } -#define GET_CONTAINER_INFO_LINE(return_type, controller, filename, \ - matchline, logstring, scan_fmt, variable) \ - return_type variable; \ -{ \ - int err; \ - err = subsystem_file_line_contents(controller, \ - filename, \ - matchline, \ - scan_fmt, \ - &variable); \ - if (err != 0) \ - return (return_type) OSCONTAINER_ERROR; \ - \ - log_trace(os, container)(logstring, variable); \ +#define CONTAINER_READ_STRING_CHECKED(controller, filename, log_string, retval, buf_size) \ +{ \ + bool is_ok; \ + is_ok = controller->read_string(filename, retval, buf_size); \ + if (!is_ok) { \ + log_trace(os, container)(log_string " failed: %d", OSCONTAINER_ERROR); \ + return nullptr; \ + } \ + log_trace(os, container)(log_string " is: %s", retval); \ } +class CgroupController: public CHeapObj { + public: + virtual char* subsystem_path() = 0; + virtual bool is_read_only() = 0; + + /* Read a numerical value as unsigned long + * + * returns: false if any error occurred. true otherwise and + * the parsed value is set in the provided julong pointer. + */ + bool read_number(const char* filename, julong* result); + + /* Convenience method to deal with numbers as well as the string 'max' + * in interface files. Otherwise same as read_number(). + * + * returns: false if any error occurred. true otherwise and + * the parsed value (which might be negative) is being set in + * the provided jlong pointer. + */ + bool read_number_handle_max(const char* filename, jlong* result); + + /* Read a string of at most buf_size - 1 characters from the interface file. 
+ * The provided buffer must be at least buf_size in size so as to account + * for the null terminating character. Callers must ensure that the buffer + * is appropriately in-scope and of sufficient size. + * + * returns: false if any error occurred. true otherwise and the passed + * in buffer will contain the first buf_size - 1 characters of the string + * or up to the first new line character ('\n') whichever comes first. + */ + bool read_string(const char* filename, char* buf, size_t buf_size); + + /* Read a tuple value as a number. Tuple is: '<first> <second>'. + * Handles 'max' (for unlimited) for any tuple value. This is handy for + * parsing interface files like cpu.max which contain such tuples. + * + * returns: false if any error occurred. true otherwise and the parsed + * value of the appropriate tuple entry set in the provided jlong pointer. + */ + bool read_numerical_tuple_value(const char* filename, bool use_first, jlong* result); + + /* Read a numerical value from a multi-line interface file. The matched line is + * determined by the provided 'key'. The associated numerical value is being set + * via the passed in julong pointer. Example interface file 'memory.stat' + * + * returns: false if any error occurred. true otherwise and the parsed value is + * being set in the provided julong pointer.
+ */ + bool read_numerical_key_value(const char* filename, const char* key, julong* result); + + private: + static jlong limit_from_str(char* limit_str); +}; class CachedMetric : public CHeapObj{ private: @@ -244,48 +188,76 @@ class CachedMetric : public CHeapObj{ } }; +template class CachingCgroupController : public CHeapObj { private: - CgroupController* _controller; + T* _controller; CachedMetric* _metrics_cache; public: - CachingCgroupController(CgroupController* cont) { + CachingCgroupController(T* cont) { _controller = cont; _metrics_cache = new CachedMetric(); } CachedMetric* metrics_cache() { return _metrics_cache; } - CgroupController* controller() { return _controller; } + T* controller() { return _controller; } }; -class CgroupSubsystem: public CHeapObj { +// Pure virtual class representing version agnostic CPU controllers +class CgroupCpuController: public CHeapObj { public: - jlong memory_limit_in_bytes(); - int active_processor_count(); - jlong limit_from_str(char* limit_str); - virtual int cpu_quota() = 0; virtual int cpu_period() = 0; virtual int cpu_shares() = 0; - virtual jlong pids_max() = 0; - virtual jlong pids_current() = 0; + virtual bool is_read_only() = 0; +}; + +// Pure virtual class representing version agnostic memory controllers +class CgroupMemoryController: public CHeapObj { + public: + virtual jlong read_memory_limit_in_bytes(julong upper_bound) = 0; virtual jlong memory_usage_in_bytes() = 0; - virtual jlong memory_and_swap_limit_in_bytes() = 0; - virtual jlong memory_soft_limit_in_bytes() = 0; + virtual jlong memory_and_swap_limit_in_bytes(julong host_mem, julong host_swap) = 0; + virtual jlong memory_and_swap_usage_in_bytes(julong host_mem, julong host_swap) = 0; + virtual jlong memory_soft_limit_in_bytes(julong upper_bound) = 0; virtual jlong memory_max_usage_in_bytes() = 0; virtual jlong rss_usage_in_bytes() = 0; virtual jlong cache_usage_in_bytes() = 0; +#ifndef NATIVE_IMAGE + virtual void 
print_version_specific_info(outputStream* st, julong host_mem) = 0; +#endif // !NATIVE_IMAGE + virtual bool is_read_only() = 0; +}; + +class CgroupSubsystem: public CHeapObj { + public: + jlong memory_limit_in_bytes(); + int active_processor_count(); + + virtual jlong pids_max() = 0; + virtual jlong pids_current() = 0; + virtual bool is_containerized() = 0; virtual char * cpu_cpuset_cpus() = 0; virtual char * cpu_cpuset_memory_nodes() = 0; - virtual jlong read_memory_limit_in_bytes() = 0; virtual const char * container_type() = 0; - virtual CachingCgroupController* memory_controller() = 0; - virtual CachingCgroupController* cpu_controller() = 0; - + virtual CachingCgroupController* memory_controller() = 0; + virtual CachingCgroupController* cpu_controller() = 0; + + int cpu_quota(); + int cpu_period(); + int cpu_shares(); + + jlong memory_usage_in_bytes(); + jlong memory_and_swap_limit_in_bytes(); + jlong memory_and_swap_usage_in_bytes(); + jlong memory_soft_limit_in_bytes(); + jlong memory_max_usage_in_bytes(); + jlong rss_usage_in_bytes(); + jlong cache_usage_in_bytes(); #ifndef NATIVE_IMAGE - virtual void print_version_specific_info(outputStream* st) = 0; + void print_version_specific_info(outputStream* st); #endif // !NATIVE_IMAGE }; @@ -300,6 +272,7 @@ class CgroupInfo : public StackObj { char* _name; int _hierarchy_id; bool _enabled; + bool _read_only; // whether or not the mount path is mounted read-only bool _data_complete; // indicating cgroup v1 data is complete for this controller char* _cgroup_path; // cgroup controller path from /proc/self/cgroup char* _root_mount_path; // root mount path from /proc/self/mountinfo. 
Unused for cgroup v2 @@ -310,6 +283,7 @@ class CgroupInfo : public StackObj { _name = nullptr; _hierarchy_id = -1; _enabled = false; + _read_only = false; _data_complete = false; _cgroup_path = nullptr; _root_mount_path = nullptr; @@ -341,7 +315,8 @@ class CgroupSubsystemFactory: AllStatic { int controller, const char* name, char* mount_path, - char* root_path); + char* root_path, + bool read_only); // Determine the cgroup type (version 1 or version 2), given // relevant paths to files. Sets 'flags' accordingly. static bool determine_type(CgroupInfo* cg_infos, diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/attributeNoreturn.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupUtil_linux.cpp similarity index 54% rename from substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/attributeNoreturn.hpp rename to substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupUtil_linux.cpp index 8706ac44c157..19ccf9580fa5 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/attributeNoreturn.hpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupUtil_linux.cpp @@ -1,5 +1,6 @@ /* - * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, 2024, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,29 +24,27 @@ * questions. */ -#ifndef SHARE_UTILITY_ATTRIBUTENORETURN_HPP -#define SHARE_UTILITY_ATTRIBUTENORETURN_HPP +#include "cgroupUtil_linux.hpp" -// Provide a (temporary) macro for the [[noreturn]] attribute. -// -// Unfortunately, some older (though still in use) compilers have bugs when -// using [[noreturn]]. 
For them we use an empty definition for the attribute. -// -// Note: This can't be placed in globalDefinitions_xxx.hpp because the -// attribute is used in debug.hpp, which can't include globalDefinitions.hpp. +int CgroupUtil::processor_count(CgroupCpuController* cpu_ctrl, int host_cpus) { + assert(host_cpus > 0, "physical host cpus must be positive"); + int limit_count = host_cpus; + int quota = cpu_ctrl->cpu_quota(); + int period = cpu_ctrl->cpu_period(); + int quota_count = 0; + int result = 0; -// clang 12 (and possibly prior) crashes during build if we use [[noreturn]] -// for assertion failure reporting functions. The problem seems to be fixed -// in clang 13. -#ifdef __clang__ -#if __clang_major__ < 13 -#define ATTRIBUTE_NORETURN -#endif -#endif + if (quota > -1 && period > 0) { + quota_count = ceilf((float)quota / (float)period); + log_trace(os, container)("CPU Quota count based on quota/period: %d", quota_count); + } -// All other platforms can use [[noreturn]]. -#ifndef ATTRIBUTE_NORETURN -#define ATTRIBUTE_NORETURN [[noreturn]] -#endif + // Use quotas + if (quota_count != 0) { + limit_count = quota_count; + } -#endif // SHARE_UTILITY_ATTRIBUTENORETURN_HPP + result = MIN2(host_cpus, limit_count); + log_trace(os, container)("OSContainer::active_processor_count: %d", result); + return result; +} diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupUtil_linux.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupUtil_linux.hpp new file mode 100644 index 000000000000..43d7a157c8ce --- /dev/null +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupUtil_linux.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2024, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, 2024, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef CGROUP_UTIL_LINUX_HPP +#define CGROUP_UTIL_LINUX_HPP + +#include "utilities/globalDefinitions.hpp" +#include "cgroupSubsystem_linux.hpp" + +class CgroupUtil: AllStatic { + + public: + static int processor_count(CgroupCpuController* cpu, int host_cpus); +}; + +#endif // CGROUP_UTIL_LINUX_HPP diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp index 86f289c6dc7a..6fef7f1139ba 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,6 +27,7 @@ #include #include #include "cgroupV1Subsystem_linux.hpp" +#include "cgroupUtil_linux.hpp" #include "logging/log.hpp" #include "memory/allocation.hpp" #include "runtime/globals.hpp" @@ -76,38 +77,63 @@ void CgroupV1Controller::set_subsystem_path(char *cgroup_path) { * OSCONTAINER_ERROR for not supported */ jlong CgroupV1MemoryController::uses_mem_hierarchy() { - GET_CONTAINER_INFO(jlong, this, "/memory.use_hierarchy", - "Use Hierarchy is: ", JLONG_FORMAT, JLONG_FORMAT, use_hierarchy); - return use_hierarchy; + julong use_hierarchy; + CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.use_hierarchy", "Use Hierarchy", use_hierarchy); + return (jlong)use_hierarchy; } void CgroupV1MemoryController::set_subsystem_path(char *cgroup_path) { - CgroupV1Controller::set_subsystem_path(cgroup_path); + reader()->set_subsystem_path(cgroup_path); jlong hierarchy = uses_mem_hierarchy(); if (hierarchy > 0) { set_hierarchical(true); } } -jlong CgroupV1Subsystem::read_memory_limit_in_bytes() { - GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.limit_in_bytes", - "Memory Limit is: ", JULONG_FORMAT, JULONG_FORMAT, memlimit); +static inline +void verbose_log(julong read_mem_limit, julong host_mem) { + if (log_is_enabled(Debug, os, container)) { + jlong mem_limit = (jlong)read_mem_limit; // account for negative values + if (mem_limit < 0 || read_mem_limit >= host_mem) { + const char *reason; + if (mem_limit == OSCONTAINER_ERROR) { + reason = "failed"; + } else if (mem_limit == -1) { + reason = "unlimited"; + } else { + assert(read_mem_limit >= host_mem, "Expected read value exceeding host_mem"); + // Exceeding physical memory is treated as unlimited. This implementation + // caps it at host_mem since Cg v1 has no value to represent 'max'. 
+ reason = "ignored"; + } + log_debug(os, container)("container memory limit %s: " JLONG_FORMAT ", using host value " JLONG_FORMAT, + reason, mem_limit, host_mem); + } + } +} - if (memlimit >= os::Linux::physical_memory()) { +jlong CgroupV1MemoryController::read_memory_limit_in_bytes(julong phys_mem) { + julong memlimit; + CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.limit_in_bytes", "Memory Limit", memlimit); + if (memlimit >= phys_mem) { log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited"); - CgroupV1MemoryController* mem_controller = reinterpret_cast(_memory->controller()); - if (mem_controller->is_hierarchical()) { - GET_CONTAINER_INFO_LINE(julong, _memory->controller(), "/memory.stat", "hierarchical_memory_limit", - "Hierarchical Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, hier_memlimit) - if (hier_memlimit >= os::Linux::physical_memory()) { - log_trace(os, container)("Hierarchical Memory Limit is: Unlimited"); - } else { + if (is_hierarchical()) { + julong hier_memlimit; + bool is_ok = reader()->read_numerical_key_value("/memory.stat", "hierarchical_memory_limit", &hier_memlimit); + if (!is_ok) { + return OSCONTAINER_ERROR; + } + log_trace(os, container)("Hierarchical Memory Limit is: " JULONG_FORMAT, hier_memlimit); + if (hier_memlimit < phys_mem) { + verbose_log(hier_memlimit, phys_mem); return (jlong)hier_memlimit; } + log_trace(os, container)("Hierarchical Memory Limit is: Unlimited"); } + verbose_log(memlimit, phys_mem); return (jlong)-1; - } - else { + } else { + verbose_log(memlimit, phys_mem); return (jlong)memlimit; } } @@ -124,18 +150,21 @@ jlong CgroupV1Subsystem::read_memory_limit_in_bytes() { * * -1 if there isn't any limit in place (note: includes values which exceed a physical * upper bound) */ -jlong CgroupV1Subsystem::read_mem_swap() { - julong host_total_memsw; - GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.memsw.limit_in_bytes", - "Memory and Swap Limit is: ", JULONG_FORMAT, JULONG_FORMAT, 
memswlimit); - host_total_memsw = os::Linux::host_swap() + os::Linux::physical_memory(); +jlong CgroupV1MemoryController::read_mem_swap(julong host_total_memsw) { + julong hier_memswlimit; + julong memswlimit; + CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.memsw.limit_in_bytes", "Memory and Swap Limit", memswlimit); if (memswlimit >= host_total_memsw) { log_trace(os, container)("Non-Hierarchical Memory and Swap Limit is: Unlimited"); - CgroupV1MemoryController* mem_controller = reinterpret_cast(_memory->controller()); - if (mem_controller->is_hierarchical()) { + if (is_hierarchical()) { const char* matchline = "hierarchical_memsw_limit"; - GET_CONTAINER_INFO_LINE(julong, _memory->controller(), "/memory.stat", matchline, - "Hierarchical Memory and Swap Limit is : " JULONG_FORMAT, JULONG_FORMAT, hier_memswlimit) + bool is_ok = reader()->read_numerical_key_value("/memory.stat", + matchline, + &hier_memswlimit); + if (!is_ok) { + return OSCONTAINER_ERROR; + } + log_trace(os, container)("Hierarchical Memory and Swap Limit is: " JULONG_FORMAT, hier_memswlimit); if (hier_memswlimit >= host_total_memsw) { log_trace(os, container)("Hierarchical Memory and Swap Limit is: Unlimited"); } else { @@ -148,8 +177,8 @@ jlong CgroupV1Subsystem::read_mem_swap() { } } -jlong CgroupV1Subsystem::memory_and_swap_limit_in_bytes() { - jlong memory_swap = read_mem_swap(); +jlong CgroupV1MemoryController::memory_and_swap_limit_in_bytes(julong host_mem, julong host_swap) { + jlong memory_swap = read_mem_swap(host_mem + host_swap); if (memory_swap == -1) { return memory_swap; } @@ -158,7 +187,7 @@ jlong CgroupV1Subsystem::memory_and_swap_limit_in_bytes() { // supported. 
jlong swappiness = read_mem_swappiness(); if (swappiness == 0 || memory_swap == OSCONTAINER_ERROR) { - jlong memlimit = read_memory_limit_in_bytes(); + jlong memlimit = read_memory_limit_in_bytes(host_mem); if (memory_swap == OSCONTAINER_ERROR) { log_trace(os, container)("Memory and Swap Limit has been reset to " JLONG_FORMAT " because swap is not supported", memlimit); } else { @@ -169,16 +198,35 @@ jlong CgroupV1Subsystem::memory_and_swap_limit_in_bytes() { return memory_swap; } -jlong CgroupV1Subsystem::read_mem_swappiness() { - GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.swappiness", - "Swappiness is: ", JULONG_FORMAT, JULONG_FORMAT, swappiness); - return swappiness; +static inline +jlong memory_swap_usage_impl(CgroupController* ctrl) { + julong memory_swap_usage; + CONTAINER_READ_NUMBER_CHECKED(ctrl, "/memory.memsw.usage_in_bytes", "mem swap usage", memory_swap_usage); + return (jlong)memory_swap_usage; +} + +jlong CgroupV1MemoryController::memory_and_swap_usage_in_bytes(julong phys_mem, julong host_swap) { + jlong memory_sw_limit = memory_and_swap_limit_in_bytes(phys_mem, host_swap); + jlong memory_limit = read_memory_limit_in_bytes(phys_mem); + if (memory_sw_limit > 0 && memory_limit > 0) { + jlong delta_swap = memory_sw_limit - memory_limit; + if (delta_swap > 0) { + return memory_swap_usage_impl(reader()); + } + } + return memory_usage_in_bytes(); +} + +jlong CgroupV1MemoryController::read_mem_swappiness() { + julong swappiness; + CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.swappiness", "Swappiness", swappiness); + return (jlong)swappiness; } -jlong CgroupV1Subsystem::memory_soft_limit_in_bytes() { - GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.soft_limit_in_bytes", - "Memory Soft Limit is: ", JULONG_FORMAT, JULONG_FORMAT, memsoftlimit); - if (memsoftlimit >= os::Linux::physical_memory()) { +jlong CgroupV1MemoryController::memory_soft_limit_in_bytes(julong phys_mem) { + julong memsoftlimit; + 
CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.soft_limit_in_bytes", "Memory Soft Limit", memsoftlimit); + if (memsoftlimit >= phys_mem) { log_trace(os, container)("Memory Soft Limit is: Unlimited"); return (jlong)-1; } else { @@ -186,6 +234,17 @@ jlong CgroupV1Subsystem::memory_soft_limit_in_bytes() { } } +bool CgroupV1Subsystem::is_containerized() { + // containerized iff all required controllers are mounted + // read-only. See OSContainer::is_containerized() for + // the full logic. + // + return _memory->controller()->is_read_only() && + _cpu->controller()->is_read_only() && + _cpuacct->is_read_only() && + _cpuset->is_read_only(); +} + /* memory_usage_in_bytes * * Return the amount of used memory for this process. @@ -195,10 +254,10 @@ jlong CgroupV1Subsystem::memory_soft_limit_in_bytes() { * -1 for unlimited * OSCONTAINER_ERROR for not supported */ -jlong CgroupV1Subsystem::memory_usage_in_bytes() { - GET_CONTAINER_INFO(jlong, _memory->controller(), "/memory.usage_in_bytes", - "Memory Usage is: ", JLONG_FORMAT, JLONG_FORMAT, memusage); - return memusage; +jlong CgroupV1MemoryController::memory_usage_in_bytes() { + julong memusage; + CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.usage_in_bytes", "Memory Usage", memusage); + return (jlong)memusage; } /* memory_max_usage_in_bytes @@ -209,49 +268,57 @@ jlong CgroupV1Subsystem::memory_usage_in_bytes() { * max memory usage in bytes or * OSCONTAINER_ERROR for not supported */ -jlong CgroupV1Subsystem::memory_max_usage_in_bytes() { - GET_CONTAINER_INFO(jlong, _memory->controller(), "/memory.max_usage_in_bytes", - "Maximum Memory Usage is: ", JLONG_FORMAT, JLONG_FORMAT, memmaxusage); - return memmaxusage; +jlong CgroupV1MemoryController::memory_max_usage_in_bytes() { + julong memmaxusage; + CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.max_usage_in_bytes", "Maximum Memory Usage", memmaxusage); + return (jlong)memmaxusage; } -jlong CgroupV1Subsystem::rss_usage_in_bytes() { - GET_CONTAINER_INFO_LINE(julong, 
_memory->controller(), "/memory.stat", - "rss", JULONG_FORMAT, JULONG_FORMAT, rss); - return rss; +jlong CgroupV1MemoryController::rss_usage_in_bytes() { + julong rss; + bool is_ok = reader()->read_numerical_key_value("/memory.stat", "rss", &rss); + if (!is_ok) { + return OSCONTAINER_ERROR; + } + log_trace(os, container)("RSS usage is: " JULONG_FORMAT, rss); + return (jlong)rss; } -jlong CgroupV1Subsystem::cache_usage_in_bytes() { - GET_CONTAINER_INFO_LINE(julong, _memory->controller(), "/memory.stat", - "cache", JULONG_FORMAT, JULONG_FORMAT, cache); +jlong CgroupV1MemoryController::cache_usage_in_bytes() { + julong cache; + bool is_ok = reader()->read_numerical_key_value("/memory.stat", "cache", &cache); + if (!is_ok) { + return OSCONTAINER_ERROR; + } + log_trace(os, container)("Cache usage is: " JULONG_FORMAT, cache); return cache; } -jlong CgroupV1Subsystem::kernel_memory_usage_in_bytes() { - GET_CONTAINER_INFO(jlong, _memory->controller(), "/memory.kmem.usage_in_bytes", - "Kernel Memory Usage is: ", JLONG_FORMAT, JLONG_FORMAT, kmem_usage); - return kmem_usage; +jlong CgroupV1MemoryController::kernel_memory_usage_in_bytes() { + julong kmem_usage; + CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.kmem.usage_in_bytes", "Kernel Memory Usage", kmem_usage); + return (jlong)kmem_usage; } -jlong CgroupV1Subsystem::kernel_memory_limit_in_bytes() { - GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.kmem.limit_in_bytes", - "Kernel Memory Limit is: ", JULONG_FORMAT, JULONG_FORMAT, kmem_limit); - if (kmem_limit >= os::Linux::physical_memory()) { +jlong CgroupV1MemoryController::kernel_memory_limit_in_bytes(julong phys_mem) { + julong kmem_limit; + CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.kmem.limit_in_bytes", "Kernel Memory Limit", kmem_limit); + if (kmem_limit >= phys_mem) { return (jlong)-1; } return (jlong)kmem_limit; } -jlong CgroupV1Subsystem::kernel_memory_max_usage_in_bytes() { - GET_CONTAINER_INFO(jlong, _memory->controller(), 
"/memory.kmem.max_usage_in_bytes", - "Maximum Kernel Memory Usage is: ", JLONG_FORMAT, JLONG_FORMAT, kmem_max_usage); - return kmem_max_usage; +jlong CgroupV1MemoryController::kernel_memory_max_usage_in_bytes() { + julong kmem_max_usage; + CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.kmem.max_usage_in_bytes", "Maximum Kernel Memory Usage", kmem_max_usage); + return (jlong)kmem_max_usage; } #ifndef NATIVE_IMAGE -void CgroupV1Subsystem::print_version_specific_info(outputStream* st) { +void CgroupV1MemoryController::print_version_specific_info(outputStream* st, julong phys_mem) { jlong kmem_usage = kernel_memory_usage_in_bytes(); - jlong kmem_limit = kernel_memory_limit_in_bytes(); + jlong kmem_limit = kernel_memory_limit_in_bytes(phys_mem); jlong kmem_max_usage = kernel_memory_max_usage_in_bytes(); OSContainer::print_container_helper(st, kmem_usage, "kernel_memory_usage_in_bytes"); @@ -260,15 +327,15 @@ void CgroupV1Subsystem::print_version_specific_info(outputStream* st) { } #endif // !NATIVE_IMAGE -char * CgroupV1Subsystem::cpu_cpuset_cpus() { - GET_CONTAINER_INFO_CPTR(cptr, _cpuset, "/cpuset.cpus", - "cpuset.cpus is: %s", "%1023s", cpus, 1024); +char* CgroupV1Subsystem::cpu_cpuset_cpus() { + char cpus[1024]; + CONTAINER_READ_STRING_CHECKED(_cpuset, "/cpuset.cpus", "cpuset.cpus", cpus, 1024); return os::strdup(cpus); } -char * CgroupV1Subsystem::cpu_cpuset_memory_nodes() { - GET_CONTAINER_INFO_CPTR(cptr, _cpuset, "/cpuset.mems", - "cpuset.mems is: %s", "%1023s", mems, 1024); +char* CgroupV1Subsystem::cpu_cpuset_memory_nodes() { + char mems[1024]; + CONTAINER_READ_STRING_CHECKED(_cpuset, "/cpuset.mems", "cpuset.mems", mems, 1024); return os::strdup(mems); } @@ -282,16 +349,24 @@ char * CgroupV1Subsystem::cpu_cpuset_memory_nodes() { * -1 for no quota * OSCONTAINER_ERROR for not supported */ -int CgroupV1Subsystem::cpu_quota() { - GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.cfs_quota_us", - "CPU Quota is: ", "%d", "%d", quota); - return quota; +int 
CgroupV1CpuController::cpu_quota() { + julong quota; + bool is_ok = reader()->read_number("/cpu.cfs_quota_us", &quota); + if (!is_ok) { + log_trace(os, container)("CPU Quota failed: %d", OSCONTAINER_ERROR); + return OSCONTAINER_ERROR; + } + // cast to int since the read value might be negative + // and we want to avoid logging -1 as a large unsigned value. + int quota_int = (int)quota; + log_trace(os, container)("CPU Quota is: %d", quota_int); + return quota_int; } -int CgroupV1Subsystem::cpu_period() { - GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.cfs_period_us", - "CPU Period is: ", "%d", "%d", period); - return period; +int CgroupV1CpuController::cpu_period() { + julong period; + CONTAINER_READ_NUMBER_CHECKED(reader(), "/cpu.cfs_period_us", "CPU Period", period); + return (int)period; } /* cpu_shares @@ -304,20 +379,14 @@ int CgroupV1Subsystem::cpu_period() { * -1 for no share setup * OSCONTAINER_ERROR for not supported */ -int CgroupV1Subsystem::cpu_shares() { - GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.shares", - "CPU Shares is: ", "%d", "%d", shares); +int CgroupV1CpuController::cpu_shares() { + julong shares; + CONTAINER_READ_NUMBER_CHECKED(reader(), "/cpu.shares", "CPU Shares", shares); + int shares_int = (int)shares; // Convert 1024 to no shares setup - if (shares == 1024) return -1; - - return shares; -} - + if (shares_int == 1024) return -1; -char* CgroupV1Subsystem::pids_max_val() { - GET_CONTAINER_INFO_CPTR(cptr, _pids, "/pids.max", - "Maximum number of tasks is: %s", "%1023s", pidsmax, 1024); - return os::strdup(pidsmax); + return shares_int; } /* pids_max @@ -331,8 +400,9 @@ char* CgroupV1Subsystem::pids_max_val() { */ jlong CgroupV1Subsystem::pids_max() { if (_pids == nullptr) return OSCONTAINER_ERROR; - char * pidsmax_str = pids_max_val(); - return limit_from_str(pidsmax_str); + jlong pids_max; + CONTAINER_READ_NUMBER_CHECKED_MAX(_pids, "/pids.max", "Maximum number of tasks", pids_max); + return pids_max; } /* pids_current @@ -345,7
+415,7 @@ jlong CgroupV1Subsystem::pids_max() { */ jlong CgroupV1Subsystem::pids_current() { if (_pids == nullptr) return OSCONTAINER_ERROR; - GET_CONTAINER_INFO(jlong, _pids, "/pids.current", - "Current number of tasks is: ", JLONG_FORMAT, JLONG_FORMAT, pids_current); - return pids_current; + julong pids_current; + CONTAINER_READ_NUMBER_CHECKED(_pids, "/pids.current", "Current number of tasks", pids_current); + return (jlong)pids_current; } diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp index e1c57a78fdb5..6677fdb0a03b 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp @@ -35,28 +35,62 @@ class CgroupV1Controller: public CgroupController { private: /* mountinfo contents */ - char *_root; - char *_mount_point; + char* _root; + char* _mount_point; + bool _read_only; /* Constructed subsystem directory */ - char *_path; + char* _path; public: - CgroupV1Controller(char *root, char *mountpoint) { - _root = os::strdup(root); - _mount_point = os::strdup(mountpoint); - _path = nullptr; + CgroupV1Controller(char *root, + char *mountpoint, + bool ro) : _root(os::strdup(root)), + _mount_point(os::strdup(mountpoint)), + _read_only(ro), + _path(nullptr) { + } + // Shallow copy constructor + CgroupV1Controller(const CgroupV1Controller& o) : _root(o._root), + _mount_point(o._mount_point), + _read_only(o._read_only), + _path(o._path) { + } + ~CgroupV1Controller() { + // At least one subsystem controller exists with paths to malloc'd path + // names } - virtual void set_subsystem_path(char *cgroup_path); - char *subsystem_path() { return _path; } + void set_subsystem_path(char *cgroup_path); + char *subsystem_path() override { return _path; } + bool 
is_read_only() { return _read_only; } }; -class CgroupV1MemoryController: public CgroupV1Controller { +class CgroupV1MemoryController final : public CgroupMemoryController { + private: + CgroupV1Controller _reader; + CgroupV1Controller* reader() { return &_reader; } public: bool is_hierarchical() { return _uses_mem_hierarchy; } void set_subsystem_path(char *cgroup_path); + jlong read_memory_limit_in_bytes(julong upper_bound) override; + jlong memory_usage_in_bytes() override; + jlong memory_and_swap_limit_in_bytes(julong host_mem, julong host_swap) override; + jlong memory_and_swap_usage_in_bytes(julong host_mem, julong host_swap) override; + jlong memory_soft_limit_in_bytes(julong upper_bound) override; + jlong memory_max_usage_in_bytes() override; + jlong rss_usage_in_bytes() override; + jlong cache_usage_in_bytes() override; + jlong kernel_memory_usage_in_bytes(); + jlong kernel_memory_limit_in_bytes(julong host_mem); + jlong kernel_memory_max_usage_in_bytes(); +#ifndef NATIVE_IMAGE + void print_version_specific_info(outputStream* st, julong host_mem) override; +#endif // !NATIVE_IMAGE + bool is_read_only() override { + return reader()->is_read_only(); + } private: /* Some container runtimes set limits via cgroup * hierarchy. 
If set to true consider also memory.stat @@ -64,25 +98,41 @@ class CgroupV1MemoryController: public CgroupV1Controller { bool _uses_mem_hierarchy; jlong uses_mem_hierarchy(); void set_hierarchical(bool value) { _uses_mem_hierarchy = value; } + jlong read_mem_swappiness(); + jlong read_mem_swap(julong host_total_memsw); public: - CgroupV1MemoryController(char *root, char *mountpoint) : CgroupV1Controller(root, mountpoint) { - _uses_mem_hierarchy = false; + CgroupV1MemoryController(const CgroupV1Controller& reader) + : _reader(reader), + _uses_mem_hierarchy(false) { } }; -class CgroupV1Subsystem: public CgroupSubsystem { +class CgroupV1CpuController final : public CgroupCpuController { + private: + CgroupV1Controller _reader; + CgroupV1Controller* reader() { return &_reader; } public: - jlong read_memory_limit_in_bytes(); - jlong memory_and_swap_limit_in_bytes(); - jlong memory_soft_limit_in_bytes(); - jlong memory_usage_in_bytes(); - jlong memory_max_usage_in_bytes(); - jlong rss_usage_in_bytes(); - jlong cache_usage_in_bytes(); + int cpu_quota() override; + int cpu_period() override; + int cpu_shares() override; + void set_subsystem_path(char *cgroup_path) { + reader()->set_subsystem_path(cgroup_path); + } + bool is_read_only() override { + return reader()->is_read_only(); + } + public: + CgroupV1CpuController(const CgroupV1Controller& reader) : _reader(reader) { + } +}; + +class CgroupV1Subsystem: public CgroupSubsystem { + + public: jlong kernel_memory_usage_in_bytes(); jlong kernel_memory_limit_in_bytes(); jlong kernel_memory_max_usage_in_bytes(); @@ -90,48 +140,35 @@ class CgroupV1Subsystem: public CgroupSubsystem { char * cpu_cpuset_cpus(); char * cpu_cpuset_memory_nodes(); - int cpu_quota(); - int cpu_period(); - - int cpu_shares(); - jlong pids_max(); jlong pids_current(); - -#ifndef NATIVE_IMAGE - void print_version_specific_info(outputStream* st); -#endif // !NATIVE_IMAGE + bool is_containerized(); const char * container_type() { return "cgroupv1"; } - 
CachingCgroupController * memory_controller() { return _memory; } - CachingCgroupController * cpu_controller() { return _cpu; } + CachingCgroupController* memory_controller() { return _memory; } + CachingCgroupController* cpu_controller() { return _cpu; } private: /* controllers */ - CachingCgroupController* _memory = nullptr; + CachingCgroupController* _memory = nullptr; CgroupV1Controller* _cpuset = nullptr; - CachingCgroupController* _cpu = nullptr; + CachingCgroupController* _cpu = nullptr; CgroupV1Controller* _cpuacct = nullptr; CgroupV1Controller* _pids = nullptr; - char * pids_max_val(); - - jlong read_mem_swappiness(); - jlong read_mem_swap(); - public: CgroupV1Subsystem(CgroupV1Controller* cpuset, - CgroupV1Controller* cpu, + CgroupV1CpuController* cpu, CgroupV1Controller* cpuacct, CgroupV1Controller* pids, - CgroupV1MemoryController* memory) { - _cpuset = cpuset; - _cpu = new CachingCgroupController(cpu); - _cpuacct = cpuacct; - _pids = pids; - _memory = new CachingCgroupController(memory); + CgroupV1MemoryController* memory) : + _memory(new CachingCgroupController(memory)), + _cpuset(cpuset), + _cpu(new CachingCgroupController(cpu)), + _cpuacct(cpuacct), + _pids(pids) { } }; diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp index 196d22fd7ac8..110e1c0fd5eb 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp @@ -25,6 +25,7 @@ */ #include "cgroupV2Subsystem_linux.hpp" +#include "cgroupUtil_linux.hpp" /* cpu_shares * @@ -36,11 +37,12 @@ * -1 for no share setup * OSCONTAINER_ERROR for not supported */ -int CgroupV2Subsystem::cpu_shares() { - GET_CONTAINER_INFO(int, _unified, "/cpu.weight", - "Raw value for CPU Shares is: ", "%d",
"%d", shares); +int CgroupV2CpuController::cpu_shares() { + julong shares; + CONTAINER_READ_NUMBER_CHECKED(reader(), "/cpu.weight", "Raw value for CPU Shares", shares); + int shares_int = (int)shares; // Convert default value of 100 to no shares setup - if (shares == 100) { + if (shares_int == 100) { log_debug(os, container)("CPU Shares is: %d", -1); return -1; } @@ -52,7 +54,7 @@ int CgroupV2Subsystem::cpu_shares() { // Use the inverse of (x == OCI value, y == cgroupsv2 value): // ((262142 * y - 1)/9999) + 2 = x // - int x = 262142 * shares - 1; + int x = 262142 * shares_int - 1; double frac = x/9999.0; x = ((int)frac) + 2; log_trace(os, container)("Scaled CPU shares value is: %d", x); @@ -84,34 +86,44 @@ int CgroupV2Subsystem::cpu_shares() { * -1 for no quota * OSCONTAINER_ERROR for not supported */ -int CgroupV2Subsystem::cpu_quota() { - char * cpu_quota_str = cpu_quota_val(); - int limit = (int)limit_from_str(cpu_quota_str); +int CgroupV2CpuController::cpu_quota() { + jlong quota_val; + bool is_ok = reader()->read_numerical_tuple_value("/cpu.max", true /* use_first */, &quota_val); + if (!is_ok) { + return OSCONTAINER_ERROR; + } + int limit = (int)quota_val; log_trace(os, container)("CPU Quota is: %d", limit); return limit; } -char * CgroupV2Subsystem::cpu_cpuset_cpus() { - GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpuset.cpus", - "cpuset.cpus is: %s", "%1023s", cpus, 1024); - return os::strdup(cpus); +bool CgroupV2Subsystem::is_containerized() { + return _unified.is_read_only() && + _memory->controller()->is_read_only() && + _cpu->controller()->is_read_only(); } -char* CgroupV2Subsystem::cpu_quota_val() { - GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpu.max", - "Raw value for CPU quota is: %s", "%1023s %*d", quota, 1024); - return os::strdup(quota); +char* CgroupV2Subsystem::cpu_cpuset_cpus() { + char cpus[1024]; + CONTAINER_READ_STRING_CHECKED(unified(), "/cpuset.cpus", "cpuset.cpus", cpus, 1024); + return os::strdup(cpus); } -char *
CgroupV2Subsystem::cpu_cpuset_memory_nodes() { - GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpuset.mems", - "cpuset.mems is: %s", "%1023s", mems, 1024); +char* CgroupV2Subsystem::cpu_cpuset_memory_nodes() { + char mems[1024]; + CONTAINER_READ_STRING_CHECKED(unified(), "/cpuset.mems", "cpuset.mems", mems, 1024); return os::strdup(mems); } -int CgroupV2Subsystem::cpu_period() { - GET_CONTAINER_INFO(int, _unified, "/cpu.max", - "CPU Period is: ", "%d", "%*s %d", period); +int CgroupV2CpuController::cpu_period() { + jlong period_val; + bool is_ok = reader()->read_numerical_tuple_value("/cpu.max", false /* use_first */, &period_val); + if (!is_ok) { + log_trace(os, container)("CPU Period failed: %d", OSCONTAINER_ERROR); + return OSCONTAINER_ERROR; + } + int period = (int)period_val; + log_trace(os, container)("CPU Period is: %d", period); return period; } @@ -124,39 +136,42 @@ int CgroupV2Subsystem::cpu_period() { * -1 for unlimited * OSCONTAINER_ERROR for not supported */ -jlong CgroupV2Subsystem::memory_usage_in_bytes() { - GET_CONTAINER_INFO(jlong, _unified, "/memory.current", - "Memory Usage is: ", JLONG_FORMAT, JLONG_FORMAT, memusage); - return memusage; +jlong CgroupV2MemoryController::memory_usage_in_bytes() { + julong memusage; + CONTAINER_READ_NUMBER_CHECKED(reader(), "/memory.current", "Memory Usage", memusage); + return (jlong)memusage; } -jlong CgroupV2Subsystem::memory_soft_limit_in_bytes() { - char* mem_soft_limit_str = mem_soft_limit_val(); - return limit_from_str(mem_soft_limit_str); +jlong CgroupV2MemoryController::memory_soft_limit_in_bytes(julong phys_mem) { + jlong mem_soft_limit; + CONTAINER_READ_NUMBER_CHECKED_MAX(reader(), "/memory.low", "Memory Soft Limit", mem_soft_limit); + return mem_soft_limit; } -jlong CgroupV2Subsystem::memory_max_usage_in_bytes() { +jlong CgroupV2MemoryController::memory_max_usage_in_bytes() { // Log this string at trace level so as to make tests happy. 
log_trace(os, container)("Maximum Memory Usage is not supported."); return OSCONTAINER_ERROR; // not supported } -jlong CgroupV2Subsystem::rss_usage_in_bytes() { - GET_CONTAINER_INFO_LINE(julong, _memory->controller(), "/memory.stat", - "anon", JULONG_FORMAT, JULONG_FORMAT, rss); - return rss; -} - -jlong CgroupV2Subsystem::cache_usage_in_bytes() { - GET_CONTAINER_INFO_LINE(julong, _memory->controller(), "/memory.stat", - "file", JULONG_FORMAT, JULONG_FORMAT, cache); - return cache; +jlong CgroupV2MemoryController::rss_usage_in_bytes() { + julong rss; + bool is_ok = reader()->read_numerical_key_value("/memory.stat", "anon", &rss); + if (!is_ok) { + return OSCONTAINER_ERROR; + } + log_trace(os, container)("RSS usage is: " JULONG_FORMAT, rss); + return (jlong)rss; } -char* CgroupV2Subsystem::mem_soft_limit_val() { - GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.low", - "Memory Soft Limit is: %s", "%1023s", mem_soft_limit_str, 1024); - return os::strdup(mem_soft_limit_str); +jlong CgroupV2MemoryController::cache_usage_in_bytes() { + julong cache; + bool is_ok = reader()->read_numerical_key_value("/memory.stat", "file", &cache); + if (!is_ok) { + return OSCONTAINER_ERROR; + } + log_trace(os, container)("Cache usage is: " JULONG_FORMAT, cache); + return (jlong)cache; } // Note that for cgroups v2 the actual limits set for swap and @@ -164,17 +179,19 @@ char* CgroupV2Subsystem::mem_soft_limit_val() { // respectively. In order to properly report a cgroup v1 like // compound value we need to sum the two values. Setting a swap limit // without also setting a memory limit is not allowed. 
-jlong CgroupV2Subsystem::memory_and_swap_limit_in_bytes() { - char* mem_swp_limit_str = mem_swp_limit_val(); - if (mem_swp_limit_str == nullptr) { +jlong CgroupV2MemoryController::memory_and_swap_limit_in_bytes(julong phys_mem, + julong host_swap /* unused in cg v2 */) { + jlong swap_limit; + bool is_ok = reader()->read_number_handle_max("/memory.swap.max", &swap_limit); + if (!is_ok) { // Some container tests rely on this trace logging to happen. - log_trace(os, container)("Memory and Swap Limit is: %d", OSCONTAINER_ERROR); + log_trace(os, container)("Swap Limit failed: %d", OSCONTAINER_ERROR); // swap disabled at kernel level, treat it as no swap - return read_memory_limit_in_bytes(); + return read_memory_limit_in_bytes(phys_mem); } - jlong swap_limit = limit_from_str(mem_swp_limit_str); + log_trace(os, container)("Swap Limit is: " JLONG_FORMAT, swap_limit); if (swap_limit >= 0) { - jlong memory_limit = read_memory_limit_in_bytes(); + jlong memory_limit = read_memory_limit_in_bytes(phys_mem); assert(memory_limit >= 0, "swap limit without memory limit?"); return memory_limit + swap_limit; } @@ -182,20 +199,31 @@ jlong CgroupV2Subsystem::memory_and_swap_limit_in_bytes() { return swap_limit; } -char* CgroupV2Subsystem::mem_swp_limit_val() { - GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.swap.max", - "Memory and Swap Limit is: %s", "%1023s", mem_swp_limit_str, 1024); - return os::strdup(mem_swp_limit_str); +// memory.swap.current : total amount of swap currently used by the cgroup and its descendants +static +jlong memory_swap_current_value(CgroupV2Controller* ctrl) { + julong swap_current; + CONTAINER_READ_NUMBER_CHECKED(ctrl, "/memory.swap.current", "Swap currently used", swap_current); + return (jlong)swap_current; } -// memory.swap.current : total amount of swap currently used by the cgroup and its descendants -char* CgroupV2Subsystem::mem_swp_current_val() { - GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.swap.current", - "Swap currently used is: %s", 
"%1023s", mem_swp_current_str, 1024); - return os::strdup(mem_swp_current_str); +jlong CgroupV2MemoryController::memory_and_swap_usage_in_bytes(julong host_mem, julong host_swap) { + jlong memory_usage = memory_usage_in_bytes(); + if (memory_usage >= 0) { + jlong swap_current = memory_swap_current_value(reader()); + return memory_usage + (swap_current >= 0 ? swap_current : 0); + } + return memory_usage; // not supported or unlimited case +} + +static +jlong memory_limit_value(CgroupV2Controller* ctrl) { + jlong memory_limit; + CONTAINER_READ_NUMBER_CHECKED_MAX(ctrl, "/memory.max", "Memory Limit", memory_limit); + return memory_limit; } -/* memory_limit_in_bytes +/* read_memory_limit_in_bytes * * Return the limit of available memory for this process. * @@ -203,9 +231,8 @@ char* CgroupV2Subsystem::mem_swp_current_val() { * memory limit in bytes or * -1 for unlimited, OSCONTAINER_ERROR for an error */ -jlong CgroupV2Subsystem::read_memory_limit_in_bytes() { - char * mem_limit_str = mem_limit_val(); - jlong limit = limit_from_str(mem_limit_str); +jlong CgroupV2MemoryController::read_memory_limit_in_bytes(julong phys_mem) { + jlong limit = memory_limit_value(reader()); if (log_is_enabled(Trace, os, container)) { if (limit == -1) { log_trace(os, container)("Memory Limit is: Unlimited"); @@ -213,22 +240,36 @@ jlong CgroupV2Subsystem::read_memory_limit_in_bytes() { log_trace(os, container)("Memory Limit is: " JLONG_FORMAT, limit); } } + if (log_is_enabled(Debug, os, container)) { + julong read_limit = (julong)limit; // avoid signed/unsigned compare + if (limit < 0 || read_limit >= phys_mem) { + const char* reason; + if (limit == -1) { + reason = "unlimited"; + } else if (limit == OSCONTAINER_ERROR) { + reason = "failed"; + } else { + assert(read_limit >= phys_mem, "Expected mem limit to exceed host memory"); + reason = "ignored"; + } + log_debug(os, container)("container memory limit %s: " JLONG_FORMAT ", using host value " JLONG_FORMAT, + reason, limit, phys_mem); + } + } 
return limit; } -char* CgroupV2Subsystem::mem_limit_val() { - GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.max", - "Raw value for memory limit is: %s", "%1023s", mem_limit_str, 1024); - return os::strdup(mem_limit_str); +static +jlong memory_swap_limit_value(CgroupV2Controller* ctrl) { + jlong swap_limit; + CONTAINER_READ_NUMBER_CHECKED_MAX(ctrl, "/memory.swap.max", "Swap Limit", swap_limit); + return swap_limit; } #ifndef NATIVE_IMAGE -void CgroupV2Subsystem::print_version_specific_info(outputStream* st) { - char* mem_swp_current_str = mem_swp_current_val(); - jlong swap_current = limit_from_str(mem_swp_current_str); - - char* mem_swp_limit_str = mem_swp_limit_val(); - jlong swap_limit = limit_from_str(mem_swp_limit_str); +void CgroupV2MemoryController::print_version_specific_info(outputStream* st, julong phys_mem) { + jlong swap_current = memory_swap_current_value(reader()); + jlong swap_limit = memory_swap_limit_value(reader()); OSContainer::print_container_helper(st, swap_current, "memory_swap_current_in_bytes"); OSContainer::print_container_helper(st, swap_limit, "memory_swap_max_limit_in_bytes"); @@ -244,12 +285,6 @@ char* CgroupV2Controller::construct_path(char* mount_path, char *cgroup_path) { return os::strdup(ss.base()); } -char* CgroupV2Subsystem::pids_max_val() { - GET_CONTAINER_INFO_CPTR(cptr, _unified, "/pids.max", - "Maximum number of tasks is: %s", "%1023s", pidsmax, 1024); - return os::strdup(pidsmax); -} - /* pids_max * * Return the maximum number of tasks available to the process @@ -260,8 +295,9 @@ char* CgroupV2Subsystem::pids_max_val() { * OSCONTAINER_ERROR for not supported */ jlong CgroupV2Subsystem::pids_max() { - char * pidsmax_str = pids_max_val(); - return limit_from_str(pidsmax_str); + jlong pids_max; + CONTAINER_READ_NUMBER_CHECKED_MAX(unified(), "/pids.max", "Maximum number of tasks", pids_max); + return pids_max; } /* pids_current @@ -273,7 +309,7 @@ jlong CgroupV2Subsystem::pids_max() { * OSCONTAINER_ERROR for not supported */ 
jlong CgroupV2Subsystem::pids_current() { - GET_CONTAINER_INFO(jlong, _unified, "/pids.current", - "Current number of tasks is: ", JLONG_FORMAT, JLONG_FORMAT, pids_current); + julong pids_current; + CONTAINER_READ_NUMBER_CHECKED(unified(), "/pids.current", "Current number of tasks", pids_current); return pids_current; } diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp index c25b740b3b63..f105cd201f94 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp @@ -35,68 +35,105 @@ class CgroupV2Controller: public CgroupController { char *_mount_path; /* The cgroup path for the controller */ char *_cgroup_path; + bool _read_only; /* Constructed full path to the subsystem directory */ char *_path; static char* construct_path(char* mount_path, char *cgroup_path); public: - CgroupV2Controller(char * mount_path, char *cgroup_path) { - _mount_path = mount_path; - _cgroup_path = os::strdup(cgroup_path); - _path = construct_path(mount_path, cgroup_path); + CgroupV2Controller(char* mount_path, + char *cgroup_path, + bool ro) : _mount_path(os::strdup(mount_path)), + _cgroup_path(os::strdup(cgroup_path)), + _read_only(ro), + _path(construct_path(mount_path, cgroup_path)) { + } + // Shallow copy constructor + CgroupV2Controller(const CgroupV2Controller& o) : + _mount_path(o._mount_path), + _cgroup_path(o._cgroup_path), + _read_only(o._read_only), + _path(o._path) { + } + ~CgroupV2Controller() { + // At least one controller exists with references to the paths } - char *subsystem_path() { return _path; } + char *subsystem_path() override { return _path; } + bool is_read_only() override { return _read_only; } +}; + +class CgroupV2CpuController: public 
CgroupCpuController { + private: + CgroupV2Controller _reader; + CgroupV2Controller* reader() { return &_reader; } + public: + CgroupV2CpuController(const CgroupV2Controller& reader) : _reader(reader) { + } + int cpu_quota() override; + int cpu_period() override; + int cpu_shares() override; + bool is_read_only() override { + return reader()->is_read_only(); + } +}; + +class CgroupV2MemoryController final: public CgroupMemoryController { + private: + CgroupV2Controller _reader; + CgroupV2Controller* reader() { return &_reader; } + public: + CgroupV2MemoryController(const CgroupV2Controller& reader) : _reader(reader) { + } + + jlong read_memory_limit_in_bytes(julong upper_bound) override; + jlong memory_and_swap_limit_in_bytes(julong host_mem, julong host_swp) override; + jlong memory_and_swap_usage_in_bytes(julong host_mem, julong host_swp) override; + jlong memory_soft_limit_in_bytes(julong upper_bound) override; + jlong memory_usage_in_bytes() override; + jlong memory_max_usage_in_bytes() override; + jlong rss_usage_in_bytes() override; + jlong cache_usage_in_bytes() override; +#ifndef NATIVE_IMAGE + void print_version_specific_info(outputStream* st, julong host_mem) override; +#endif // !NATIVE_IMAGE + bool is_read_only() override { + return reader()->is_read_only(); + } }; class CgroupV2Subsystem: public CgroupSubsystem { private: /* One unified controller */ - CgroupController* _unified = nullptr; + CgroupV2Controller _unified; /* Caching wrappers for cpu/memory metrics */ - CachingCgroupController* _memory = nullptr; - CachingCgroupController* _cpu = nullptr; + CachingCgroupController* _memory = nullptr; + CachingCgroupController* _cpu = nullptr; - char *mem_limit_val(); - char *mem_swp_limit_val(); - char *mem_swp_current_val(); - char *mem_soft_limit_val(); - char *cpu_quota_val(); - char *pids_max_val(); + CgroupV2Controller* unified() { return &_unified; } public: - CgroupV2Subsystem(CgroupController * unified) { - _unified = unified; - _memory = new 
CachingCgroupController(unified); - _cpu = new CachingCgroupController(unified); + CgroupV2Subsystem(CgroupV2MemoryController* memory, + CgroupV2CpuController* cpu, + CgroupV2Controller unified) : + _unified(unified), + _memory(new CachingCgroupController(memory)), + _cpu(new CachingCgroupController(cpu)) { } - jlong read_memory_limit_in_bytes(); - int cpu_quota(); - int cpu_period(); - int cpu_shares(); - jlong memory_and_swap_limit_in_bytes(); - jlong memory_soft_limit_in_bytes(); - jlong memory_usage_in_bytes(); - jlong memory_max_usage_in_bytes(); - jlong rss_usage_in_bytes(); - jlong cache_usage_in_bytes(); - - char * cpu_cpuset_cpus(); - char * cpu_cpuset_memory_nodes(); - jlong pids_max(); - jlong pids_current(); + char * cpu_cpuset_cpus() override; + char * cpu_cpuset_memory_nodes() override; + jlong pids_max() override; + jlong pids_current() override; -#ifndef NATIVE_IMAGE - void print_version_specific_info(outputStream* st); -#endif // !NATIVE_IMAGE + bool is_containerized() override; - const char * container_type() { + const char * container_type() override { return "cgroupv2"; } - CachingCgroupController * memory_controller() { return _memory; } - CachingCgroupController * cpu_controller() { return _cpu; } + CachingCgroupController* memory_controller() { return _memory; } + CachingCgroupController* cpu_controller() { return _cpu; } }; #endif // CGROUP_V2_SUBSYSTEM_LINUX_HPP diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/osContainer_linux.cpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/osContainer_linux.cpp index d3f1aca2356b..a4bce201ad17 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/osContainer_linux.cpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/osContainer_linux.cpp @@ -59,8 +59,43 @@ void OSContainer::init() { if (cgroup_subsystem == nullptr) { return; // Required subsystem files not found or other error } - 
- _is_containerized = true; + /* + * In order to avoid a false positive on is_containerized() on + * Linux systems outside a container *and* to ensure compatibility + * with in-container usage, we detemine is_containerized() by two + * steps: + * 1.) Determine if all the cgroup controllers are mounted read only. + * If yes, is_containerized() == true. Otherwise, do the fallback + * in 2.) + * 2.) Query for memory and cpu limits. If any limit is set, we set + * is_containerized() == true. + * + * Step 1.) covers the basic in container use-cases. Step 2.) ensures + * that limits enforced by other means (e.g. systemd slice) are properly + * detected. + */ + const char *reason; + bool any_mem_cpu_limit_present = false; + bool controllers_read_only = cgroup_subsystem->is_containerized(); + if (controllers_read_only) { + // in-container case + reason = " because all controllers are mounted read-only (container case)"; + } else { + // We can be in one of two cases: + // 1.) On a physical Linux system without any limit + // 2.) On a physical Linux system with a limit enforced by other means (like systemd slice) + any_mem_cpu_limit_present = cgroup_subsystem->memory_limit_in_bytes() > 0 || + os::Linux::active_processor_count() != cgroup_subsystem->active_processor_count(); + if (any_mem_cpu_limit_present) { + reason = " because either a cpu or a memory limit is present"; + } else { + reason = " because no cpu or memory limit is present"; + } + } + _is_containerized = controllers_read_only || any_mem_cpu_limit_present; + log_debug(os, container)("OSContainer::init: is_containerized() = %s%s", + _is_containerized ? 
"true" : "false", + reason); } const char * OSContainer::container_type() { @@ -78,6 +113,11 @@ jlong OSContainer::memory_and_swap_limit_in_bytes() { return cgroup_subsystem->memory_and_swap_limit_in_bytes(); } +jlong OSContainer::memory_and_swap_usage_in_bytes() { + assert(cgroup_subsystem != nullptr, "cgroup subsystem not available"); + return cgroup_subsystem->memory_and_swap_usage_in_bytes(); +} + jlong OSContainer::memory_soft_limit_in_bytes() { assert(cgroup_subsystem != nullptr, "cgroup subsystem not available"); return cgroup_subsystem->memory_soft_limit_in_bytes(); @@ -153,7 +193,7 @@ jlong OSContainer::pids_current() { #ifndef NATIVE_IMAGE void OSContainer::print_container_helper(outputStream* st, jlong j, const char* metrics) { st->print("%s: ", metrics); - if (j > 0) { + if (j >= 0) { if (j >= 1024) { st->print_cr(UINT64_FORMAT " k", uint64_t(j) / K); } else { diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/osContainer_linux.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/osContainer_linux.hpp index e8ab1bd2a28c..e6573eab54e6 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/osContainer_linux.hpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/osContainer_linux.hpp @@ -55,6 +55,7 @@ class OSContainer: AllStatic { static jlong memory_limit_in_bytes(); static jlong memory_and_swap_limit_in_bytes(); + static jlong memory_and_swap_usage_in_bytes(); static jlong memory_soft_limit_in_bytes(); static jlong memory_usage_in_bytes(); static jlong memory_max_usage_in_bytes(); diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/os_linux.cpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/os_linux.cpp index d6463aaa9cfa..8b8d458909e9 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/os_linux.cpp +++ 
b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/os_linux.cpp @@ -27,7 +27,6 @@ #ifndef NATIVE_IMAGE // no precompiled headers #include "classfile/vmSymbols.hpp" -#include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" #include "compiler/compileBroker.hpp" #include "compiler/disassembler.hpp" @@ -97,6 +96,8 @@ #endif // !NATIVE_IMAGE // put OS-includes here +# include +# include # include # include # include @@ -302,6 +303,60 @@ julong os::Linux::free_memory() { log_trace(os)("free memory: " JULONG_FORMAT, free_mem); return free_mem; } + +jlong os::total_swap_space() { + if (OSContainer::is_containerized()) { + if (OSContainer::memory_limit_in_bytes() > 0) { + return (jlong)(OSContainer::memory_and_swap_limit_in_bytes() - OSContainer::memory_limit_in_bytes()); + } + } + struct sysinfo si; + int ret = sysinfo(&si); + if (ret != 0) { + return -1; + } + return (jlong)(si.totalswap * si.mem_unit); +} + +static jlong host_free_swap() { + struct sysinfo si; + int ret = sysinfo(&si); + if (ret != 0) { + return -1; + } + return (jlong)(si.freeswap * si.mem_unit); +} + +jlong os::free_swap_space() { + // os::total_swap_space() might return the containerized limit which might be + // less than host_free_swap(). The upper bound of free swap needs to be the lower of the two. 
+ jlong host_free_swap_val = MIN2(os::total_swap_space(), host_free_swap()); + assert(host_free_swap_val >= 0, "sysinfo failed?"); + if (OSContainer::is_containerized()) { + jlong mem_swap_limit = OSContainer::memory_and_swap_limit_in_bytes(); + jlong mem_limit = OSContainer::memory_limit_in_bytes(); + if (mem_swap_limit >= 0 && mem_limit >= 0) { + jlong delta_limit = mem_swap_limit - mem_limit; + if (delta_limit <= 0) { + return 0; + } + jlong mem_swap_usage = OSContainer::memory_and_swap_usage_in_bytes(); + jlong mem_usage = OSContainer::memory_usage_in_bytes(); + if (mem_swap_usage > 0 && mem_usage > 0) { + jlong delta_usage = mem_swap_usage - mem_usage; + if (delta_usage >= 0) { + jlong free_swap = delta_limit - delta_usage; + return free_swap >= 0 ? free_swap : 0; + } + } + } + // unlimited or not supported. Fall through to return host value + log_trace(os,container)("os::free_swap_space: container_swap_limit=" JLONG_FORMAT + " container_mem_limit=" JLONG_FORMAT " returning host value: " JLONG_FORMAT, + mem_swap_limit, mem_limit, host_free_swap_val); + } + return host_free_swap_val; +} #endif // !NATIVE_IMAGE julong os::physical_memory() { @@ -320,6 +375,15 @@ julong os::physical_memory() { } #ifndef NATIVE_IMAGE +size_t os::rss() { + size_t size = 0; + os::Linux::meminfo_t info; + if (os::Linux::query_process_memory_info(&info)) { + size = info.vmrss * K; + } + return size; +} + static uint64_t initial_total_ticks = 0; static uint64_t initial_steal_ticks = 0; static bool has_initial_tick_info = false; @@ -331,6 +395,22 @@ static void next_line(FILE *f) { } while (c != '\n' && c != EOF); } +void os::Linux::kernel_version(long* major, long* minor) { + *major = -1; + *minor = -1; + + struct utsname buffer; + int ret = uname(&buffer); + if (ret != 0) { + log_warning(os)("uname(2) failed to get kernel version: %s", os::errno_name(ret)); + return; + } + int nr_matched = sscanf(buffer.release, "%ld.%ld", major, minor); + if (nr_matched != 2) { + 
log_warning(os)("Parsing kernel version failed, expected 2 version numbers, only matched %d", nr_matched); + } +} + bool os::Linux::get_tick_information(CPUPerfTicks* pticks, int which_logical_cpu) { FILE* fh; uint64_t userTicks, niceTicks, systemTicks, idleTicks; @@ -434,7 +514,7 @@ pid_t os::Linux::gettid() { julong os::Linux::host_swap() { struct sysinfo si; sysinfo(&si); - return (julong)si.totalswap; + return (julong)(si.totalswap * si.mem_unit); } #ifndef NATIVE_IMAGE @@ -587,17 +667,6 @@ void os::init_system_properties_values() { #undef EXTENSIONS_DIR } -//////////////////////////////////////////////////////////////////////////////// -// breakpoint support - -void os::breakpoint() { - BREAKPOINT; -} - -extern "C" void breakpoint() { - // use debugger to set breakpoint here -} - ////////////////////////////////////////////////////////////////////////////// // detecting pthread library @@ -1318,7 +1387,7 @@ void os::Linux::capture_initial_stack(size_t max_size) { i = 0; if (s) { // Skip blank chars - do { s++; } while (s && isspace(*s)); + do { s++; } while (s && isspace((unsigned char) *s)); #define _UFM UINTX_FORMAT #define _DFM INTX_FORMAT @@ -2240,6 +2309,8 @@ void os::Linux::print_proc_sys_info(outputStream* st) { "/proc/sys/kernel/threads-max", st); _print_ascii_file_h("/proc/sys/vm/max_map_count (maximum number of memory map areas a process may have)", "/proc/sys/vm/max_map_count", st); + _print_ascii_file_h("/proc/sys/vm/swappiness (control to define how aggressively the kernel swaps out anonymous memory)", + "/proc/sys/vm/swappiness", st); _print_ascii_file_h("/proc/sys/kernel/pid_max (system-wide limit on number of process identifiers)", "/proc/sys/kernel/pid_max", st); } @@ -2794,6 +2865,8 @@ void os::jvm_path(char *buf, jint buflen) { void linux_wrap_code(char* base, size_t size) { static volatile jint cnt = 0; + static_assert(sizeof(off_t) == 8, "Expected Large File Support in this file"); + if (!UseOprofile) { return; } @@ -2872,11 +2945,20 @@ 
int os::Linux::commit_memory_impl(char* addr, size_t size, bool exec) { numa_make_global(addr, size); } return 0; + } else { + ErrnoPreserver ep; + log_trace(os, map)("mmap failed: " RANGEFMT " errno=(%s)", + RANGEFMTARGS(addr, size), + os::strerror(ep.saved_errno())); } int err = errno; // save errno from mmap() call above if (!recoverable_mmap_error(err)) { + ErrnoPreserver ep; + log_trace(os, map)("mmap failed: " RANGEFMT " errno=(%s)", + RANGEFMTARGS(addr, size), + os::strerror(ep.saved_errno())); warn_fail_commit_memory(addr, size, exec, err); vm_exit_out_of_memory(size, OOM_MMAP_ERROR, "committing reserved memory."); } @@ -2918,6 +3000,15 @@ void os::pd_commit_memory_or_exit(char* addr, size_t size, bool exec, #define MADV_HUGEPAGE 14 #endif +// Define MADV_POPULATE_WRITE here so we can build HotSpot on old systems. +#define MADV_POPULATE_WRITE_value 23 +#ifndef MADV_POPULATE_WRITE + #define MADV_POPULATE_WRITE MADV_POPULATE_WRITE_value +#else + // Sanity-check our assumed default value if we build with a new enough libc. + STATIC_ASSERT(MADV_POPULATE_WRITE == MADV_POPULATE_WRITE_value); +#endif + // Note that the value for MAP_FIXED_NOREPLACE differs between architectures, but all architectures // supported by OpenJDK share the same flag value. #define MAP_FIXED_NOREPLACE_value 0x100000 @@ -2925,7 +3016,7 @@ void os::pd_commit_memory_or_exit(char* addr, size_t size, bool exec, #define MAP_FIXED_NOREPLACE MAP_FIXED_NOREPLACE_value #else // Sanity-check our assumed default value if we build with a new enough libc. 
- static_assert(MAP_FIXED_NOREPLACE == MAP_FIXED_NOREPLACE_value, "MAP_FIXED_NOREPLACE != MAP_FIXED_NOREPLACE_value"); + STATIC_ASSERT(MAP_FIXED_NOREPLACE == MAP_FIXED_NOREPLACE_value); #endif int os::Linux::commit_memory_impl(char* addr, size_t size, @@ -2977,6 +3068,31 @@ void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) { } } +size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) { + const size_t len = pointer_delta(last, first, sizeof(char)) + page_size; + // Use madvise to pretouch on Linux when THP is used, and fallback to the + // common method if unsupported. THP can form right after madvise rather than + // being assembled later. + if (HugePages::thp_mode() == THPMode::always || UseTransparentHugePages) { + int err = 0; + if (UseMadvPopulateWrite && + ::madvise(first, len, MADV_POPULATE_WRITE) == -1) { + err = errno; + } + if (!UseMadvPopulateWrite || err == EINVAL) { // Not to use or not supported + // When using THP we need to always pre-touch using small pages as the + // OS will initially always use small pages. 
+ return os::vm_page_size(); + } else if (err != 0) { + log_info(gc, os)("::madvise(" PTR_FORMAT ", " SIZE_FORMAT ", %d) failed; " + "error='%s' (errno=%d)", p2i(first), len, + MADV_POPULATE_WRITE, os::strerror(err), err); + } + return 0; + } + return page_size; +} + void os::numa_make_global(char *addr, size_t bytes) { Linux::numa_interleave_memory(addr, bytes); } @@ -3374,7 +3490,14 @@ struct bitmask* os::Linux::_numa_membind_bitmask; bool os::pd_uncommit_memory(char* addr, size_t size, bool exec) { uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE, MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE|MAP_ANONYMOUS, -1, 0); - return res != (uintptr_t) MAP_FAILED; + if (res == (uintptr_t) MAP_FAILED) { + ErrnoPreserver ep; + log_trace(os, map)("mmap failed: " RANGEFMT " errno=(%s)", + RANGEFMTARGS(addr, size), + os::strerror(ep.saved_errno())); + return false; + } + return true; } static address get_stack_commited_bottom(address bottom, size_t size) { @@ -3424,81 +3547,6 @@ static address get_stack_commited_bottom(address bottom, size_t size) { return nbot; } -bool os::committed_in_range(address start, size_t size, address& committed_start, size_t& committed_size) { - int mincore_return_value; - const size_t stripe = 1024; // query this many pages each time - unsigned char vec[stripe + 1]; - // set a guard - vec[stripe] = 'X'; - - const size_t page_sz = os::vm_page_size(); - uintx pages = size / page_sz; - - assert(is_aligned(start, page_sz), "Start address must be page aligned"); - assert(is_aligned(size, page_sz), "Size must be page aligned"); - - committed_start = nullptr; - - int loops = checked_cast((pages + stripe - 1) / stripe); - int committed_pages = 0; - address loop_base = start; - bool found_range = false; - - for (int index = 0; index < loops && !found_range; index ++) { - assert(pages > 0, "Nothing to do"); - uintx pages_to_query = (pages >= stripe) ? 
stripe : pages; - pages -= pages_to_query; - - // Get stable read - while ((mincore_return_value = mincore(loop_base, pages_to_query * page_sz, vec)) == -1 && errno == EAGAIN); - - // During shutdown, some memory goes away without properly notifying NMT, - // E.g. ConcurrentGCThread/WatcherThread can exit without deleting thread object. - // Bailout and return as not committed for now. - if (mincore_return_value == -1 && errno == ENOMEM) { - return false; - } - - // If mincore is not supported. - if (mincore_return_value == -1 && errno == ENOSYS) { - return false; - } - - assert(vec[stripe] == 'X', "overflow guard"); - assert(mincore_return_value == 0, "Range must be valid"); - // Process this stripe - for (uintx vecIdx = 0; vecIdx < pages_to_query; vecIdx ++) { - if ((vec[vecIdx] & 0x01) == 0) { // not committed - // End of current contiguous region - if (committed_start != nullptr) { - found_range = true; - break; - } - } else { // committed - // Start of region - if (committed_start == nullptr) { - committed_start = loop_base + page_sz * vecIdx; - } - committed_pages ++; - } - } - - loop_base += pages_to_query * page_sz; - } - - if (committed_start != nullptr) { - assert(committed_pages > 0, "Must have committed region"); - assert(committed_pages <= int(size / page_sz), "Can not commit more than it has"); - assert(committed_start >= start && committed_start < start + size, "Out of range"); - committed_size = page_sz * committed_pages; - return true; - } else { - assert(committed_pages == 0, "Should not have committed region"); - return false; - } -} - - // Linux uses a growable mapping for the stack, and if the mapping for // the stack guard pages is not removed when we detach a thread the // stack cannot grow beyond the pages where the stack guard was @@ -3593,8 +3641,14 @@ static char* anon_mmap(char* requested_addr, size_t bytes) { // touch an uncommitted page. Otherwise, the read/write might // succeed if we have enough swap space to back the physical page. 
char* addr = (char*)::mmap(requested_addr, bytes, PROT_NONE, flags, -1, 0); - - return addr == MAP_FAILED ? nullptr : addr; + if (addr == MAP_FAILED) { + ErrnoPreserver ep; + log_trace(os, map)("mmap failed: " RANGEFMT " errno=(%s)", + RANGEFMTARGS(requested_addr, bytes), + os::strerror(ep.saved_errno())); + return nullptr; + } + return addr; } // Allocate (using mmap, NO_RESERVE, with small pages) at either a given request address @@ -3615,7 +3669,12 @@ static char* anon_mmap_aligned(char* req_addr, size_t bytes, size_t alignment) { if (start != nullptr) { if (req_addr != nullptr) { if (start != req_addr) { - ::munmap(start, extra_size); + if (::munmap(start, extra_size) != 0) { + ErrnoPreserver ep; + log_trace(os, map)("munmap failed: " RANGEFMT " errno=(%s)", + RANGEFMTARGS(start, extra_size), + os::strerror(ep.saved_errno())); + } start = nullptr; } } else { @@ -3623,10 +3682,22 @@ static char* anon_mmap_aligned(char* req_addr, size_t bytes, size_t alignment) { char* const end_aligned = start_aligned + bytes; char* const end = start + extra_size; if (start_aligned > start) { - ::munmap(start, start_aligned - start); + const size_t l = start_aligned - start; + if (::munmap(start, l) != 0) { + ErrnoPreserver ep; + log_trace(os, map)("munmap failed: " RANGEFMT " errno=(%s)", + RANGEFMTARGS(start, l), + os::strerror(ep.saved_errno())); + } } if (end_aligned < end) { - ::munmap(end_aligned, end - end_aligned); + const size_t l = end - end_aligned; + if (::munmap(end_aligned, l) != 0) { + ErrnoPreserver ep; + log_trace(os, map)("munmap failed: " RANGEFMT " errno=(%s)", + RANGEFMTARGS(end_aligned, l), + os::strerror(ep.saved_errno())); + } } start = start_aligned; } @@ -3635,7 +3706,14 @@ static char* anon_mmap_aligned(char* req_addr, size_t bytes, size_t alignment) { } static int anon_munmap(char * addr, size_t size) { - return ::munmap(addr, size) == 0; + if (::munmap(addr, size) != 0) { + ErrnoPreserver ep; + log_trace(os, map)("munmap failed: " RANGEFMT " 
errno=(%s)", + RANGEFMTARGS(addr, size), + os::strerror(ep.saved_errno())); + return 0; + } + return 1; } char* os::pd_reserve_memory(size_t bytes, bool exec) { @@ -3667,7 +3745,7 @@ static bool linux_mprotect(char* addr, size_t size, int prot) { #ifdef CAN_SHOW_REGISTERS_ON_ASSERT if (addr != g_assert_poison) #endif - Events::log(nullptr, "Protecting memory [" INTPTR_FORMAT "," INTPTR_FORMAT "] with protection modes %x", p2i(bottom), p2i(bottom+size), prot); + Events::log_memprotect(nullptr, "Protecting memory [" INTPTR_FORMAT "," INTPTR_FORMAT "] with protection modes %x", p2i(bottom), p2i(bottom+size), prot); return ::mprotect(bottom, size, prot) == 0; } @@ -3696,14 +3774,14 @@ bool os::unguard_memory(char* addr, size_t size) { } static int hugetlbfs_page_size_flag(size_t page_size) { - if (page_size != HugePages::default_static_hugepage_size()) { + if (page_size != HugePages::default_explicit_hugepage_size()) { return (exact_log2(page_size) << MAP_HUGE_SHIFT); } return 0; } static bool hugetlbfs_sanity_check(size_t page_size) { - const os::PageSizes page_sizes = HugePages::static_info().pagesizes(); + const os::PageSizes page_sizes = HugePages::explicit_hugepage_info().pagesizes(); assert(page_sizes.contains(page_size), "Invalid page sizes passed"); // Include the page size flag to ensure we sanity check the correct page size. @@ -3883,8 +3961,8 @@ void os::Linux::large_page_init() { return; } - // Check if the OS supports static hugepages. - if (!UseTransparentHugePages && !HugePages::supports_static_hugepages()) { + // Check if the OS supports explicit hugepages. 
+ if (!UseTransparentHugePages && !HugePages::supports_explicit_hugepages()) { warn_no_large_pages_configured(); UseLargePages = false; return; @@ -3894,8 +3972,12 @@ void os::Linux::large_page_init() { // In THP mode: // - os::large_page_size() is the *THP page size* // - os::pagesizes() has two members, the THP page size and the system page size - assert(HugePages::thp_pagesize() > 0, "Missing OS info"); _large_page_size = HugePages::thp_pagesize(); + if (_large_page_size == 0) { + log_info(pagesize) ("Cannot determine THP page size (kernel < 4.10 ?)"); + _large_page_size = HugePages::thp_pagesize_fallback(); + log_info(pagesize) ("Assuming THP page size to be: " EXACTFMT " (heuristics)", EXACTFMTARGS(_large_page_size)); + } _page_sizes.add(_large_page_size); _page_sizes.add(os::vm_page_size()); // +UseTransparentHugePages implies +UseLargePages @@ -3903,12 +3985,12 @@ void os::Linux::large_page_init() { } else { - // In static hugepage mode: - // - os::large_page_size() is the default static hugepage size (/proc/meminfo "Hugepagesize") + // In explicit hugepage mode: + // - os::large_page_size() is the default explicit hugepage size (/proc/meminfo "Hugepagesize") // - os::pagesizes() contains all hugepage sizes the kernel supports, regardless whether there // are pages configured in the pool or not (from /sys/kernel/hugepages/hugepage-xxxx ...) 
- os::PageSizes all_large_pages = HugePages::static_info().pagesizes(); - const size_t default_large_page_size = HugePages::default_static_hugepage_size(); + os::PageSizes all_large_pages = HugePages::explicit_hugepage_info().pagesizes(); + const size_t default_large_page_size = HugePages::default_explicit_hugepage_size(); // 3) Consistency check and post-processing @@ -3992,7 +4074,7 @@ static bool commit_memory_special(size_t bytes, char* req_addr, bool exec) { assert(UseLargePages, "Should only get here for huge pages"); - assert(!UseTransparentHugePages, "Should only get here for static hugepage mode"); + assert(!UseTransparentHugePages, "Should only get here for explicit hugepage mode"); assert(is_aligned(bytes, page_size), "Unaligned size"); assert(is_aligned(req_addr, page_size), "Unaligned address"); assert(req_addr != nullptr, "Must have a requested address for special mappings"); @@ -4026,7 +4108,7 @@ static char* reserve_memory_special_huge_tlbfs(size_t bytes, size_t page_size, char* req_addr, bool exec) { - const os::PageSizes page_sizes = HugePages::static_info().pagesizes(); + const os::PageSizes page_sizes = HugePages::explicit_hugepage_info().pagesizes(); assert(UseLargePages, "only for Huge TLBFS large pages"); assert(is_aligned(req_addr, alignment), "Must be"); assert(is_aligned(req_addr, page_size), "Must be"); @@ -4065,7 +4147,12 @@ static char* reserve_memory_special_huge_tlbfs(size_t bytes, if (!large_committed) { // Failed to commit large pages, so we need to unmap the // reminder of the orinal reservation. 
- ::munmap(small_start, small_size); + if (::munmap(small_start, small_size) != 0) { + ErrnoPreserver ep; + log_trace(os, map)("munmap failed: " RANGEFMT " errno=(%s)", + RANGEFMTARGS(small_start, small_size), + os::strerror(ep.saved_errno())); + } return nullptr; } @@ -4074,7 +4161,12 @@ static char* reserve_memory_special_huge_tlbfs(size_t bytes, if (!small_committed) { // Failed to commit the remaining size, need to unmap // the large pages part of the reservation. - ::munmap(aligned_start, large_bytes); + if (::munmap(aligned_start, large_bytes) != 0) { + ErrnoPreserver ep; + log_trace(os, map)("munmap failed: " RANGEFMT " errno=(%s)", + RANGEFMTARGS(aligned_start, large_bytes), + os::strerror(ep.saved_errno())); + } return nullptr; } return aligned_start; @@ -4105,7 +4197,7 @@ size_t os::large_page_size() { return _large_page_size; } -// static hugepages (hugetlbfs) allow application to commit large page memory +// explicit hugepages (hugetlbfs) allow application to commit large page memory // on demand. // However, when committing memory with hugepages fails, the region // that was supposed to be committed will lose the old reservation @@ -4147,7 +4239,10 @@ char* os::pd_attempt_reserve_memory_at(char* requested_addr, size_t bytes, bool if (addr != nullptr) { // mmap() is successful but it fails to reserve at the requested address - log_trace(os, map)("Kernel rejected " PTR_FORMAT ", offered " PTR_FORMAT ".", p2i(requested_addr), p2i(addr)); + log_trace(os, map)("Kernel rejected " PTR_FORMAT + ", offered " PTR_FORMAT ".", + p2i(requested_addr), + p2i(addr)); anon_munmap(addr, bytes); } @@ -4156,7 +4251,7 @@ char* os::pd_attempt_reserve_memory_at(char* requested_addr, size_t bytes, bool size_t os::vm_min_address() { // Determined by sysctl vm.mmap_min_addr. It exists as a safety zone to prevent - // NULL pointer dereferences. + // null pointer dereferences. // Most distros set this value to 64 KB. It *can* be zero, but rarely is. 
Here, // we impose a minimum value if vm.mmap_min_addr is too low, for increased protection. static size_t value = 0; @@ -4174,25 +4269,6 @@ size_t os::vm_min_address() { return value; } -// Used to convert frequent JVM_Yield() to nops -bool os::dont_yield() { - return DontYieldALot; -} - -// Linux CFS scheduler (since 2.6.23) does not guarantee sched_yield(2) will -// actually give up the CPU. Since skip buddy (v2.6.28): -// -// * Sets the yielding task as skip buddy for current CPU's run queue. -// * Picks next from run queue, if empty, picks a skip buddy (can be the yielding task). -// * Clears skip buddies for this run queue (yielding task no longer a skip buddy). -// -// An alternative is calling os::naked_short_nanosleep with a small number to avoid -// getting re-scheduled immediately. -// -void os::naked_yield() { - sched_yield(); -} - //////////////////////////////////////////////////////////////////////////////// // thread priority support @@ -4284,7 +4360,7 @@ jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) { // the number of bytes written to out_fd is returned if transfer was successful // otherwise, returns -1 that implies an error jlong os::Linux::sendfile(int out_fd, int in_fd, jlong* offset, jlong count) { - return ::sendfile64(out_fd, in_fd, (off64_t*)offset, (size_t)count); + return ::sendfile(out_fd, in_fd, (off_t*)offset, (size_t)count); } // Determine if the vmid is the parent pid for a child in a PID namespace. @@ -4399,6 +4475,9 @@ void os::init(void) { check_pax(); + // Check the availability of MADV_POPULATE_WRITE. 
+ FLAG_SET_DEFAULT(UseMadvPopulateWrite, (::madvise(0, 0, MADV_POPULATE_WRITE) == 0)); + os::Posix::init(); } @@ -4964,14 +5043,14 @@ int os::open(const char *path, int oflag, int mode) { oflag |= O_CLOEXEC; #endif - int fd = ::open64(path, oflag, mode); + int fd = ::open(path, oflag, mode); if (fd == -1) return -1; //If the open succeeded, the file might still be a directory { - struct stat64 buf64; - int ret = ::fstat64(fd, &buf64); - int st_mode = buf64.st_mode; + struct stat buf; + int ret = ::fstat(fd, &buf); + int st_mode = buf.st_mode; if (ret != -1) { if ((st_mode & S_IFMT) == S_IFDIR) { @@ -5003,67 +5082,14 @@ int os::open(const char *path, int oflag, int mode) { return fd; } - -// create binary file, rewriting existing file if required -int os::create_binary_file(const char* path, bool rewrite_existing) { - int oflags = O_WRONLY | O_CREAT; - oflags |= rewrite_existing ? O_TRUNC : O_EXCL; - return ::open64(path, oflags, S_IREAD | S_IWRITE); -} - // return current position of file pointer jlong os::current_file_offset(int fd) { - return (jlong)::lseek64(fd, (off64_t)0, SEEK_CUR); + return (jlong)::lseek(fd, (off_t)0, SEEK_CUR); } // move file pointer to the specified offset jlong os::seek_to_file_offset(int fd, jlong offset) { - return (jlong)::lseek64(fd, (off64_t)offset, SEEK_SET); -} - -// Map a block of memory. -char* os::pd_map_memory(int fd, const char* file_name, size_t file_offset, - char *addr, size_t bytes, bool read_only, - bool allow_exec) { - int prot; - int flags = MAP_PRIVATE; - - if (read_only) { - prot = PROT_READ; - } else { - prot = PROT_READ | PROT_WRITE; - } - - if (allow_exec) { - prot |= PROT_EXEC; - } - - if (addr != nullptr) { - flags |= MAP_FIXED; - } - - char* mapped_address = (char*)mmap(addr, (size_t)bytes, prot, flags, - fd, file_offset); - if (mapped_address == MAP_FAILED) { - return nullptr; - } - return mapped_address; -} - - -// Remap a block of memory. 
-char* os::pd_remap_memory(int fd, const char* file_name, size_t file_offset, - char *addr, size_t bytes, bool read_only, - bool allow_exec) { - // same as map_memory() on this OS - return os::map_memory(fd, file_name, file_offset, addr, bytes, read_only, - allow_exec); -} - - -// Unmap a block of memory. -bool os::pd_unmap_memory(char* addr, size_t bytes) { - return munmap(addr, bytes) == 0; + return (jlong)::lseek(fd, (off_t)offset, SEEK_SET); } static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time); @@ -5154,7 +5180,7 @@ static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) { if (s == nullptr) return -1; // Skip blank chars - do { s++; } while (s && isspace(*s)); + do { s++; } while (s && isspace((unsigned char) *s)); count = sscanf(s,"%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu", &cdummy, &idummy, &idummy, &idummy, &idummy, &idummy, diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/os_linux.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/os_linux.hpp index 038141b4a3f2..bfcd92a6233f 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/os_linux.hpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/linux/os_linux.hpp @@ -102,6 +102,8 @@ class os::Linux { bool has_steal_ticks; }; + static void kernel_version(long* major, long* minor); + // which_logical_cpu=-1 returns accumulated ticks for all cpus. 
static bool get_tick_information(CPUPerfTicks* pticks, int which_logical_cpu); static bool _stack_is_executable; diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/posix/os_posix.cpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/posix/os_posix.cpp index dcc353604f1b..46c1b2330a11 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/posix/os_posix.cpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/posix/os_posix.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -99,6 +99,9 @@ #define MAP_ANONYMOUS MAP_ANON #endif +/* Input/Output types for mincore(2) */ +typedef LINUX_ONLY(unsigned) char mincore_vec_t; + static jlong initial_time_count = 0; static int clock_tics_per_sec = 100; @@ -152,6 +155,94 @@ void os::check_dump_limit(char* buffer, size_t bufferSize) { VMError::record_coredump_status(buffer, success); } +bool os::committed_in_range(address start, size_t size, address& committed_start, size_t& committed_size) { + +#ifdef _AIX + committed_start = start; + committed_size = size; + return true; +#else + + int mincore_return_value; + constexpr size_t stripe = 1024; // query this many pages each time + mincore_vec_t vec [stripe + 1]; + + // set a guard + DEBUG_ONLY(vec[stripe] = 'X'); + + size_t page_sz = os::vm_page_size(); + uintx pages = size / page_sz; + + assert(is_aligned(start, page_sz), "Start address must be page aligned"); + assert(is_aligned(size, page_sz), "Size must be page aligned"); + + committed_start = nullptr; + + int loops = checked_cast((pages + stripe - 1) / stripe); + int committed_pages = 0; + address loop_base = start; + bool found_range = false; + + for (int index = 0; 
index < loops && !found_range; index ++) { + assert(pages > 0, "Nothing to do"); + uintx pages_to_query = (pages >= stripe) ? stripe : pages; + pages -= pages_to_query; + + // Get stable read + int fail_count = 0; + while ((mincore_return_value = mincore(loop_base, pages_to_query * page_sz, vec)) == -1 && errno == EAGAIN){ + if (++fail_count == 1000){ + return false; + } + } + + // During shutdown, some memory goes away without properly notifying NMT, + // E.g. ConcurrentGCThread/WatcherThread can exit without deleting thread object. + // Bailout and return as not committed for now. + if (mincore_return_value == -1 && errno == ENOMEM) { + return false; + } + + // If mincore is not supported. + if (mincore_return_value == -1 && errno == ENOSYS) { + return false; + } + + assert(vec[stripe] == 'X', "overflow guard"); + assert(mincore_return_value == 0, "Range must be valid"); + // Process this stripe + for (uintx vecIdx = 0; vecIdx < pages_to_query; vecIdx ++) { + if ((vec[vecIdx] & 0x01) == 0) { // not committed + // End of current contiguous region + if (committed_start != nullptr) { + found_range = true; + break; + } + } else { // committed + // Start of region + if (committed_start == nullptr) { + committed_start = loop_base + page_sz * vecIdx; + } + committed_pages ++; + } + } + + loop_base += pages_to_query * page_sz; + } + + if (committed_start != nullptr) { + assert(committed_pages > 0, "Must have committed region"); + assert(committed_pages <= int(size / page_sz), "Can not commit more than it has"); + assert(committed_start >= start && committed_start < start + size, "Out of range"); + committed_size = page_sz * committed_pages; + return true; + } else { + assert(committed_pages == 0, "Should not have committed region"); + return false; + } +#endif +} + int os::get_native_stack(address* stack, int frames, int toSkip) { int frame_idx = 0; int num_of_frames; // number of frames captured @@ -193,6 +284,17 @@ size_t os::lasterror(char *buf, size_t len) { return 
n; } +//////////////////////////////////////////////////////////////////////////////// +// breakpoint support + +void os::breakpoint() { + BREAKPOINT; +} + +extern "C" void breakpoint() { + // use debugger to set breakpoint here +} + // Return true if user is running as root. bool os::have_special_privileges() { static bool privileges = (getuid() != geteuid()) || (getgid() != getegid()); @@ -271,7 +373,7 @@ bool os::dir_is_empty(const char* path) { return result; } -static char* reserve_mmapped_memory(size_t bytes, char* requested_addr) { +static char* reserve_mmapped_memory(size_t bytes, char* requested_addr, MEMFLAGS flag) { char * addr; int flags = MAP_PRIVATE NOT_AIX( | MAP_NORESERVE ) | MAP_ANONYMOUS; if (requested_addr != nullptr) { @@ -286,13 +388,14 @@ static char* reserve_mmapped_memory(size_t bytes, char* requested_addr) { flags, -1, 0); if (addr != MAP_FAILED) { - MemTracker::record_virtual_memory_reserve((address)addr, bytes, CALLER_PC); + MemTracker::record_virtual_memory_reserve((address)addr, bytes, CALLER_PC, flag); return addr; } return nullptr; } static int util_posix_fallocate(int fd, off_t offset, off_t len) { + static_assert(sizeof(off_t) == 8, "Expected Large File Support in this file"); #ifdef __APPLE__ fstore_t store = { F_ALLOCATECONTIG, F_PEOFPOSMODE, 0, len }; // First we try to get a continuous chunk of disk space @@ -398,7 +501,7 @@ char* os::reserve_memory_aligned(size_t size, size_t alignment, bool exec) { return chop_extra_memory(size, alignment, extra_base, extra_size); } -char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int file_desc) { +char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int file_desc, MEMFLAGS flag) { size_t extra_size = calculate_aligned_extra_size(size, alignment); // For file mapping, we do not call os:map_memory_to_file(size,fd) since: // - we later chop away parts of the mapping using os::release_memory and that could fail if the @@ -406,7 +509,7 @@ char* 
os::map_memory_to_file_aligned(size_t size, size_t alignment, int file_des // - The memory API os::reserve_memory uses is an implementation detail. It may (and usually is) // mmap but it also may System V shared memory which cannot be uncommitted as a whole, so // chopping off and unmapping excess bits back and front (see below) would not work. - char* extra_base = reserve_mmapped_memory(extra_size, nullptr); + char* extra_base = reserve_mmapped_memory(extra_size, nullptr, flag); if (extra_base == nullptr) { return nullptr; } @@ -420,17 +523,6 @@ char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int file_des } #endif // !NATIVE_IMAGE -int os::vsnprintf(char* buf, size_t len, const char* fmt, va_list args) { - // All supported POSIX platforms provide C99 semantics. - ALLOW_C_FUNCTION(::vsnprintf, int result = ::vsnprintf(buf, len, fmt, args);) - // If an encoding error occurred (result < 0) then it's not clear - // whether the buffer is NUL terminated, so ensure it is. - if ((result < 0) && (len > 0)) { - buf[len - 1] = '\0'; - } - return result; -} - #ifndef NATIVE_IMAGE int os::get_fileno(FILE* fp) { return NOT_AIX(::)fileno(fp); @@ -504,7 +596,7 @@ void os::Posix::print_rlimit_info(outputStream* st) { #if defined(AIX) st->print(", NPROC "); - st->print("%d", sysconf(_SC_CHILD_MAX)); + st->print("%ld", sysconf(_SC_CHILD_MAX)); print_rlimit(st, ", THREADS", RLIMIT_THREADS); #else @@ -617,9 +709,14 @@ void os::print_jni_name_suffix_on(outputStream* st, int args_size) { bool os::get_host_name(char* buf, size_t buflen) { struct utsname name; - uname(&name); - jio_snprintf(buf, buflen, "%s", name.nodename); - return true; + int retcode = uname(&name); + if (retcode != -1) { + jio_snprintf(buf, buflen, "%s", name.nodename); + return true; + } + const char* errmsg = os::strerror(errno); + log_warning(os)("Failed to get host name, error message: %s", errmsg); + return false; } #ifndef _LP64 @@ -718,7 +815,16 @@ void* os::get_default_process_handle() { } 
void* os::dll_lookup(void* handle, const char* name) { - return dlsym(handle, name); + ::dlerror(); // Clear any previous error + void* ret = ::dlsym(handle, name); + if (ret == nullptr) { + const char* tmp = ::dlerror(); + // It is possible that we found a NULL symbol, hence no error. + if (tmp != nullptr) { + log_debug(os)("Symbol %s not found in dll: %s", name, tmp); + } + } + return ret; } void os::dll_unload(void *lib) { @@ -760,11 +866,11 @@ void os::dll_unload(void *lib) { } jlong os::lseek(int fd, jlong offset, int whence) { - return (jlong) BSD_ONLY(::lseek) NOT_BSD(::lseek64)(fd, offset, whence); + return (jlong) ::lseek(fd, offset, whence); } int os::ftruncate(int fd, jlong length) { - return BSD_ONLY(::ftruncate) NOT_BSD(::ftruncate64)(fd, length); + return ::ftruncate(fd, length); } const char* os::get_current_directory(char *buf, size_t buflen) { @@ -836,6 +942,14 @@ void os::_exit(int num) { ALLOW_C_FUNCTION(::_exit, ::_exit(num);) } +bool os::dont_yield() { + return DontYieldALot; +} + +void os::naked_yield() { + sched_yield(); +} + // Builds a platform dependent Agent_OnLoad_ function name // which is used to find statically linked in agents. // Parameters: @@ -2039,4 +2153,53 @@ const char* os::file_separator() { return "/"; } const char* os::line_separator() { return "\n"; } const char* os::path_separator() { return ":"; } +// Map file into memory; uses mmap(). +// Notes: +// - if caller specifies addr, MAP_FIXED is used. That means existing +// mappings will be replaced. +// - The file descriptor must be valid (to create anonymous mappings, use +// os::reserve_memory()). 
+// Returns address to mapped memory, nullptr on error +char* os::pd_map_memory(int fd, const char* unused, + size_t file_offset, char *addr, size_t bytes, + bool read_only, bool allow_exec) { + + assert(fd != -1, "Specify a valid file descriptor"); + + int prot; + int flags = MAP_PRIVATE; + + if (read_only) { + prot = PROT_READ; + } else { + prot = PROT_READ | PROT_WRITE; + } + + if (allow_exec) { + prot |= PROT_EXEC; + } + + if (addr != nullptr) { + flags |= MAP_FIXED; + } + + char* mapped_address = (char*)mmap(addr, (size_t)bytes, prot, flags, + fd, file_offset); + if (mapped_address == MAP_FAILED) { + return nullptr; + } + + // If we did specify an address, and the mapping succeeded, it should + // have returned that address since we specify MAP_FIXED + assert(addr == nullptr || addr == mapped_address, + "mmap+MAP_FIXED returned " PTR_FORMAT ", expected " PTR_FORMAT, + p2i(mapped_address), p2i(addr)); + + return mapped_address; +} + +// Unmap a block of memory. Uses munmap. +bool os::pd_unmap_memory(char* addr, size_t bytes) { + return munmap(addr, bytes) == 0; +} #endif // !NATIVE_IMAGE diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/posix/os_posix.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/posix/os_posix.hpp index 25db0ebc7a3e..fa42acbd9246 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/posix/os_posix.hpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/os/posix/os_posix.hpp @@ -33,7 +33,7 @@ #ifndef NATIVE_IMAGE // Note: the Posix API aims to capture functionality available on all Posix // compliant platforms, but in practice the implementations may depend on -// non-Posix functionality. For example, the use of lseek64 and ftruncate64. +// non-Posix functionality. // This use of non-Posix API's is made possible by compiling/linking in a mode // that is not restricted to being fully Posix complaint, such as by declaring // -D_GNU_SOURCE. 
But be aware that in doing so we may enable non-Posix diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/memory/allocation.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/memory/allocation.hpp index 565ff0b6a1b2..021a3816c0ba 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/memory/allocation.hpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/memory/allocation.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,6 +27,7 @@ #define SHARE_MEMORY_ALLOCATION_HPP #include "memory/allStatic.hpp" +#include "nmt/memflags.hpp" #include "utilities/debug.hpp" #include "utilities/globalDefinitions.hpp" #include "utilities/macros.hpp" @@ -100,63 +101,6 @@ typedef AllocFailStrategy::AllocFailEnum AllocFailType; // void FreeHeap(void* p); // -#define MEMORY_TYPES_DO(f) \ - /* Memory type by sub systems. It occupies lower byte. 
*/ \ - f(mtJavaHeap, "Java Heap") /* Java heap */ \ - f(mtClass, "Class") /* Java classes */ \ - f(mtThread, "Thread") /* thread objects */ \ - f(mtThreadStack, "Thread Stack") \ - f(mtCode, "Code") /* generated code */ \ - f(mtGC, "GC") \ - f(mtGCCardSet, "GCCardSet") /* G1 card set remembered set */ \ - f(mtCompiler, "Compiler") \ - f(mtJVMCI, "JVMCI") \ - f(mtInternal, "Internal") /* memory used by VM, but does not belong to */ \ - /* any of above categories, and not used by */ \ - /* NMT */ \ - f(mtOther, "Other") /* memory not used by VM */ \ - f(mtSymbol, "Symbol") \ - f(mtNMT, "Native Memory Tracking") /* memory used by NMT */ \ - f(mtClassShared, "Shared class space") /* class data sharing */ \ - f(mtChunk, "Arena Chunk") /* chunk that holds content of arenas */ \ - f(mtTest, "Test") /* Test type for verifying NMT */ \ - f(mtTracing, "Tracing") \ - f(mtLogging, "Logging") \ - f(mtStatistics, "Statistics") \ - f(mtArguments, "Arguments") \ - f(mtModule, "Module") \ - f(mtSafepoint, "Safepoint") \ - f(mtSynchronizer, "Synchronization") \ - f(mtServiceability, "Serviceability") \ - f(mtMetaspace, "Metaspace") \ - f(mtStringDedup, "String Deduplication") \ - f(mtObjectMonitor, "Object Monitors") \ - f(mtNone, "Unknown") \ - //end - -#define MEMORY_TYPE_DECLARE_ENUM(type, human_readable) \ - type, - -/* - * Memory types - */ -enum class MEMFLAGS : uint8_t { - MEMORY_TYPES_DO(MEMORY_TYPE_DECLARE_ENUM) - mt_number_of_types // number of memory types (mtDontTrack - // is not included as validate type) -}; -// Extra insurance that MEMFLAGS truly has the same size as uint8_t. -STATIC_ASSERT(sizeof(MEMFLAGS) == sizeof(uint8_t)); - -#define MEMORY_TYPE_SHORTNAME(type, human_readable) \ - constexpr MEMFLAGS type = MEMFLAGS::type; - -// Generate short aliases for the enum values. E.g. mtGC instead of MEMFLAGS::mtGC. -MEMORY_TYPES_DO(MEMORY_TYPE_SHORTNAME) - -// Make an int version of the sentinel end value. 
-constexpr int mt_number_of_types = static_cast(MEMFLAGS::mt_number_of_types); - extern bool NMT_track_callsite; class NativeCallStack; diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/nmt/memflags.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/nmt/memflags.hpp new file mode 100644 index 000000000000..dba4b0002d78 --- /dev/null +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/nmt/memflags.hpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef SHARE_NMT_MEMFLAGS_HPP +#define SHARE_NMT_MEMFLAGS_HPP + +#include "utilities/globalDefinitions.hpp" + +#define MEMORY_TYPES_DO(f) \ + /* Memory type by sub systems. It occupies lower byte. 
*/ \ + f(mtJavaHeap, "Java Heap") /* Java heap */ \ + f(mtClass, "Class") /* Java classes */ \ + f(mtThread, "Thread") /* thread objects */ \ + f(mtThreadStack, "Thread Stack") \ + f(mtCode, "Code") /* generated code */ \ + f(mtGC, "GC") \ + f(mtGCCardSet, "GCCardSet") /* G1 card set remembered set */ \ + f(mtCompiler, "Compiler") \ + f(mtJVMCI, "JVMCI") \ + f(mtInternal, "Internal") /* memory used by VM, but does not belong to */ \ + /* any of above categories, and not used by */ \ + /* NMT */ \ + f(mtOther, "Other") /* memory not used by VM */ \ + f(mtSymbol, "Symbol") \ + f(mtNMT, "Native Memory Tracking") /* memory used by NMT */ \ + f(mtClassShared, "Shared class space") /* class data sharing */ \ + f(mtChunk, "Arena Chunk") /* chunk that holds content of arenas */ \ + f(mtTest, "Test") /* Test type for verifying NMT */ \ + f(mtTracing, "Tracing") \ + f(mtLogging, "Logging") \ + f(mtStatistics, "Statistics") \ + f(mtArguments, "Arguments") \ + f(mtModule, "Module") \ + f(mtSafepoint, "Safepoint") \ + f(mtSynchronizer, "Synchronization") \ + f(mtServiceability, "Serviceability") \ + f(mtMetaspace, "Metaspace") \ + f(mtStringDedup, "String Deduplication") \ + f(mtObjectMonitor, "Object Monitors") \ + f(mtNone, "Unknown") \ + //end + +#define MEMORY_TYPE_DECLARE_ENUM(type, human_readable) \ + type, + +enum class MEMFLAGS : uint8_t { + MEMORY_TYPES_DO(MEMORY_TYPE_DECLARE_ENUM) + mt_number_of_types // number of memory types (mtDontTrack + // is not included as validate type) +}; + +#define MEMORY_TYPE_SHORTNAME(type, human_readable) \ + constexpr MEMFLAGS type = MEMFLAGS::type; + +// Generate short aliases for the enum values. E.g. mtGC instead of MEMFLAGS::mtGC. +MEMORY_TYPES_DO(MEMORY_TYPE_SHORTNAME) + +// Make an int version of the sentinel end value. 
+constexpr int mt_number_of_types = static_cast(MEMFLAGS::mt_number_of_types); + +#endif // SHARE_NMT_MEMFLAGS_HPP diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/runtime/os.cpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/runtime/os.cpp index 3e9857f5b3f4..09c95a0c9b2e 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/runtime/os.cpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/runtime/os.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,7 +32,6 @@ #include "classfile/vmClasses.hpp" #include "classfile/vmSymbols.hpp" #include "code/codeCache.hpp" -#include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" #include "gc/shared/gcVMOperations.hpp" #include "interpreter/interpreter.hpp" @@ -84,8 +83,13 @@ #include "utilities/defaultStream.hpp" #include "utilities/events.hpp" #include "utilities/fastrand.hpp" +#include "utilities/macros.hpp" #include "utilities/powerOfTwo.hpp" +#ifdef LINUX +#include "osContainer_linux.hpp" +#endif + #ifndef _WINDOWS # include #endif @@ -122,6 +126,16 @@ int os::snprintf_checked(char* buf, size_t len, const char* fmt, ...) { return result; } +int os::vsnprintf(char* buf, size_t len, const char* fmt, va_list args) { + ALLOW_C_FUNCTION(::vsnprintf, int result = ::vsnprintf(buf, len, fmt, args);) + // If an encoding error occurred (result < 0) then it's not clear + // whether the buffer is NUL terminated, so ensure it is. + if ((result < 0) && (len > 0)) { + buf[len - 1] = '\0'; + } + return result; +} + #ifndef NATIVE_IMAGE // Fill in buffer with current local time as an ISO-8601 string. // E.g., YYYY-MM-DDThh:mm:ss.mmm+zzzz. 
@@ -274,13 +288,6 @@ bool os::dll_build_name(char* buffer, size_t size, const char* fname) { return (n != -1); } -#if !defined(LINUX) && !defined(_WINDOWS) -bool os::committed_in_range(address start, size_t size, address& committed_start, size_t& committed_size) { - committed_start = start; - committed_size = size; - return true; -} -#endif // Helper for dll_locate_lib. // Pass buffer and printbuffer as we already printed the path to buffer @@ -974,56 +981,73 @@ bool os::print_function_and_library_name(outputStream* st, return have_function_name || have_library_name; } -ATTRIBUTE_NO_ASAN static bool read_safely_from(intptr_t* p, intptr_t* result) { - const intptr_t errval = 0x1717; - intptr_t i = SafeFetchN(p, errval); +ATTRIBUTE_NO_ASAN static bool read_safely_from(const uintptr_t* p, uintptr_t* result) { + DEBUG_ONLY(*result = 0xAAAA;) + const uintptr_t errval = 0x1717; + uintptr_t i = (uintptr_t)SafeFetchN((intptr_t*)p, errval); if (i == errval) { - i = SafeFetchN(p, ~errval); + i = (uintptr_t)SafeFetchN((intptr_t*)p, ~errval); if (i == ~errval) { return false; } } - (*result) = i; + (*result) = (uintptr_t)i; return true; } -static void print_hex_location(outputStream* st, address p, int unitsize) { +// Helper for os::print_hex_dump +static void print_ascii_form(stringStream& ascii_form, uint64_t value, int unitsize) { + union { + uint64_t v; + uint8_t c[sizeof(v)]; + } u = { value }; + for (int i = 0; i < unitsize; i++) { + const int idx = LITTLE_ENDIAN_ONLY(i) BIG_ENDIAN_ONLY(sizeof(u.v) - 1 - i); + const uint8_t c = u.c[idx]; + ascii_form.put(isprint(c) && isascii(c) ? 
c : '.'); + } +} + +// Helper for os::print_hex_dump +static void print_hex_location(outputStream* st, const_address p, int unitsize, stringStream& ascii_form) { assert(is_aligned(p, unitsize), "Unaligned"); - address pa = align_down(p, sizeof(intptr_t)); + const uintptr_t* pa = (const uintptr_t*) align_down(p, sizeof(intptr_t)); #ifndef _LP64 // Special handling for printing qwords on 32-bit platforms if (unitsize == 8) { - intptr_t i1, i2; - if (read_safely_from((intptr_t*)pa, &i1) && - read_safely_from((intptr_t*)pa + 1, &i2)) { + uintptr_t i1 = 0, i2 = 0; + if (read_safely_from(pa, &i1) && + read_safely_from(pa + 1, &i2)) { const uint64_t value = LITTLE_ENDIAN_ONLY((((uint64_t)i2) << 32) | i1) BIG_ENDIAN_ONLY((((uint64_t)i1) << 32) | i2); st->print("%016" FORMAT64_MODIFIER "x", value); + print_ascii_form(ascii_form, value, unitsize); } else { st->print_raw("????????????????"); } return; } #endif // 32-bit, qwords - intptr_t i = 0; - if (read_safely_from((intptr_t*)pa, &i)) { + uintptr_t i = 0; + if (read_safely_from(pa, &i)) { // bytes: CA FE BA BE DE AD C0 DE // bytoff: 0 1 2 3 4 5 6 7 // LE bits: 0 8 16 24 32 40 48 56 // BE bits: 56 48 40 32 24 16 8 0 - const int offset = (int)(p - (address)pa); + const int offset = (int)(p - (const_address)pa); const int bitoffset = LITTLE_ENDIAN_ONLY(offset * BitsPerByte) BIG_ENDIAN_ONLY((int)((sizeof(intptr_t) - unitsize - offset) * BitsPerByte)); const int bitfieldsize = unitsize * BitsPerByte; - intptr_t value = bitfield(i, bitoffset, bitfieldsize); + uintptr_t value = bitfield(i, bitoffset, bitfieldsize); switch (unitsize) { case 1: st->print("%02x", (u1)value); break; case 2: st->print("%04x", (u2)value); break; case 4: st->print("%08x", (u4)value); break; case 8: st->print("%016" FORMAT64_MODIFIER "x", (u8)value); break; } + print_ascii_form(ascii_form, value, unitsize); } else { switch (unitsize) { case 1: st->print_raw("??"); break; @@ -1034,36 +1058,56 @@ static void print_hex_location(outputStream* st, address p, 
int unitsize) { } } -void os::print_hex_dump(outputStream* st, address start, address end, int unitsize, - int bytes_per_line, address logical_start) { +void os::print_hex_dump(outputStream* st, const_address start, const_address end, int unitsize, + bool print_ascii, int bytes_per_line, const_address logical_start) { + constexpr int max_bytes_per_line = 64; assert(unitsize == 1 || unitsize == 2 || unitsize == 4 || unitsize == 8, "just checking"); + assert(bytes_per_line > 0 && bytes_per_line <= max_bytes_per_line && + is_power_of_2(bytes_per_line), "invalid bytes_per_line"); start = align_down(start, unitsize); logical_start = align_down(logical_start, unitsize); bytes_per_line = align_up(bytes_per_line, 8); int cols = 0; - int cols_per_line = bytes_per_line / unitsize; + const int cols_per_line = bytes_per_line / unitsize; - address p = start; - address logical_p = logical_start; + const_address p = start; + const_address logical_p = logical_start; + + stringStream ascii_form; // Print out the addresses as if we were starting from logical_start. 
- st->print(PTR_FORMAT ": ", p2i(logical_p)); while (p < end) { - print_hex_location(st, p, unitsize); + if (cols == 0) { + st->print(PTR_FORMAT ": ", p2i(logical_p)); + } + print_hex_location(st, p, unitsize, ascii_form); p += unitsize; logical_p += unitsize; cols++; - if (cols >= cols_per_line && p < end) { - cols = 0; + if (cols >= cols_per_line) { + if (print_ascii && !ascii_form.is_empty()) { + st->print(" %s", ascii_form.base()); + } + ascii_form.reset(); st->cr(); - st->print(PTR_FORMAT ": ", p2i(logical_p)); + cols = 0; } else { st->print(" "); } } - st->cr(); + + if (cols > 0) { // did not print a full line + if (print_ascii) { + // indent last ascii part to match that of full lines + const int size_of_printed_unit = unitsize * 2; + const int space_left = (cols_per_line - cols) * (size_of_printed_unit + 1); + st->sp(space_left); + st->print(" %s", ascii_form.base()); + } + st->cr(); + } } void os::print_dhm(outputStream* st, const char* startStr, long sec) { @@ -1081,7 +1125,7 @@ void os::print_tos(outputStream* st, address sp) { void os::print_instructions(outputStream* st, address pc, int unitsize) { st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); - print_hex_dump(st, pc - 256, pc + 256, unitsize); + print_hex_dump(st, pc - 256, pc + 256, unitsize, /* print_ascii=*/false); } void os::print_environment_variables(outputStream* st, const char** env_list) { @@ -1243,6 +1287,8 @@ void os::print_location(outputStream* st, intptr_t x, bool verbose) { return; } +#if !INCLUDE_ASAN + bool accessible = is_readable_pointer(addr); // Check if addr is a JNI handle. 
@@ -1297,7 +1343,7 @@ void os::print_location(outputStream* st, intptr_t x, bool verbose) { #ifdef _LP64 if (UseCompressedClassPointers && ((uintptr_t)addr &~ (uintptr_t)max_juint) == 0) { narrowKlass narrow_klass = (narrowKlass)(uintptr_t)addr; - Klass* k = CompressedKlassPointers::decode_raw(narrow_klass); + Klass* k = CompressedKlassPointers::decode_without_asserts(narrow_klass); if (Klass::is_valid(k)) { st->print_cr(UINT32_FORMAT " is a compressed pointer to class: " INTPTR_FORMAT, narrow_klass, p2i((HeapWord*)k)); @@ -1329,10 +1375,13 @@ void os::print_location(outputStream* st, intptr_t x, bool verbose) { return; } +#endif // !INCLUDE_ASAN + st->print_cr(INTPTR_FORMAT " is an unknown value", p2i(addr)); + } -bool is_pointer_bad(intptr_t* ptr) { +static bool is_pointer_bad(intptr_t* ptr) { return !is_aligned(ptr, sizeof(uintptr_t)) || !os::is_readable_pointer(ptr); } @@ -1753,6 +1802,13 @@ const char* os::errno_name(int e) { } #ifndef NATIVE_IMAGE +// create binary file, rewriting existing file if required +int os::create_binary_file(const char* path, bool rewrite_existing) { + int oflags = O_WRONLY | O_CREAT WINDOWS_ONLY(| O_BINARY); + oflags |= rewrite_existing ? 
O_TRUNC : O_EXCL; + return ::open(path, oflags, S_IREAD | S_IWRITE); +} + #define trace_page_size_params(size) byte_size_in_exact_unit(size), exact_unit_for_byte_size(size) void os::trace_page_sizes(const char* str, @@ -1859,18 +1915,21 @@ char* os::reserve_memory(size_t bytes, bool executable, MEMFLAGS flags) { char* result = pd_reserve_memory(bytes, executable); if (result != nullptr) { MemTracker::record_virtual_memory_reserve(result, bytes, CALLER_PC, flags); + log_debug(os, map)("Reserved " RANGEFMT, RANGEFMTARGS(result, bytes)); + } else { + log_info(os, map)("Reserve failed (%zu bytes)", bytes); } return result; } -char* os::attempt_reserve_memory_at(char* addr, size_t bytes, bool executable) { +char* os::attempt_reserve_memory_at(char* addr, size_t bytes, bool executable, MEMFLAGS flag) { char* result = SimulateFullAddressSpace ? nullptr : pd_attempt_reserve_memory_at(addr, bytes, executable); if (result != nullptr) { - MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC); - log_debug(os)("Reserved memory at " INTPTR_FORMAT " for " SIZE_FORMAT " bytes.", p2i(addr), bytes); + MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC, flag); + log_debug(os, map)("Reserved " RANGEFMT, RANGEFMTARGS(result, bytes)); } else { - log_debug(os)("Attempt to reserve memory at " INTPTR_FORMAT " for " - SIZE_FORMAT " bytes failed, errno %d", p2i(addr), bytes, get_last_error()); + log_info(os, map)("Attempt to reserve " RANGEFMT " failed", + RANGEFMTARGS(addr, bytes)); } return result; } @@ -1940,7 +1999,16 @@ char* os::attempt_reserve_memory_between(char* min, char* max, size_t bytes, siz char* const absolute_max = (char*)(NOT_LP64(G * 3) LP64_ONLY(G * 128 * 1024)); char* const absolute_min = (char*) os::vm_min_address(); - const size_t alignment_adjusted = MAX2(alignment, os::vm_allocation_granularity()); + // AIX is the only platform that uses System V shm for reserving virtual memory. 
+ // In this case, the required alignment of the allocated size (64K) and the alignment + // of possible start points of the memory region (256M) differ. + // This is not reflected by os_allocation_granularity(). + // The logic here is dual to the one in pd_reserve_memory in os_aix.cpp + const size_t system_allocation_granularity = + AIX_ONLY(os::vm_page_size() == 4*K ? 4*K : 256*M) + NOT_AIX(os::vm_allocation_granularity()); + + const size_t alignment_adjusted = MAX2(alignment, system_allocation_granularity); // Calculate first and last possible attach points: char* const lo_att = align_up(MAX2(absolute_min, min), alignment_adjusted); @@ -1948,7 +2016,11 @@ char* os::attempt_reserve_memory_between(char* min, char* max, size_t bytes, siz return nullptr; // overflow } - char* const hi_att = align_down(MIN2(max, absolute_max) - bytes, alignment_adjusted); + char* const hi_end = MIN2(max, absolute_max); + if ((uintptr_t)hi_end < bytes) { + return nullptr; // no need to go on + } + char* const hi_att = align_down(hi_end - bytes, alignment_adjusted); if (hi_att > max) { return nullptr; // overflow } @@ -2073,11 +2145,27 @@ static void assert_nonempty_range(const char* addr, size_t bytes) { p2i(addr), p2i(addr) + bytes); } +julong os::used_memory() { +#ifdef LINUX + if (OSContainer::is_containerized()) { + jlong mem_usage = OSContainer::memory_usage_in_bytes(); + if (mem_usage > 0) { + return mem_usage; + } + } +#endif + return os::physical_memory() - os::available_memory(); +} + + bool os::commit_memory(char* addr, size_t bytes, bool executable) { assert_nonempty_range(addr, bytes); bool res = pd_commit_memory(addr, bytes, executable); if (res) { MemTracker::record_virtual_memory_commit((address)addr, bytes, CALLER_PC); + log_debug(os, map)("Committed " RANGEFMT, RANGEFMTARGS(addr, bytes)); + } else { + log_info(os, map)("Failed to commit " RANGEFMT, RANGEFMTARGS(addr, bytes)); } return res; } @@ -2088,6 +2176,9 @@ bool os::commit_memory(char* addr, size_t size, size_t 
alignment_hint, bool res = os::pd_commit_memory(addr, size, alignment_hint, executable); if (res) { MemTracker::record_virtual_memory_commit((address)addr, size, CALLER_PC); + log_debug(os, map)("Committed " RANGEFMT, RANGEFMTARGS(addr, size)); + } else { + log_info(os, map)("Failed to commit " RANGEFMT, RANGEFMTARGS(addr, size)); } return res; } @@ -2110,14 +2201,21 @@ bool os::uncommit_memory(char* addr, size_t bytes, bool executable) { assert_nonempty_range(addr, bytes); bool res; if (MemTracker::enabled()) { - Tracker tkr(Tracker::uncommit); + ThreadCritical tc; res = pd_uncommit_memory(addr, bytes, executable); if (res) { - tkr.record((address)addr, bytes); + MemTracker::record_virtual_memory_uncommit((address)addr, bytes); } } else { res = pd_uncommit_memory(addr, bytes, executable); } + + if (res) { + log_debug(os, map)("Uncommitted " RANGEFMT, RANGEFMTARGS(addr, bytes)); + } else { + log_info(os, map)("Failed to uncommit " RANGEFMT, RANGEFMTARGS(addr, bytes)); + } + return res; } @@ -2125,17 +2223,18 @@ bool os::release_memory(char* addr, size_t bytes) { assert_nonempty_range(addr, bytes); bool res; if (MemTracker::enabled()) { - // Note: Tracker contains a ThreadCritical. - Tracker tkr(Tracker::release); + ThreadCritical tc; res = pd_release_memory(addr, bytes); if (res) { - tkr.record((address)addr, bytes); + MemTracker::record_virtual_memory_release((address)addr, bytes); } } else { res = pd_release_memory(addr, bytes); } if (!res) { - log_info(os)("os::release_memory failed (" PTR_FORMAT ", " SIZE_FORMAT ")", p2i(addr), bytes); + log_info(os, map)("Failed to release " RANGEFMT, RANGEFMTARGS(addr, bytes)); + } else { + log_debug(os, map)("Released " RANGEFMT, RANGEFMTARGS(addr, bytes)); } return res; } @@ -2161,33 +2260,37 @@ void os::pretouch_memory(void* start, void* end, size_t page_size) { // We're doing concurrent-safe touch and memory state has page // granularity, so we can touch anywhere in a page. 
Touch at the // beginning of each page to simplify iteration. - char* cur = static_cast(align_down(start, page_size)); + void* first = align_down(start, page_size); void* last = align_down(static_cast(end) - 1, page_size); - assert(cur <= last, "invariant"); - // Iterate from first page through last (inclusive), being careful to - // avoid overflow if the last page abuts the end of the address range. - for ( ; true; cur += page_size) { - Atomic::add(reinterpret_cast(cur), 0, memory_order_relaxed); - if (cur >= last) break; + assert(first <= last, "invariant"); + const size_t pd_page_size = pd_pretouch_memory(first, last, page_size); + if (pd_page_size > 0) { + // Iterate from first page through last (inclusive), being careful to + // avoid overflow if the last page abuts the end of the address range. + last = align_down(static_cast(end) - 1, pd_page_size); + for (char* cur = static_cast(first); /* break */; cur += pd_page_size) { + Atomic::add(reinterpret_cast(cur), 0, memory_order_relaxed); + if (cur >= last) break; + } } } } -char* os::map_memory_to_file(size_t bytes, int file_desc) { +char* os::map_memory_to_file(size_t bytes, int file_desc, MEMFLAGS flag) { // Could have called pd_reserve_memory() followed by replace_existing_mapping_with_file_mapping(), // but AIX may use SHM in which case its more trouble to detach the segment and remap memory to the file. // On all current implementations null is interpreted as any available address. 
char* result = os::map_memory_to_file(nullptr /* addr */, bytes, file_desc); if (result != nullptr) { - MemTracker::record_virtual_memory_reserve_and_commit(result, bytes, CALLER_PC); + MemTracker::record_virtual_memory_reserve_and_commit(result, bytes, CALLER_PC, flag); } return result; } -char* os::attempt_map_memory_to_file_at(char* addr, size_t bytes, int file_desc) { +char* os::attempt_map_memory_to_file_at(char* addr, size_t bytes, int file_desc, MEMFLAGS flag) { char* result = pd_attempt_map_memory_to_file_at(addr, bytes, file_desc); if (result != nullptr) { - MemTracker::record_virtual_memory_reserve_and_commit((address)result, bytes, CALLER_PC); + MemTracker::record_virtual_memory_reserve_and_commit((address)result, bytes, CALLER_PC, flag); } return result; } @@ -2202,20 +2305,13 @@ char* os::map_memory(int fd, const char* file_name, size_t file_offset, return result; } -char* os::remap_memory(int fd, const char* file_name, size_t file_offset, - char *addr, size_t bytes, bool read_only, - bool allow_exec) { - return pd_remap_memory(fd, file_name, file_offset, addr, bytes, - read_only, allow_exec); -} - bool os::unmap_memory(char *addr, size_t bytes) { bool result; if (MemTracker::enabled()) { - Tracker tkr(Tracker::release); + ThreadCritical tc; result = pd_unmap_memory(addr, bytes); if (result) { - tkr.record((address)addr, bytes); + MemTracker::record_virtual_memory_release((address)addr, bytes); } } else { result = pd_unmap_memory(addr, bytes); @@ -2240,6 +2336,9 @@ char* os::reserve_memory_special(size_t size, size_t alignment, size_t page_size if (result != nullptr) { // The memory is committed MemTracker::record_virtual_memory_reserve_and_commit((address)result, size, CALLER_PC); + log_debug(os, map)("Reserved and committed " RANGEFMT, RANGEFMTARGS(result, size)); + } else { + log_info(os, map)("Reserve and commit failed (%zu bytes)", size); } return result; @@ -2248,11 +2347,10 @@ char* os::reserve_memory_special(size_t size, size_t alignment, 
size_t page_size bool os::release_memory_special(char* addr, size_t bytes) { bool res; if (MemTracker::enabled()) { - // Note: Tracker contains a ThreadCritical. - Tracker tkr(Tracker::release); + ThreadCritical tc; res = pd_release_memory_special(addr, bytes); if (res) { - tkr.record((address)addr, bytes); + MemTracker::record_virtual_memory_release((address)addr, bytes); } } else { res = pd_release_memory_special(addr, bytes); diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/runtime/os.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/runtime/os.hpp index 3d3359844dcc..87e933d2ab8b 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/runtime/os.hpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/runtime/os.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -157,6 +157,19 @@ typedef void (*java_call_t)(JavaValue* value, const methodHandle& method, JavaCa class MallocTracker; #endif // !NATIVE_IMAGE +// Preserve errno across a range of calls + +class ErrnoPreserver { + int _e; + +public: + ErrnoPreserver() { _e = errno; } + + ~ErrnoPreserver() { errno = _e; } + + int saved_errno() { return _e; } +}; + class os: AllStatic { friend class VMStructs; friend class JVMCIVMStructs; @@ -197,7 +210,7 @@ class os: AllStatic { static PageSizes _page_sizes; // The default value for os::vm_min_address() unless the platform knows better. 
This value - // is chosen to give us reasonable protection against NULL pointer dereferences while being + // is chosen to give us reasonable protection against null pointer dereferences while being // low enough to leave most of the valuable low-4gb address space open. static constexpr size_t _vm_min_address_default = 16 * M; @@ -223,13 +236,14 @@ class os: AllStatic { static char* pd_map_memory(int fd, const char* file_name, size_t file_offset, char *addr, size_t bytes, bool read_only = false, bool allow_exec = false); - static char* pd_remap_memory(int fd, const char* file_name, size_t file_offset, - char *addr, size_t bytes, bool read_only, - bool allow_exec); static bool pd_unmap_memory(char *addr, size_t bytes); static void pd_free_memory(char *addr, size_t bytes, size_t alignment_hint); static void pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint); + // Returns 0 if pretouch is done via platform dependent method, or otherwise + // returns page_size that should be used for the common method. + static size_t pd_pretouch_memory(void* first, void* last, size_t page_size); + static char* pd_reserve_memory_special(size_t size, size_t alignment, size_t page_size, char* addr, bool executable); @@ -330,13 +344,18 @@ class os: AllStatic { // than "free" memory (`MemFree` in `/proc/meminfo`) because Linux can free memory // aggressively (e.g. clear caches) so that it becomes available. static julong available_memory(); + static julong used_memory(); static julong free_memory(); + + static jlong total_swap_space(); + static jlong free_swap_space(); #endif // !NATIVE_IMAGE static julong physical_memory(); #ifndef NATIVE_IMAGE static bool has_allocatable_memory_limit(size_t* limit); static bool is_server_class_machine(); + static size_t rss(); // Returns the id of the processor on which the calling thread is currently executing. // The returned value is guaranteed to be between 0 and (os::processor_count() - 1). 
@@ -450,7 +469,7 @@ class os: AllStatic { // Attempts to reserve the virtual memory at [addr, addr + bytes). // Does not overwrite existing mappings. - static char* attempt_reserve_memory_at(char* addr, size_t bytes, bool executable = false); + static char* attempt_reserve_memory_at(char* addr, size_t bytes, bool executable = false, MEMFLAGS flag = mtNone); // Given an address range [min, max), attempts to reserve memory within this area, with the given alignment. // If randomize is true, the location will be randomized. @@ -502,19 +521,16 @@ class os: AllStatic { static int create_file_for_heap(const char* dir); // Map memory to the file referred by fd. This function is slightly different from map_memory() // and is added to be used for implementation of -XX:AllocateHeapAt - static char* map_memory_to_file(size_t size, int fd); - static char* map_memory_to_file_aligned(size_t size, size_t alignment, int fd); + static char* map_memory_to_file(size_t size, int fd, MEMFLAGS flag = mtNone); + static char* map_memory_to_file_aligned(size_t size, size_t alignment, int fd, MEMFLAGS flag = mtNone); static char* map_memory_to_file(char* base, size_t size, int fd); - static char* attempt_map_memory_to_file_at(char* base, size_t size, int fd); + static char* attempt_map_memory_to_file_at(char* base, size_t size, int fd, MEMFLAGS flag = mtNone); // Replace existing reserved memory with file mapping static char* replace_existing_mapping_with_file_mapping(char* base, size_t size, int fd); static char* map_memory(int fd, const char* file_name, size_t file_offset, char *addr, size_t bytes, bool read_only = false, bool allow_exec = false, MEMFLAGS flags = mtNone); - static char* remap_memory(int fd, const char* file_name, size_t file_offset, - char *addr, size_t bytes, bool read_only, - bool allow_exec); static bool unmap_memory(char *addr, size_t bytes); static void free_memory(char *addr, size_t bytes, size_t alignment_hint); static void realign_memory(char *addr, size_t bytes, 
size_t alignment_hint); @@ -604,7 +620,7 @@ class os: AllStatic { // multiple calls to naked_short_sleep. Only for use by non-JavaThreads. static void naked_sleep(jlong millis); // Never returns, use with CAUTION - ATTRIBUTE_NORETURN static void infinite_sleep(); + [[noreturn]] static void infinite_sleep(); static void naked_yield () ; static OSReturn set_priority(Thread* thread, ThreadPriority priority); static OSReturn get_priority(const Thread* const thread, ThreadPriority& priority); @@ -628,26 +644,26 @@ class os: AllStatic { static int fork_and_exec(const char *cmd); // Call ::exit() on all platforms - ATTRIBUTE_NORETURN static void exit(int num); + [[noreturn]] static void exit(int num); // Call ::_exit() on all platforms. Similar semantics to die() except we never // want a core dump. - ATTRIBUTE_NORETURN static void _exit(int num); + [[noreturn]] static void _exit(int num); // Terminate the VM, but don't exit the process static void shutdown(); // Terminate with an error. Default is to generate a core file on platforms // that support such things. This calls shutdown() and then aborts. - ATTRIBUTE_NORETURN static void abort(bool dump_core, void *siginfo, const void *context); - ATTRIBUTE_NORETURN static void abort(bool dump_core = true); + [[noreturn]] static void abort(bool dump_core, void *siginfo, const void *context); + [[noreturn]] static void abort(bool dump_core = true); // Die immediately, no exit hook, no abort hook, no cleanup. // Dump a core file, if possible, for debugging. os::abort() is the // preferred means to abort the VM on error. os::die() should only // be called if something has gone badly wrong. CreateCoredumpOnCrash // is intentionally not honored by this function. 
- ATTRIBUTE_NORETURN static void die(); + [[noreturn]] static void die(); // File i/o operations static int open(const char *path, int oflag, int mode); @@ -772,8 +788,8 @@ class os: AllStatic { const char *syms[], size_t syms_len); #endif // !NATIVE_IMAGE - // Provide C99 compliant versions of these functions, since some versions - // of some platforms don't. + // Provide wrapper versions of these functions to guarantee NUL-termination + // in all cases. static int vsnprintf(char* buf, size_t len, const char* fmt, va_list args) ATTRIBUTE_PRINTF(3, 0); #ifndef NATIVE_IMAGE static int snprintf(char* buf, size_t len, const char* fmt, ...) ATTRIBUTE_PRINTF(3, 4); @@ -860,10 +876,10 @@ class os: AllStatic { // return current frame. pc() and sp() are set to null on failure. static frame current_frame(); - static void print_hex_dump(outputStream* st, address start, address end, int unitsize, - int bytes_per_line, address logical_start); - static void print_hex_dump(outputStream* st, address start, address end, int unitsize) { - print_hex_dump(st, start, end, unitsize, /*bytes_per_line=*/16, /*logical_start=*/start); + static void print_hex_dump(outputStream* st, const_address start, const_address end, int unitsize, bool print_ascii, + int bytes_per_line, const_address logical_start); + static void print_hex_dump(outputStream* st, const_address start, const_address end, int unitsize, bool print_ascii = true) { + print_hex_dump(st, start, end, unitsize, print_ascii, /*bytes_per_line=*/16, /*logical_start=*/start); } // returns a string to describe the exception/signal; diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/globalDefinitions.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/globalDefinitions.hpp index 88a516a4c496..ee96928fe02b 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/globalDefinitions.hpp +++ 
b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/globalDefinitions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,9 +26,6 @@ #ifndef SHARE_UTILITIES_GLOBALDEFINITIONS_HPP #define SHARE_UTILITIES_GLOBALDEFINITIONS_HPP -#ifndef NATIVE_IMAGE -#include "utilities/attributeNoreturn.hpp" -#endif // !NATIVE_IMAGE #include "utilities/compilerWarnings.hpp" #include "utilities/debug.hpp" #include "utilities/macros.hpp" @@ -42,6 +39,7 @@ #include #include +#include #include #ifndef NATIVE_IMAGE @@ -159,6 +157,11 @@ class oopDesc; #define INTX_FORMAT_W(width) "%" #width PRIdPTR #define UINTX_FORMAT "%" PRIuPTR #define UINTX_FORMAT_X "0x%" PRIxPTR +#ifdef _LP64 +#define UINTX_FORMAT_X_0 "0x%016" PRIxPTR +#else +#define UINTX_FORMAT_X_0 "0x%08" PRIxPTR +#endif #define UINTX_FORMAT_W(width) "%" #width PRIuPTR // Format jlong, if necessary @@ -394,6 +397,9 @@ inline T byte_size_in_proper_unit(T s) { #define PROPERFMT SIZE_FORMAT "%s" #define PROPERFMTARGS(s) byte_size_in_proper_unit(s), proper_unit_for_byte_size(s) +#define RANGEFMT "[" PTR_FORMAT " - " PTR_FORMAT "), (" SIZE_FORMAT " bytes)" +#define RANGEFMTARGS(p1, size) p2i(p1), p2i(p1 + size), size + inline const char* exact_unit_for_byte_size(size_t s) { #ifdef _LP64 if (s >= G && (s % G) == 0) { @@ -462,6 +468,7 @@ typedef unsigned int uint; NEEDS_CLEANUP typedef signed char s_char; typedef unsigned char u_char; typedef u_char* address; +typedef const u_char* const_address; // Pointer subtraction. 
// The idea here is to avoid ptrdiff_t, which is signed and so doesn't have @@ -598,13 +605,16 @@ extern uint64_t OopEncodingHeapMax; // Machine dependent stuff +#include CPU_HEADER(globalDefinitions) + // The maximum size of the code cache. Can be overridden by targets. +#ifndef CODE_CACHE_SIZE_LIMIT #define CODE_CACHE_SIZE_LIMIT (2*G) +#endif + // Allow targets to reduce the default size of the code cache. #define CODE_CACHE_DEFAULT_LIMIT CODE_CACHE_SIZE_LIMIT -#include CPU_HEADER(globalDefinitions) - // To assure the IRIW property on processors that are not multiple copy // atomic, sync instructions must be issued between volatile reads to // assure their ordering, instead of after volatile stores. @@ -630,13 +640,6 @@ const bool support_IRIW_for_not_multiple_copy_atomic_cpu = PPC64_ONLY(true) NOT_ #endif -//---------------------------------------------------------------------------------------------------- -// Utility macros for compilers -// used to silence compiler warnings - -#define Unused_Variable(var) var - - //---------------------------------------------------------------------------------------------------- // Miscellaneous @@ -1042,19 +1045,19 @@ enum LockingMode { //---------------------------------------------------------------------------------------------------- // Special constants for debugging -const jint badInt = -3; // generic "bad int" value -const intptr_t badAddressVal = -2; // generic "bad address" value -const intptr_t badOopVal = -1; // generic "bad oop" value -const intptr_t badHeapOopVal = (intptr_t) CONST64(0x2BAD4B0BBAADBABE); // value used to zap heap after GC -const int badStackSegVal = 0xCA; // value used to zap stack segments -const int badHandleValue = 0xBC; // value used to zap vm handle area -const int badResourceValue = 0xAB; // value used to zap resource area -const int freeBlockPad = 0xBA; // value used to pad freed blocks. -const int uninitBlockPad = 0xF1; // value used to zap newly malloc'd blocks. 
-const juint uninitMetaWordVal= 0xf7f7f7f7; // value used to zap newly allocated metachunk -const juint badHeapWordVal = 0xBAADBABE; // value used to zap heap after GC -const juint badMetaWordVal = 0xBAADFADE; // value used to zap metadata heap after GC -const int badCodeHeapNewVal= 0xCC; // value used to zap Code heap at allocation +const jint badInt = -3; // generic "bad int" value +const intptr_t badAddressVal = -2; // generic "bad address" value +const intptr_t badOopVal = -1; // generic "bad oop" value +const intptr_t badHeapOopVal = (intptr_t) CONST64(0x2BAD4B0BBAADBABE); // value used to zap heap after GC +const int badStackSegVal = 0xCA; // value used to zap stack segments +const int badHandleValue = 0xBC; // value used to zap vm handle area +const int badResourceValue = 0xAB; // value used to zap resource area +const int freeBlockPad = 0xBA; // value used to pad freed blocks. +const int uninitBlockPad = 0xF1; // value used to zap newly malloc'd blocks. +const juint uninitMetaWordVal = 0xf7f7f7f7; // value used to zap newly allocated metachunk +const jubyte heapPaddingByteVal = 0xBD; // value used to zap object padding in the heap +const juint badHeapWordVal = 0xBAADBABE; // value used to zap heap after GC +const int badCodeHeapNewVal = 0xCC; // value used to zap Code heap at allocation const int badCodeHeapFreeVal = 0xDD; // value used to zap Code heap at deallocation const intptr_t badDispHeaderDeopt = 0xDE0BD000; // value to fill unused displaced header during deoptimization const intptr_t badDispHeaderOSR = 0xDEAD05A0; // value to fill unused displaced header during OSR @@ -1125,7 +1128,18 @@ template constexpr T MIN3(T a, T b, T c) { return MIN2(MIN2(a, b), template constexpr T MAX4(T a, T b, T c, T d) { return MAX2(MAX3(a, b, c), d); } template constexpr T MIN4(T a, T b, T c, T d) { return MIN2(MIN3(a, b, c), d); } -template inline T ABS(T x) { return (x > 0) ? 
x : -x; } +#define ABS(x) asserted_abs(x, __FILE__, __LINE__) + +template inline T asserted_abs(T x, const char* file, int line) { + bool valid_arg = !(std::is_integral::value && x == std::numeric_limits::min()); +#ifdef ASSERT + if (!valid_arg) { + report_vm_error(file, line, "ABS: argument should not allow overflow"); + } +#endif + // Prevent exposure to UB by checking valid_arg here as well. + return (x < 0 && valid_arg) ? -x : x; +} // Return the given value clamped to the range [min ... max] template diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/globalDefinitions_gcc.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/globalDefinitions_gcc.hpp index 9132b3523863..a67c12ba0d50 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/globalDefinitions_gcc.hpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/globalDefinitions_gcc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,12 +32,23 @@ // globally used constants & types, class (forward) // declarations and a few frequently used utility functions. +#include #include +#include #include #include #include +#include #include #include +// In stdlib.h on AIX malloc is defined as a macro causing +// compiler errors when resolving them in different depths as it +// happens in the log tags. This avoids the macro. 
+#if (defined(__VEC__) || defined(__AIXVEC)) && defined(AIX) \ + && defined(__open_xl_version__) && __open_xl_version__ >= 17 + #undef malloc + extern void *malloc(size_t) asm("vec_malloc"); +#endif #include #include @@ -49,8 +60,7 @@ #include #include -#if defined(LINUX) || defined(_ALLBSD_SOURCE) -#include +#if defined(LINUX) || defined(_ALLBSD_SOURCE) || defined(_AIX) #include #ifndef __OpenBSD__ #include @@ -80,32 +90,10 @@ #define NULL_WORD NULL #endif -#if !defined(LINUX) && !defined(_ALLBSD_SOURCE) -// Compiler-specific primitive types -typedef unsigned short uint16_t; -#ifndef _UINT32_T -#define _UINT32_T -typedef unsigned int uint32_t; -#endif // _UINT32_T - -#if !defined(_SYS_INT_TYPES_H) -#ifndef _UINT64_T -#define _UINT64_T -typedef unsigned long long uint64_t; -#endif // _UINT64_T -// %%%% how to access definition of intptr_t portably in 5.5 onward? -typedef int intptr_t; -typedef unsigned int uintptr_t; -// If this gets an error, figure out a symbol XXX that implies the -// prior definition of intptr_t, and add "&& !defined(XXX)" above. 
-#endif // _SYS_INT_TYPES_H - -#endif // !LINUX && !_ALLBSD_SOURCE - // checking for nanness #if defined(__APPLE__) inline int g_isnan(double f) { return isnan(f); } -#elif defined(LINUX) || defined(_ALLBSD_SOURCE) +#elif defined(LINUX) || defined(_ALLBSD_SOURCE) || defined(_AIX) inline int g_isnan(float f) { return isnan(f); } inline int g_isnan(double f) { return isnan(f); } #else diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/macros.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/macros.hpp index 9e027976938f..2c5de573f21a 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/macros.hpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/macros.hpp @@ -636,4 +636,10 @@ #define NOT_CDS_JAVA_HEAP_RETURN_(code) { return code; } #endif +#ifdef ADDRESS_SANITIZER +#define INCLUDE_ASAN 1 +#else +#define INCLUDE_ASAN 0 +#endif + #endif // SHARE_UTILITIES_MACROS_HPP diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/ostream.cpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/ostream.cpp index 7598cdccc705..d781a5e2f6e4 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/ostream.cpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/ostream.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -50,18 +50,11 @@ extern "C" void jio_print(const char* s, size_t len); extern "C" int jio_printf(const char *fmt, ...); -outputStream::outputStream() { - _position = 0; - _precount = 0; - _indentation = 0; - _scratch = nullptr; - _scratch_len = 0; -} - outputStream::outputStream(bool has_time_stamps) { _position = 0; _precount = 0; _indentation = 0; + _autoindent = false; _scratch = nullptr; _scratch_len = 0; if (has_time_stamps) _stamp.update(); @@ -87,45 +80,67 @@ bool outputStream::update_position(const char* s, size_t len) { } // Execute a vsprintf, using the given buffer if necessary. -// Return a pointer to the formatted string. +// Return a pointer to the formatted string. Optimise for +// strings without format specifiers, or only "%s". See +// comments in the header file for more details. const char* outputStream::do_vsnprintf(char* buffer, size_t buflen, const char* format, va_list ap, bool add_cr, size_t& result_len) { assert(buflen >= 2, "buffer too small"); - const char* result; - if (add_cr) buflen--; + const char* result; // The string to return. May not be the buffer. + size_t required_len = 0; // The length of buffer needed to avoid truncation + // (excluding space for the nul terminator). + + if (add_cr) { // Ensure space for CR even if truncation occurs. 
+ buflen--; + } + if (!strchr(format, '%')) { // constant format string result = format; result_len = strlen(result); - if (add_cr && result_len >= buflen) result_len = buflen-1; // truncate - } else if (format[0] == '%' && format[1] == 's' && format[2] == '\0') { + if (add_cr && result_len >= buflen) { // truncate + required_len = result_len + 1; + result_len = buflen - 1; + } + } else if (strncmp(format, "%s", 3) == 0) { //(format[0] == '%' && format[1] == 's' && format[2] == '\0') { // trivial copy-through format string result = va_arg(ap, const char*); result_len = strlen(result); - if (add_cr && result_len >= buflen) result_len = buflen-1; // truncate + if (add_cr && result_len >= buflen) { // truncate + required_len = result_len + 1; + result_len = buflen - 1; + } } else { - int required_len = os::vsnprintf(buffer, buflen, format, ap); - assert(required_len >= 0, "vsnprintf encoding error"); + int required_buffer_len = os::vsnprintf(buffer, buflen, format, ap); + assert(required_buffer_len >= 0, "vsnprintf encoding error"); result = buffer; - if ((size_t)required_len < buflen) { + required_len = required_buffer_len; + if (required_len < buflen) { result_len = required_len; - } else { - DEBUG_ONLY(warning("outputStream::do_vsnprintf output truncated -- buffer length is %d bytes but %d bytes are needed.", - add_cr ? (int)buflen + 1 : (int)buflen, add_cr ? required_len + 2 : required_len + 1);) + } else { // truncation result_len = buflen - 1; } } if (add_cr) { - if (result != buffer) { + if (result != buffer) { // Need to copy to add CR memcpy(buffer, result, result_len); result = buffer; + } else { + required_len++; } buffer[result_len++] = '\n'; buffer[result_len] = 0; } +#ifdef ASSERT + if (required_len > result_len) { + warning("outputStream::do_vsnprintf output truncated -- buffer length is " SIZE_FORMAT + " bytes but " SIZE_FORMAT " bytes are needed.", + add_cr ? 
buflen + 1 : buflen, required_len + 1); + } +#endif return result; } @@ -143,6 +158,9 @@ void outputStream::do_vsnprintf_and_write_with_scratch_buffer(const char* format } void outputStream::do_vsnprintf_and_write(const char* format, va_list ap, bool add_cr) { + if (_autoindent && _position == 0) { + indent(); + } if (_scratch) { do_vsnprintf_and_write_with_scratch_buffer(format, ap, add_cr); } else { @@ -172,9 +190,21 @@ void outputStream::vprint_cr(const char* format, va_list argptr) { do_vsnprintf_and_write(format, argptr, true); } -void outputStream::fill_to(int col) { - int need_fill = col - position(); +#endif // !NATIVE_IMAGE +void outputStream::print_raw(const char* str, size_t len) { +#ifndef NATIVE_IMAGE + if (_autoindent && _position == 0) { + indent(); + } +#endif // !NATIVE_IMAGE + write(str, len); +} +#ifndef NATIVE_IMAGE + +int outputStream::fill_to(int col) { + const int need_fill = MAX2(col - position(), 0); sp(need_fill); + return need_fill; } void outputStream::move_to(int col, int slop, int min_space) { @@ -258,6 +288,12 @@ outputStream& outputStream::indent() { return *this; } +bool outputStream::set_autoindent(bool value) { + const bool old = _autoindent; + _autoindent = value; + return old; +} + void outputStream::print_jlong(jlong value) { print(JLONG_FORMAT, value); } @@ -409,7 +445,7 @@ char* stringStream::as_string(bool c_heap) const { char* copy = c_heap ? NEW_C_HEAP_ARRAY(char, _written + 1, mtInternal) : NEW_RESOURCE_ARRAY(char, _written + 1); ::memcpy(copy, _buffer, _written); - copy[_written] = 0; // terminating null + copy[_written] = '\0'; // terminating null if (c_heap) { // Need to ensure our content is written to memory before we return // the pointer to it. 
@@ -443,7 +479,7 @@ xmlStream* xtty; #define EXTRACHARLEN 32 #define CURRENTAPPX ".current" // convert YYYY-MM-DD HH:MM:SS to YYYY-MM-DD_HH-MM-SS -char* get_datetime_string(char *buf, size_t len) { +static char* get_datetime_string(char *buf, size_t len) { os::local_time_string(buf, len); int i = (int)strlen(buf); while (--i >= 0) { @@ -604,23 +640,10 @@ long fileStream::fileSize() { return size; } -char* fileStream::readln(char *data, int count ) { - char * ret = nullptr; - if (_file != nullptr) { - ret = ::fgets(data, count, _file); - // Get rid of annoying \n char only if it is present. - size_t len = ::strlen(data); - if (len > 0 && data[len - 1] == '\n') { - data[len - 1] = '\0'; - } - } - return ret; -} - fileStream::~fileStream() { if (_file != nullptr) { - if (_need_close) fclose(_file); - _file = nullptr; + close(); + _file = nullptr; } } @@ -981,7 +1004,7 @@ void ostream_exit() { ClassListWriter::delete_classlist(); // Make sure tty works after VM exit by assigning an always-on functioning fdStream. outputStream* tmp = tty; - tty = DisplayVMOutputToStderr ? fdStream::stdout_stream() : fdStream::stderr_stream(); + tty = DisplayVMOutputToStderr ? fdStream::stderr_stream() : fdStream::stdout_stream(); if (tmp != &tty_preinit_stream && tmp != defaultStream::instance) { delete tmp; } @@ -1033,7 +1056,7 @@ void bufferedStream::write(const char* s, size_t len) { const size_t reasonable_cap = MAX2(100 * M, buffer_max * 2); if (end > reasonable_cap) { // In debug VM, assert right away. - assert(false, "Exceeded max buffer size for this string."); + assert(false, "Exceeded max buffer size for this string (\"%.200s...\").", buffer); // Release VM: silently truncate. We do this since these kind of errors // are both difficult to predict with testing (depending on logging content) // and usually not serious enough to kill a production VM for it. 
diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/ostream.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/ostream.hpp index ce7ce45dc0ee..6ad829783430 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/ostream.hpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/hotspot/share/utilities/ostream.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -46,14 +46,16 @@ DEBUG_ONLY(class ResourceMark;) // we may use jio_printf: // jio_fprintf(defaultStream::output_stream(), "Message"); // This allows for redirection via -XX:+DisplayVMOutputToStdout and -// -XX:+DisplayVMOutputToStderr +// -XX:+DisplayVMOutputToStderr. + class outputStream : public CHeapObjBase { #ifndef NATIVE_IMAGE private: NONCOPYABLE(outputStream); + int _indentation; // current indentation + bool _autoindent; // if true, every line starts with indentation protected: - int _indentation; // current indentation int _position; // visual position on the current line uint64_t _precount; // number of chars output, less than _position TimeStamp _stamp; // for time stamps @@ -62,6 +64,23 @@ class outputStream : public CHeapObjBase { // Returns whether a newline was seen or not bool update_position(const char* s, size_t len); + + // Processes the given format string and the supplied arguments + // to produce a formatted string in the supplied buffer. Returns + // the formatted string (in the buffer). If the formatted string + // would be longer than the buffer, it is truncated. 
+ // + // If the format string is a plain string (no format specifiers) + // or is exactly "%s" to print a supplied argument string, then + // the buffer is ignored, and we return the string directly. + // However, if `add_cr` is true then we have to copy the string + // into the buffer, which risks truncation if the string is too long. + // + // The `result_len` reference is always set to the length of the returned string. + // + // If add_cr is true then the cr will always be placed in the buffer (buffer minimum size is 2). + // + // In a debug build, if truncation occurs a VM warning is issued. static const char* do_vsnprintf(char* buffer, size_t buflen, const char* format, va_list ap, bool add_cr, @@ -77,9 +96,10 @@ class outputStream : public CHeapObjBase { public: #ifndef NATIVE_IMAGE + class TestSupport; // Unit test support + // creation - outputStream(); - outputStream(bool has_time_stamps); + outputStream(bool has_time_stamps = false); // indentation outputStream& indent(); @@ -89,9 +109,17 @@ class outputStream : public CHeapObjBase { void dec(int n) { _indentation -= n; }; int indentation() const { return _indentation; } void set_indentation(int i) { _indentation = i; } - void fill_to(int col); + int fill_to(int col); void move_to(int col, int slop = 6, int min_space = 2); + // Automatic indentation: + // If autoindent mode is on, the following APIs will automatically indent + // line starts depending on the current indentation level: + // print(), print_cr(), print_raw(), print_raw_cr() + // Other APIs are unaffected + // Returns old autoindent state. + bool set_autoindent(bool value); + // sizing int position() const { return _position; } julong count() const { return _precount + _position; } @@ -99,16 +127,20 @@ class outputStream : public CHeapObjBase { void set_position(int pos) { _position = pos; } // printing + // Note that (v)print_cr forces the use of internal buffering to allow + // appending of the "cr". 
This can lead to truncation if the buffer is + // too small. + void print(const char* format, ...) ATTRIBUTE_PRINTF(2, 3); void print_cr(const char* format, ...) ATTRIBUTE_PRINTF(2, 3); void vprint(const char *format, va_list argptr) ATTRIBUTE_PRINTF(2, 0); void vprint_cr(const char* format, va_list argptr) ATTRIBUTE_PRINTF(2, 0); #endif // !NATIVE_IMAGE - void print_raw(const char* str) { write(str, strlen(str)); } - void print_raw(const char* str, size_t len) { write(str, len); } + void print_raw(const char* str) { print_raw(str, strlen(str)); } + void print_raw(const char* str, size_t len); #ifndef NATIVE_IMAGE - void print_raw_cr(const char* str) { write(str, strlen(str)); cr(); } - void print_raw_cr(const char* str, size_t len){ write(str, len); cr(); } + void print_raw_cr(const char* str) { print_raw(str); cr(); } + void print_raw_cr(const char* str, size_t len) { print_raw(str, len); cr(); } void print_data(void* data, size_t len, bool with_ascii, bool rel_addr=true); void put(char ch); void sp(int count = 1); @@ -158,17 +190,26 @@ class outputStream : public CHeapObjBase { extern outputStream* tty; // tty output class streamIndentor : public StackObj { - private: - outputStream* _str; - int _amount; - - public: + outputStream* const _str; + const int _amount; + NONCOPYABLE(streamIndentor); +public: streamIndentor(outputStream* str, int amt = 2) : _str(str), _amount(amt) { _str->inc(_amount); } ~streamIndentor() { _str->dec(_amount); } }; +class StreamAutoIndentor : public StackObj { + outputStream* const _os; + const bool _old; + NONCOPYABLE(StreamAutoIndentor); + public: + StreamAutoIndentor(outputStream* os) : + _os(os), _old(os->set_autoindent(true)) {} + ~StreamAutoIndentor() { _os->set_autoindent(_old); } +}; + // advisory locking for the shared tty stream: class ttyLocker: StackObj { friend class ttyUnlocker; @@ -244,6 +285,7 @@ class stringStream : public outputStream { }; #ifndef NATIVE_IMAGE void reset(); + bool is_empty() const { return _buffer[0] 
== '\0'; } // Copy to a resource, or C-heap, array as requested char* as_string(bool c_heap = false) const; #endif // !NATIVE_IMAGE @@ -262,11 +304,20 @@ class fileStream : public outputStream { ~fileStream(); bool is_open() const { return _file != nullptr; } virtual void write(const char* c, size_t len); - size_t read(void *data, size_t size, size_t count) { return _file != nullptr ? ::fread(data, size, count, _file) : 0; } - char* readln(char *data, int count); - int eof() { return _file != nullptr ? feof(_file) : -1; } + // unlike other classes in this file, fileStream can perform input as well as output + size_t read(void* data, size_t size) { + if (_file == nullptr) return 0; + return ::fread(data, 1, size, _file); + } + size_t read(void *data, size_t size, size_t count) { + return read(data, size * count); + } + void close() { + if (_file == nullptr || !_need_close) return; + fclose(_file); + _need_close = false; + } long fileSize(); - void rewind() { if (_file != nullptr) ::rewind(_file); } void flush(); }; diff --git a/substratevm/src/com.oracle.svm.native.libcontainer/src/svm/share/utilities/debug.hpp b/substratevm/src/com.oracle.svm.native.libcontainer/src/svm/share/utilities/debug.hpp index 6073ac8a7864..8056a057a722 100644 --- a/substratevm/src/com.oracle.svm.native.libcontainer/src/svm/share/utilities/debug.hpp +++ b/substratevm/src/com.oracle.svm.native.libcontainer/src/svm/share/utilities/debug.hpp @@ -26,15 +26,14 @@ #ifndef SHARE_UTILITIES_DEBUG_HPP #define SHARE_UTILITIES_DEBUG_HPP -#include "utilities/attributeNoreturn.hpp" #include "utilities/compilerWarnings.hpp" #ifdef ASSERT // error reporting helper functions -ATTRIBUTE_NORETURN +[[noreturn]] void report_vm_error(const char* file, int line, const char* error_msg); -ATTRIBUTE_NORETURN +[[noreturn]] ATTRIBUTE_PRINTF(4, 5) void report_vm_error(const char* file, int line, const char* error_msg, const char* detail_fmt, ...); From f912c53dc712c93ef16fe21eef81b4d72dcd2904 Mon Sep 17 00:00:00 2001 
From: Josef Eisl Date: Mon, 8 Jul 2024 13:33:42 +0200 Subject: [PATCH 6/6] svm: re-enable native JFR unittests and guard them with Container.isContainerized() [GR-55178] This reverts commit 57f96dcb317caa4c85a79f4b6b52e79dd77e623f. --- .../src/com/oracle/svm/test/jfr/TestContainerEvent.java | 2 +- .../src/com/oracle/svm/test/jfr/TestJdkContainerEvents.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/substratevm/src/com.oracle.svm.test/src/com/oracle/svm/test/jfr/TestContainerEvent.java b/substratevm/src/com.oracle.svm.test/src/com/oracle/svm/test/jfr/TestContainerEvent.java index 57a18c6b1192..dc43e13bd8ad 100644 --- a/substratevm/src/com.oracle.svm.test/src/com/oracle/svm/test/jfr/TestContainerEvent.java +++ b/substratevm/src/com.oracle.svm.test/src/com/oracle/svm/test/jfr/TestContainerEvent.java @@ -47,7 +47,7 @@ public class TestContainerEvent extends JfrRecordingTest { @Test public void test() throws Throwable { Assume.assumeTrue("Container support not enabled or available", Container.isSupported()); - Assume.assumeTrue("Container detection currently not working GR-55178", false); + Assume.assumeTrue("Test assumes running containerized", Container.singleton().isContainerized()); String[] events = new String[]{"jdk.ContainerConfiguration"}; Recording recording = startRecording(events); diff --git a/substratevm/src/com.oracle.svm.test/src/com/oracle/svm/test/jfr/TestJdkContainerEvents.java b/substratevm/src/com.oracle.svm.test/src/com/oracle/svm/test/jfr/TestJdkContainerEvents.java index 26391c30f53b..a87a35ec0a51 100644 --- a/substratevm/src/com.oracle.svm.test/src/com/oracle/svm/test/jfr/TestJdkContainerEvents.java +++ b/substratevm/src/com.oracle.svm.test/src/com/oracle/svm/test/jfr/TestJdkContainerEvents.java @@ -45,7 +45,7 @@ public class TestJdkContainerEvents extends JfrRecordingTest { @Test public void test() throws Throwable { Assume.assumeTrue("Container support not enabled or available", Container.isSupported()); -
Assume.assumeTrue("Container detection currently not working GR-55178", false); + Assume.assumeTrue("Test assumes running containerized", Container.singleton().isContainerized()); String[] events = new String[]{"jdk.ContainerCPUThrottling", "jdk.ContainerCPUUsage", "jdk.ContainerConfiguration", "jdk.ContainerIOUsage", "jdk.ContainerMemoryUsage"};