diff --git a/conftest.py b/conftest.py index 31c19ccbf6..9070c7172c 100644 --- a/conftest.py +++ b/conftest.py @@ -58,16 +58,15 @@ from smartsim.entity import Application from smartsim.error import SSConfigError, SSInternalError from smartsim.log import get_logger -from smartsim.settings import ( - AprunSettings, - DragonRunSettings, - JsrunSettings, - MpiexecSettings, - MpirunSettings, - PalsMpiexecSettings, - RunSettings, - SrunSettings, -) +# Mock imports +class AprunSettings: pass +class DragonRunSettings: pass +class JsrunSettings: pass +class MpiexecSettings: pass +class MpirunSettings: pass +class PalsMpiexecSettings: pass +class RunSettings: pass +class SrunSettings: pass logger = get_logger(__name__) diff --git a/smartsim/_core/commands/__init__.py b/smartsim/_core/commands/__init__.py new file mode 100644 index 0000000000..72ef1f674a --- /dev/null +++ b/smartsim/_core/commands/__init__.py @@ -0,0 +1,29 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from .command import Command +from .commandList import CommandList +from .launchCommands import LaunchCommands diff --git a/smartsim/_core/commands/command.py b/smartsim/_core/commands/command.py new file mode 100644 index 0000000000..d89aa41ad9 --- /dev/null +++ b/smartsim/_core/commands/command.py @@ -0,0 +1,78 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import typing as t +from collections.abc import MutableSequence + +from ...settings.launchCommand import LauncherType + + +class Command(MutableSequence[str]): + """Basic container for command information""" + + def __init__(self, launcher: LauncherType, command: t.List[str]) -> None: + """Command constructor""" + self._launcher = launcher + self._command = command + + @property + def launcher(self) -> LauncherType: + """Get the launcher type. + Return a reference to the LauncherType. + """ + return self._launcher + + @property + def command(self) -> t.List[str]: + """Get the command list. + Return a reference to the command list. + """ + return self._command + + def __getitem__(self, idx: int) -> str: + """Get the command at the specified index.""" + return self._command[idx] + + def __setitem__(self, idx: int, value: str) -> None: + """Set the command at the specified index.""" + self._command[idx] = value + + def __delitem__(self, idx: int) -> None: + """Delete the command at the specified index.""" + del self._command[idx] + + def __len__(self) -> int: + """Get the length of the command list.""" + return len(self._command) + + def insert(self, idx: int, value: str) -> None: + """Insert a command at the specified index.""" + self._command.insert(idx, value) + + def __str__(self) -> str: # pragma: no cover + string = f"\nLauncher: {self.launcher.value}\n" + string += f"Command: {' '.join(str(cmd) for cmd in self.command)}" + return string diff --git a/smartsim/_core/commands/commandList.py b/smartsim/_core/commands/commandList.py new file mode 100644 index 0000000000..08b95bbfd1 --- /dev/null +++ b/smartsim/_core/commands/commandList.py @@ -0,0 +1,74 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import typing as t +from collections.abc import MutableSequence + +from .command import Command + + +class CommandList(MutableSequence[Command]): + """Container for a Sequence of Command objects""" + + def __init__(self, commands: t.Union[Command, t.List[Command]]): + """CommandList constructor""" + if isinstance(commands, Command): + commands = [commands] + self._commands: t.List[Command] = list(commands) + + @property + def commands(self) -> t.List[Command]: + """Get the Command list. + Return a reference to the Command list. + """ + return self._commands + + def __getitem__(self, idx: int) -> Command: + """Get the Command at the specified index.""" + return self._commands[idx] + + def __setitem__(self, idx: int, value: Command) -> None: + """Set the Command at the specified index.""" + self._commands[idx] = value + + def __delitem__(self, idx: int) -> None: + """Delete the Command at the specified index.""" + del self._commands[idx] + + def __len__(self) -> int: + """Get the length of the Command list.""" + return len(self._commands) + + def insert(self, idx: int, value: Command) -> None: + """Insert a Command at the specified index.""" + self._commands.insert(idx, value) + + def __str__(self) -> str: # pragma: no cover + string = "\n\nCommand List:\n\n" + for counter, cmd in enumerate(self.commands): + string += f"CommandList index {counter} value:" + string += f"{cmd}\n\n" + return string diff --git a/smartsim/_core/commands/launchCommands.py b/smartsim/_core/commands/launchCommands.py new file mode 100644 index 0000000000..c62186671b --- /dev/null +++ b/smartsim/_core/commands/launchCommands.py @@ -0,0 +1,51 @@ +from .commandList import CommandList + + +class LaunchCommands: + """Container for aggregating prelaunch commands (e.g. file + system operations), launch commands, and postlaunch commands + """ + + def __init__( + self, + prelaunch_commands: CommandList, + launch_commands: CommandList, + postlaunch_commands: CommandList, + ) -> None: + """LaunchCommand constructor""" + self._prelaunch_commands = prelaunch_commands + self._launch_commands = launch_commands + self._postlaunch_commands = postlaunch_commands + + @property + def prelaunch_command(self) -> CommandList: + """Get the prelaunch command list. + Return a reference to the command list. + """ + return self._prelaunch_commands + + @property + def launch_command(self) -> CommandList: + """Get the launch command list. + Return a reference to the command list. + """ + return self._launch_commands + + @property + def postlaunch_command(self) -> CommandList: + """Get the postlaunch command list. + Return a reference to the command list. + """ + return self._postlaunch_commands + + def __str__(self) -> str: # pragma: no cover + string = "\n\nPrelaunch Command List:\n" + for pre_cmd in self.prelaunch_command: + string += f"{pre_cmd}\n" + string += "\n\nLaunch Command List:\n" + for launch_cmd in self.launch_command: + string += f"{launch_cmd}\n" + string += "\n\nPostlaunch Command List:\n" + for post_cmd in self.postlaunch_command: + string += f"{post_cmd}\n" + return string diff --git a/smartsim/_core/launcher/step/localStep.py b/smartsim/_core/launcher/step/localStep.py index 7d02ca70f4..49666a2059 100644 --- a/smartsim/_core/launcher/step/localStep.py +++ b/smartsim/_core/launcher/step/localStep.py @@ -29,8 +29,7 @@ import typing as t from ....entity import Application, FSNode -from ....settings import Singularity -from ....settings.base import RunSettings +from ....settings import RunSettings, Singularity from .step import Step, proxyable_launch_cmd diff --git a/smartsim/_core/launcher/step/lsfStep.py b/smartsim/_core/launcher/step/lsfStep.py index c7e56d2ec4..372e21c81b 100644 --- a/smartsim/_core/launcher/step/lsfStep.py +++ b/smartsim/_core/launcher/step/lsfStep.py @@ -31,8 +31,7 @@ from ....entity import Application, FSNode from ....error import AllocationError from ....log import get_logger -from ....settings import BsubBatchSettings, JsrunSettings -from ....settings.base import RunSettings +from ....settings import BsubBatchSettings, JsrunSettings, RunSettings from .step import Step logger = get_logger(__name__) diff --git a/smartsim/_core/launcher/step/mpiStep.py b/smartsim/_core/launcher/step/mpiStep.py index 931f901b45..8806649c84 100644 --- a/smartsim/_core/launcher/step/mpiStep.py +++ b/smartsim/_core/launcher/step/mpiStep.py @@ -32,8 +32,7 @@ from ....entity import Application, FSNode from ....error import AllocationError, SmartSimError from ....log import get_logger -from ....settings import MpiexecSettings, MpirunSettings, OrterunSettings -from ....settings.base import RunSettings +from ....settings import MpiexecSettings, MpirunSettings, OrterunSettings, RunSettings from .step import Step, proxyable_launch_cmd logger = get_logger(__name__) diff --git a/smartsim/_core/launcher/step/step.py b/smartsim/_core/launcher/step/step.py index c2aa444c08..dafe94336d 100644 --- a/smartsim/_core/launcher/step/step.py +++ b/smartsim/_core/launcher/step/step.py @@ -39,7 +39,7 @@ from ....entity import Application, Ensemble, FSNode from ....log import get_logger -from ....settings.base import RunSettings, SettingsBase +from ....settings import RunSettings, SettingsBase from ...utils.helpers import encode_cmd, get_base_36_repr from ..colocated import write_colocated_launch_script diff --git a/smartsim/_core/utils/helpers.py b/smartsim/_core/utils/helpers.py index a56517dbf0..70f52bc4e1 100644 --- a/smartsim/_core/utils/helpers.py +++ b/smartsim/_core/utils/helpers.py @@ -87,7 +87,7 @@ def check_dev_log_level() -> bool: return lvl == "developer" -def fmt_dict(value: t.Dict[str, t.Any]) -> str: +def fmt_dict(value: t.Mapping[str, t.Any]) -> str: fmt_str = "" for k, v in value.items(): fmt_str += "\t" + str(k) + " = " + str(v) diff --git a/smartsim/database/orchestrator.py b/smartsim/database/orchestrator.py index 75b4bca959..446adfab08 100644 --- a/smartsim/database/orchestrator.py +++ b/smartsim/database/orchestrator.py @@ -52,6 +52,7 @@ from ..servertype import CLUSTERED, STANDALONE from ..settings import ( AprunSettings, + BatchSettings, BsubBatchSettings, JsrunSettings, MpiexecSettings, @@ -59,11 +60,12 @@ OrterunSettings, PalsMpiexecSettings, QsubBatchSettings, + RunSettings, SbatchSettings, SrunSettings, + create_batch_settings, + create_run_settings, ) -from ..settings.base import BatchSettings, RunSettings -from ..settings.settings import create_batch_settings, create_run_settings from ..wlm import detect_launcher logger = get_logger(__name__) diff --git a/smartsim/entity/dbnode.py b/smartsim/entity/dbnode.py index 91bffdb79e..16fd9863f8 100644 --- a/smartsim/entity/dbnode.py +++ b/smartsim/entity/dbnode.py @@ -37,7 +37,7 @@ from .._core.utils.helpers import expand_exe_path from ..error import SSDBFilesNotParseable from ..log import get_logger -from ..settings.base import RunSettings +from ..settings import RunSettings from .entity import SmartSimEntity logger = get_logger(__name__) diff --git a/smartsim/entity/ensemble.py b/smartsim/entity/ensemble.py index c6b6fad3a3..e5ea26453d 100644 --- a/smartsim/entity/ensemble.py +++ b/smartsim/entity/ensemble.py @@ -40,7 +40,7 @@ UserStrategyError, ) from ..log import get_logger -from ..settings.base import BatchSettings, RunSettings +from ..settings import BatchSettings, RunSettings from .dbobject import FSModel, FSScript from .entity import SmartSimEntity from .entityList import EntityList diff --git a/smartsim/entity/entity.py b/smartsim/entity/entity.py index b68ea017fd..2f4b651f99 100644 --- a/smartsim/entity/entity.py +++ b/smartsim/entity/entity.py @@ -26,10 +26,6 @@ import typing as t -if t.TYPE_CHECKING: - # pylint: disable-next=unused-import - import smartsim.settings.base - class TelemetryConfiguration: """A base class for configuraing telemetry production behavior on diff --git a/smartsim/entity/model.py b/smartsim/entity/model.py index 76af42152c..95673d2548 100644 --- a/smartsim/entity/model.py +++ b/smartsim/entity/model.py @@ -39,7 +39,7 @@ from .._core.utils.helpers import cat_arg_and_value, expand_exe_path from ..error import EntityExistsError, SSUnsupportedError from ..log import get_logger -from ..settings.base import BatchSettings, RunSettings +from ..settings import BatchSettings, RunSettings from .dbobject import FSModel, FSScript from .entity import SmartSimEntity from .files import EntityFiles @@ -75,7 +75,8 @@ def __init__( application as a batch job """ super().__init__(name, str(path), run_settings) - self.exe = [exe] if run_settings.container else [expand_exe_path(exe)] + self.exe = [expand_exe_path(exe)] + # self.exe = [exe] if run_settings.container else [expand_exe_path(exe)] self.exe_args = exe_args or [] self.params = params self.params_as_args = params_as_args diff --git a/smartsim/experiment.py b/smartsim/experiment.py index 0caad3bbf4..087f6664d6 100644 --- a/smartsim/experiment.py +++ b/smartsim/experiment.py @@ -48,7 +48,7 @@ ) from .error import SmartSimError from .log import ctx_exp_path, get_logger, method_contextualizer -from .settings import Container, base, settings +from .settings import BatchSettings, Container, RunSettings from .wlm import detect_launcher logger = get_logger(__name__) diff --git a/smartsim/launchable/job.py b/smartsim/launchable/job.py index 8bc7536ec1..d9038db047 100644 --- a/smartsim/launchable/job.py +++ b/smartsim/launchable/job.py @@ -24,12 +24,12 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from abc import abstractmethod from copy import deepcopy +from smartsim._core.commands.launchCommands import LaunchCommands from smartsim.entity.entity import SmartSimEntity from smartsim.launchable.basejob import BaseJob -from smartsim.settings import RunSettings +from smartsim.settings import LaunchSettings class Job(BaseJob): @@ -44,8 +44,8 @@ class Job(BaseJob): def __init__( self, entity: SmartSimEntity, - launch_settings: RunSettings, # TODO: rename to LaunchSettings - ) -> None: + launch_settings: LaunchSettings, + ): super().__init__() self._entity = deepcopy(entity) self._launch_settings = deepcopy(launch_settings) @@ -60,14 +60,14 @@ def entity(self, value): self._entity = deepcopy(value) @property - def launch_settings(self) -> RunSettings: + def launch_settings(self) -> LaunchSettings: return deepcopy(self._launch_settings) @launch_settings.setter def launch_settings(self, value): self._launch_settings = deepcopy(value) - def get_launch_steps(self) -> None: # -> LaunchCommands: + def get_launch_steps(self) -> LaunchCommands: """Return the launch steps corresponding to the internal data. """ diff --git a/smartsim/launchable/mpmdjob.py b/smartsim/launchable/mpmdjob.py index e9b238f5b6..49bf8a1b36 100644 --- a/smartsim/launchable/mpmdjob.py +++ b/smartsim/launchable/mpmdjob.py @@ -31,7 +31,7 @@ from smartsim.error.errors import SSUnsupportedError from smartsim.launchable.basejob import BaseJob from smartsim.launchable.mpmdpair import MPMDPair -from smartsim.settings.base import RunSettings +from smartsim.settings.launchSettings import LaunchSettings def _check_launcher(mpmd_pairs: t.List[MPMDPair]) -> None: @@ -40,11 +40,11 @@ def _check_launcher(mpmd_pairs: t.List[MPMDPair]) -> None: ret = None for mpmd_pair in mpmd_pairs: if flag == 1: - if ret == mpmd_pair.launch_settings.run_command: + if ret == mpmd_pair.launch_settings.launcher: flag = 0 else: raise SSUnsupportedError("MPMD pairs must all share the same launcher.") - ret = mpmd_pair.launch_settings.run_command + ret = mpmd_pair.launch_settings.launcher flag = 1 @@ -86,7 +86,7 @@ def mpmd_pair(self, value): self._mpmd_pair = deepcopy(value) def add_mpmd_pair( - self, entity: SmartSimEntity, launch_settings: RunSettings + self, entity: SmartSimEntity, launch_settings: LaunchSettings ) -> None: """ Add a mpmd pair to the mpmd job diff --git a/smartsim/launchable/mpmdpair.py b/smartsim/launchable/mpmdpair.py index 37b155cb11..2b6ce36392 100644 --- a/smartsim/launchable/mpmdpair.py +++ b/smartsim/launchable/mpmdpair.py @@ -27,14 +27,12 @@ import copy from smartsim.entity.entity import SmartSimEntity -from smartsim.settings.base import RunSettings +from smartsim.settings.launchSettings import LaunchSettings class MPMDPair: """Class to store MPMD Pairs""" - def __init__( - self, entity: SmartSimEntity, launch_settings: RunSettings - ): # TODO: rename to LaunchSettings + def __init__(self, entity: SmartSimEntity, launch_settings: LaunchSettings): self.entity = copy.deepcopy(entity) self.launch_settings = copy.deepcopy(launch_settings) diff --git a/smartsim/settings/__init__.py b/smartsim/settings/__init__.py index 6e8f0bc969..b426a266e2 100644 --- a/smartsim/settings/__init__.py +++ b/smartsim/settings/__init__.py @@ -24,30 +24,73 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from .alpsSettings import AprunSettings -from .base import RunSettings, SettingsBase -from .containers import Container, Singularity -from .dragonRunSettings import DragonRunSettings -from .lsfSettings import BsubBatchSettings, JsrunSettings -from .mpiSettings import MpiexecSettings, MpirunSettings, OrterunSettings -from .palsSettings import PalsMpiexecSettings -from .pbsSettings import QsubBatchSettings -from .slurmSettings import SbatchSettings, SrunSettings - -__all__ = [ - "AprunSettings", - "BsubBatchSettings", - "JsrunSettings", - "MpirunSettings", - "MpiexecSettings", - "OrterunSettings", - "QsubBatchSettings", - "RunSettings", - "SettingsBase", - "SbatchSettings", - "SrunSettings", - "PalsMpiexecSettings", - "DragonRunSettings", - "Container", - "Singularity", -] +from .baseSettings import BaseSettings +from .batchSettings import BatchSettings +from .launchSettings import LaunchSettings + +__all__ = ["LaunchSettings", "BaseSettings", "BatchSettings"] + + +# TODO Mock imports for compiling tests +class DragonRunSettings: + pass + + +class QsubBatchSettings: + pass + + +class SbatchSettings: + pass + + +class Singularity: + pass + + +class SettingsBase: + pass + + +class AprunSettings: + pass + + +class RunSettings: + pass + + +class OrterunSettings: + pass + + +class MpirunSettings: + pass + + +class MpiexecSettings: + pass + + +class JsrunSettings: + pass + + +class BsubBatchSettings: + pass + + +class PalsMpiexecSettings: + pass + + +class SrunSettings: + pass + + +class Container: + pass + + +def create_batch_settings() -> None: ... +def create_run_settings() -> None: ... diff --git a/smartsim/settings/base.py b/smartsim/settings/base.py deleted file mode 100644 index fa1bed0362..0000000000 --- a/smartsim/settings/base.py +++ /dev/null @@ -1,641 +0,0 @@ -# BSD 2-Clause License # -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from __future__ import annotations - -import copy -import typing as t - -from smartsim.settings.containers import Container - -from .._core.utils.helpers import expand_exe_path, fmt_dict, is_valid_cmd -from ..entity.dbobject import FSModel, FSScript -from ..log import get_logger - -logger = get_logger(__name__) - -# fmt: off -class SettingsBase: - ... -# fmt: on - - -# pylint: disable=too-many-public-methods -class RunSettings(SettingsBase): - # pylint: disable=unused-argument - - def __init__( - self, - run_command: str = "", - run_args: t.Optional[t.Dict[str, t.Union[int, str, float, None]]] = None, - env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None, - container: t.Optional[Container] = None, - **_kwargs: t.Any, - ) -> None: - """Run parameters for a `Aapplication`` - - The base ``RunSettings`` class should only be used with the `local` - launcher on single node, workstations, or laptops. - - If no ``run_command`` is specified, the executable will be launched - locally. - - ``run_args`` passed as a dict will be interpreted literally for - local ``RunSettings`` and added directly to the ``run_command`` - e.g. run_args = {"-np": 2} will be "-np 2" - - Example initialization - - .. highlight:: python - .. code-block:: python - - rs = RunSettings("echo", "hello", "mpirun", run_args={"-np": "2"}) - - :param run_command: launch binary (e.g. "srun") - :param run_args: arguments for run command (e.g. `-np` for `mpiexec`) - :param env_vars: environment vars to launch job with - :param container: container type for workload (e.g. "singularity") - """ - # Do not expand executable if running within a container - self.run_args = run_args or {} - self.env_vars = env_vars or {} - self.container = container - self._run_command = run_command - self.in_batch = False - self.colocated_fs_settings: t.Optional[ - t.Dict[ - str, - t.Union[ - bool, - int, - str, - None, - t.List[str], - t.Iterable[t.Union[int, t.Iterable[int]]], - t.List[FSModel], - t.List[FSScript], - t.Dict[str, t.Union[int, None]], - t.Dict[str, str], - ], - ] - ] = None - - @property - def run_args(self) -> t.Dict[str, t.Union[int, str, float, None]]: - """Return an immutable list of attached run arguments. - - :returns: attached run arguments - """ - return self._run_args - - @run_args.setter - def run_args(self, value: t.Dict[str, t.Union[int, str, float, None]]) -> None: - """Set the run arguments. - - :param value: run arguments - """ - self._run_args = copy.deepcopy(value) - - @property - def env_vars(self) -> t.Dict[str, t.Optional[str]]: - """Return an immutable list of attached environment variables. - - :returns: attached environment variables - """ - return self._env_vars - - @env_vars.setter - def env_vars(self, value: t.Dict[str, t.Optional[str]]) -> None: - """Set the environment variables. - - :param value: environment variables - """ - self._env_vars = copy.deepcopy(value) - - # To be overwritten by subclasses. Set of reserved args a user cannot change - reserved_run_args = set() # type: set[str] - - def set_nodes(self, nodes: int) -> None: - """Set the number of nodes - - :param nodes: number of nodes to run with - """ - logger.warning( - ( - "Node specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_tasks(self, tasks: int) -> None: - """Set the number of tasks to launch - - :param tasks: number of tasks to launch - """ - logger.warning( - ( - "Task specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_tasks_per_node(self, tasks_per_node: int) -> None: - """Set the number of tasks per node - - :param tasks_per_node: number of tasks to launch per node - """ - logger.warning( - ( - "Task per node specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_task_map(self, task_mapping: str) -> None: - """Set a task mapping - - :param task_mapping: task mapping - """ - logger.warning( - ( - "Task mapping specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_cpus_per_task(self, cpus_per_task: int) -> None: - """Set the number of cpus per task - - :param cpus_per_task: number of cpus per task - """ - logger.warning( - ( - "CPU per node specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: - """Specify the hostlist for this job - - :param host_list: hosts to launch on - """ - logger.warning( - ( - "Hostlist specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_hostlist_from_file(self, file_path: str) -> None: - """Use the contents of a file to specify the hostlist for this job - - :param file_path: Path to the hostlist file - """ - logger.warning( - ( - "Hostlist from file specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_excluded_hosts(self, host_list: t.Union[str, t.List[str]]) -> None: - """Specify a list of hosts to exclude for launching this job - - :param host_list: hosts to exclude - """ - logger.warning( - ( - "Excluded host list specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_cpu_bindings(self, bindings: t.Union[int, t.List[int]]) -> None: - """Set the cores to which MPI processes are bound - - :param bindings: List specifing the cores to which MPI processes are bound - """ - logger.warning( - ( - "CPU binding specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_memory_per_node(self, memory_per_node: int) -> None: - """Set the amount of memory required per node in megabytes - - :param memory_per_node: Number of megabytes per node - """ - logger.warning( - ( - "Memory per node specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_verbose_launch(self, verbose: bool) -> None: - """Set the job to run in verbose mode - - :param verbose: Whether the job should be run verbosely - """ - logger.warning( - ( - "Verbose specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_quiet_launch(self, quiet: bool) -> None: - """Set the job to run in quiet mode - - :param quiet: Whether the job should be run quietly - """ - logger.warning( - ( - "Quiet specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_broadcast(self, dest_path: t.Optional[str] = None) -> None: - """Copy executable file to allocated compute nodes - - :param dest_path: Path to copy an executable file - """ - logger.warning( - ( - "Broadcast specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_time(self, hours: int = 0, minutes: int = 0, seconds: int = 0) -> None: - """Automatically format and set wall time - - :param hours: number of hours to run job - :param minutes: number of minutes to run job - :param seconds: number of seconds to run job - """ - return self.set_walltime( - self._fmt_walltime(int(hours), int(minutes), int(seconds)) - ) - - def set_node_feature(self, feature_list: t.Union[str, t.List[str]]) -> None: - """Specify the node feature for this job - - :param feature_list: node feature to launch on - """ - logger.warning( - ( - "Feature specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - @staticmethod - def _fmt_walltime(hours: int, minutes: int, seconds: int) -> str: - """Convert hours, minutes, and seconds into valid walltime format - - By defualt the formatted wall time is the total number of seconds. - - :param hours: number of hours to run job - :param minutes: number of minutes to run job - :param seconds: number of seconds to run job - :returns: Formatted walltime - """ - time_ = hours * 3600 - time_ += minutes * 60 - time_ += seconds - return str(time_) - - def set_walltime(self, walltime: str) -> None: - """Set the formatted walltime - - :param walltime: Time in format required by launcher`` - """ - logger.warning( - ( - "Walltime specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_binding(self, binding: str) -> None: - """Set binding - - :param binding: Binding - """ - logger.warning( - ( - "binding specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def set_mpmd_preamble(self, preamble_lines: t.List[str]) -> None: - """Set preamble to a file to make a job MPMD - - :param preamble_lines: lines to put at the beginning of a file. - """ - logger.warning( - ( - "MPMD preamble specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - def make_mpmd(self, settings: RunSettings) -> None: - """Make job an MPMD job - - :param settings: ``RunSettings`` instance - """ - logger.warning( - ( - "Make MPMD specification not implemented for this " - f"RunSettings type: {type(self)}" - ) - ) - - @property - def run_command(self) -> t.Optional[str]: - """Return the launch binary used to launch the executable - - Attempt to expand the path to the executable if possible - - :returns: launch binary e.g. mpiexec - """ - cmd = self._run_command - - if cmd: - if is_valid_cmd(cmd): - # command is valid and will be expanded - return expand_exe_path(cmd) - # command is not valid, so return it as is - # it may be on the compute nodes but not local machine - return cmd - # run without run command - return None - - def update_env(self, env_vars: t.Dict[str, t.Union[str, int, float, bool]]) -> None: - """Update the job environment variables - - To fully inherit the current user environment, add the - workload-manager-specific flag to the launch command. For example, - ``--export=ALL`` for slurm, or ``-V`` for PBS/aprun. - - :param env_vars: environment variables to update or add - :raises TypeError: if env_vars values cannot be coerced to strings - """ - val_types = (str, int, float, bool) - # Coerce env_vars values to str as a convenience to user - for env, val in env_vars.items(): - if not isinstance(val, val_types): - raise TypeError( - f"env_vars[{env}] was of type {type(val)}, not {val_types}" - ) - - self.env_vars[env] = str(val) - - def set( - self, - arg: t.Union[str, int], - value: t.Optional[str] = None, - condition: bool = True, - ) -> None: - """Allows users to set individual run arguments. - - A method that allows users to set run arguments after object - instantiation. Does basic formatting such as stripping leading dashes. - If the argument has been set previously, this method will log warning - but ultimately comply. - - Conditional expressions may be passed to the conditional parameter. If the - expression evaluates to True, the argument will be set. In not an info - message is logged and no further operation is performed. - - Basic Usage - - .. highlight:: python - .. code-block:: python - - rs = RunSettings("python") - rs.set("an-arg", "a-val") - rs.set("a-flag") - rs.format_run_args() # returns ["an-arg", "a-val", "a-flag", "None"] - - Slurm Example with Conditional Setting - - .. highlight:: python - .. code-block:: python - - import socket - - rs = SrunSettings("echo", "hello") - rs.set_tasks(1) - rs.set("exclusive") - - # Only set this argument if condition param evals True - # Otherwise log and NOP - rs.set("partition", "debug", - condition=socket.gethostname()=="testing-system") - - rs.format_run_args() - # returns ["exclusive", "None", "partition", "debug"] iff - socket.gethostname()=="testing-system" - # otherwise returns ["exclusive", "None"] - - :param arg: name of the argument - :param value: value of the argument - :param conditon: set the argument if condition evaluates to True - """ - if not isinstance(arg, str): - raise TypeError("Argument name should be of type str") - if value is not None and not isinstance(value, (str, int)): - raise TypeError("Argument value should be of type str, int, or None") - - res_arg = arg - arg = arg.strip().lstrip("-") - - if arg != res_arg: - logger.warning( - "One or more leading `-` characters were provided to the run argument. \ -Leading dashes were stripped and the arguments were passed to the run_command." - ) - - if not condition: - logger.info(f"Could not set argument '{arg}': condition not met") - return - if arg in self.reserved_run_args: - logger.warning( - ( - f"Could not set argument '{arg}': " - f"it is a reserved arguement of '{type(self).__name__}'" - ) - ) - return - - if arg in self.run_args and value != self.run_args[arg]: - logger.warning(f"Overwritting argument '{arg}' with value '{value}'") - - self.run_args[arg] = value - - def format_run_args(self) -> t.List[str]: - """Return formatted run arguments - - For ``RunSettings``, the run arguments are passed - literally with no formatting. - - :return: list run arguments for these settings - """ - formatted = [] - for arg, value in self.run_args.items(): - formatted.append(arg) - formatted.append(str(value)) - return formatted - - def format_env_vars(self) -> t.List[str]: - """Build environment variable string - - :returns: formatted list of strings to export variables - """ - formatted = [] - for key, val in self.env_vars.items(): - if val is None: - formatted.append(f"{key}=") - else: - formatted.append(f"{key}={val}") - return formatted - - def __str__(self) -> str: # pragma: no-cover - string = "" - if self.run_command: - string += f"\nRun Command: {self.run_command}" - if self.run_args: - string += f"\nRun Arguments:\n{fmt_dict(self.run_args)}" - if self.colocated_fs_settings: - string += "\nCo-located Feature Store: True" - return string - - -class BatchSettings(SettingsBase): - def __init__( - self, - batch_cmd: str, - batch_args: t.Optional[t.Dict[str, t.Optional[str]]] = None, - **kwargs: t.Any, - ) -> None: - self._batch_cmd = batch_cmd - self.batch_args = batch_args or {} - self._preamble: t.List[str] = [] - self.set_nodes(kwargs.get("nodes", None)) - self.set_walltime(kwargs.get("time", None)) - self.set_queue(kwargs.get("queue", None)) - self.set_account(kwargs.get("account", None)) - - @property - def batch_cmd(self) -> str: - """Return the batch command - - Tests to see if we can expand the batch command - path. If we can, then returns the expanded batch - command. If we cannot, returns the batch command as is. - - :returns: batch command - """ - if is_valid_cmd(self._batch_cmd): - return expand_exe_path(self._batch_cmd) - - return self._batch_cmd - - @property - def batch_args(self) -> t.Dict[str, t.Optional[str]]: - """Retrieve attached batch arguments - - :returns: attached batch arguments - """ - return self._batch_args - - @batch_args.setter - def batch_args(self, value: t.Dict[str, t.Optional[str]]) -> None: - """Attach batch arguments - - :param value: dictionary of batch arguments - """ - self._batch_args = copy.deepcopy(value) if value else {} - - def set_nodes(self, num_nodes: int) -> None: - raise NotImplementedError - - def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: - raise NotImplementedError - - def set_queue(self, queue: str) -> None: - raise NotImplementedError - - def set_walltime(self, walltime: str) -> None: - raise NotImplementedError - - def set_account(self, account: str) -> None: - raise NotImplementedError - - def format_batch_args(self) -> t.List[str]: - raise NotImplementedError - - def set_batch_command(self, command: str) -> None: - """Set the command used to launch the batch e.g. ``sbatch`` - - :param command: batch command - """ - self._batch_cmd = command - - def add_preamble(self, lines: t.List[str]) -> None: - """Add lines to the batch file preamble. The lines are just - written (unmodified) at the beginning of the batch file - (after the WLM directives) and can be used to e.g. - start virtual environments before running the executables. - - :param line: lines to add to preamble. - """ - if isinstance(lines, str): - self._preamble += [lines] - elif isinstance(lines, list): - self._preamble += lines - else: - raise TypeError("Expected str or List[str] for lines argument") - - @property - def preamble(self) -> t.Iterable[str]: - """Return an iterable of preamble clauses to be prepended to the batch file - - :return: attached preamble clauses - """ - return (clause for clause in self._preamble) - - def __str__(self) -> str: # pragma: no-cover - string = f"Batch Command: {self._batch_cmd}" - if self.batch_args: - string += f"\nBatch arguments:\n{fmt_dict(self.batch_args)}" - return string diff --git a/smartsim/settings/baseSettings.py b/smartsim/settings/baseSettings.py new file mode 100644 index 0000000000..1acd5f6057 --- /dev/null +++ b/smartsim/settings/baseSettings.py @@ -0,0 +1,29 @@ +# BSD 2-Clause License # +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# fmt: off +class BaseSettings: + ... +# fmt: on diff --git a/smartsim/settings/batchCommand.py b/smartsim/settings/batchCommand.py new file mode 100644 index 0000000000..8f3b0c89d5 --- /dev/null +++ b/smartsim/settings/batchCommand.py @@ -0,0 +1,37 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from enum import Enum + + +class SchedulerType(Enum): + """Schedulers that are supported by + SmartSim. + """ + + Slurm = "slurm" + Pbs = "pbs" + Lsf = "lsf" diff --git a/smartsim/settings/batchSettings.py b/smartsim/settings/batchSettings.py new file mode 100644 index 0000000000..efa7c4b477 --- /dev/null +++ b/smartsim/settings/batchSettings.py @@ -0,0 +1,111 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import copy +import typing as t + +from smartsim.log import get_logger + +from .._core.utils.helpers import fmt_dict +from .baseSettings import BaseSettings +from .batchCommand import SchedulerType +from .builders import BatchArgBuilder +from .builders.batch.lsf import BsubBatchArgBuilder +from .builders.batch.pbs import QsubBatchArgBuilder +from .builders.batch.slurm import SlurmBatchArgBuilder +from .common import StringArgument + +logger = get_logger(__name__) + + +class BatchSettings(BaseSettings): + def __init__( + self, + batch_scheduler: t.Union[SchedulerType, str], + scheduler_args: t.Dict[str, t.Union[str, None]] | None = None, + env_vars: StringArgument | None = None, + ) -> None: + try: + self._batch_scheduler = SchedulerType(batch_scheduler) + except ValueError: + raise ValueError(f"Invalid scheduler type: {batch_scheduler}") from None + self._arg_builder = self._get_arg_builder(scheduler_args) + self.env_vars = env_vars or {} + + @property + def scheduler(self) -> str: + """Return the launcher name.""" + return self._batch_scheduler.value + + @property + def batch_scheduler(self) -> str: + """Return the scheduler name.""" + return self._batch_scheduler.value + + @property + def scheduler_args(self) -> BatchArgBuilder: + """Return the batch argument translator.""" + return self._arg_builder + + @property + def env_vars(self) -> StringArgument: + """Return an immutable list of attached environment variables.""" + return copy.deepcopy(self._env_vars) + + @env_vars.setter + def env_vars(self, value: t.Dict[str, str | None]) -> None: + """Set the environment variables.""" + self._env_vars = copy.deepcopy(value) + + def _get_arg_builder( + self, scheduler_args: StringArgument | None + ) -> BatchArgBuilder: + """Map the Scheduler to the BatchArgBuilder""" + if self._batch_scheduler == SchedulerType.Slurm: + return SlurmBatchArgBuilder(scheduler_args) + elif self._batch_scheduler == SchedulerType.Lsf: + return BsubBatchArgBuilder(scheduler_args) + elif self._batch_scheduler == SchedulerType.Pbs: + return QsubBatchArgBuilder(scheduler_args) + else: + raise ValueError(f"Invalid scheduler type: {self._batch_scheduler}") + + def format_batch_args(self) -> t.List[str]: + """Get the formatted batch arguments for a preview + + :return: batch arguments for Sbatch + """ + return self._arg_builder.format_batch_args() + + def __str__(self) -> str: # pragma: no-cover + string = f"\nScheduler: {self.scheduler}" + if self.scheduler_args: + string += str(self.scheduler_args) + if self.env_vars: + string += f"\nEnvironment variables: \n{fmt_dict(self.env_vars)}" + return string diff --git a/smartsim/settings/builders/__init__.py b/smartsim/settings/builders/__init__.py new file mode 100644 index 0000000000..9cfdd5f9c5 --- /dev/null +++ b/smartsim/settings/builders/__init__.py @@ -0,0 +1,30 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from .batchArgBuilder import BatchArgBuilder +from .launchArgBuilder import LaunchArgBuilder + +__all__ = ["LaunchArgBuilder", "BatchArgBuilder"] diff --git a/smartsim/settings/builders/batch/__init.__.py b/smartsim/settings/builders/batch/__init.__.py new file mode 100644 index 0000000000..41dcbbfc2c --- /dev/null +++ b/smartsim/settings/builders/batch/__init.__.py @@ -0,0 +1,35 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from .lsf import BsubBatchArgBuilder +from .pbs import QsubBatchArgBuilder +from .slurm import SlurmBatchArgBuilder + +__all__ = [ + "BsubBatchArgBuilder", + "QsubBatchArgBuilder", + "SlurmBatchArgBuilder", +] diff --git a/smartsim/settings/builders/batch/lsf.py b/smartsim/settings/builders/batch/lsf.py new file mode 100644 index 0000000000..4bb7bbd27a --- /dev/null +++ b/smartsim/settings/builders/batch/lsf.py @@ -0,0 +1,150 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import typing as t + +from smartsim.log import get_logger + +from ...batchCommand import SchedulerType +from ...common import StringArgument +from ..batchArgBuilder import BatchArgBuilder + +logger = get_logger(__name__) + + +class BsubBatchArgBuilder(BatchArgBuilder): + def scheduler_str(self) -> str: + """Get the string representation of the scheduler""" + return SchedulerType.Lsf.value + + def set_walltime(self, walltime: str) -> None: + """Set the walltime + + This sets ``-W``. + + :param walltime: Time in hh:mm format, e.g. "10:00" for 10 hours, + if time is supplied in hh:mm:ss format, seconds + will be ignored and walltime will be set as ``hh:mm`` + """ + # For compatibility with other launchers, as explained in docstring + if walltime: + if len(walltime.split(":")) > 2: + walltime = ":".join(walltime.split(":")[:2]) + self.set("W", walltime) + + def set_smts(self, smts: int) -> None: + """Set SMTs + + This sets ``-alloc_flags``. If the user sets + SMT explicitly through ``-alloc_flags``, then that + takes precedence. + + :param smts: SMT (e.g on Summit: 1, 2, or 4) + """ + self.set("alloc_flags", str(smts)) + + def set_project(self, project: str) -> None: + """Set the project + + This sets ``-P``. + + :param time: project name + """ + self.set("P", project) + + def set_account(self, account: str) -> None: + """Set the project + + this function is an alias for `set_project`. + + :param account: project name + """ + return self.set_project(account) + + def set_nodes(self, num_nodes: int) -> None: + """Set the number of nodes for this batch job + + This sets ``-nnodes``. + + :param nodes: number of nodes + """ + self.set("nnodes", str(num_nodes)) + + def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: + """Specify the hostlist for this job + + :param host_list: hosts to launch on + :raises TypeError: if not str or list of str + """ + if isinstance(host_list, str): + host_list = [host_list.strip()] + if not isinstance(host_list, list): + raise TypeError("host_list argument must be a list of strings") + if not all(isinstance(host, str) for host in host_list): + raise TypeError("host_list argument must be list of strings") + self.set("m", '"' + " ".join(host_list) + '"') + + def set_tasks(self, tasks: int) -> None: + """Set the number of tasks for this job + + This sets ``-n`` + + :param tasks: number of tasks + """ + self.set("n", str(tasks)) + + def set_queue(self, queue: str) -> None: + """Set the queue for this job + + This sets ``-q`` + + :param queue: The queue to submit the job on + """ + self.set("q", queue) + + def format_batch_args(self) -> t.List[str]: + """Get the formatted batch arguments for a preview + + :return: list of batch arguments for Qsub + """ + opts = [] + + for opt, value in self._scheduler_args.items(): + + prefix = "-" # LSF only uses single dashses + + if value is None: + opts += [prefix + opt] + else: + opts += [f"{prefix}{opt}", str(value)] + + return opts + + def set(self, key: str, value: str | None) -> None: + # Store custom arguments in the launcher_args + self._scheduler_args[key] = value diff --git a/smartsim/settings/builders/batch/pbs.py b/smartsim/settings/builders/batch/pbs.py new file mode 100644 index 0000000000..d04b4beba1 --- /dev/null +++ b/smartsim/settings/builders/batch/pbs.py @@ -0,0 +1,174 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import typing as t +from copy import deepcopy + +from smartsim.log import get_logger + +from ....error import SSConfigError +from ...batchCommand import SchedulerType +from ...common import StringArgument +from ..batchArgBuilder import BatchArgBuilder + +logger = get_logger(__name__) + + +class QsubBatchArgBuilder(BatchArgBuilder): + def scheduler_str(self) -> str: + """Get the string representation of the scheduler""" + return SchedulerType.Pbs.value + + def set_nodes(self, num_nodes: int) -> None: + """Set the number of nodes for this batch job + + In PBS, 'select' is the more primitive way of describing how + many nodes to allocate for the job. 'nodes' is equivalent to + 'select' with a 'place' statement. Assuming that only advanced + users would use 'set_resource' instead, defining the number of + nodes here is sets the 'nodes' resource. + + :param num_nodes: number of nodes + """ + + self.set("nodes", str(num_nodes)) + + def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: + """Specify the hostlist for this job + + :param host_list: hosts to launch on + :raises TypeError: if not str or list of str + """ + if isinstance(host_list, str): + host_list = [host_list.strip()] + if not isinstance(host_list, list): + raise TypeError("host_list argument must be a list of strings") + if not all(isinstance(host, str) for host in host_list): + raise TypeError("host_list argument must be a list of strings") + self.set("hostname", ",".join(host_list)) + + def set_walltime(self, walltime: str) -> None: + """Set the walltime of the job + + format = "HH:MM:SS" + + If a walltime argument is provided in + ``QsubBatchSettings.resources``, then + this value will be overridden + + :param walltime: wall time + """ + self.set("walltime", walltime) + + def set_queue(self, queue: str) -> None: + """Set the queue for the batch job + + :param queue: queue name + """ + self.set("q", str(queue)) + + def set_ncpus(self, num_cpus: int) -> None: + """Set the number of cpus obtained in each node. + + If a select argument is provided in + ``QsubBatchSettings.resources``, then + this value will be overridden + + :param num_cpus: number of cpus per node in select + """ + self.set("ppn", str(num_cpus)) + + def set_account(self, account: str) -> None: + """Set the account for this batch job + + :param acct: account id + """ + self.set("A", str(account)) + + def format_batch_args(self) -> t.List[str]: + """Get the formatted batch arguments for a preview + + :return: batch arguments for Qsub + :raises ValueError: if options are supplied without values + """ + opts, batch_arg_copy = self._create_resource_list(self._scheduler_args) + for opt, value in batch_arg_copy.items(): + prefix = "-" + if not value: + raise ValueError("PBS options without values are not allowed") + opts += [f"{prefix}{opt}", str(value)] + return opts + + @staticmethod + def _sanity_check_resources(batch_args: t.Dict[str, str | None]) -> None: + """Check that only select or nodes was specified in resources + + Note: For PBS Pro, nodes is equivalent to 'select' and 'place' so + they are not quite synonyms. Here we assume that + """ + + has_select = batch_args.get("select", None) + has_nodes = batch_args.get("nodes", None) + + if has_select and has_nodes: + raise SSConfigError( + "'select' and 'nodes' cannot both be specified. This can happen " + "if nodes were specified using the 'set_nodes' method and " + "'select' was set using 'set_resource'. Please only specify one." + ) + + def _create_resource_list( + self, batch_args: t.Dict[str, str | None] + ) -> t.Tuple[t.List[str], t.Dict[str, str | None]]: + self._sanity_check_resources(batch_args) + res = [] + + batch_arg_copy = deepcopy(batch_args) + # Construct the basic select/nodes statement + if select := batch_arg_copy.pop("select", None): + select_command = f"-l select={select}" + elif nodes := batch_arg_copy.pop("nodes", None): + select_command = f"-l nodes={nodes}" + else: + raise SSConfigError( + "Insufficient resource specification: no nodes or select statement" + ) + if ncpus := batch_arg_copy.pop("ppn", None): + select_command += f":ncpus={ncpus}" + if hosts := batch_arg_copy.pop("hostname", None): + hosts_list = ["=".join(("host", str(host))) for host in hosts.split(",")] + select_command += f":{'+'.join(hosts_list)}" + res += select_command.split() + if walltime := batch_arg_copy.pop("walltime", None): + res += ["-l", f"walltime={walltime}"] + + return res, batch_arg_copy + + def set(self, key: str, value: str | None) -> None: + # Store custom arguments in the launcher_args + self._scheduler_args[key] = value diff --git a/smartsim/settings/builders/batch/slurm.py b/smartsim/settings/builders/batch/slurm.py new file mode 100644 index 0000000000..5a03f5acd1 --- /dev/null +++ b/smartsim/settings/builders/batch/slurm.py @@ -0,0 +1,143 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import re +import typing as t + +from smartsim.log import get_logger + +from ...batchCommand import SchedulerType +from ...common import StringArgument +from ..batchArgBuilder import BatchArgBuilder + +logger = get_logger(__name__) + + +class SlurmBatchArgBuilder(BatchArgBuilder): + def scheduler_str(self) -> str: + """Get the string representation of the scheduler""" + return SchedulerType.Slurm.value + + def set_walltime(self, walltime: str) -> None: + """Set the walltime of the job + + format = "HH:MM:SS" + + :param walltime: wall time + """ + pattern = r"^\d{2}:\d{2}:\d{2}$" + if walltime and re.match(pattern, walltime): + self.set("time", str(walltime)) + else: + raise ValueError("Invalid walltime format. Please use 'HH:MM:SS' format.") + + def set_nodes(self, num_nodes: int) -> None: + """Set the number of nodes for this batch job + + This sets ``--nodes``. + + :param num_nodes: number of nodes + """ + self.set("nodes", str(num_nodes)) + + def set_account(self, account: str) -> None: + """Set the account for this batch job + + This sets ``--account``. + + :param account: account id + """ + self.set("account", account) + + def set_partition(self, partition: str) -> None: + """Set the partition for the batch job + + This sets ``--partition``. + + :param partition: partition name + """ + self.set("partition", str(partition)) + + def set_queue(self, queue: str) -> None: + """alias for set_partition + + Sets the partition for the slurm batch job + + :param queue: the partition to run the batch job on + """ + return self.set_partition(queue) + + def set_cpus_per_task(self, cpus_per_task: int) -> None: + """Set the number of cpus to use per task + + This sets ``--cpus-per-task`` + + :param num_cpus: number of cpus to use per task + """ + self.set("cpus-per-task", str(cpus_per_task)) + + def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: + """Specify the hostlist for this job + + This sets ``--nodelist``. + + :param host_list: hosts to launch on + :raises TypeError: if not str or list of str + """ + if isinstance(host_list, str): + host_list = [host_list.strip()] + if not isinstance(host_list, list): + raise TypeError("host_list argument must be a list of strings") + if not all(isinstance(host, str) for host in host_list): + raise TypeError("host_list argument must be list of strings") + self.set("nodelist", ",".join(host_list)) + + def format_batch_args(self) -> t.List[str]: + """Get the formatted batch arguments for a preview + + :return: batch arguments for Sbatch + """ + opts = [] + # TODO add restricted here + for opt, value in self._scheduler_args.items(): + # attach "-" prefix if argument is 1 character otherwise "--" + short_arg = len(opt) == 1 + prefix = "-" if short_arg else "--" + + if not value: + opts += [prefix + opt] + else: + if short_arg: + opts += [prefix + opt, str(value)] + else: + opts += ["=".join((prefix + opt, str(value)))] + return opts + + def set(self, key: str, value: str | None) -> None: + # Store custom arguments in the launcher_args + self._scheduler_args[key] = value diff --git a/smartsim/settings/builders/batchArgBuilder.py b/smartsim/settings/builders/batchArgBuilder.py new file mode 100644 index 0000000000..ad466f2541 --- /dev/null +++ b/smartsim/settings/builders/batchArgBuilder.py @@ -0,0 +1,108 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import copy +import typing as t +from abc import ABC, abstractmethod + +from smartsim.log import get_logger + +from ..._core.utils.helpers import fmt_dict + +logger = get_logger(__name__) + + +class BatchArgBuilder(ABC): + """Abstract base class that defines all generic scheduler + argument methods that are not supported. It is the + responsibility of child classes for each launcher to translate + the input parameter to a properly formatted launcher argument. + """ + + def __init__(self, scheduler_args: t.Dict[str, str | None] | None) -> None: + self._scheduler_args = copy.deepcopy(scheduler_args) or {} + + @abstractmethod + def scheduler_str(self) -> str: + """Get the string representation of the launcher""" + pass + + @abstractmethod + def set_account(self, account: str) -> None: + """Set the account for this batch job + + :param account: account id + """ + pass + + @abstractmethod + def set_queue(self, queue: str) -> None: + """alias for set_partition + + Sets the partition for the slurm batch job + + :param queue: the partition to run the batch job on + """ + pass + + @abstractmethod + def set_walltime(self, walltime: str) -> None: + """Set the walltime of the job + + :param walltime: wall time + """ + pass + + @abstractmethod + def set_nodes(self, num_nodes: int) -> None: + """Set the number of nodes for this batch job + + :param num_nodes: number of nodes + """ + pass + + @abstractmethod + def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: + """Specify the hostlist for this job + + :param host_list: hosts to launch on + :raises TypeError: if not str or list of str + """ + pass + + @abstractmethod + def format_batch_args(self) -> t.List[str]: + """Get the formatted batch arguments for a preview + + :return: batch arguments for Sbatch + """ + pass + + def __str__(self) -> str: # pragma: no-cover + string = f"\nScheduler Arguments:\n{fmt_dict(self._scheduler_args)}" + return string diff --git a/smartsim/settings/builders/launch/__init__.py b/smartsim/settings/builders/launch/__init__.py new file mode 100644 index 0000000000..d593c59f7c --- /dev/null +++ b/smartsim/settings/builders/launch/__init__.py @@ -0,0 +1,19 @@ +from .alps import AprunArgBuilder +from .dragon import DragonArgBuilder +from .local import LocalArgBuilder +from .lsf import JsrunArgBuilder +from .mpi import MpiArgBuilder, MpiexecArgBuilder, OrteArgBuilder +from .pals import PalsMpiexecArgBuilder +from .slurm import SlurmArgBuilder + +__all__ = [ + "AprunArgBuilder", + "DragonArgBuilder", + "LocalArgBuilder", + "JsrunArgBuilder", + "MpiArgBuilder", + "MpiexecArgBuilder", + "OrteArgBuilder", + "PalsMpiexecArgBuilder", + "SlurmArgBuilder", +] diff --git a/smartsim/settings/alpsSettings.py b/smartsim/settings/builders/launch/alps.py similarity index 65% rename from smartsim/settings/alpsSettings.py rename to smartsim/settings/builders/launch/alps.py index b3a086c7c9..a527cafac0 100644 --- a/smartsim/settings/alpsSettings.py +++ b/smartsim/settings/builders/launch/alps.py @@ -28,49 +28,23 @@ import typing as t -from ..error import SSUnsupportedError -from .base import RunSettings +from smartsim.log import get_logger +from ...common import StringArgument, set_check_input +from ...launchCommand import LauncherType +from ..launchArgBuilder import LaunchArgBuilder -class AprunSettings(RunSettings): - def __init__( - self, - run_args: t.Optional[t.Dict[str, t.Union[int, str, float, None]]] = None, - env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None, - **kwargs: t.Any, - ): - """Settings to run job with ``aprun`` command +logger = get_logger(__name__) - ``AprunSettings`` can be used for the `pbs` launcher. - :param run_args: arguments for run command - :param env_vars: environment vars to launch job with - """ - super().__init__( - run_command="aprun", - run_args=run_args, - env_vars=env_vars, - **kwargs, - ) - self.mpmd: t.List[RunSettings] = [] - - def make_mpmd(self, settings: RunSettings) -> None: - """Make job an MPMD job - - This method combines two ``AprunSettings`` - into a single MPMD command joined with ':' +class AprunArgBuilder(LaunchArgBuilder): + def _reserved_launch_args(self) -> set[str]: + """Return reserved launch arguments.""" + return {"wdir"} - :param settings: ``AprunSettings`` instance - """ - if self.colocated_fs_settings: - raise SSUnsupportedError( - "Colocated applications cannot be run as a mpmd workload" - ) - if self.container: - raise SSUnsupportedError( - "Containerized MPMD workloads are not yet supported." - ) - self.mpmd.append(settings) + def launcher_str(self) -> str: + """Get the string representation of the launcher""" + return LauncherType.Alps.value def set_cpus_per_task(self, cpus_per_task: int) -> None: """Set the number of cpus to use per task @@ -79,7 +53,7 @@ def set_cpus_per_task(self, cpus_per_task: int) -> None: :param cpus_per_task: number of cpus to use per task """ - self.run_args["cpus-per-pe"] = int(cpus_per_task) + self.set("cpus-per-pe", str(cpus_per_task)) def set_tasks(self, tasks: int) -> None: """Set the number of tasks for this job @@ -88,7 +62,7 @@ def set_tasks(self, tasks: int) -> None: :param tasks: number of tasks """ - self.run_args["pes"] = int(tasks) + self.set("pes", str(tasks)) def set_tasks_per_node(self, tasks_per_node: int) -> None: """Set the number of tasks for this job @@ -97,11 +71,13 @@ def set_tasks_per_node(self, tasks_per_node: int) -> None: :param tasks_per_node: number of tasks per node """ - self.run_args["pes-per-node"] = int(tasks_per_node) + self.set("pes-per-node", str(tasks_per_node)) def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: """Specify the hostlist for this job + This sets ``--node-list`` + :param host_list: hosts to launch on :raises TypeError: if not str or list of str """ @@ -111,7 +87,7 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: raise TypeError("host_list argument must be a list of strings") if not all(isinstance(host, str) for host in host_list): raise TypeError("host_list argument must be list of strings") - self.run_args["node-list"] = ",".join(host_list) + self.set("node-list", ",".join(host_list)) def set_hostlist_from_file(self, file_path: str) -> None: """Use the contents of a file to set the node list @@ -120,11 +96,13 @@ def set_hostlist_from_file(self, file_path: str) -> None: :param file_path: Path to the hostlist file """ - self.run_args["node-list-file"] = file_path + self.set("node-list-file", file_path) def set_excluded_hosts(self, host_list: t.Union[str, t.List[str]]) -> None: """Specify a list of hosts to exclude for launching this job + This sets ``--exclude-node-list`` + :param host_list: hosts to exclude :raises TypeError: if not str or list of str """ @@ -134,7 +112,7 @@ def set_excluded_hosts(self, host_list: t.Union[str, t.List[str]]) -> None: raise TypeError("host_list argument must be a list of strings") if not all(isinstance(host, str) for host in host_list): raise TypeError("host_list argument must be list of strings") - self.run_args["exclude-node-list"] = ",".join(host_list) + self.set("exclude-node-list", ",".join(host_list)) def set_cpu_bindings(self, bindings: t.Union[int, t.List[int]]) -> None: """Specifies the cores to which MPI processes are bound @@ -145,7 +123,7 @@ def set_cpu_bindings(self, bindings: t.Union[int, t.List[int]]) -> None: """ if isinstance(bindings, int): bindings = [bindings] - self.run_args["cpu-binding"] = ",".join(str(int(num)) for num in bindings) + self.set("cpu-binding", ",".join(str(num) for num in bindings)) def set_memory_per_node(self, memory_per_node: int) -> None: """Specify the real memory required per node @@ -154,7 +132,16 @@ def set_memory_per_node(self, memory_per_node: int) -> None: :param memory_per_node: Per PE memory limit in megabytes """ - self.run_args["memory-per-pe"] = int(memory_per_node) + self.set("memory-per-pe", str(memory_per_node)) + + def set_walltime(self, walltime: str) -> None: + """Set the walltime of the job + + Walltime is given in total number of seconds + + :param walltime: wall time + """ + self.set("cpu-time-limit", str(walltime)) def set_verbose_launch(self, verbose: bool) -> None: """Set the job to run in verbose mode @@ -164,9 +151,9 @@ def set_verbose_launch(self, verbose: bool) -> None: :param verbose: Whether the job should be run verbosely """ if verbose: - self.run_args["debug"] = 7 + self.set("debug", "7") else: - self.run_args.pop("debug", None) + self._launch_args.pop("debug", None) def set_quiet_launch(self, quiet: bool) -> None: """Set the job to run in quiet mode @@ -176,48 +163,53 @@ def set_quiet_launch(self, quiet: bool) -> None: :param quiet: Whether the job should be run quietly """ if quiet: - self.run_args["quiet"] = None + self._launch_args["quiet"] = None else: - self.run_args.pop("quiet", None) - - def format_run_args(self) -> t.List[str]: - """Return a list of ALPS formatted run arguments - - :return: list of ALPS arguments for these settings - """ - # args launcher uses - args = [] - restricted = ["wdir"] - - for opt, value in self.run_args.items(): - if opt not in restricted: - short_arg = bool(len(str(opt)) == 1) - prefix = "-" if short_arg else "--" - if not value: - args += [prefix + opt] - else: - if short_arg: - args += [prefix + opt, str(value)] - else: - args += ["=".join((prefix + opt, str(value)))] - return args + self._launch_args.pop("quiet", None) - def format_env_vars(self) -> t.List[str]: + def format_env_vars( + self, env_vars: t.Optional[t.Dict[str, t.Optional[str]]] + ) -> t.Union[t.List[str], None]: """Format the environment variables for aprun :return: list of env vars """ formatted = [] - if self.env_vars: - for name, value in self.env_vars.items(): + if env_vars: + for name, value in env_vars.items(): formatted += ["-e", name + "=" + str(value)] return formatted - def set_walltime(self, walltime: str) -> None: - """Set the walltime of the job - - Walltime is given in total number of seconds + def format_launch_args(self) -> t.Union[t.List[str], None]: + """Return a list of ALPS formatted run arguments - :param walltime: wall time + :return: list of ALPS arguments for these settings """ - self.run_args["cpu-time-limit"] = str(walltime) + # args launcher uses + args = [] + for opt, value in self._launch_args.items(): + short_arg = len(opt) == 1 + prefix = "-" if short_arg else "--" + if not value: + args += [prefix + opt] + else: + if short_arg: + args += [prefix + opt, str(value)] + else: + args += ["=".join((prefix + opt, str(value)))] + return args + + def set(self, key: str, value: str | None) -> None: + """Set the launch arguments""" + set_check_input(key, value) + if key in self._reserved_launch_args(): + logger.warning( + ( + f"Could not set argument '{key}': " + f"it is a reserved argument of '{type(self).__name__}'" + ) + ) + return + if key in self._launch_args and key != self._launch_args[key]: + logger.warning(f"Overwritting argument '{key}' with value '{value}'") + self._launch_args[key] = value diff --git a/smartsim/settings/dragonRunSettings.py b/smartsim/settings/builders/launch/dragon.py similarity index 60% rename from smartsim/settings/dragonRunSettings.py rename to smartsim/settings/builders/launch/dragon.py index b8baa4708c..1ca0a244de 100644 --- a/smartsim/settings/dragonRunSettings.py +++ b/smartsim/settings/builders/launch/dragon.py @@ -28,51 +28,37 @@ import typing as t -from ..log import get_logger -from .base import RunSettings +from smartsim.log import get_logger -logger = get_logger(__name__) - - -class DragonRunSettings(RunSettings): - def __init__( - self, - exe: str, - exe_args: t.Optional[t.Union[str, t.List[str]]] = None, - env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None, - **kwargs: t.Any, - ) -> None: - """Initialize run parameters for a Dragon process +from ...common import StringArgument, set_check_input +from ...launchCommand import LauncherType +from ..launchArgBuilder import LaunchArgBuilder - ``DragonRunSettings`` should only be used on systems where Dragon - is available and installed in the current environment. +logger = get_logger(__name__) - If an allocation is specified, the instance receiving these run - parameters will launch on that allocation. - :param exe: executable to run - :param exe_args: executable arguments, defaults to None - :param env_vars: environment variables for job, defaults to None - :param alloc: allocation ID if running on existing alloc, defaults to None - """ - super().__init__( - exe, - exe_args, - run_command="", - env_vars=env_vars, - **kwargs, - ) +class DragonArgBuilder(LaunchArgBuilder): + def launcher_str(self) -> str: + """Get the string representation of the launcher""" + return LauncherType.Dragon.value def set_nodes(self, nodes: int) -> None: """Set the number of nodes :param nodes: number of nodes to run with """ - self.run_args["nodes"] = nodes + self.set("nodes", str(nodes)) def set_tasks_per_node(self, tasks_per_node: int) -> None: """Set the number of tasks for this job :param tasks_per_node: number of tasks per node """ - self.run_args["tasks-per-node"] = tasks_per_node + self.set("tasks-per-node", str(tasks_per_node)) + + def set(self, key: str, value: str | None) -> None: + """Set the launch arguments""" + set_check_input(key, value) + if key in self._launch_args and key != self._launch_args[key]: + logger.warning(f"Overwritting argument '{key}' with value '{value}'") + self._launch_args[key] = value diff --git a/smartsim/settings/builders/launch/local.py b/smartsim/settings/builders/launch/local.py new file mode 100644 index 0000000000..595514f155 --- /dev/null +++ b/smartsim/settings/builders/launch/local.py @@ -0,0 +1,74 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import typing as t + +from smartsim.log import get_logger + +from ...common import StringArgument, set_check_input +from ...launchCommand import LauncherType +from ..launchArgBuilder import LaunchArgBuilder + +logger = get_logger(__name__) + + +class LocalArgBuilder(LaunchArgBuilder): + def launcher_str(self) -> str: + """Get the string representation of the launcher""" + return LauncherType.Local.value + + def format_env_vars(self, env_vars: StringArgument) -> t.Union[t.List[str], None]: + """Build environment variable string + + :returns: formatted list of strings to export variables + """ + formatted = [] + for key, val in env_vars.items(): + if val is None: + formatted.append(f"{key}=") + else: + formatted.append(f"{key}={val}") + return formatted + + def format_launch_args(self) -> t.Union[t.List[str], None]: + """Build launcher argument string + + :returns: formatted list of launcher arguments + """ + formatted = [] + for arg, value in self._launch_args.items(): + formatted.append(arg) + formatted.append(str(value)) + return formatted + + def set(self, key: str, value: str | None) -> None: + """Set the launch arguments""" + set_check_input(key, value) + if key in self._launch_args and key != self._launch_args[key]: + logger.warning(f"Overwritting argument '{key}' with value '{value}'") + self._launch_args[key] = value diff --git a/smartsim/settings/builders/launch/lsf.py b/smartsim/settings/builders/launch/lsf.py new file mode 100644 index 0000000000..2c72002e54 --- /dev/null +++ b/smartsim/settings/builders/launch/lsf.py @@ -0,0 +1,117 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import typing as t + +from smartsim.log import get_logger + +from ...common import StringArgument, set_check_input +from ...launchCommand import LauncherType +from ..launchArgBuilder import LaunchArgBuilder + +logger = get_logger(__name__) + + +class JsrunArgBuilder(LaunchArgBuilder): + def launcher_str(self) -> str: + """Get the string representation of the launcher""" + return LauncherType.Lsf.value + + def _reserved_launch_args(self) -> set[str]: + """Return reserved launch arguments.""" + return {"chdir", "h", "stdio_stdout", "o", "stdio_stderr", "k"} + + def set_tasks(self, tasks: int) -> None: + """Set the number of tasks for this job + + This sets ``--np`` + + :param tasks: number of tasks + """ + self.set("np", str(tasks)) + + def set_binding(self, binding: str) -> None: + """Set binding + + This sets ``--bind`` + + :param binding: Binding, e.g. `packed:21` + """ + self.set("bind", binding) + + def format_env_vars( + self, env_vars: t.Dict[str, t.Optional[str]] + ) -> t.Union[t.List[str], None]: + """Format environment variables. Each variable needs + to be passed with ``--env``. If a variable is set to ``None``, + its value is propagated from the current environment. + + :returns: formatted list of strings to export variables + """ + format_str = [] + for k, v in env_vars.items(): + if v: + format_str += ["-E", f"{k}={v}"] + else: + format_str += ["-E", f"{k}"] + return format_str + + def format_launch_args(self) -> t.Union[t.List[str], None]: + """Return a list of LSF formatted run arguments + + :return: list of LSF arguments for these settings + """ + # args launcher uses + args = [] + + for opt, value in self._launch_args.items(): + short_arg = bool(len(str(opt)) == 1) + prefix = "-" if short_arg else "--" + if value is None: + args += [prefix + opt] + else: + if short_arg: + args += [prefix + opt, str(value)] + else: + args += ["=".join((prefix + opt, str(value)))] + return args + + def set(self, key: str, value: str | None) -> None: + """Set the launch arguments""" + set_check_input(key, value) + if key in self._reserved_launch_args(): + logger.warning( + ( + f"Could not set argument '{key}': " + f"it is a reserved argument of '{type(self).__name__}'" + ) + ) + return + if key in self._launch_args and key != self._launch_args[key]: + logger.warning(f"Overwritting argument '{key}' with value '{value}'") + self._launch_args[key] = value diff --git a/smartsim/settings/palsSettings.py b/smartsim/settings/builders/launch/mpi.py similarity index 53% rename from smartsim/settings/palsSettings.py rename to smartsim/settings/builders/launch/mpi.py index 48f254a0b8..1331be317f 100644 --- a/smartsim/settings/palsSettings.py +++ b/smartsim/settings/builders/launch/mpi.py @@ -24,61 +24,23 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import typing as t - -from ..log import get_logger -from .mpiSettings import _BaseMPISettings - -logger = get_logger(__name__) +from __future__ import annotations +import typing as t -class PalsMpiexecSettings(_BaseMPISettings): - """Settings to run job with ``mpiexec`` under the HPE Cray - Parallel Application Launch Service (PALS) +from smartsim.log import get_logger - Note that environment variables can be passed with a None - value to signify that they should be exported from the current - environment +from ...common import set_check_input +from ...launchCommand import LauncherType +from ..launchArgBuilder import LaunchArgBuilder - Any arguments passed in the ``run_args`` dict will be converted - into ``mpiexec`` arguments and prefixed with ``--``. Values of - None can be provided for arguments that do not have values. +logger = get_logger(__name__) - :param run_args: arguments for run command - :param env_vars: environment vars to launch job with - """ - def __init__( - self, - run_args: t.Optional[t.Dict[str, t.Union[int, str, float, None]]] = None, - env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None, - fail_if_missing_exec: bool = True, - **kwargs: t.Any, - ) -> None: - """Settings to format run job with an MPI-standard binary - - Note that environment variables can be passed with a None - value to signify that they should be exported from the current - environment - - Any arguments passed in the ``run_args`` dict will be converted - command line arguments and prefixed with ``--``. Values of - None can be provided for arguments that do not have values. - - :param exe: executable - :param exe_args: executable arguments - :param run_args: arguments for run command - :param env_vars: environment vars to launch job with - :param fail_if_missing_exec: Throw an exception of the MPI command - is missing. Otherwise, throw a warning - """ - super().__init__( - run_command="mpiexec", - run_args=run_args, - env_vars=env_vars, - fail_if_missing_exec=fail_if_missing_exec, - **kwargs, - ) +class _BaseMPIArgBuilder(LaunchArgBuilder): + def _reserved_launch_args(self) -> set[str]: + """Return reserved launch arguments.""" + return {"wd", "wdir"} def set_task_map(self, task_mapping: str) -> None: """Set ``mpirun`` task mapping @@ -89,7 +51,7 @@ def set_task_map(self, task_mapping: str) -> None: :param task_mapping: task mapping """ - logger.warning("set_task_map not supported under PALS") + self.set("map-by", task_mapping) def set_cpus_per_task(self, cpus_per_task: int) -> None: """Set the number of tasks for this job @@ -101,7 +63,23 @@ def set_cpus_per_task(self, cpus_per_task: int) -> None: :param cpus_per_task: number of tasks """ - logger.warning("set_cpus_per_task not supported under PALS") + self.set("cpus-per-proc", str(cpus_per_task)) + + def set_executable_broadcast(self, dest_path: str) -> None: + """Copy the specified executable(s) to remote machines + + This sets ``--preload-binary`` + + :param dest_path: Destination path (Ignored) + """ + if dest_path is not None and isinstance(dest_path, str): + logger.warning( + ( + f"{type(self)} cannot set a destination path during broadcast. " + "Using session directory instead" + ) + ) + self.set("preload-binary", dest_path) def set_cpu_binding_type(self, bind_type: str) -> None: """Specifies the cores to which MPI processes are bound @@ -110,118 +88,163 @@ def set_cpu_binding_type(self, bind_type: str) -> None: :param bind_type: binding type """ - self.run_args["cpu-bind"] = bind_type - - def set_tasks(self, tasks: int) -> None: - """Set the number of tasks - - :param tasks: number of total tasks to launch - """ - self.run_args["np"] = int(tasks) + self.set("bind-to", bind_type) def set_tasks_per_node(self, tasks_per_node: int) -> None: """Set the number of tasks per node :param tasks_per_node: number of tasks to launch per node """ - self.run_args["ppn"] = int(tasks_per_node) + self.set("npernode", str(tasks_per_node)) - def set_quiet_launch(self, quiet: bool) -> None: - """Set the job to run in quiet mode + def set_tasks(self, tasks: int) -> None: + """Set the number of tasks for this job - This sets ``--quiet`` + This sets ``-n`` for MPI compliant implementations - :param quiet: Whether the job should be run quietly + :param tasks: number of tasks """ + self.set("n", str(tasks)) - logger.warning("set_quiet_launch not supported under PALS") + def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: + """Set the hostlist for the ``mpirun`` command - def set_broadcast(self, dest_path: t.Optional[str] = None) -> None: - """Copy the specified executable(s) to remote machines + This sets ``--host`` - This sets ``--preload-binary`` + :param host_list: list of host names + :raises TypeError: if not str or list of str + """ + if isinstance(host_list, str): + host_list = [host_list.strip()] + if not isinstance(host_list, list): + raise TypeError("host_list argument must be a list of strings") + if not all(isinstance(host, str) for host in host_list): + raise TypeError("host_list argument must be list of strings") + self.set("host", ",".join(host_list)) - :param dest_path: Destination path (Ignored) + def set_hostlist_from_file(self, file_path: str) -> None: + """Use the contents of a file to set the hostlist + + This sets ``--hostfile`` + + :param file_path: Path to the hostlist file """ - if dest_path is not None and isinstance(dest_path, str): - logger.warning( - ( - f"{type(self)} cannot set a destination path during broadcast. " - "Using session directory instead" - ) - ) - self.run_args["transfer"] = None + self.set("hostfile", file_path) + + def set_verbose_launch(self, verbose: bool) -> None: + """Set the job to run in verbose mode + + This sets ``--verbose`` + + :param verbose: Whether the job should be run verbosely + """ + if verbose: + self.set("verbose", None) + else: + self._launch_args.pop("verbose", None) def set_walltime(self, walltime: str) -> None: """Set the maximum number of seconds that a job will run + This sets ``--timeout`` + :param walltime: number like string of seconds that a job will run in secs """ - logger.warning("set_walltime not supported under PALS") + self.set("timeout", walltime) - def set_gpu_affinity_script(self, affinity: str, *args: t.Any) -> None: - """Set the GPU affinity through a bash script + def set_quiet_launch(self, quiet: bool) -> None: + """Set the job to run in quiet mode + + This sets ``--quiet`` - :param affinity: path to the affinity script + :param quiet: Whether the job should be run quietly """ - self.affinity_script.append(str(affinity)) - for arg in args: - self.affinity_script.append(str(arg)) + if quiet: + self.set("quiet", None) + else: + self._launch_args.pop("quiet", None) + + def format_env_vars( + self, env_vars: t.Optional[t.Dict[str, t.Optional[str]]] + ) -> t.Union[t.List[str], None]: + """Format the environment variables for mpirun - def format_run_args(self) -> t.List[str]: + :return: list of env vars + """ + formatted = [] + env_string = "-x" + + if env_vars: + for name, value in env_vars.items(): + if value: + formatted += [env_string, "=".join((name, str(value)))] + else: + formatted += [env_string, name] + return formatted + + def format_launch_args(self) -> t.List[str]: """Return a list of MPI-standard formatted run arguments :return: list of MPI-standard arguments for these settings """ # args launcher uses args = [] - restricted = ["wdir", "wd"] - for opt, value in self.run_args.items(): - if opt not in restricted: - prefix = "--" - if not value: - args += [prefix + opt] - else: - args += [prefix + opt, str(value)] + for opt, value in self._launch_args.items(): + prefix = "--" + if not value: + args += [prefix + opt] + else: + args += [prefix + opt, str(value)] + return args - if self.affinity_script: - args += self.affinity_script + def set(self, key: str, value: str | None) -> None: + """Set the launch arguments""" + set_check_input(key, value) + if key in self._reserved_launch_args(): + logger.warning( + ( + f"Could not set argument '{key}': " + f"it is a reserved argument of '{type(self).__name__}'" + ) + ) + return + if key in self._launch_args and key != self._launch_args[key]: + logger.warning(f"Overwritting argument '{key}' with value '{value}'") + self._launch_args[key] = value - return args - def format_env_vars(self) -> t.List[str]: - """Format the environment variables for mpirun +class MpiArgBuilder(_BaseMPIArgBuilder): + def __init__( + self, + launch_args: t.Dict[str, str | None] | None, + ) -> None: + super().__init__(launch_args) - :return: list of env vars - """ - formatted = [] + def launcher_str(self) -> str: + """Get the string representation of the launcher""" + return LauncherType.Mpirun.value - export_vars = [] - if self.env_vars: - for name, value in self.env_vars.items(): - if value: - formatted += ["--env", "=".join((name, str(value)))] - else: - export_vars.append(name) - if export_vars: - formatted += ["--envlist", ",".join(export_vars)] +class MpiexecArgBuilder(_BaseMPIArgBuilder): + def __init__( + self, + launch_args: t.Dict[str, str | None] | None, + ) -> None: + super().__init__(launch_args) - return formatted + def launcher_str(self) -> str: + """Get the string representation of the launcher""" + return LauncherType.Mpiexec.value - def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: - """Set the hostlist for the PALS ``mpiexec`` command - This sets ``--hosts`` +class OrteArgBuilder(_BaseMPIArgBuilder): + def __init__( + self, + launch_args: t.Dict[str, str | None] | None, + ) -> None: + super().__init__(launch_args) - :param host_list: list of host names - :raises TypeError: if not str or list of str - """ - if isinstance(host_list, str): - host_list = [host_list.strip()] - if not isinstance(host_list, list): - raise TypeError("host_list argument must be a list of strings") - if not all(isinstance(host, str) for host in host_list): - raise TypeError("host_list argument must be list of strings") - self.run_args["hosts"] = ",".join(host_list) + def launcher_str(self) -> str: + """Get the string representation of the launcher""" + return LauncherType.Orterun.value diff --git a/smartsim/settings/builders/launch/pals.py b/smartsim/settings/builders/launch/pals.py new file mode 100644 index 0000000000..051409c295 --- /dev/null +++ b/smartsim/settings/builders/launch/pals.py @@ -0,0 +1,151 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import typing as t + +from smartsim.log import get_logger + +from ...common import StringArgument, set_check_input +from ...launchCommand import LauncherType +from ..launchArgBuilder import LaunchArgBuilder + +logger = get_logger(__name__) + + +class PalsMpiexecArgBuilder(LaunchArgBuilder): + def launcher_str(self) -> str: + """Get the string representation of the launcher""" + return LauncherType.Pals.value + + def _reserved_launch_args(self) -> set[str]: + """Return reserved launch arguments.""" + return {"wdir", "wd"} + + def set_cpu_binding_type(self, bind_type: str) -> None: + """Specifies the cores to which MPI processes are bound + + This sets ``--bind-to`` for MPI compliant implementations + + :param bind_type: binding type + """ + self.set("bind-to", bind_type) + + def set_tasks(self, tasks: int) -> None: + """Set the number of tasks + + :param tasks: number of total tasks to launch + """ + self.set("np", str(tasks)) + + def set_executable_broadcast(self, dest_path: str) -> None: + """Copy the specified executable(s) to remote machines + + This sets ``--transfer`` + + :param dest_path: Destination path (Ignored) + """ + self.set("transfer", dest_path) + + def set_tasks_per_node(self, tasks_per_node: int) -> None: + """Set the number of tasks per node + + This sets ``--ppn`` + + :param tasks_per_node: number of tasks to launch per node + """ + self.set("ppn", str(tasks_per_node)) + + def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: + """Set the hostlist for the PALS ``mpiexec`` command + + This sets ``hosts`` + + :param host_list: list of host names + :raises TypeError: if not str or list of str + """ + if isinstance(host_list, str): + host_list = [host_list.strip()] + if not isinstance(host_list, list): + raise TypeError("host_list argument must be a list of strings") + if not all(isinstance(host, str) for host in host_list): + raise TypeError("host_list argument must be list of strings") + self.set("hosts", ",".join(host_list)) + + def format_env_vars( + self, env_vars: t.Optional[t.Dict[str, t.Optional[str]]] + ) -> t.Union[t.List[str], None]: + """Format the environment variables for mpirun + + :return: list of env vars + """ + formatted = [] + + export_vars = [] + if env_vars: + for name, value in env_vars.items(): + if value: + formatted += ["--env", "=".join((name, str(value)))] + else: + export_vars.append(name) + + if export_vars: + formatted += ["--envlist", ",".join(export_vars)] + + return formatted + + def format_launch_args(self) -> t.List[str]: + """Return a list of MPI-standard formatted launcher arguments + + :return: list of MPI-standard arguments for these settings + """ + # args launcher uses + args = [] + + for opt, value in self._launch_args.items(): + prefix = "--" + if not value: + args += [prefix + opt] + else: + args += [prefix + opt, str(value)] + + return args + + def set(self, key: str, value: str | None) -> None: + """Set the launch arguments""" + set_check_input(key, value) + if key in self._reserved_launch_args(): + logger.warning( + ( + f"Could not set argument '{key}': " + f"it is a reserved argument of '{type(self).__name__}'" + ) + ) + return + if key in self._launch_args and key != self._launch_args[key]: + logger.warning(f"Overwritting argument '{key}' with value '{value}'") + self._launch_args[key] = value diff --git a/smartsim/settings/builders/launch/slurm.py b/smartsim/settings/builders/launch/slurm.py new file mode 100644 index 0000000000..80d3d6be28 --- /dev/null +++ b/smartsim/settings/builders/launch/slurm.py @@ -0,0 +1,317 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import os +import re +import typing as t + +from smartsim.log import get_logger + +from ...common import set_check_input +from ...launchCommand import LauncherType +from ..launchArgBuilder import LaunchArgBuilder + +logger = get_logger(__name__) + + +class SlurmArgBuilder(LaunchArgBuilder): + def launcher_str(self) -> str: + """Get the string representation of the launcher""" + return LauncherType.Slurm.value + + def _reserved_launch_args(self) -> set[str]: + """Return reserved launch arguments.""" + return {"chdir", "D"} + + def set_nodes(self, nodes: int) -> None: + """Set the number of nodes + + Effectively this is setting: ``srun --nodes `` + + :param nodes: nodes to launch on + :return: launcher argument + """ + self.set("nodes", str(nodes)) + + def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: + """Specify the hostlist for this job + + This sets ``--nodelist`` + + :param host_list: hosts to launch on + :raises TypeError: if not str or list of str + """ + if isinstance(host_list, str): + host_list = [host_list.strip()] + elif not isinstance(host_list, list): + raise TypeError("host_list argument must be a string or list of strings") + elif not all(isinstance(host, str) for host in host_list): + raise TypeError("host_list argument must be list of strings") + self.set("nodelist", ",".join(host_list)) + + def set_hostlist_from_file(self, file_path: str) -> None: + """Use the contents of a file to set the node list + + This sets ``--nodefile`` + + :param file_path: Path to the nodelist file + """ + self.set("nodefile", file_path) + + def set_excluded_hosts(self, host_list: t.Union[str, t.List[str]]) -> None: + """Specify a list of hosts to exclude for launching this job + + :param host_list: hosts to exclude + :raises TypeError: if not str or list of str + """ + if isinstance(host_list, str): + host_list = [host_list.strip()] + if not isinstance(host_list, list): + raise TypeError("host_list argument must be a list of strings") + if not all(isinstance(host, str) for host in host_list): + raise TypeError("host_list argument must be list of strings") + self.set("exclude", ",".join(host_list)) + + def set_cpus_per_task(self, cpus_per_task: int) -> None: + """Set the number of cpus to use per task + + This sets ``--cpus-per-task`` + + :param num_cpus: number of cpus to use per task + """ + self.set("cpus-per-task", str(cpus_per_task)) + + def set_tasks(self, tasks: int) -> None: + """Set the number of tasks for this job + + This sets ``--ntasks`` + + :param tasks: number of tasks + """ + self.set("ntasks", str(tasks)) + + def set_tasks_per_node(self, tasks_per_node: int) -> None: + """Set the number of tasks for this job + + This sets ``--ntasks-per-node`` + + :param tasks_per_node: number of tasks per node + """ + self.set("ntasks-per-node", str(tasks_per_node)) + + def set_cpu_bindings(self, bindings: t.Union[int, t.List[int]]) -> None: + """Bind by setting CPU masks on tasks + + This sets ``--cpu-bind`` using the ``map_cpu:`` option + + :param bindings: List specifing the cores to which MPI processes are bound + """ + if isinstance(bindings, int): + bindings = [bindings] + self.set("cpu_bind", "map_cpu:" + ",".join(str(num) for num in bindings)) + + def set_memory_per_node(self, memory_per_node: int) -> None: + """Specify the real memory required per node + + This sets ``--mem`` in megabytes + + :param memory_per_node: Amount of memory per node in megabytes + """ + self.set("mem", f"{memory_per_node}M") + + def set_executable_broadcast(self, dest_path: str) -> None: + """Copy executable file to allocated compute nodes + + This sets ``--bcast`` + + :param dest_path: Path to copy an executable file + """ + self.set("bcast", dest_path) + + def set_node_feature(self, feature_list: t.Union[str, t.List[str]]) -> None: + """Specify the node feature for this job + + This sets ``-C`` + + :param feature_list: node feature to launch on + :raises TypeError: if not str or list of str + """ + if isinstance(feature_list, str): + feature_list = [feature_list.strip()] + elif not all(isinstance(feature, str) for feature in feature_list): + raise TypeError("node_feature argument must be string or list of strings") + self.set("C", ",".join(feature_list)) + + def set_walltime(self, walltime: str) -> None: + """Set the walltime of the job + + format = "HH:MM:SS" + + :param walltime: wall time + """ + pattern = r"^\d{2}:\d{2}:\d{2}$" + if walltime and re.match(pattern, walltime): + self.set("time", str(walltime)) + else: + raise ValueError("Invalid walltime format. Please use 'HH:MM:SS' format.") + + def set_het_group(self, het_group: t.Iterable[int]) -> None: + """Set the heterogeneous group for this job + + this sets `--het-group` + + :param het_group: list of heterogeneous groups + """ + het_size_env = os.getenv("SLURM_HET_SIZE") + if het_size_env is None: + msg = "Requested to set het group, but the allocation is not a het job" + raise ValueError(msg) + het_size = int(het_size_env) + if any(group >= het_size for group in het_group): + msg = ( + f"Het group {max(het_group)} requested, " + f"but max het group in allocation is {het_size-1}" + ) + raise ValueError(msg) + self.set("het-group", ",".join(str(group) for group in het_group)) + + def set_verbose_launch(self, verbose: bool) -> None: + """Set the job to run in verbose mode + + This sets ``--verbose`` + + :param verbose: Whether the job should be run verbosely + """ + if verbose: + self.set("verbose", None) + else: + self._launch_args.pop("verbose", None) + + def set_quiet_launch(self, quiet: bool) -> None: + """Set the job to run in quiet mode + + This sets ``--quiet`` + + :param quiet: Whether the job should be run quietly + """ + if quiet: + self.set("quiet", None) + else: + self._launch_args.pop("quiet", None) + + def format_launch_args(self) -> t.Union[t.List[str], None]: + """Return a list of slurm formatted launch arguments + + :return: list of slurm arguments for these settings + """ + formatted = [] + for key, value in self._launch_args.items(): + short_arg = bool(len(str(key)) == 1) + prefix = "-" if short_arg else "--" + if not value: + formatted += [prefix + key] + else: + if short_arg: + formatted += [prefix + key, str(value)] + else: + formatted += ["=".join((prefix + key, str(value)))] + return formatted + + def format_env_vars( + self, env_vars: t.Dict[str, t.Optional[str]] + ) -> t.Union[t.List[str], None]: + """Build bash compatible environment variable string for Slurm + + :returns: the formatted string of environment variables + """ + self._check_env_vars(env_vars) + return [f"{k}={v}" for k, v in env_vars.items() if "," not in str(v)] + + def format_comma_sep_env_vars( + self, env_vars: t.Dict[str, t.Optional[str]] + ) -> t.Union[t.Tuple[str, t.List[str]], None]: + """Build environment variable string for Slurm + + Slurm takes exports in comma separated lists + the list starts with all as to not disturb the rest of the environment + for more information on this, see the slurm documentation for srun + + :returns: the formatted string of environment variables + """ + self._check_env_vars(env_vars) + exportable_env, compound_env, key_only = [], [], [] + + for k, v in env_vars.items(): + kvp = f"{k}={v}" + + if "," in str(v): + key_only.append(k) + compound_env.append(kvp) + else: + exportable_env.append(kvp) + + # Append keys to exportable KVPs, e.g. `--export x1=v1,KO1,KO2` + fmt_exported_env = ",".join(v for v in exportable_env + key_only) + + return fmt_exported_env, compound_env + + def _check_env_vars(self, env_vars: t.Dict[str, t.Optional[str]]) -> None: + """Warn a user trying to set a variable which is set in the environment + + Given Slurm's env var precedence, trying to export a variable which is already + present in the environment will not work. + """ + for k, v in env_vars.items(): + if "," not in str(v): + # If a variable is defined, it will take precedence over --export + # we warn the user + preexisting_var = os.environ.get(k, None) + if preexisting_var is not None and preexisting_var != v: + msg = ( + f"Variable {k} is set to {preexisting_var} in current " + "environment. If the job is running in an interactive " + f"allocation, the value {v} will not be set. Please " + "consider removing the variable from the environment " + "and re-run the experiment." + ) + logger.warning(msg) + + def set(self, key: str, value: str | None) -> None: + """Set the launch arguments""" + set_check_input(key, value) + if key in self._reserved_launch_args(): + logger.warning( + ( + f"Could not set argument '{key}': " + f"it is a reserved argument of '{type(self).__name__}'" + ) + ) + return + if key in self._launch_args and key != self._launch_args[key]: + logger.warning(f"Overwritting argument '{key}' with value '{value}'") + self._launch_args[key] = value diff --git a/smartsim/settings/builders/launchArgBuilder.py b/smartsim/settings/builders/launchArgBuilder.py new file mode 100644 index 0000000000..bb1f389f3c --- /dev/null +++ b/smartsim/settings/builders/launchArgBuilder.py @@ -0,0 +1,92 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import copy +import typing as t +from abc import ABC, abstractmethod + +from smartsim.log import get_logger + +from ..._core.utils.helpers import fmt_dict + +logger = get_logger(__name__) + + +class LaunchArgBuilder(ABC): + """Abstract base class that defines all generic launcher + argument methods that are not supported. It is the + responsibility of child classes for each launcher to translate + the input parameter to a properly formatted launcher argument. + """ + + def __init__(self, launch_args: t.Dict[str, str | None] | None) -> None: + self._launch_args = copy.deepcopy(launch_args) or {} + + @abstractmethod + def launcher_str(self) -> str: + """Get the string representation of the launcher""" + pass + + @abstractmethod + def set(self, arg: str, val: str | None) -> None: + """Set the launch arguments""" + pass + + def format_launch_args(self) -> t.Union[t.List[str], None]: + """Build formatted launch arguments""" + logger.warning( + f"format_launcher_args() not supported for {self.launcher_str()}." + ) + return None + + def format_comma_sep_env_vars( + self, env_vars: t.Dict[str, t.Optional[str]] + ) -> t.Union[t.Tuple[str, t.List[str]], None]: + """Build environment variable string for Slurm + Slurm takes exports in comma separated lists + the list starts with all as to not disturb the rest of the environment + for more information on this, see the slurm documentation for srun + :returns: the formatted string of environment variables + """ + logger.warning( + f"format_comma_sep_env_vars() not supported for {self.launcher_str()}." + ) + return None + + def format_env_vars( + self, env_vars: t.Dict[str, t.Optional[str]] + ) -> t.Union[t.List[str], None]: + """Build bash compatible environment variable string for Slurm + :returns: the formatted string of environment variables + """ + logger.warning(f"format_env_vars() not supported for {self.launcher_str()}.") + return None + + def __str__(self) -> str: # pragma: no-cover + string = f"\nLaunch Arguments:\n{fmt_dict(self._launch_args)}" + return string diff --git a/smartsim/settings/common.py b/smartsim/settings/common.py new file mode 100644 index 0000000000..1f6bb5170a --- /dev/null +++ b/smartsim/settings/common.py @@ -0,0 +1,49 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import typing as t + +from smartsim.log import get_logger + +IntegerArgument = t.Dict[str, t.Optional[int]] +StringArgument = t.Dict[str, t.Optional[str]] + +logger = get_logger(__name__) + + +def set_check_input(key: str, value: t.Optional[str]) -> None: + if not isinstance(key, str): + raise TypeError(f"Key '{key}' should be of type str") + if not isinstance(value, (str, type(None))): + raise TypeError(f"Value '{value}' should be of type str or None") + if key.startswith("-"): + key = key.lstrip("-") + logger.warning( + "One or more leading `-` characters were provided to the run argument. \ + Leading dashes were stripped and the arguments were passed to the run_command." + ) diff --git a/smartsim/settings/containers.py b/smartsim/settings/containers.py deleted file mode 100644 index d2fd4fca27..0000000000 --- a/smartsim/settings/containers.py +++ /dev/null @@ -1,173 +0,0 @@ -# BSD 2-Clause License -# -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import shutil -import typing as t - -from ..log import get_logger - -logger = get_logger(__name__) - - -class Container: - """Base class for container types in SmartSim. - - Container types are used to embed all the information needed to - launch a workload within a container into a single object. - - :param image: local or remote path to container image - :param args: arguments to container command - :param mount: paths to mount (bind) from host machine into image. - :param working_directory: path of the working directory within the container - """ - - def __init__( - self, image: str, args: str = "", mount: str = "", working_directory: str = "" - ) -> None: - # Validate types - if not isinstance(image, str): - raise TypeError("image must be a str") - if not isinstance(args, (str, list)): - raise TypeError("args must be a str | list") - if not isinstance(mount, (str, list, dict)): - raise TypeError("mount must be a str | list | dict") - if not isinstance(working_directory, str): - raise TypeError("working_directory must be a str") - - self.image = image - self.args = args - self.mount = mount - self.working_directory = working_directory - - def _containerized_run_command(self, run_command: str) -> str: - """Return modified run_command with container commands prepended. - - :param run_command: run command from a RunSettings class - """ - raise NotImplementedError( - "Containerized run command specification not implemented for this " - f"Container type: {type(self)}" - ) - - -class Singularity(Container): - # pylint: disable=abstract-method - # todo: determine if _containerized_run_command should be abstract - - """Singularity (apptainer) container type. To be passed into a - ``RunSettings`` class initializer or ``Experiment.create_run_settings``. - - .. note:: - - Singularity integration is currently tested with - `Apptainer 1.0 `_ - with slurm and PBS workload managers only. - - Also, note that user-defined bind paths (``mount`` argument) may be - disabled by a - `system administrator - `_ - - - :param image: local or remote path to container image, - e.g. ``docker://sylabsio/lolcow`` - :param args: arguments to 'singularity exec' command - :param mount: paths to mount (bind) from host machine into image. - """ - - def __init__(self, *args: t.Any, **kwargs: t.Any) -> None: - super().__init__(*args, **kwargs) - - def _container_cmds(self, default_working_directory: str = "") -> t.List[str]: - """Return list of container commands to be inserted before exe. - Container members are validated during this call. - - :raises TypeError: if object members are invalid types - """ - serialized_args = "" - if self.args: - # Serialize args into a str - if isinstance(self.args, str): - serialized_args = self.args - elif isinstance(self.args, list): - serialized_args = " ".join(self.args) - else: - raise TypeError("self.args must be a str | list") - - serialized_mount = "" - if self.mount: - if isinstance(self.mount, str): - serialized_mount = self.mount - elif isinstance(self.mount, list): - serialized_mount = ",".join(self.mount) - elif isinstance(self.mount, dict): - paths = [] - for host_path, img_path in self.mount.items(): - if img_path: - paths.append(f"{host_path}:{img_path}") - else: - paths.append(host_path) - serialized_mount = ",".join(paths) - else: - raise TypeError("self.mount must be str | list | dict") - - working_directory = default_working_directory - if self.working_directory: - working_directory = self.working_directory - - if working_directory not in serialized_mount: - if serialized_mount: - serialized_mount = ",".join([working_directory, serialized_mount]) - else: - serialized_mount = working_directory - logger.warning( - f"Working directory not specified in mount: \n {working_directory}\n" - "Automatically adding it to the list of bind points" - ) - - # Find full path to singularity - singularity = shutil.which("singularity") - - # Some systems have singularity available on compute nodes only, - # so warn instead of error - if not singularity: - logger.warning( - "Unable to find singularity. Continuing in case singularity is " - "available on compute node" - ) - - # Construct containerized launch command - cmd_list = [singularity or "singularity", "exec"] - if working_directory: - cmd_list.extend(["--pwd", working_directory]) - - if serialized_args: - cmd_list.append(serialized_args) - if serialized_mount: - cmd_list.extend(["--bind", serialized_mount]) - cmd_list.append(self.image) - - return cmd_list diff --git a/smartsim/settings/launchCommand.py b/smartsim/settings/launchCommand.py new file mode 100644 index 0000000000..491f01d867 --- /dev/null +++ b/smartsim/settings/launchCommand.py @@ -0,0 +1,43 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from enum import Enum + + +class LauncherType(Enum): + """Launchers that are supported by + SmartSim. + """ + + Dragon = "dragon" + Slurm = "slurm" + Pals = "pals" + Alps = "alps" + Local = "local" + Mpiexec = "mpiexec" + Mpirun = "mpirun" + Orterun = "orterun" + Lsf = "lsf" diff --git a/smartsim/settings/launchSettings.py b/smartsim/settings/launchSettings.py new file mode 100644 index 0000000000..a9e5e81035 --- /dev/null +++ b/smartsim/settings/launchSettings.py @@ -0,0 +1,165 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import copy +import typing as t + +from smartsim.log import get_logger + +from .._core.utils.helpers import fmt_dict +from .baseSettings import BaseSettings +from .builders import LaunchArgBuilder +from .builders.launch.alps import AprunArgBuilder +from .builders.launch.dragon import DragonArgBuilder +from .builders.launch.local import LocalArgBuilder +from .builders.launch.lsf import JsrunArgBuilder +from .builders.launch.mpi import MpiArgBuilder, MpiexecArgBuilder, OrteArgBuilder +from .builders.launch.pals import PalsMpiexecArgBuilder +from .builders.launch.slurm import SlurmArgBuilder +from .common import StringArgument +from .launchCommand import LauncherType + +logger = get_logger(__name__) + + +class LaunchSettings(BaseSettings): + def __init__( + self, + launcher: t.Union[LauncherType, str], + launch_args: StringArgument | None = None, + env_vars: StringArgument | None = None, + ) -> None: + try: + self._launcher = LauncherType(launcher) + except ValueError: + raise ValueError(f"Invalid launcher type: {launcher}") + self._arg_builder = self._get_arg_builder(launch_args) + self.env_vars = copy.deepcopy(env_vars) if env_vars else {} + + @property + def launcher(self) -> str: + """Return the launcher name.""" + return self._launcher.value + + @property + def launch_args(self) -> LaunchArgBuilder: + """Return the launch argument translator.""" + return self._arg_builder + + @launch_args.setter + def launch_args(self, args: t.Mapping[str, str]) -> None: + """Update the launch arguments.""" + self.launch_args._launch_args.clear() + for k, v in args.items(): + self.launch_args.set(k, v) + + @property + def env_vars(self) -> t.Mapping[str, str | None]: + """Return an immutable list of attached environment variables.""" + return copy.deepcopy(self._env_vars) + + @env_vars.setter + def env_vars(self, value: t.Dict[str, str]) -> None: + """Set the environment variables.""" + self._env_vars = copy.deepcopy(value) + + def _get_arg_builder(self, launch_args: StringArgument | None) -> LaunchArgBuilder: + """Map the Launcher to the LaunchArgBuilder""" + if self._launcher == LauncherType.Slurm: + return SlurmArgBuilder(launch_args) + elif self._launcher == LauncherType.Mpiexec: + return MpiexecArgBuilder(launch_args) + elif self._launcher == LauncherType.Mpirun: + return MpiArgBuilder(launch_args) + elif self._launcher == LauncherType.Orterun: + return OrteArgBuilder(launch_args) + elif self._launcher == LauncherType.Alps: + return AprunArgBuilder(launch_args) + elif self._launcher == LauncherType.Lsf: + return JsrunArgBuilder(launch_args) + elif self._launcher == LauncherType.Pals: + return PalsMpiexecArgBuilder(launch_args) + elif self._launcher == LauncherType.Dragon: + return DragonArgBuilder(launch_args) + elif self._launcher == LauncherType.Local: + return LocalArgBuilder(launch_args) + else: + raise ValueError(f"Invalid launcher type: {self._launcher}") + + def update_env(self, env_vars: t.Dict[str, str | None]) -> None: + """Update the job environment variables + + To fully inherit the current user environment, add the + workload-manager-specific flag to the launch command through the + :meth:`add_exe_args` method. For example, ``--export=ALL`` for + slurm, or ``-V`` for PBS/aprun. + + + :param env_vars: environment variables to update or add + :raises TypeError: if env_vars values cannot be coerced to strings + """ + # Coerce env_vars values to str as a convenience to user + for env, val in env_vars.items(): + if not isinstance(env, str): + raise TypeError(f"The key '{env}' of env_vars should be of type str") + if not isinstance(val, (str, type(None))): + raise TypeError( + f"The value '{val}' of env_vars should be of type str or None" + ) + self._env_vars.update(env_vars) + + def format_env_vars(self) -> t.Union[t.List[str], None]: + """Build bash compatible environment variable string for Slurm + :returns: the formatted string of environment variables + """ + return self._arg_builder.format_env_vars(self._env_vars) + + def format_comma_sep_env_vars(self) -> t.Union[t.Tuple[str, t.List[str]], None]: + """Build environment variable string for Slurm + Slurm takes exports in comma separated lists + the list starts with all as to not disturb the rest of the environment + for more information on this, see the slurm documentation for srun + :returns: the formatted string of environment variables + """ + return self._arg_builder.format_comma_sep_env_vars(self._env_vars) + + def format_launch_args(self) -> t.Union[t.List[str], None]: + """Return formatted launch arguments + For ``RunSettings``, the run arguments are passed + literally with no formatting. + :return: list run arguments for these settings + """ + return self._arg_builder.format_launch_args() + + def __str__(self) -> str: # pragma: no-cover + string = f"\nLauncher: {self.launcher}" + if self.launch_args: + string += str(self.launch_args) + if self.env_vars: + string += f"\nEnvironment variables: \n{fmt_dict(self.env_vars)}" + return string diff --git a/smartsim/settings/lsfSettings.py b/smartsim/settings/lsfSettings.py deleted file mode 100644 index 841505ca5b..0000000000 --- a/smartsim/settings/lsfSettings.py +++ /dev/null @@ -1,556 +0,0 @@ -# BSD 2-Clause License -# -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from __future__ import annotations - -import copy -import typing as t -from pprint import pformat - -from ..error import SSUnsupportedError -from ..log import get_logger -from .base import BatchSettings, RunSettings - -logger = get_logger(__name__) - - -class JsrunSettings(RunSettings): - def __init__( - self, - run_args: t.Optional[t.Dict[str, t.Union[int, str, float, None]]] = None, - env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None, - **_kwargs: t.Any, - ) -> None: - """Settings to run job with ``jsrun`` command - - ``JsrunSettings`` should only be used on LSF-based systems. - - :param exe: executable - :param exe_args: executable arguments - :param run_args: arguments for run command - :param env_vars: environment vars to launch job with - """ - super().__init__( - run_command="jsrun", - run_args=run_args, - env_vars=env_vars, - ) - - # Parameters needed for MPMD run - self.erf_sets = {"host": "*", "cpu": "*", "ranks": "1"} - self.mpmd_preamble_lines: t.List[str] = [] - self.mpmd: t.List[RunSettings] = [] - self.individual_suffix = "" - - reserved_run_args = {"chdir", "h"} - - def set_num_rs(self, num_rs: t.Union[str, int]) -> None: - """Set the number of resource sets to use - - This sets ``--nrs``. - - :param num_rs: Number of resource sets or `ALL_HOSTS` - """ - if isinstance(num_rs, str): - self.run_args["nrs"] = num_rs - else: - self.run_args["nrs"] = int(num_rs) - - def set_cpus_per_rs(self, cpus_per_rs: int) -> None: - """Set the number of cpus to use per resource set - - This sets ``--cpu_per_rs`` - - :param cpus_per_rs: number of cpus to use per resource set or ALL_CPUS - """ - if self.colocated_fs_settings: - fs_cpus = int(t.cast(int, self.colocated_fs_settings.get("fs_cpus", 0))) - if not fs_cpus: - raise ValueError("fs_cpus must be configured on colocated_fs_settings") - - if cpus_per_rs < fs_cpus: - raise ValueError( - f"Cannot set cpus_per_rs ({cpus_per_rs}) to less than " - + f"fs_cpus ({fs_cpus})" - ) - if isinstance(cpus_per_rs, str): - self.run_args["cpu_per_rs"] = cpus_per_rs - else: - self.run_args["cpu_per_rs"] = int(cpus_per_rs) - - def set_gpus_per_rs(self, gpus_per_rs: int) -> None: - """Set the number of gpus to use per resource set - - This sets ``--gpu_per_rs`` - - :param gpus_per_rs: number of gpus to use per resource set or ALL_GPUS - """ - if isinstance(gpus_per_rs, str): - self.run_args["gpu_per_rs"] = gpus_per_rs - else: - self.run_args["gpu_per_rs"] = int(gpus_per_rs) - - def set_rs_per_host(self, rs_per_host: int) -> None: - """Set the number of resource sets to use per host - - This sets ``--rs_per_host`` - - :param rs_per_host: number of resource sets to use per host - """ - self.run_args["rs_per_host"] = int(rs_per_host) - - def set_tasks(self, tasks: int) -> None: - """Set the number of tasks for this job - - This sets ``--np`` - - :param tasks: number of tasks - """ - self.run_args["np"] = int(tasks) - - def set_tasks_per_rs(self, tasks_per_rs: int) -> None: - """Set the number of tasks per resource set - - This sets ``--tasks_per_rs`` - - :param tasks_per_rs: number of tasks per resource set - """ - self.run_args["tasks_per_rs"] = int(tasks_per_rs) - - def set_tasks_per_node(self, tasks_per_node: int) -> None: - """Set the number of tasks per resource set. - - This function is an alias for `set_tasks_per_rs`. - - :param tasks_per_node: number of tasks per resource set - """ - self.set_tasks_per_rs(int(tasks_per_node)) - - def set_cpus_per_task(self, cpus_per_task: int) -> None: - """Set the number of cpus per tasks. - - This function is an alias for `set_cpus_per_rs`. - - :param cpus_per_task: number of cpus per resource set - """ - self.set_cpus_per_rs(int(cpus_per_task)) - - def set_memory_per_rs(self, memory_per_rs: int) -> None: - """Specify the number of megabytes of memory to assign to a resource set - - This sets ``--memory_per_rs`` - - :param memory_per_rs: Number of megabytes per rs - """ - self.run_args["memory_per_rs"] = int(memory_per_rs) - - def set_memory_per_node(self, memory_per_node: int) -> None: - """Specify the number of megabytes of memory to assign to a resource set - - Alias for `set_memory_per_rs`. - - :param memory_per_node: Number of megabytes per rs - """ - self.set_memory_per_rs(int(memory_per_node)) - - def set_binding(self, binding: str) -> None: - """Set binding - - This sets ``--bind`` - - :param binding: Binding, e.g. `packed:21` - """ - self.run_args["bind"] = binding - - def make_mpmd(self, settings: RunSettings) -> None: - """Make step an MPMD (or SPMD) job. - - This method will activate job execution through an ERF file. - - Optionally, this method adds an instance of ``JsrunSettings`` to - the list of settings to be launched in the same ERF file. - - :param settings: ``JsrunSettings`` instance - """ - if self.colocated_fs_settings: - raise SSUnsupportedError( - "Colocated applications cannot be run as a mpmd workload" - ) - - self.mpmd.append(settings) - - def set_mpmd_preamble(self, preamble_lines: t.List[str]) -> None: - """Set preamble used in ERF file. Typical lines include - `oversubscribe-cpu : allow` or `overlapping-rs : allow`. - Can be used to set `launch_distribution`. If it is not present, - it will be inferred from the settings, or set to `packed` by - default. - - :param preamble_lines: lines to put at the beginning of the ERF - file. - """ - self.mpmd_preamble_lines = preamble_lines - - def set_erf_sets(self, erf_sets: t.Dict[str, str]) -> None: - """Set resource sets used for ERF (SPMD or MPMD) steps. - - ``erf_sets`` is a dictionary used to fill the ERF - line representing these settings, e.g. - `{"host": "1", "cpu": "{0:21}, {21:21}", "gpu": "*"}` - can be used to specify rank (or rank_count), hosts, cpus, gpus, - and memory. - The key `rank` is used to give specific ranks, as in - `{"rank": "1, 2, 5"}`, while the key `rank_count` is used to specify - the count only, as in `{"rank_count": "3"}`. If both are specified, - only `rank` is used. - - :param hosts: dictionary of resources - """ - self.erf_sets = copy.deepcopy(erf_sets) - - def format_env_vars(self) -> t.List[str]: - """Format environment variables. Each variable needs - to be passed with ``--env``. If a variable is set to ``None``, - its value is propagated from the current environment. - - :returns: formatted list of strings to export variables - """ - format_str = [] - for k, v in self.env_vars.items(): - if v: - format_str += ["-E", f"{k}={v}"] - else: - format_str += ["-E", f"{k}"] - return format_str - - def set_individual_output(self, suffix: t.Optional[str] = None) -> None: - """Set individual std output. - - This sets ``--stdio_mode individual`` - and inserts the suffix into the output name. The resulting - output name will be ``self.name + suffix + .out``. - - :param suffix: Optional suffix to add to output file names, - it can contain `%j`, `%h`, `%p`, or `%t`, - as specified by `jsrun` options. - """ - self.run_args["stdio_mode"] = "individual" - if suffix: - self.individual_suffix = suffix - - def format_run_args(self) -> t.List[str]: - """Return a list of LSF formatted run arguments - - :return: list of LSF arguments for these settings - """ - # args launcher uses - args = [] - restricted = ["chdir", "h", "stdio_stdout", "o", "stdio_stderr", "k"] - if self.mpmd or "erf_input" in self.run_args.keys(): - restricted.extend( - [ - "tasks_per_rs", - "a", - "np", - "p", - "cpu_per_rs", - "c", - "gpu_per_rs", - "g", - "latency_priority", - "l", - "memory_per_rs", - "m", - "nrs", - "n", - "rs_per_host", - "r", - "rs_per_socket", - "K", - "appfile", - "f", - "allocate_only", - "A", - "launch_node_task", - "H", - "use_reservation", - "J", - "use_resources", - "bind", - "b", - "launch_distribution", - "d", - ] - ) - - for opt, value in self.run_args.items(): - if opt not in restricted: - short_arg = bool(len(str(opt)) == 1) - prefix = "-" if short_arg else "--" - if not value: - args += [prefix + opt] - else: - if short_arg: - args += [prefix + opt, str(value)] - else: - args += ["=".join((prefix + opt, str(value)))] - return args - - def __str__(self) -> str: - string = super().__str__() - if self.mpmd: - string += "\nERF settings: " + pformat(self.erf_sets) - return string - - def _prep_colocated_fs(self, fs_cpus: int) -> None: - cpus_per_flag_set = False - for cpu_per_rs_flag in ["cpu_per_rs", "c"]: - if run_arg_value := self.run_args.get(cpu_per_rs_flag, 0): - cpus_per_flag_set = True - cpu_per_rs = int(run_arg_value) - if cpu_per_rs < fs_cpus: - msg = ( - f"{cpu_per_rs_flag} flag was set to {cpu_per_rs}, but " - f"colocated db requires {fs_cpus} CPUs per RS. Automatically " - f"setting {cpu_per_rs_flag} flag to {fs_cpus}" - ) - logger.info(msg) - self.run_args[cpu_per_rs_flag] = fs_cpus - if not cpus_per_flag_set: - msg = f"Colocated fs requires {fs_cpus} CPUs per RS. Automatically setting " - msg += f"--cpus_per_rs=={fs_cpus}" - logger.info(msg) - self.set_cpus_per_rs(fs_cpus) - - rs_per_host_set = False - for rs_per_host_flag in ["rs_per_host", "r"]: - if rs_per_host_flag in self.run_args: - rs_per_host_set = True - rs_per_host = self.run_args[rs_per_host_flag] - if rs_per_host != 1: - msg = f"{rs_per_host_flag} flag was set to {rs_per_host}, " - msg += ( - "but colocated fs requires running ONE resource set per host. " - ) - msg += f"Automatically setting {rs_per_host_flag} flag to 1" - logger.info(msg) - self.run_args[rs_per_host_flag] = "1" - if not rs_per_host_set: - msg = "Colocated fs requires one resource set per host. " - msg += " Automatically setting --rs_per_host==1" - logger.info(msg) - self.set_rs_per_host(1) - - -class BsubBatchSettings(BatchSettings): - def __init__( - self, - nodes: t.Optional[int] = None, - time: t.Optional[str] = None, - project: t.Optional[str] = None, - batch_args: t.Optional[t.Dict[str, t.Optional[str]]] = None, - smts: int = 0, - **kwargs: t.Any, - ) -> None: - """Specify ``bsub`` batch parameters for a job - - :param nodes: number of nodes for batch - :param time: walltime for batch job in format hh:mm - :param project: project for batch launch - :param batch_args: overrides for LSF batch arguments - :param smts: SMTs - """ - self.project: t.Optional[str] = None - - if project: - kwargs.pop("account", None) - else: - project = kwargs.pop("account", None) - - super().__init__( - "bsub", - batch_args=batch_args, - nodes=nodes, - account=project, - time=time, - **kwargs, - ) - - self.smts = 0 - if smts: - self.set_smts(smts) - - self.expert_mode = False - self.easy_settings = ["ln_slots", "ln_mem", "cn_cu", "nnodes"] - - def set_walltime(self, walltime: str) -> None: - """Set the walltime - - This sets ``-W``. - - :param walltime: Time in hh:mm format, e.g. "10:00" for 10 hours, - if time is supplied in hh:mm:ss format, seconds - will be ignored and walltime will be set as ``hh:mm`` - """ - # For compatibility with other launchers, as explained in docstring - if walltime: - if len(walltime.split(":")) > 2: - walltime = ":".join(walltime.split(":")[:2]) - self.walltime = walltime - - def set_smts(self, smts: int) -> None: - """Set SMTs - - This sets ``-alloc_flags``. If the user sets - SMT explicitly through ``-alloc_flags``, then that - takes precedence. - - :param smts: SMT (e.g on Summit: 1, 2, or 4) - """ - self.smts = smts - - def set_project(self, project: str) -> None: - """Set the project - - This sets ``-P``. - - :param time: project name - """ - if project: - self.project = project - - def set_account(self, account: str) -> None: - """Set the project - - this function is an alias for `set_project`. - - :param account: project name - """ - self.set_project(account) - - def set_nodes(self, num_nodes: int) -> None: - """Set the number of nodes for this batch job - - This sets ``-nnodes``. - - :param nodes: number of nodes - """ - if num_nodes: - self.batch_args["nnodes"] = str(int(num_nodes)) - - def set_expert_mode_req(self, res_req: str, slots: int) -> None: - """Set allocation for expert mode. This - will activate expert mode (``-csm``) and - disregard all other allocation options. - - This sets ``-csm -n slots -R res_req`` - - :param res_req: specific resource requirements - :param slots: number of resources to allocate - """ - self.expert_mode = True - self.batch_args["csm"] = "y" - self.batch_args["R"] = res_req - self.batch_args["n"] = str(slots) - - def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: - """Specify the hostlist for this job - - :param host_list: hosts to launch on - :raises TypeError: if not str or list of str - """ - if isinstance(host_list, str): - host_list = [host_list.strip()] - if not isinstance(host_list, list): - raise TypeError("host_list argument must be a list of strings") - if not all(isinstance(host, str) for host in host_list): - raise TypeError("host_list argument must be list of strings") - self.batch_args["m"] = '"' + " ".join(host_list) + '"' - - def set_tasks(self, tasks: int) -> None: - """Set the number of tasks for this job - - This sets ``-n`` - - :param tasks: number of tasks - """ - self.batch_args["n"] = str(int(tasks)) - - def set_queue(self, queue: str) -> None: - """Set the queue for this job - - :param queue: The queue to submit the job on - """ - if queue: - self.batch_args["q"] = queue - - def _format_alloc_flags(self) -> None: - """Format ``alloc_flags`` checking if user already - set it. Currently only adds SMT flag if missing - and ``self.smts`` is set. - """ - - if self.smts: - if "alloc_flags" not in self.batch_args.keys(): - self.batch_args["alloc_flags"] = f"smt{self.smts}" - else: - # Check if smt is in the flag, otherwise add it - flags: t.List[str] = [] - if flags_arg := self.batch_args.get("alloc_flags", ""): - flags = flags_arg.strip('"').split() - if not any(flag.startswith("smt") for flag in flags): - flags.append(f"smt{self.smts}") - self.batch_args["alloc_flags"] = " ".join(flags) - - # Check if alloc_flags has to be enclosed in quotes - if "alloc_flags" in self.batch_args.keys(): - flags = [] - if flags_arg := self.batch_args.get("alloc_flags", ""): - flags = flags_arg.strip('"').split() - if len(flags) > 1: - self.batch_args["alloc_flags"] = '"' + " ".join(flags) + '"' - - def format_batch_args(self) -> t.List[str]: - """Get the formatted batch arguments for a preview - - :return: list of batch arguments for Qsub - """ - opts = [] - - self._format_alloc_flags() - - for opt, value in self.batch_args.items(): - if self.expert_mode and opt in self.easy_settings: - continue - - prefix = "-" # LSF only uses single dashses - - if not value: - opts += [prefix + opt] - else: - opts += [" ".join((prefix + opt, str(value)))] - - return opts diff --git a/smartsim/settings/mpiSettings.py b/smartsim/settings/mpiSettings.py deleted file mode 100644 index 66b965938b..0000000000 --- a/smartsim/settings/mpiSettings.py +++ /dev/null @@ -1,332 +0,0 @@ -# BSD 2-Clause License -# -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from __future__ import annotations - -import shutil -import subprocess -import typing as t - -from ..error import LauncherError, SSUnsupportedError -from ..log import get_logger -from .base import RunSettings - -logger = get_logger(__name__) - - -class _BaseMPISettings(RunSettings): - """Base class for all common arguments of MPI-standard run commands""" - - def __init__( - self, - run_command: str = "mpiexec", - run_args: t.Optional[t.Dict[str, t.Union[int, str, float, None]]] = None, - env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None, - fail_if_missing_exec: bool = True, - **kwargs: t.Any, - ) -> None: - """Settings to format run job with an MPI-standard binary - - Note that environment variables can be passed with a None - value to signify that they should be exported from the current - environment - - Any arguments passed in the ``run_args`` dict will be converted - command line arguments and prefixed with ``--``. Values of - None can be provided for arguments that do not have values. - - :param run_args: arguments for run command - :param env_vars: environment vars to launch job with - :param fail_if_missing_exec: Throw an exception of the MPI command - is missing. Otherwise, throw a warning - """ - super().__init__( - run_command=run_command, - run_args=run_args, - env_vars=env_vars, - **kwargs, - ) - self.mpmd: t.List[RunSettings] = [] - self.affinity_script: t.List[str] = [] - - if not shutil.which(self._run_command): - msg = ( - f"Cannot find {self._run_command}. Try passing the " - "full path via run_command." - ) - if fail_if_missing_exec: - raise LauncherError(msg) - logger.warning(msg) - - reserved_run_args = {"wd", "wdir"} - - def make_mpmd(self, settings: RunSettings) -> None: - """Make a mpmd workload by combining two ``mpirun`` commands - - This connects the two settings to be executed with a single - Application instance - - :param settings: MpirunSettings instance - """ - if self.colocated_fs_settings: - raise SSUnsupportedError( - "Colocated applications cannot be run as a mpmd workload" - ) - self.mpmd.append(settings) - - def set_task_map(self, task_mapping: str) -> None: - """Set ``mpirun`` task mapping - - this sets ``--map-by `` - - For examples, see the man page for ``mpirun`` - - :param task_mapping: task mapping - """ - self.run_args["map-by"] = task_mapping - - def set_cpus_per_task(self, cpus_per_task: int) -> None: - """Set the number of tasks for this job - - This sets ``--cpus-per-proc`` for MPI compliant implementations - - note: this option has been deprecated in openMPI 4.0+ - and will soon be replaced. - - :param cpus_per_task: number of tasks - """ - self.run_args["cpus-per-proc"] = int(cpus_per_task) - - def set_cpu_binding_type(self, bind_type: str) -> None: - """Specifies the cores to which MPI processes are bound - - This sets ``--bind-to`` for MPI compliant implementations - - :param bind_type: binding type - """ - self.run_args["bind-to"] = bind_type - - def set_tasks_per_node(self, tasks_per_node: int) -> None: - """Set the number of tasks per node - - :param tasks_per_node: number of tasks to launch per node - """ - self.run_args["npernode"] = int(tasks_per_node) - - def set_tasks(self, tasks: int) -> None: - """Set the number of tasks for this job - - This sets ``-n`` for MPI compliant implementations - - :param tasks: number of tasks - """ - self.run_args["n"] = int(tasks) - - def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: - """Set the hostlist for the ``mpirun`` command - - This sets ``--host`` - - :param host_list: list of host names - :raises TypeError: if not str or list of str - """ - if isinstance(host_list, str): - host_list = [host_list.strip()] - if not isinstance(host_list, list): - raise TypeError("host_list argument must be a list of strings") - if not all(isinstance(host, str) for host in host_list): - raise TypeError("host_list argument must be list of strings") - self.run_args["host"] = ",".join(host_list) - - def set_hostlist_from_file(self, file_path: str) -> None: - """Use the contents of a file to set the hostlist - - This sets ``--hostfile`` - - :param file_path: Path to the hostlist file - """ - self.run_args["hostfile"] = file_path - - def set_verbose_launch(self, verbose: bool) -> None: - """Set the job to run in verbose mode - - This sets ``--verbose`` - - :param verbose: Whether the job should be run verbosely - """ - if verbose: - self.run_args["verbose"] = None - else: - self.run_args.pop("verbose", None) - - def set_quiet_launch(self, quiet: bool) -> None: - """Set the job to run in quiet mode - - This sets ``--quiet`` - - :param quiet: Whether the job should be run quietly - """ - if quiet: - self.run_args["quiet"] = None - else: - self.run_args.pop("quiet", None) - - def set_broadcast(self, dest_path: t.Optional[str] = None) -> None: - """Copy the specified executable(s) to remote machines - - This sets ``--preload-binary`` - - :param dest_path: Destination path (Ignored) - """ - if dest_path is not None and isinstance(dest_path, str): - logger.warning( - ( - f"{type(self)} cannot set a destination path during broadcast. " - "Using session directory instead" - ) - ) - self.run_args["preload-binary"] = None - - def set_walltime(self, walltime: str) -> None: - """Set the maximum number of seconds that a job will run - - This sets ``--timeout`` - - :param walltime: number like string of seconds that a job will run in secs - """ - self.run_args["timeout"] = walltime - - def format_run_args(self) -> t.List[str]: - """Return a list of MPI-standard formatted run arguments - - :return: list of MPI-standard arguments for these settings - """ - # args launcher uses - args = [] - restricted = ["wdir", "wd"] - - for opt, value in self.run_args.items(): - if opt not in restricted: - prefix = "--" - if not value: - args += [prefix + opt] - else: - args += [prefix + opt, str(value)] - return args - - def format_env_vars(self) -> t.List[str]: - """Format the environment variables for mpirun - - :return: list of env vars - """ - formatted = [] - env_string = "-x" - - if self.env_vars: - for name, value in self.env_vars.items(): - if value: - formatted += [env_string, "=".join((name, str(value)))] - else: - formatted += [env_string, name] - return formatted - - -class MpirunSettings(_BaseMPISettings): - def __init__( - self, - run_args: t.Optional[t.Dict[str, t.Union[int, str, float, None]]] = None, - env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None, - **kwargs: t.Any, - ) -> None: - """Settings to run job with ``mpirun`` command (MPI-standard) - - Note that environment variables can be passed with a None - value to signify that they should be exported from the current - environment - - Any arguments passed in the ``run_args`` dict will be converted - into ``mpirun`` arguments and prefixed with ``--``. Values of - None can be provided for arguments that do not have values. - - :param run_args: arguments for run command - :param env_vars: environment vars to launch job with - """ - super().__init__("mpirun", run_args, env_vars, **kwargs) - - -class MpiexecSettings(_BaseMPISettings): - def __init__( - self, - run_args: t.Optional[t.Dict[str, t.Union[int, str, float, None]]] = None, - env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None, - **kwargs: t.Any, - ) -> None: - """Settings to run job with ``mpiexec`` command (MPI-standard) - - Note that environment variables can be passed with a None - value to signify that they should be exported from the current - environment - - Any arguments passed in the ``run_args`` dict will be converted - into ``mpiexec`` arguments and prefixed with ``--``. Values of - None can be provided for arguments that do not have values. - - :param run_args: arguments for run command - :param env_vars: environment vars to launch job with - """ - super().__init__("mpiexec", run_args, env_vars, **kwargs) - - completed_process = subprocess.run( - [self._run_command, "--help"], capture_output=True, check=False - ) - help_statement = completed_process.stdout.decode() - if "mpiexec.slurm" in help_statement: - raise SSUnsupportedError( - "Slurm's wrapper for mpiexec is unsupported. Use slurmSettings instead" - ) - - -class OrterunSettings(_BaseMPISettings): - def __init__( - self, - run_args: t.Optional[t.Dict[str, t.Union[int, str, float, None]]] = None, - env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None, - **kwargs: t.Any, - ) -> None: - """Settings to run job with ``orterun`` command (MPI-standard) - - Note that environment variables can be passed with a None - value to signify that they should be exported from the current - environment - - Any arguments passed in the ``run_args`` dict will be converted - into ``orterun`` arguments and prefixed with ``--``. Values of - None can be provided for arguments that do not have values. - - :param run_args: arguments for run command - :param env_vars: environment vars to launch job with - """ - super().__init__("orterun", run_args, env_vars, **kwargs) diff --git a/smartsim/settings/pbsSettings.py b/smartsim/settings/pbsSettings.py deleted file mode 100644 index 2e9f8fb469..0000000000 --- a/smartsim/settings/pbsSettings.py +++ /dev/null @@ -1,264 +0,0 @@ -# BSD 2-Clause License -# -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import typing as t - -from ..error import SSConfigError -from ..log import get_logger -from .base import BatchSettings - -logger = get_logger(__name__) - - -class QsubBatchSettings(BatchSettings): - def __init__( - self, - nodes: t.Optional[int] = None, - ncpus: t.Optional[int] = None, - time: t.Optional[str] = None, - queue: t.Optional[str] = None, - account: t.Optional[str] = None, - resources: t.Optional[t.Dict[str, t.Union[str, int]]] = None, - batch_args: t.Optional[t.Dict[str, t.Optional[str]]] = None, - **kwargs: t.Any, - ): - """Specify ``qsub`` batch parameters for a job - - ``nodes``, and ``ncpus`` are used to create the - select statement for PBS if a select statement is not - included in the ``resources``. If both are supplied - the value for select statement supplied in ``resources`` - will override. - - :param nodes: number of nodes for batch - :param ncpus: number of cpus per node - :param time: walltime for batch job - :param queue: queue to run batch in - :param account: account for batch launch - :param resources: overrides for resource arguments - :param batch_args: overrides for PBS batch arguments - """ - - self._ncpus = ncpus - - self.resources = resources or {} - resource_nodes = self.resources.get("nodes", None) - - if nodes and resource_nodes: - raise ValueError( - "nodes was incorrectly specified as a constructor parameter and also " - "as a key in the resource mapping" - ) - - # time, queue, nodes, and account set in parent class init - super().__init__( - "qsub", - batch_args=batch_args, - nodes=nodes, - account=account, - queue=queue, - time=time, - **kwargs, - ) - - self._hosts: t.List[str] = [] - - @property - def resources(self) -> t.Dict[str, t.Union[str, int]]: - return self._resources.copy() - - @resources.setter - def resources(self, resources: t.Dict[str, t.Union[str, int]]) -> None: - self._sanity_check_resources(resources) - self._resources = resources.copy() - - def set_nodes(self, num_nodes: int) -> None: - """Set the number of nodes for this batch job - - In PBS, 'select' is the more primitive way of describing how - many nodes to allocate for the job. 'nodes' is equivalent to - 'select' with a 'place' statement. Assuming that only advanced - users would use 'set_resource' instead, defining the number of - nodes here is sets the 'nodes' resource. - - :param num_nodes: number of nodes - """ - - if num_nodes: - self.set_resource("nodes", num_nodes) - - def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: - """Specify the hostlist for this job - - :param host_list: hosts to launch on - :raises TypeError: if not str or list of str - """ - if isinstance(host_list, str): - host_list = [host_list.strip()] - if not isinstance(host_list, list): - raise TypeError("host_list argument must be a list of strings") - if not all(isinstance(host, str) for host in host_list): - raise TypeError("host_list argument must be a list of strings") - self._hosts = host_list - - def set_walltime(self, walltime: str) -> None: - """Set the walltime of the job - - format = "HH:MM:SS" - - If a walltime argument is provided in - ``QsubBatchSettings.resources``, then - this value will be overridden - - :param walltime: wall time - """ - if walltime: - self.set_resource("walltime", walltime) - - def set_queue(self, queue: str) -> None: - """Set the queue for the batch job - - :param queue: queue name - """ - if queue: - self.batch_args["q"] = str(queue) - - def set_ncpus(self, num_cpus: t.Union[int, str]) -> None: - """Set the number of cpus obtained in each node. - - If a select argument is provided in - ``QsubBatchSettings.resources``, then - this value will be overridden - - :param num_cpus: number of cpus per node in select - """ - self._ncpus = int(num_cpus) - - def set_account(self, account: str) -> None: - """Set the account for this batch job - - :param acct: account id - """ - if account: - self.batch_args["A"] = str(account) - - def set_resource(self, resource_name: str, value: t.Union[str, int]) -> None: - """Set a resource value for the Qsub batch - - If a select statement is provided, the nodes and ncpus - arguments will be overridden. Likewise for Walltime - - :param resource_name: name of resource, e.g. walltime - :param value: value - """ - # TODO add error checking here - # TODO include option to overwrite place (warning for featurestore?) - updated_dict = self.resources - print(f"name of resource: {resource_name}") - updated_dict.update({resource_name: value}) - self._sanity_check_resources(updated_dict) - self.resources = updated_dict - - def format_batch_args(self) -> t.List[str]: - """Get the formatted batch arguments for a preview - - :return: batch arguments for Qsub - :raises ValueError: if options are supplied without values - """ - opts = self._create_resource_list() - for opt, value in self.batch_args.items(): - prefix = "-" - if not value: - raise ValueError("PBS options without values are not allowed") - opts += [" ".join((prefix + opt, str(value)))] - return opts - - def _sanity_check_resources( - self, resources: t.Optional[t.Dict[str, t.Union[str, int]]] = None - ) -> None: - """Check that only select or nodes was specified in resources - - Note: For PBS Pro, nodes is equivalent to 'select' and 'place' so - they are not quite synonyms. Here we assume that - """ - # Note: isinstance check here to avoid collision with default - checked_resources = resources if isinstance(resources, dict) else self.resources - - has_select = checked_resources.get("select", None) - has_nodes = checked_resources.get("nodes", None) - - if has_select and has_nodes: - raise SSConfigError( - "'select' and 'nodes' cannot both be specified. This can happen " - "if nodes were specified using the 'set_nodes' method and " - "'select' was set using 'set_resource'. Please only specify one." - ) - - if has_select and not isinstance(has_select, int): - raise TypeError("The value for 'select' must be an integer") - if has_nodes and not isinstance(has_nodes, int): - raise TypeError("The value for 'nodes' must be an integer") - - for key, value in checked_resources.items(): - if not isinstance(key, str): - raise TypeError( - f"The type of {key=} is {type(key)}. Only int and str " - "are allowed." - ) - if not isinstance(value, (str, int)): - raise TypeError( - f"The value associated with {key=} is {type(value)}. Only int " - "and str are allowed." - ) - - def _create_resource_list(self) -> t.List[str]: - self._sanity_check_resources() - res = [] - - # Pop off some specific keywords that need to be treated separately - resources = self.resources # Note this is a copy so not modifying original - - # Construct the basic select/nodes statement - if select := resources.pop("select", None): - select_command = f"-l select={select}" - elif nodes := resources.pop("nodes", None): - select_command = f"-l nodes={nodes}" - else: - raise SSConfigError( - "Insufficient resource specification: no nodes or select statement" - ) - if self._ncpus: - select_command += f":ncpus={self._ncpus}" - if self._hosts: - hosts = ["=".join(("host", str(host))) for host in self._hosts] - select_command += f":{'+'.join(hosts)}" - res += [select_command] - - # All other "standard" resource specs - for resource, value in resources.items(): - res += [f"-l {resource}={value}"] - - return res diff --git a/smartsim/settings/settings.py b/smartsim/settings/settings.py deleted file mode 100644 index e5f0053d30..0000000000 --- a/smartsim/settings/settings.py +++ /dev/null @@ -1,219 +0,0 @@ -# BSD 2-Clause License -# -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import typing as t - -from ..log import get_logger - -logger = get_logger(__name__) - -from .._core.utils.helpers import is_valid_cmd -from ..error import SmartSimError -from ..settings import ( - AprunSettings, - BsubBatchSettings, - Container, - DragonRunSettings, - JsrunSettings, - MpiexecSettings, - MpirunSettings, - OrterunSettings, - PalsMpiexecSettings, - QsubBatchSettings, - RunSettings, - SbatchSettings, - SrunSettings, - base, -) -from ..wlm import detect_launcher - -_TRunSettingsSelector = t.Callable[[str], t.Callable[..., RunSettings]] - - -def create_batch_settings( - launcher: str, - nodes: t.Optional[int] = None, - time: str = "", - queue: t.Optional[str] = None, - account: t.Optional[str] = None, - batch_args: t.Optional[t.Dict[str, str]] = None, - **kwargs: t.Any, -) -> base.BatchSettings: - """Create a ``BatchSettings`` instance - - See Experiment.create_batch_settings for details - - :param launcher: launcher for this experiment, if set to 'auto', - an attempt will be made to find an available launcher on the system - :param nodes: number of nodes for batch job - :param time: length of batch job - :param queue: queue or partition (if slurm) - :param account: user account name for batch system - :param batch_args: additional batch arguments - :return: a newly created BatchSettings instance - :raises SmartSimError: if batch creation fails - """ - if batch_args: - res_arg = batch_args - batch_args = {k.strip().lstrip("-"): _ for k, _ in batch_args.items()} - - if batch_args != res_arg: - logger.warning( - "One or more leading `-` characters were provided to the run argument. \ -Leading dashes were stripped and the arguments were passed to the run_command." - ) - # all supported batch class implementations - by_launcher: t.Dict[str, t.Callable[..., base.BatchSettings]] = { - "pbs": QsubBatchSettings, - "slurm": SbatchSettings, - "lsf": BsubBatchSettings, - "pals": QsubBatchSettings, - } - - if launcher in ["auto", "dragon"]: - launcher = detect_launcher() - if launcher == "dragon": - by_launcher["dragon"] = by_launcher[launcher] - - if launcher == "local": - raise SmartSimError("Local launcher does not support batch workloads") - - # detect the batch class to use based on the launcher provided by - # the user - try: - batch_class = by_launcher[launcher] - batch_settings = batch_class( - nodes=nodes, - time=time, - batch_args=batch_args, - queue=queue, - account=account, - **kwargs, - ) - return batch_settings - - except KeyError: - raise SmartSimError( - f"User attempted to make batch settings for unsupported launcher {launcher}" - ) from None - - -def create_run_settings( - launcher: str, - run_command: str = "auto", - run_args: t.Optional[t.Dict[str, t.Union[int, str, float, None]]] = None, - env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None, - container: t.Optional[Container] = None, - **kwargs: t.Any, -) -> RunSettings: - """Create a ``RunSettings`` instance. - - See Experiment.create_run_settings docstring for more details - - :param launcher: launcher to create settings for, if set to 'auto', - an attempt will be made to find an available launcher on the system - :param run_command: command to run the executable - :param run_args: arguments to pass to the ``run_command`` - :param env_vars: environment variables to pass to the executable - :param container: container type for workload (e.g. "singularity") - :return: the created ``RunSettings`` - :raises SmartSimError: if run_command=="auto" and detection fails - """ - if run_args: - reserve_run_args = run_args - run_args = {k.strip().lstrip("-"): _ for k, _ in run_args.items()} - - if set(reserve_run_args) != set(run_args): - logger.warning( - "One or more leading `-` characters were provided to the run argument. \ -Leading dashes were stripped and arguments were passed to the run_command." - ) - - # all supported RunSettings child classes - supported: t.Dict[str, _TRunSettingsSelector] = { - "aprun": lambda launcher: AprunSettings, - "srun": lambda launcher: SrunSettings, - "mpirun": lambda launcher: MpirunSettings, - "mpiexec": lambda launcher: ( - MpiexecSettings if launcher != "pals" else PalsMpiexecSettings - ), - "orterun": lambda launcher: OrterunSettings, - "jsrun": lambda launcher: JsrunSettings, - } - - # run commands supported by each launcher - # in order of suspected user preference - by_launcher = { - "dragon": [""], - "slurm": ["srun", "mpirun", "mpiexec"], - "pbs": ["aprun", "mpirun", "mpiexec"], - "pals": ["mpiexec"], - "lsf": ["jsrun", "mpirun", "mpiexec"], - "local": [""], - } - - if launcher == "auto": - launcher = detect_launcher() - - def _detect_command(launcher: str) -> str: - if launcher in by_launcher: - if launcher in ["local", "dragon"]: - return "" - - for cmd in by_launcher[launcher]: - if is_valid_cmd(cmd): - return cmd - msg = ( - "Could not automatically detect a run command to use for launcher " - f"{launcher}\nSearched for and could not find the following " - f"commands: {by_launcher[launcher]}" - ) - raise SmartSimError(msg) - - if run_command: - run_command = run_command.lower() - launcher = launcher.lower() - - # detect run_command automatically for all but local launcher - if run_command == "auto": - # no auto detection for local, revert to false - run_command = _detect_command(launcher) - - if launcher == "dragon": - return DragonRunSettings( - exe=exe, exe_args=exe_args, env_vars=env_vars, container=container, **kwargs - ) - - # if user specified and supported or auto detection worked - if run_command and run_command in supported: - return supported[run_command](launcher)( - run_args, env_vars, container=container, **kwargs - ) - - # 1) user specified and not implementation in SmartSim - # 2) user supplied run_command=None - # 3) local launcher being used and default of "auto" was passed. - return RunSettings(run_command, run_args, env_vars, container=container) diff --git a/smartsim/settings/slurmSettings.py b/smartsim/settings/slurmSettings.py deleted file mode 100644 index 1d05169b0d..0000000000 --- a/smartsim/settings/slurmSettings.py +++ /dev/null @@ -1,507 +0,0 @@ -# BSD 2-Clause License -# -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from __future__ import annotations - -import datetime -import os -import typing as t - -from ..error import SSUnsupportedError -from ..log import get_logger -from .base import BatchSettings, RunSettings - -logger = get_logger(__name__) - - -class SrunSettings(RunSettings): - def __init__( - self, - run_args: t.Optional[t.Dict[str, t.Union[int, str, float, None]]] = None, - env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None, - alloc: t.Optional[str] = None, - **kwargs: t.Any, - ) -> None: - """Initialize run parameters for a slurm job with ``srun`` - - ``SrunSettings`` should only be used on Slurm based systems. - - If an allocation is specified, the instance receiving these run - parameters will launch on that allocation. - - :param run_args: srun arguments without dashes - :param env_vars: environment variables for job - :param alloc: allocation ID if running on existing alloc - """ - super().__init__( - run_command="srun", - run_args=run_args, - env_vars=env_vars, - **kwargs, - ) - self.alloc = alloc - self.mpmd: t.List[RunSettings] = [] - - reserved_run_args = {"chdir", "D"} - - def set_nodes(self, nodes: int) -> None: - """Set the number of nodes - - Effectively this is setting: ``srun --nodes `` - - :param nodes: number of nodes to run with - """ - self.run_args["nodes"] = int(nodes) - - def make_mpmd(self, settings: RunSettings) -> None: - """Make a mpmd workload by combining two ``srun`` commands - - This connects the two settings to be executed with a single - Application instance - - :param settings: SrunSettings instance - """ - if self.colocated_fs_settings: - raise SSUnsupportedError( - "Colocated applications cannot be run as a mpmd workload" - ) - if self.container: - raise SSUnsupportedError( - "Containerized MPMD workloads are not yet supported." - ) - if os.getenv("SLURM_HET_SIZE") is not None: - raise ValueError( - "Slurm does not support MPMD workloads in heterogeneous jobs." - ) - self.mpmd.append(settings) - - def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: - """Specify the hostlist for this job - - This sets ``--nodelist`` - - :param host_list: hosts to launch on - :raises TypeError: if not str or list of str - """ - if isinstance(host_list, str): - host_list = [host_list.strip()] - if not isinstance(host_list, list): - raise TypeError("host_list argument must be a list of strings") - if not all(isinstance(host, str) for host in host_list): - raise TypeError("host_list argument must be list of strings") - self.run_args["nodelist"] = ",".join(host_list) - - def set_hostlist_from_file(self, file_path: str) -> None: - """Use the contents of a file to set the node list - - This sets ``--nodefile`` - - :param file_path: Path to the hostlist file - """ - self.run_args["nodefile"] = file_path - - def set_excluded_hosts(self, host_list: t.Union[str, t.List[str]]) -> None: - """Specify a list of hosts to exclude for launching this job - - :param host_list: hosts to exclude - :raises TypeError: - """ - if isinstance(host_list, str): - host_list = [host_list.strip()] - if not isinstance(host_list, list): - raise TypeError("host_list argument must be a list of strings") - if not all(isinstance(host, str) for host in host_list): - raise TypeError("host_list argument must be list of strings") - self.run_args["exclude"] = ",".join(host_list) - - def set_cpus_per_task(self, cpus_per_task: int) -> None: - """Set the number of cpus to use per task - - This sets ``--cpus-per-task`` - - :param num_cpus: number of cpus to use per task - """ - self.run_args["cpus-per-task"] = int(cpus_per_task) - - def set_tasks(self, tasks: int) -> None: - """Set the number of tasks for this job - - This sets ``--ntasks`` - - :param tasks: number of tasks - """ - self.run_args["ntasks"] = int(tasks) - - def set_tasks_per_node(self, tasks_per_node: int) -> None: - """Set the number of tasks for this job - - This sets ``--ntasks-per-node`` - - :param tasks_per_node: number of tasks per node - """ - self.run_args["ntasks-per-node"] = int(tasks_per_node) - - def set_cpu_bindings(self, bindings: t.Union[int, t.List[int]]) -> None: - """Bind by setting CPU masks on tasks - - This sets ``--cpu-bind`` using the ``map_cpu:`` option - - :param bindings: List specifing the cores to which MPI processes are bound - """ - if isinstance(bindings, int): - bindings = [bindings] - self.run_args["cpu_bind"] = "map_cpu:" + ",".join( - str(int(num)) for num in bindings - ) - - def set_memory_per_node(self, memory_per_node: int) -> None: - """Specify the real memory required per node - - This sets ``--mem`` in megabytes - - :param memory_per_node: Amount of memory per node in megabytes - """ - self.run_args["mem"] = f"{int(memory_per_node)}M" - - def set_verbose_launch(self, verbose: bool) -> None: - """Set the job to run in verbose mode - - This sets ``--verbose`` - - :param verbose: Whether the job should be run verbosely - """ - if verbose: - self.run_args["verbose"] = None - else: - self.run_args.pop("verbose", None) - - def set_quiet_launch(self, quiet: bool) -> None: - """Set the job to run in quiet mode - - This sets ``--quiet`` - - :param quiet: Whether the job should be run quietly - """ - if quiet: - self.run_args["quiet"] = None - else: - self.run_args.pop("quiet", None) - - def set_broadcast(self, dest_path: t.Optional[str] = None) -> None: - """Copy executable file to allocated compute nodes - - This sets ``--bcast`` - - :param dest_path: Path to copy an executable file - """ - self.run_args["bcast"] = dest_path - - def set_node_feature(self, feature_list: t.Union[str, t.List[str]]) -> None: - """Specify the node feature for this job - - This sets ``-C`` - - :param feature_list: node feature to launch on - :raises TypeError: if not str or list of str - """ - if isinstance(feature_list, str): - feature_list = [feature_list.strip()] - elif not all(isinstance(feature, str) for feature in feature_list): - raise TypeError("node_feature argument must be string or list of strings") - self.run_args["C"] = ",".join(feature_list) - - @staticmethod - def _fmt_walltime(hours: int, minutes: int, seconds: int) -> str: - """Convert hours, minutes, and seconds into valid walltime format - - Converts time to format HH:MM:SS - - :param hours: number of hours to run job - :param minutes: number of minutes to run job - :param seconds: number of seconds to run job - :returns: Formatted walltime - """ - return fmt_walltime(hours, minutes, seconds) - - def set_walltime(self, walltime: str) -> None: - """Set the walltime of the job - - format = "HH:MM:SS" - - :param walltime: wall time - """ - self.run_args["time"] = str(walltime) - - def set_het_group(self, het_group: t.Iterable[int]) -> None: - """Set the heterogeneous group for this job - - this sets `--het-group` - - :param het_group: list of heterogeneous groups - """ - het_size_env = os.getenv("SLURM_HET_SIZE") - if het_size_env is None: - msg = "Requested to set het group, but the allocation is not a het job" - raise ValueError(msg) - - het_size = int(het_size_env) - if self.mpmd: - msg = "Slurm does not support MPMD workloads in heterogeneous jobs\n" - raise ValueError(msg) - msg = ( - "Support for heterogeneous groups is an experimental feature, " - "please report any unexpected behavior to SmartSim developers " - "by opening an issue on https://github.com/CrayLabs/SmartSim/issues" - ) - if any(group >= het_size for group in het_group): - msg = ( - f"Het group {max(het_group)} requested, " - f"but max het group in allocation is {het_size-1}" - ) - raise ValueError(msg) - logger.warning(msg) - self.run_args["het-group"] = ",".join(str(group) for group in het_group) - - def format_run_args(self) -> t.List[str]: - """Return a list of slurm formatted run arguments - - :return: list of slurm arguments for these settings - """ - # add additional slurm arguments based on key length - opts = [] - for opt, value in self.run_args.items(): - short_arg = bool(len(str(opt)) == 1) - prefix = "-" if short_arg else "--" - if not value: - opts += [prefix + opt] - else: - if short_arg: - opts += [prefix + opt, str(value)] - else: - opts += ["=".join((prefix + opt, str(value)))] - return opts - - def check_env_vars(self) -> None: - """Warn a user trying to set a variable which is set in the environment - - Given Slurm's env var precedence, trying to export a variable which is already - present in the environment will not work. - """ - for k, v in self.env_vars.items(): - if "," not in str(v): - # If a variable is defined, it will take precedence over --export - # we warn the user - preexisting_var = os.environ.get(k, None) - if preexisting_var is not None and preexisting_var != v: - msg = ( - f"Variable {k} is set to {preexisting_var} in current " - "environment. If the job is running in an interactive " - f"allocation, the value {v} will not be set. Please " - "consider removing the variable from the environment " - "and re-running the experiment." - ) - logger.warning(msg) - - def format_env_vars(self) -> t.List[str]: - """Build bash compatible environment variable string for Slurm - - :returns: the formatted string of environment variables - """ - self.check_env_vars() - return [f"{k}={v}" for k, v in self.env_vars.items() if "," not in str(v)] - - def format_comma_sep_env_vars(self) -> t.Tuple[str, t.List[str]]: - """Build environment variable string for Slurm - - Slurm takes exports in comma separated lists - the list starts with all as to not disturb the rest of the environment - for more information on this, see the slurm documentation for srun - - :returns: the formatted string of environment variables - """ - self.check_env_vars() - exportable_env, compound_env, key_only = [], [], [] - - for k, v in self.env_vars.items(): - kvp = f"{k}={v}" - - if "," in str(v): - key_only.append(k) - compound_env.append(kvp) - else: - exportable_env.append(kvp) - - # Append keys to exportable KVPs, e.g. `--export x1=v1,KO1,KO2` - fmt_exported_env = ",".join(v for v in exportable_env + key_only) - - for mpmd in self.mpmd: - compound_mpmd_env = { - k: v for k, v in mpmd.env_vars.items() if "," in str(v) - } - compound_mpmd_fmt = {f"{k}={v}" for k, v in compound_mpmd_env.items()} - compound_env.extend(compound_mpmd_fmt) - - return fmt_exported_env, compound_env - - -def fmt_walltime(hours: int, minutes: int, seconds: int) -> str: - """Helper function walltime format conversion - - Converts time to format HH:MM:SS - - :param hours: number of hours to run job - :param minutes: number of minutes to run job - :param seconds: number of seconds to run job - :returns: Formatted walltime - """ - delta = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds) - fmt_str = str(delta) - if delta.seconds // 3600 < 10: - fmt_str = "0" + fmt_str - return fmt_str - - -class SbatchSettings(BatchSettings): - def __init__( - self, - nodes: t.Optional[int] = None, - time: str = "", - account: t.Optional[str] = None, - batch_args: t.Optional[t.Dict[str, t.Optional[str]]] = None, - **kwargs: t.Any, - ) -> None: - """Specify run parameters for a Slurm batch job - - Slurm `sbatch` arguments can be written into ``batch_args`` - as a dictionary. e.g. {'ntasks': 1} - - If the argument doesn't have a parameter, put `None` - as the value. e.g. {'exclusive': None} - - Initialization values provided (nodes, time, account) - will overwrite the same arguments in ``batch_args`` if present - - :param nodes: number of nodes - :param time: walltime for job, e.g. "10:00:00" for 10 hours - :param account: account for job - :param batch_args: extra batch arguments - """ - super().__init__( - "sbatch", - batch_args=batch_args, - nodes=nodes, - account=account, - time=time, - **kwargs, - ) - - def set_walltime(self, walltime: str) -> None: - """Set the walltime of the job - - format = "HH:MM:SS" - - :param walltime: wall time - """ - # TODO check for formatting here - if walltime: - self.batch_args["time"] = walltime - - def set_nodes(self, num_nodes: int) -> None: - """Set the number of nodes for this batch job - - :param num_nodes: number of nodes - """ - if num_nodes: - self.batch_args["nodes"] = str(int(num_nodes)) - - def set_account(self, account: str) -> None: - """Set the account for this batch job - - :param account: account id - """ - if account: - self.batch_args["account"] = account - - def set_partition(self, partition: str) -> None: - """Set the partition for the batch job - - :param partition: partition name - """ - self.batch_args["partition"] = str(partition) - - def set_queue(self, queue: str) -> None: - """alias for set_partition - - Sets the partition for the slurm batch job - - :param queue: the partition to run the batch job on - """ - if queue: - self.set_partition(queue) - - def set_cpus_per_task(self, cpus_per_task: int) -> None: - """Set the number of cpus to use per task - - This sets ``--cpus-per-task`` - - :param num_cpus: number of cpus to use per task - """ - self.batch_args["cpus-per-task"] = str(int(cpus_per_task)) - - def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: - """Specify the hostlist for this job - - :param host_list: hosts to launch on - :raises TypeError: if not str or list of str - """ - if isinstance(host_list, str): - host_list = [host_list.strip()] - if not isinstance(host_list, list): - raise TypeError("host_list argument must be a list of strings") - if not all(isinstance(host, str) for host in host_list): - raise TypeError("host_list argument must be list of strings") - self.batch_args["nodelist"] = ",".join(host_list) - - def format_batch_args(self) -> t.List[str]: - """Get the formatted batch arguments for a preview - - :return: batch arguments for Sbatch - """ - opts = [] - # TODO add restricted here - for opt, value in self.batch_args.items(): - # attach "-" prefix if argument is 1 character otherwise "--" - short_arg = bool(len(str(opt)) == 1) - prefix = "-" if short_arg else "--" - - if not value: - opts += [prefix + opt] - else: - if short_arg: - opts += [prefix + opt, str(value)] - else: - opts += ["=".join((prefix + opt, str(value)))] - return opts diff --git a/smartsim/wlm/slurm.py b/smartsim/wlm/slurm.py index ae7299f28b..d8bdd630df 100644 --- a/smartsim/wlm/slurm.py +++ b/smartsim/wlm/slurm.py @@ -38,7 +38,12 @@ SSReservedKeywordError, ) from ..log import get_logger -from ..settings.slurmSettings import fmt_walltime + + +# from ..settings.slurmSettings import fmt_walltime +# Mock function +def fmt_walltime(hours: int, minutes: int, seconds: int) -> str: ... # type: ignore[...] + logger = get_logger(__name__) diff --git a/tests/temp_tests/ensemble_tests.py b/tests/temp_tests/ensemble_tests.py deleted file mode 100644 index f655bb94f3..0000000000 --- a/tests/temp_tests/ensemble_tests.py +++ /dev/null @@ -1,18 +0,0 @@ -from smartsim.entity import Ensemble -from smartsim.settings import RunSettings - - -def test_create_ensemble(): - run_settings = RunSettings() - ensemble = Ensemble( - name="model", - exe="echo", - run_settings=run_settings, - exe_args=["hello"], - replicas=2, - ) - assert ensemble.exe == "echo" - assert ensemble.exe_args == ["hello"] - for model in ensemble: - assert model.exe == ["/usr/bin/echo"] - assert model.exe_args == ["hello"] diff --git a/tests/temp_tests/model_tests.py b/tests/temp_tests/model_tests.py deleted file mode 100644 index 021cfb2c23..0000000000 --- a/tests/temp_tests/model_tests.py +++ /dev/null @@ -1,60 +0,0 @@ -from smartsim import Experiment -from smartsim.database import Orchestrator -from smartsim.entity import Application, Ensemble -from smartsim.settings import RunSettings, SrunSettings -from smartsim.status import SmartSimStatus - - -def test_application_constructor(): - run_settings = RunSettings() - application = Application( - name="testing", - run_settings=run_settings, - exe="echo", - exe_args=["hello"], - params={}, - ) - assert application.exe == ["/usr/bin/echo"] - assert application.exe_args == ["hello"] - - -def test_application_add_exe_args(): - run_settings = SrunSettings() - application = Application( - name="testing", - run_settings=run_settings, - exe="echo", - exe_args=["hello"], - params={}, - ) - application.add_exe_args("there") - assert application.exe_args == ["hello", "there"] - application.add_exe_args(["how", "are", "you"]) - assert application.exe_args == ["hello", "there", "how", "are", "you"] - - -def test_create_application(): - run_settings = SrunSettings() - exp = Experiment("exp") - application = exp.create_application( - name="application", run_settings=run_settings, exe="echo", exe_args=["hello"] - ) - assert application.exe == ["/usr/bin/echo"] - assert application.exe_args == ["hello"] - - -def test_start_a_application(): - exp = Experiment("exp") - run_settings = SrunSettings() - application = Application( - name="testing", - exe="echo", - run_settings=run_settings, - exe_args=["hello"], - params={}, - ) - assert application.exe == ["/usr/bin/echo"] - assert application.exe_args == ["hello"] - exp.start(application) - application_status = exp.get_status(application)[0] - assert application_status != SmartSimStatus.STATUS_FAILED diff --git a/tests/temp_tests/settings_tests.py b/tests/temp_tests/settings_tests.py deleted file mode 100644 index d534541d8f..0000000000 --- a/tests/temp_tests/settings_tests.py +++ /dev/null @@ -1,137 +0,0 @@ -import itertools -import os -import os.path as osp -from shutil import which - -import pytest - -from smartsim.settings import ( - AprunSettings, - BsubBatchSettings, - JsrunSettings, - MpiexecSettings, - MpirunSettings, - OrterunSettings, - PalsMpiexecSettings, - QsubBatchSettings, - RunSettings, - SbatchSettings, - SrunSettings, -) - -env_vars = {"k1": "v1", "k2": "v2"} -run_args = {"envlist": "SPAM"} - - -# Test that mpi RunSetting classes create without error -@pytest.mark.parametrize( - "settings_type, env_vars, run_args", - [ - pytest.param( - MpirunSettings, - env_vars, - run_args, - id=f"mpirun", - ), - pytest.param( - OrterunSettings, - env_vars, - run_args, - id=f"orterun", - ), - ], -) -def test_mpi_instantiate_run_settings(settings_type, env_vars, run_args): - settings = settings_type( - run_args=run_args, env_vars=env_vars, fail_if_missing_exec=False - ) - assert settings.env_vars == env_vars - assert settings.run_args == run_args - assert isinstance(settings, settings_type) - - -# Test that RunSetting classes create without error -@pytest.mark.parametrize( - "settings_type, env_vars, run_args", - [ - pytest.param( - SrunSettings, - env_vars, - run_args, - id=f"srun", - ), - pytest.param( - PalsMpiexecSettings, - env_vars, - run_args, - id=f"mpiexec", - ), - pytest.param( - JsrunSettings, - env_vars, - run_args, - id="jsrun", - ), - pytest.param( - RunSettings, - env_vars, - run_args, - id="local", - ), - pytest.param( - AprunSettings, - env_vars, - run_args, - id="aprun", - ), - ], -) -def test_instantiate_run_settings(settings_type, env_vars, run_args): - settings = settings_type(run_args=run_args, env_vars=env_vars) - assert settings.env_vars == env_vars - assert settings.run_args == run_args - assert isinstance(settings, settings_type) - - -nodes = 4 -time = "10:00:00" -account = "1234" - - -# Test that BatchSettings classes create without error -# This currently does not work, need to unify how we treat each settings class -@pytest.mark.parametrize( - "settings_type, nodes, node_flag, time, account", - [ - pytest.param( - BsubBatchSettings, - nodes, - "nnodes", - time, - account, - id=f"bsub", - ), - pytest.param( - QsubBatchSettings, - nodes, - "nodes", - time, - account, - id="qsub", - ), - pytest.param( - SbatchSettings, - nodes, - "nodes", - time, - account, - id="sbatch", - ), - ], -) -def test_instantiate_batch_settings(settings_type, nodes, node_flag, time, account): - batch_settings = settings_type(nodes=nodes, time=time, account=account) - assert batch_settings.resources[node_flag] == nodes - assert batch_settings.batch_args["time"] == time - assert batch_settings.batch_args["account"] == account - assert isinstance(batch_settings, settings_type) diff --git a/tests/temp_tests/steps_tests.py b/tests/temp_tests/steps_tests.py index 2237a57824..bd20607f32 100644 --- a/tests/temp_tests/steps_tests.py +++ b/tests/temp_tests/steps_tests.py @@ -12,7 +12,7 @@ SbatchStep, SrunStep, ) -from smartsim.entity import Application +from smartsim.entity import Model from smartsim.settings import ( AprunSettings, BsubBatchSettings, @@ -55,7 +55,7 @@ def test_instantiate_run_settings(settings_type, step_type): run_settings = settings_type() run_settings.in_batch = True - model = Application( + model = Model( exe="echo", exe_args="hello", name="model_name", run_settings=run_settings ) jobStep = step_type(entity=model, run_settings=model.run_settings) @@ -85,7 +85,7 @@ def test_instantiate_run_settings(settings_type, step_type): def test_instantiate_mpi_run_settings(settings_type, step_type): run_settings = settings_type(fail_if_missing_exec=False) run_settings.in_batch = True - model = Application( + model = Model( exe="echo", exe_args="hello", name="model_name", run_settings=run_settings ) jobStep = step_type(entity=model, run_settings=model.run_settings) diff --git a/tests/temp_tests/test_colocatedJobGroup.py b/tests/temp_tests/test_colocatedJobGroup.py index 3bd49dad74..bd8ad7d6db 100644 --- a/tests/temp_tests/test_colocatedJobGroup.py +++ b/tests/temp_tests/test_colocatedJobGroup.py @@ -2,12 +2,12 @@ from smartsim.launchable.basejob import BaseJob from smartsim.launchable.colocatedJobGroup import ColocatedJobGroup from smartsim.launchable.job import Job -from smartsim.settings.base import RunSettings +from smartsim.settings import LaunchSettings # TODO replace with LaunchSettings -app_1 = Application("app_1", "python", run_settings=RunSettings()) -app_2 = Application("app_2", "python", run_settings=RunSettings()) -app_3 = Application("app_3", "python", run_settings=RunSettings()) +app_1 = Application("app_1", "python", run_settings=LaunchSettings("slurm")) +app_2 = Application("app_2", "python", run_settings=LaunchSettings("slurm")) +app_3 = Application("app_3", "python", run_settings=LaunchSettings("slurm")) def test_create_ColocatedJobGroup(): @@ -17,18 +17,18 @@ def test_create_ColocatedJobGroup(): def test_getitem_ColocatedJobGroup(): - job_1 = Job(app_1, RunSettings()) - job_2 = Job(app_2, RunSettings()) + job_1 = Job(app_1, LaunchSettings("slurm")) + job_2 = Job(app_2, LaunchSettings("slurm")) job_group = ColocatedJobGroup([job_1, job_2]) get_value = job_group[0].entity.name assert get_value == job_1.entity.name def test_setitem_JobGroup(): - job_1 = Job(app_1, RunSettings()) - job_2 = Job(app_2, RunSettings()) + job_1 = Job(app_1, LaunchSettings("slurm")) + job_2 = Job(app_2, LaunchSettings("slurm")) job_group = ColocatedJobGroup([job_1, job_2]) - job_3 = Job(app_3, RunSettings()) + job_3 = Job(app_3, LaunchSettings("slurm")) job_group[1] = job_3 assert len(job_group) == 2 get_value = job_group[1].entity.name diff --git a/tests/temp_tests/test_core/test_commands/test_command.py b/tests/temp_tests/test_core/test_commands/test_command.py new file mode 100644 index 0000000000..8780357268 --- /dev/null +++ b/tests/temp_tests/test_core/test_commands/test_command.py @@ -0,0 +1,68 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from smartsim._core.commands.command import Command +from smartsim.settings.launchCommand import LauncherType + + +def test_command_init(): + cmd = Command(launcher=LauncherType.Slurm, command=["salloc", "-N", "1"]) + assert cmd.command == ["salloc", "-N", "1"] + assert cmd.launcher == LauncherType.Slurm + + +def test_command_getitem(): + cmd = Command(launcher=LauncherType.Slurm, command=["salloc", "-N", "1"]) + get_value = cmd[0] + assert get_value == "salloc" + + +def test_command_setitem(): + cmd = Command(launcher=LauncherType.Slurm, command=["salloc", "-N", "1"]) + cmd[0] = "srun" + cmd[1] = "-n" + assert cmd.command == ["srun", "-n", "1"] + + +def test_command_delitem(): + cmd = Command( + launcher=LauncherType.Slurm, + command=["salloc", "-N", "1", "--constraint", "P100"], + ) + del cmd.command[3] + del cmd.command[3] + assert cmd.command == ["salloc", "-N", "1"] + + +def test_command_len(): + cmd = Command(launcher=LauncherType.Slurm, command=["salloc", "-N", "1"]) + assert len(cmd) is 3 + + +def test_command_insert(): + cmd = Command(launcher=LauncherType.Slurm, command=["-N", "1"]) + cmd.insert(0, "salloc") + assert cmd.command == ["salloc", "-N", "1"] diff --git a/tests/temp_tests/test_core/test_commands/test_commandList.py b/tests/temp_tests/test_core/test_commands/test_commandList.py new file mode 100644 index 0000000000..dcded85307 --- /dev/null +++ b/tests/temp_tests/test_core/test_commands/test_commandList.py @@ -0,0 +1,67 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from smartsim._core.commands.command import Command +from smartsim._core.commands.commandList import CommandList +from smartsim.settings.launchCommand import LauncherType + +salloc_cmd = Command(launcher=LauncherType.Slurm, command=["salloc", "-N", "1"]) +srun_cmd = Command(launcher=LauncherType.Slurm, command=["srun", "-n", "1"]) +sacct_cmd = Command(launcher=LauncherType.Slurm, command=["sacct", "--user"]) + + +def test_command_init(): + cmd_list = CommandList(commands=[salloc_cmd, srun_cmd]) + assert cmd_list.commands == [salloc_cmd, srun_cmd] + + +def test_command_getitem(): + cmd_list = CommandList(commands=[salloc_cmd, srun_cmd]) + get_value = cmd_list[0] + assert get_value == salloc_cmd + + +def test_command_setitem(): + cmd_list = CommandList(commands=[salloc_cmd, srun_cmd]) + cmd_list[0] = sacct_cmd + assert cmd_list.commands == [sacct_cmd, srun_cmd] + + +def test_command_delitem(): + cmd_list = CommandList(commands=[salloc_cmd, srun_cmd]) + del cmd_list.commands[0] + assert cmd_list.commands == [srun_cmd] + + +def test_command_len(): + cmd_list = CommandList(commands=[salloc_cmd, srun_cmd]) + assert len(cmd_list) is 2 + + +def test_command_insert(): + cmd_list = CommandList(commands=[salloc_cmd, srun_cmd]) + cmd_list.insert(0, sacct_cmd) + assert cmd_list.commands == [sacct_cmd, salloc_cmd, srun_cmd] diff --git a/tests/temp_tests/test_core/test_commands/test_launchCommands.py b/tests/temp_tests/test_core/test_commands/test_launchCommands.py new file mode 100644 index 0000000000..65fb7c5a7c --- /dev/null +++ b/tests/temp_tests/test_core/test_commands/test_launchCommands.py @@ -0,0 +1,22 @@ +from smartsim._core.commands.command import Command +from smartsim._core.commands.commandList import CommandList +from smartsim._core.commands.launchCommands import LaunchCommands +from smartsim.settings.launchCommand import LauncherType + +pre_cmd = Command(launcher=LauncherType.Slurm, command=["pre", "cmd"]) +launch_cmd = Command(launcher=LauncherType.Slurm, command=["launch", "cmd"]) +post_cmd = Command(launcher=LauncherType.Slurm, command=["post", "cmd"]) +pre_commands_list = CommandList(commands=[pre_cmd]) +launch_command_list = CommandList(commands=[launch_cmd]) +post_command_list = CommandList(commands=[post_cmd]) + + +def test_launchCommand_init(): + launch_cmd = LaunchCommands( + prelaunch_commands=pre_commands_list, + launch_commands=launch_command_list, + postlaunch_commands=post_command_list, + ) + assert launch_cmd.prelaunch_command == pre_commands_list + assert launch_cmd.launch_command == launch_command_list + assert launch_cmd.postlaunch_command == post_command_list diff --git a/tests/temp_tests/test_jobGroup.py b/tests/temp_tests/test_jobGroup.py index a5dd96d75f..2139b46e2d 100644 --- a/tests/temp_tests/test_jobGroup.py +++ b/tests/temp_tests/test_jobGroup.py @@ -2,12 +2,12 @@ from smartsim.launchable.basejob import BaseJob from smartsim.launchable.job import Job from smartsim.launchable.jobGroup import JobGroup -from smartsim.settings.base import RunSettings +from smartsim.settings.launchSettings import LaunchSettings # TODO replace with LaunchSettings -app_1 = Application("app_1", "python", RunSettings()) -app_2 = Application("app_2", "python", RunSettings()) -app_3 = Application("app_3", "python", RunSettings()) +app_1 = Application("app_1", "python", LaunchSettings("slurm")) +app_2 = Application("app_2", "python", LaunchSettings("slurm")) +app_3 = Application("app_3", "python", LaunchSettings("slurm")) def test_create_JobGroup(): @@ -17,18 +17,18 @@ def test_create_JobGroup(): def test_getitem_JobGroup(): - job_1 = Job(app_1, RunSettings()) - job_2 = Job(app_2, RunSettings()) + job_1 = Job(app_1, LaunchSettings("slurm")) + job_2 = Job(app_2, LaunchSettings("slurm")) job_group = JobGroup([job_1, job_2]) get_value = job_group[0].entity.name assert get_value == job_1.entity.name def test_setitem_JobGroup(): - job_1 = Job(app_1, RunSettings()) - job_2 = Job(app_2, RunSettings()) + job_1 = Job(app_1, LaunchSettings("slurm")) + job_2 = Job(app_2, LaunchSettings("slurm")) job_group = JobGroup([job_1, job_2]) - job_3 = Job(app_3, RunSettings()) + job_3 = Job(app_3, LaunchSettings("slurm")) job_group[1] = job_3 assert len(job_group) == 2 get_value = job_group[1] diff --git a/tests/temp_tests/test_launchable.py b/tests/temp_tests/test_launchable.py index 9d31ee5561..2a77817e58 100644 --- a/tests/temp_tests/test_launchable.py +++ b/tests/temp_tests/test_launchable.py @@ -26,7 +26,7 @@ import pytest -from smartsim.database.orchestrator import FeatureStore +from smartsim.entity.ensemble import Ensemble from smartsim.entity.entity import SmartSimEntity from smartsim.entity.model import Application from smartsim.error.errors import SSUnsupportedError @@ -34,7 +34,7 @@ from smartsim.launchable.launchable import SmartSimObject from smartsim.launchable.mpmdjob import MPMDJob from smartsim.launchable.mpmdpair import MPMDPair -from smartsim.settings.base import RunSettings +from smartsim.settings import LaunchSettings # TODO replace with LaunchSettings @@ -51,9 +51,12 @@ def test_launchable_init(): def test_job_init(): entity = Application( - "test_name", run_settings=RunSettings(), exe="echo", exe_args=["spam", "eggs"] + "test_name", + run_settings=LaunchSettings("slurm"), + exe="echo", + exe_args=["spam", "eggs"], ) - job = Job(entity, RunSettings()) + job = Job(entity, LaunchSettings("slurm")) assert isinstance(job, Job) assert job.entity.name == "test_name" assert "echo" in job.entity.exe[0] @@ -63,20 +66,24 @@ def test_job_init(): def test_job_init_deepcopy(): entity = Application( - "test_name", run_settings=RunSettings(), exe="echo", exe_args=["spam", "eggs"] + "test_name", + run_settings=LaunchSettings("slurm"), + exe="echo", + exe_args=["spam", "eggs"], ) - settings = RunSettings(run_args="test") + settings = LaunchSettings("slurm") job = Job(entity, settings) - settings.run_args = "change" - assert "change" not in job.launch_settings.run_args + test = job.launch_settings.launcher + test = "test_change" + assert job.launch_settings.launcher is not test def test_add_mpmd_pair(): - entity = SmartSimEntity("test_name", "python", RunSettings()) + entity = SmartSimEntity("test_name", "python", LaunchSettings("slurm")) mpmd_job = MPMDJob() - mpmd_job.add_mpmd_pair(entity, RunSettings()) - mpmd_pair = MPMDPair(entity, RunSettings()) + mpmd_job.add_mpmd_pair(entity, LaunchSettings("slurm")) + mpmd_pair = MPMDPair(entity, LaunchSettings("slurm")) assert len(mpmd_job.mpmd_pairs) == 1 assert str(mpmd_pair.entity) == str(mpmd_job.mpmd_pairs[0].entity) @@ -86,9 +93,12 @@ def test_add_mpmd_pair(): def test_mpmdpair_init(): """Test the creation of an MPMDPair""" entity = Application( - "test_name", "echo", exe_args=["spam", "eggs"], run_settings=RunSettings() + "test_name", + "echo", + exe_args=["spam", "eggs"], + run_settings=LaunchSettings("slurm"), ) - mpmd_pair = MPMDPair(entity, RunSettings()) + mpmd_pair = MPMDPair(entity, LaunchSettings("slurm")) assert isinstance(mpmd_pair, MPMDPair) assert mpmd_pair.entity.name == "test_name" assert "echo" in mpmd_pair.entity.exe[0] @@ -99,25 +109,35 @@ def test_mpmdpair_init(): def test_mpmdpair_init_deepcopy(): """Test the creation of an MPMDPair""" entity = Application( - "test_name", "echo", run_settings=RunSettings(), exe_args=["spam", "eggs"] + "test_name", + "echo", + run_settings=LaunchSettings("slurm"), + exe_args=["spam", "eggs"], ) - settings = RunSettings(run_args="test") + settings = LaunchSettings("slurm") mpmd_pair = MPMDPair(entity, settings) - settings.run_args = "change" - assert "change" not in mpmd_pair.launch_settings.run_args + test = mpmd_pair.launch_settings.launcher + test = "change" + assert test not in mpmd_pair.launch_settings.launcher def test_check_launcher(): """Test that mpmd pairs that have the same launcher type can be added to an MPMD Job""" entity1 = Application( - "entity1", "echo", exe_args=["hello", "world"], run_settings=RunSettings() + "entity1", + "echo", + exe_args=["hello", "world"], + run_settings=LaunchSettings("slurm"), ) - launch_settings1 = RunSettings() + launch_settings1 = LaunchSettings("slurm") entity2 = Application( - "entity2", "echo", exe_args=["hello", "world"], run_settings=RunSettings() + "entity2", + "echo", + exe_args=["hello", "world"], + run_settings=LaunchSettings("slurm"), ) - launch_settings2 = RunSettings() + launch_settings2 = LaunchSettings("slurm") mpmd_pairs = [] pair1 = MPMDPair(entity1, launch_settings1) @@ -134,11 +154,11 @@ def test_add_mpmd_pair_check_launcher_error(): """Test that an error is raised when a pairs is added to an mpmd job using add_mpmd_pair that does not have the same launcher type""" mpmd_pairs = [] - entity1 = SmartSimEntity("entity1", "python", RunSettings()) - launch_settings1 = RunSettings(run_command="srun") + entity1 = SmartSimEntity("entity1", "python", LaunchSettings("slurm")) + launch_settings1 = LaunchSettings("slurm") - entity2 = SmartSimEntity("entity2", "python", RunSettings()) - launch_settings2 = RunSettings(run_command="mpirun") + entity2 = SmartSimEntity("entity2", "python", LaunchSettings("pals")) + launch_settings2 = LaunchSettings("pals") pair1 = MPMDPair(entity1, launch_settings1) mpmd_pairs.append(pair1) @@ -152,11 +172,11 @@ def test_add_mpmd_pair_check_launcher_error(): def test_add_mpmd_pair_check_entity(): """Test that mpmd pairs that have the same entity type can be added to an MPMD Job""" mpmd_pairs = [] - entity1 = Application("entity1", "python", RunSettings()) - launch_settings1 = RunSettings(run_command="srun") + entity1 = Application("entity1", "python", LaunchSettings("slurm")) + launch_settings1 = LaunchSettings("slurm") - entity2 = Application("entity2", "python", RunSettings()) - launch_settings2 = RunSettings(run_command="srun") + entity2 = Application("entity2", "python", LaunchSettings("slurm")) + launch_settings2 = LaunchSettings("slurm") pair1 = MPMDPair(entity1, launch_settings1) mpmd_pairs.append(pair1) @@ -172,11 +192,11 @@ def test_add_mpmd_pair_check_entity_error(): """Test that an error is raised when a pairs is added to an mpmd job using add_mpmd_pair that does not have the same entity type""" mpmd_pairs = [] - entity1 = Application("entity1", "python", RunSettings()) - launch_settings1 = RunSettings(run_command="srun") + entity1 = Application("entity1", "python", LaunchSettings("slurm")) + launch_settings1 = LaunchSettings("slurm") - entity2 = FeatureStore("entity2") - launch_settings2 = RunSettings(run_command="srun") + entity2 = Application("entity2", "python", LaunchSettings("pals")) + launch_settings2 = LaunchSettings("pals") pair1 = MPMDPair(entity1, launch_settings1) mpmd_pairs.append(pair1) @@ -192,11 +212,11 @@ def test_create_mpmdjob_invalid_mpmdpairs(): does not have the same launcher type""" mpmd_pairs = [] - entity1 = Application("entity1", "python", RunSettings()) - launch_settings1 = RunSettings(run_command="srun") + entity1 = Application("entity1", "python", LaunchSettings("slurm")) + launch_settings1 = LaunchSettings("slurm") - entity1 = Application("entity1", "python", RunSettings()) - launch_settings2 = RunSettings(run_command="mpirun") + entity1 = Application("entity1", "python", LaunchSettings("pals")) + launch_settings2 = LaunchSettings("pals") pair1 = MPMDPair(entity1, launch_settings1) pair2 = MPMDPair(entity1, launch_settings2) @@ -213,10 +233,10 @@ def test_create_mpmdjob_valid_mpmdpairs(): """Test that all pairs have the same entity type is enforced when creating an MPMDJob""" mpmd_pairs = [] - entity1 = Application("entity1", "python", RunSettings()) - launch_settings1 = RunSettings(run_command="srun") - entity1 = Application("entity1", "python", RunSettings()) - launch_settings2 = RunSettings(run_command="srun") + entity1 = Application("entity1", "python", LaunchSettings("slurm")) + launch_settings1 = LaunchSettings("slurm") + entity1 = Application("entity1", "python", LaunchSettings("slurm")) + launch_settings2 = LaunchSettings("slurm") pair1 = MPMDPair(entity1, launch_settings1) pair2 = MPMDPair(entity1, launch_settings2) diff --git a/tests/temp_tests/test_settings/test_alpsLauncher.py b/tests/temp_tests/test_settings/test_alpsLauncher.py new file mode 100644 index 0000000000..7f9a4c3b96 --- /dev/null +++ b/tests/temp_tests/test_settings/test_alpsLauncher.py @@ -0,0 +1,149 @@ +import pytest + +from smartsim.settings import LaunchSettings +from smartsim.settings.builders.launch.alps import AprunArgBuilder +from smartsim.settings.launchCommand import LauncherType + + +def test_launcher_str(): + """Ensure launcher_str returns appropriate value""" + alpsLauncher = LaunchSettings(launcher=LauncherType.Alps) + assert alpsLauncher.launch_args.launcher_str() == LauncherType.Alps.value + + +@pytest.mark.parametrize( + "function,value,result,flag", + [ + pytest.param( + "set_cpus_per_task", (4,), "4", "cpus-per-pe", id="set_cpus_per_task" + ), + pytest.param("set_tasks", (4,), "4", "pes", id="set_tasks"), + pytest.param( + "set_tasks_per_node", (4,), "4", "pes-per-node", id="set_tasks_per_node" + ), + pytest.param( + "set_hostlist", ("host_A",), "host_A", "node-list", id="set_hostlist_str" + ), + pytest.param( + "set_hostlist", + (["host_A", "host_B"],), + "host_A,host_B", + "node-list", + id="set_hostlist_list[str]", + ), + pytest.param( + "set_hostlist_from_file", + ("./path/to/hostfile",), + "./path/to/hostfile", + "node-list-file", + id="set_hostlist_from_file", + ), + pytest.param( + "set_excluded_hosts", + ("host_A",), + "host_A", + "exclude-node-list", + id="set_excluded_hosts_str", + ), + pytest.param( + "set_excluded_hosts", + (["host_A", "host_B"],), + "host_A,host_B", + "exclude-node-list", + id="set_excluded_hosts_list[str]", + ), + pytest.param( + "set_cpu_bindings", (4,), "4", "cpu-binding", id="set_cpu_bindings" + ), + pytest.param( + "set_cpu_bindings", + ([4, 4],), + "4,4", + "cpu-binding", + id="set_cpu_bindings_list[str]", + ), + pytest.param( + "set_memory_per_node", + (8000,), + "8000", + "memory-per-pe", + id="set_memory_per_node", + ), + pytest.param( + "set_walltime", + ("10:00:00",), + "10:00:00", + "cpu-time-limit", + id="set_walltime", + ), + pytest.param( + "set_verbose_launch", (True,), "7", "debug", id="set_verbose_launch" + ), + pytest.param("set_quiet_launch", (True,), None, "quiet", id="set_quiet_launch"), + ], +) +def test_alps_class_methods(function, value, flag, result): + alpsLauncher = LaunchSettings(launcher=LauncherType.Alps) + assert isinstance(alpsLauncher._arg_builder, AprunArgBuilder) + getattr(alpsLauncher.launch_args, function)(*value) + assert alpsLauncher.launch_args._launch_args[flag] == result + + +def test_set_verbose_launch(): + alpsLauncher = LaunchSettings(launcher=LauncherType.Alps) + assert isinstance(alpsLauncher._arg_builder, AprunArgBuilder) + alpsLauncher.launch_args.set_verbose_launch(True) + assert alpsLauncher.launch_args._launch_args == {"debug": "7"} + alpsLauncher.launch_args.set_verbose_launch(False) + assert alpsLauncher.launch_args._launch_args == {} + + +def test_set_quiet_launch(): + aprunLauncher = LaunchSettings(launcher=LauncherType.Alps) + assert isinstance(aprunLauncher._arg_builder, AprunArgBuilder) + aprunLauncher.launch_args.set_quiet_launch(True) + assert aprunLauncher.launch_args._launch_args == {"quiet": None} + aprunLauncher.launch_args.set_quiet_launch(False) + assert aprunLauncher.launch_args._launch_args == {} + + +def test_format_env_vars(): + env_vars = {"OMP_NUM_THREADS": "20", "LOGGING": "verbose"} + aprunLauncher = LaunchSettings(launcher=LauncherType.Alps, env_vars=env_vars) + assert isinstance(aprunLauncher._arg_builder, AprunArgBuilder) + aprunLauncher.update_env({"OMP_NUM_THREADS": "10"}) + formatted = aprunLauncher.format_env_vars() + result = ["-e", "OMP_NUM_THREADS=10", "-e", "LOGGING=verbose"] + assert formatted == result + + +def test_aprun_settings(): + aprunLauncher = LaunchSettings(launcher=LauncherType.Alps) + aprunLauncher.launch_args.set_cpus_per_task(2) + aprunLauncher.launch_args.set_tasks(100) + aprunLauncher.launch_args.set_tasks_per_node(20) + formatted = aprunLauncher.format_launch_args() + result = ["--cpus-per-pe=2", "--pes=100", "--pes-per-node=20"] + assert formatted == result + + +def test_invalid_hostlist_format(): + """Test invalid hostlist formats""" + alpsLauncher = LaunchSettings(launcher=LauncherType.Alps) + with pytest.raises(TypeError): + alpsLauncher.launch_args.set_hostlist(["test", 5]) + with pytest.raises(TypeError): + alpsLauncher.launch_args.set_hostlist([5]) + with pytest.raises(TypeError): + alpsLauncher.launch_args.set_hostlist(5) + + +def test_invalid_exclude_hostlist_format(): + """Test invalid hostlist formats""" + alpsLauncher = LaunchSettings(launcher=LauncherType.Alps) + with pytest.raises(TypeError): + alpsLauncher.launch_args.set_excluded_hosts(["test", 5]) + with pytest.raises(TypeError): + alpsLauncher.launch_args.set_excluded_hosts([5]) + with pytest.raises(TypeError): + alpsLauncher.launch_args.set_excluded_hosts(5) diff --git a/tests/temp_tests/test_settings/test_batchSettings.py b/tests/temp_tests/test_settings/test_batchSettings.py new file mode 100644 index 0000000000..81c74c1aa9 --- /dev/null +++ b/tests/temp_tests/test_settings/test_batchSettings.py @@ -0,0 +1,50 @@ +import pytest + +from smartsim.settings import BatchSettings +from smartsim.settings.batchCommand import SchedulerType + + +@pytest.mark.parametrize( + "scheduler_enum", + [ + pytest.param(SchedulerType.Slurm, id="slurm"), + pytest.param(SchedulerType.Pbs, id="dragon"), + pytest.param(SchedulerType.Lsf, id="lsf"), + ], +) +def test_create_scheduler_settings(scheduler_enum): + bs_str = BatchSettings( + batch_scheduler=scheduler_enum.value, + scheduler_args={"launch": "var"}, + env_vars={"ENV": "VAR"}, + ) + print(bs_str) + assert bs_str._batch_scheduler == scheduler_enum + # TODO need to test scheduler_args + assert bs_str._env_vars == {"ENV": "VAR"} + + bs_enum = BatchSettings( + batch_scheduler=scheduler_enum, + scheduler_args={"launch": "var"}, + env_vars={"ENV": "VAR"}, + ) + assert bs_enum._batch_scheduler == scheduler_enum + # TODO need to test scheduler_args + assert bs_enum._env_vars == {"ENV": "VAR"} + + +def test_launcher_property(): + bs = BatchSettings(batch_scheduler="slurm") + assert bs.batch_scheduler == "slurm" + + +def test_env_vars_property(): + bs = BatchSettings(batch_scheduler="slurm", env_vars={"ENV": "VAR"}) + assert bs.env_vars == {"ENV": "VAR"} + + +def test_env_vars_property_deep_copy(): + bs = BatchSettings(batch_scheduler="slurm", env_vars={"ENV": "VAR"}) + copy_env_vars = bs.env_vars + copy_env_vars.update({"test": "no_update"}) + assert bs.env_vars == {"ENV": "VAR"} diff --git a/tests/temp_tests/test_settings/test_common.py b/tests/temp_tests/test_settings/test_common.py new file mode 100644 index 0000000000..d303aa6e2b --- /dev/null +++ b/tests/temp_tests/test_settings/test_common.py @@ -0,0 +1,12 @@ +import pytest + +from smartsim.settings.common import set_check_input + + +def test_check_set_raise_error(): + with pytest.raises(TypeError): + set_check_input(key="test", value=3) + with pytest.raises(TypeError): + set_check_input(key=3, value="str") + with pytest.raises(TypeError): + set_check_input(key=2, value=None) diff --git a/tests/temp_tests/test_settings/test_dragonLauncher.py b/tests/temp_tests/test_settings/test_dragonLauncher.py new file mode 100644 index 0000000000..d21a21c598 --- /dev/null +++ b/tests/temp_tests/test_settings/test_dragonLauncher.py @@ -0,0 +1,27 @@ +import pytest + +from smartsim.settings import LaunchSettings +from smartsim.settings.builders.launch.dragon import DragonArgBuilder +from smartsim.settings.launchCommand import LauncherType + + +def test_launcher_str(): + """Ensure launcher_str returns appropriate value""" + ls = LaunchSettings(launcher=LauncherType.Dragon) + assert ls.launch_args.launcher_str() == LauncherType.Dragon.value + + +@pytest.mark.parametrize( + "function,value,result,flag", + [ + pytest.param("set_nodes", (2,), "2", "nodes", id="set_nodes"), + pytest.param( + "set_tasks_per_node", (2,), "2", "tasks-per-node", id="set_tasks_per_node" + ), + ], +) +def test_dragon_class_methods(function, value, flag, result): + dragonLauncher = LaunchSettings(launcher=LauncherType.Dragon) + assert isinstance(dragonLauncher._arg_builder, DragonArgBuilder) + getattr(dragonLauncher.launch_args, function)(*value) + assert dragonLauncher.launch_args._launch_args[flag] == result diff --git a/tests/temp_tests/test_settings/test_launchSettings.py b/tests/temp_tests/test_settings/test_launchSettings.py new file mode 100644 index 0000000000..b84f013d78 --- /dev/null +++ b/tests/temp_tests/test_settings/test_launchSettings.py @@ -0,0 +1,66 @@ +import logging + +import pytest + +from smartsim.settings import LaunchSettings +from smartsim.settings.launchCommand import LauncherType + + +@pytest.mark.parametrize( + "launch_enum", + [pytest.param(type_, id=type_.value) for type_ in LauncherType], +) +def test_create_launch_settings(launch_enum): + ls_str = LaunchSettings( + launcher=launch_enum.value, + launch_args={"launch": "var"}, + env_vars={"ENV": "VAR"}, + ) + assert ls_str._launcher == launch_enum + # TODO need to test launch_args + assert ls_str._env_vars == {"ENV": "VAR"} + + ls_enum = LaunchSettings( + launcher=launch_enum, launch_args={"launch": "var"}, env_vars={"ENV": "VAR"} + ) + assert ls_enum._launcher == launch_enum + # TODO need to test launch_args + assert ls_enum._env_vars == {"ENV": "VAR"} + + +def test_launcher_property(): + ls = LaunchSettings(launcher="local") + assert ls.launcher == "local" + + +def test_env_vars_property(): + ls = LaunchSettings(launcher="local", env_vars={"ENV": "VAR"}) + assert ls.env_vars == {"ENV": "VAR"} + + +def test_env_vars_property_deep_copy(): + ls = LaunchSettings(launcher="local", env_vars={"ENV": "VAR"}) + copy_env_vars = ls.env_vars + copy_env_vars.update({"test": "no_update"}) + assert ls.env_vars == {"ENV": "VAR"} + + +def test_update_env_vars(): + ls = LaunchSettings(launcher="local", env_vars={"ENV": "VAR"}) + ls.update_env({"test": "no_update"}) + assert ls.env_vars == {"ENV": "VAR", "test": "no_update"} + + +def test_update_env_vars_errors(): + ls = LaunchSettings(launcher="local", env_vars={"ENV": "VAR"}) + with pytest.raises(TypeError): + ls.update_env({"test": 1}) + with pytest.raises(TypeError): + ls.update_env({1: "test"}) + with pytest.raises(TypeError): + ls.update_env({1: 1}) + with pytest.raises(TypeError): + # Make sure the first key and value do not assign + # and that the function is atomic + ls.update_env({"test": "test", "test": 1}) + assert ls.env_vars == {"ENV": "VAR"} diff --git a/tests/temp_tests/test_settings/test_localLauncher.py b/tests/temp_tests/test_settings/test_localLauncher.py new file mode 100644 index 0000000000..1ee7b9d87b --- /dev/null +++ b/tests/temp_tests/test_settings/test_localLauncher.py @@ -0,0 +1,112 @@ +import pytest + +from smartsim.settings import LaunchSettings +from smartsim.settings.builders.launch.local import LocalArgBuilder +from smartsim.settings.launchCommand import LauncherType + + +def test_launcher_str(): + """Ensure launcher_str returns appropriate value""" + ls = LaunchSettings(launcher=LauncherType.Local) + assert ls.launch_args.launcher_str() == LauncherType.Local.value + + +# TODO complete after launch args retrieval +def test_launch_args_input_mutation(): + # Tests that the run args passed in are not modified after initialization + key0, key1, key2 = "arg0", "arg1", "arg2" + val0, val1, val2 = "val0", "val1", "val2" + + default_launcher_args = { + key0: val0, + key1: val1, + key2: val2, + } + localLauncher = LaunchSettings( + launcher=LauncherType.Local, launch_args=default_launcher_args + ) + + # Confirm initial values are set + assert localLauncher.launch_args._launch_args[key0] == val0 + assert localLauncher.launch_args._launch_args[key1] == val1 + assert localLauncher.launch_args._launch_args[key2] == val2 + + # Update our common run arguments + val2_upd = f"not-{val2}" + default_launcher_args[key2] = val2_upd + + # Confirm previously created run settings are not changed + assert localLauncher.launch_args._launch_args[key2] == val2 + + +@pytest.mark.parametrize( + "env_vars", + [ + pytest.param({}, id="no env vars"), + pytest.param({"env1": "abc"}, id="normal var"), + pytest.param({"env1": "abc,def"}, id="compound var"), + pytest.param({"env1": "xyz", "env2": "pqr"}, id="multiple env vars"), + ], +) +def test_update_env(env_vars): + """Ensure non-initialized env vars update correctly""" + localLauncher = LaunchSettings(launcher=LauncherType.Local) + localLauncher.update_env(env_vars) + + assert len(localLauncher.env_vars) == len(env_vars.keys()) + + +def test_format_launch_args(): + localLauncher = LaunchSettings(launcher=LauncherType.Local, launch_args={"-np": 2}) + launch_args = localLauncher.format_launch_args() + assert launch_args == ["-np", "2"] + + +@pytest.mark.parametrize( + "env_vars", + [ + pytest.param({"env1": {"abc"}}, id="set value not allowed"), + pytest.param({"env1": {"abc": "def"}}, id="dict value not allowed"), + ], +) +def test_update_env_null_valued(env_vars): + """Ensure validation of env var in update""" + orig_env = {} + + with pytest.raises(TypeError) as ex: + localLauncher = LaunchSettings(launcher=LauncherType.Local, env_vars=orig_env) + localLauncher.update_env(env_vars) + + +@pytest.mark.parametrize( + "env_vars", + [ + pytest.param({}, id="no env vars"), + pytest.param({"env1": "abc"}, id="normal var"), + pytest.param({"env1": "abc,def"}, id="compound var"), + pytest.param({"env1": "xyz", "env2": "pqr"}, id="multiple env vars"), + ], +) +def test_update_env_initialized(env_vars): + """Ensure update of initialized env vars does not overwrite""" + orig_env = {"key": "value"} + localLauncher = LaunchSettings(launcher=LauncherType.Local, env_vars=orig_env) + localLauncher.update_env(env_vars) + + combined_keys = {k for k in env_vars.keys()} + combined_keys.update(k for k in orig_env.keys()) + + assert len(localLauncher.env_vars) == len(combined_keys) + assert {k for k in localLauncher.env_vars.keys()} == combined_keys + + +def test_format_env_vars(): + env_vars = { + "A": "a", + "B": None, + "C": "", + "D": "12", + } + localLauncher = LaunchSettings(launcher=LauncherType.Local, env_vars=env_vars) + assert isinstance(localLauncher._arg_builder, LocalArgBuilder) + assert localLauncher.format_env_vars() == ["A=a", "B=", "C=", "D=12"] diff --git a/tests/temp_tests/test_settings/test_lsfLauncher.py b/tests/temp_tests/test_settings/test_lsfLauncher.py new file mode 100644 index 0000000000..4c4260ac50 --- /dev/null +++ b/tests/temp_tests/test_settings/test_lsfLauncher.py @@ -0,0 +1,58 @@ +import pytest + +from smartsim.settings import LaunchSettings +from smartsim.settings.builders.launch.lsf import JsrunArgBuilder +from smartsim.settings.launchCommand import LauncherType + + +def test_launcher_str(): + """Ensure launcher_str returns appropriate value""" + ls = LaunchSettings(launcher=LauncherType.Lsf) + assert ls.launch_args.launcher_str() == LauncherType.Lsf.value + + +@pytest.mark.parametrize( + "function,value,result,flag", + [ + pytest.param("set_tasks", (2,), "2", "np", id="set_tasks"), + pytest.param( + "set_binding", ("packed:21",), "packed:21", "bind", id="set_binding" + ), + ], +) +def test_lsf_class_methods(function, value, flag, result): + lsfLauncher = LaunchSettings(launcher=LauncherType.Lsf) + assert isinstance(lsfLauncher._arg_builder, JsrunArgBuilder) + getattr(lsfLauncher.launch_args, function)(*value) + assert lsfLauncher.launch_args._launch_args[flag] == result + + +def test_format_env_vars(): + env_vars = {"OMP_NUM_THREADS": None, "LOGGING": "verbose"} + lsfLauncher = LaunchSettings(launcher=LauncherType.Lsf, env_vars=env_vars) + assert isinstance(lsfLauncher._arg_builder, JsrunArgBuilder) + formatted = lsfLauncher.format_env_vars() + assert formatted == ["-E", "OMP_NUM_THREADS", "-E", "LOGGING=verbose"] + + +def test_launch_args(): + """Test the possible user overrides through run_args""" + launch_args = { + "latency_priority": "gpu-gpu", + "immediate": None, + "d": "packed", # test single letter variables + "nrs": 10, + "np": 100, + } + lsfLauncher = LaunchSettings(launcher=LauncherType.Lsf, launch_args=launch_args) + assert isinstance(lsfLauncher._arg_builder, JsrunArgBuilder) + formatted = lsfLauncher.format_launch_args() + result = [ + "--latency_priority=gpu-gpu", + "--immediate", + "-d", + "packed", + "--nrs=10", + "--np=100", + ] + assert formatted == result diff --git a/tests/temp_tests/test_settings/test_lsfScheduler.py b/tests/temp_tests/test_settings/test_lsfScheduler.py new file mode 100644 index 0000000000..5c93d8978c --- /dev/null +++ b/tests/temp_tests/test_settings/test_lsfScheduler.py @@ -0,0 +1,50 @@ +import pytest + +from smartsim.settings import BatchSettings +from smartsim.settings.batchCommand import SchedulerType + + +def test_scheduler_str(): + """Ensure scheduler_str returns appropriate value""" + bs = BatchSettings(batch_scheduler=SchedulerType.Lsf) + assert bs.scheduler_args.scheduler_str() == SchedulerType.Lsf.value + + +@pytest.mark.parametrize( + "function,value,result,flag", + [ + pytest.param("set_nodes", (2,), "2", "nnodes", id="set_nodes"), + pytest.param("set_walltime", ("10:00:00",), "10:00", "W", id="set_walltime"), + pytest.param( + "set_hostlist", ("host_A",), "" '"host_A"' "", "m", id="set_hostlist_str" + ), + pytest.param( + "set_hostlist", + (["host_A", "host_B"],), + "" '"host_A host_B"' "", + "m", + id="set_hostlist_list[str]", + ), + pytest.param("set_smts", (1,), "1", "alloc_flags", id="set_smts"), + pytest.param("set_project", ("project",), "project", "P", id="set_project"), + pytest.param("set_account", ("project",), "project", "P", id="set_account"), + pytest.param("set_tasks", (2,), "2", "n", id="set_tasks"), + pytest.param("set_queue", ("queue",), "queue", "q", id="set_queue"), + ], +) +def test_update_env_initialized(function, value, flag, result): + lsfScheduler = BatchSettings(batch_scheduler=SchedulerType.Lsf) + getattr(lsfScheduler.scheduler_args, function)(*value) + assert lsfScheduler.scheduler_args._scheduler_args[flag] == result + + +def test_create_bsub(): + batch_args = {"core_isolation": None} + lsfScheduler = BatchSettings( + batch_scheduler=SchedulerType.Lsf, scheduler_args=batch_args + ) + lsfScheduler.scheduler_args.set_nodes(1) + lsfScheduler.scheduler_args.set_walltime("10:10:10") + lsfScheduler.scheduler_args.set_queue("default") + args = lsfScheduler.format_batch_args() + assert args == ["-core_isolation", "-nnodes", "1", "-W", "10:10", "-q", "default"] diff --git a/tests/temp_tests/test_settings/test_mpiLauncher.py b/tests/temp_tests/test_settings/test_mpiLauncher.py new file mode 100644 index 0000000000..815f0c5c13 --- /dev/null +++ b/tests/temp_tests/test_settings/test_mpiLauncher.py @@ -0,0 +1,207 @@ +import itertools + +import pytest + +from smartsim.settings import LaunchSettings +from smartsim.settings.builders.launch.mpi import ( + MpiArgBuilder, + MpiexecArgBuilder, + OrteArgBuilder, +) +from smartsim.settings.launchCommand import LauncherType + + +@pytest.mark.parametrize( + "launcher", + [ + pytest.param(LauncherType.Mpirun, id="launcher_str_mpirun"), + pytest.param(LauncherType.Mpiexec, id="launcher_str_mpiexec"), + pytest.param(LauncherType.Orterun, id="launcher_str_orterun"), + ], +) +def test_launcher_str(launcher): + """Ensure launcher_str returns appropriate value""" + ls = LaunchSettings(launcher=launcher) + assert ls.launch_args.launcher_str() == launcher.value + + +@pytest.mark.parametrize( + "l,function,value,result,flag", + [ + # Use OpenMPI style settigs for all launchers + *itertools.chain.from_iterable( + ( + ( + pytest.param( + l, "set_walltime", ("100",), "100", "timeout", id="set_walltime" + ), + pytest.param( + l, + "set_task_map", + ("taskmap",), + "taskmap", + "map-by", + id="set_task_map", + ), + pytest.param( + l, + "set_cpus_per_task", + (2,), + "2", + "cpus-per-proc", + id="set_cpus_per_task", + ), + pytest.param( + l, + "set_cpu_binding_type", + ("4",), + "4", + "bind-to", + id="set_cpu_binding_type", + ), + pytest.param( + l, + "set_tasks_per_node", + (4,), + "4", + "npernode", + id="set_tasks_per_node", + ), + pytest.param(l, "set_tasks", (4,), "4", "n", id="set_tasks"), + pytest.param( + l, + "set_executable_broadcast", + ("broadcast",), + "broadcast", + "preload-binary", + id="set_executable_broadcast", + ), + pytest.param( + l, + "set_hostlist", + ("host_A",), + "host_A", + "host", + id="set_hostlist_str", + ), + pytest.param( + l, + "set_hostlist", + (["host_A", "host_B"],), + "host_A,host_B", + "host", + id="set_hostlist_list[str]", + ), + pytest.param( + l, + "set_hostlist_from_file", + ("./path/to/hostfile",), + "./path/to/hostfile", + "hostfile", + id="set_hostlist_from_file", + ), + ) + for l in ( + [LauncherType.Mpirun, MpiArgBuilder], + [LauncherType.Mpiexec, MpiexecArgBuilder], + [LauncherType.Orterun, OrteArgBuilder], + ) + ) + ) + ], +) +def test_mpi_class_methods(l, function, value, flag, result): + mpiSettings = LaunchSettings(launcher=l[0]) + assert isinstance(mpiSettings._arg_builder, l[1]) + getattr(mpiSettings.launch_args, function)(*value) + assert mpiSettings.launch_args._launch_args[flag] == result + + +@pytest.mark.parametrize( + "launcher", + [ + pytest.param(LauncherType.Mpirun, id="format_env_mpirun"), + pytest.param(LauncherType.Mpiexec, id="format_env_mpiexec"), + pytest.param(LauncherType.Orterun, id="format_env_orterun"), + ], +) +def test_format_env_vars(launcher): + env_vars = {"OMP_NUM_THREADS": "20", "LOGGING": "verbose"} + mpiSettings = LaunchSettings(launcher=launcher, env_vars=env_vars) + formatted = mpiSettings.format_env_vars() + result = [ + "-x", + "OMP_NUM_THREADS=20", + "-x", + "LOGGING=verbose", + ] + assert formatted == result + + +@pytest.mark.parametrize( + "launcher", + [ + pytest.param(LauncherType.Mpirun, id="format_launcher_args_mpirun"), + pytest.param(LauncherType.Mpiexec, id="format_launcher_args_mpiexec"), + pytest.param(LauncherType.Orterun, id="format_launcher_args_orterun"), + ], +) +def test_format_launcher_args(launcher): + mpiSettings = LaunchSettings(launcher=launcher) + mpiSettings.launch_args.set_cpus_per_task(1) + mpiSettings.launch_args.set_tasks(2) + mpiSettings.launch_args.set_hostlist(["node005", "node006"]) + formatted = mpiSettings.format_launch_args() + result = ["--cpus-per-proc", "1", "--n", "2", "--host", "node005,node006"] + assert formatted == result + + +@pytest.mark.parametrize( + "launcher", + [ + pytest.param(LauncherType.Mpirun, id="set_verbose_launch_mpirun"), + pytest.param(LauncherType.Mpiexec, id="set_verbose_launch_mpiexec"), + pytest.param(LauncherType.Orterun, id="set_verbose_launch_orterun"), + ], +) +def test_set_verbose_launch(launcher): + mpiSettings = LaunchSettings(launcher=launcher) + mpiSettings.launch_args.set_verbose_launch(True) + assert mpiSettings.launch_args._launch_args == {"verbose": None} + mpiSettings.launch_args.set_verbose_launch(False) + assert mpiSettings.launch_args._launch_args == {} + + +@pytest.mark.parametrize( + "launcher", + [ + pytest.param(LauncherType.Mpirun, id="set_quiet_launch_mpirun"), + pytest.param(LauncherType.Mpiexec, id="set_quiet_launch_mpiexec"), + pytest.param(LauncherType.Orterun, id="set_quiet_launch_orterun"), + ], +) +def test_set_quiet_launch(launcher): + mpiSettings = LaunchSettings(launcher=launcher) + mpiSettings.launch_args.set_quiet_launch(True) + assert mpiSettings.launch_args._launch_args == {"quiet": None} + mpiSettings.launch_args.set_quiet_launch(False) + assert mpiSettings.launch_args._launch_args == {} + + +@pytest.mark.parametrize( + "launcher", + [ + pytest.param(LauncherType.Mpirun, id="invalid_hostlist_mpirun"), + pytest.param(LauncherType.Mpiexec, id="invalid_hostlist_mpiexec"), + pytest.param(LauncherType.Orterun, id="invalid_hostlist_orterun"), + ], +) +def test_invalid_hostlist_format(launcher): + """Test invalid hostlist formats""" + mpiSettings = LaunchSettings(launcher=launcher) + with pytest.raises(TypeError): + mpiSettings.launch_args.set_hostlist(["test", 5]) + with pytest.raises(TypeError): + mpiSettings.launch_args.set_hostlist([5]) + with pytest.raises(TypeError): + mpiSettings.launch_args.set_hostlist(5) diff --git a/tests/temp_tests/test_settings/test_palsLauncher.py b/tests/temp_tests/test_settings/test_palsLauncher.py new file mode 100644 index 0000000000..01cbea2ed6 --- /dev/null +++ b/tests/temp_tests/test_settings/test_palsLauncher.py @@ -0,0 +1,69 @@ +import pytest + +from smartsim.settings import LaunchSettings +from smartsim.settings.builders.launch.pals import PalsMpiexecArgBuilder +from smartsim.settings.launchCommand import LauncherType + + +def test_launcher_str(): + """Ensure launcher_str returns appropriate value""" + ls = LaunchSettings(launcher=LauncherType.Pals) + assert ls.launch_args.launcher_str() == LauncherType.Pals.value + + +@pytest.mark.parametrize( + "function,value,result,flag", + [ + pytest.param( + "set_cpu_binding_type", + ("bind",), + "bind", + "bind-to", + id="set_cpu_binding_type", + ), + pytest.param("set_tasks", (2,), "2", "np", id="set_tasks"), + pytest.param("set_tasks_per_node", (2,), "2", "ppn", id="set_tasks_per_node"), + pytest.param( + "set_hostlist", ("host_A",), "host_A", "hosts", id="set_hostlist_str" + ), + pytest.param( + "set_hostlist", + (["host_A", "host_B"],), + "host_A,host_B", + "hosts", + id="set_hostlist_list[str]", + ), + pytest.param( + "set_executable_broadcast", + ("broadcast",), + "broadcast", + "transfer", + id="set_executable_broadcast", + ), + ], +) +def test_pals_class_methods(function, value, flag, result): + palsLauncher = LaunchSettings(launcher=LauncherType.Pals) + assert isinstance(palsLauncher.launch_args, PalsMpiexecArgBuilder) + getattr(palsLauncher.launch_args, function)(*value) + assert palsLauncher.launch_args._launch_args[flag] == result + assert palsLauncher.format_launch_args() == ["--" + flag, str(result)] + + +def test_format_env_vars(): + env_vars = {"FOO_VERSION": "3.14", "PATH": None, "LD_LIBRARY_PATH": None} + palsLauncher = LaunchSettings(launcher=LauncherType.Pals, env_vars=env_vars) + formatted = " ".join(palsLauncher.format_env_vars()) + expected = "--env FOO_VERSION=3.14 --envlist PATH,LD_LIBRARY_PATH" + assert formatted == expected + + +def test_invalid_hostlist_format(): + """Test invalid hostlist formats""" + palsLauncher = LaunchSettings(launcher=LauncherType.Pals) + with pytest.raises(TypeError): + palsLauncher.launch_args.set_hostlist(["test", 5]) + with pytest.raises(TypeError): + palsLauncher.launch_args.set_hostlist([5]) + with pytest.raises(TypeError): + palsLauncher.launch_args.set_hostlist(5) diff --git a/tests/temp_tests/test_settings/test_pbsScheduler.py b/tests/temp_tests/test_settings/test_pbsScheduler.py new file mode 100644 index 0000000000..ab3435df55 --- /dev/null +++ b/tests/temp_tests/test_settings/test_pbsScheduler.py @@ -0,0 +1,61 @@ +import pytest + +from smartsim.settings import BatchSettings +from smartsim.settings.batchCommand import SchedulerType +from smartsim.settings.builders.batch.pbs import QsubBatchArgBuilder + + +def test_scheduler_str(): + """Ensure scheduler_str returns appropriate value""" + bs = BatchSettings(batch_scheduler=SchedulerType.Pbs) + assert bs.scheduler_args.scheduler_str() == SchedulerType.Pbs.value + + +@pytest.mark.parametrize( + "function,value,result,flag", + [ + pytest.param("set_nodes", (2,), "2", "nodes", id="set_nodes"), + pytest.param( + "set_walltime", ("10:00:00",), "10:00:00", "walltime", id="set_walltime" + ), + pytest.param("set_account", ("account",), "account", "A", id="set_account"), + pytest.param("set_queue", ("queue",), "queue", "q", id="set_queue"), + pytest.param("set_ncpus", (2,), "2", "ppn", id="set_ncpus"), + pytest.param( + "set_hostlist", ("host_A",), "host_A", "hostname", id="set_hostlist_str" + ), + pytest.param( + "set_hostlist", + (["host_A", "host_B"],), + "host_A,host_B", + "hostname", + id="set_hostlist_list[str]", + ), + ], +) +def test_create_pbs_batch(function, value, flag, result): + pbsScheduler = BatchSettings(batch_scheduler=SchedulerType.Pbs) + assert isinstance(pbsScheduler.scheduler_args, QsubBatchArgBuilder) + getattr(pbsScheduler.scheduler_args, function)(*value) + assert pbsScheduler.scheduler_args._scheduler_args[flag] == result + + +def test_format_pbs_batch_args(): + pbsScheduler = BatchSettings(batch_scheduler=SchedulerType.Pbs) + pbsScheduler.scheduler_args.set_nodes(1) + pbsScheduler.scheduler_args.set_walltime("10:00:00") + pbsScheduler.scheduler_args.set_queue("default") + pbsScheduler.scheduler_args.set_account("myproject") + pbsScheduler.scheduler_args.set_ncpus(10) + pbsScheduler.scheduler_args.set_hostlist(["host_a", "host_b", "host_c"]) + args = pbsScheduler.format_batch_args() + assert args == [ + "-l", + "nodes=1:ncpus=10:host=host_a+host=host_b+host=host_c", + "-l", + "walltime=10:00:00", + "-q", + "default", + "-A", + "myproject", + ] diff --git a/tests/temp_tests/test_settings/test_slurmLauncher.py b/tests/temp_tests/test_settings/test_slurmLauncher.py new file mode 100644 index 0000000000..c5e9b5b62d --- /dev/null +++ b/tests/temp_tests/test_settings/test_slurmLauncher.py @@ -0,0 +1,255 @@ +import pytest + +from smartsim.settings import LaunchSettings +from smartsim.settings.builders.launch.slurm import SlurmArgBuilder +from smartsim.settings.launchCommand import LauncherType + + +def test_launcher_str(): + """Ensure launcher_str returns appropriate value""" + ls = LaunchSettings(launcher=LauncherType.Slurm) + assert ls.launch_args.launcher_str() == LauncherType.Slurm.value + + +@pytest.mark.parametrize( + "function,value,result,flag", + [ + pytest.param("set_nodes", (2,), "2", "nodes", id="set_nodes"), + pytest.param( + "set_hostlist", ("host_A",), "host_A", "nodelist", id="set_hostlist_str" + ), + pytest.param( + "set_hostlist", + (["host_A", "host_B"],), + "host_A,host_B", + "nodelist", + id="set_hostlist_list[str]", + ), + pytest.param( + "set_hostlist_from_file", + ("./path/to/hostfile",), + "./path/to/hostfile", + "nodefile", + id="set_hostlist_from_file", + ), + pytest.param( + "set_excluded_hosts", + ("host_A",), + "host_A", + "exclude", + id="set_excluded_hosts_str", + ), + pytest.param( + "set_excluded_hosts", + (["host_A", "host_B"],), + "host_A,host_B", + "exclude", + id="set_excluded_hosts_list[str]", + ), + pytest.param( + "set_cpus_per_task", (4,), "4", "cpus-per-task", id="set_cpus_per_task" + ), + pytest.param("set_tasks", (4,), "4", "ntasks", id="set_tasks"), + pytest.param( + "set_tasks_per_node", (4,), "4", "ntasks-per-node", id="set_tasks_per_node" + ), + pytest.param( + "set_cpu_bindings", (4,), "map_cpu:4", "cpu_bind", id="set_cpu_bindings" + ), + pytest.param( + "set_cpu_bindings", + ([4, 4],), + "map_cpu:4,4", + "cpu_bind", + id="set_cpu_bindings_list[str]", + ), + pytest.param( + "set_memory_per_node", (8000,), "8000M", "mem", id="set_memory_per_node" + ), + pytest.param( + "set_executable_broadcast", + ("/tmp/some/path",), + "/tmp/some/path", + "bcast", + id="set_broadcast", + ), + pytest.param("set_node_feature", ("P100",), "P100", "C", id="set_node_feature"), + pytest.param( + "set_walltime", ("10:00:00",), "10:00:00", "time", id="set_walltime" + ), + ], +) +def test_slurm_class_methods(function, value, flag, result): + slurmLauncher = LaunchSettings(launcher=LauncherType.Slurm) + assert isinstance(slurmLauncher.launch_args, SlurmArgBuilder) + getattr(slurmLauncher.launch_args, function)(*value) + assert slurmLauncher.launch_args._launch_args[flag] == result + + +def test_set_verbose_launch(): + ls = LaunchSettings(launcher=LauncherType.Slurm) + ls.launch_args.set_verbose_launch(True) + assert ls.launch_args._launch_args == {"verbose": None} + ls.launch_args.set_verbose_launch(False) + assert ls.launch_args._launch_args == {} + + +def test_set_quiet_launch(): + ls = LaunchSettings(launcher=LauncherType.Slurm) + ls.launch_args.set_quiet_launch(True) + assert ls.launch_args._launch_args == {"quiet": None} + ls.launch_args.set_quiet_launch(False) + assert ls.launch_args._launch_args == {} + + +def test_format_env_vars(): + """Test format_env_vars runs correctly""" + env_vars = { + "OMP_NUM_THREADS": "20", + "LOGGING": "verbose", + "SSKEYIN": "name_0,name_1", + } + ls = LaunchSettings(launcher=LauncherType.Slurm, env_vars=env_vars) + ls_format = ls.format_env_vars() + assert "OMP_NUM_THREADS=20" in ls_format + assert "LOGGING=verbose" in ls_format + assert all("SSKEYIN" not in x for x in ls_format) + + +def test_catch_existing_env_var(caplog, monkeypatch): + slurmSettings = LaunchSettings( + launcher=LauncherType.Slurm, + env_vars={ + "SMARTSIM_TEST_VAR": "B", + }, + ) + monkeypatch.setenv("SMARTSIM_TEST_VAR", "A") + monkeypatch.setenv("SMARTSIM_TEST_CSVAR", "A,B") + caplog.clear() + slurmSettings.format_env_vars() + + msg = f"Variable SMARTSIM_TEST_VAR is set to A in current environment. " + msg += f"If the job is running in an interactive allocation, the value B will not be set. " + msg += "Please consider removing the variable from the environment and re-running the experiment." + + for record in caplog.records: + assert record.levelname == "WARNING" + assert record.message == msg + + caplog.clear() + + env_vars = {"SMARTSIM_TEST_VAR": "B", "SMARTSIM_TEST_CSVAR": "C,D"} + settings = LaunchSettings(launcher=LauncherType.Slurm, env_vars=env_vars) + settings.format_comma_sep_env_vars() + + for record in caplog.records: + assert record.levelname == "WARNING" + assert record.message == msg + + +def test_format_comma_sep_env_vars(): + """Test format_comma_sep_env_vars runs correctly""" + env_vars = { + "OMP_NUM_THREADS": "20", + "LOGGING": "verbose", + "SSKEYIN": "name_0,name_1", + } + slurmLauncher = LaunchSettings(launcher=LauncherType.Slurm, env_vars=env_vars) + formatted, comma_separated_formatted = slurmLauncher.format_comma_sep_env_vars() + assert "OMP_NUM_THREADS" in formatted + assert "LOGGING" in formatted + assert "SSKEYIN" in formatted + assert "name_0,name_1" not in formatted + assert "SSKEYIN=name_0,name_1" in comma_separated_formatted + + +def test_slurmSettings_settings(): + """Test format_launch_args runs correctly""" + slurmLauncher = LaunchSettings(launcher=LauncherType.Slurm) + slurmLauncher.launch_args.set_nodes(5) + slurmLauncher.launch_args.set_cpus_per_task(2) + slurmLauncher.launch_args.set_tasks(100) + slurmLauncher.launch_args.set_tasks_per_node(20) + formatted = slurmLauncher.format_launch_args() + result = ["--nodes=5", "--cpus-per-task=2", "--ntasks=100", "--ntasks-per-node=20"] + assert formatted == result + + +def test_slurmSettings_launch_args(): + """Test the possible user overrides through run_args""" + launch_args = { + "account": "A3123", + "exclusive": None, + "C": "P100", # test single letter variables + "nodes": 10, + "ntasks": 100, + } + slurmLauncher = LaunchSettings(launcher=LauncherType.Slurm, launch_args=launch_args) + formatted = slurmLauncher.format_launch_args() + result = [ + "--account=A3123", + "--exclusive", + "-C", + "P100", + "--nodes=10", + "--ntasks=100", + ] + assert formatted == result + + +def test_invalid_hostlist_format(): + """Test invalid hostlist formats""" + slurmLauncher = LaunchSettings(launcher=LauncherType.Slurm) + with pytest.raises(TypeError): + slurmLauncher.launch_args.set_hostlist(["test", 5]) + with pytest.raises(TypeError): + slurmLauncher.launch_args.set_hostlist([5]) + with pytest.raises(TypeError): + slurmLauncher.launch_args.set_hostlist(5) + + +def test_invalid_exclude_hostlist_format(): + """Test invalid hostlist formats""" + slurmLauncher = LaunchSettings(launcher=LauncherType.Slurm) + with pytest.raises(TypeError): + slurmLauncher.launch_args.set_excluded_hosts(["test", 5]) + with pytest.raises(TypeError): + slurmLauncher.launch_args.set_excluded_hosts([5]) + with pytest.raises(TypeError): + slurmLauncher.launch_args.set_excluded_hosts(5) + + +def test_invalid_node_feature_format(): + """Test invalid node feature formats""" + slurmLauncher = LaunchSettings(launcher=LauncherType.Slurm) + with pytest.raises(TypeError): + slurmLauncher.launch_args.set_node_feature(["test", 5]) + with pytest.raises(TypeError): + slurmLauncher.launch_args.set_node_feature([5]) + with pytest.raises(TypeError): + slurmLauncher.launch_args.set_node_feature(5) + + +def test_invalid_walltime_format(): + """Test invalid walltime formats""" + slurmLauncher = LaunchSettings(launcher=LauncherType.Slurm) + with pytest.raises(ValueError): + slurmLauncher.launch_args.set_walltime("11:11") + with pytest.raises(ValueError): + slurmLauncher.launch_args.set_walltime("ss:ss:ss") + with pytest.raises(ValueError): + slurmLauncher.launch_args.set_walltime("11:ss:ss") + with pytest.raises(ValueError): + slurmLauncher.launch_args.set_walltime("0s:ss:ss") + + +def test_set_het_groups(monkeypatch): + """Test ability to set one or more het groups to run setting""" + monkeypatch.setenv("SLURM_HET_SIZE", "4") + slurmLauncher = LaunchSettings(launcher=LauncherType.Slurm) + slurmLauncher.launch_args.set_het_group([1]) + assert slurmLauncher._arg_builder._launch_args["het-group"] == "1" + slurmLauncher.launch_args.set_het_group([3, 2]) + assert slurmLauncher._arg_builder._launch_args["het-group"] == "3,2" + with pytest.raises(ValueError): + slurmLauncher.launch_args.set_het_group([4]) diff --git a/tests/temp_tests/test_settings/test_slurmScheduler.py b/tests/temp_tests/test_settings/test_slurmScheduler.py new file mode 100644 index 0000000000..0a34b6473f --- /dev/null +++ b/tests/temp_tests/test_settings/test_slurmScheduler.py @@ -0,0 +1,110 @@ +import pytest + +from smartsim.settings import BatchSettings +from smartsim.settings.batchCommand import SchedulerType +from smartsim.settings.builders.batch.slurm import SlurmBatchArgBuilder + + +def test_scheduler_str(): + """Ensure scheduler_str returns appropriate value""" + bs = BatchSettings(batch_scheduler=SchedulerType.Slurm) + assert bs.scheduler_args.scheduler_str() == SchedulerType.Slurm.value + + +@pytest.mark.parametrize( + "function,value,result,flag", + [ + pytest.param("set_nodes", (2,), "2", "nodes", id="set_nodes"), + pytest.param( + "set_walltime", ("10:00:00",), "10:00:00", "time", id="set_walltime" + ), + pytest.param( + "set_account", ("account",), "account", "account", id="set_account" + ), + pytest.param( + "set_partition", + ("partition",), + "partition", + "partition", + id="set_partition", + ), + pytest.param( + "set_queue", ("partition",), "partition", "partition", id="set_queue" + ), + pytest.param( + "set_cpus_per_task", (2,), "2", "cpus-per-task", id="set_cpus_per_task" + ), + pytest.param( + "set_hostlist", ("host_A",), "host_A", "nodelist", id="set_hostlist_str" + ), + pytest.param( + "set_hostlist", + (["host_A", "host_B"],), + "host_A,host_B", + "nodelist", + id="set_hostlist_list[str]", + ), + ], +) +def test_sbatch_class_methods(function, value, flag, result): + slurmScheduler = BatchSettings(batch_scheduler=SchedulerType.Slurm) + getattr(slurmScheduler.scheduler_args, function)(*value) + assert slurmScheduler.scheduler_args._scheduler_args[flag] == result + + +def test_create_sbatch(): + batch_args = {"exclusive": None, "oversubscribe": None} + slurmScheduler = BatchSettings( + batch_scheduler=SchedulerType.Slurm, scheduler_args=batch_args + ) + assert isinstance(slurmScheduler._arg_builder, SlurmBatchArgBuilder) + args = slurmScheduler.format_batch_args() + assert args == ["--exclusive", "--oversubscribe"] + + +def test_launch_args_input_mutation(): + # Tests that the run args passed in are not modified after initialization + key0, key1, key2 = "arg0", "arg1", "arg2" + val0, val1, val2 = "val0", "val1", "val2" + + default_scheduler_args = { + key0: val0, + key1: val1, + key2: val2, + } + slurmScheduler = BatchSettings( + batch_scheduler=SchedulerType.Slurm, scheduler_args=default_scheduler_args + ) + + # Confirm initial values are set + assert slurmScheduler.scheduler_args._scheduler_args[key0] == val0 + assert slurmScheduler.scheduler_args._scheduler_args[key1] == val1 + assert slurmScheduler.scheduler_args._scheduler_args[key2] == val2 + + # Update our common run arguments + val2_upd = f"not-{val2}" + default_scheduler_args[key2] = val2_upd + + # Confirm previously created run settings are not changed + assert slurmScheduler.scheduler_args._scheduler_args[key2] == val2 + + +def test_sbatch_settings(): + scheduler_args = {"nodes": 1, "time": "10:00:00", "account": "A3123"} + slurmScheduler = BatchSettings( + batch_scheduler=SchedulerType.Slurm, scheduler_args=scheduler_args + ) + formatted = slurmScheduler.format_batch_args() + result = ["--nodes=1", "--time=10:00:00", "--account=A3123"] + assert formatted == result + + +def test_sbatch_manual(): + slurmScheduler = BatchSettings(batch_scheduler=SchedulerType.Slurm) + slurmScheduler.scheduler_args.set_nodes(5) + slurmScheduler.scheduler_args.set_account("A3531") + slurmScheduler.scheduler_args.set_walltime("10:00:00") + formatted = slurmScheduler.format_batch_args() + print(f"here: {formatted}") + result = ["--nodes=5", "--account=A3531", "--time=10:00:00"] + assert formatted == result