diff --git a/smartsim/experiment.py b/smartsim/experiment.py index 01f4ed58e9..288d50a5c1 100644 --- a/smartsim/experiment.py +++ b/smartsim/experiment.py @@ -427,385 +427,6 @@ def get_status( logger.error(e) raise - @_contextualize - def create_ensemble( - self, - name: str, - exe: t.Optional[str] = None, - exe_args: t.Optional[t.List[str]] = None, - params: t.Optional[t.Dict[str, t.Any]] = None, - batch_settings: t.Optional[base.BatchSettings] = None, - run_settings: t.Optional[base.RunSettings] = None, - replicas: t.Optional[int] = None, - perm_strategy: str = "all_perm", - path: t.Optional[str] = None, - **kwargs: t.Any, - ) -> Ensemble: - """Create an ``Ensemble`` of ``Application`` instances - - Ensembles can be launched sequentially or as a batch - if using a non-local launcher. e.g. slurm - - Ensembles require one of the following combinations - of arguments: - - - ``run_settings`` and ``params`` - - ``run_settings`` and ``replicas`` - - ``batch_settings`` - - ``batch_settings``, ``run_settings``, and ``params`` - - ``batch_settings``, ``run_settings``, and ``replicas`` - - If given solely batch settings, an empty ensemble - will be created that applications can be added to manually - through ``Ensemble.add_application()``. - The entire Ensemble will launch as one batch. - - Provided batch and run settings, either ``params`` - or ``replicas`` must be passed and the entire ensemble - will launch as a single batch. - - Provided solely run settings, either ``params`` - or ``replicas`` must be passed and the Ensemble members - will each launch sequentially. - - The kwargs argument can be used to pass custom input - parameters to the permutation strategy. - - :param name: name of the ``Ensemble`` - :param params: parameters to expand into ``Application`` members - :param exe: executable to run - :param exe_args: executable arguments - :param batch_settings: describes settings for ``Ensemble`` as batch workload - :param run_settings: describes how each ``Application`` should be executed - :param replicas: number of replicas to create - :param perm_strategy: strategy for expanding ``params`` into - ``Application`` instances from params argument - options are "all_perm", "step", "random" - or a callable function. - :raises SmartSimError: if initialization fails - :return: ``Ensemble`` instance - """ - if name is None: - raise AttributeError("Entity has no name. Please set name attribute.") - check_path = path or osp.join(self.exp_path, name) - entity_path: str = osp.abspath(check_path) - - try: - new_ensemble = Ensemble( - name=name, - exe=exe, - exe_args=exe_args, - params=params or {}, - path=entity_path, - batch_settings=batch_settings, - run_settings=run_settings, - perm_strat=perm_strategy, - replicas=replicas, - **kwargs, - ) - return new_ensemble - except SmartSimError as e: - logger.error(e) - raise - - @_contextualize - def create_application( - self, - name: str, - exe: str, - run_settings: base.RunSettings, - exe_args: t.Optional[t.List[str]] = None, - params: t.Optional[t.Dict[str, t.Any]] = None, - path: t.Optional[str] = None, - enable_key_prefixing: bool = False, - batch_settings: t.Optional[base.BatchSettings] = None, - ) -> Application: - """Create a general purpose ``Application`` - - The ``Application`` class is the most general encapsulation of - executable code in SmartSim. ``Application`` instances are named - references to pieces of a workflow that can be parameterized, - and executed. - - ``Application`` instances can be launched sequentially, as a batch job, - or as a group by adding them into an ``Ensemble``. - - All ``Applications`` require a reference to run settings to specify which - executable to launch as well provide options for how to launch - the executable with the underlying WLM. Furthermore, batch a - reference to a batch settings can be added to launch the ``Application`` - as a batch job through ``Experiment.start``. If a ``Application`` with - a reference to a set of batch settings is added to a larger - entity with its own set of batch settings (for e.g. an - ``Ensemble``) the batch settings of the larger entity will take - precedence and the batch setting of the ``Application`` will be - strategically ignored. - - Parameters supplied in the `params` argument can be written into - configuration files supplied at runtime to the ``Application`` through - ``Application.attach_generator_files``. `params` can also be turned - into executable arguments by calling ``Application.params_to_args`` - - By default, ``Application`` instances will be executed in the - exp_path/application_name directory if no `path` argument is supplied. - If a ``Application`` instance is passed to ``Experiment.generate``, - a directory within the ``Experiment`` directory will be created - to house the input and output files from the ``Application``. - - Example initialization of a ``Application`` instance - - .. highlight:: python - .. code-block:: python - - from smartsim import Experiment - run_settings = exp.create_run_settings("python", "run_pytorch_model.py") - application = exp.create_application("pytorch_model", run_settings) - - # adding parameters to a application - run_settings = exp.create_run_settings("python", "run_pytorch_model.py") - train_params = { - "batch": 32, - "epoch": 10, - "lr": 0.001 - } - application = exp.create_application("pytorch_model", run_settings, params=train_params) - application.attach_generator_files(to_configure="./train.cfg") - exp.generate(application) - - New in 0.4.0, ``application`` instances can be colocated with an - Orchestrator database shard through ``application.colocate_db``. This - will launch a single ``Orchestrator`` instance on each compute - host used by the (possibly distributed) application. This is - useful for performant online inference or processing - at runtime. - - New in 0.4.2, ``Application`` instances can now be colocated with - an Orchestrator database over either TCP or UDS using the - ``pplication.colocate_db_tcp`` or ``Application.colocate_db_uds`` method - respectively. The original ``Application.colocate_db`` method is now - deprecated, but remains as an alias for ``Application.colocate_db_tcp`` - for backward compatibility. - - :param name: name of the ``Application`` - :param exe: executable to run - :param exe_args: executable arguments - :param run_settings: defines how ``Application`` should be run - :param params: ``Application`` parameters for writing into configuration files - :param path: path to where the ``Application`` should be executed at runtime - :param enable_key_prefixing: If True, data sent to the ``Orchestrator`` - using SmartRedis from this ``Application`` will - be prefixed with the ``Application`` name. - :param batch_settings: Settings to run ``Application`` individually as a batch job. - :raises SmartSimError: if initialization fails - :return: the created ``Application`` - """ - if name is None: - raise AttributeError("Entity has no name. Please set name attribute.") - check_path = path or osp.join(self.exp_path, name) - entity_path: str = osp.abspath(check_path) - if params is None: - params = {} - - try: - new_application = Application( - name=name, - exe=exe, - exe_args=exe_args, - params=params, - path=entity_path, - run_settings=run_settings, - batch_settings=batch_settings, - ) - if enable_key_prefixing: - new_application.enable_key_prefixing() - return new_application - except SmartSimError as e: - logger.error(e) - raise - - @_contextualize - def create_run_settings( - self, - run_command: str = "auto", - run_args: t.Optional[t.Dict[str, t.Union[int, str, float, None]]] = None, - env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None, - container: t.Optional[Container] = None, - **kwargs: t.Any, - ) -> settings.RunSettings: - """Create a ``RunSettings`` instance. - - run_command="auto" will attempt to automatically - match a run command on the system with a ``RunSettings`` - class in SmartSim. If found, the class corresponding - to that run_command will be created and returned. - - If the local launcher is being used, auto detection will - be turned off. - - If a recognized run command is passed, the ``RunSettings`` - instance will be a child class such as ``SrunSettings`` - - If not supported by smartsim, the base ``RunSettings`` class - will be created and returned with the specified run_command and run_args - will be evaluated literally. - - Run Commands with implemented helper classes: - - aprun (ALPS) - - srun (SLURM) - - mpirun (OpenMPI) - - jsrun (LSF) - - :param run_command: command to run the executable - :param exe: executable to run - :param exe_args: arguments to pass to the executable - :param run_args: arguments to pass to the ``run_command`` - :param env_vars: environment variables to pass to the executable - :param container: if execution environment is containerized - :return: the created ``RunSettings`` - """ - - try: - return settings.create_run_settings( - self._launcher, - run_command=run_command, - run_args=run_args, - env_vars=env_vars, - container=container, - **kwargs, - ) - except SmartSimError as e: - logger.error(e) - raise - - @_contextualize - def create_batch_settings( - self, - nodes: int = 1, - time: str = "", - queue: str = "", - account: str = "", - batch_args: t.Optional[t.Dict[str, str]] = None, - **kwargs: t.Any, - ) -> base.BatchSettings: - """Create a ``BatchSettings`` instance - - Batch settings parameterize batch workloads. The result of this - function can be passed to the ``Ensemble`` initialization. - - the `batch_args` parameter can be used to pass in a dictionary - of additional batch command arguments that aren't supported through - the smartsim interface - - - .. highlight:: python - .. code-block:: python - - # i.e. for Slurm - batch_args = { - "distribution": "block" - "exclusive": None - } - bs = exp.create_batch_settings(nodes=3, - time="10:00:00", - batch_args=batch_args) - bs.set_account("default") - - :param nodes: number of nodes for batch job - :param time: length of batch job - :param queue: queue or partition (if slurm) - :param account: user account name for batch system - :param batch_args: additional batch arguments - :return: a newly created BatchSettings instance - :raises SmartSimError: if batch creation fails - """ - try: - return settings.create_batch_settings( - self._launcher, - nodes=nodes, - time=time, - queue=queue, - account=account, - batch_args=batch_args, - **kwargs, - ) - except SmartSimError as e: - logger.error(e) - raise - - @_contextualize - def create_database( - self, - port: int = 6379, - path: t.Optional[str] = None, - db_nodes: int = 1, - batch: bool = False, - hosts: t.Optional[t.Union[t.List[str], str]] = None, - run_command: str = "auto", - interface: t.Union[str, t.List[str]] = "ipogif0", - account: t.Optional[str] = None, - time: t.Optional[str] = None, - queue: t.Optional[str] = None, - single_cmd: bool = True, - db_identifier: str = "orchestrator", - **kwargs: t.Any, - ) -> Orchestrator: - """Initialize an ``Orchestrator`` database - - The ``Orchestrator`` database is a key-value store based - on Redis that can be launched together with other ``Experiment`` - created instances for online data storage. - - When launched, ``Orchestrator`` can be used to communicate - data between Fortran, Python, C, and C++ applications. - - Machine Learning model in Pytorch, Tensorflow, and ONNX (i.e. scikit-learn) - can also be stored within the ``Orchestrator`` database where they - can be called remotely and executed on CPU or GPU where - the database is hosted. - - To enable a SmartSim ``Application`` to communicate with the database - the workload must utilize the SmartRedis clients. For more - information on the database, and SmartRedis clients see the - documentation at https://www.craylabs.org/docs/smartredis.html - - :param port: TCP/IP port - :param db_nodes: number of database shards - :param batch: run as a batch workload - :param hosts: specify hosts to launch on - :param run_command: specify launch binary or detect automatically - :param interface: Network interface - :param account: account to run batch on - :param time: walltime for batch 'HH:MM:SS' format - :param queue: queue to run the batch on - :param single_cmd: run all shards with one (MPMD) command - :param db_identifier: an identifier to distinguish this orchestrator in - multiple-database experiments - :raises SmartSimError: if detection of launcher or of run command fails - :raises SmartSimError: if user indicated an incompatible run command - for the launcher - :return: Orchestrator or derived class - """ - - self._append_to_db_identifier_list(db_identifier) - check_path = path or osp.join(self.exp_path, db_identifier) - entity_path: str = osp.abspath(check_path) - return Orchestrator( - port=port, - path=entity_path, - db_nodes=db_nodes, - batch=batch, - hosts=hosts, - run_command=run_command, - interface=interface, - account=account, - time=time, - queue=queue, - single_cmd=single_cmd, - launcher=self._launcher, - db_identifier=db_identifier, - **kwargs, - ) - @_contextualize def reconnect_orchestrator(self, checkpoint: str) -> Orchestrator: """Reconnect to a running ``Orchestrator``