Skip to content
379 changes: 0 additions & 379 deletions smartsim/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,385 +427,6 @@ def get_status(
logger.error(e)
raise

@_contextualize
def create_ensemble(
self,
name: str,
exe: t.Optional[str] = None,
exe_args: t.Optional[t.List[str]] = None,
params: t.Optional[t.Dict[str, t.Any]] = None,
batch_settings: t.Optional[base.BatchSettings] = None,
run_settings: t.Optional[base.RunSettings] = None,
replicas: t.Optional[int] = None,
perm_strategy: str = "all_perm",
path: t.Optional[str] = None,
**kwargs: t.Any,
) -> Ensemble:
"""Create an ``Ensemble`` of ``Application`` instances

Ensembles can be launched sequentially or as a batch
if using a non-local launcher. e.g. slurm

Ensembles require one of the following combinations
of arguments:

- ``run_settings`` and ``params``
- ``run_settings`` and ``replicas``
- ``batch_settings``
- ``batch_settings``, ``run_settings``, and ``params``
- ``batch_settings``, ``run_settings``, and ``replicas``

If given solely batch settings, an empty ensemble
will be created that applications can be added to manually
through ``Ensemble.add_application()``.
The entire Ensemble will launch as one batch.

Provided batch and run settings, either ``params``
or ``replicas`` must be passed and the entire ensemble
will launch as a single batch.

Provided solely run settings, either ``params``
or ``replicas`` must be passed and the Ensemble members
will each launch sequentially.

The kwargs argument can be used to pass custom input
parameters to the permutation strategy.

:param name: name of the ``Ensemble``
:param params: parameters to expand into ``Application`` members
:param exe: executable to run
:param exe_args: executable arguments
:param batch_settings: describes settings for ``Ensemble`` as batch workload
:param run_settings: describes how each ``Application`` should be executed
:param replicas: number of replicas to create
:param perm_strategy: strategy for expanding ``params`` into
``Application`` instances from params argument
options are "all_perm", "step", "random"
or a callable function.
:raises SmartSimError: if initialization fails
:return: ``Ensemble`` instance
"""
if name is None:
raise AttributeError("Entity has no name. Please set name attribute.")
check_path = path or osp.join(self.exp_path, name)
entity_path: str = osp.abspath(check_path)

try:
new_ensemble = Ensemble(
name=name,
exe=exe,
exe_args=exe_args,
params=params or {},
path=entity_path,
batch_settings=batch_settings,
run_settings=run_settings,
perm_strat=perm_strategy,
replicas=replicas,
**kwargs,
)
return new_ensemble
except SmartSimError as e:
logger.error(e)
raise

@_contextualize
def create_application(
self,
name: str,
exe: str,
run_settings: base.RunSettings,
exe_args: t.Optional[t.List[str]] = None,
params: t.Optional[t.Dict[str, t.Any]] = None,
path: t.Optional[str] = None,
enable_key_prefixing: bool = False,
batch_settings: t.Optional[base.BatchSettings] = None,
) -> Application:
"""Create a general purpose ``Application``

The ``Application`` class is the most general encapsulation of
executable code in SmartSim. ``Application`` instances are named
references to pieces of a workflow that can be parameterized,
and executed.

``Application`` instances can be launched sequentially, as a batch job,
or as a group by adding them into an ``Ensemble``.

All ``Applications`` require a reference to run settings to specify which
executable to launch as well provide options for how to launch
the executable with the underlying WLM. Furthermore, batch a
reference to a batch settings can be added to launch the ``Application``
as a batch job through ``Experiment.start``. If a ``Application`` with
a reference to a set of batch settings is added to a larger
entity with its own set of batch settings (for e.g. an
``Ensemble``) the batch settings of the larger entity will take
precedence and the batch setting of the ``Application`` will be
strategically ignored.

Parameters supplied in the `params` argument can be written into
configuration files supplied at runtime to the ``Application`` through
``Application.attach_generator_files``. `params` can also be turned
into executable arguments by calling ``Application.params_to_args``

By default, ``Application`` instances will be executed in the
exp_path/application_name directory if no `path` argument is supplied.
If a ``Application`` instance is passed to ``Experiment.generate``,
a directory within the ``Experiment`` directory will be created
to house the input and output files from the ``Application``.

Example initialization of a ``Application`` instance

.. highlight:: python
.. code-block:: python

from smartsim import Experiment
run_settings = exp.create_run_settings("python", "run_pytorch_model.py")
application = exp.create_application("pytorch_model", run_settings)

# adding parameters to a application
run_settings = exp.create_run_settings("python", "run_pytorch_model.py")
train_params = {
"batch": 32,
"epoch": 10,
"lr": 0.001
}
application = exp.create_application("pytorch_model", run_settings, params=train_params)
application.attach_generator_files(to_configure="./train.cfg")
exp.generate(application)

New in 0.4.0, ``application`` instances can be colocated with an
Orchestrator database shard through ``application.colocate_db``. This
will launch a single ``Orchestrator`` instance on each compute
host used by the (possibly distributed) application. This is
useful for performant online inference or processing
at runtime.

New in 0.4.2, ``Application`` instances can now be colocated with
an Orchestrator database over either TCP or UDS using the
``pplication.colocate_db_tcp`` or ``Application.colocate_db_uds`` method
respectively. The original ``Application.colocate_db`` method is now
deprecated, but remains as an alias for ``Application.colocate_db_tcp``
for backward compatibility.

:param name: name of the ``Application``
:param exe: executable to run
:param exe_args: executable arguments
:param run_settings: defines how ``Application`` should be run
:param params: ``Application`` parameters for writing into configuration files
:param path: path to where the ``Application`` should be executed at runtime
:param enable_key_prefixing: If True, data sent to the ``Orchestrator``
using SmartRedis from this ``Application`` will
be prefixed with the ``Application`` name.
:param batch_settings: Settings to run ``Application`` individually as a batch job.
:raises SmartSimError: if initialization fails
:return: the created ``Application``
"""
if name is None:
raise AttributeError("Entity has no name. Please set name attribute.")
check_path = path or osp.join(self.exp_path, name)
entity_path: str = osp.abspath(check_path)
if params is None:
params = {}

try:
new_application = Application(
name=name,
exe=exe,
exe_args=exe_args,
params=params,
path=entity_path,
run_settings=run_settings,
batch_settings=batch_settings,
)
if enable_key_prefixing:
new_application.enable_key_prefixing()
return new_application
except SmartSimError as e:
logger.error(e)
raise

@_contextualize
def create_run_settings(
self,
run_command: str = "auto",
run_args: t.Optional[t.Dict[str, t.Union[int, str, float, None]]] = None,
env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None,
container: t.Optional[Container] = None,
**kwargs: t.Any,
) -> settings.RunSettings:
"""Create a ``RunSettings`` instance.

run_command="auto" will attempt to automatically
match a run command on the system with a ``RunSettings``
class in SmartSim. If found, the class corresponding
to that run_command will be created and returned.

If the local launcher is being used, auto detection will
be turned off.

If a recognized run command is passed, the ``RunSettings``
instance will be a child class such as ``SrunSettings``

If not supported by smartsim, the base ``RunSettings`` class
will be created and returned with the specified run_command and run_args
will be evaluated literally.

Run Commands with implemented helper classes:
- aprun (ALPS)
- srun (SLURM)
- mpirun (OpenMPI)
- jsrun (LSF)

:param run_command: command to run the executable
:param exe: executable to run
:param exe_args: arguments to pass to the executable
:param run_args: arguments to pass to the ``run_command``
:param env_vars: environment variables to pass to the executable
:param container: if execution environment is containerized
:return: the created ``RunSettings``
"""

try:
return settings.create_run_settings(
self._launcher,
run_command=run_command,
run_args=run_args,
env_vars=env_vars,
container=container,
**kwargs,
)
except SmartSimError as e:
logger.error(e)
raise

@_contextualize
def create_batch_settings(
self,
nodes: int = 1,
time: str = "",
queue: str = "",
account: str = "",
batch_args: t.Optional[t.Dict[str, str]] = None,
**kwargs: t.Any,
) -> base.BatchSettings:
"""Create a ``BatchSettings`` instance

Batch settings parameterize batch workloads. The result of this
function can be passed to the ``Ensemble`` initialization.

the `batch_args` parameter can be used to pass in a dictionary
of additional batch command arguments that aren't supported through
the smartsim interface


.. highlight:: python
.. code-block:: python

# i.e. for Slurm
batch_args = {
"distribution": "block"
"exclusive": None
}
bs = exp.create_batch_settings(nodes=3,
time="10:00:00",
batch_args=batch_args)
bs.set_account("default")

:param nodes: number of nodes for batch job
:param time: length of batch job
:param queue: queue or partition (if slurm)
:param account: user account name for batch system
:param batch_args: additional batch arguments
:return: a newly created BatchSettings instance
:raises SmartSimError: if batch creation fails
"""
try:
return settings.create_batch_settings(
self._launcher,
nodes=nodes,
time=time,
queue=queue,
account=account,
batch_args=batch_args,
**kwargs,
)
except SmartSimError as e:
logger.error(e)
raise

@_contextualize
def create_database(
self,
port: int = 6379,
path: t.Optional[str] = None,
db_nodes: int = 1,
batch: bool = False,
hosts: t.Optional[t.Union[t.List[str], str]] = None,
run_command: str = "auto",
interface: t.Union[str, t.List[str]] = "ipogif0",
account: t.Optional[str] = None,
time: t.Optional[str] = None,
queue: t.Optional[str] = None,
single_cmd: bool = True,
db_identifier: str = "orchestrator",
**kwargs: t.Any,
) -> Orchestrator:
"""Initialize an ``Orchestrator`` database

The ``Orchestrator`` database is a key-value store based
on Redis that can be launched together with other ``Experiment``
created instances for online data storage.

When launched, ``Orchestrator`` can be used to communicate
data between Fortran, Python, C, and C++ applications.

Machine Learning model in Pytorch, Tensorflow, and ONNX (i.e. scikit-learn)
can also be stored within the ``Orchestrator`` database where they
can be called remotely and executed on CPU or GPU where
the database is hosted.

To enable a SmartSim ``Application`` to communicate with the database
the workload must utilize the SmartRedis clients. For more
information on the database, and SmartRedis clients see the
documentation at https://www.craylabs.org/docs/smartredis.html

:param port: TCP/IP port
:param db_nodes: number of database shards
:param batch: run as a batch workload
:param hosts: specify hosts to launch on
:param run_command: specify launch binary or detect automatically
:param interface: Network interface
:param account: account to run batch on
:param time: walltime for batch 'HH:MM:SS' format
:param queue: queue to run the batch on
:param single_cmd: run all shards with one (MPMD) command
:param db_identifier: an identifier to distinguish this orchestrator in
multiple-database experiments
:raises SmartSimError: if detection of launcher or of run command fails
:raises SmartSimError: if user indicated an incompatible run command
for the launcher
:return: Orchestrator or derived class
"""

self._append_to_db_identifier_list(db_identifier)
check_path = path or osp.join(self.exp_path, db_identifier)
entity_path: str = osp.abspath(check_path)
return Orchestrator(
port=port,
path=entity_path,
db_nodes=db_nodes,
batch=batch,
hosts=hosts,
run_command=run_command,
interface=interface,
account=account,
time=time,
queue=queue,
single_cmd=single_cmd,
launcher=self._launcher,
db_identifier=db_identifier,
**kwargs,
)

@_contextualize
def reconnect_orchestrator(self, checkpoint: str) -> Orchestrator:
"""Reconnect to a running ``Orchestrator``
Expand Down