From e98e2fe52a8614b1473d8f19847036afd8309445 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Tue, 25 Jun 2024 12:21:53 -0500 Subject: [PATCH 01/40] Initial FLI-based implementation --- .../_core/launcher/dragon/dragonBackend.py | 30 ++++- .../_core/mli/comm/channel/dragonchannel.py | 12 +- smartsim/_core/mli/comm/channel/dragonfli.py | 54 +++++++++ .../infrastructure/control/workermanager.py | 33 +++--- .../_core/mli/infrastructure/worker/worker.py | 106 ++++++++++++++---- smartsim/_core/mli/message_handler.py | 10 +- 6 files changed, 192 insertions(+), 53 deletions(-) create mode 100644 smartsim/_core/mli/comm/channel/dragonfli.py diff --git a/smartsim/_core/launcher/dragon/dragonBackend.py b/smartsim/_core/launcher/dragon/dragonBackend.py index 2456606623..9ec4cc93e9 100644 --- a/smartsim/_core/launcher/dragon/dragonBackend.py +++ b/smartsim/_core/launcher/dragon/dragonBackend.py @@ -26,6 +26,7 @@ import collections import functools import itertools +import os import time import typing as t from dataclasses import dataclass, field @@ -38,10 +39,13 @@ # isort: off import dragon.infrastructure.connection as dragon_connection import dragon.infrastructure.policy as dragon_policy +from dragon.infrastructure.process_desc import ProcessOptions +from dragon.data.ddict.ddict import DDict import dragon.native.group_state as dragon_group_state import dragon.native.process as dragon_process import dragon.native.process_group as dragon_process_group import dragon.native.machine as dragon_machine +import multiprocessing as mp # pylint: enable=import-error # isort: on @@ -75,6 +79,9 @@ def __str__(self) -> str: return self.value +mp.set_start_method("dragon") + + @dataclass class ProcessGroupInfo: status: SmartSimStatus @@ -187,6 +194,7 @@ def __init__(self, pid: int) -> None: self._view = DragonBackendView(self) logger.debug(self._view.host_desc) + self._infra_ddict: t.Optional[DDict] = None @property def hosts(self) -> list[str]: @@ -391,6 +399,20 @@ def _stop_steps(self) -> None: self._group_infos[step_id].status = SmartSimStatus.STATUS_CANCELLED self._group_infos[step_id].return_codes = [-9] + @property + def infra_ddict(self) -> str: + """Create a Dragon distributed dictionary and return its + serialized descriptor + """ + if self._infra_ddict is None: + logger.info("Creating DDict") + self._infra_ddict = DDict() # todo: parametrize + logger.info("Created DDict") + self._infra_ddict["creation"] = str(time.time()) + logger.info(self._infra_ddict["creation"]) + + return self._infra_ddict.serialize() + def _start_steps(self) -> None: self._heartbeat() with self._queue_lock: @@ -406,6 +428,7 @@ def _start_steps(self) -> None: placement=dragon_policy.Policy.Placement.HOST_NAME, host_name=hosts[0], ) + options = ProcessOptions(make_inf_channels=True) grp = dragon_process_group.ProcessGroup( restart=False, pmi_enabled=request.pmi_enabled, policy=global_policy ) @@ -421,10 +444,15 @@ def _start_steps(self) -> None: target=request.exe, args=request.exe_args, cwd=request.path, - env={**request.current_env, **request.env}, + env={ + **request.current_env, + **request.env, + "SS_DRG_DDICT": self.infra_ddict, + }, stdout=dragon_process.Popen.PIPE, stderr=dragon_process.Popen.PIPE, policy=local_policy, + options=options, ) grp.add_process(nproc=request.tasks_per_node, template=tmp_proc) diff --git a/smartsim/_core/mli/comm/channel/dragonchannel.py b/smartsim/_core/mli/comm/channel/dragonchannel.py index 4fd26861ca..d4dbfa3ba0 100644 --- a/smartsim/_core/mli/comm/channel/dragonchannel.py +++ 
b/smartsim/_core/mli/comm/channel/dragonchannel.py @@ -24,16 +24,13 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import typing as t import smartsim._core.mli.comm.channel.channel as cch from smartsim.log import get_logger logger = get_logger(__name__) -if t.TYPE_CHECKING: - import dragon.channels as dch - import dragon.utils as du +import dragon.channels as dch class DragonCommChannel(cch.CommChannelBase): @@ -42,11 +39,10 @@ class DragonCommChannel(cch.CommChannelBase): def __init__(self, key: bytes) -> None: """Initialize the DragonCommChannel instance""" super().__init__(key) - # todo: do we need memory pool information to construct the channel correctly? - self._channel: "dch.Channel" = du.get_channel(key) + self._channel: dch.Channel = dch.Channel.attach(key) def send(self, value: bytes) -> None: """Send a message throuh the underlying communication channel :param value: The value to send""" - logger.debug(f"Channel {self.descriptor.decode('utf-8')} sending message") - self._channel.send_bytes(value) + with self._channel.sendh(timeout=None) as sendh: + sendh.send_bytes(value) diff --git a/smartsim/_core/mli/comm/channel/dragonfli.py b/smartsim/_core/mli/comm/channel/dragonfli.py new file mode 100644 index 0000000000..f601bb2eb8 --- /dev/null +++ b/smartsim/_core/mli/comm/channel/dragonfli.py @@ -0,0 +1,54 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# isort: off +import dragon +from dragon import fli +import dragon.channels as dch + +# isort: on + + +import smartsim._core.mli.comm.channel.channel as cch +from smartsim.log import get_logger + +logger = get_logger(__name__) + + +class DragonFLIChannel(cch.CommChannelBase): + """Passes messages by writing to a Dragon FLI Channel""" + + def __init__(self, fli_desc: bytes) -> None: + """Initialize the DragonFLIChannel instance""" + super().__init__(fli_desc) + # todo: do we need memory pool information to construct the channel correctly? 
+ self._channel: "dch.Channel" = fli.FLInterface.attach(fli_desc) + + def send(self, value: bytes) -> None: + """Send a message throuh the underlying communication channel + :param value: The value to send""" + with self._channel.sendh(timeout=None) as sendh: + sendh.send_bytes(value) diff --git a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index b3b79f7f30..588dc8e28d 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -24,14 +24,19 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import multiprocessing as mp +# isort: off +import dragon +from dragon import fli + +# isort: on +import time import typing as t import numpy as np from smartsim._core.entrypoints.service import Service from smartsim._core.mli.comm.channel.channel import CommChannelBase -from smartsim._core.mli.comm.channel.dragonchannel import DragonCommChannel +from smartsim._core.mli.comm.channel.dragonfli import DragonFLIChannel from smartsim._core.mli.infrastructure.storage.featurestore import FeatureStore from smartsim._core.mli.infrastructure.worker.worker import ( InferenceReply, @@ -84,12 +89,6 @@ def deserialize_message( None # these will really be tensors already ) - # # client example - # msg = Message() - # t = torch.Tensor() - # msg.inputs = [custom_byte_converter(t)] - # mli_client.request_inference(msg) - # # end client input_meta: t.List[t.Any] = [] if request.input.which() == "inputKeys": @@ -163,12 +162,12 @@ class WorkerManager(Service): def __init__( self, - task_queue: "mp.Queue[bytes]", + file_like_interface: fli.FLInterface, worker: MachineLearningWorkerBase, feature_store: t.Optional[FeatureStore] = None, as_service: bool = False, cooldown: int = 0, - comm_channel_type: t.Type[CommChannelBase] = DragonCommChannel, + comm_channel_type: t.Type[CommChannelBase] = DragonFLIChannel, ) -> None: """Initialize the WorkerManager :param task_queue: The queue to monitor for new tasks @@ -182,7 +181,7 @@ def __init__( super().__init__(as_service, cooldown) """a collection of workers the manager is controlling""" - self._task_queue: "mp.Queue[bytes]" = task_queue + self._task_queue: fli.FLInterface = file_like_interface """the queue the manager monitors for new tasks""" self._feature_store: t.Optional[FeatureStore] = feature_store """a feature store to retrieve models from""" @@ -232,7 +231,12 @@ def _on_iteration(self) -> None: return # perform default deserialization of the message envelope - request_bytes: bytes = self._task_queue.get() + # perform default deserialization of the message envelope + with self._task_queue.recvh(timeout=None) as recvh: + try: + request_bytes, _ = recvh.recv_bytes(timeout=None) + except fli.FLIEOT as exc: + return request = deserialize_message(request_bytes, self._comm_channel_type) if not self._validate_request(request): @@ -246,17 +250,12 @@ def _on_iteration(self) -> None: fetch_input_result = self._worker.fetch_inputs(request, self._feature_store) transformed_input = self._worker.transform_input(request, fetch_input_result) - # batch: t.Collection[_Datum] = transform_result.transformed_input - # if self._batch_size: - # batch = self._worker.batch_requests(transform_result, self._batch_size) - reply = InferenceReply() try: execute_result = self._worker.execute( request, model_result, transformed_input ) - 
transformed_output = self._worker.transform_output(request, execute_result) if request.output_keys: diff --git a/smartsim/_core/mli/infrastructure/worker/worker.py b/smartsim/_core/mli/infrastructure/worker/worker.py index 99b51e178d..8992b2b6ea 100644 --- a/smartsim/_core/mli/infrastructure/worker/worker.py +++ b/smartsim/_core/mli/infrastructure/worker/worker.py @@ -24,12 +24,17 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import io import typing as t from abc import ABC, abstractmethod +import numpy as np +import torch + import smartsim.error as sse from smartsim._core.mli.comm.channel.channel import CommChannelBase from smartsim._core.mli.infrastructure.storage.featurestore import FeatureStore +from smartsim._core.mli.mli_schemas.tensor import tensor_capnp from smartsim.log import get_logger logger = get_logger(__name__) @@ -106,9 +111,10 @@ def __init__(self, result: t.Any) -> None: class FetchInputResult: """A wrapper around fetched inputs""" - def __init__(self, result: t.List[bytes]) -> None: + def __init__(self, result: t.List[bytes], meta: t.List[t.Any]) -> None: """Initialize the object""" self.inputs = result + self.meta = meta class TransformOutputResult: @@ -122,7 +128,6 @@ def __init__( self.shape = shape self.order = order self.dtype = dtype - # todo: determine if each output must have an individual (shape, order, dtype) class CreateInputBatchResult: @@ -152,8 +157,6 @@ def fetch_model( :param request: The request that triggered the pipeline :param feature_store: The feature store used for persistence :return: Raw bytes of the model""" - if not feature_store: - raise ValueError("Feature store is required for model retrieval") if request.raw_model: # Should we cache model in the feature store? 
@@ -162,6 +165,9 @@ def fetch_model( # short-circuit and return the directly supplied model return FetchModelResult(request.raw_model) + if not feature_store: + raise ValueError("Feature store is required for model retrieval") + if not request.model_key: raise sse.SmartSimError( "Key must be provided to retrieve model from feature store" @@ -185,8 +191,12 @@ def fetch_inputs( :param request: The request that triggered the pipeline :param feature_store: The feature store used for persistence :return: the fetched input""" + + if request.raw_inputs: + return FetchInputResult(request.raw_inputs, request.input_meta) + if not feature_store: - raise ValueError("Feature store is required for input retrieval") + raise ValueError("No input and no feature store provided") if request.input_keys: data: t.List[bytes] = [] @@ -201,9 +211,6 @@ def fetch_inputs( ) from ex return FetchInputResult(data) - if request.raw_inputs: - return FetchInputResult(request.raw_inputs) - raise ValueError("No input source") @staticmethod @@ -250,14 +257,6 @@ class MachineLearningWorkerBase(MachineLearningWorkerCore, ABC): """Abstrct base class providing contract for a machine learning worker implementation.""" - # @staticmethod - # @abstractmethod - # def deserialize(request: InferenceRequest) -> InferenceRequest: - # """Given a collection of data serialized to bytes, convert the bytes - # to a proper representation used by the ML backend - # :param data_blob: inference request as a byte-serialized blob - # :return: InferenceRequest deserialized from the input""" - @staticmethod @abstractmethod def load_model( @@ -303,11 +302,70 @@ def transform_output( :param execute_result: The result of inference wrapped in an ExecuteResult :return:""" - # @staticmethod - # @abstractmethod - # def serialize_reply( - # request: InferenceRequest, results: OutputTransformResult - # ) -> bytes: - # """Given an output, serialize to bytes for transport - # :param reply: The result of the inference pipeline - # :return: a byte-serialized version of the reply""" + +class TorchWorker(MachineLearningWorkerBase): + """A worker that executes a PyTorch model.""" + + @staticmethod + def load_model( + request: InferenceRequest, fetch_result: FetchModelResult + ) -> LoadModelResult: + model_bytes = fetch_result.model_bytes or request.raw_model + if not model_bytes: + raise ValueError("Unable to load model without reference object") + + _device_to_torch = {"cpu": "cpu", "gpu": "cuda"} + device = _device_to_torch[str(request.device)] + model: torch.nn.Module = torch.jit.load(io.BytesIO(model_bytes), map_location=device) # type: ignore[no-untyped-call] + result = LoadModelResult(model) + return result + + @staticmethod + def transform_input( + request: InferenceRequest, fetch_result: FetchInputResult + ) -> TransformInputResult: + result = [] + + _device_to_torch = {"cpu": "cpu", "gpu": "cuda"} + device = _device_to_torch[str(request.device)] + for item, item_meta in zip(fetch_result.inputs, fetch_result.meta): + td: tensor_capnp.TensorDescriptor = item_meta + result.append( + torch.tensor( + np.frombuffer(item, dtype=str(td.dataType)).reshape(td.dimensions) + ).to(device) + ) + return TransformInputResult(result) + # return data # note: this fails copy test! 
+ + @staticmethod + def execute( + request: InferenceRequest, + load_result: LoadModelResult, + transform_result: TransformInputResult, + ) -> ExecuteResult: + if not load_result.model: + raise sse.SmartSimError("Model must be loaded to execute") + + model: torch.nn.Module = load_result.model + model.eval() + results = [model(tensor).detach() for tensor in transform_result.transformed] + + execute_result = ExecuteResult(results) + return execute_result + + @staticmethod + def transform_output( + request: InferenceRequest, + execute_result: ExecuteResult, + ) -> TransformOutputResult: + if str(request.device) != "cpu": + transformed = [ + item.to("cpu").clone() for item in execute_result.predictions + ] + # todo: need the shape from latest schemas added here. + return TransformOutputResult(transformed, None, "c", "float32") # fixme + else: + return TransformOutputResult( + execute_result.predictions, None, "c", "float32" + ) # fixme diff --git a/smartsim/_core/mli/message_handler.py b/smartsim/_core/mli/message_handler.py index 733fa83d98..4a5725bd9e 100644 --- a/smartsim/_core/mli/message_handler.py +++ b/smartsim/_core/mli/message_handler.py @@ -391,7 +391,9 @@ def deserialize_request(request_bytes: t.ByteString) -> request_capnp.Request: :param request_bytes: Bytes to be deserialized into a Request """ - bytes_message = request_capnp.Request.from_bytes(request_bytes) + bytes_message = request_capnp.Request.from_bytes( + request_bytes, traversal_limit_in_words=2**63 + ) with bytes_message as message: return message @@ -484,7 +486,7 @@ def _assign_custom_response_attributes( response.customAttributes.tf = custom_attrs # type: ignore else: raise ValueError("""Invalid custom attribute class name. - Expected 'TensorFlowResponseAttributes' or + Expected 'TensorFlowResponseAttributes' or 'TorchResponseAttributes'.""") except Exception as e: raise ValueError("Error assigning custom attributes to response.") from e @@ -529,7 +531,9 @@ def deserialize_response(response_bytes: t.ByteString) -> response_capnp.Respons """ Deserializes a serialized response message. 
""" - bytes_message = response_capnp.Response.from_bytes(response_bytes) + bytes_message = response_capnp.Response.from_bytes( + response_bytes, traversal_limit_in_words=2**63 + ) with bytes_message as message: return message From 043f0e74e68ad07846ffce9a0013eb6cf1919c09 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Tue, 25 Jun 2024 13:42:44 -0500 Subject: [PATCH 02/40] Add inference example stub --- .../high_throughput_inference/mli_driver.py | 35 +++++ .../high_throughput_inference/mock_app.py | 129 ++++++++++++++++++ .../standalone_workermanager.py | 46 +++++++ 3 files changed, 210 insertions(+) create mode 100644 examples/high_throughput_inference/mli_driver.py create mode 100644 examples/high_throughput_inference/mock_app.py create mode 100644 examples/high_throughput_inference/standalone_workermanager.py diff --git a/examples/high_throughput_inference/mli_driver.py b/examples/high_throughput_inference/mli_driver.py new file mode 100644 index 0000000000..187a7b8214 --- /dev/null +++ b/examples/high_throughput_inference/mli_driver.py @@ -0,0 +1,35 @@ +import sys +from smartsim import Experiment +from smartsim.status import TERMINAL_STATUSES +import time + +worker_manager_script_name = "standalone_workermanager.py" +app_script_name = "mock_app.py" +device = "cpu" + + +exp = Experiment("MLI_proto", launcher="dragon") + +worker_manager_rs = exp.create_run_settings(sys.executable, [worker_manager_script_name]) +worker_manager = exp.create_model("worker_manager", run_settings=worker_manager_rs) +worker_manager.attach_generator_files(to_copy=[worker_manager_script_name]) + + +app_rs = exp.create_run_settings(sys.executable, exe_args = [app_script_name, "--device", device]) +app = exp.create_model("app", run_settings=app_rs) +app.attach_generator_files(to_copy=[app_script_name], to_symlink=[f"resnet50.{device.upper()}.pt"]) + + +exp.generate(worker_manager, app, overwrite=True) +exp.start(worker_manager, app, block=False) + +while True: + if exp.get_status(app)[0] in TERMINAL_STATUSES: + exp.stop(worker_manager) + break + if exp.get_status(worker_manager)[0] in TERMINAL_STATUSES: + exp.stop(app) + break + time.sleep(5) + +print("Exiting.") \ No newline at end of file diff --git a/examples/high_throughput_inference/mock_app.py b/examples/high_throughput_inference/mock_app.py new file mode 100644 index 0000000000..d6f8253b70 --- /dev/null +++ b/examples/high_throughput_inference/mock_app.py @@ -0,0 +1,129 @@ +# isort: off +import dragon +from dragon import fli +from dragon.channels import Channel +import dragon.channels +from dragon.data.ddict.ddict import DDict +from dragon.globalservices.api_setup import connect_to_infrastructure +from dragon.utils import b64decode, b64encode + +# isort: on + +import argparse +import io +import numpy +import os +import tabulate +import time +import torch +import typing as t + +from smartsim._core.mli.message_handler import MessageHandler + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser("Mock application") + parser.add_argument("--device", default="cpu") + + args = parser.parse_args() + + connect_to_infrastructure() + ddict_str = os.environ["SS_DRG_DDICT"] + + ddict = DDict.attach(ddict_str) + + to_worker_fli_str = None + + while to_worker_fli_str is None: + try: + to_worker_fli_str = ddict["to_worker_fli"] + except Exception as e: + time.sleep(1) + + to_worker_fli = fli.FLInterface.attach(b64decode(to_worker_fli_str)) + + batch_size = 32 + model = torch.jit.load(f"resnet50.{args.device.upper()}.pt") + buffer = io.BytesIO() + batch = 
torch.randn((batch_size, 3, 224, 224), dtype=torch.float32) + scripted = torch.jit.trace(model, batch) + torch.jit.save(scripted, buffer) + + total_iterations = 10 + + headers=[ + "batch_size", + "build_tensor", + "build_request", + "serialize_request", + "send", + "receive", + "deserialize_response", + "deserialize_tensor", + ] + + print(",".join(headers)) + + for batch_size in [1, 8, 32, 64, 128]: + + timings = [] + for iteration_number in range(total_iterations + int(batch_size==1)): + + timings.append([batch_size]) + + batch = torch.randn((batch_size, 3, 224, 224), dtype=torch.float32) + + expected_device: t.Literal["cpu", "gpu"] = args.device.lower() + + start = time.perf_counter() + interm = start + built_tensor = MessageHandler.build_tensor( + batch.numpy(), "c", "float32", list(batch.shape) + ) + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + + from_worker_ch = Channel.make_process_local() + + request = MessageHandler.build_request( + reply_channel=from_worker_ch.serialize(), + model=buffer.getvalue(), + device=expected_device, + inputs=[built_tensor], + outputs=[], + output_descriptors=[], + custom_attributes=None, + ) + + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + request_bytes = MessageHandler.serialize_request(request) + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + with to_worker_fli.sendh(timeout=None) as to_sendh: + to_sendh.send_bytes(request_bytes) + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + + with from_worker_ch.recvh(timeout=None) as from_recvh: + resp = from_recvh.recv_bytes(timeout=None) + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + response = MessageHandler.deserialize_response(resp) + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + result = torch.from_numpy( + numpy.frombuffer( + response.result.data[0].blob, + dtype=str(response.result.data[0].tensorDescriptor.dataType), + ) + ) + + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + + # duration = time.perf_counter() - start + # print(f"{duration:.3f} s") + + print(",".join(str(timing) for timing in timings[-1])) diff --git a/examples/high_throughput_inference/standalone_workermanager.py b/examples/high_throughput_inference/standalone_workermanager.py new file mode 100644 index 0000000000..7ddeff0094 --- /dev/null +++ b/examples/high_throughput_inference/standalone_workermanager.py @@ -0,0 +1,46 @@ +# isort: off +import dragon +from dragon import fli +from dragon.channels import Channel +from dragon.data.ddict.ddict import DDict +from dragon.utils import b64decode, b64encode +from dragon.globalservices.api_setup import connect_to_infrastructure +# isort: on +import logging +import multiprocessing as mp +import os +import pathlib +import shutil +import time + + +from smartsim._core.mli.comm.channel.dragonfli import DragonFLIChannel +from smartsim._core.mli.infrastructure.worker.worker import TorchWorker +from smartsim._core.mli.infrastructure.control.workermanager import ( + DragonCommChannel, + WorkerManager, +) + +if __name__ == "__main__": + connect_to_infrastructure() + mp.set_start_method("dragon") + ddict_str = os.environ["SS_DRG_DDICT"] + ddict = DDict.attach(ddict_str) + + to_worker_channel = Channel.make_process_local() + to_worker_manager_channel = Channel.make_process_local() + channels = [Channel.make_process_local() for _ in range(100)] + to_worker_fli = 
fli.FLInterface(main_ch=to_worker_channel, manager_ch=to_worker_manager_channel, stream_channels=channels) + ddict["to_worker_fli"] = b64encode(to_worker_fli.serialize()) + + torch_worker = TorchWorker() + + worker_manager = WorkerManager( + file_like_interface=to_worker_fli, + worker=torch_worker, + feature_store=None, + as_service=True, + cooldown=10, + comm_channel_type=DragonCommChannel, + ) + worker_manager.execute() From efc9e839d2c317a49662776b710993e43c88f75c Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Tue, 25 Jun 2024 17:09:50 -0500 Subject: [PATCH 03/40] Lint, style, black magic --- .../high_throughput_inference/mli_driver.py | 2 +- .../standalone_workermanager.py | 3 +- .../_core/launcher/dragon/dragonBackend.py | 3 +- .../_core/mli/infrastructure/worker/worker.py | 30 +++++++++++-------- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/examples/high_throughput_inference/mli_driver.py b/examples/high_throughput_inference/mli_driver.py index 187a7b8214..833766cbef 100644 --- a/examples/high_throughput_inference/mli_driver.py +++ b/examples/high_throughput_inference/mli_driver.py @@ -5,7 +5,7 @@ worker_manager_script_name = "standalone_workermanager.py" app_script_name = "mock_app.py" -device = "cpu" +device = "gpu" exp = Experiment("MLI_proto", launcher="dragon") diff --git a/examples/high_throughput_inference/standalone_workermanager.py b/examples/high_throughput_inference/standalone_workermanager.py index 7ddeff0094..bb93c613ce 100644 --- a/examples/high_throughput_inference/standalone_workermanager.py +++ b/examples/high_throughput_inference/standalone_workermanager.py @@ -14,10 +14,9 @@ import time -from smartsim._core.mli.comm.channel.dragonfli import DragonFLIChannel +from smartsim._core.mli.comm.channel.dragonchannel import DragonCommChannel from smartsim._core.mli.infrastructure.worker.worker import TorchWorker from smartsim._core.mli.infrastructure.control.workermanager import ( - DragonCommChannel, WorkerManager, ) diff --git a/smartsim/_core/launcher/dragon/dragonBackend.py b/smartsim/_core/launcher/dragon/dragonBackend.py index 9ec4cc93e9..d103579115 100644 --- a/smartsim/_core/launcher/dragon/dragonBackend.py +++ b/smartsim/_core/launcher/dragon/dragonBackend.py @@ -26,7 +26,6 @@ import collections import functools import itertools -import os import time import typing as t from dataclasses import dataclass, field @@ -411,7 +410,7 @@ def infra_ddict(self) -> str: self._infra_ddict["creation"] = str(time.time()) logger.info(self._infra_ddict["creation"]) - return self._infra_ddict.serialize() + return str(self._infra_ddict.serialize()) def _start_steps(self) -> None: self._heartbeat() diff --git a/smartsim/_core/mli/infrastructure/worker/worker.py b/smartsim/_core/mli/infrastructure/worker/worker.py index 8992b2b6ea..295b2573c8 100644 --- a/smartsim/_core/mli/infrastructure/worker/worker.py +++ b/smartsim/_core/mli/infrastructure/worker/worker.py @@ -111,7 +111,7 @@ def __init__(self, result: t.Any) -> None: class FetchInputResult: """A wrapper around fetched inputs""" - def __init__(self, result: t.List[bytes], meta: t.List[t.Any]) -> None: + def __init__(self, result: t.List[bytes], meta: t.Optional[t.List[t.Any]]) -> None: """Initialize the object""" self.inputs = result self.meta = meta @@ -121,7 +121,7 @@ class TransformOutputResult: """A wrapper around inference results transformed for transmission""" def __init__( - self, result: t.Any, shape: t.List[int], order: str, dtype: str + self, result: t.Any, shape: t.Optional[t.List[int]], order: 
str, dtype: str ) -> None: """Initialize the OutputTransformResult""" self.outputs = result @@ -209,7 +209,9 @@ def fetch_inputs( raise sse.SmartSimError( f"Model could not be retrieved with key {input_}" ) from ex - return FetchInputResult(data) + return FetchInputResult( + data, None + ) # fixme: need to get both tensor and descriptor raise ValueError("No input source") @@ -316,7 +318,9 @@ def load_model( _device_to_torch = {"cpu": "cpu", "gpu": "cuda"} device = _device_to_torch[str(request.device)] - model: torch.nn.Module = torch.jit.load(io.BytesIO(model_bytes), map_location=device) # type: ignore[no-untyped-call] + buffer = io.BytesIO(model_bytes) + # type: ignore-next[no-untyped-call] + model = torch.jit.load(buffer, map_location=device) result = LoadModelResult(model) return result @@ -328,12 +332,14 @@ def transform_input( _device_to_torch = {"cpu": "cpu", "gpu": "cuda"} device = _device_to_torch[str(request.device)] + if fetch_result.meta is None: + raise ValueError("Cannot reconstruct tensor without meta information") for item, item_meta in zip(fetch_result.inputs, fetch_result.meta): - td: tensor_capnp.TensorDescriptor = item_meta + tensor_desc: tensor_capnp.TensorDescriptor = item_meta result.append( - torch.tensor( - np.frombuffer(item, dtype=str(td.dataType)).reshape(td.dimensions) - ).to(device) + torch.tensor(np.frombuffer(item, dtype=str(tensor_desc.dataType))) + .to(device) + .reshape(tuple(dim for dim in tensor_desc.dimensions)) ) return TransformInputResult(result) # return data # note: this fails copy test! @@ -365,7 +371,7 @@ def transform_output( ] # todo: need the shape from latest schemas added here. return TransformOutputResult(transformed, None, "c", "float32") # fixme - else: - return TransformOutputResult( - execute_result.predictions, None, "c", "float32" - ) # fixme + + return TransformOutputResult( + execute_result.predictions, None, "c", "float32" + ) # fixme From ed3c42a10b812963e2de28c6e89918dfe0efbc07 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Tue, 25 Jun 2024 18:07:56 -0500 Subject: [PATCH 04/40] Bring up to feature branch --- .../infrastructure/control/workermanager.py | 24 +++++++++++++++---- .../_core/mli/infrastructure/worker/worker.py | 24 ++++++++++--------- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index 67b1627bb5..f46ced8756 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -54,7 +54,9 @@ def deserialize_message( - data_blob: bytes, channel_type: t.Type[CommChannelBase] + data_blob: bytes, + channel_type: t.Type[CommChannelBase], + device: t.Literal["cpu", "gpu"], ) -> InferenceRequest: """Deserialize a message from a byte stream into an InferenceRequest :param data_blob: The byte stream to deserialize""" @@ -166,6 +168,7 @@ def __init__( as_service: bool = False, cooldown: int = 0, comm_channel_type: t.Type[CommChannelBase] = DragonFLIChannel, + device: t.Literal["cpu", "gpu"] = "cpu", ) -> None: """Initialize the WorkerManager :param task_queue: The queue to monitor for new tasks @@ -187,6 +190,8 @@ def __init__( """The ML Worker implementation""" self._comm_channel_type = comm_channel_type """The type of communication channel to construct for callbacks""" + self._device = device + """Device on which workers need to run""" def _validate_request(self, request: InferenceRequest) -> bool: """Ensure the request 
can be processed. @@ -236,17 +241,24 @@ def _on_iteration(self) -> None: except fli.FLIEOT as exc: return - request = deserialize_message(request_bytes, self._comm_channel_type) + request = deserialize_message( + request_bytes, self._comm_channel_type, self._device + ) if not self._validate_request(request): return + # # let the worker perform additional custom deserialization # request = self._worker.deserialize(request_bytes) fetch_model_result = self._worker.fetch_model(request, self._feature_store) - model_result = self._worker.load_model(request, fetch_model_result) + model_result = self._worker.load_model( + request, fetch_model_result, self._device + ) fetch_input_result = self._worker.fetch_inputs(request, self._feature_store) - transformed_input = self._worker.transform_input(request, fetch_input_result) + transformed_input = self._worker.transform_input( + request, fetch_input_result, self._device + ) reply = InferenceReply() @@ -254,7 +266,9 @@ def _on_iteration(self) -> None: execute_result = self._worker.execute( request, model_result, transformed_input ) - transformed_output = self._worker.transform_output(request, execute_result) + transformed_output = self._worker.transform_output( + request, execute_result, self._device + ) if request.output_keys: reply.output_keys = self._worker.place_output( diff --git a/smartsim/_core/mli/infrastructure/worker/worker.py b/smartsim/_core/mli/infrastructure/worker/worker.py index 9b813a9e9b..08c4997554 100644 --- a/smartsim/_core/mli/infrastructure/worker/worker.py +++ b/smartsim/_core/mli/infrastructure/worker/worker.py @@ -260,21 +260,23 @@ class MachineLearningWorkerBase(MachineLearningWorkerCore, ABC): @staticmethod @abstractmethod def load_model( - request: InferenceRequest, fetch_result: FetchModelResult + request: InferenceRequest, fetch_result: FetchModelResult, device: str ) -> LoadModelResult: """Given a loaded MachineLearningModel, ensure it is loaded into device memory :param request: The request that triggered the pipeline + :param device: The device on which the model must be placed :return: ModelLoadResult wrapping the model loaded for the request""" @staticmethod @abstractmethod def transform_input( - request: InferenceRequest, fetch_result: FetchInputResult + request: InferenceRequest, fetch_result: FetchInputResult, device: str ) -> TransformInputResult: """Given a collection of data, perform a transformation on the data :param request: The request that triggered the pipeline :param fetch_result: Raw output from fetching inputs out of a feature store + :param device: The device on which the transformed input must be placed :return: The transformed inputs wrapped in a InputTransformResult""" @staticmethod @@ -293,13 +295,13 @@ def execute( @staticmethod @abstractmethod def transform_output( - request: InferenceRequest, - execute_result: ExecuteResult, + request: InferenceRequest, execute_result: ExecuteResult, result_device: str ) -> TransformOutputResult: """Given inference results, perform transformations required to transmit results to the requestor. 
:param request: The request that triggered the pipeline :param execute_result: The result of inference wrapped in an ExecuteResult + :param result_device: The device on which the result of inference is placed :return:""" @@ -308,28 +310,27 @@ class TorchWorker(MachineLearningWorkerBase): @staticmethod def load_model( - request: InferenceRequest, fetch_result: FetchModelResult + request: InferenceRequest, fetch_result: FetchModelResult, device: str ) -> LoadModelResult: model_bytes = fetch_result.model_bytes or request.raw_model if not model_bytes: raise ValueError("Unable to load model without reference object") _device_to_torch = {"cpu": "cpu", "gpu": "cuda"} - device = _device_to_torch[str(request.device)] + device = _device_to_torch[device] buffer = io.BytesIO(model_bytes) - # type: ignore-next[no-untyped-call] - model = torch.jit.load(buffer, map_location=device) + model = torch.jit.load(buffer, map_location=device) # type: ignore result = LoadModelResult(model) return result @staticmethod def transform_input( - request: InferenceRequest, fetch_result: FetchInputResult + request: InferenceRequest, fetch_result: FetchInputResult, device: str ) -> TransformInputResult: result = [] _device_to_torch = {"cpu": "cpu", "gpu": "cuda"} - device = _device_to_torch[str(request.device)] + device = _device_to_torch[device] if fetch_result.meta is None: raise ValueError("Cannot reconstruct tensor without meta information") for item, item_meta in zip(fetch_result.inputs, fetch_result.meta): @@ -362,8 +363,9 @@ def execute( def transform_output( request: InferenceRequest, execute_result: ExecuteResult, + result_device: str, ) -> TransformOutputResult: - if str(request.device) != "cpu": + if result_device != "cpu": transformed = [ item.to("cpu").clone() for item in execute_result.predictions ] From e5be26bdcd8d55e6b3b9669fa9bd5492ffd89390 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Tue, 25 Jun 2024 18:08:14 -0500 Subject: [PATCH 05/40] Update example --- examples/high_throughput_inference/mli_driver.py | 13 ++++++++----- examples/high_throughput_inference/mock_app.py | 3 --- .../standalone_workermanager.py | 11 +++++------ 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/examples/high_throughput_inference/mli_driver.py b/examples/high_throughput_inference/mli_driver.py index 833766cbef..d32d88e51b 100644 --- a/examples/high_throughput_inference/mli_driver.py +++ b/examples/high_throughput_inference/mli_driver.py @@ -1,23 +1,26 @@ +import os import sys from smartsim import Experiment from smartsim.status import TERMINAL_STATUSES import time -worker_manager_script_name = "standalone_workermanager.py" -app_script_name = "mock_app.py" device = "gpu" +filedir = os.path.dirname(__file__) +worker_manager_script_name = os.path.join(filedir, "standalone_workermanager.py") +app_script_name = os.path.join(filedir, "mock_app.py") +model_name = os.path.join(filedir, f"resnet50.{device.upper()}.pt") -exp = Experiment("MLI_proto", launcher="dragon") +exp = Experiment("MLI_proto", launcher="dragon", exp_path=os.path.join(filedir, "MLI_proto")) -worker_manager_rs = exp.create_run_settings(sys.executable, [worker_manager_script_name]) +worker_manager_rs = exp.create_run_settings(sys.executable, [worker_manager_script_name, "--device", device]) worker_manager = exp.create_model("worker_manager", run_settings=worker_manager_rs) worker_manager.attach_generator_files(to_copy=[worker_manager_script_name]) app_rs = exp.create_run_settings(sys.executable, exe_args = [app_script_name, "--device", device]) app 
= exp.create_model("app", run_settings=app_rs) -app.attach_generator_files(to_copy=[app_script_name], to_symlink=[f"resnet50.{device.upper()}.pt"]) +app.attach_generator_files(to_copy=[app_script_name], to_symlink=[model_name]) exp.generate(worker_manager, app, overwrite=True) diff --git a/examples/high_throughput_inference/mock_app.py b/examples/high_throughput_inference/mock_app.py index d6f8253b70..afc0c836b8 100644 --- a/examples/high_throughput_inference/mock_app.py +++ b/examples/high_throughput_inference/mock_app.py @@ -74,8 +74,6 @@ batch = torch.randn((batch_size, 3, 224, 224), dtype=torch.float32) - expected_device: t.Literal["cpu", "gpu"] = args.device.lower() - start = time.perf_counter() interm = start built_tensor = MessageHandler.build_tensor( @@ -89,7 +87,6 @@ request = MessageHandler.build_request( reply_channel=from_worker_ch.serialize(), model=buffer.getvalue(), - device=expected_device, inputs=[built_tensor], outputs=[], output_descriptors=[], diff --git a/examples/high_throughput_inference/standalone_workermanager.py b/examples/high_throughput_inference/standalone_workermanager.py index bb93c613ce..32d534f360 100644 --- a/examples/high_throughput_inference/standalone_workermanager.py +++ b/examples/high_throughput_inference/standalone_workermanager.py @@ -6,12 +6,8 @@ from dragon.utils import b64decode, b64encode from dragon.globalservices.api_setup import connect_to_infrastructure # isort: on -import logging -import multiprocessing as mp +import argparse import os -import pathlib -import shutil -import time from smartsim._core.mli.comm.channel.dragonchannel import DragonCommChannel @@ -21,8 +17,10 @@ ) if __name__ == "__main__": + parser = argparse.ArgumentParser("Worker Manager") + parser.add_argument("--device", default="gpu") + args = parser.parse_args() connect_to_infrastructure() - mp.set_start_method("dragon") ddict_str = os.environ["SS_DRG_DDICT"] ddict = DDict.attach(ddict_str) @@ -41,5 +39,6 @@ as_service=True, cooldown=10, comm_channel_type=DragonCommChannel, + device = args.device, ) worker_manager.execute() From a23010fb9726e4c18997bee279a0553bbaa473f0 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Tue, 25 Jun 2024 18:17:30 -0500 Subject: [PATCH 06/40] Change the changelog --- doc/changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/changelog.md b/doc/changelog.md index e86c93de66..d146d1973a 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -17,7 +17,7 @@ Description - Added schemas and MessageHandler class for de/serialization of inference requests and response messages - Removed device from schemas, MessageHandler and tests - +- Add TorchWorker first implementation and mock inference app example ### Development branch From 3c20f464d512c7b3a1ead1981efb96842e7a14bb Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Tue, 25 Jun 2024 18:38:12 -0500 Subject: [PATCH 07/40] Make style --- smartsim/_core/mli/infrastructure/control/workermanager.py | 1 - 1 file changed, 1 deletion(-) diff --git a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index f46ced8756..7a5f168fe4 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -247,7 +247,6 @@ def _on_iteration(self) -> None: if not self._validate_request(request): return - # # let the worker perform additional custom deserialization # request = self._worker.deserialize(request_bytes) From 
b9ed5ba8baa9fc355640f8c2461a0ce7d16cf56b Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Wed, 26 Jun 2024 09:51:07 -0500 Subject: [PATCH 08/40] Attempt to mitigate import dragon error --- .../_core/mli/infrastructure/control/workermanager.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index 7a5f168fe4..607f94982d 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -24,9 +24,15 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import sys + # isort: off -import dragon -from dragon import fli +try: + import dragon + from dragon import fli +except ImportError as exc: + if not "pytest" in sys.modules: + raise exc from None # isort: on import time From 0de06f3b6c0fa4747b471989a8068e4e609829a0 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Wed, 26 Jun 2024 10:20:27 -0500 Subject: [PATCH 09/40] Import dragon optional --- smartsim/_core/mli/comm/channel/dragonchannel.py | 9 ++++++--- smartsim/_core/mli/comm/channel/dragonfli.py | 12 ++++++++---- .../mli/infrastructure/control/workermanager.py | 2 +- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/smartsim/_core/mli/comm/channel/dragonchannel.py b/smartsim/_core/mli/comm/channel/dragonchannel.py index d4dbfa3ba0..e79fd2dfcf 100644 --- a/smartsim/_core/mli/comm/channel/dragonchannel.py +++ b/smartsim/_core/mli/comm/channel/dragonchannel.py @@ -24,14 +24,17 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - +import sys import smartsim._core.mli.comm.channel.channel as cch from smartsim.log import get_logger logger = get_logger(__name__) -import dragon.channels as dch - +try: + import dragon.channels as dch +except ImportError as exc: + if not "pytest" in sys.modules: + raise exc from None class DragonCommChannel(cch.CommChannelBase): """Passes messages by writing to a Dragon channel""" diff --git a/smartsim/_core/mli/comm/channel/dragonfli.py b/smartsim/_core/mli/comm/channel/dragonfli.py index f601bb2eb8..3992241380 100644 --- a/smartsim/_core/mli/comm/channel/dragonfli.py +++ b/smartsim/_core/mli/comm/channel/dragonfli.py @@ -24,11 +24,15 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# isort: off -import dragon -from dragon import fli -import dragon.channels as dch +import sys +# isort: off +try: + from dragon import fli + import dragon.channels as dch +except ImportError as exc: + if not "pytest" in sys.modules: + raise exc from None # isort: on diff --git a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index 607f94982d..6003869e46 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -168,7 +168,7 @@ class WorkerManager(Service): def __init__( self, - file_like_interface: fli.FLInterface, + file_like_interface: "fli.FLInterface", worker: MachineLearningWorkerBase, feature_store: t.Optional[FeatureStore] = None, as_service: bool = False, From d051385a963f2c18e55792b30316cd41eb2ca357 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Wed, 26 Jun 2024 10:28:23 -0500 Subject: [PATCH 10/40] isort --- smartsim/_core/mli/comm/channel/dragonchannel.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/smartsim/_core/mli/comm/channel/dragonchannel.py b/smartsim/_core/mli/comm/channel/dragonchannel.py index e79fd2dfcf..872eb32350 100644 --- a/smartsim/_core/mli/comm/channel/dragonchannel.py +++ b/smartsim/_core/mli/comm/channel/dragonchannel.py @@ -25,6 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import sys + import smartsim._core.mli.comm.channel.channel as cch from smartsim.log import get_logger @@ -36,6 +37,7 @@ if not "pytest" in sys.modules: raise exc from None + class DragonCommChannel(cch.CommChannelBase): """Passes messages by writing to a Dragon channel""" From e77b1cd5c9c8359aa7be27b2a3d61c398eaa7d04 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Wed, 26 Jun 2024 11:33:47 -0500 Subject: [PATCH 11/40] Fix imports in dragon backend tests --- smartsim/_core/launcher/dragon/dragonBackend.py | 10 ++++------ tests/test_dragon_backend.py | 10 ++++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/smartsim/_core/launcher/dragon/dragonBackend.py b/smartsim/_core/launcher/dragon/dragonBackend.py index d103579115..f0e450a19c 100644 --- a/smartsim/_core/launcher/dragon/dragonBackend.py +++ b/smartsim/_core/launcher/dragon/dragonBackend.py @@ -36,15 +36,14 @@ # pylint: disable=import-error # isort: off +import dragon.data.ddict.ddict as dragon_ddict import dragon.infrastructure.connection as dragon_connection import dragon.infrastructure.policy as dragon_policy -from dragon.infrastructure.process_desc import ProcessOptions -from dragon.data.ddict.ddict import DDict +import dragon.infrastructure.process_desc as dragon_process_desc import dragon.native.group_state as dragon_group_state import dragon.native.process as dragon_process import dragon.native.process_group as dragon_process_group import dragon.native.machine as dragon_machine -import multiprocessing as mp # pylint: enable=import-error # isort: on @@ -78,7 +77,6 @@ def __str__(self) -> str: return self.value -mp.set_start_method("dragon") @dataclass @@ -405,7 +403,7 @@ def infra_ddict(self) -> str: """ if self._infra_ddict is None: logger.info("Creating DDict") - self._infra_ddict = DDict() # todo: parametrize + self._infra_ddict = dragon_ddict.DDict() # todo: parametrize logger.info("Created DDict") self._infra_ddict["creation"] = str(time.time()) logger.info(self._infra_ddict["creation"]) @@ -427,7 +425,7 @@ def _start_steps(self) -> None: placement=dragon_policy.Policy.Placement.HOST_NAME, host_name=hosts[0], 
) - options = ProcessOptions(make_inf_channels=True) + options = dragon_process_desc.ProcessOptions(make_inf_channels=True) grp = dragon_process_group.ProcessGroup( restart=False, pmi_enabled=request.pmi_enabled, policy=global_policy ) diff --git a/tests/test_dragon_backend.py b/tests/test_dragon_backend.py index a510f660a5..f284f38d99 100644 --- a/tests/test_dragon_backend.py +++ b/tests/test_dragon_backend.py @@ -103,6 +103,16 @@ def get_mock_backend(monkeypatch: pytest.MonkeyPatch) -> "DragonBackend": "dragon.infrastructure.connection", MagicMock(), ) + monkeypatch.setitem( + sys.modules, + "dragon.infrastructure.process_desc", + MagicMock(), + ) + monkeypatch.setitem( + sys.modules, + "dragon.data.ddict.ddict", + MagicMock(), + ) monkeypatch.setitem( sys.modules, "dragon.infrastructure.policy", From a90888d44d3e9ef2207a97c6b0936418daf4d06c Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Wed, 26 Jun 2024 11:36:26 -0500 Subject: [PATCH 12/40] Style --- smartsim/_core/launcher/dragon/dragonBackend.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/smartsim/_core/launcher/dragon/dragonBackend.py b/smartsim/_core/launcher/dragon/dragonBackend.py index f0e450a19c..d91f73e3c5 100644 --- a/smartsim/_core/launcher/dragon/dragonBackend.py +++ b/smartsim/_core/launcher/dragon/dragonBackend.py @@ -77,8 +77,6 @@ def __str__(self) -> str: return self.value - - @dataclass class ProcessGroupInfo: status: SmartSimStatus From b4312215184478186e837ab193cc609fb53f4698 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Wed, 26 Jun 2024 11:40:14 -0500 Subject: [PATCH 13/40] Fix type --- smartsim/_core/launcher/dragon/dragonBackend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smartsim/_core/launcher/dragon/dragonBackend.py b/smartsim/_core/launcher/dragon/dragonBackend.py index d91f73e3c5..52f69ec41f 100644 --- a/smartsim/_core/launcher/dragon/dragonBackend.py +++ b/smartsim/_core/launcher/dragon/dragonBackend.py @@ -189,7 +189,7 @@ def __init__(self, pid: int) -> None: self._view = DragonBackendView(self) logger.debug(self._view.host_desc) - self._infra_ddict: t.Optional[DDict] = None + self._infra_ddict: t.Optional[dragon_ddict.DDict] = None @property def hosts(self) -> list[str]: From 23efebc25027d908703e80e059a3c431d5f7d434 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Wed, 26 Jun 2024 12:38:55 -0500 Subject: [PATCH 14/40] Rename examples dir --- ex/high_throughput_inference/mli_driver.py | 38 ++++++ ex/high_throughput_inference/mock_app.py | 126 ++++++++++++++++++ .../standalone_workermanager.py | 44 ++++++ 3 files changed, 208 insertions(+) create mode 100644 ex/high_throughput_inference/mli_driver.py create mode 100644 ex/high_throughput_inference/mock_app.py create mode 100644 ex/high_throughput_inference/standalone_workermanager.py diff --git a/ex/high_throughput_inference/mli_driver.py b/ex/high_throughput_inference/mli_driver.py new file mode 100644 index 0000000000..7b8db5ed83 --- /dev/null +++ b/ex/high_throughput_inference/mli_driver.py @@ -0,0 +1,38 @@ +import os +import sys +from smartsim import Experiment +from smartsim.status import TERMINAL_STATUSES +import time + +device = "cpu" +filedir = os.path.dirname(__file__) +worker_manager_script_name = os.path.join(filedir, "standalone_workermanager.py") +app_script_name = os.path.join(filedir, "mock_app.py") +model_name = os.path.join(filedir, f"resnet50.{device.upper()}.pt") + + +exp = Experiment("MLI_proto", launcher="dragon", exp_path=os.path.join(filedir, "MLI_proto")) + +worker_manager_rs = 
exp.create_run_settings(sys.executable, [worker_manager_script_name, "--device", device]) +worker_manager = exp.create_model("worker_manager", run_settings=worker_manager_rs) +worker_manager.attach_generator_files(to_copy=[worker_manager_script_name]) + + +app_rs = exp.create_run_settings(sys.executable, exe_args = [app_script_name, "--device", device]) +app = exp.create_model("app", run_settings=app_rs) +app.attach_generator_files(to_copy=[app_script_name], to_symlink=[model_name]) + + +exp.generate(worker_manager, app, overwrite=True) +exp.start(worker_manager, app, block=False) + +while True: + if exp.get_status(app)[0] in TERMINAL_STATUSES: + exp.stop(worker_manager) + break + if exp.get_status(worker_manager)[0] in TERMINAL_STATUSES: + exp.stop(app) + break + time.sleep(5) + +print("Exiting.") \ No newline at end of file diff --git a/ex/high_throughput_inference/mock_app.py b/ex/high_throughput_inference/mock_app.py new file mode 100644 index 0000000000..afc0c836b8 --- /dev/null +++ b/ex/high_throughput_inference/mock_app.py @@ -0,0 +1,126 @@ +# isort: off +import dragon +from dragon import fli +from dragon.channels import Channel +import dragon.channels +from dragon.data.ddict.ddict import DDict +from dragon.globalservices.api_setup import connect_to_infrastructure +from dragon.utils import b64decode, b64encode + +# isort: on + +import argparse +import io +import numpy +import os +import tabulate +import time +import torch +import typing as t + +from smartsim._core.mli.message_handler import MessageHandler + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser("Mock application") + parser.add_argument("--device", default="cpu") + + args = parser.parse_args() + + connect_to_infrastructure() + ddict_str = os.environ["SS_DRG_DDICT"] + + ddict = DDict.attach(ddict_str) + + to_worker_fli_str = None + + while to_worker_fli_str is None: + try: + to_worker_fli_str = ddict["to_worker_fli"] + except Exception as e: + time.sleep(1) + + to_worker_fli = fli.FLInterface.attach(b64decode(to_worker_fli_str)) + + batch_size = 32 + model = torch.jit.load(f"resnet50.{args.device.upper()}.pt") + buffer = io.BytesIO() + batch = torch.randn((batch_size, 3, 224, 224), dtype=torch.float32) + scripted = torch.jit.trace(model, batch) + torch.jit.save(scripted, buffer) + + total_iterations = 10 + + headers=[ + "batch_size", + "build_tensor", + "build_request", + "serialize_request", + "send", + "receive", + "deserialize_response", + "deserialize_tensor", + ] + + print(",".join(headers)) + + for batch_size in [1, 8, 32, 64, 128]: + + timings = [] + for iteration_number in range(total_iterations + int(batch_size==1)): + + timings.append([batch_size]) + + batch = torch.randn((batch_size, 3, 224, 224), dtype=torch.float32) + + start = time.perf_counter() + interm = start + built_tensor = MessageHandler.build_tensor( + batch.numpy(), "c", "float32", list(batch.shape) + ) + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + + from_worker_ch = Channel.make_process_local() + + request = MessageHandler.build_request( + reply_channel=from_worker_ch.serialize(), + model=buffer.getvalue(), + inputs=[built_tensor], + outputs=[], + output_descriptors=[], + custom_attributes=None, + ) + + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + request_bytes = MessageHandler.serialize_request(request) + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + with to_worker_fli.sendh(timeout=None) as to_sendh: + 
to_sendh.send_bytes(request_bytes) + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + + with from_worker_ch.recvh(timeout=None) as from_recvh: + resp = from_recvh.recv_bytes(timeout=None) + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + response = MessageHandler.deserialize_response(resp) + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + result = torch.from_numpy( + numpy.frombuffer( + response.result.data[0].blob, + dtype=str(response.result.data[0].tensorDescriptor.dataType), + ) + ) + + timings[-1].append(time.perf_counter() - interm) + interm = time.perf_counter() + + # duration = time.perf_counter() - start + # print(f"{duration:.3f} s") + + print(",".join(str(timing) for timing in timings[-1])) diff --git a/ex/high_throughput_inference/standalone_workermanager.py b/ex/high_throughput_inference/standalone_workermanager.py new file mode 100644 index 0000000000..32d534f360 --- /dev/null +++ b/ex/high_throughput_inference/standalone_workermanager.py @@ -0,0 +1,44 @@ +# isort: off +import dragon +from dragon import fli +from dragon.channels import Channel +from dragon.data.ddict.ddict import DDict +from dragon.utils import b64decode, b64encode +from dragon.globalservices.api_setup import connect_to_infrastructure +# isort: on +import argparse +import os + + +from smartsim._core.mli.comm.channel.dragonchannel import DragonCommChannel +from smartsim._core.mli.infrastructure.worker.worker import TorchWorker +from smartsim._core.mli.infrastructure.control.workermanager import ( + WorkerManager, +) + +if __name__ == "__main__": + parser = argparse.ArgumentParser("Worker Manager") + parser.add_argument("--device", default="gpu") + args = parser.parse_args() + connect_to_infrastructure() + ddict_str = os.environ["SS_DRG_DDICT"] + ddict = DDict.attach(ddict_str) + + to_worker_channel = Channel.make_process_local() + to_worker_manager_channel = Channel.make_process_local() + channels = [Channel.make_process_local() for _ in range(100)] + to_worker_fli = fli.FLInterface(main_ch=to_worker_channel, manager_ch=to_worker_manager_channel, stream_channels=channels) + ddict["to_worker_fli"] = b64encode(to_worker_fli.serialize()) + + torch_worker = TorchWorker() + + worker_manager = WorkerManager( + file_like_interface=to_worker_fli, + worker=torch_worker, + feature_store=None, + as_service=True, + cooldown=10, + comm_channel_type=DragonCommChannel, + device = args.device, + ) + worker_manager.execute() From 09b9d249c5c2147a062f95356c943c4da8e534b9 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Wed, 26 Jun 2024 12:48:11 -0500 Subject: [PATCH 15/40] Remove old dir --- .../high_throughput_inference/mli_driver.py | 38 ------ .../high_throughput_inference/mock_app.py | 126 ------------------ .../standalone_workermanager.py | 44 ------ 3 files changed, 208 deletions(-) delete mode 100644 examples/high_throughput_inference/mli_driver.py delete mode 100644 examples/high_throughput_inference/mock_app.py delete mode 100644 examples/high_throughput_inference/standalone_workermanager.py diff --git a/examples/high_throughput_inference/mli_driver.py b/examples/high_throughput_inference/mli_driver.py deleted file mode 100644 index d32d88e51b..0000000000 --- a/examples/high_throughput_inference/mli_driver.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -import sys -from smartsim import Experiment -from smartsim.status import TERMINAL_STATUSES -import time - -device = "gpu" -filedir = os.path.dirname(__file__) 
-worker_manager_script_name = os.path.join(filedir, "standalone_workermanager.py") -app_script_name = os.path.join(filedir, "mock_app.py") -model_name = os.path.join(filedir, f"resnet50.{device.upper()}.pt") - - -exp = Experiment("MLI_proto", launcher="dragon", exp_path=os.path.join(filedir, "MLI_proto")) - -worker_manager_rs = exp.create_run_settings(sys.executable, [worker_manager_script_name, "--device", device]) -worker_manager = exp.create_model("worker_manager", run_settings=worker_manager_rs) -worker_manager.attach_generator_files(to_copy=[worker_manager_script_name]) - - -app_rs = exp.create_run_settings(sys.executable, exe_args = [app_script_name, "--device", device]) -app = exp.create_model("app", run_settings=app_rs) -app.attach_generator_files(to_copy=[app_script_name], to_symlink=[model_name]) - - -exp.generate(worker_manager, app, overwrite=True) -exp.start(worker_manager, app, block=False) - -while True: - if exp.get_status(app)[0] in TERMINAL_STATUSES: - exp.stop(worker_manager) - break - if exp.get_status(worker_manager)[0] in TERMINAL_STATUSES: - exp.stop(app) - break - time.sleep(5) - -print("Exiting.") \ No newline at end of file diff --git a/examples/high_throughput_inference/mock_app.py b/examples/high_throughput_inference/mock_app.py deleted file mode 100644 index afc0c836b8..0000000000 --- a/examples/high_throughput_inference/mock_app.py +++ /dev/null @@ -1,126 +0,0 @@ -# isort: off -import dragon -from dragon import fli -from dragon.channels import Channel -import dragon.channels -from dragon.data.ddict.ddict import DDict -from dragon.globalservices.api_setup import connect_to_infrastructure -from dragon.utils import b64decode, b64encode - -# isort: on - -import argparse -import io -import numpy -import os -import tabulate -import time -import torch -import typing as t - -from smartsim._core.mli.message_handler import MessageHandler - - -if __name__ == "__main__": - - parser = argparse.ArgumentParser("Mock application") - parser.add_argument("--device", default="cpu") - - args = parser.parse_args() - - connect_to_infrastructure() - ddict_str = os.environ["SS_DRG_DDICT"] - - ddict = DDict.attach(ddict_str) - - to_worker_fli_str = None - - while to_worker_fli_str is None: - try: - to_worker_fli_str = ddict["to_worker_fli"] - except Exception as e: - time.sleep(1) - - to_worker_fli = fli.FLInterface.attach(b64decode(to_worker_fli_str)) - - batch_size = 32 - model = torch.jit.load(f"resnet50.{args.device.upper()}.pt") - buffer = io.BytesIO() - batch = torch.randn((batch_size, 3, 224, 224), dtype=torch.float32) - scripted = torch.jit.trace(model, batch) - torch.jit.save(scripted, buffer) - - total_iterations = 10 - - headers=[ - "batch_size", - "build_tensor", - "build_request", - "serialize_request", - "send", - "receive", - "deserialize_response", - "deserialize_tensor", - ] - - print(",".join(headers)) - - for batch_size in [1, 8, 32, 64, 128]: - - timings = [] - for iteration_number in range(total_iterations + int(batch_size==1)): - - timings.append([batch_size]) - - batch = torch.randn((batch_size, 3, 224, 224), dtype=torch.float32) - - start = time.perf_counter() - interm = start - built_tensor = MessageHandler.build_tensor( - batch.numpy(), "c", "float32", list(batch.shape) - ) - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - - from_worker_ch = Channel.make_process_local() - - request = MessageHandler.build_request( - reply_channel=from_worker_ch.serialize(), - model=buffer.getvalue(), - inputs=[built_tensor], - outputs=[], - 
output_descriptors=[], - custom_attributes=None, - ) - - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - request_bytes = MessageHandler.serialize_request(request) - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - with to_worker_fli.sendh(timeout=None) as to_sendh: - to_sendh.send_bytes(request_bytes) - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - - with from_worker_ch.recvh(timeout=None) as from_recvh: - resp = from_recvh.recv_bytes(timeout=None) - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - response = MessageHandler.deserialize_response(resp) - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - result = torch.from_numpy( - numpy.frombuffer( - response.result.data[0].blob, - dtype=str(response.result.data[0].tensorDescriptor.dataType), - ) - ) - - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - - # duration = time.perf_counter() - start - # print(f"{duration:.3f} s") - - print(",".join(str(timing) for timing in timings[-1])) diff --git a/examples/high_throughput_inference/standalone_workermanager.py b/examples/high_throughput_inference/standalone_workermanager.py deleted file mode 100644 index 32d534f360..0000000000 --- a/examples/high_throughput_inference/standalone_workermanager.py +++ /dev/null @@ -1,44 +0,0 @@ -# isort: off -import dragon -from dragon import fli -from dragon.channels import Channel -from dragon.data.ddict.ddict import DDict -from dragon.utils import b64decode, b64encode -from dragon.globalservices.api_setup import connect_to_infrastructure -# isort: on -import argparse -import os - - -from smartsim._core.mli.comm.channel.dragonchannel import DragonCommChannel -from smartsim._core.mli.infrastructure.worker.worker import TorchWorker -from smartsim._core.mli.infrastructure.control.workermanager import ( - WorkerManager, -) - -if __name__ == "__main__": - parser = argparse.ArgumentParser("Worker Manager") - parser.add_argument("--device", default="gpu") - args = parser.parse_args() - connect_to_infrastructure() - ddict_str = os.environ["SS_DRG_DDICT"] - ddict = DDict.attach(ddict_str) - - to_worker_channel = Channel.make_process_local() - to_worker_manager_channel = Channel.make_process_local() - channels = [Channel.make_process_local() for _ in range(100)] - to_worker_fli = fli.FLInterface(main_ch=to_worker_channel, manager_ch=to_worker_manager_channel, stream_channels=channels) - ddict["to_worker_fli"] = b64encode(to_worker_fli.serialize()) - - torch_worker = TorchWorker() - - worker_manager = WorkerManager( - file_like_interface=to_worker_fli, - worker=torch_worker, - feature_store=None, - as_service=True, - cooldown=10, - comm_channel_type=DragonCommChannel, - device = args.device, - ) - worker_manager.execute() From 56d8e50f4f7e9fddb9e4d79ba0b1fe556e400684 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Wed, 26 Jun 2024 18:47:40 -0500 Subject: [PATCH 16/40] Add tests for torch worker --- ex/high_throughput_inference/mock_app.py | 5 +- .../standalone_workermanager.py | 2 +- .../mli/infrastructure/worker/torch_worker.py | 118 ++++++++++++ .../_core/mli/infrastructure/worker/worker.py | 91 +-------- tests/mli/test_torch_worker.py | 173 ++++++++++++++++++ tests/mli/test_worker_manager.py | 12 +- 6 files changed, 309 insertions(+), 92 deletions(-) create mode 100644 smartsim/_core/mli/infrastructure/worker/torch_worker.py create mode 100644 
tests/mli/test_torch_worker.py diff --git a/ex/high_throughput_inference/mock_app.py b/ex/high_throughput_inference/mock_app.py index afc0c836b8..d22792d15b 100644 --- a/ex/high_throughput_inference/mock_app.py +++ b/ex/high_throughput_inference/mock_app.py @@ -5,7 +5,7 @@ import dragon.channels from dragon.data.ddict.ddict import DDict from dragon.globalservices.api_setup import connect_to_infrastructure -from dragon.utils import b64decode, b64encode +from dragon.utils import b64decode # isort: on @@ -13,11 +13,8 @@ import io import numpy import os -import tabulate import time import torch -import typing as t - from smartsim._core.mli.message_handler import MessageHandler diff --git a/ex/high_throughput_inference/standalone_workermanager.py b/ex/high_throughput_inference/standalone_workermanager.py index 32d534f360..40fefcc372 100644 --- a/ex/high_throughput_inference/standalone_workermanager.py +++ b/ex/high_throughput_inference/standalone_workermanager.py @@ -11,7 +11,7 @@ from smartsim._core.mli.comm.channel.dragonchannel import DragonCommChannel -from smartsim._core.mli.infrastructure.worker.worker import TorchWorker +from smartsim._core.mli.infrastructure.worker.torch_worker import TorchWorker from smartsim._core.mli.infrastructure.control.workermanager import ( WorkerManager, ) diff --git a/smartsim/_core/mli/infrastructure/worker/torch_worker.py b/smartsim/_core/mli/infrastructure/worker/torch_worker.py new file mode 100644 index 0000000000..c350499c20 --- /dev/null +++ b/smartsim/_core/mli/infrastructure/worker/torch_worker.py @@ -0,0 +1,118 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
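TorchWorker, added below in its own module, operates on TorchScript bytes: the client traces and serializes the model with torch.jit, and load_model reloads those bytes onto the requested device (mapping "gpu" to "cuda"). A small round trip, shown as a hedged sketch independent of the MLI plumbing:

    import io

    import torch

    model = torch.nn.Linear(4, 2)
    example = torch.randn(1, 4)

    # client side: trace and serialize (mirrors the tracing done in mock_app.py)
    buffer = io.BytesIO()
    torch.jit.save(torch.jit.trace(model, example), buffer)
    model_bytes = buffer.getvalue()

    # worker side: reload onto the target device (mirrors TorchWorker.load_model)
    reloaded = torch.jit.load(io.BytesIO(model_bytes), map_location="cpu")
    assert reloaded(example).shape == (1, 2)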
+ +import io + +import numpy as np +import torch + +from .....error import SmartSimError +from .....log import get_logger +from ...mli_schemas.tensor import tensor_capnp +from .worker import ( + ExecuteResult, + FetchInputResult, + FetchModelResult, + InferenceRequest, + LoadModelResult, + MachineLearningWorkerBase, + TransformInputResult, + TransformOutputResult, +) + +logger = get_logger(__name__) + + +class TorchWorker(MachineLearningWorkerBase): + """A worker that executes a PyTorch model.""" + + @staticmethod + def load_model( + request: InferenceRequest, fetch_result: FetchModelResult, device: str + ) -> LoadModelResult: + model_bytes = fetch_result.model_bytes or request.raw_model + if not model_bytes: + raise ValueError("Unable to load model without reference object") + + _device_to_torch = {"cpu": "cpu", "gpu": "cuda"} + device = _device_to_torch[device] + buffer = io.BytesIO(model_bytes) + model = torch.jit.load(buffer, map_location=device) # type: ignore + result = LoadModelResult(model) + return result + + @staticmethod + def transform_input( + request: InferenceRequest, fetch_result: FetchInputResult, device: str + ) -> TransformInputResult: + result = [] + + _device_to_torch = {"cpu": "cpu", "gpu": "cuda"} + device = _device_to_torch[device] + if fetch_result.meta is None: + raise ValueError("Cannot reconstruct tensor without meta information") + for item, item_meta in zip(fetch_result.inputs, fetch_result.meta): + tensor_desc: tensor_capnp.TensorDescriptor = item_meta + result.append( + torch.tensor(np.frombuffer(item, dtype=str(tensor_desc.dataType))) + .to(device) + .reshape(tuple(dim for dim in tensor_desc.dimensions)) + ) + return TransformInputResult(result) + # return data # note: this fails copy test! + + @staticmethod + def execute( + request: InferenceRequest, + load_result: LoadModelResult, + transform_result: TransformInputResult, + ) -> ExecuteResult: + if not load_result.model: + raise SmartSimError("Model must be loaded to execute") + + model: torch.nn.Module = load_result.model + model.eval() + results = [model(tensor).detach() for tensor in transform_result.transformed] + + execute_result = ExecuteResult(results) + return execute_result + + @staticmethod + def transform_output( + request: InferenceRequest, + execute_result: ExecuteResult, + result_device: str, + ) -> TransformOutputResult: + if result_device != "cpu": + transformed = [ + item.to("cpu").clone() for item in execute_result.predictions + ] + # todo: need the shape from latest schemas added here. + return TransformOutputResult(transformed, None, "c", "float32") # fixme + + return TransformOutputResult( + execute_result.predictions, None, "c", "float32" + ) # fixme diff --git a/smartsim/_core/mli/infrastructure/worker/worker.py b/smartsim/_core/mli/infrastructure/worker/worker.py index 08c4997554..24dc734d00 100644 --- a/smartsim/_core/mli/infrastructure/worker/worker.py +++ b/smartsim/_core/mli/infrastructure/worker/worker.py @@ -24,18 +24,13 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
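transform_input above rebuilds device-resident tensors from the raw bytes and the TensorDescriptor metadata (data type and dimensions) carried by the request. The reconstruction reduces to a few lines; a hedged, self-contained round trip (the helper name is invented for the example):

    import numpy as np
    import torch


    def rebuild_tensor(blob: bytes, dtype: str, dims: list, device: str = "cpu") -> torch.Tensor:
        # bytes + (dtype, dimensions) -> tensor, as TorchWorker.transform_input does
        return torch.tensor(np.frombuffer(blob, dtype=dtype)).to(device).reshape(tuple(dims))


    original = torch.randn(2, 3)  # float32 by default
    rebuilt = rebuild_tensor(original.numpy().tobytes(), "float32", [2, 3])
    assert torch.equal(original, rebuilt)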
-import io import typing as t from abc import ABC, abstractmethod -import numpy as np -import torch - -import smartsim.error as sse -from smartsim._core.mli.comm.channel.channel import CommChannelBase -from smartsim._core.mli.infrastructure.storage.featurestore import FeatureStore -from smartsim._core.mli.mli_schemas.tensor import tensor_capnp -from smartsim.log import get_logger +from .....error import SmartSimError +from .....log import get_logger +from ...comm.channel.channel import CommChannelBase +from ...infrastructure.storage.featurestore import FeatureStore logger = get_logger(__name__) @@ -167,7 +162,7 @@ def fetch_model( raise ValueError("Feature store is required for model retrieval") if not request.model_key: - raise sse.SmartSimError( + raise SmartSimError( "Key must be provided to retrieve model from feature store" ) @@ -176,7 +171,7 @@ def fetch_model( return FetchModelResult(raw_bytes) except FileNotFoundError as ex: logger.exception(ex) - raise sse.SmartSimError( + raise SmartSimError( f"Model could not be retrieved with key {request.model_key}" ) from ex @@ -204,7 +199,7 @@ def fetch_inputs( data.append(tensor_bytes) except KeyError as ex: logger.exception(ex) - raise sse.SmartSimError( + raise SmartSimError( f"Model could not be retrieved with key {input_}" ) from ex return FetchInputResult( @@ -303,75 +298,3 @@ def transform_output( :param execute_result: The result of inference wrapped in an ExecuteResult :param result_device: The device on which the result of inference is placed :return:""" - - -class TorchWorker(MachineLearningWorkerBase): - """A worker that executes a PyTorch model.""" - - @staticmethod - def load_model( - request: InferenceRequest, fetch_result: FetchModelResult, device: str - ) -> LoadModelResult: - model_bytes = fetch_result.model_bytes or request.raw_model - if not model_bytes: - raise ValueError("Unable to load model without reference object") - - _device_to_torch = {"cpu": "cpu", "gpu": "cuda"} - device = _device_to_torch[device] - buffer = io.BytesIO(model_bytes) - model = torch.jit.load(buffer, map_location=device) # type: ignore - result = LoadModelResult(model) - return result - - @staticmethod - def transform_input( - request: InferenceRequest, fetch_result: FetchInputResult, device: str - ) -> TransformInputResult: - result = [] - - _device_to_torch = {"cpu": "cpu", "gpu": "cuda"} - device = _device_to_torch[device] - if fetch_result.meta is None: - raise ValueError("Cannot reconstruct tensor without meta information") - for item, item_meta in zip(fetch_result.inputs, fetch_result.meta): - tensor_desc: tensor_capnp.TensorDescriptor = item_meta - result.append( - torch.tensor(np.frombuffer(item, dtype=str(tensor_desc.dataType))) - .to(device) - .reshape(tuple(dim for dim in tensor_desc.dimensions)) - ) - return TransformInputResult(result) - # return data # note: this fails copy test! 
- - @staticmethod - def execute( - request: InferenceRequest, - load_result: LoadModelResult, - transform_result: TransformInputResult, - ) -> ExecuteResult: - if not load_result.model: - raise sse.SmartSimError("Model must be loaded to execute") - - model: torch.nn.Module = load_result.model - model.eval() - results = [model(tensor).detach() for tensor in transform_result.transformed] - - execute_result = ExecuteResult(results) - return execute_result - - @staticmethod - def transform_output( - request: InferenceRequest, - execute_result: ExecuteResult, - result_device: str, - ) -> TransformOutputResult: - if result_device != "cpu": - transformed = [ - item.to("cpu").clone() for item in execute_result.predictions - ] - # todo: need the shape from latest schemas added here. - return TransformOutputResult(transformed, None, "c", "float32") # fixme - - return TransformOutputResult( - execute_result.predictions, None, "c", "float32" - ) # fixme diff --git a/tests/mli/test_torch_worker.py b/tests/mli/test_torch_worker.py new file mode 100644 index 0000000000..0b1cd4ccf3 --- /dev/null +++ b/tests/mli/test_torch_worker.py @@ -0,0 +1,173 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
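With the Torch-specific code out of worker.py, MachineLearningWorkerBase becomes the extension point for other frameworks, and the tests that follow exercise the Torch implementation stage by stage. A hedged skeleton of what another backend would have to provide (the class name and empty bodies are illustrative only; the signatures mirror TorchWorker above):

    from smartsim._core.mli.infrastructure.worker.worker import (
        ExecuteResult,
        FetchInputResult,
        FetchModelResult,
        InferenceRequest,
        LoadModelResult,
        MachineLearningWorkerBase,
        TransformInputResult,
        TransformOutputResult,
    )


    class MyFrameworkWorker(MachineLearningWorkerBase):
        """Illustrative stub of a non-Torch worker backend."""

        @staticmethod
        def load_model(
            request: InferenceRequest, fetch_result: FetchModelResult, device: str
        ) -> LoadModelResult:
            raise NotImplementedError

        @staticmethod
        def transform_input(
            request: InferenceRequest, fetch_result: FetchInputResult, device: str
        ) -> TransformInputResult:
            raise NotImplementedError

        @staticmethod
        def execute(
            request: InferenceRequest,
            load_result: LoadModelResult,
            transform_result: TransformInputResult,
        ) -> ExecuteResult:
            raise NotImplementedError

        @staticmethod
        def transform_output(
            request: InferenceRequest,
            execute_result: ExecuteResult,
            result_device: str,
        ) -> TransformOutputResult:
            raise NotImplementedError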
+ +import io + +import numpy as np +import pytest +import torch +from torch import nn +from torch.nn import functional as F + +from smartsim._core.mli.infrastructure.worker.torch_worker import TorchWorker +from smartsim._core.mli.infrastructure.worker.worker import ( + ExecuteResult, + FetchInputResult, + FetchModelResult, + InferenceRequest, + LoadModelResult, + TransformInputResult, +) +from smartsim._core.mli.message_handler import MessageHandler +from smartsim.log import get_logger + +logger = get_logger(__name__) +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + + +# simple MNIST in PyTorch +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 32, 3, 1) + self.conv2 = nn.Conv2d(32, 64, 3, 1) + self.dropout1 = nn.Dropout(0.25) + self.dropout2 = nn.Dropout(0.5) + self.fc1 = nn.Linear(9216, 128) + self.fc2 = nn.Linear(128, 10) + + def forward(self, x): + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.relu(x) + x = F.max_pool2d(x, 2) + x = self.dropout1(x) + x = torch.flatten(x, 1) + x = self.fc1(x) + x = F.relu(x) + x = self.dropout2(x) + x = self.fc2(x) + output = F.log_softmax(x, dim=1) + return output + + +torch_device = {"cpu": "cpu", "gpu": "cuda"} + + +def get_batch() -> torch.Tensor: + return torch.rand(20, 1, 28, 28) + + +def create_torch_model(): + n = Net() + example_forward_input = get_batch() + module = torch.jit.trace(n, example_forward_input) + model_buffer = io.BytesIO() + torch.jit.save(module, model_buffer) + return model_buffer.getvalue() + + +def get_request() -> InferenceRequest: + + tensors = [get_batch() for _ in range(2)] + serialized_tensors = [ + MessageHandler.build_tensor(tensor.numpy(), "c", "float32", list(tensor.shape)) + for tensor in tensors + ] + + return InferenceRequest( + model_key="model", + callback=None, + raw_inputs=[s_tensor.blob for s_tensor in serialized_tensors], + input_keys=None, + input_meta=[s_tensor.tensorDescriptor for s_tensor in serialized_tensors], + output_keys=None, + raw_model=create_torch_model(), + batch_size=0, + ) + + +sample_request: InferenceRequest = get_request() +worker = TorchWorker() + + +def test_load_model(mlutils) -> None: + fetch_model_result = FetchModelResult(sample_request.raw_model) + load_model_result = worker.load_model( + sample_request, fetch_model_result, mlutils.get_test_device().lower() + ) + + assert load_model_result.model( + get_batch().to(torch_device[mlutils.get_test_device().lower()]) + ).shape == torch.Size((20, 10)) + + +def test_transform_input(mlutils) -> None: + fetch_input_result = FetchInputResult( + sample_request.raw_inputs, sample_request.input_meta + ) + + transform_input_result = worker.transform_input( + sample_request, fetch_input_result, mlutils.get_test_device().lower() + ) + + assert all( + transformed.shape == get_batch().shape + for transformed in transform_input_result.transformed + ) + + +def test_execute(mlutils) -> None: + load_model_result = LoadModelResult( + Net().to(torch_device[mlutils.get_test_device().lower()]) + ) + transform_result = TransformInputResult( + [ + get_batch().to(torch_device[mlutils.get_test_device().lower()]) + for _ in range(2) + ] + ) + + execute_result = worker.execute(sample_request, load_model_result, transform_result) + + assert all( + result.shape == torch.Size((20, 10)) for result in execute_result.predictions + ) + + +def test_transform_output(mlutils): + execute_result = ExecuteResult([torch.rand((20, 10)) for _ in range(2)]) + + 
transformed_output = worker.transform_output( + sample_request, execute_result, torch_device[mlutils.get_test_device().lower()] + ) + + assert transformed_output.outputs == execute_result.predictions + assert transformed_output.shape == None + assert transformed_output.order == "c" + assert transformed_output.dtype == "float32" diff --git a/tests/mli/test_worker_manager.py b/tests/mli/test_worker_manager.py index 01502ec521..46cae5b2e4 100644 --- a/tests/mli/test_worker_manager.py +++ b/tests/mli/test_worker_manager.py @@ -29,10 +29,14 @@ import multiprocessing as mp import pathlib import time -import typing as t import pytest -import torch + +should_run = True +try: + import torch +except ImportError: + should_run = False from smartsim._core.mli.infrastructure.control.workermanager import WorkerManager from smartsim._core.mli.infrastructure.storage.featurestore import FeatureStore @@ -44,9 +48,11 @@ from .worker import IntegratedTorchWorker logger = get_logger(__name__) -# The tests in this file belong to the group_b group +# The tests in this file belong to the group_a group pytestmark = pytest.mark.group_a +pytest.mark.skipif(not should_run, "Test needs PyTorch to run") + def mock_work(worker_manager_queue: "mp.Queue[bytes]") -> None: """Mock event producer for triggering the inference pipeline""" From 6cec83ea4697761b3d297cc8fd50cd44a568af64 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Thu, 27 Jun 2024 08:14:24 -0500 Subject: [PATCH 17/40] Switch to sender-supplied channels in app example --- ex/high_throughput_inference/mock_app.py | 6 ++++-- ex/high_throughput_inference/standalone_workermanager.py | 3 +-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ex/high_throughput_inference/mock_app.py b/ex/high_throughput_inference/mock_app.py index d22792d15b..8a00e8f0e4 100644 --- a/ex/high_throughput_inference/mock_app.py +++ b/ex/high_throughput_inference/mock_app.py @@ -62,6 +62,9 @@ print(",".join(headers)) + from_worker_ch = Channel.make_process_local() + to_worker_ch = Channel.make_process_local() + for batch_size in [1, 8, 32, 64, 128]: timings = [] @@ -79,7 +82,6 @@ timings[-1].append(time.perf_counter() - interm) interm = time.perf_counter() - from_worker_ch = Channel.make_process_local() request = MessageHandler.build_request( reply_channel=from_worker_ch.serialize(), @@ -95,7 +97,7 @@ request_bytes = MessageHandler.serialize_request(request) timings[-1].append(time.perf_counter() - interm) interm = time.perf_counter() - with to_worker_fli.sendh(timeout=None) as to_sendh: + with to_worker_fli.sendh(timeout=None, stream_channel=to_worker_ch) as to_sendh: to_sendh.send_bytes(request_bytes) timings[-1].append(time.perf_counter() - interm) interm = time.perf_counter() diff --git a/ex/high_throughput_inference/standalone_workermanager.py b/ex/high_throughput_inference/standalone_workermanager.py index 40fefcc372..cdc97f4c2e 100644 --- a/ex/high_throughput_inference/standalone_workermanager.py +++ b/ex/high_throughput_inference/standalone_workermanager.py @@ -26,8 +26,7 @@ to_worker_channel = Channel.make_process_local() to_worker_manager_channel = Channel.make_process_local() - channels = [Channel.make_process_local() for _ in range(100)] - to_worker_fli = fli.FLInterface(main_ch=to_worker_channel, manager_ch=to_worker_manager_channel, stream_channels=channels) + to_worker_fli = fli.FLInterface(main_ch=to_worker_channel, manager_ch=to_worker_manager_channel) ddict["to_worker_fli"] = b64encode(to_worker_fli.serialize()) torch_worker = TorchWorker() From 
3ad6d445662a611539b40cb72fcba1a0b4ea102f Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Thu, 27 Jun 2024 16:55:59 -0500 Subject: [PATCH 18/40] Add prototype client for mock app --- ex/high_throughput_inference/mli_driver.py | 2 +- ex/high_throughput_inference/mock_app.py | 206 ++++++++++++--------- 2 files changed, 116 insertions(+), 92 deletions(-) diff --git a/ex/high_throughput_inference/mli_driver.py b/ex/high_throughput_inference/mli_driver.py index 7b8db5ed83..d32d88e51b 100644 --- a/ex/high_throughput_inference/mli_driver.py +++ b/ex/high_throughput_inference/mli_driver.py @@ -4,7 +4,7 @@ from smartsim.status import TERMINAL_STATUSES import time -device = "cpu" +device = "gpu" filedir = os.path.dirname(__file__) worker_manager_script_name = os.path.join(filedir, "standalone_workermanager.py") app_script_name = os.path.join(filedir, "mock_app.py") diff --git a/ex/high_throughput_inference/mock_app.py b/ex/high_throughput_inference/mock_app.py index 8a00e8f0e4..aa3aaeb3ee 100644 --- a/ex/high_throughput_inference/mock_app.py +++ b/ex/high_throughput_inference/mock_app.py @@ -15,111 +15,135 @@ import os import time import torch +import numbers + +from collections import OrderedDict from smartsim._core.mli.message_handler import MessageHandler +class ProtoClient: + def __init__(self, timing_on: bool): + connect_to_infrastructure() + ddict_str = os.environ["SS_DRG_DDICT"] + self._ddict = DDict.attach(ddict_str) + to_worker_fli_str = None + while to_worker_fli_str is None: + try: + to_worker_fli_str = self._ddict["to_worker_fli"] + self._to_worker_fli = fli.FLInterface.attach(b64decode(to_worker_fli_str)) + except KeyError: + time.sleep(1) + self._from_worker_ch = Channel.make_process_local() + self._from_worker_ch_serialized = self._from_worker_ch.serialize() + self._to_worker_ch = Channel.make_process_local() + + self._start = None + self._interm = None + self._timings: OrderedDict[str, list[numbers.Number]] = OrderedDict() + self._timing_on = timing_on + + def _add_label_to_timings(self, label: str): + if label not in self._timings: + self._timings[label] = [] + + @staticmethod + def _format_number(number: numbers.Number): + return f"{number:0.4e}" + + def start_timings(self, batch_size: int): + if self._timing_on: + self._add_label_to_timings("batch_size") + self._timings["batch_size"].append(batch_size) + self._start = time.perf_counter() + self._interm = time.perf_counter() + + def end_timings(self): + if self._timing_on: + self._add_label_to_timings("total_time") + self._timings["total_time"].append(self._format_number(time.perf_counter()-self._start)) + + def measure_time(self, label: str): + if self._timing_on: + self._add_label_to_timings(label) + self._timings[label].append(self._format_number(time.perf_counter()-self._interm)) + self._interm = time.perf_counter() + + def print_timings(self, to_file: bool = False): + print(" ".join(self._timings.keys())) + value_array = numpy.array([value for value in self._timings.values()], dtype=float) + value_array = numpy.transpose(value_array) + for i in range(value_array.shape[0]): + print(" ".join(self._format_number(value) for value in value_array[i])) + if to_file: + numpy.save("timings.npy", value_array) + numpy.savetxt("timings.txt", value_array) + + + def run_model(self, model: bytes, batch: torch.Tensor): + self.start_timings(batch.shape[0]) + built_tensor = MessageHandler.build_tensor( + batch.numpy(), "c", "float32", list(batch.shape)) + self.measure_time("build_tensor") + request = MessageHandler.build_request( + 
reply_channel=self._from_worker_ch_serialized, + model=model, + inputs=[built_tensor], + outputs=[], + output_descriptors=[], + custom_attributes=None, + ) + self.measure_time("build_request") + request_bytes = MessageHandler.serialize_request(request) + self.measure_time("serialize_request") + with self._to_worker_fli.sendh(timeout=None, stream_channel=self._to_worker_ch) as to_sendh: + to_sendh.send_bytes(request_bytes) + + self.measure_time("send") + with self._from_worker_ch.recvh(timeout=None) as from_recvh: + resp = from_recvh.recv_bytes(timeout=None) + self.measure_time("receive") + response = MessageHandler.deserialize_response(resp) + self.measure_time("deserialize_response") + result = torch.from_numpy( + numpy.frombuffer( + response.result.data[0].blob, + dtype=str(response.result.data[0].tensorDescriptor.dataType), + ) + ) + self.measure_time("deserialize_tensor") -if __name__ == "__main__": + self.end_timings() + return result - parser = argparse.ArgumentParser("Mock application") - parser.add_argument("--device", default="cpu") - args = parser.parse_args() +class ResNetWrapper(): + def __init__(self, model: str): + self._model = torch.jit.load(model) + buffer = io.BytesIO() + scripted = torch.jit.trace(self._model, self.get_batch()) + torch.jit.save(scripted, buffer) + self._serialized_model = buffer.getvalue() - connect_to_infrastructure() - ddict_str = os.environ["SS_DRG_DDICT"] + def get_batch(self, batch_size: int=32): + return torch.randn((batch_size, 3, 224, 224), dtype=torch.float32) - ddict = DDict.attach(ddict_str) + @property + def model(self): + return self._serialized_model - to_worker_fli_str = None +if __name__ == "__main__": - while to_worker_fli_str is None: - try: - to_worker_fli_str = ddict["to_worker_fli"] - except Exception as e: - time.sleep(1) + parser = argparse.ArgumentParser("Mock application") + parser.add_argument("--device", default="cpu") + args = parser.parse_args() - to_worker_fli = fli.FLInterface.attach(b64decode(to_worker_fli_str)) + resnet = ResNetWrapper(f"resnet50.{args.device.upper()}.pt") - batch_size = 32 - model = torch.jit.load(f"resnet50.{args.device.upper()}.pt") - buffer = io.BytesIO() - batch = torch.randn((batch_size, 3, 224, 224), dtype=torch.float32) - scripted = torch.jit.trace(model, batch) - torch.jit.save(scripted, buffer) + client = ProtoClient(timing_on=True) total_iterations = 10 - headers=[ - "batch_size", - "build_tensor", - "build_request", - "serialize_request", - "send", - "receive", - "deserialize_response", - "deserialize_tensor", - ] - - print(",".join(headers)) - - from_worker_ch = Channel.make_process_local() - to_worker_ch = Channel.make_process_local() - for batch_size in [1, 8, 32, 64, 128]: - - timings = [] for iteration_number in range(total_iterations + int(batch_size==1)): + client.run_model(resnet.model, resnet.get_batch(batch_size)) - timings.append([batch_size]) - - batch = torch.randn((batch_size, 3, 224, 224), dtype=torch.float32) - - start = time.perf_counter() - interm = start - built_tensor = MessageHandler.build_tensor( - batch.numpy(), "c", "float32", list(batch.shape) - ) - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - - - request = MessageHandler.build_request( - reply_channel=from_worker_ch.serialize(), - model=buffer.getvalue(), - inputs=[built_tensor], - outputs=[], - output_descriptors=[], - custom_attributes=None, - ) - - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - request_bytes = 
MessageHandler.serialize_request(request) - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - with to_worker_fli.sendh(timeout=None, stream_channel=to_worker_ch) as to_sendh: - to_sendh.send_bytes(request_bytes) - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - - with from_worker_ch.recvh(timeout=None) as from_recvh: - resp = from_recvh.recv_bytes(timeout=None) - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - response = MessageHandler.deserialize_response(resp) - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - result = torch.from_numpy( - numpy.frombuffer( - response.result.data[0].blob, - dtype=str(response.result.data[0].tensorDescriptor.dataType), - ) - ) - - timings[-1].append(time.perf_counter() - interm) - interm = time.perf_counter() - - # duration = time.perf_counter() - start - # print(f"{duration:.3f} s") - - print(",".join(str(timing) for timing in timings[-1])) + client.print_timings(to_file=True) \ No newline at end of file From bd5f13357b181ee07e2df880b519d8464c8af174 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Fri, 28 Jun 2024 14:55:18 -0500 Subject: [PATCH 19/40] Update mock app --- ex/high_throughput_inference/mli_driver.py | 5 +++-- ex/high_throughput_inference/mock_app.py | 9 +++++++-- ex/high_throughput_inference/standalone_workermanager.py | 3 +-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/ex/high_throughput_inference/mli_driver.py b/ex/high_throughput_inference/mli_driver.py index d32d88e51b..9b899f4124 100644 --- a/ex/high_throughput_inference/mli_driver.py +++ b/ex/high_throughput_inference/mli_driver.py @@ -10,8 +10,9 @@ app_script_name = os.path.join(filedir, "mock_app.py") model_name = os.path.join(filedir, f"resnet50.{device.upper()}.pt") - -exp = Experiment("MLI_proto", launcher="dragon", exp_path=os.path.join(filedir, "MLI_proto")) +exp_path = os.path.join(filedir, "MLI_proto") +os.makedirs(exp_path, exist_ok=True) +exp = Experiment("MLI_proto", launcher="dragon", exp_path=exp_path) worker_manager_rs = exp.create_run_settings(sys.executable, [worker_manager_script_name, "--device", device]) worker_manager = exp.create_model("worker_manager", run_settings=worker_manager_rs) diff --git a/ex/high_throughput_inference/mock_app.py b/ex/high_throughput_inference/mock_app.py index aa3aaeb3ee..666d7fcc91 100644 --- a/ex/high_throughput_inference/mock_app.py +++ b/ex/high_throughput_inference/mock_app.py @@ -19,6 +19,9 @@ from collections import OrderedDict from smartsim._core.mli.message_handler import MessageHandler +from smartsim.log import get_logger + +logger = get_logger("App") class ProtoClient: def __init__(self, timing_on: bool): @@ -140,10 +143,12 @@ def model(self): client = ProtoClient(timing_on=True) - total_iterations = 10 + total_iterations = 100 - for batch_size in [1, 8, 32, 64, 128]: + for batch_size in [1, 2, 4, 8, 16, 32, 64, 128]: + logger.info(f"Batch size: {batch_size}") for iteration_number in range(total_iterations + int(batch_size==1)): + logger.info(f"Iteration: {iteration_number}") client.run_model(resnet.model, resnet.get_batch(batch_size)) client.print_timings(to_file=True) \ No newline at end of file diff --git a/ex/high_throughput_inference/standalone_workermanager.py b/ex/high_throughput_inference/standalone_workermanager.py index cdc97f4c2e..ccefcbf584 100644 --- a/ex/high_throughput_inference/standalone_workermanager.py +++ 
b/ex/high_throughput_inference/standalone_workermanager.py @@ -25,8 +25,7 @@ ddict = DDict.attach(ddict_str) to_worker_channel = Channel.make_process_local() - to_worker_manager_channel = Channel.make_process_local() - to_worker_fli = fli.FLInterface(main_ch=to_worker_channel, manager_ch=to_worker_manager_channel) + to_worker_fli = fli.FLInterface(main_ch=to_worker_channel, manager_ch=None) ddict["to_worker_fli"] = b64encode(to_worker_fli.serialize()) torch_worker = TorchWorker() From 3e343ee5dff7d85646a39db1b56123efa575f387 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Thu, 4 Jul 2024 05:40:59 -0500 Subject: [PATCH 20/40] Changes to feature store --- smartsim/_core/launcher/dragon/dragonBackend.py | 2 +- .../infrastructure/storage/dragonfeaturestore.py | 12 ++++-------- .../mli/infrastructure/worker/torch_worker.py | 2 +- smartsim/_core/mli/infrastructure/worker/worker.py | 14 +++++++++++++- smartsim/_core/mli/message_handler.py | 4 +++- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/smartsim/_core/launcher/dragon/dragonBackend.py b/smartsim/_core/launcher/dragon/dragonBackend.py index 52f69ec41f..856de38030 100644 --- a/smartsim/_core/launcher/dragon/dragonBackend.py +++ b/smartsim/_core/launcher/dragon/dragonBackend.py @@ -401,7 +401,7 @@ def infra_ddict(self) -> str: """ if self._infra_ddict is None: logger.info("Creating DDict") - self._infra_ddict = dragon_ddict.DDict() # todo: parametrize + self._infra_ddict = dragon_ddict.DDict(n_nodes=len(self._hosts), total_mem=len(self._hosts)*1024**3) # todo: parametrize logger.info("Created DDict") self._infra_ddict["creation"] = str(time.time()) logger.info(self._infra_ddict["creation"]) diff --git a/smartsim/_core/mli/infrastructure/storage/dragonfeaturestore.py b/smartsim/_core/mli/infrastructure/storage/dragonfeaturestore.py index ea8f06977d..53f2f461f8 100644 --- a/smartsim/_core/mli/infrastructure/storage/dragonfeaturestore.py +++ b/smartsim/_core/mli/infrastructure/storage/dragonfeaturestore.py @@ -47,24 +47,20 @@ def __init__(self, storage: "DragonDict") -> None: def __getitem__(self, key: str) -> t.Any: """Retrieve an item using key :param key: Unique key of an item to retrieve from the feature store""" - key_ = key.encode("utf-8") try: - return self._storage[key_] + return self._storage[key] except Exception as ex: # note: explicitly avoid round-trip to check for key existence raise sse.SmartSimError(f"{key} not found in feature store") from ex - def __setitem__(self, key: str, value: bytes) -> None: + def __setitem__(self, key: str, value: str) -> None: """Assign a value using key :param key: Unique key of an item to set in the feature store :param value: Value to persist in the feature store""" - key_ = key.encode("utf-8") - self._storage[key_] = value + self._storage[key] = value - def __contains__(self, key: t.Union[str, bytes]) -> bool: + def __contains__(self, key: t.Union[str]) -> bool: """Membership operator to test for a key existing within the feature store. 
Return `True` if the key is found, `False` otherwise :param key: Unique key of an item to retrieve from the feature store""" - if isinstance(key, str): - key = key.encode("utf-8") return key in self._storage diff --git a/smartsim/_core/mli/infrastructure/worker/torch_worker.py b/smartsim/_core/mli/infrastructure/worker/torch_worker.py index c350499c20..122b9ddf2f 100644 --- a/smartsim/_core/mli/infrastructure/worker/torch_worker.py +++ b/smartsim/_core/mli/infrastructure/worker/torch_worker.py @@ -108,7 +108,7 @@ def transform_output( ) -> TransformOutputResult: if result_device != "cpu": transformed = [ - item.to("cpu").clone() for item in execute_result.predictions + item.to("cpu") for item in execute_result.predictions ] # todo: need the shape from latest schemas added here. return TransformOutputResult(transformed, None, "c", "float32") # fixme diff --git a/smartsim/_core/mli/infrastructure/worker/worker.py b/smartsim/_core/mli/infrastructure/worker/worker.py index 24dc734d00..40696ac22f 100644 --- a/smartsim/_core/mli/infrastructure/worker/worker.py +++ b/smartsim/_core/mli/infrastructure/worker/worker.py @@ -32,6 +32,18 @@ from ...comm.channel.channel import CommChannelBase from ...infrastructure.storage.featurestore import FeatureStore +import sys + +# isort: off +try: + import dragon + from dragon.utils import b64decode +except ImportError as exc: + if not "pytest" in sys.modules: + raise exc from None + +# isort: on + logger = get_logger(__name__) @@ -167,7 +179,7 @@ def fetch_model( ) try: - raw_bytes = feature_store[request.model_key] + raw_bytes = b64decode(feature_store[request.model_key]) return FetchModelResult(raw_bytes) except FileNotFoundError as ex: logger.exception(ex) diff --git a/smartsim/_core/mli/message_handler.py b/smartsim/_core/mli/message_handler.py index fd8f6aeed7..1928db2f7c 100644 --- a/smartsim/_core/mli/message_handler.py +++ b/smartsim/_core/mli/message_handler.py @@ -200,7 +200,9 @@ def _assign_model( if isinstance(model, bytes): request.model.modelData = model else: - request.model.modelKey = model # type: ignore + model_key = data_references_capnp.ModelKey() + model_key.key = model + request.model.modelKey = model_key # type: ignore except Exception as e: raise ValueError("Error building model portion of request.") from e From a2bed267d8dbc1af109cad6708557afb11687d0a Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Fri, 5 Jul 2024 17:45:32 +0200 Subject: [PATCH 21/40] Make style --- smartsim/_core/launcher/dragon/dragonBackend.py | 4 +++- smartsim/_core/mli/infrastructure/worker/torch_worker.py | 4 +--- smartsim/_core/mli/infrastructure/worker/worker.py | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/smartsim/_core/launcher/dragon/dragonBackend.py b/smartsim/_core/launcher/dragon/dragonBackend.py index 856de38030..dcc5c8392b 100644 --- a/smartsim/_core/launcher/dragon/dragonBackend.py +++ b/smartsim/_core/launcher/dragon/dragonBackend.py @@ -401,7 +401,9 @@ def infra_ddict(self) -> str: """ if self._infra_ddict is None: logger.info("Creating DDict") - self._infra_ddict = dragon_ddict.DDict(n_nodes=len(self._hosts), total_mem=len(self._hosts)*1024**3) # todo: parametrize + self._infra_ddict = dragon_ddict.DDict( + n_nodes=len(self._hosts), total_mem=len(self._hosts) * 1024**3 + ) # todo: parametrize logger.info("Created DDict") self._infra_ddict["creation"] = str(time.time()) logger.info(self._infra_ddict["creation"]) diff --git a/smartsim/_core/mli/infrastructure/worker/torch_worker.py 
b/smartsim/_core/mli/infrastructure/worker/torch_worker.py index 122b9ddf2f..28237dc422 100644 --- a/smartsim/_core/mli/infrastructure/worker/torch_worker.py +++ b/smartsim/_core/mli/infrastructure/worker/torch_worker.py @@ -107,9 +107,7 @@ def transform_output( result_device: str, ) -> TransformOutputResult: if result_device != "cpu": - transformed = [ - item.to("cpu") for item in execute_result.predictions - ] + transformed = [item.to("cpu") for item in execute_result.predictions] # todo: need the shape from latest schemas added here. return TransformOutputResult(transformed, None, "c", "float32") # fixme diff --git a/smartsim/_core/mli/infrastructure/worker/worker.py b/smartsim/_core/mli/infrastructure/worker/worker.py index 73eff4e8ea..e368935a0d 100644 --- a/smartsim/_core/mli/infrastructure/worker/worker.py +++ b/smartsim/_core/mli/infrastructure/worker/worker.py @@ -24,6 +24,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import sys import typing as t from abc import ABC, abstractmethod @@ -33,8 +34,6 @@ from ...infrastructure.storage.featurestore import FeatureStore from ...mli_schemas.model.model_capnp import Model -import sys - # isort: off try: import dragon From 36e92d9dabcdd013cdba637a2629e19c15896cb5 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Fri, 5 Jul 2024 18:07:31 +0200 Subject: [PATCH 22/40] Fix typing --- .../mli/infrastructure/storage/featurestore.py | 2 +- .../_core/mli/infrastructure/worker/torch_worker.py | 13 ++++++++----- smartsim/_core/mli/infrastructure/worker/worker.py | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/smartsim/_core/mli/infrastructure/storage/featurestore.py b/smartsim/_core/mli/infrastructure/storage/featurestore.py index ec4086b732..e18643e932 100644 --- a/smartsim/_core/mli/infrastructure/storage/featurestore.py +++ b/smartsim/_core/mli/infrastructure/storage/featurestore.py @@ -37,7 +37,7 @@ def __getitem__(self, key: str) -> bytes: :param key: Unique key of an item to retrieve from the feature store""" @abstractmethod - def __setitem__(self, key: str, value: bytes) -> None: + def __setitem__(self, key: str, value: str) -> None: """Assign a value using key :param key: Unique key of an item to set in the feature store :param value: Value to persist in the feature store""" diff --git a/smartsim/_core/mli/infrastructure/worker/torch_worker.py b/smartsim/_core/mli/infrastructure/worker/torch_worker.py index 28237dc422..e21513648b 100644 --- a/smartsim/_core/mli/infrastructure/worker/torch_worker.py +++ b/smartsim/_core/mli/infrastructure/worker/torch_worker.py @@ -53,13 +53,16 @@ class TorchWorker(MachineLearningWorkerBase): def load_model( request: InferenceRequest, fetch_result: FetchModelResult, device: str ) -> LoadModelResult: - model_bytes = fetch_result.model_bytes or request.raw_model - if not model_bytes: + if fetch_result.model_bytes: + model_bytes = fetch_result.model_bytes + elif request.raw_model and request.raw_model.data: + model_bytes = request.raw_model.data + else: raise ValueError("Unable to load model without reference object") - _device_to_torch = {"cpu": "cpu", "gpu": "cuda"} - device = _device_to_torch[device] - buffer = io.BytesIO(model_bytes) + device_to_torch = {"cpu": "cpu", "gpu": "cuda"} + device = device_to_torch[device] + buffer = io.BytesIO(initial_bytes=model_bytes) model = torch.jit.load(buffer, map_location=device) # type: ignore result = LoadModelResult(model) return result diff 
--git a/smartsim/_core/mli/infrastructure/worker/worker.py b/smartsim/_core/mli/infrastructure/worker/worker.py index e368935a0d..fb061348ee 100644 --- a/smartsim/_core/mli/infrastructure/worker/worker.py +++ b/smartsim/_core/mli/infrastructure/worker/worker.py @@ -148,7 +148,7 @@ class FetchModelResult: def __init__(self, result: bytes) -> None: """Initialize the object""" - self.model_bytes = result + self.model_bytes: bytes = result class MachineLearningWorkerCore: From 59840a3be12576eedce2528d93a8b601a768973e Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Fri, 5 Jul 2024 18:17:18 +0200 Subject: [PATCH 23/40] Fix lint --- smartsim/_core/mli/infrastructure/worker/torch_worker.py | 4 ++-- smartsim/_core/mli/infrastructure/worker/worker.py | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/smartsim/_core/mli/infrastructure/worker/torch_worker.py b/smartsim/_core/mli/infrastructure/worker/torch_worker.py index e21513648b..a4e725ab99 100644 --- a/smartsim/_core/mli/infrastructure/worker/torch_worker.py +++ b/smartsim/_core/mli/infrastructure/worker/torch_worker.py @@ -73,8 +73,8 @@ def transform_input( ) -> TransformInputResult: result = [] - _device_to_torch = {"cpu": "cpu", "gpu": "cuda"} - device = _device_to_torch[device] + device_to_torch = {"cpu": "cpu", "gpu": "cuda"} + device = device_to_torch[device] if fetch_result.meta is None: raise ValueError("Cannot reconstruct tensor without meta information") for item, item_meta in zip(fetch_result.inputs, fetch_result.meta): diff --git a/smartsim/_core/mli/infrastructure/worker/worker.py b/smartsim/_core/mli/infrastructure/worker/worker.py index fb061348ee..fe82ea2a3e 100644 --- a/smartsim/_core/mli/infrastructure/worker/worker.py +++ b/smartsim/_core/mli/infrastructure/worker/worker.py @@ -36,12 +36,10 @@ # isort: off try: - import dragon from dragon.utils import b64decode except ImportError as exc: - if not "pytest" in sys.modules: + if "pytest" not in sys.modules: raise exc from None - # isort: on logger = get_logger(__name__) From b35b37dd89bf6f7fd7a93c339e79643046d48abe Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Fri, 5 Jul 2024 18:32:00 +0200 Subject: [PATCH 24/40] Remove duplicated/useless comments --- smartsim/_core/mli/infrastructure/control/workermanager.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index 4e276d2507..f0cae497a0 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -240,7 +240,6 @@ def _on_iteration(self) -> None: logger.warning("No queue to check for tasks") return - # perform default deserialization of the message envelope # perform default deserialization of the message envelope with self._task_queue.recvh(timeout=None) as recvh: try: @@ -254,9 +253,6 @@ def _on_iteration(self) -> None: if not self._validate_request(request): return - # # let the worker perform additional custom deserialization - # request = self._worker.deserialize(request_bytes) - fetch_model_result = self._worker.fetch_model(request, self._feature_store) model_result = self._worker.load_model( request, fetch_model_result, self._device @@ -294,7 +290,6 @@ def _on_iteration(self) -> None: response = build_reply(reply) - # serialized = self._worker.serialize_reply(request, transformed_output) serialized_resp = MessageHandler.serialize_response(response) # type: ignore if request.callback: 
request.callback.send(serialized_resp) From 51e0b17bdbf22683759597ece523778b6d7bd953 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Tue, 9 Jul 2024 12:37:22 -0500 Subject: [PATCH 25/40] Bring up to date with new schema --- ex/high_throughput_inference/mli_driver.py | 9 ++- ex/high_throughput_inference/mock_app.py | 30 +++++++++- .../standalone_workermanager.py | 57 +++++++++++++++++-- smartsim/_core/entrypoints/service.py | 3 +- smartsim/_core/mli/comm/channel/channel.py | 7 ++- .../_core/mli/comm/channel/dragonchannel.py | 6 ++ smartsim/_core/mli/comm/channel/dragonfli.py | 29 ++++++---- .../infrastructure/control/workermanager.py | 20 ++----- .../_core/mli/infrastructure/worker/worker.py | 11 ++-- 9 files changed, 128 insertions(+), 44 deletions(-) diff --git a/ex/high_throughput_inference/mli_driver.py b/ex/high_throughput_inference/mli_driver.py index 9b899f4124..4a3dd034e8 100644 --- a/ex/high_throughput_inference/mli_driver.py +++ b/ex/high_throughput_inference/mli_driver.py @@ -1,6 +1,11 @@ + + import os +import base64 +import cloudpickle import sys from smartsim import Experiment +from smartsim._core.mli.infrastructure.worker.torch_worker import TorchWorker from smartsim.status import TERMINAL_STATUSES import time @@ -14,7 +19,9 @@ os.makedirs(exp_path, exist_ok=True) exp = Experiment("MLI_proto", launcher="dragon", exp_path=exp_path) -worker_manager_rs = exp.create_run_settings(sys.executable, [worker_manager_script_name, "--device", device]) +torch_worker_str = base64.b64encode(cloudpickle.dumps(TorchWorker)).decode("ascii") + +worker_manager_rs = exp.create_run_settings(sys.executable, [worker_manager_script_name, "--device", device, "--worker_class", torch_worker_str]) worker_manager = exp.create_model("worker_manager", run_settings=worker_manager_rs) worker_manager.attach_generator_files(to_copy=[worker_manager_script_name]) diff --git a/ex/high_throughput_inference/mock_app.py b/ex/high_throughput_inference/mock_app.py index 666d7fcc91..df0ba55c76 100644 --- a/ex/high_throughput_inference/mock_app.py +++ b/ex/high_throughput_inference/mock_app.py @@ -1,3 +1,29 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
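The driver above now ships the worker class itself to the standalone worker manager: the class object is cloudpickled, base64-encoded so it survives as a plain command-line argument, and rebuilt into an instance on the other side. A short round trip of the encoding used by mli_driver.py and standalone_workermanager.py:

    import base64

    import cloudpickle

    from smartsim._core.mli.infrastructure.worker.torch_worker import TorchWorker

    # driver side: class -> pickled bytes -> ascii-safe CLI argument
    worker_arg = base64.b64encode(cloudpickle.dumps(TorchWorker)).decode("ascii")

    # worker manager side: argument -> class -> instance
    worker_cls = cloudpickle.loads(base64.b64decode(worker_arg.encode("ascii")))
    torch_worker = worker_cls()
    assert isinstance(torch_worker, TorchWorker)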
+ # isort: off import dragon from dragon import fli @@ -32,7 +58,7 @@ def __init__(self, timing_on: bool): while to_worker_fli_str is None: try: to_worker_fli_str = self._ddict["to_worker_fli"] - self._to_worker_fli = fli.FLInterface.attach(b64decode(to_worker_fli_str)) + self._to_worker_fli = fli.FLInterface.attach(to_worker_fli_str) except KeyError: time.sleep(1) self._from_worker_ch = Channel.make_process_local() @@ -88,7 +114,7 @@ def run_model(self, model: bytes, batch: torch.Tensor): self.measure_time("build_tensor") request = MessageHandler.build_request( reply_channel=self._from_worker_ch_serialized, - model=model, + model=MessageHandler.build_model(model, "resnet-50", "1.0"), inputs=[built_tensor], outputs=[], output_descriptors=[], diff --git a/ex/high_throughput_inference/standalone_workermanager.py b/ex/high_throughput_inference/standalone_workermanager.py index ccefcbf584..991e869581 100644 --- a/ex/high_throughput_inference/standalone_workermanager.py +++ b/ex/high_throughput_inference/standalone_workermanager.py @@ -1,3 +1,29 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
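With the updated schema the request no longer embeds raw model bytes directly: the client first wraps them in a Model message via MessageHandler.build_model, giving the model a name and version, and passes that to build_request. A minimal sketch mirroring the calls in mock_app.py; the model bytes and reply-channel descriptor below are placeholders, not values from the codebase:

    import numpy as np

    from smartsim._core.mli.message_handler import MessageHandler

    batch = np.random.rand(2, 3, 224, 224).astype("float32")
    tensor = MessageHandler.build_tensor(batch, "c", "float32", list(batch.shape))
    model = MessageHandler.build_model(b"<torchscript bytes>", "resnet-50", "1.0")

    request = MessageHandler.build_request(
        reply_channel=b"<serialized reply channel>",  # normally from_worker_ch.serialize()
        model=model,
        inputs=[tensor],
        outputs=[],
        output_descriptors=[],
        custom_attributes=None,
    )
    request_bytes = MessageHandler.serialize_request(request)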
+ # isort: off import dragon from dragon import fli @@ -7,10 +33,12 @@ from dragon.globalservices.api_setup import connect_to_infrastructure # isort: on import argparse +import base64 +import cloudpickle import os - from smartsim._core.mli.comm.channel.dragonchannel import DragonCommChannel +from smartsim._core.mli.comm.channel.dragonfli import DragonFLIChannel from smartsim._core.mli.infrastructure.worker.torch_worker import TorchWorker from smartsim._core.mli.infrastructure.control.workermanager import ( WorkerManager, @@ -18,7 +46,23 @@ if __name__ == "__main__": parser = argparse.ArgumentParser("Worker Manager") - parser.add_argument("--device", default="gpu") + parser.add_argument( + "--device", + type=str, + default="gpu", + choices="gpu cpu".split(), + help="Device on which the inference takes place", + ) + parser.add_argument( + "--worker_class", + type=str, + required=True, + help="Serialized class of worker to run", + ) + parser.add_argument( + "--num_workers", type=int, default=1, help="Number of workers to run" + ) + args = parser.parse_args() connect_to_infrastructure() ddict_str = os.environ["SS_DRG_DDICT"] @@ -26,12 +70,13 @@ to_worker_channel = Channel.make_process_local() to_worker_fli = fli.FLInterface(main_ch=to_worker_channel, manager_ch=None) - ddict["to_worker_fli"] = b64encode(to_worker_fli.serialize()) - - torch_worker = TorchWorker() + to_worker_fli_serialized = to_worker_fli.serialize() + ddict["to_worker_fli"] = to_worker_fli_serialized + torch_worker = cloudpickle.loads(base64.b64decode(args.worker_class.encode('ascii')))() + comm_channel = DragonFLIChannel(to_worker_fli_serialized) worker_manager = WorkerManager( - file_like_interface=to_worker_fli, + task_queue=comm_channel, worker=torch_worker, feature_store=None, as_service=True, diff --git a/smartsim/_core/entrypoints/service.py b/smartsim/_core/entrypoints/service.py index e03df6bea1..6b4ef74b67 100644 --- a/smartsim/_core/entrypoints/service.py +++ b/smartsim/_core/entrypoints/service.py @@ -46,7 +46,8 @@ def __init__( :param as_service: Determines if the host will run until shutdown criteria are met or as a run-once instance :param cooldown: Period of time to allow service to run before automatic - shutdown, in seconds. A non-zero, positive integer.""" + shutdown, in seconds. A non-zero, positive integer. 
+ :param loop_delay: delay between iterations of the event loop""" self._as_service = as_service """If the service should run until shutdown function returns True""" self._cooldown = abs(cooldown) diff --git a/smartsim/_core/mli/comm/channel/channel.py b/smartsim/_core/mli/comm/channel/channel.py index 201ab9deab..2318896a9b 100644 --- a/smartsim/_core/mli/comm/channel/channel.py +++ b/smartsim/_core/mli/comm/channel/channel.py @@ -41,9 +41,14 @@ def __init__(self, descriptor: t.Union[str, bytes]) -> None: @abstractmethod def send(self, value: bytes) -> None: - """Send a message throuh the underlying communication channel + """Send a message through the underlying communication channel :param value: The value to send""" + @abstractmethod + def recv(self) -> bytes: + """Receieve a message through the underlying communication channel + :returns: the received message""" + @property def descriptor(self) -> bytes: """Return the channel descriptor for the underlying dragon channel""" diff --git a/smartsim/_core/mli/comm/channel/dragonchannel.py b/smartsim/_core/mli/comm/channel/dragonchannel.py index 872eb32350..fb1a0c51c1 100644 --- a/smartsim/_core/mli/comm/channel/dragonchannel.py +++ b/smartsim/_core/mli/comm/channel/dragonchannel.py @@ -51,3 +51,9 @@ def send(self, value: bytes) -> None: :param value: The value to send""" with self._channel.sendh(timeout=None) as sendh: sendh.send_bytes(value) + + def recv(self) -> bytes: + """Receieve a message through the underlying communication channel + :returns: the received message""" + with self._channel.recvh(timeout=None) as recvh: + return recvh.recv_bytes(timeout=None) \ No newline at end of file diff --git a/smartsim/_core/mli/comm/channel/dragonfli.py b/smartsim/_core/mli/comm/channel/dragonfli.py index 3992241380..ebf824b7db 100644 --- a/smartsim/_core/mli/comm/channel/dragonfli.py +++ b/smartsim/_core/mli/comm/channel/dragonfli.py @@ -24,18 +24,12 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import sys - # isort: off -try: - from dragon import fli - import dragon.channels as dch -except ImportError as exc: - if not "pytest" in sys.modules: - raise exc from None +from dragon import fli +import dragon.channels as dch # isort: on - +import sys import smartsim._core.mli.comm.channel.channel as cch from smartsim.log import get_logger @@ -45,14 +39,25 @@ class DragonFLIChannel(cch.CommChannelBase): """Passes messages by writing to a Dragon FLI Channel""" - def __init__(self, fli_desc: bytes) -> None: + def __init__(self, fli_desc: str) -> None: """Initialize the DragonFLIChannel instance""" super().__init__(fli_desc) # todo: do we need memory pool information to construct the channel correctly? 
- self._channel: "dch.Channel" = fli.FLInterface.attach(fli_desc) + self._channel: "fli" = fli.FLInterface.attach(fli_desc) def send(self, value: bytes) -> None: - """Send a message throuh the underlying communication channel + """Send a message through the underlying communication channel :param value: The value to send""" with self._channel.sendh(timeout=None) as sendh: sendh.send_bytes(value) + + def recv(self) -> bytes: + """Receieve a message through the underlying communication channel + :returns: the received message""" + with self._channel.recvh(timeout=None) as recvh: + try: + request_bytes: bytes + request_bytes, _ = recvh.recv_bytes(timeout=None) + return request_bytes + except fli.FLIEOT as exc: + return b'' \ No newline at end of file diff --git a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index f0cae497a0..6f31972727 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -27,14 +27,10 @@ import sys # isort: off -try: - import dragon - from dragon import fli -except ImportError as exc: - if not "pytest" in sys.modules: - raise exc from None - +import dragon +from dragon import fli # isort: on + import time import typing as t @@ -169,7 +165,7 @@ class WorkerManager(Service): def __init__( self, - file_like_interface: "fli.FLInterface", + task_queue: CommChannelBase, worker: MachineLearningWorkerBase, feature_store: t.Optional[FeatureStore] = None, as_service: bool = False, @@ -189,7 +185,7 @@ def __init__( super().__init__(as_service, cooldown) """a collection of workers the manager is controlling""" - self._task_queue: fli.FLInterface = file_like_interface + self._task_queue: CommChannelBase = task_queue """the queue the manager monitors for new tasks""" self._feature_store: t.Optional[FeatureStore] = feature_store """a feature store to retrieve models from""" @@ -241,11 +237,7 @@ def _on_iteration(self) -> None: return # perform default deserialization of the message envelope - with self._task_queue.recvh(timeout=None) as recvh: - try: - request_bytes, _ = recvh.recv_bytes(timeout=None) - except fli.FLIEOT as exc: - return + request_bytes = self._task_queue.recv() request = deserialize_message( request_bytes, self._comm_channel_type, self._device diff --git a/smartsim/_core/mli/infrastructure/worker/worker.py b/smartsim/_core/mli/infrastructure/worker/worker.py index fe82ea2a3e..808c9cf9bf 100644 --- a/smartsim/_core/mli/infrastructure/worker/worker.py +++ b/smartsim/_core/mli/infrastructure/worker/worker.py @@ -24,6 +24,10 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# isort: off +from dragon.utils import b64decode +# isort: on + import sys import typing as t from abc import ABC, abstractmethod @@ -34,13 +38,6 @@ from ...infrastructure.storage.featurestore import FeatureStore from ...mli_schemas.model.model_capnp import Model -# isort: off -try: - from dragon.utils import b64decode -except ImportError as exc: - if "pytest" not in sys.modules: - raise exc from None -# isort: on logger = get_logger(__name__) From 1fcf17d4456f99a6ad34d6360879e2e2a2b24f12 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Wed, 10 Jul 2024 11:06:08 -0500 Subject: [PATCH 26/40] Add feature store prototype caching --- ex/high_throughput_inference/mli_driver.py | 7 +- ex/high_throughput_inference/mock_app.py | 19 +++- .../standalone_workermanager.py | 10 +- smartsim/_core/entrypoints/service.py | 17 ++++ .../_core/mli/comm/channel/dragonchannel.py | 3 +- smartsim/_core/mli/comm/channel/dragonfli.py | 4 +- .../infrastructure/control/workermanager.py | 96 ++++++++++++++++--- .../storage/dragonfeaturestore.py | 15 ++- .../infrastructure/storage/featurestore.py | 5 +- .../_core/mli/infrastructure/worker/worker.py | 10 +- tests/mli/test_worker_manager.py | 8 +- 11 files changed, 147 insertions(+), 47 deletions(-) diff --git a/ex/high_throughput_inference/mli_driver.py b/ex/high_throughput_inference/mli_driver.py index 4a3dd034e8..4e68fdfbcb 100644 --- a/ex/high_throughput_inference/mli_driver.py +++ b/ex/high_throughput_inference/mli_driver.py @@ -8,6 +8,7 @@ from smartsim._core.mli.infrastructure.worker.torch_worker import TorchWorker from smartsim.status import TERMINAL_STATUSES import time +import typing as t device = "gpu" filedir = os.path.dirname(__file__) @@ -15,7 +16,11 @@ app_script_name = os.path.join(filedir, "mock_app.py") model_name = os.path.join(filedir, f"resnet50.{device.upper()}.pt") -exp_path = os.path.join(filedir, "MLI_proto") +transport: t.Literal["hsta", "tcp"] = "hsta" + +os.environ["SMARTSIM_DRAGON_TRANSPORT"] = transport + +exp_path = os.path.join(filedir, f"MLI_proto_{transport.upper()}") os.makedirs(exp_path, exist_ok=True) exp = Experiment("MLI_proto", launcher="dragon", exp_path=exp_path) diff --git a/ex/high_throughput_inference/mock_app.py b/ex/high_throughput_inference/mock_app.py index df0ba55c76..4ecce58ac7 100644 --- a/ex/high_throughput_inference/mock_app.py +++ b/ex/high_throughput_inference/mock_app.py @@ -31,7 +31,7 @@ import dragon.channels from dragon.data.ddict.ddict import DDict from dragon.globalservices.api_setup import connect_to_infrastructure -from dragon.utils import b64decode +from dragon.utils import b64decode, b64encode # isort: on @@ -107,7 +107,7 @@ def print_timings(self, to_file: bool = False): numpy.savetxt("timings.txt", value_array) - def run_model(self, model: bytes, batch: torch.Tensor): + def run_model(self, model: bytes | str, batch: torch.Tensor): self.start_timings(batch.shape[0]) built_tensor = MessageHandler.build_tensor( batch.numpy(), "c", "float32", list(batch.shape)) @@ -143,10 +143,14 @@ def run_model(self, model: bytes, batch: torch.Tensor): self.end_timings() return result + def set_model(self, key: str, model: bytes): + self._ddict[key] = b64encode(model) + class ResNetWrapper(): - def __init__(self, model: str): + def __init__(self, name: str, model: str): self._model = torch.jit.load(model) + self._name = name buffer = io.BytesIO() scripted = torch.jit.trace(self._model, self.get_batch()) torch.jit.save(scripted, buffer) @@ -159,15 +163,20 @@ def get_batch(self, batch_size: int=32): def model(self): return 
self._serialized_model + @property + def name(self): + return self._name + if __name__ == "__main__": parser = argparse.ArgumentParser("Mock application") parser.add_argument("--device", default="cpu") args = parser.parse_args() - resnet = ResNetWrapper(f"resnet50.{args.device.upper()}.pt") + resnet = ResNetWrapper("resnet50", f"resnet50.{args.device.upper()}.pt") client = ProtoClient(timing_on=True) + client.set_model(resnet.name, resnet.model) total_iterations = 100 @@ -175,6 +184,6 @@ def model(self): logger.info(f"Batch size: {batch_size}") for iteration_number in range(total_iterations + int(batch_size==1)): logger.info(f"Iteration: {iteration_number}") - client.run_model(resnet.model, resnet.get_batch(batch_size)) + client.run_model(resnet.name, resnet.get_batch(batch_size)) client.print_timings(to_file=True) \ No newline at end of file diff --git a/ex/high_throughput_inference/standalone_workermanager.py b/ex/high_throughput_inference/standalone_workermanager.py index 991e869581..f3e8e7c589 100644 --- a/ex/high_throughput_inference/standalone_workermanager.py +++ b/ex/high_throughput_inference/standalone_workermanager.py @@ -38,11 +38,11 @@ import os from smartsim._core.mli.comm.channel.dragonchannel import DragonCommChannel +from smartsim._core.mli.infrastructure.storage.dragonfeaturestore import DragonFeatureStore from smartsim._core.mli.comm.channel.dragonfli import DragonFLIChannel from smartsim._core.mli.infrastructure.worker.torch_worker import TorchWorker -from smartsim._core.mli.infrastructure.control.workermanager import ( - WorkerManager, -) +from smartsim._core.mli.infrastructure.control.workermanager import WorkerManager + if __name__ == "__main__": parser = argparse.ArgumentParser("Worker Manager") @@ -74,11 +74,13 @@ ddict["to_worker_fli"] = to_worker_fli_serialized torch_worker = cloudpickle.loads(base64.b64decode(args.worker_class.encode('ascii')))() + + dfs = DragonFeatureStore(ddict) comm_channel = DragonFLIChannel(to_worker_fli_serialized) worker_manager = WorkerManager( task_queue=comm_channel, worker=torch_worker, - feature_store=None, + feature_store=dfs, as_service=True, cooldown=10, comm_channel_type=DragonCommChannel, diff --git a/smartsim/_core/entrypoints/service.py b/smartsim/_core/entrypoints/service.py index 6b4ef74b67..df9c2bbef6 100644 --- a/smartsim/_core/entrypoints/service.py +++ b/smartsim/_core/entrypoints/service.py @@ -103,6 +103,23 @@ def execute(self) -> None: running = True cooldown_start: t.Optional[datetime.datetime] = None + headers = [ + "batch_size", + "w_deserialize", + "w_fetch_model", + "w_load_model", + "w_fetch_input", + "w_transform_input", + "w_execute", + "w_transform_output", + "w_assign_output", + "w_build_reply", + "w_serialize_resp", + "w_send", + ] + + print(",".join(headers)) + while running: self._on_iteration() diff --git a/smartsim/_core/mli/comm/channel/dragonchannel.py b/smartsim/_core/mli/comm/channel/dragonchannel.py index fb1a0c51c1..1409747a91 100644 --- a/smartsim/_core/mli/comm/channel/dragonchannel.py +++ b/smartsim/_core/mli/comm/channel/dragonchannel.py @@ -56,4 +56,5 @@ def recv(self) -> bytes: """Receieve a message through the underlying communication channel :returns: the received message""" with self._channel.recvh(timeout=None) as recvh: - return recvh.recv_bytes(timeout=None) \ No newline at end of file + message_bytes: bytes = recvh.recv_bytes(timeout=None) + return message_bytes diff --git a/smartsim/_core/mli/comm/channel/dragonfli.py b/smartsim/_core/mli/comm/channel/dragonfli.py index 
ebf824b7db..0c1aba94e3 100644 --- a/smartsim/_core/mli/comm/channel/dragonfli.py +++ b/smartsim/_core/mli/comm/channel/dragonfli.py @@ -27,9 +27,11 @@ # isort: off from dragon import fli import dragon.channels as dch + # isort: on import sys + import smartsim._core.mli.comm.channel.channel as cch from smartsim.log import get_logger @@ -60,4 +62,4 @@ def recv(self) -> bytes: request_bytes, _ = recvh.recv_bytes(timeout=None) return request_bytes except fli.FLIEOT as exc: - return b'' \ No newline at end of file + return b"" diff --git a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index 6f31972727..d3cc2d84ae 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -29,6 +29,7 @@ # isort: off import dragon from dragon import fli + # isort: on import time @@ -36,18 +37,20 @@ import numpy as np -from smartsim._core.entrypoints.service import Service -from smartsim._core.mli.comm.channel.channel import CommChannelBase -from smartsim._core.mli.comm.channel.dragonfli import DragonFLIChannel -from smartsim._core.mli.infrastructure.storage.featurestore import FeatureStore -from smartsim._core.mli.infrastructure.worker.worker import ( +from .....error import SmartSimError +from .....log import get_logger +from ....entrypoints.service import Service +from ...comm.channel.channel import CommChannelBase +from ...comm.channel.dragonfli import DragonFLIChannel +from ...infrastructure.storage.featurestore import FeatureStore +from ...infrastructure.worker.worker import ( InferenceReply, InferenceRequest, + LoadModelResult, MachineLearningWorkerBase, ) -from smartsim._core.mli.message_handler import MessageHandler -from smartsim._core.mli.mli_schemas.response.response_capnp import Response -from smartsim.log import get_logger +from ...message_handler import MessageHandler +from ...mli_schemas.response.response_capnp import Response if t.TYPE_CHECKING: from smartsim._core.mli.mli_schemas.model.model_capnp import Model @@ -195,6 +198,8 @@ def __init__( """The type of communication channel to construct for callbacks""" self._device = device """Device on which workers need to run""" + self._cached_models: dict[str, t.Any] = {} + """Dictionary of previously loaded models""" def _validate_request(self, request: InferenceRequest) -> bool: """Ensure the request can be processed. 
@@ -236,34 +241,84 @@ def _on_iteration(self) -> None: logger.warning("No queue to check for tasks") return + timings = [] # perform default deserialization of the message envelope - request_bytes = self._task_queue.recv() + request_bytes: bytes = self._task_queue.recv() + interm = time.perf_counter() request = deserialize_message( request_bytes, self._comm_channel_type, self._device ) if not self._validate_request(request): return - fetch_model_result = self._worker.fetch_model(request, self._feature_store) - model_result = self._worker.load_model( - request, fetch_model_result, self._device - ) + timings.append(time.perf_counter() - interm) + interm = time.perf_counter() + + if not request.raw_model: + if not request.model_key: + raise SmartSimError("Neither key, nor model provided") + + if request.model_key in self._cached_models: + timings.append(time.perf_counter() - interm) + interm = time.perf_counter() + model_result = LoadModelResult(self._cached_models[request.model_key]) + + else: + fetch_model_result = None + while True: + try: + interm = time.perf_counter() + fetch_model_result = self._worker.fetch_model( + request, self._feature_store + ) + except KeyError: + time.sleep(0.1) + else: + break + + if fetch_model_result is None: + raise SmartSimError("Could not retrieve model from feature store") + timings.append(time.perf_counter() - interm) + interm = time.perf_counter() + model_result = self._worker.load_model( + request, fetch_model_result, self._device + ) + self._cached_models[request.model_key] = model_result.model + else: + fetch_model_result = self._worker.fetch_model(request, None) + model_result = self._worker.load_model( + request, fetch_result=fetch_model_result, device=self._device + ) + + timings.append(time.perf_counter() - interm) + interm = time.perf_counter() fetch_input_result = self._worker.fetch_inputs(request, self._feature_store) + + timings.append(time.perf_counter() - interm) + interm = time.perf_counter() transformed_input = self._worker.transform_input( request, fetch_input_result, self._device ) + timings.append(time.perf_counter() - interm) + interm = time.perf_counter() + reply = InferenceReply() try: execute_result = self._worker.execute( request, model_result, transformed_input ) + + timings.append(time.perf_counter() - interm) + interm = time.perf_counter() transformed_output = self._worker.transform_output( request, execute_result, self._device ) + timings.append(time.perf_counter() - interm) + interm = time.perf_counter() if request.output_keys: reply.output_keys = self._worker.place_output( request, transformed_output, self._feature_store @@ -274,6 +329,9 @@ def _on_iteration(self) -> None: logger.exception("Error executing worker") reply.failed = True + timings.append(time.perf_counter() - interm) + interm = time.perf_counter() + if reply.failed: response = build_failure_reply("fail", "failure-occurred") else: @@ -282,10 +340,22 @@ def _on_iteration(self) -> None: response = build_reply(reply) + timings.append(time.perf_counter() - interm) + interm = time.perf_counter() + + # serialized = self._worker.serialize_reply(request, transformed_output) serialized_resp = MessageHandler.serialize_response(response) # type: ignore + + timings.append(time.perf_counter() - interm) + interm = time.perf_counter() if request.callback: request.callback.send(serialized_resp) + timings.append(time.perf_counter() - interm) + interm = time.perf_counter() + + print(" ".join(str(time) for time in timings)) + def _can_shutdown(self) -> bool: """Return true when the 
criteria to shut down the service are met.""" # todo: determine shutdown criteria diff --git a/smartsim/_core/mli/infrastructure/storage/dragonfeaturestore.py b/smartsim/_core/mli/infrastructure/storage/dragonfeaturestore.py index 53f2f461f8..fbd18438f5 100644 --- a/smartsim/_core/mli/infrastructure/storage/dragonfeaturestore.py +++ b/smartsim/_core/mli/infrastructure/storage/dragonfeaturestore.py @@ -44,22 +44,27 @@ def __init__(self, storage: "DragonDict") -> None: """Initialize the DragonFeatureStore instance""" self._storage = storage - def __getitem__(self, key: str) -> t.Any: + def __getitem__(self, key: str) -> t.Union[str, bytes]: """Retrieve an item using key :param key: Unique key of an item to retrieve from the feature store""" try: - return self._storage[key] + value: t.Union[str, bytes] = self._storage[key] + return value + except KeyError as ex: + raise ex except Exception as ex: # note: explicitly avoid round-trip to check for key existence - raise sse.SmartSimError(f"{key} not found in feature store") from ex + raise sse.SmartSimError( + f"Could not get value for existing key {key}, error:\n{ex}" + ) from ex - def __setitem__(self, key: str, value: str) -> None: + def __setitem__(self, key: str, value: t.Union[str, bytes]) -> None: """Assign a value using key :param key: Unique key of an item to set in the feature store :param value: Value to persist in the feature store""" self._storage[key] = value - def __contains__(self, key: t.Union[str]) -> bool: + def __contains__(self, key: str) -> bool: """Membership operator to test for a key existing within the feature store. Return `True` if the key is found, `False` otherwise :param key: Unique key of an item to retrieve from the feature store""" diff --git a/smartsim/_core/mli/infrastructure/storage/featurestore.py b/smartsim/_core/mli/infrastructure/storage/featurestore.py index e18643e932..553e13b10f 100644 --- a/smartsim/_core/mli/infrastructure/storage/featurestore.py +++ b/smartsim/_core/mli/infrastructure/storage/featurestore.py @@ -24,6 +24,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import typing as t from abc import ABC, abstractmethod @@ -32,12 +33,12 @@ class FeatureStore(ABC): values from a feature store implementation""" @abstractmethod - def __getitem__(self, key: str) -> bytes: + def __getitem__(self, key: str) -> t.Union[str, bytes]: """Retrieve an item using key :param key: Unique key of an item to retrieve from the feature store""" @abstractmethod - def __setitem__(self, key: str, value: str) -> None: + def __setitem__(self, key: str, value: t.Union[str, bytes]) -> None: """Assign a value using key :param key: Unique key of an item to set in the feature store :param value: Value to persist in the feature store""" diff --git a/smartsim/_core/mli/infrastructure/worker/worker.py b/smartsim/_core/mli/infrastructure/worker/worker.py index 808c9cf9bf..900a8241de 100644 --- a/smartsim/_core/mli/infrastructure/worker/worker.py +++ b/smartsim/_core/mli/infrastructure/worker/worker.py @@ -24,11 +24,6 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# isort: off -from dragon.utils import b64decode -# isort: on - -import sys import typing as t from abc import ABC, abstractmethod @@ -38,7 +33,6 @@ from ...infrastructure.storage.featurestore import FeatureStore from ...mli_schemas.model.model_capnp import Model - logger = get_logger(__name__) @@ -174,7 +168,7 @@ def fetch_model( ) try: - raw_bytes = b64decode(feature_store[request.model_key]) + raw_bytes: bytes = t.cast(bytes, feature_store[request.model_key]) return FetchModelResult(raw_bytes) except FileNotFoundError as ex: logger.exception(ex) @@ -202,7 +196,7 @@ def fetch_inputs( data: t.List[bytes] = [] for input_ in request.input_keys: try: - tensor_bytes = feature_store[input_] + tensor_bytes = t.cast(bytes, feature_store[input_]) data.append(tensor_bytes) except KeyError as ex: logger.exception(ex) diff --git a/tests/mli/test_worker_manager.py b/tests/mli/test_worker_manager.py index 46cae5b2e4..62bd711ebb 100644 --- a/tests/mli/test_worker_manager.py +++ b/tests/mli/test_worker_manager.py @@ -32,11 +32,7 @@ import pytest -should_run = True -try: - import torch -except ImportError: - should_run = False +pytest.importorskip("torch") from smartsim._core.mli.infrastructure.control.workermanager import WorkerManager from smartsim._core.mli.infrastructure.storage.featurestore import FeatureStore @@ -51,8 +47,6 @@ # The tests in this file belong to the group_a group pytestmark = pytest.mark.group_a -pytest.mark.skipif(not should_run, "Test needs PyTorch to run") - def mock_work(worker_manager_queue: "mp.Queue[bytes]") -> None: """Mock event producer for triggering the inference pipeline""" From d76f88014cebe7a76175b06178d27ca32195841d Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Wed, 10 Jul 2024 13:10:08 -0500 Subject: [PATCH 27/40] Add redis driver, fix FLI --- ex/high_throughput_inference/mock_app.py | 10 ++- .../mock_app_redis.py | 88 +++++++++++++++++++ ex/high_throughput_inference/redis_driver.py | 65 ++++++++++++++ smartsim/_core/mli/comm/channel/dragonfli.py | 12 ++- .../infrastructure/control/workermanager.py | 2 +- 5 files changed, 170 insertions(+), 7 deletions(-) create mode 100644 ex/high_throughput_inference/mock_app_redis.py create mode 100644 ex/high_throughput_inference/redis_driver.py diff --git a/ex/high_throughput_inference/mock_app.py b/ex/high_throughput_inference/mock_app.py index 4ecce58ac7..45246db2e5 100644 --- a/ex/high_throughput_inference/mock_app.py +++ b/ex/high_throughput_inference/mock_app.py @@ -112,9 +112,14 @@ def run_model(self, model: bytes | str, batch: torch.Tensor): built_tensor = MessageHandler.build_tensor( batch.numpy(), "c", "float32", list(batch.shape)) self.measure_time("build_tensor") + built_model = None + if isinstance(model, str): + model_arg = MessageHandler.build_model_key(model) + else: + model_arg = MessageHandler.build_model(model, "resnet-50", "1.0") request = MessageHandler.build_request( reply_channel=self._from_worker_ch_serialized, - model=MessageHandler.build_model(model, "resnet-50", "1.0"), + model= model_arg, inputs=[built_tensor], outputs=[], output_descriptors=[], @@ -125,6 +130,7 @@ def run_model(self, model: bytes | str, batch: torch.Tensor): self.measure_time("serialize_request") with self._to_worker_fli.sendh(timeout=None, stream_channel=self._to_worker_ch) as to_sendh: to_sendh.send_bytes(request_bytes) + logger.info(f"Message size: {len(request_bytes)} bytes") self.measure_time("send") with self._from_worker_ch.recvh(timeout=None) as from_recvh: @@ -144,7 +150,7 @@ def run_model(self, model: bytes | str, batch: 
torch.Tensor): return result def set_model(self, key: str, model: bytes): - self._ddict[key] = b64encode(model) + self._ddict[key] = model class ResNetWrapper(): diff --git a/ex/high_throughput_inference/mock_app_redis.py b/ex/high_throughput_inference/mock_app_redis.py new file mode 100644 index 0000000000..c56b4fb8b4 --- /dev/null +++ b/ex/high_throughput_inference/mock_app_redis.py @@ -0,0 +1,88 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import argparse +import io +import numpy +import time +import torch +from smartsim.log import get_logger +from smartredis import Client + +logger = get_logger("App") + +class ResNetWrapper(): + def __init__(self, name: str, model: str): + self._model = torch.jit.load(model) + self._name = name + buffer = io.BytesIO() + scripted = torch.jit.trace(self._model, self.get_batch()) + torch.jit.save(scripted, buffer) + self._serialized_model = buffer.getvalue() + + def get_batch(self, batch_size: int=32): + return torch.randn((batch_size, 3, 224, 224), dtype=torch.float32) + + @property + def model(self): + return self._serialized_model + + @property + def name(self): + return self._name + +if __name__ == "__main__": + + parser = argparse.ArgumentParser("Mock application") + parser.add_argument("--device", default="cpu") + args = parser.parse_args() + + resnet = ResNetWrapper("resnet50", f"resnet50.{args.device.upper()}.pt") + + client = Client(cluster=False, address=None) + client.set_model(resnet.name, resnet.model, backend='TORCH', device=args.device.upper()) + + total_iterations = 100 + timings=[] + for batch_size in [1, 2, 4, 8, 16, 32, 64, 128]: + logger.info(f"Batch size: {batch_size}") + for iteration_number in range(total_iterations + int(batch_size==1)): + timing = [batch_size] + logger.info(f"Iteration: {iteration_number}") + start = time.perf_counter() + client.put_tensor(name="batch", data=resnet.get_batch(batch_size).numpy()) + client.run_model(name=resnet.name, inputs=["batch"], outputs=["result"]) + result = client.get_tensor(name="result") + end = time.perf_counter() + timing.append(end-start) + timings.append(timing) + + + + timings_np = numpy.asarray(timings) + numpy.save("timings.npy", timings_np) + for timing in timings: + print(" ".join(str(t) for t in timing)) diff --git a/ex/high_throughput_inference/redis_driver.py b/ex/high_throughput_inference/redis_driver.py new file mode 100644 index 0000000000..ceddba4ef7 --- /dev/null +++ b/ex/high_throughput_inference/redis_driver.py @@ -0,0 +1,65 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import os +import sys +from smartsim import Experiment +from smartsim.status import TERMINAL_STATUSES +import time +import typing as t + +device = "gpu" +filedir = os.path.dirname(__file__) +app_script_name = os.path.join(filedir, "mock_app_redis.py") +model_name = os.path.join(filedir, f"resnet50.{device.upper()}.pt") + + +exp_path = os.path.join(filedir, "redis_ai") +os.makedirs(exp_path, exist_ok=True) +exp = Experiment("redis_ai", launcher="slurm", exp_path=exp_path) + +db = exp.create_database(interface="hsn0") + +app_rs = exp.create_run_settings(sys.executable, exe_args = [app_script_name, "--device", device]) +app_rs.set_nodes(1) +app_rs.set_tasks(1) +app = exp.create_model("app", run_settings=app_rs) +app.attach_generator_files(to_copy=[app_script_name], to_symlink=[model_name]) + +exp.generate(db, app, overwrite=True) + +exp.start(db, app, block=False) + +while True: + if exp.get_status(app)[0] in TERMINAL_STATUSES: + exp.stop(db) + break + if exp.get_status(db)[0] in TERMINAL_STATUSES: + exp.stop(app) + break + time.sleep(5) + +print("Exiting.") \ No newline at end of file diff --git a/smartsim/_core/mli/comm/channel/dragonfli.py b/smartsim/_core/mli/comm/channel/dragonfli.py index 0c1aba94e3..eb3175e445 100644 --- a/smartsim/_core/mli/comm/channel/dragonfli.py +++ b/smartsim/_core/mli/comm/channel/dragonfli.py @@ -31,6 +31,7 @@ # isort: on import sys +import typing as t import smartsim._core.mli.comm.channel.channel as cch from smartsim.log import get_logger @@ -41,22 +42,25 @@ class DragonFLIChannel(cch.CommChannelBase): """Passes messages by writing to a Dragon FLI Channel""" - def __init__(self, fli_desc: str) -> None: + def __init__(self, fli_desc: str, sender_supplied: bool = True) -> None: """Initialize the DragonFLIChannel instance""" super().__init__(fli_desc) # todo: do we need memory pool information to construct the channel correctly? 
- self._channel: "fli" = fli.FLInterface.attach(fli_desc) + self._fli: "fli" = fli.FLInterface.attach(fli_desc) + self._channel: t.Optional["dch"] = ( + dch.Channel.make_process_local() if sender_supplied else None + ) def send(self, value: bytes) -> None: """Send a message through the underlying communication channel :param value: The value to send""" - with self._channel.sendh(timeout=None) as sendh: + with self._fli.sendh(timeout=None, stream_channel=self._channel) as sendh: sendh.send_bytes(value) def recv(self) -> bytes: """Receieve a message through the underlying communication channel :returns: the received message""" - with self._channel.recvh(timeout=None) as recvh: + with self._fli.recvh(timeout=None) as recvh: try: request_bytes: bytes request_bytes, _ = recvh.recv_bytes(timeout=None) diff --git a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index d3cc2d84ae..60e263f337 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -259,7 +259,7 @@ def _on_iteration(self) -> None: if not request.model_key: raise SmartSimError("Neither key, nor model provided") - if request.model_key in self._cached_models: + if False and (request.model_key in self._cached_models): timings.append(time.perf_counter() - interm) interm = time.perf_counter() model_result = LoadModelResult(self._cached_models[request.model_key]) From 3938ec8dbe9964235e6ed4791600257b08b9f3eb Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Thu, 11 Jul 2024 12:27:34 -0500 Subject: [PATCH 28/40] Update post-merge --- ex/high_throughput_inference/mli_driver.py | 1 - .../standalone_workermanager.py | 11 ++- .../infrastructure/control/workermanager.py | 68 +++++++++---------- .../mli/infrastructure/environmentloader.py | 11 +-- 4 files changed, 49 insertions(+), 42 deletions(-) diff --git a/ex/high_throughput_inference/mli_driver.py b/ex/high_throughput_inference/mli_driver.py index 4e68fdfbcb..6da559aa6f 100644 --- a/ex/high_throughput_inference/mli_driver.py +++ b/ex/high_throughput_inference/mli_driver.py @@ -30,7 +30,6 @@ worker_manager = exp.create_model("worker_manager", run_settings=worker_manager_rs) worker_manager.attach_generator_files(to_copy=[worker_manager_script_name]) - app_rs = exp.create_run_settings(sys.executable, exe_args = [app_script_name, "--device", device]) app = exp.create_model("app", run_settings=app_rs) app.attach_generator_files(to_copy=[app_script_name], to_symlink=[model_name]) diff --git a/ex/high_throughput_inference/standalone_workermanager.py b/ex/high_throughput_inference/standalone_workermanager.py index f3e8e7c589..c56e11a7c3 100644 --- a/ex/high_throughput_inference/standalone_workermanager.py +++ b/ex/high_throughput_inference/standalone_workermanager.py @@ -35,6 +35,7 @@ import argparse import base64 import cloudpickle +import pickle import os from smartsim._core.mli.comm.channel.dragonchannel import DragonCommChannel @@ -42,6 +43,7 @@ from smartsim._core.mli.comm.channel.dragonfli import DragonFLIChannel from smartsim._core.mli.infrastructure.worker.torch_worker import TorchWorker from smartsim._core.mli.infrastructure.control.workermanager import WorkerManager +from smartsim._core.mli.infrastructure.environmentloader import EnvironmentConfigLoader if __name__ == "__main__": @@ -77,10 +79,15 @@ dfs = DragonFeatureStore(ddict) comm_channel = DragonFLIChannel(to_worker_fli_serialized) + + os.environ["SSFeatureStore"] = 
base64.b64encode(pickle.dumps(dfs)).decode("utf-8") + os.environ["SSQueue"] = base64.b64encode(to_worker_fli_serialized).decode("utf-8") + + config_loader = EnvironmentConfigLoader() + worker_manager = WorkerManager( - task_queue=comm_channel, + config_loader=config_loader, worker=torch_worker, - feature_store=dfs, as_service=True, cooldown=10, comm_channel_type=DragonCommChannel, diff --git a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index eaa77bdf3e..8c06351fb5 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -41,7 +41,7 @@ from .....log import get_logger from ....entrypoints.service import Service from ...comm.channel.channel import CommChannelBase -from ...comm.channel.dragonfli import DragonFLIChannel +from ...comm.channel.dragonchannel import DragonCommChannel from ...infrastructure.environmentloader import EnvironmentConfigLoader from ...infrastructure.storage.featurestore import FeatureStore from ...infrastructure.worker.worker import ( @@ -175,7 +175,7 @@ def __init__( worker: MachineLearningWorkerBase, as_service: bool = False, cooldown: int = 0, - comm_channel_type: t.Type[CommChannelBase] = DragonFLIChannel, + comm_channel_type: t.Type[CommChannelBase] = DragonCommChannel, device: t.Literal["cpu", "gpu"] = "cpu", ) -> None: """Initialize the WorkerManager @@ -244,34 +244,34 @@ def _on_iteration(self) -> None: logger.warning("No queue to check for tasks") return - timings = [] + timings = [] # timing # perform default deserialization of the message envelope request_bytes: bytes = self._task_queue.recv() - interm = time.perf_counter() + interm = time.perf_counter() # timing request = deserialize_message( request_bytes, self._comm_channel_type, self._device ) if not self._validate_request(request): return - timings.append(time.perf_counter() - interm) - interm = time.perf_counter() + timings.append(time.perf_counter() - interm) # timing + interm = time.perf_counter() # timing if not request.raw_model: - if not request.model_key: - raise SmartSimError("Neither key, nor model provided") - - if False and (request.model_key in self._cached_models): - timings.append(time.perf_counter() - interm) - interm = time.perf_counter() + if request.model_key is None: + # A valid request should never get here. 
+ raise ValueError("Could not read model key") + if request.model_key in self._cached_models: + timings.append(time.perf_counter() - interm) # timing + interm = time.perf_counter() # timing model_result = LoadModelResult(self._cached_models[request.model_key]) else: fetch_model_result = None while True: try: - interm = time.perf_counter() + interm = time.perf_counter() # timing fetch_model_result = self._worker.fetch_model( request, self._feature_store ) @@ -282,8 +282,8 @@ def _on_iteration(self) -> None: if fetch_model_result is None: raise SmartSimError("Could not retrieve model from feature store") - timings.append(time.perf_counter() - interm) - interm = time.perf_counter() + timings.append(time.perf_counter() - interm) # timing + interm = time.perf_counter() # timing model_result = self._worker.load_model( request, fetch_model_result, self._device ) @@ -294,18 +294,18 @@ def _on_iteration(self) -> None: request, fetch_result=fetch_model_result, device=self._device ) - timings.append(time.perf_counter() - interm) - interm = time.perf_counter() + timings.append(time.perf_counter() - interm) # timing + interm = time.perf_counter() # timing fetch_input_result = self._worker.fetch_inputs(request, self._feature_store) - timings.append(time.perf_counter() - interm) - interm = time.perf_counter() + timings.append(time.perf_counter() - interm) # timing + interm = time.perf_counter() # timing transformed_input = self._worker.transform_input( request, fetch_input_result, self._device ) - timings.append(time.perf_counter() - interm) - interm = time.perf_counter() + timings.append(time.perf_counter() - interm) # timing + interm = time.perf_counter() # timing reply = InferenceReply() @@ -314,14 +314,14 @@ def _on_iteration(self) -> None: request, model_result, transformed_input ) - timings.append(time.perf_counter() - interm) - interm = time.perf_counter() + timings.append(time.perf_counter() - interm) # timing + interm = time.perf_counter() # timing transformed_output = self._worker.transform_output( request, execute_result, self._device ) - timings.append(time.perf_counter() - interm) - interm = time.perf_counter() + timings.append(time.perf_counter() - interm) # timing + interm = time.perf_counter() # timing if request.output_keys: reply.output_keys = self._worker.place_output( request, transformed_output, self._feature_store @@ -332,8 +332,8 @@ def _on_iteration(self) -> None: logger.exception("Error executing worker") reply.failed = True - timings.append(time.perf_counter() - interm) - interm = time.perf_counter() + timings.append(time.perf_counter() - interm) # timing + interm = time.perf_counter() # timing if reply.failed: response = build_failure_reply("fail", "failure-occurred") @@ -343,21 +343,21 @@ def _on_iteration(self) -> None: response = build_reply(reply) - timings.append(time.perf_counter() - interm) - interm = time.perf_counter() + timings.append(time.perf_counter() - interm) # timing + interm = time.perf_counter() # timing # serialized = self._worker.serialize_reply(request, transformed_output) serialized_resp = MessageHandler.serialize_response(response) # type: ignore - timings.append(time.perf_counter() - interm) - interm = time.perf_counter() + timings.append(time.perf_counter() - interm) # timing + interm = time.perf_counter() # timing if request.callback: request.callback.send(serialized_resp) - timings.append(time.perf_counter() - interm) - interm = time.perf_counter() + timings.append(time.perf_counter() - interm) # timing + interm = time.perf_counter() # timing - 
print(" ".join(str(time) for time in timings)) + print(" ".join(str(time) for time in timings)) # timing def _can_shutdown(self) -> bool: """Return true when the criteria to shut down the service are met.""" diff --git a/smartsim/_core/mli/infrastructure/environmentloader.py b/smartsim/_core/mli/infrastructure/environmentloader.py index 267b668f63..f5e9532103 100644 --- a/smartsim/_core/mli/infrastructure/environmentloader.py +++ b/smartsim/_core/mli/infrastructure/environmentloader.py @@ -32,6 +32,7 @@ from dragon.fli import FLInterface # pylint: disable=all from smartsim._core.mli.infrastructure.storage.featurestore import FeatureStore +from smartsim._core.mli.comm.channel.dragonfli import DragonFLIChannel class EnvironmentConfigLoader: @@ -41,10 +42,10 @@ class EnvironmentConfigLoader: """ def __init__(self) -> None: - self._feature_store_descriptor = os.getenv("SSFeatureStore", None) - self._queue_descriptor = os.getenv("SSQueue", None) + self._feature_store_descriptor: t.Optional[str] = os.getenv("SSFeatureStore", None) + self._queue_descriptor: t.Optional[str] = os.getenv("SSQueue", None) self.feature_store: t.Optional[FeatureStore] = None - self.queue: t.Optional["FLInterface"] = None + self.queue: t.Optional[DragonFLIChannel] = None def get_feature_store(self) -> t.Optional[FeatureStore]: """Loads the Feature Store previously set in SSFeatureStore""" @@ -54,8 +55,8 @@ def get_feature_store(self) -> t.Optional[FeatureStore]: ) return self.feature_store - def get_queue(self) -> t.Optional["FLInterface"]: + def get_queue(self, sender_supplied: bool = True) -> t.Optional[DragonFLIChannel]: """Returns the Queue previously set in SSQueue""" if self._queue_descriptor is not None: - self.queue = FLInterface.attach(base64.b64decode(self._queue_descriptor)) + self.queue = DragonFLIChannel(fli_desc=base64.b64decode(self._queue_descriptor), sender_supplied=sender_supplied) return self.queue From 273a7d952fdcaa89984b654ce4b46c272c1c2bbd Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Thu, 11 Jul 2024 13:15:38 -0500 Subject: [PATCH 29/40] Fix typing --- smartsim/_core/mli/comm/channel/dragonfli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smartsim/_core/mli/comm/channel/dragonfli.py b/smartsim/_core/mli/comm/channel/dragonfli.py index eb3175e445..75f8fb4bfc 100644 --- a/smartsim/_core/mli/comm/channel/dragonfli.py +++ b/smartsim/_core/mli/comm/channel/dragonfli.py @@ -42,7 +42,7 @@ class DragonFLIChannel(cch.CommChannelBase): """Passes messages by writing to a Dragon FLI Channel""" - def __init__(self, fli_desc: str, sender_supplied: bool = True) -> None: + def __init__(self, fli_desc: bytes, sender_supplied: bool = True) -> None: """Initialize the DragonFLIChannel instance""" super().__init__(fli_desc) # todo: do we need memory pool information to construct the channel correctly? 
From a12d9232914ff9c2cf8def6224a3bb08896b80d9 Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Thu, 11 Jul 2024 13:50:35 -0500 Subject: [PATCH 30/40] isort --- .../_core/mli/infrastructure/environmentloader.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/smartsim/_core/mli/infrastructure/environmentloader.py b/smartsim/_core/mli/infrastructure/environmentloader.py index f5e9532103..9f6770623d 100644 --- a/smartsim/_core/mli/infrastructure/environmentloader.py +++ b/smartsim/_core/mli/infrastructure/environmentloader.py @@ -31,8 +31,8 @@ from dragon.fli import FLInterface # pylint: disable=all -from smartsim._core.mli.infrastructure.storage.featurestore import FeatureStore from smartsim._core.mli.comm.channel.dragonfli import DragonFLIChannel +from smartsim._core.mli.infrastructure.storage.featurestore import FeatureStore class EnvironmentConfigLoader: @@ -42,7 +42,9 @@ class EnvironmentConfigLoader: """ def __init__(self) -> None: - self._feature_store_descriptor: t.Optional[str] = os.getenv("SSFeatureStore", None) + self._feature_store_descriptor: t.Optional[str] = os.getenv( + "SSFeatureStore", None + ) self._queue_descriptor: t.Optional[str] = os.getenv("SSQueue", None) self.feature_store: t.Optional[FeatureStore] = None self.queue: t.Optional[DragonFLIChannel] = None @@ -58,5 +60,8 @@ def get_feature_store(self) -> t.Optional[FeatureStore]: def get_queue(self, sender_supplied: bool = True) -> t.Optional[DragonFLIChannel]: """Returns the Queue previously set in SSQueue""" if self._queue_descriptor is not None: - self.queue = DragonFLIChannel(fli_desc=base64.b64decode(self._queue_descriptor), sender_supplied=sender_supplied) + self.queue = DragonFLIChannel( + fli_desc=base64.b64decode(self._queue_descriptor), + sender_supplied=sender_supplied, + ) return self.queue From 38b0de15266288b4a959bbbcb244e131407555ea Mon Sep 17 00:00:00 2001 From: Al Rigazzi Date: Thu, 11 Jul 2024 14:42:16 -0500 Subject: [PATCH 31/40] Update envloader test --- tests/dragon/test_environment_loader.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/dragon/test_environment_loader.py b/tests/dragon/test_environment_loader.py index d339fec885..00db0a9d32 100644 --- a/tests/dragon/test_environment_loader.py +++ b/tests/dragon/test_environment_loader.py @@ -64,10 +64,9 @@ def test_environment_loader_attach_FLI(content, monkeypatch): config = EnvironmentConfigLoader() config_queue = config.get_queue() - new_sender = config_queue.sendh(use_main_as_stream_channel=True) - new_sender.send_bytes(content) + new_sender = config_queue.send(content) - old_recv = queue.recvh(use_main_as_stream_channel=True) + old_recv = queue.recvh() result, _ = old_recv.recv_bytes() assert result == content @@ -81,7 +80,7 @@ def test_environment_loader_serialize_FLI(monkeypatch): config = EnvironmentConfigLoader() config_queue = config.get_queue() - assert config_queue.serialize() == queue.serialize() + assert config_queue._fli.serialize() == queue.serialize() def test_environment_loader_FLI_fails(monkeypatch): From 53eb0457fb0762f62b938065f11b7b830f1fe588 Mon Sep 17 00:00:00 2001 From: Alyssa Cote Date: Thu, 11 Jul 2024 16:24:16 -0500 Subject: [PATCH 32/40] no more data blob --- smartsim/_core/mli/mli_schemas/request/request.capnp | 2 +- smartsim/_core/mli/mli_schemas/tensor/tensor.capnp | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/smartsim/_core/mli/mli_schemas/request/request.capnp b/smartsim/_core/mli/mli_schemas/request/request.capnp index 
f9508cb54f..6d290fb599 100644 --- a/smartsim/_core/mli/mli_schemas/request/request.capnp +++ b/smartsim/_core/mli/mli_schemas/request/request.capnp @@ -43,7 +43,7 @@ struct Request { } input :union { keys @3 :List(DataRef.TensorKey); - data @4 :List(Tensors.Tensor); + descriptors @4 :List(Tensors.TensorDescriptor); } output @5 :List(DataRef.TensorKey); outputDescriptors @6 :List(Tensors.OutputDescriptor); diff --git a/smartsim/_core/mli/mli_schemas/tensor/tensor.capnp b/smartsim/_core/mli/mli_schemas/tensor/tensor.capnp index aca1ce0836..3d70296209 100644 --- a/smartsim/_core/mli/mli_schemas/tensor/tensor.capnp +++ b/smartsim/_core/mli/mli_schemas/tensor/tensor.capnp @@ -61,11 +61,6 @@ enum ReturnNumericalType { auto @ 11; } -struct Tensor { - blob @0 :Data; - tensorDescriptor @1 :TensorDescriptor; -} - struct TensorDescriptor { dimensions @0 :List(Int32); order @1 :Order; From e64532de392c226c4543863d8a7dfc5b5f5bac0d Mon Sep 17 00:00:00 2001 From: Alyssa Cote Date: Thu, 11 Jul 2024 15:59:15 -0700 Subject: [PATCH 33/40] fixing up worker manager --- .../infrastructure/control/workermanager.py | 24 ++++++----- .../_core/mli/infrastructure/worker/worker.py | 2 +- smartsim/_core/mli/message_handler.py | 42 +++++++++--------- .../mli/mli_schemas/request/request_capnp.pyi | 18 +++++--- .../mli/mli_schemas/response/response.capnp | 2 +- .../mli_schemas/response/response_capnp.pyi | 18 +++++--- .../_core/mli/mli_schemas/tensor/tensor.capnp | 2 +- .../mli/mli_schemas/tensor/tensor_capnp.py | 3 -- .../mli/mli_schemas/tensor/tensor_capnp.pyi | 43 ------------------- tests/mli/test_torch_worker.py | 9 ++-- 10 files changed, 66 insertions(+), 97 deletions(-) diff --git a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index 8c06351fb5..1c571dc2f2 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -103,9 +103,9 @@ def deserialize_message( if request.input.which() == "keys": input_keys = [input_key.key for input_key in request.input.keys] - elif request.input.which() == "data": - input_bytes = [data.blob for data in request.input.data] - input_meta = [data.tensorDescriptor for data in request.input.data] + elif request.input.which() == "descriptors": + # input_bytes = [data.blob for data in request.input.data] + input_meta = [request.input.descriptors] inference_request = InferenceRequest( model_key=model_key, @@ -137,20 +137,16 @@ def prepare_outputs(reply: InferenceReply) -> t.List[t.Any]: msg_key = MessageHandler.build_tensor_key(key) prepared_outputs.append(msg_key) elif reply.outputs: - arrays: t.List[np.ndarray[t.Any, np.dtype[t.Any]]] = [ - output.numpy() for output in reply.outputs - ] - for tensor in arrays: + for _ in reply.outputs: # todo: need to have the output attributes specified in the req? # maybe, add `MessageHandler.dtype_of(tensor)`? # can `build_tensor` do dtype and shape? 
- msg_tensor = MessageHandler.build_tensor( - tensor, + msg_tensor_desc = MessageHandler.build_tensor_descriptor( "c", "float32", [1], ) - prepared_outputs.append(msg_tensor) + prepared_outputs.append(msg_tensor_desc) return prepared_outputs @@ -252,6 +248,11 @@ def _on_iteration(self) -> None: request = deserialize_message( request_bytes, self._comm_channel_type, self._device ) + + if request.input_meta: + for _ in request.input_meta: + request.raw_inputs.append(self._task_queue.recv()) + if not self._validate_request(request): return @@ -353,6 +354,9 @@ def _on_iteration(self) -> None: interm = time.perf_counter() # timing if request.callback: request.callback.send(serialized_resp) + if reply.outputs: + for output in reply.outputs: + request.callback.send(output) timings.append(time.perf_counter() - interm) # timing interm = time.perf_counter() # timing diff --git a/smartsim/_core/mli/infrastructure/worker/worker.py b/smartsim/_core/mli/infrastructure/worker/worker.py index 900a8241de..f76e05bcc0 100644 --- a/smartsim/_core/mli/infrastructure/worker/worker.py +++ b/smartsim/_core/mli/infrastructure/worker/worker.py @@ -56,7 +56,7 @@ def __init__( self.model_key = model_key self.raw_model = raw_model self.callback = callback - self.raw_inputs = raw_inputs + self.raw_inputs = raw_inputs or [] self.input_keys = input_keys or [] self.input_meta = input_meta or [] self.output_keys = output_keys or [] diff --git a/smartsim/_core/mli/message_handler.py b/smartsim/_core/mli/message_handler.py index bcf1cfdf14..d5e2549bae 100644 --- a/smartsim/_core/mli/message_handler.py +++ b/smartsim/_core/mli/message_handler.py @@ -25,8 +25,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import typing as t -import numpy as np - from .mli_schemas.data import data_references_capnp from .mli_schemas.model import model_capnp from .mli_schemas.request import request_capnp @@ -38,17 +36,15 @@ class MessageHandler: @staticmethod - def build_tensor( - tensor: np.ndarray[t.Any, np.dtype[t.Any]], + def build_tensor_descriptor( order: "tensor_capnp.Order", data_type: "tensor_capnp.NumericalType", dimensions: t.List[int], - ) -> tensor_capnp.Tensor: + ) -> tensor_capnp.TensorDescriptor: """ - Builds a Tensor message using the provided data, + Builds a TensorDescriptor message using the provided order, data type, and dimensions. - :param tensor: Tensor to build the message around :param order: Order of the tensor, such as row-major (c) or column-major (f) :param data_type: Data type of the tensor :param dimensions: Dimensions of the tensor @@ -59,15 +55,12 @@ def build_tensor( description.order = order description.dataType = data_type description.dimensions = dimensions - built_tensor = tensor_capnp.Tensor.new_message() - built_tensor.blob = tensor.tobytes() # tensor channel instead? - built_tensor.tensorDescriptor = description except Exception as e: raise ValueError( - "Error building tensor." + "Error building tensor descriptor." 
) from e # TODO: create custom exception - return built_tensor + return description @staticmethod def build_output_tensor_descriptor( @@ -248,7 +241,8 @@ def _assign_reply_channel( def _assign_inputs( request: request_capnp.Request, inputs: t.Union[ - t.List[data_references_capnp.TensorKey], t.List[tensor_capnp.Tensor] + t.List[data_references_capnp.TensorKey], + t.List[tensor_capnp.TensorDescriptor], ], ) -> None: """ @@ -262,13 +256,14 @@ def _assign_inputs( if inputs: display_name = inputs[0].schema.node.displayName # type: ignore input_class_name = display_name.split(":")[-1] - if input_class_name == "Tensor": - request.input.data = inputs # type: ignore + if input_class_name == "TensorDescriptor": + request.input.descriptors = inputs # type: ignore elif input_class_name == "TensorKey": request.input.keys = inputs # type: ignore else: raise ValueError( - "Invalid input class name. Expected 'Tensor' or 'TensorKey'." + """Invalid input class name. Expected + 'TensorDescriptor' or 'TensorKey'.""" ) except Exception as e: raise ValueError("Error building inputs portion of request.") from e @@ -351,7 +346,8 @@ def build_request( reply_channel: bytes, model: t.Union[data_references_capnp.ModelKey, model_capnp.Model], inputs: t.Union[ - t.List[data_references_capnp.TensorKey], t.List[tensor_capnp.Tensor] + t.List[data_references_capnp.TensorKey], + t.List[tensor_capnp.TensorDescriptor], ], outputs: t.List[data_references_capnp.TensorKey], output_descriptors: t.List[tensor_capnp.OutputDescriptor], @@ -437,7 +433,8 @@ def _assign_message(response: response_capnp.Response, message: str) -> None: def _assign_result( response: response_capnp.Response, result: t.Union[ - t.List[tensor_capnp.Tensor], t.List[data_references_capnp.TensorKey] + t.List[tensor_capnp.TensorDescriptor], + t.List[data_references_capnp.TensorKey], ], ) -> None: """ @@ -452,13 +449,13 @@ def _assign_result( first_result = result[0] display_name = first_result.schema.node.displayName # type: ignore result_class_name = display_name.split(":")[-1] - if result_class_name == "Tensor": - response.result.data = result # type: ignore + if result_class_name == "TensorDescriptor": + response.result.descriptors = result # type: ignore elif result_class_name == "TensorKey": response.result.keys = result # type: ignore else: raise ValueError("""Invalid custom attribute class name. 
- Expected 'Tensor' or 'TensorKey'.""") + Expected 'TensorDescriptor' or 'TensorKey'.""") except Exception as e: raise ValueError("Error assigning result to response.") from e @@ -501,7 +498,8 @@ def build_response( status: "response_capnp.StatusEnum", message: str, result: t.Union[ - t.List[tensor_capnp.Tensor], t.List[data_references_capnp.TensorKey] + t.List[tensor_capnp.TensorDescriptor], + t.List[data_references_capnp.TensorKey], ], custom_attributes: t.Union[ response_attributes_capnp.TorchResponseAttributes, diff --git a/smartsim/_core/mli/mli_schemas/request/request_capnp.pyi b/smartsim/_core/mli/mli_schemas/request/request_capnp.pyi index 39093f61ad..54dcdcfecc 100644 --- a/smartsim/_core/mli/mli_schemas/request/request_capnp.pyi +++ b/smartsim/_core/mli/mli_schemas/request/request_capnp.pyi @@ -47,9 +47,9 @@ from ..tensor.tensor_capnp import ( OutputDescriptor, OutputDescriptorBuilder, OutputDescriptorReader, - Tensor, - TensorBuilder, - TensorReader, + TensorDescriptor, + TensorDescriptorBuilder, + TensorDescriptorReader, ) from .request_attributes.request_attributes_capnp import ( TensorFlowRequestAttributes, @@ -143,8 +143,10 @@ class Request: class Input: keys: Sequence[TensorKey | TensorKeyBuilder | TensorKeyReader] - data: Sequence[Tensor | TensorBuilder | TensorReader] - def which(self) -> Literal["keys", "data"]: ... + descriptors: Sequence[ + TensorDescriptor | TensorDescriptorBuilder | TensorDescriptorReader + ] + def which(self) -> Literal["keys", "descriptors"]: ... @staticmethod @contextmanager def from_bytes( @@ -164,12 +166,14 @@ class Request: class InputReader(Request.Input): keys: Sequence[TensorKeyReader] - data: Sequence[TensorReader] + descriptors: Sequence[TensorDescriptorReader] def as_builder(self) -> Request.InputBuilder: ... class InputBuilder(Request.Input): keys: Sequence[TensorKey | TensorKeyBuilder | TensorKeyReader] - data: Sequence[Tensor | TensorBuilder | TensorReader] + descriptors: Sequence[ + TensorDescriptor | TensorDescriptorBuilder | TensorDescriptorReader + ] @staticmethod def from_dict(dictionary: dict) -> Request.InputBuilder: ... def copy(self) -> Request.InputBuilder: ... 
diff --git a/smartsim/_core/mli/mli_schemas/response/response.capnp b/smartsim/_core/mli/mli_schemas/response/response.capnp index 67375b5a97..01b1f67e86 100644 --- a/smartsim/_core/mli/mli_schemas/response/response.capnp +++ b/smartsim/_core/mli/mli_schemas/response/response.capnp @@ -41,7 +41,7 @@ struct Response { message @1 :Text; result :union { keys @2 :List(DataRef.TensorKey); - data @3 :List(Tensors.Tensor); + descriptors @3 :List(Tensors.TensorDescriptor); } customAttributes :union { torch @4 :ResponseAttributes.TorchResponseAttributes; diff --git a/smartsim/_core/mli/mli_schemas/response/response_capnp.pyi b/smartsim/_core/mli/mli_schemas/response/response_capnp.pyi index f6d7f8444e..6253422af2 100644 --- a/smartsim/_core/mli/mli_schemas/response/response_capnp.pyi +++ b/smartsim/_core/mli/mli_schemas/response/response_capnp.pyi @@ -35,7 +35,11 @@ from io import BufferedWriter from typing import Iterator, Literal, Sequence, overload from ..data.data_references_capnp import TensorKey, TensorKeyBuilder, TensorKeyReader -from ..tensor.tensor_capnp import Tensor, TensorBuilder, TensorReader +from ..tensor.tensor_capnp import ( + TensorDescriptor, + TensorDescriptorBuilder, + TensorDescriptorReader, +) from .response_attributes.response_attributes_capnp import ( TensorFlowResponseAttributes, TensorFlowResponseAttributesBuilder, @@ -50,8 +54,10 @@ StatusEnum = Literal["complete", "fail", "timeout"] class Response: class Result: keys: Sequence[TensorKey | TensorKeyBuilder | TensorKeyReader] - data: Sequence[Tensor | TensorBuilder | TensorReader] - def which(self) -> Literal["keys", "data"]: ... + descriptors: Sequence[ + TensorDescriptor | TensorDescriptorBuilder | TensorDescriptorReader + ] + def which(self) -> Literal["keys", "descriptors"]: ... @staticmethod @contextmanager def from_bytes( @@ -71,12 +77,14 @@ class Response: class ResultReader(Response.Result): keys: Sequence[TensorKeyReader] - data: Sequence[TensorReader] + descriptors: Sequence[TensorDescriptorReader] def as_builder(self) -> Response.ResultBuilder: ... class ResultBuilder(Response.Result): keys: Sequence[TensorKey | TensorKeyBuilder | TensorKeyReader] - data: Sequence[Tensor | TensorBuilder | TensorReader] + descriptors: Sequence[ + TensorDescriptor | TensorDescriptorBuilder | TensorDescriptorReader + ] @staticmethod def from_dict(dictionary: dict) -> Response.ResultBuilder: ... def copy(self) -> Response.ResultBuilder: ... 
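On the reply side, `Response.result` now holds `TensorDescriptor`s as well, so a client reads the serialized response first and then pulls one raw-bytes message per descriptor from the reply channel. The sketch below is an assumption-laden illustration, not the library's API surface: `from_recvh` is a placeholder for an open receive handle whose `recv_bytes` returns plain bytes (as in the mock app's reply channel); an FLI receive handle instead returns a `(bytes, hint)` tuple, so the unpacking would need to change for that case.

```python
import numpy as np

from smartsim._core.mli.message_handler import MessageHandler


def read_inference_reply(from_recvh):
    """Sketch: rebuild result tensors from a descriptor-based response.

    Assumes `from_recvh.recv_bytes(timeout=None)` returns bytes directly;
    adjust the unpacking if the handle returns a (bytes, hint) tuple.
    """
    resp_bytes = from_recvh.recv_bytes(timeout=None)
    response = MessageHandler.deserialize_response(resp_bytes)

    tensors = []
    if response.result.which() == "descriptors":
        # the worker is expected to send one raw-bytes message per descriptor,
        # in the same order as response.result.descriptors
        for desc in response.result.descriptors:
            blob = from_recvh.recv_bytes(timeout=None)
            # dtype comes from the descriptor; reshaping would use
            # desc.dimensions once the worker reports real output shapes
            tensors.append(np.frombuffer(blob, dtype=str(desc.dataType)))
    return response, tensors
```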
diff --git a/smartsim/_core/mli/mli_schemas/tensor/tensor.capnp b/smartsim/_core/mli/mli_schemas/tensor/tensor.capnp index 3d70296209..4b2218b166 100644 --- a/smartsim/_core/mli/mli_schemas/tensor/tensor.capnp +++ b/smartsim/_core/mli/mli_schemas/tensor/tensor.capnp @@ -58,7 +58,7 @@ enum ReturnNumericalType { float32 @8; float64 @9; none @10; - auto @ 11; + auto @11; } struct TensorDescriptor { diff --git a/smartsim/_core/mli/mli_schemas/tensor/tensor_capnp.py b/smartsim/_core/mli/mli_schemas/tensor/tensor_capnp.py index aa7f1e7b18..8c9d6c9029 100644 --- a/smartsim/_core/mli/mli_schemas/tensor/tensor_capnp.py +++ b/smartsim/_core/mli/mli_schemas/tensor/tensor_capnp.py @@ -33,9 +33,6 @@ capnp.remove_import_hook() here = os.path.dirname(os.path.abspath(__file__)) module_file = os.path.abspath(os.path.join(here, "tensor.capnp")) -Tensor = capnp.load(module_file).Tensor -TensorBuilder = Tensor -TensorReader = Tensor TensorDescriptor = capnp.load(module_file).TensorDescriptor TensorDescriptorBuilder = TensorDescriptor TensorDescriptorReader = TensorDescriptor diff --git a/smartsim/_core/mli/mli_schemas/tensor/tensor_capnp.pyi b/smartsim/_core/mli/mli_schemas/tensor/tensor_capnp.pyi index 7e7222ef54..b55f26b452 100644 --- a/smartsim/_core/mli/mli_schemas/tensor/tensor_capnp.pyi +++ b/smartsim/_core/mli/mli_schemas/tensor/tensor_capnp.pyi @@ -101,49 +101,6 @@ class TensorDescriptorBuilder(TensorDescriptor): @staticmethod def write_packed(file: BufferedWriter) -> None: ... -class Tensor: - blob: bytes - tensorDescriptor: ( - TensorDescriptor | TensorDescriptorBuilder | TensorDescriptorReader - ) - def init(self, name: Literal["tensorDescriptor"]) -> TensorDescriptor: ... - @staticmethod - @contextmanager - def from_bytes( - data: bytes, - traversal_limit_in_words: int | None = ..., - nesting_limit: int | None = ..., - ) -> Iterator[TensorReader]: ... - @staticmethod - def from_bytes_packed( - data: bytes, - traversal_limit_in_words: int | None = ..., - nesting_limit: int | None = ..., - ) -> TensorReader: ... - @staticmethod - def new_message() -> TensorBuilder: ... - def to_dict(self) -> dict: ... - -class TensorReader(Tensor): - tensorDescriptor: TensorDescriptorReader - def as_builder(self) -> TensorBuilder: ... - -class TensorBuilder(Tensor): - tensorDescriptor: ( - TensorDescriptor | TensorDescriptorBuilder | TensorDescriptorReader - ) - @staticmethod - def from_dict(dictionary: dict) -> TensorBuilder: ... - def copy(self) -> TensorBuilder: ... - def to_bytes(self) -> bytes: ... - def to_bytes_packed(self) -> bytes: ... - def to_segments(self) -> list[bytes]: ... - def as_reader(self) -> TensorReader: ... - @staticmethod - def write(file: BufferedWriter) -> None: ... - @staticmethod - def write_packed(file: BufferedWriter) -> None: ... 
- class OutputDescriptor: order: Order optionalKeys: Sequence[TensorKey | TensorKeyBuilder | TensorKeyReader] diff --git a/tests/mli/test_torch_worker.py b/tests/mli/test_torch_worker.py index 0b1cd4ccf3..f159c15a0e 100644 --- a/tests/mli/test_torch_worker.py +++ b/tests/mli/test_torch_worker.py @@ -95,17 +95,18 @@ def create_torch_model(): def get_request() -> InferenceRequest: tensors = [get_batch() for _ in range(2)] - serialized_tensors = [ - MessageHandler.build_tensor(tensor.numpy(), "c", "float32", list(tensor.shape)) + tensor_numpy = [tensor.numpy() for tensor in tensors] + serialized_tensors_descriptors = [ + MessageHandler.build_tensor_descriptor("c", "float32", list(tensor.shape)) for tensor in tensors ] return InferenceRequest( model_key="model", callback=None, - raw_inputs=[s_tensor.blob for s_tensor in serialized_tensors], + raw_inputs=tensor_numpy, input_keys=None, - input_meta=[s_tensor.tensorDescriptor for s_tensor in serialized_tensors], + input_meta=serialized_tensors_descriptors, output_keys=None, raw_model=create_torch_model(), batch_size=0, From 52f5e74ea0bf80f4375e23b527034e6a5a453452 Mon Sep 17 00:00:00 2001 From: Alyssa Cote Date: Thu, 11 Jul 2024 19:06:21 -0500 Subject: [PATCH 34/40] fixed tests, maybe fixed mock app? --- ex/high_throughput_inference/mock_app.py | 25 +- smartsim/_core/mli/message_handler.py | 6 +- .../test_message_handler/test_build_tensor.py | 185 ------- .../test_build_tensor_desc.py | 90 ++++ tests/test_message_handler/test_request.py | 491 ++---------------- tests/test_message_handler/test_response.py | 248 ++------- 6 files changed, 209 insertions(+), 836 deletions(-) delete mode 100644 tests/test_message_handler/test_build_tensor.py create mode 100644 tests/test_message_handler/test_build_tensor_desc.py diff --git a/ex/high_throughput_inference/mock_app.py b/ex/high_throughput_inference/mock_app.py index 45246db2e5..9cd59d2206 100644 --- a/ex/high_throughput_inference/mock_app.py +++ b/ex/high_throughput_inference/mock_app.py @@ -108,10 +108,13 @@ def print_timings(self, to_file: bool = False): def run_model(self, model: bytes | str, batch: torch.Tensor): + tensors = [batch.numpy()] self.start_timings(batch.shape[0]) - built_tensor = MessageHandler.build_tensor( - batch.numpy(), "c", "float32", list(batch.shape)) - self.measure_time("build_tensor") + # built_tensor = MessageHandler.build_tensor( + # batch.numpy(), "c", "float32", list(batch.shape)) + built_tensor_desc = MessageHandler.build_tensor_descriptor( + "c", "float32", list(batch.shape)) + self.measure_time("build_tensor_descriptor") built_model = None if isinstance(model, str): model_arg = MessageHandler.build_model_key(model) @@ -120,7 +123,7 @@ def run_model(self, model: bytes | str, batch: torch.Tensor): request = MessageHandler.build_request( reply_channel=self._from_worker_ch_serialized, model= model_arg, - inputs=[built_tensor], + inputs=[built_tensor_desc], outputs=[], output_descriptors=[], custom_attributes=None, @@ -130,6 +133,8 @@ def run_model(self, model: bytes | str, batch: torch.Tensor): self.measure_time("serialize_request") with self._to_worker_fli.sendh(timeout=None, stream_channel=self._to_worker_ch) as to_sendh: to_sendh.send_bytes(request_bytes) + for t in tensors: + to_sendh.send_bytes(t.tobytes()) # NOT FAST ENOUGH!!! 
logger.info(f"Message size: {len(request_bytes)} bytes") self.measure_time("send") @@ -138,12 +143,20 @@ def run_model(self, model: bytes | str, batch: torch.Tensor): self.measure_time("receive") response = MessageHandler.deserialize_response(resp) self.measure_time("deserialize_response") + # list of data blobs? recv depending on the len(esponse.result.descriptors)? + data_blob = from_recvh.recv_bytes(timeout=None) result = torch.from_numpy( numpy.frombuffer( - response.result.data[0].blob, - dtype=str(response.result.data[0].tensorDescriptor.dataType), + data_blob, + dtype=str(response.result.descriptors[0].dataType), ) ) + # result = torch.from_numpy( + # numpy.frombuffer( + # response.result.data[0].blob, + # dtype=str(response.result.data[0].tensorDescriptor.dataType), + # ) + # ) self.measure_time("deserialize_tensor") self.end_timings() diff --git a/smartsim/_core/mli/message_handler.py b/smartsim/_core/mli/message_handler.py index d5e2549bae..5599af5d2e 100644 --- a/smartsim/_core/mli/message_handler.py +++ b/smartsim/_core/mli/message_handler.py @@ -261,10 +261,8 @@ def _assign_inputs( elif input_class_name == "TensorKey": request.input.keys = inputs # type: ignore else: - raise ValueError( - """Invalid input class name. Expected - 'TensorDescriptor' or 'TensorKey'.""" - ) + raise ValueError("""Invalid input class name. Expected + 'TensorDescriptor' or 'TensorKey'.""") except Exception as e: raise ValueError("Error building inputs portion of request.") from e diff --git a/tests/test_message_handler/test_build_tensor.py b/tests/test_message_handler/test_build_tensor.py deleted file mode 100644 index aa7bd4e6e2..0000000000 --- a/tests/test_message_handler/test_build_tensor.py +++ /dev/null @@ -1,185 +0,0 @@ -# BSD 2-Clause License -# -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import pytest - -try: - import tensorflow as tf -except ImportError: - should_run_tf = False -else: - should_run_tf = True - - small_tf_tensor = tf.zeros((3, 2, 5), dtype=tf.int8) - small_tf_tensor = small_tf_tensor.numpy() - medium_tf_tensor = tf.ones((1040, 1040, 3), dtype=tf.int64) - medium_tf_tensor = medium_tf_tensor.numpy() - - -try: - import torch -except ImportError: - should_run_torch = False -else: - should_run_torch = True - - small_torch_tensor = torch.zeros((3, 2, 5), dtype=torch.int8) - small_torch_tensor = small_torch_tensor.numpy() - medium_torch_tensor = torch.ones((1040, 1040, 3), dtype=torch.int64) - medium_torch_tensor = medium_torch_tensor.numpy() - -from smartsim._core.mli.message_handler import MessageHandler - -# The tests in this file belong to the group_a group -pytestmark = pytest.mark.group_a - -handler = MessageHandler() - - -@pytest.mark.skipif(not should_run_torch, reason="Test needs Torch to run") -@pytest.mark.parametrize( - "tensor, dtype, order, dimension", - [ - pytest.param( - small_torch_tensor, - "int8", - "c", - [3, 2, 5], - id="small torch tensor", - ), - pytest.param( - medium_torch_tensor, - "int64", - "c", - [1040, 1040, 3], - id="medium torch tensor", - ), - ], -) -def test_build_torch_tensor_successful(tensor, dtype, order, dimension): - built_tensor = handler.build_tensor(tensor, order, dtype, dimension) - assert built_tensor is not None - assert type(built_tensor.blob) == bytes - assert built_tensor.tensorDescriptor.order == order - assert built_tensor.tensorDescriptor.dataType == dtype - for i, j in zip(built_tensor.tensorDescriptor.dimensions, dimension): - assert i == j - - -@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run") -@pytest.mark.parametrize( - "tensor, dtype, order, dimension", - [ - pytest.param( - small_tf_tensor, - "int8", - "c", - [3, 2, 5], - id="small tf tensor", - ), - pytest.param( - medium_tf_tensor, - "int64", - "c", - [1040, 1040, 3], - id="medium tf tensor", - ), - ], -) -def test_build_tf_tensor_successful(tensor, dtype, order, dimension): - built_tensor = handler.build_tensor(tensor, order, dtype, dimension) - assert built_tensor is not None - assert type(built_tensor.blob) == bytes - assert built_tensor.tensorDescriptor.order == order - assert built_tensor.tensorDescriptor.dataType == dtype - for i, j in zip(built_tensor.tensorDescriptor.dimensions, dimension): - assert i == j - - -@pytest.mark.skipif(not should_run_torch, reason="Test needs Torch to run") -@pytest.mark.parametrize( - "tensor, dtype, order, dimension", - [ - pytest.param([1, 2, 4], "c", "int8", [1, 2, 3], id="bad tensor type"), - pytest.param( - small_torch_tensor, - "bad_order", - "int8", - [3, 2, 5], - id="bad order type", - ), - pytest.param( - small_torch_tensor, - "f", - "bad_num_type", - [3, 2, 5], - id="bad numerical type", - ), - pytest.param( - small_torch_tensor, - "f", - "int8", - "bad shape type", - id="bad shape type", - ), - ], -) -def test_build_torch_tensor_bad_input(tensor, dtype, order, dimension): - with pytest.raises(ValueError): - built_tensor = handler.build_tensor(tensor, order, dtype, dimension) - - -@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run") -@pytest.mark.parametrize( - "tensor, dtype, order, dimension", - [ - pytest.param([1, 2, 4], "c", "int8", [1, 2, 3], id="bad tensor type"), - pytest.param( - small_tf_tensor, - "bad_order", - "int8", - [3, 2, 5], - id="bad order type", - ), - pytest.param( - small_tf_tensor, - "f", - "bad_num_type", - [3, 2, 5], - id="bad numerical 
type", - ), - pytest.param( - small_tf_tensor, - "f", - "int8", - "bad shape type", - id="bad shape type", - ), - ], -) -def test_build_tf_tensor_bad_input(tensor, dtype, order, dimension): - with pytest.raises(ValueError): - built_tensor = handler.build_tensor(tensor, order, dtype, dimension) diff --git a/tests/test_message_handler/test_build_tensor_desc.py b/tests/test_message_handler/test_build_tensor_desc.py new file mode 100644 index 0000000000..45126fb16c --- /dev/null +++ b/tests/test_message_handler/test_build_tensor_desc.py @@ -0,0 +1,90 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import pytest + +from smartsim._core.mli.message_handler import MessageHandler + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + +handler = MessageHandler() + + +@pytest.mark.parametrize( + "dtype, order, dimension", + [ + pytest.param( + "int8", + "c", + [3, 2, 5], + id="small torch tensor", + ), + pytest.param( + "int64", + "c", + [1040, 1040, 3], + id="medium torch tensor", + ), + ], +) +def test_build_tensor_descriptor_successful(dtype, order, dimension): + built_tensor_descriptor = handler.build_tensor_descriptor(order, dtype, dimension) + assert built_tensor_descriptor is not None + assert built_tensor_descriptor.order == order + assert built_tensor_descriptor.dataType == dtype + for i, j in zip(built_tensor_descriptor.dimensions, dimension): + assert i == j + + +@pytest.mark.parametrize( + "dtype, order, dimension", + [ + pytest.param( + "bad_order", + "int8", + [3, 2, 5], + id="bad order type", + ), + pytest.param( + "f", + "bad_num_type", + [3, 2, 5], + id="bad numerical type", + ), + pytest.param( + "f", + "int8", + "bad shape type", + id="bad shape type", + ), + ], +) +def test_build_tensor_descriptor_unsuccessful(dtype, order, dimension): + with pytest.raises(ValueError): + built_tensor_descriptor = handler.build_tensor_descriptor( + order, dtype, dimension + ) diff --git a/tests/test_message_handler/test_request.py b/tests/test_message_handler/test_request.py index b1fedaa024..5a8a091d90 100644 --- a/tests/test_message_handler/test_request.py +++ b/tests/test_message_handler/test_request.py @@ -28,46 +28,6 @@ from smartsim._core.mli.message_handler import MessageHandler -try: - import tensorflow as tf -except ImportError: - should_run_tf = False -else: - should_run_tf = True - tflow1 = tf.zeros((3, 2, 5), dtype=tf.int8) - tflow2 = tf.ones((10, 10, 3), dtype=tf.int64) - - tensor_3 = MessageHandler.build_tensor( - tflow1.numpy(), "c", "int8", list(tflow1.shape) - ) - tensor_4 = MessageHandler.build_tensor( - tflow2.numpy(), "c", "int64", list(tflow2.shape) - ) - - tf_attributes = MessageHandler.build_tf_request_attributes( - name="tf", tensor_type="sparse" - ) - - -try: - import torch -except ImportError: - should_run_torch = False -else: - should_run_torch = True - - torch1 = torch.zeros((3, 2, 5), dtype=torch.int8) - torch2 = torch.ones((10, 10, 3), dtype=torch.int64) - - tensor_1 = MessageHandler.build_tensor( - torch1.numpy(), "c", "int8", list(torch1.shape) - ) - tensor_2 = MessageHandler.build_tensor( - torch2.numpy(), "c", "int64", list(torch2.shape) - ) - - torch_attributes = MessageHandler.build_torch_request_attributes("sparse") - # The tests in this file belong to the group_a group pytestmark = pytest.mark.group_a @@ -87,123 +47,54 @@ output_descriptor3 = MessageHandler.build_output_tensor_descriptor( "c", [output_key1], "none", [1, 2, 3] ) +torch_attributes = MessageHandler.build_torch_request_attributes("sparse") +tf_attributes = MessageHandler.build_tf_request_attributes( + name="tf", tensor_type="sparse" +) +tensor_1 = MessageHandler.build_tensor_descriptor("c", "int8", [1]) +tensor_2 = MessageHandler.build_tensor_descriptor("c", "int64", [3, 2]) +tensor_3 = MessageHandler.build_tensor_descriptor("f", "int8", [1]) +tensor_4 = MessageHandler.build_tensor_descriptor("f", "int64", [3, 2]) -if should_run_tf: - tf_indirect_request = MessageHandler.build_request( - b"reply", - model, - [input_key1, input_key2], - [output_key1, output_key2], - [output_descriptor1, output_descriptor2, output_descriptor3], - tf_attributes, - ) - 
tf_direct_request = MessageHandler.build_request( - b"reply", - model, - [tensor_3, tensor_4], - [], - [output_descriptor1, output_descriptor2], - tf_attributes, - ) +tf_indirect_request = MessageHandler.build_request( + b"reply", + model, + [input_key1, input_key2], + [output_key1, output_key2], + [output_descriptor1, output_descriptor2, output_descriptor3], + tf_attributes, +) -if should_run_torch: - torch_indirect_request = MessageHandler.build_request( - b"reply", - model, - [input_key1, input_key2], - [output_key1, output_key2], - [output_descriptor1, output_descriptor2, output_descriptor3], - torch_attributes, - ) - torch_direct_request = MessageHandler.build_request( - b"reply", - model, - [tensor_1, tensor_2], - [], - [output_descriptor1, output_descriptor2], - torch_attributes, - ) +tf_direct_request = MessageHandler.build_request( + b"reply", + model, + [tensor_3, tensor_4], + [], + [output_descriptor1, output_descriptor2], + tf_attributes, +) +torch_indirect_request = MessageHandler.build_request( + b"reply", + model, + [input_key1, input_key2], + [output_key1, output_key2], + [output_descriptor1, output_descriptor2, output_descriptor3], + torch_attributes, +) -@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run") -@pytest.mark.parametrize( - "reply_channel, model, input, output, output_descriptors, custom_attributes", - [ - pytest.param( - b"reply channel", - model_key, - [input_key1, input_key2], - [output_key1, output_key2], - [output_descriptor1], - tf_attributes, - ), - pytest.param( - b"another reply channel", - model, - [input_key1], - [output_key2], - [output_descriptor1], - tf_attributes, - ), - pytest.param( - b"another reply channel", - model, - [input_key1], - [output_key2], - [output_descriptor1], - tf_attributes, - ), - pytest.param( - b"reply channel", - model_key, - [input_key1], - [output_key1], - [output_descriptor1], - None, - ), - ], +torch_direct_request = MessageHandler.build_request( + b"reply", + model, + [tensor_1, tensor_2], + [], + [output_descriptor1, output_descriptor2], + torch_attributes, ) -def test_build_request_indirect_tf_successful( - reply_channel, model, input, output, output_descriptors, custom_attributes -): - built_request = MessageHandler.build_request( - reply_channel, - model, - input, - output, - output_descriptors, - custom_attributes, - ) - assert built_request is not None - assert built_request.replyChannel.reply == reply_channel - if built_request.model.which() == "key": - assert built_request.model.key.key == model.key - else: - assert built_request.model.data.data == model.data - assert built_request.model.data.name == model.name - assert built_request.model.data.version == model.version - assert built_request.input.which() == "keys" - assert built_request.input.keys[0].key == input[0].key - assert len(built_request.input.keys) == len(input) - assert len(built_request.output) == len(output) - for i, j in zip(built_request.outputDescriptors, output_descriptors): - assert i.order == j.order - if built_request.customAttributes.which() == "tf": - assert ( - built_request.customAttributes.tf.tensorType == custom_attributes.tensorType - ) - elif built_request.customAttributes.which() == "torch": - assert ( - built_request.customAttributes.torch.tensorType - == custom_attributes.tensorType - ) - else: - assert built_request.customAttributes.none == custom_attributes -@pytest.mark.skipif(not should_run_torch, reason="Test needs Torch to run") @pytest.mark.parametrize( "reply_channel, model, input, output, 
output_descriptors, custom_attributes", [ @@ -221,7 +112,7 @@ def test_build_request_indirect_tf_successful( [input_key1], [output_key2], [output_descriptor1], - torch_attributes, + tf_attributes, ), pytest.param( b"another reply channel", @@ -241,7 +132,7 @@ def test_build_request_indirect_tf_successful( ), ], ) -def test_build_request_indirect_torch_successful( +def test_build_request_indirect_successful( reply_channel, model, input, output, output_descriptors, custom_attributes ): built_request = MessageHandler.build_request( @@ -279,108 +170,6 @@ def test_build_request_indirect_torch_successful( assert built_request.customAttributes.none == custom_attributes -@pytest.mark.skipif(not should_run_torch, reason="Test needs Torch to run") -@pytest.mark.parametrize( - "reply_channel, model, input, output, output_descriptors, custom_attributes", - [ - pytest.param( - [], - model_key, - [input_key1, input_key2], - [output_key1, output_key2], - [output_descriptor1], - torch_attributes, - id="bad channel", - ), - pytest.param( - b"reply channel", - "bad model", - [input_key1], - [output_key2], - [output_descriptor1], - torch_attributes, - id="bad model", - ), - pytest.param( - b"reply channel", - model_key, - ["input_key1", "input_key2"], - [output_key1, output_key2], - [output_descriptor1], - torch_attributes, - id="bad inputs", - ), - pytest.param( - b"reply channel", - model_key, - [model_key], - [output_key1, output_key2], - [output_descriptor1], - torch_attributes, - id="bad input schema type", - ), - pytest.param( - b"reply channel", - model_key, - [input_key1], - ["output_key1", "output_key2"], - [output_descriptor1], - torch_attributes, - id="bad outputs", - ), - pytest.param( - b"reply channel", - model_key, - [input_key1], - [model_key], - [output_descriptor1], - torch_attributes, - id="bad output schema type", - ), - pytest.param( - b"reply channel", - model_key, - [input_key1], - [output_key1, output_key2], - [output_descriptor1], - "bad attributes", - id="bad custom attributes", - ), - pytest.param( - b"reply channel", - model_key, - [input_key1], - [output_key1, output_key2], - [output_descriptor1], - model_key, - id="bad custom attributes schema type", - ), - pytest.param( - b"reply channel", - model_key, - [input_key1], - [output_key1, output_key2], - "bad descriptors", - torch_attributes, - id="bad output descriptors", - ), - ], -) -def test_build_request_indirect_torch_unsuccessful( - reply_channel, model, input, output, output_descriptors, custom_attributes -): - with pytest.raises(ValueError): - built_request = MessageHandler.build_request( - reply_channel, - model, - input, - output, - output_descriptors, - custom_attributes, - ) - - -@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run") @pytest.mark.parametrize( "reply_channel, model, input, output, output_descriptors, custom_attributes", [ @@ -399,7 +188,7 @@ def test_build_request_indirect_torch_unsuccessful( [input_key1], [output_key2], [output_descriptor1], - tf_attributes, + torch_attributes, id="bad model", ), pytest.param( @@ -417,7 +206,7 @@ def test_build_request_indirect_torch_unsuccessful( [model_key], [output_key1, output_key2], [output_descriptor1], - tf_attributes, + torch_attributes, id="bad input schema type", ), pytest.param( @@ -462,12 +251,12 @@ def test_build_request_indirect_torch_unsuccessful( [input_key1], [output_key1, output_key2], "bad descriptors", - tf_attributes, + torch_attributes, id="bad output descriptors", ), ], ) -def test_build_request_indirect_tf_unsuccessful( +def 
test_build_request_indirect_unsuccessful( reply_channel, model, input, output, output_descriptors, custom_attributes ): with pytest.raises(ValueError): @@ -481,7 +270,6 @@ def test_build_request_indirect_tf_unsuccessful( ) -@pytest.mark.skipif(not should_run_torch, reason="Test needs Torch to run") @pytest.mark.parametrize( "reply_channel, model, input, output, output_descriptors, custom_attributes", [ @@ -499,88 +287,12 @@ def test_build_request_indirect_tf_unsuccessful( [tensor_1], [], [output_descriptor3], - torch_attributes, - ), - pytest.param( - b"another reply channel", - model, - [tensor_2], - [], - [output_descriptor1], - torch_attributes, - ), - pytest.param( - b"another reply channel", - model, - [tensor_1], - [], - [output_descriptor1], - None, - ), - ], -) -def test_build_request_direct_torch_successful( - reply_channel, model, input, output, output_descriptors, custom_attributes -): - built_request = MessageHandler.build_request( - reply_channel, - model, - input, - output, - output_descriptors, - custom_attributes, - ) - assert built_request is not None - assert built_request.replyChannel.reply == reply_channel - if built_request.model.which() == "key": - assert built_request.model.key.key == model.key - else: - assert built_request.model.data.data == model.data - assert built_request.model.data.name == model.name - assert built_request.model.data.version == model.version - assert built_request.input.which() == "data" - assert built_request.input.data[0].blob == input[0].blob - assert len(built_request.input.data) == len(input) - assert len(built_request.output) == len(output) - for i, j in zip(built_request.outputDescriptors, output_descriptors): - assert i.order == j.order - if built_request.customAttributes.which() == "tf": - assert ( - built_request.customAttributes.tf.tensorType == custom_attributes.tensorType - ) - elif built_request.customAttributes.which() == "torch": - assert ( - built_request.customAttributes.torch.tensorType - == custom_attributes.tensorType - ) - else: - assert built_request.customAttributes.none == custom_attributes - - -@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run") -@pytest.mark.parametrize( - "reply_channel, model, input, output, output_descriptors, custom_attributes", - [ - pytest.param( - b"reply channel", - model_key, - [tensor_3, tensor_4], - [], - [output_descriptor2], - tf_attributes, - ), - pytest.param( - b"another reply channel", - model, - [tensor_4], - [], - [output_descriptor3], tf_attributes, ), pytest.param( b"another reply channel", model, - [tensor_4], + [tensor_2], [], [output_descriptor1], tf_attributes, @@ -588,14 +300,14 @@ def test_build_request_direct_torch_successful( pytest.param( b"another reply channel", model, - [tensor_3], + [tensor_1], [], [output_descriptor1], None, ), ], ) -def test_build_request_direct_tf_successful( +def test_build_request_direct_successful( reply_channel, model, input, output, output_descriptors, custom_attributes ): built_request = MessageHandler.build_request( @@ -614,9 +326,8 @@ def test_build_request_direct_tf_successful( assert built_request.model.data.data == model.data assert built_request.model.data.name == model.name assert built_request.model.data.version == model.version - assert built_request.input.which() == "data" - assert built_request.input.data[0].blob == input[0].blob - assert len(built_request.input.data) == len(input) + assert built_request.input.which() == "descriptors" + assert len(built_request.input.descriptors) == len(input) assert 
len(built_request.output) == len(output) for i, j in zip(built_request.outputDescriptors, output_descriptors): assert i.order == j.order @@ -633,81 +344,6 @@ def test_build_request_direct_tf_successful( assert built_request.customAttributes.none == custom_attributes -@pytest.mark.skipif(not should_run_torch, reason="Test needs Torch to run") -@pytest.mark.parametrize( - "reply_channel, model, input, output, output_descriptors, custom_attributes", - [ - pytest.param( - [], - model_key, - [tensor_1, tensor_2], - [], - [output_descriptor2], - torch_attributes, - id="bad channel", - ), - pytest.param( - b"reply channel", - "bad model", - [tensor_1], - [], - [output_descriptor2], - torch_attributes, - id="bad model", - ), - pytest.param( - b"reply channel", - model_key, - ["input_key1", "input_key2"], - [], - [output_descriptor2], - torch_attributes, - id="bad inputs", - ), - pytest.param( - b"reply channel", - model_key, - [], - ["output_key1", "output_key2"], - [output_descriptor2], - torch_attributes, - id="bad outputs", - ), - pytest.param( - b"reply channel", - model_key, - [tensor_1], - [], - [output_descriptor2], - "bad attributes", - id="bad custom attributes", - ), - pytest.param( - b"reply_channel", - model_key, - [tensor_1, tensor_2], - [], - ["output_descriptor2"], - torch_attributes, - id="bad output descriptors", - ), - ], -) -def test_build_torch_request_direct_unsuccessful( - reply_channel, model, input, output, output_descriptors, custom_attributes -): - with pytest.raises(ValueError): - built_request = MessageHandler.build_request( - reply_channel, - model, - input, - output, - output_descriptors, - custom_attributes, - ) - - -@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run") @pytest.mark.parametrize( "reply_channel, model, input, output, output_descriptors, custom_attributes", [ @@ -735,7 +371,7 @@ def test_build_torch_request_direct_unsuccessful( ["input_key1", "input_key2"], [], [output_descriptor2], - tf_attributes, + torch_attributes, id="bad inputs", ), pytest.param( @@ -762,12 +398,12 @@ def test_build_torch_request_direct_unsuccessful( [tensor_3, tensor_4], [], ["output_descriptor2"], - tf_attributes, + torch_attributes, id="bad output descriptors", ), ], ) -def test_build_tf_request_direct_unsuccessful( +def test_build_request_direct_unsuccessful( reply_channel, model, input, output, output_descriptors, custom_attributes ): with pytest.raises(ValueError): @@ -781,31 +417,16 @@ def test_build_tf_request_direct_unsuccessful( ) -@pytest.mark.skipif(not should_run_torch, reason="Test needs Torch to run") @pytest.mark.parametrize( "req", [ + pytest.param(tf_indirect_request, id="tf indirect"), + pytest.param(tf_direct_request, id="tf direct"), pytest.param(torch_indirect_request, id="indirect"), pytest.param(torch_direct_request, id="direct"), ], ) -def test_serialize_torch_request_successful(req): - serialized = MessageHandler.serialize_request(req) - assert type(serialized) == bytes - - deserialized = MessageHandler.deserialize_request(serialized) - assert deserialized.to_dict() == req.to_dict() - - -@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run") -@pytest.mark.parametrize( - "req", - [ - pytest.param(tf_indirect_request, id="indirect"), - pytest.param(tf_direct_request, id="direct"), - ], -) -def test_serialize_tf_request_successful(req): +def test_serialize_request_successful(req): serialized = MessageHandler.serialize_request(req) assert type(serialized) == bytes diff --git a/tests/test_message_handler/test_response.py 
b/tests/test_message_handler/test_response.py index 9d59a18793..03bd9ba73f 100644 --- a/tests/test_message_handler/test_response.py +++ b/tests/test_message_handler/test_response.py @@ -28,60 +28,6 @@ from smartsim._core.mli.message_handler import MessageHandler -try: - import tensorflow as tf -except ImportError: - should_run_tf = False -else: - should_run_tf = True - - tflow1 = tf.zeros((3, 2, 5), dtype=tf.int8) - tflow2 = tf.ones((1040, 1040, 3), dtype=tf.int64) - - small_tf_tensor = MessageHandler.build_tensor( - tflow1.numpy(), "c", "int8", list(tflow1.shape) - ) - medium_tf_tensor = MessageHandler.build_tensor( - tflow2.numpy(), "c", "int64", list(tflow2.shape) - ) - - tf_attributes = MessageHandler.build_tf_response_attributes() - - tf_direct_response = MessageHandler.build_response( - "complete", - "Success again!", - [small_tf_tensor, medium_tf_tensor], - tf_attributes, - ) - - -try: - import torch -except ImportError: - should_run_torch = False -else: - should_run_torch = True - - torch1 = torch.zeros((3, 2, 5), dtype=torch.int8) - torch2 = torch.ones((1040, 1040, 3), dtype=torch.int64) - - small_torch_tensor = MessageHandler.build_tensor( - torch1.numpy(), "c", "int8", list(torch1.shape) - ) - medium_torch_tensor = MessageHandler.build_tensor( - torch2.numpy(), "c", "int64", list(torch2.shape) - ) - - torch_attributes = MessageHandler.build_torch_response_attributes() - - torch_direct_response = MessageHandler.build_response( - "complete", - "Success again!", - [small_torch_tensor, medium_torch_tensor], - torch_attributes, - ) - - # The tests in this file belong to the group_a group pytestmark = pytest.mark.group_a @@ -89,86 +35,51 @@ result_key1 = MessageHandler.build_tensor_key("result_key1") result_key2 = MessageHandler.build_tensor_key("result_key2") +torch_attributes = MessageHandler.build_torch_response_attributes() +tf_attributes = MessageHandler.build_tf_response_attributes() -if should_run_tf: - tf_indirect_response = MessageHandler.build_response( - "complete", - "Success!", - [result_key1, result_key2], - tf_attributes, - ) +tensor1 = MessageHandler.build_tensor_descriptor("c", "int8", [1]) +tensor2 = MessageHandler.build_tensor_descriptor("c", "int64", [3, 2]) -if should_run_torch: - torch_indirect_response = MessageHandler.build_response( - "complete", - "Success!", - [result_key1, result_key2], - torch_attributes, - ) +tf_indirect_response = MessageHandler.build_response( + "complete", + "Success!", + [result_key1, result_key2], + tf_attributes, +) -@pytest.mark.skipif(not should_run_torch, reason="Test needs Torch to run") -@pytest.mark.parametrize( - "status, status_message, result, custom_attribute", - [ - pytest.param( - 200, - "Yay, it worked!", - [small_torch_tensor, medium_torch_tensor], - None, - id="tensor list", - ), - pytest.param( - 200, - "Yay, it worked!", - [small_torch_tensor], - torch_attributes, - id="small tensor", - ), - pytest.param( - 200, - "Yay, it worked!", - [result_key1, result_key2], - torch_attributes, - id="tensor key list", - ), - ], +tf_direct_response = MessageHandler.build_response( + "complete", + "Success again!", + [tensor2, tensor1], + tf_attributes, +) + +torch_indirect_response = MessageHandler.build_response( + "complete", + "Success!", + [result_key1, result_key2], + torch_attributes, +) + +torch_direct_response = MessageHandler.build_response( + "complete", + "Success again!", + [tensor1, tensor2], + torch_attributes, ) -def test_build_torch_response_successful( - status, status_message, result, custom_attribute -): - 
response = MessageHandler.build_response( - status=status, - message=status_message, - result=result, - custom_attributes=custom_attribute, - ) - assert response is not None - assert response.status == status - assert response.message == status_message - if response.result.which() == "keys": - assert response.result.keys[0].to_dict() == result[0].to_dict() - else: - assert response.result.data[0].to_dict() == result[0].to_dict() -@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run") @pytest.mark.parametrize( "status, status_message, result, custom_attribute", [ pytest.param( 200, "Yay, it worked!", - [small_tf_tensor, medium_tf_tensor], + [tensor1, tensor2], None, - id="tensor list", - ), - pytest.param( - 200, - "Yay, it worked!", - [small_tf_tensor], - tf_attributes, - id="small tensor", + id="tensor descriptor list", ), pytest.param( 200, @@ -179,7 +90,7 @@ def test_build_torch_response_successful( ), ], ) -def test_build_tf_response_successful(status, status_message, result, custom_attribute): +def test_build_response_successful(status, status_message, result, custom_attribute): response = MessageHandler.build_response( status=status, message=status_message, @@ -192,25 +103,24 @@ def test_build_tf_response_successful(status, status_message, result, custom_att if response.result.which() == "keys": assert response.result.keys[0].to_dict() == result[0].to_dict() else: - assert response.result.data[0].to_dict() == result[0].to_dict() + assert response.result.descriptors[0].to_dict() == result[0].to_dict() -@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run") @pytest.mark.parametrize( "status, status_message, result, custom_attribute", [ pytest.param( "bad status", "Yay, it worked!", - [small_tf_tensor, medium_tf_tensor], + [tensor1, tensor2], None, id="bad status", ), pytest.param( "complete", 200, - [small_tf_tensor], - tf_attributes, + [tensor2], + torch_attributes, id="bad status message", ), pytest.param( @@ -230,110 +140,36 @@ def test_build_tf_response_successful(status, status_message, result, custom_att pytest.param( "complete", "Yay, it worked!", - [small_tf_tensor, medium_tf_tensor], - "custom attributes", - id="bad custom attributes", - ), - pytest.param( - "complete", - "Yay, it worked!", - [small_tf_tensor, medium_tf_tensor], - result_key1, - id="bad custom attributes type", - ), - ], -) -def test_build_tf_response_unsuccessful( - status, status_message, result, custom_attribute -): - with pytest.raises(ValueError): - response = MessageHandler.build_response( - status, status_message, result, custom_attribute - ) - - -@pytest.mark.skipif(not should_run_torch, reason="Test needs Torch to run") -@pytest.mark.parametrize( - "status, status_message, result, custom_attribute", - [ - pytest.param( - "bad status", - "Yay, it worked!", - [small_torch_tensor, medium_torch_tensor], - None, - id="bad status", - ), - pytest.param( - "complete", - 200, - [small_torch_tensor], - torch_attributes, - id="bad status message", - ), - pytest.param( - "complete", - "Yay, it worked!", - ["result_key1", "result_key2"], - torch_attributes, - id="bad result", - ), - pytest.param( - "complete", - "Yay, it worked!", - [torch_attributes], - torch_attributes, - id="bad result type", - ), - pytest.param( - "complete", - "Yay, it worked!", - [small_torch_tensor, medium_torch_tensor], + [tensor2, tensor1], "custom attributes", id="bad custom attributes", ), pytest.param( "complete", "Yay, it worked!", - [small_torch_tensor, medium_torch_tensor], + [tensor2, tensor1], 
result_key1, id="bad custom attributes type", ), ], ) -def test_build_torch_response_unsuccessful( - status, status_message, result, custom_attribute -): +def test_build_response_unsuccessful(status, status_message, result, custom_attribute): with pytest.raises(ValueError): response = MessageHandler.build_response( status, status_message, result, custom_attribute ) -@pytest.mark.skipif(not should_run_torch, reason="Test needs Torch to run") @pytest.mark.parametrize( "response", [ pytest.param(torch_indirect_response, id="indirect"), pytest.param(torch_direct_response, id="direct"), + pytest.param(tf_indirect_response, id="tf indirect"), + pytest.param(tf_direct_response, id="tf direct"), ], ) -def test_torch_serialize_response(response): - serialized = MessageHandler.serialize_response(response) - assert type(serialized) == bytes - - deserialized = MessageHandler.deserialize_response(serialized) - assert deserialized.to_dict() == response.to_dict() - - -@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run") -@pytest.mark.parametrize( - "response", - [ - pytest.param(tf_indirect_response, id="indirect"), - pytest.param(tf_direct_response, id="direct"), - ], -) -def test_tf_serialize_response(response): +def test_serialize_response(response): serialized = MessageHandler.serialize_response(response) assert type(serialized) == bytes From 0e3bd612689e0223721a8c0687c4f4cb85ce399f Mon Sep 17 00:00:00 2001 From: Alyssa Cote Date: Sat, 13 Jul 2024 11:18:55 -0700 Subject: [PATCH 35/40] mli driver runs all the way through --- ex/high_throughput_inference/mli_driver.py | 2 +- ex/high_throughput_inference/mock_app.py | 14 +++++--------- ex/high_throughput_inference/redis_driver.py | 2 +- .../standalone_workermanager.py | 2 +- smartsim/_core/mli/comm/channel/channel.py | 2 +- smartsim/_core/mli/comm/channel/dragonfli.py | 16 +++++++++------- .../mli/infrastructure/control/workermanager.py | 12 ++++++++---- .../mli/infrastructure/worker/torch_worker.py | 5 ++++- tests/mli/test_torch_worker.py | 9 ++++++--- 9 files changed, 36 insertions(+), 28 deletions(-) diff --git a/ex/high_throughput_inference/mli_driver.py b/ex/high_throughput_inference/mli_driver.py index 6da559aa6f..4438261139 100644 --- a/ex/high_throughput_inference/mli_driver.py +++ b/ex/high_throughput_inference/mli_driver.py @@ -10,7 +10,7 @@ import time import typing as t -device = "gpu" +device = "cpu" filedir = os.path.dirname(__file__) worker_manager_script_name = os.path.join(filedir, "standalone_workermanager.py") app_script_name = os.path.join(filedir, "mock_app.py") diff --git a/ex/high_throughput_inference/mock_app.py b/ex/high_throughput_inference/mock_app.py index 9cd59d2206..51f01c3095 100644 --- a/ex/high_throughput_inference/mock_app.py +++ b/ex/high_throughput_inference/mock_app.py @@ -42,6 +42,7 @@ import time import torch import numbers +import typing from collections import OrderedDict from smartsim._core.mli.message_handler import MessageHandler @@ -108,7 +109,7 @@ def print_timings(self, to_file: bool = False): def run_model(self, model: bytes | str, batch: torch.Tensor): - tensors = [batch.numpy()] + tensors: typing.List[numpy.ndarray[typing.Any, numpy.dtype[typing.Any]]]= [batch.numpy()] self.start_timings(batch.shape[0]) # built_tensor = MessageHandler.build_tensor( # batch.numpy(), "c", "float32", list(batch.shape)) @@ -134,7 +135,8 @@ def run_model(self, model: bytes | str, batch: torch.Tensor): with self._to_worker_fli.sendh(timeout=None, stream_channel=self._to_worker_ch) as to_sendh: 
to_sendh.send_bytes(request_bytes) for t in tensors: - to_sendh.send_bytes(t.tobytes()) # NOT FAST ENOUGH!!! + # to_sendh.send_bytes(t.tobytes()) #TODO NOT FAST ENOUGH!!! + to_sendh.send_bytes(bytes(t.data)) logger.info(f"Message size: {len(request_bytes)} bytes") self.measure_time("send") @@ -143,7 +145,7 @@ def run_model(self, model: bytes | str, batch: torch.Tensor): self.measure_time("receive") response = MessageHandler.deserialize_response(resp) self.measure_time("deserialize_response") - # list of data blobs? recv depending on the len(esponse.result.descriptors)? + # list of data blobs? recv depending on the len(response.result.descriptors)? data_blob = from_recvh.recv_bytes(timeout=None) result = torch.from_numpy( numpy.frombuffer( @@ -151,12 +153,6 @@ def run_model(self, model: bytes | str, batch: torch.Tensor): dtype=str(response.result.descriptors[0].dataType), ) ) - # result = torch.from_numpy( - # numpy.frombuffer( - # response.result.data[0].blob, - # dtype=str(response.result.data[0].tensorDescriptor.dataType), - # ) - # ) self.measure_time("deserialize_tensor") self.end_timings() diff --git a/ex/high_throughput_inference/redis_driver.py b/ex/high_throughput_inference/redis_driver.py index ceddba4ef7..5111019099 100644 --- a/ex/high_throughput_inference/redis_driver.py +++ b/ex/high_throughput_inference/redis_driver.py @@ -31,7 +31,7 @@ import time import typing as t -device = "gpu" +device = "cpu" filedir = os.path.dirname(__file__) app_script_name = os.path.join(filedir, "mock_app_redis.py") model_name = os.path.join(filedir, f"resnet50.{device.upper()}.pt") diff --git a/ex/high_throughput_inference/standalone_workermanager.py b/ex/high_throughput_inference/standalone_workermanager.py index c56e11a7c3..7ff706953d 100644 --- a/ex/high_throughput_inference/standalone_workermanager.py +++ b/ex/high_throughput_inference/standalone_workermanager.py @@ -51,7 +51,7 @@ parser.add_argument( "--device", type=str, - default="gpu", + default="cpu", choices="gpu cpu".split(), help="Device on which the inference takes place", ) diff --git a/smartsim/_core/mli/comm/channel/channel.py b/smartsim/_core/mli/comm/channel/channel.py index 2318896a9b..fede10a588 100644 --- a/smartsim/_core/mli/comm/channel/channel.py +++ b/smartsim/_core/mli/comm/channel/channel.py @@ -50,7 +50,7 @@ def recv(self) -> bytes: :returns: the received message""" @property - def descriptor(self) -> bytes: + def descriptor(self) -> t.List[bytes]: """Return the channel descriptor for the underlying dragon channel""" if isinstance(self._descriptor, str): return self._descriptor.encode("utf-8") diff --git a/smartsim/_core/mli/comm/channel/dragonfli.py b/smartsim/_core/mli/comm/channel/dragonfli.py index 75f8fb4bfc..134b00d3df 100644 --- a/smartsim/_core/mli/comm/channel/dragonfli.py +++ b/smartsim/_core/mli/comm/channel/dragonfli.py @@ -57,13 +57,15 @@ def send(self, value: bytes) -> None: with self._fli.sendh(timeout=None, stream_channel=self._channel) as sendh: sendh.send_bytes(value) - def recv(self) -> bytes: + def recv(self) -> t.List[bytes]: """Receieve a message through the underlying communication channel :returns: the received message""" + messages = [] with self._fli.recvh(timeout=None) as recvh: - try: - request_bytes: bytes - request_bytes, _ = recvh.recv_bytes(timeout=None) - return request_bytes - except fli.FLIEOT as exc: - return b"" + while True: + try: + message, _ = recvh.recv_bytes(timeout=None) + messages.append(message) + except fli.FLIEOT as exc: + break + return messages diff --git 
a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index 1c571dc2f2..73d7a3d141 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -105,7 +105,7 @@ def deserialize_message( input_keys = [input_key.key for input_key in request.input.keys] elif request.input.which() == "descriptors": # input_bytes = [data.blob for data in request.input.data] - input_meta = [request.input.descriptors] + input_meta = request.input.descriptors inference_request = InferenceRequest( model_key=model_key, @@ -242,7 +242,10 @@ def _on_iteration(self) -> None: timings = [] # timing # perform default deserialization of the message envelope - request_bytes: bytes = self._task_queue.recv() + bytes_list: t.List[bytes] = self._task_queue.recv() + if bytes_list: + request_bytes = bytes_list[0] + tensor_list = bytes_list[1:] interm = time.perf_counter() # timing request = deserialize_message( @@ -250,8 +253,7 @@ def _on_iteration(self) -> None: ) if request.input_meta: - for _ in request.input_meta: - request.raw_inputs.append(self._task_queue.recv()) + request.raw_inputs = tensor_list if not self._validate_request(request): return @@ -353,8 +355,10 @@ def _on_iteration(self) -> None: timings.append(time.perf_counter() - interm) # timing interm = time.perf_counter() # timing if request.callback: + # send serialized response request.callback.send(serialized_resp) if reply.outputs: + # send tensor data after response for output in reply.outputs: request.callback.send(output) diff --git a/smartsim/_core/mli/infrastructure/worker/torch_worker.py b/smartsim/_core/mli/infrastructure/worker/torch_worker.py index a4e725ab99..f8cfa9886f 100644 --- a/smartsim/_core/mli/infrastructure/worker/torch_worker.py +++ b/smartsim/_core/mli/infrastructure/worker/torch_worker.py @@ -98,7 +98,10 @@ def execute( model: torch.nn.Module = load_result.model model.eval() - results = [model(tensor).detach() for tensor in transform_result.transformed] + results = [ + model(tensor).detach().numpy().tobytes() + for tensor in transform_result.transformed + ] # TODO THIS IS BAD execute_result = ExecuteResult(results) return execute_result diff --git a/tests/mli/test_torch_worker.py b/tests/mli/test_torch_worker.py index f159c15a0e..87748ecc68 100644 --- a/tests/mli/test_torch_worker.py +++ b/tests/mli/test_torch_worker.py @@ -156,9 +156,12 @@ def test_execute(mlutils) -> None: execute_result = worker.execute(sample_request, load_model_result, transform_result) - assert all( - result.shape == torch.Size((20, 10)) for result in execute_result.predictions - ) + # assert all( + # result.shape == torch.Size((20, 10)) for result in execute_result.predictions + # ) + + # need to make this test more meaningful, but predictions are bytes string now (potentially will change back) + assert all(type(result) == bytes for result in execute_result.predictions) def test_transform_output(mlutils): From e3f44a5267f9574d4a2df3d623bc298b9af5ec79 Mon Sep 17 00:00:00 2001 From: Alyssa Cote Date: Mon, 15 Jul 2024 15:50:04 -0500 Subject: [PATCH 36/40] weaks --- ex/high_throughput_inference/mock_app.py | 4 ++-- smartsim/_core/mli/comm/channel/channel.py | 4 ++-- .../infrastructure/control/workermanager.py | 15 ++++++++++----- .../mli/infrastructure/worker/torch_worker.py | 19 +++++++++++++------ tests/mli/test_torch_worker.py | 13 ++++++------- 5 files changed, 33 insertions(+), 22 deletions(-) diff --git 
a/ex/high_throughput_inference/mock_app.py b/ex/high_throughput_inference/mock_app.py index 51f01c3095..d686c7d5c9 100644 --- a/ex/high_throughput_inference/mock_app.py +++ b/ex/high_throughput_inference/mock_app.py @@ -135,8 +135,8 @@ def run_model(self, model: bytes | str, batch: torch.Tensor): with self._to_worker_fli.sendh(timeout=None, stream_channel=self._to_worker_ch) as to_sendh: to_sendh.send_bytes(request_bytes) for t in tensors: - # to_sendh.send_bytes(t.tobytes()) #TODO NOT FAST ENOUGH!!! - to_sendh.send_bytes(bytes(t.data)) + to_sendh.send_bytes(t.tobytes()) #TODO NOT FAST ENOUGH!!! + # to_sendh.send_bytes(bytes(t.data)) logger.info(f"Message size: {len(request_bytes)} bytes") self.measure_time("send") diff --git a/smartsim/_core/mli/comm/channel/channel.py b/smartsim/_core/mli/comm/channel/channel.py index fede10a588..a3cce21814 100644 --- a/smartsim/_core/mli/comm/channel/channel.py +++ b/smartsim/_core/mli/comm/channel/channel.py @@ -45,12 +45,12 @@ def send(self, value: bytes) -> None: :param value: The value to send""" @abstractmethod - def recv(self) -> bytes: + def recv(self) -> t.List[bytes]: """Receieve a message through the underlying communication channel :returns: the received message""" @property - def descriptor(self) -> t.List[bytes]: + def descriptor(self) -> bytes: """Return the channel descriptor for the underlying dragon channel""" if isinstance(self._descriptor, str): return self._descriptor.encode("utf-8") diff --git a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index 73d7a3d141..ad2b89f173 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -58,6 +58,7 @@ from smartsim._core.mli.mli_schemas.model.model_capnp import Model from smartsim._core.mli.mli_schemas.response.response_capnp import StatusEnum + from smartsim._core.mli.mli_schemas.tensor.tensor_capnp import TensorDescriptor logger = get_logger(__name__) @@ -99,13 +100,12 @@ def deserialize_message( None # these will really be tensors already ) - input_meta: t.List[t.Any] = [] + input_meta: t.List[TensorDescriptor] = [] if request.input.which() == "keys": input_keys = [input_key.key for input_key in request.input.keys] elif request.input.which() == "descriptors": - # input_bytes = [data.blob for data in request.input.data] - input_meta = request.input.descriptors + input_meta = request.input.descriptors # type: ignore inference_request = InferenceRequest( model_key=model_key, @@ -141,6 +141,8 @@ def prepare_outputs(reply: InferenceReply) -> t.List[t.Any]: # todo: need to have the output attributes specified in the req? # maybe, add `MessageHandler.dtype_of(tensor)`? # can `build_tensor` do dtype and shape? + + # TODO isn't this what output descriptors are for? 
msg_tensor_desc = MessageHandler.build_tensor_descriptor( "c", "float32", @@ -241,8 +243,11 @@ def _on_iteration(self) -> None: return timings = [] # timing - # perform default deserialization of the message envelope + bytes_list: t.List[bytes] = self._task_queue.recv() + request_bytes: bytes = b"" + tensor_list = [] + if bytes_list: request_bytes = bytes_list[0] tensor_list = bytes_list[1:] @@ -252,7 +257,7 @@ def _on_iteration(self) -> None: request_bytes, self._comm_channel_type, self._device ) - if request.input_meta: + if request.input_meta and tensor_list: request.raw_inputs = tensor_list if not self._validate_request(request): diff --git a/smartsim/_core/mli/infrastructure/worker/torch_worker.py b/smartsim/_core/mli/infrastructure/worker/torch_worker.py index f8cfa9886f..b06874e1cc 100644 --- a/smartsim/_core/mli/infrastructure/worker/torch_worker.py +++ b/smartsim/_core/mli/infrastructure/worker/torch_worker.py @@ -98,10 +98,11 @@ def execute( model: torch.nn.Module = load_result.model model.eval() - results = [ - model(tensor).detach().numpy().tobytes() - for tensor in transform_result.transformed - ] # TODO THIS IS BAD + results = [model(tensor).detach() for tensor in transform_result.transformed] + # results = [ + # model(tensor).detach().numpy().tobytes() + # for tensor in transform_result.transformed + # ] # TODO THIS IS BAD execute_result = ExecuteResult(results) return execute_result @@ -113,10 +114,16 @@ def transform_output( result_device: str, ) -> TransformOutputResult: if result_device != "cpu": - transformed = [item.to("cpu") for item in execute_result.predictions] + transformed = [ + item.to("cpu").numpy().tobytes() for item in execute_result.predictions + ] + # todo: need the shape from latest schemas added here. return TransformOutputResult(transformed, None, "c", "float32") # fixme return TransformOutputResult( - execute_result.predictions, None, "c", "float32" + [item.numpy().tobytes() for item in execute_result.predictions], + None, + "c", + "float32", ) # fixme diff --git a/tests/mli/test_torch_worker.py b/tests/mli/test_torch_worker.py index 87748ecc68..b73e4a31b5 100644 --- a/tests/mli/test_torch_worker.py +++ b/tests/mli/test_torch_worker.py @@ -156,12 +156,9 @@ def test_execute(mlutils) -> None: execute_result = worker.execute(sample_request, load_model_result, transform_result) - # assert all( - # result.shape == torch.Size((20, 10)) for result in execute_result.predictions - # ) - - # need to make this test more meaningful, but predictions are bytes string now (potentially will change back) - assert all(type(result) == bytes for result in execute_result.predictions) + assert all( + result.shape == torch.Size((20, 10)) for result in execute_result.predictions + ) def test_transform_output(mlutils): @@ -171,7 +168,9 @@ def test_transform_output(mlutils): sample_request, execute_result, torch_device[mlutils.get_test_device().lower()] ) - assert transformed_output.outputs == execute_result.predictions + assert transformed_output.outputs == [ + item.numpy().tobytes() for item in execute_result.predictions + ] assert transformed_output.shape == None assert transformed_output.order == "c" assert transformed_output.dtype == "float32" From b57fc8e6718b94bef0794e93d7f6b2e78a7cdbe8 Mon Sep 17 00:00:00 2001 From: Alyssa Cote Date: Mon, 15 Jul 2024 16:15:21 -0500 Subject: [PATCH 37/40] more clean up --- ex/high_throughput_inference/mli_driver.py | 2 +- ex/high_throughput_inference/mock_app.py | 5 +---- ex/high_throughput_inference/redis_driver.py | 2 +- 
ex/high_throughput_inference/standalone_workermanager.py | 2 +- smartsim/_core/mli/infrastructure/worker/torch_worker.py | 4 ---- 5 files changed, 4 insertions(+), 11 deletions(-) diff --git a/ex/high_throughput_inference/mli_driver.py b/ex/high_throughput_inference/mli_driver.py index 4438261139..6da559aa6f 100644 --- a/ex/high_throughput_inference/mli_driver.py +++ b/ex/high_throughput_inference/mli_driver.py @@ -10,7 +10,7 @@ import time import typing as t -device = "cpu" +device = "gpu" filedir = os.path.dirname(__file__) worker_manager_script_name = os.path.join(filedir, "standalone_workermanager.py") app_script_name = os.path.join(filedir, "mock_app.py") diff --git a/ex/high_throughput_inference/mock_app.py b/ex/high_throughput_inference/mock_app.py index d686c7d5c9..e244c93e0f 100644 --- a/ex/high_throughput_inference/mock_app.py +++ b/ex/high_throughput_inference/mock_app.py @@ -42,7 +42,6 @@ import time import torch import numbers -import typing from collections import OrderedDict from smartsim._core.mli.message_handler import MessageHandler @@ -109,10 +108,8 @@ def print_timings(self, to_file: bool = False): def run_model(self, model: bytes | str, batch: torch.Tensor): - tensors: typing.List[numpy.ndarray[typing.Any, numpy.dtype[typing.Any]]]= [batch.numpy()] + tensors = [batch.numpy()] self.start_timings(batch.shape[0]) - # built_tensor = MessageHandler.build_tensor( - # batch.numpy(), "c", "float32", list(batch.shape)) built_tensor_desc = MessageHandler.build_tensor_descriptor( "c", "float32", list(batch.shape)) self.measure_time("build_tensor_descriptor") diff --git a/ex/high_throughput_inference/redis_driver.py b/ex/high_throughput_inference/redis_driver.py index 5111019099..ceddba4ef7 100644 --- a/ex/high_throughput_inference/redis_driver.py +++ b/ex/high_throughput_inference/redis_driver.py @@ -31,7 +31,7 @@ import time import typing as t -device = "cpu" +device = "gpu" filedir = os.path.dirname(__file__) app_script_name = os.path.join(filedir, "mock_app_redis.py") model_name = os.path.join(filedir, f"resnet50.{device.upper()}.pt") diff --git a/ex/high_throughput_inference/standalone_workermanager.py b/ex/high_throughput_inference/standalone_workermanager.py index 7ff706953d..c56e11a7c3 100644 --- a/ex/high_throughput_inference/standalone_workermanager.py +++ b/ex/high_throughput_inference/standalone_workermanager.py @@ -51,7 +51,7 @@ parser.add_argument( "--device", type=str, - default="cpu", + default="gpu", choices="gpu cpu".split(), help="Device on which the inference takes place", ) diff --git a/smartsim/_core/mli/infrastructure/worker/torch_worker.py b/smartsim/_core/mli/infrastructure/worker/torch_worker.py index b06874e1cc..e732ecd2cd 100644 --- a/smartsim/_core/mli/infrastructure/worker/torch_worker.py +++ b/smartsim/_core/mli/infrastructure/worker/torch_worker.py @@ -99,10 +99,6 @@ def execute( model: torch.nn.Module = load_result.model model.eval() results = [model(tensor).detach() for tensor in transform_result.transformed] - # results = [ - # model(tensor).detach().numpy().tobytes() - # for tensor in transform_result.transformed - # ] # TODO THIS IS BAD execute_result = ExecuteResult(results) return execute_result From c1f856b6b0cba341bb2a4b710a390953f4cb969d Mon Sep 17 00:00:00 2001 From: Alyssa Cote Date: Mon, 15 Jul 2024 16:31:10 -0500 Subject: [PATCH 38/40] changelog, mypy --- doc/changelog.md | 1 + smartsim/_core/mli/comm/channel/dragonchannel.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/changelog.md b/doc/changelog.md 
index ee41fabf88..81c8ac4794 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -13,6 +13,7 @@ Jump to: Description +- Adjust schemas for better performance - Add TorchWorker first implementation and mock inference app example - Add EnvironmentConfigLoader for ML Worker Manager - Add Model schema with model metadata included diff --git a/smartsim/_core/mli/comm/channel/dragonchannel.py b/smartsim/_core/mli/comm/channel/dragonchannel.py index 1409747a91..672fce75b2 100644 --- a/smartsim/_core/mli/comm/channel/dragonchannel.py +++ b/smartsim/_core/mli/comm/channel/dragonchannel.py @@ -25,6 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import sys +import typing as t import smartsim._core.mli.comm.channel.channel as cch from smartsim.log import get_logger @@ -52,9 +53,9 @@ def send(self, value: bytes) -> None: with self._channel.sendh(timeout=None) as sendh: sendh.send_bytes(value) - def recv(self) -> bytes: + def recv(self) -> t.List[bytes]: """Receieve a message through the underlying communication channel :returns: the received message""" with self._channel.recvh(timeout=None) as recvh: message_bytes: bytes = recvh.recv_bytes(timeout=None) - return message_bytes + return [message_bytes] From f1415f23fcab5f3ab1d51b61cca3e6efdf5c8903 Mon Sep 17 00:00:00 2001 From: Alyssa Cote Date: Thu, 18 Jul 2024 12:12:45 -0500 Subject: [PATCH 39/40] pr comments addressed --- smartsim/_core/mli/comm/channel/dragonfli.py | 6 ++-- .../infrastructure/control/workermanager.py | 36 +++++++++---------- smartsim/_core/mli/message_handler.py | 2 +- .../mli/mli_schemas/request/request.capnp | 2 +- .../mli/mli_schemas/request/request_capnp.pyi | 2 +- tests/test_message_handler/test_request.py | 4 +-- 6 files changed, 25 insertions(+), 27 deletions(-) diff --git a/smartsim/_core/mli/comm/channel/dragonfli.py b/smartsim/_core/mli/comm/channel/dragonfli.py index 134b00d3df..7ad28307cd 100644 --- a/smartsim/_core/mli/comm/channel/dragonfli.py +++ b/smartsim/_core/mli/comm/channel/dragonfli.py @@ -61,11 +61,13 @@ def recv(self) -> t.List[bytes]: """Receieve a message through the underlying communication channel :returns: the received message""" messages = [] + eot = False with self._fli.recvh(timeout=None) as recvh: - while True: + while not eot: try: message, _ = recvh.recv_bytes(timeout=None) messages.append(message) except fli.FLIEOT as exc: - break + eot = True return messages + diff --git a/smartsim/_core/mli/infrastructure/control/workermanager.py b/smartsim/_core/mli/infrastructure/control/workermanager.py index 781b36b450..27f5bfc971 100644 --- a/smartsim/_core/mli/infrastructure/control/workermanager.py +++ b/smartsim/_core/mli/infrastructure/control/workermanager.py @@ -89,19 +89,18 @@ def deserialize_message( elif request.model.which() == "data": model_bytes = request.model.data - callback_key = request.replyChannel.reply + callback_key = request.replyChannel.descriptor # todo: shouldn't this be `CommChannel.find` instead of `DragonCommChannel` comm_channel = channel_type(callback_key) # comm_channel = DragonCommChannel(request.replyChannel) input_keys: t.Optional[t.List[str]] = None - input_bytes: t.Optional[t.List[bytes]] = ( - None # these will really be tensors already - ) + input_bytes: t.Optional[t.List[bytes]] = None + output_keys: t.Optional[t.List[str]] = None - input_meta: t.List[TensorDescriptor] = [] + input_meta: t.Optional[t.List[TensorDescriptor]] = None if request.input.which() == "keys": input_keys = [input_key.key for input_key in request.input.keys] @@ 
-111,9 +110,6 @@ def deserialize_message( if request.output: output_keys = [tensor_key.key for tensor_key in request.output] - if request.output: - output_keys = [tensor_key.key for tensor_key in request.output] - inference_request = InferenceRequest( model_key=model_key, callback=comm_channel, @@ -146,11 +142,6 @@ def prepare_outputs(reply: InferenceReply) -> t.List[t.Any]: prepared_outputs.append(msg_key) elif reply.outputs: for _ in reply.outputs: - # todo: need to have the output attributes specified in the req? - # maybe, add `MessageHandler.dtype_of(tensor)`? - # can `build_tensor` do dtype and shape? - - # TODO isn't this what output descriptors are for? msg_tensor_desc = MessageHandler.build_tensor_descriptor( "c", "float32", @@ -275,20 +266,25 @@ def _on_iteration(self) -> None: timings = [] # timing bytes_list: t.List[bytes] = self._task_queue.recv() - request_bytes: bytes = b"" - tensor_list = [] - if bytes_list: - request_bytes = bytes_list[0] - tensor_list = bytes_list[1:] + if not bytes_list: + exception_handler( + ValueError("No request data found"), + None, + "No request data found.", + ) + return + + request_bytes = bytes_list[0] + tensor_bytes_list = bytes_list[1:] interm = time.perf_counter() # timing request = deserialize_message( request_bytes, self._comm_channel_type, self._device ) - if request.input_meta and tensor_list: - request.raw_inputs = tensor_list + if request.input_meta and tensor_bytes_list: + request.raw_inputs = tensor_bytes_list if not self._validate_request(request): return diff --git a/smartsim/_core/mli/message_handler.py b/smartsim/_core/mli/message_handler.py index f28bc341f6..00670dce8a 100644 --- a/smartsim/_core/mli/message_handler.py +++ b/smartsim/_core/mli/message_handler.py @@ -233,7 +233,7 @@ def _assign_reply_channel( :raises ValueError: if building fails """ try: - request.replyChannel.reply = reply_channel + request.replyChannel.descriptor = reply_channel except Exception as e: raise ValueError("Error building reply channel portion of request.") from e diff --git a/smartsim/_core/mli/mli_schemas/request/request.capnp b/smartsim/_core/mli/mli_schemas/request/request.capnp index 6d290fb599..4be1cfa215 100644 --- a/smartsim/_core/mli/mli_schemas/request/request.capnp +++ b/smartsim/_core/mli/mli_schemas/request/request.capnp @@ -32,7 +32,7 @@ using DataRef = import "../data/data_references.capnp"; using Models = import "../model/model.capnp"; struct ChannelDescriptor { - reply @0 :Data; + descriptor @0 :Data; } struct Request { diff --git a/smartsim/_core/mli/mli_schemas/request/request_capnp.pyi b/smartsim/_core/mli/mli_schemas/request/request_capnp.pyi index 54dcdcfecc..a4ad631f9f 100644 --- a/smartsim/_core/mli/mli_schemas/request/request_capnp.pyi +++ b/smartsim/_core/mli/mli_schemas/request/request_capnp.pyi @@ -61,7 +61,7 @@ from .request_attributes.request_attributes_capnp import ( ) class ChannelDescriptor: - reply: bytes + descriptor: bytes @staticmethod @contextmanager def from_bytes( diff --git a/tests/test_message_handler/test_request.py b/tests/test_message_handler/test_request.py index 5a8a091d90..4cfc115845 100644 --- a/tests/test_message_handler/test_request.py +++ b/tests/test_message_handler/test_request.py @@ -144,7 +144,7 @@ def test_build_request_indirect_successful( custom_attributes, ) assert built_request is not None - assert built_request.replyChannel.reply == reply_channel + assert built_request.replyChannel.descriptor == reply_channel if built_request.model.which() == "key": assert built_request.model.key.key == 
model.key else: @@ -319,7 +319,7 @@ def test_build_request_direct_successful( custom_attributes, ) assert built_request is not None - assert built_request.replyChannel.reply == reply_channel + assert built_request.replyChannel.descriptor == reply_channel if built_request.model.which() == "key": assert built_request.model.key.key == model.key else: From dafb4df8c7a51921b3687262d46782dea840b7fa Mon Sep 17 00:00:00 2001 From: Alyssa Cote Date: Thu, 18 Jul 2024 12:20:53 -0500 Subject: [PATCH 40/40] style --- smartsim/_core/mli/comm/channel/dragonfli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/smartsim/_core/mli/comm/channel/dragonfli.py b/smartsim/_core/mli/comm/channel/dragonfli.py index 7ad28307cd..28b4c2bf3b 100644 --- a/smartsim/_core/mli/comm/channel/dragonfli.py +++ b/smartsim/_core/mli/comm/channel/dragonfli.py @@ -70,4 +70,3 @@ def recv(self) -> t.List[bytes]: except fli.FLIEOT as exc: eot = True return messages -
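
Taken together, patches 36 through 40 converge on a single framing convention for requests sent over the FLI channel: the serialized request message goes first on a send handle, each tensor's raw bytes follow on the same stream, and the receiving side drains the stream until the FLI raises FLIEOT, so that recv() returns a list whose first element is the request and whose remaining elements are the tensor payloads (bytes_list[0] and bytes_list[1:] in the worker manager). The sketch below condenses that round trip into two helpers; it assumes a running Dragon runtime with an already-attached FLI and stream channel, and the helper and parameter names (send_request, recv_request, to_worker_fli, from_app_fli, stream_channel) are illustrative only, not part of the SmartSim API.

    from dragon import fli  # only importable inside a Dragon runtime


    def send_request(to_worker_fli, stream_channel, request_bytes, tensors):
        # Message 0 is the serialized capnp request; messages 1..N are raw
        # tensor payloads, mirroring run_model() in mock_app.py.
        with to_worker_fli.sendh(timeout=None, stream_channel=stream_channel) as sendh:
            sendh.send_bytes(request_bytes)
            for tensor in tensors:          # numpy arrays in the mock app
                sendh.send_bytes(tensor.tobytes())


    def recv_request(from_app_fli):
        # Counterpart to DragonFLIChannel.recv: collect every message on the
        # stream until the FLI signals end of transmission.
        messages = []
        eot = False
        with from_app_fli.recvh(timeout=None) as recvh:
            while not eot:
                try:
                    message, _ = recvh.recv_bytes(timeout=None)
                    messages.append(message)
                except fli.FLIEOT:
                    eot = True
        if not messages:
            raise ValueError("No request data found")
        return messages[0], messages[1:]  # request bytes, tensor payloads

This split, with only lightweight tensor descriptors (order, dtype, shape) carried inside the capnp request while the bulk tensor data moves as separate messages on the stream, appears to be the schema adjustment recorded in the "Adjust schemas for better performance" changelog entry above.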