VectorInstitute · XkunW · Sep 23, 2025 · Sep 24, 2025 · Oct 3, 2025 · Oct 6, 2025
diff --git a/Dockerfile b/Dockerfile
@@ -45,6 +45,7 @@ RUN apt-get update && apt-get install -y \
 ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
 ENV UCX_NET_DEVICES=all
 ENV NCCL_IB_DISABLE=0
+ENV NCCL_SOCKET_IFNAME=ib0
 
 # Set up project
 WORKDIR /vec-inf
@@ -53,10 +54,12 @@ COPY . /vec-inf
 # Install project dependencies with build requirements
 RUN PIP_INDEX_URL="https://download.pytorch.org/whl/cu128" uv pip install --system -e .[dev]
 
+# Install a single system-wide NCCL (from the NVIDIA CUDA repo in the base image)
+RUN apt-get update && apt-get install -y --allow-change-held-packages\
+    libnccl2 libnccl-dev \
+    && rm -rf /var/lib/apt/lists/*
+
 # Final configuration
-RUN mkdir -p /vec-inf/nccl && \
-    mv /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1 /vec-inf/nccl/libnccl.so.2.18.1
-ENV VLLM_NCCL_SO_PATH=/vec-inf/nccl/libnccl.so.2.18.1
 ENV NCCL_DEBUG=INFO
 
 # Set the default command to start an interactive shell

diff --git a/MODEL_TRACKING.md b/MODEL_TRACKING.md
@@ -40,6 +40,7 @@ This document tracks all model weights available in the `/model-weights` directo
 | `gemma-2b-it` | ❌ |
 | `gemma-7b` | ❌ |
 | `gemma-7b-it` | ❌ |
+| `gemma-2-2b-it` | ✅ |
 | `gemma-2-9b` | ✅ |
 | `gemma-2-9b-it` | ✅ |
 | `gemma-2-27b` | ✅ |

diff --git a/pyproject.toml b/pyproject.toml
@@ -8,7 +8,7 @@ license = "MIT"
 requires-python = ">=3.10"
 dependencies = [
     "requests>=2.31.0",
-    "click>=8.1.0",
+    "click>=8.1.0,!=8.3.0",
     "rich>=13.7.0",
     "pydantic>=2.10.6",
     "pyyaml>=6.0.2",
@@ -42,8 +42,7 @@ dev = [
     "xgrammar>=0.1.11",
     "torch>=2.7.0",
     "vllm>=0.10.0",
-    "vllm-nccl-cu12>=2.18,<2.19",
-    "ray>=2.40.0",
+    "ray>=2.40.0,<2.49.0",
     "cupy-cuda12x==12.1.0"
 ]
 

diff --git a/tests/vec_inf/cli/test_cli.py b/tests/vec_inf/cli/test_cli.py
@@ -39,6 +39,7 @@ def test_launch_command_success(runner):
             "mem_per_node": "32G",
             "model_weights_parent_dir": "/model-weights",
             "vocab_size": "128000",
+            "venv": "/path/to/venv",
             "vllm_args": {"max_model_len": 8192},
             "env": {"CACHE": "/cache"},
         }

diff --git a/tests/vec_inf/cli/test_helper.py b/tests/vec_inf/cli/test_helper.py
@@ -35,6 +35,7 @@ def test_format_table_output(self):
             "mem_per_node": "32G",
             "model_weights_parent_dir": "/model-weights",
             "log_dir": "/tmp/logs",
+            "venv": "/path/to/venv",
             "vllm_args": {"max_model_len": 8192, "enable_prefix_caching": True},
             "env": {"CACHE": "/cache"},
         }
@@ -63,6 +64,7 @@ def test_format_table_output_with_minimal_params(self):
             "mem_per_node": "16G",
             "model_weights_parent_dir": "/weights",
             "log_dir": "/logs",
+            "venv": "/path/to/venv",
             "vllm_args": {},
             "env": {},
         }

diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py
@@ -53,7 +53,7 @@ def singularity_params(self, basic_params):
         singularity = basic_params.copy()
         singularity.update(
             {
-                "venv": "singularity",
+                "venv": "apptainer",
                 "bind": "/scratch:/scratch,/data:/data",
                 "env": {
                     "CACHE_DIR": "/cache",
@@ -109,7 +109,7 @@ def test_init_singularity(self, singularity_params):
     def test_init_singularity_no_bind(self, basic_params):
         """Test Singularity initialization without additional binds."""
         params = basic_params.copy()
-        params["venv"] = "singularity"
+        params["venv"] = "apptainer"
         generator = SlurmScriptGenerator(params)
 
         assert generator.params == params
@@ -185,7 +185,7 @@ def test_generate_launch_cmd_singularity(self, singularity_params):
         generator = SlurmScriptGenerator(singularity_params)
         launch_cmd = generator._generate_launch_cmd()
 
-        assert "exec --nv" in launch_cmd
+        assert "apptainer exec --nv" in launch_cmd
         assert "--bind /path/to/model_weights/test-model" in launch_cmd
         assert "--bind /scratch:/scratch,/data:/data" in launch_cmd
         assert "source" not in launch_cmd
@@ -306,9 +306,9 @@ def batch_params(self):
     def batch_singularity_params(self, batch_params):
         """Generate batch SLURM configuration parameters with Singularity."""
         singularity_params = batch_params.copy()
-        singularity_params["venv"] = "singularity"  # Set top-level venv to singularity
+        singularity_params["venv"] = "apptainer"  # Set top-level venv to apptainer
         for model_name in singularity_params["models"]:
-            singularity_params["models"][model_name]["venv"] = "singularity"
+            singularity_params["models"][model_name]["venv"] = "apptainer"
             singularity_params["models"][model_name]["bind"] = (
                 "/scratch:/scratch,/data:/data"
             )
@@ -341,9 +341,9 @@ def test_init_singularity(self, batch_singularity_params):
     def test_init_singularity_no_bind(self, batch_params):
         """Test Singularity initialization without additional binds."""
         params = batch_params.copy()
-        params["venv"] = "singularity"  # Set top-level venv to singularity
+        params["venv"] = "apptainer"  # Set top-level venv to apptainer
         for model_name in params["models"]:
-            params["models"][model_name]["venv"] = "singularity"
+            params["models"][model_name]["venv"] = "apptainer"
 
         generator = BatchSlurmScriptGenerator(params)