Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ RUN apt-get update && apt-get install -y \
ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
ENV UCX_NET_DEVICES=all
ENV NCCL_IB_DISABLE=0
ENV NCCL_SOCKET_IFNAME=ib0

# Set up project
WORKDIR /vec-inf
Expand All @@ -53,10 +54,12 @@ COPY . /vec-inf
# Install project dependencies with build requirements
RUN PIP_INDEX_URL="https://download.pytorch.org/whl/cu128" uv pip install --system -e .[dev]

# Install a single, system NCCL (from NVIDIA CUDA repo in base image)
RUN apt-get update && apt-get install -y --allow-change-held-packages\
libnccl2 libnccl-dev \
&& rm -rf /var/lib/apt/lists/*

# Final configuration
RUN mkdir -p /vec-inf/nccl && \
mv /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1 /vec-inf/nccl/libnccl.so.2.18.1
ENV VLLM_NCCL_SO_PATH=/vec-inf/nccl/libnccl.so.2.18.1
ENV NCCL_DEBUG=INFO

# Set the default command to start an interactive shell
Expand Down
1 change: 1 addition & 0 deletions MODEL_TRACKING.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ This document tracks all model weights available in the `/model-weights` directo
| `gemma-2b-it` | ❌ |
| `gemma-7b` | ❌ |
| `gemma-7b-it` | ❌ |
| `gemma-2-2b-it` | ✅ |
| `gemma-2-9b` | ✅ |
| `gemma-2-9b-it` | ✅ |
| `gemma-2-27b` | ✅ |
Expand Down
5 changes: 2 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ license = "MIT"
requires-python = ">=3.10"
dependencies = [
"requests>=2.31.0",
"click>=8.1.0",
"click>=8.1.0,!=8.3.0",
"rich>=13.7.0",
"pydantic>=2.10.6",
"pyyaml>=6.0.2",
Expand Down Expand Up @@ -42,8 +42,7 @@ dev = [
"xgrammar>=0.1.11",
"torch>=2.7.0",
"vllm>=0.10.0",
"vllm-nccl-cu12>=2.18,<2.19",
"ray>=2.40.0",
"ray>=2.40.0,<2.49.0",
"cupy-cuda12x==12.1.0"
]

Expand Down
1 change: 1 addition & 0 deletions tests/vec_inf/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def test_launch_command_success(runner):
"mem_per_node": "32G",
"model_weights_parent_dir": "/model-weights",
"vocab_size": "128000",
"venv": "/path/to/venv",
"vllm_args": {"max_model_len": 8192},
"env": {"CACHE": "/cache"},
}
Expand Down
2 changes: 2 additions & 0 deletions tests/vec_inf/cli/test_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def test_format_table_output(self):
"mem_per_node": "32G",
"model_weights_parent_dir": "/model-weights",
"log_dir": "/tmp/logs",
"venv": "/path/to/venv",
"vllm_args": {"max_model_len": 8192, "enable_prefix_caching": True},
"env": {"CACHE": "/cache"},
}
Expand Down Expand Up @@ -63,6 +64,7 @@ def test_format_table_output_with_minimal_params(self):
"mem_per_node": "16G",
"model_weights_parent_dir": "/weights",
"log_dir": "/logs",
"venv": "/path/to/venv",
"vllm_args": {},
"env": {},
}
Expand Down
14 changes: 7 additions & 7 deletions tests/vec_inf/client/test_slurm_script_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def singularity_params(self, basic_params):
singularity = basic_params.copy()
singularity.update(
{
"venv": "singularity",
"venv": "apptainer",
"bind": "/scratch:/scratch,/data:/data",
"env": {
"CACHE_DIR": "/cache",
Expand Down Expand Up @@ -109,7 +109,7 @@ def test_init_singularity(self, singularity_params):
def test_init_singularity_no_bind(self, basic_params):
"""Test Singularity initialization without additional binds."""
params = basic_params.copy()
params["venv"] = "singularity"
params["venv"] = "apptainer"
generator = SlurmScriptGenerator(params)

assert generator.params == params
Expand Down Expand Up @@ -185,7 +185,7 @@ def test_generate_launch_cmd_singularity(self, singularity_params):
generator = SlurmScriptGenerator(singularity_params)
launch_cmd = generator._generate_launch_cmd()

assert "exec --nv" in launch_cmd
assert "apptainer exec --nv" in launch_cmd
assert "--bind /path/to/model_weights/test-model" in launch_cmd
assert "--bind /scratch:/scratch,/data:/data" in launch_cmd
assert "source" not in launch_cmd
Expand Down Expand Up @@ -306,9 +306,9 @@ def batch_params(self):
def batch_singularity_params(self, batch_params):
"""Generate batch SLURM configuration parameters with Singularity."""
singularity_params = batch_params.copy()
singularity_params["venv"] = "singularity" # Set top-level venv to singularity
singularity_params["venv"] = "apptainer" # Set top-level venv to apptainer
for model_name in singularity_params["models"]:
singularity_params["models"][model_name]["venv"] = "singularity"
singularity_params["models"][model_name]["venv"] = "apptainer"
singularity_params["models"][model_name]["bind"] = (
"/scratch:/scratch,/data:/data"
)
Expand Down Expand Up @@ -341,9 +341,9 @@ def test_init_singularity(self, batch_singularity_params):
def test_init_singularity_no_bind(self, batch_params):
"""Test Singularity initialization without additional binds."""
params = batch_params.copy()
params["venv"] = "singularity" # Set top-level venv to singularity
params["venv"] = "apptainer" # Set top-level venv to apptainer
for model_name in params["models"]:
params["models"][model_name]["venv"] = "singularity"
params["models"][model_name]["venv"] = "apptainer"

generator = BatchSlurmScriptGenerator(params)

Expand Down
Loading
Loading