From a525562e67222cad2a3de91c7047c8c97c747152 Mon Sep 17 00:00:00 2001
From: "Wu, Xiaochang"
Date: Mon, 25 Mar 2024 04:03:06 +0000
Subject: [PATCH 01/12] update

---
 pyproject.toml | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 95996773e..456a20a56 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,30 +18,37 @@ classifiers = [
     "Framework :: Ray"
 ]
 dependencies = [
+    # Base
+    "ray[serve,tune]>=2.9",
+    "torch>=2.2.0",
+    "transformers>=4.35.0, <=4.35.2",
+
+    # "py-cpuinfo",
+
+    # Training
     "accelerate",
+    "tensorboard",
+    "einops",
     "datasets>=2.14.6",
-    "numpy",
-    "ray>=2.9",
+    "gymnasium",
     "typing>=3.7.4.3",
+    "scikit-image",
     "tabulate",
-    "ray[tune]",
-    "ray[serve]",
-    "gymnasium",
+
+    # RLHF
     "dm-tree",
-    "tensorboard",
-    "scikit-image",
-    "einops",
+
+    # PEFT
     "peft>=0.4.0",
     "deltatuner==1.1.9",
-    "py-cpuinfo",
-    "pydantic-yaml"
+
+    # Config
+    "pydantic-yaml",
 ]
 
 [project.optional-dependencies]
 cpu = [
-    "transformers>=4.35.0, <=4.35.2",
     "intel_extension_for_pytorch>=2.2.0",
-    "torch>=2.2.0",
     "oneccl_bind_pt>=2.2.0"
 ]

From 64f926cac0efff58b4b7936261e7f818f1c1249c Mon Sep 17 00:00:00 2001
From: "Wu, Xiaochang"
Date: Mon, 25 Mar 2024 04:37:54 +0000
Subject: [PATCH 02/12] fix blocking

---
 llm_on_ray/inference/api_server_openai.py | 4 ++--
 llm_on_ray/inference/api_server_simple.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llm_on_ray/inference/api_server_openai.py b/llm_on_ray/inference/api_server_openai.py
index 2ba821075..72bea1b78 100644
--- a/llm_on_ray/inference/api_server_openai.py
+++ b/llm_on_ray/inference/api_server_openai.py
@@ -72,12 +72,12 @@ def router_application(deployments):
 
 def openai_serve_run(deployments, host, route_prefix, port):
     router_app = router_application(deployments)
+    serve.start(host=host, port=port)
     serve.run(
         router_app,
+        blocking=False,
         name="router",
         route_prefix=route_prefix,
-        host=host,
-        _blocking=True,
     ).options(
         stream=True,
         use_new_handle_api=True,
diff --git a/llm_on_ray/inference/api_server_simple.py b/llm_on_ray/inference/api_server_simple.py
index 0663700d8..90a874bc0 100644
--- a/llm_on_ray/inference/api_server_simple.py
+++ b/llm_on_ray/inference/api_server_simple.py
@@ -22,11 +22,11 @@ def serve_run(deployments, model_list):
     for model_id, infer_conf in model_list.items():
         print("deploy model: ", model_id)
         deployment = deployments[model_id]
+
+        serve.start(host=infer_conf.host, port=infer_conf.port)
         serve.run(
             deployment,
-            _blocking=True,
-            host=infer_conf.host,
-            port=infer_conf.port,
+            blocking=False,
             name=infer_conf.name,
             route_prefix=infer_conf.route_prefix,
         )
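Note on PATCH 02: Ray Serve's serve.run stopped accepting the private _blocking flag and the host/port keywords, so the patch starts Serve explicitly and runs the application without blocking. A minimal sketch of that pattern, assuming Ray Serve 2.10; the Echo deployment, host, and port below are illustrative, not code from this repo:

    import time
    from ray import serve
    from starlette.requests import Request

    @serve.deployment
    class Echo:
        async def __call__(self, request: Request) -> dict:
            # Echo the JSON request body back to the caller.
            return await request.json()

    # Configure the HTTP proxy first; serve.run no longer takes host/port.
    serve.start(http_options={"host": "127.0.0.1", "port": 8000})
    # Deploys the app and returns once it is ready (non-blocking run).
    serve.run(Echo.bind(), name="echo", route_prefix="/echo")
    # Nothing blocks anymore, so keep the driver process alive explicitly.
    while True:
        time.sleep(60)

Since serve.run now returns instead of parking the driver, a long-running server needs the explicit keep-alive loop (or an equivalent signal wait).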
From 6ccf6c52e964ed9917c3c39390b4deba55fd148b Mon Sep 17 00:00:00 2001
From: "Wu, Xiaochang"
Date: Mon, 25 Mar 2024 06:44:45 +0000
Subject: [PATCH 03/12] update

Signed-off-by: Wu, Xiaochang
---
 .gitignore                                | 1 +
 llm_on_ray/inference/api_server_openai.py | 1 -
 llm_on_ray/inference/api_server_simple.py | 1 -
 pyproject.toml                            | 1 +
 4 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index db72323f4..78ad8ea7d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 __pycache__
 **.ipynb
+*.json
 debug/
 build/lib/
 llm_on_ray.egg-info/
\ No newline at end of file
diff --git a/llm_on_ray/inference/api_server_openai.py b/llm_on_ray/inference/api_server_openai.py
index 72bea1b78..5836385be 100644
--- a/llm_on_ray/inference/api_server_openai.py
+++ b/llm_on_ray/inference/api_server_openai.py
@@ -75,7 +75,6 @@ def openai_serve_run(deployments, host, route_prefix, port):
     serve.start(host=host, port=port)
     serve.run(
         router_app,
-        blocking=False,
         name="router",
         route_prefix=route_prefix,
     ).options(
diff --git a/llm_on_ray/inference/api_server_simple.py b/llm_on_ray/inference/api_server_simple.py
index 90a874bc0..d0a542370 100644
--- a/llm_on_ray/inference/api_server_simple.py
+++ b/llm_on_ray/inference/api_server_simple.py
@@ -26,7 +26,6 @@ def serve_run(deployments, model_list):
         serve.start(host=infer_conf.host, port=infer_conf.port)
         serve.run(
             deployment,
-            blocking=False,
             name=infer_conf.name,
             route_prefix=infer_conf.route_prefix,
         )
diff --git a/pyproject.toml b/pyproject.toml
index 456a20a56..50b490311 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,6 +34,7 @@ dependencies = [
     "typing>=3.7.4.3",
     "scikit-image",
     "tabulate",
+    "typer",
 
     # RLHF
     "dm-tree",

From e24d638730a4835fab13d04e14a03bea3a0c01c1 Mon Sep 17 00:00:00 2001
From: "Wu, Xiaochang"
Date: Mon, 25 Mar 2024 08:59:54 +0000
Subject: [PATCH 04/12] update

Signed-off-by: Wu, Xiaochang
---
 llm_on_ray/inference/api_server_openai.py | 2 +-
 llm_on_ray/inference/api_server_simple.py | 2 +-
 pyproject.toml                            | 4 +---
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/llm_on_ray/inference/api_server_openai.py b/llm_on_ray/inference/api_server_openai.py
index 5836385be..eeda00672 100644
--- a/llm_on_ray/inference/api_server_openai.py
+++ b/llm_on_ray/inference/api_server_openai.py
@@ -72,7 +72,7 @@ def router_application(deployments):
 
 def openai_serve_run(deployments, host, route_prefix, port):
     router_app = router_application(deployments)
-    serve.start(host=host, port=port)
+    serve.start(http_options={"host": host, "port": port})
     serve.run(
         router_app,
         name="router",
diff --git a/llm_on_ray/inference/api_server_simple.py b/llm_on_ray/inference/api_server_simple.py
index d0a542370..f2cf0a1e7 100644
--- a/llm_on_ray/inference/api_server_simple.py
+++ b/llm_on_ray/inference/api_server_simple.py
@@ -23,7 +23,7 @@ def serve_run(deployments, model_list):
         print("deploy model: ", model_id)
         deployment = deployments[model_id]
 
-        serve.start(host=infer_conf.host, port=infer_conf.port)
+        serve.start(http_options={"host": infer_conf.host, "port": infer_conf.port})
         serve.run(
             deployment,
             name=infer_conf.name,
diff --git a/pyproject.toml b/pyproject.toml
index 50b490311..a6f16fa99 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,12 +19,10 @@ classifiers = [
 ]
 dependencies = [
     # Base
-    "ray[serve,tune]>=2.9",
+    "ray[serve,tune]>=2.10",
     "torch>=2.2.0",
     "transformers>=4.35.0, <=4.35.2",
 
-    # "py-cpuinfo",
-
     # Training
     "accelerate",
     "tensorboard",
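Note on PATCH 04: host and port move into serve.start(http_options=...), whose keys follow the fields of ray.serve.config.HTTPOptions; a plain dict is accepted in place of an HTTPOptions object. A hedged sketch of the startup shape this patch converges on (the function name and docstring are illustrative; router_app stands for a bound Serve application such as the repo's router_application(...) result):

    from ray import serve

    def serve_openai_router(router_app, host: str, route_prefix: str, port: int):
        """Start the HTTP proxy with explicit options, then deploy the router."""
        # Proxy configuration lives on serve.start, not serve.run.
        serve.start(http_options={"host": host, "port": port})
        serve.run(router_app, name="router", route_prefix=route_prefix)

One design consequence worth noting: serve.start is effectively a no-op when Serve is already running in the cluster, so in the per-model loop of api_server_simple.py the proxy ends up bound to the first model's host/port.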
From c63b0c97eca693a54cb338414008eadeb1fa5294 Mon Sep 17 00:00:00 2001
From: "Wu, Xiaochang"
Date: Mon, 25 Mar 2024 09:51:27 +0000
Subject: [PATCH 05/12] fix setup and getting started

Signed-off-by: Wu, Xiaochang
---
 .github/workflows/workflow_tests.yml | 46 +++++-----------------------------------------
 1 file changed, 5 insertions(+), 41 deletions(-)

diff --git a/.github/workflows/workflow_tests.yml b/.github/workflows/workflow_tests.yml
index 28eb277f1..31a17cf86 100644
--- a/.github/workflows/workflow_tests.yml
+++ b/.github/workflows/workflow_tests.yml
@@ -13,7 +13,7 @@ jobs:
     name: setup-test
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
 
     runs-on: ubuntu-latest
 
     defaults:
       run:
         shell: bash
 
     steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
 
       - name: Display Python version
         run: |
           python -c "import sys; print(sys.version)"
 
-      - name: Install dependencies for tests
-        run: |
-          python -m pip install --upgrade pip
-          pip install .[cpu] --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
-          # Dynamic link oneCCL and Intel MPI libraries
-          source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh
-          # Additional libraries required for pytest
-          pip install -r ./tests/requirements.txt
-
       - name: Run Test for Setup
         run: |
           ./tests/test_setup.sh CPU false
 
   getting-started-test:
     needs: setup-test
     name: getting-started-test
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
 
     runs-on: ubuntu-latest
 
     defaults:
       run:
         shell: bash
 
     steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
 
       - name: Display Python version
         run: |
           python -c "import sys; print(sys.version)"
 
-      - name: Install dependencies for tests
-        run: |
-          python -m pip install --upgrade pip
-          pip install .[cpu] --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
-          # Dynamic link oneCCL and Intel MPI libraries
-          source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh
-          # Additional libraries required for pytest
-          pip install -r ./tests/requirements.txt
-
       - name: Run Test for Getting Started
         run: |
           ./tests/test_getting_started.sh
 
   bare-test:
     needs: setup-test
     name: bare-test
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
 
     runs-on: ubuntu-latest
 
     env:
       SHELL: bash -eo pipefail
-  
+
     defaults:
       run:
         shell: bash
 
     steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
 
       - name: Display Python version
         run: |
           python -c "import sys; print(sys.version)"
 
-      - name: Install dependencies for tests
-        run: |
-          python -m pip install --upgrade pip
-          pip install .[cpu] --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
-          # Dynamic link oneCCL and Intel MPI libraries
-          source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh
-          # Additional libraries required for pytest
-          pip install -r ./tests/requirements.txt
-
       - name: Start Ray Cluster
         run: |
           ray start --head
 
   docker-test:
     name: docker-test
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
 
     runs-on: ubuntu-latest
From b2da8120b9d102b763064526de3b115643f47727 Mon Sep 17 00:00:00 2001
From: "Wu, Xiaochang"
Date: Mon, 25 Mar 2024 10:21:53 +0000
Subject: [PATCH 06/12] update

Signed-off-by: Wu, Xiaochang
---
 .github/workflows/workflow_tests.yml | 11 ++++++++++-
 tests/test_getting_started.sh        |  1 +
 tests/test_setup.sh                  |  4 ++--
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/workflow_tests.yml b/.github/workflows/workflow_tests.yml
index 31a17cf86..6b6a6f04d 100644
--- a/.github/workflows/workflow_tests.yml
+++ b/.github/workflows/workflow_tests.yml
@@ -21,6 +21,9 @@ jobs:
         shell: bash
 
     steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
@@ -48,6 +51,9 @@ jobs:
         shell: bash
 
     steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
@@ -79,6 +85,9 @@ jobs:
         shell: bash
 
     steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
@@ -102,7 +111,7 @@ jobs:
     name: docker-test
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11"]
 
     runs-on: ubuntu-latest
 
diff --git a/tests/test_getting_started.sh b/tests/test_getting_started.sh
index eee6d7ff5..e5071eb56 100755
--- a/tests/test_getting_started.sh
+++ b/tests/test_getting_started.sh
@@ -11,6 +11,7 @@ fi
 
 # Clone repo
 echo "Cloning repo"
+cd /tmp
 git clone https://github.com/intel/llm-on-ray.git && cd llm-on-ray
 
 # Install dependencies
diff --git a/tests/test_setup.sh b/tests/test_setup.sh
index 0a1f7e029..7a0120b92 100755
--- a/tests/test_setup.sh
+++ b/tests/test_setup.sh
@@ -9,8 +9,8 @@ if [ "$#" != 2 ]; then
     exit 1
 fi
 
 # Step 1: Clone and create conda environment
-git clone https://github.com/intel/llm-on-ray.git
-cd llm-on-ray
+cd /tmp
+git clone https://github.com/intel/llm-on-ray.git && cd llm-on-ray
 
 # Step 2: Check CPU, GPU or Gaudi and install dependencies
 hardware=0

From 1b3631fccbca7201403ff46db09e23a277aa755c Mon Sep 17 00:00:00 2001
From: "Wu, Xiaochang"
Date: Mon, 25 Mar 2024 10:30:09 +0000
Subject: [PATCH 07/12] update

Signed-off-by: Wu, Xiaochang
---
 .github/workflows/workflow_tests.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/workflow_tests.yml b/.github/workflows/workflow_tests.yml
index 6b6a6f04d..e7b873f22 100644
--- a/.github/workflows/workflow_tests.yml
+++ b/.github/workflows/workflow_tests.yml
@@ -13,7 +13,7 @@ jobs:
     name: setup-test
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11"]
 
     runs-on: ubuntu-latest
 
@@ -43,7 +43,7 @@ jobs:
     name: getting-started-test
     strategy:
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11"]
 
     runs-on: ubuntu-latest
 
@@ -73,7 +73,7 @@ jobs:
     name: bare-test
     strategy:
      matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11"]
 
     runs-on: ubuntu-latest
 

From 0e0e80186f698cca9a7981cf57888d1c78a5d3c0 Mon Sep 17 00:00:00 2001
From: "Wu, Xiaochang"
Date: Mon, 25 Mar 2024 10:42:44 +0000
Subject: [PATCH 08/12] nit

Signed-off-by: Wu, Xiaochang
---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index a6f16fa99..2d6dd7001 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,7 @@ dependencies = [
     "scikit-image",
     "tabulate",
     "typer",
+    "async-timeout",
 
     # RLHF
     "dm-tree",
From 80821d53da2b0836c8714ec21e996a927320fd1d Mon Sep 17 00:00:00 2001
From: "Wu, Xiaochang"
Date: Mon, 25 Mar 2024 11:08:39 +0000
Subject: [PATCH 09/12] Add dependencies for tests and update pyproject.toml

Signed-off-by: Wu, Xiaochang
---
 .github/workflows/workflow_tests.yml | 11 ++++++++++-
 pyproject.toml                       |  1 +
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/workflow_tests.yml b/.github/workflows/workflow_tests.yml
index e7b873f22..3c1f6bb80 100644
--- a/.github/workflows/workflow_tests.yml
+++ b/.github/workflows/workflow_tests.yml
@@ -98,6 +98,15 @@ jobs:
         run: |
           python -c "import sys; print(sys.version)"
 
+      - name: Install dependencies for tests
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[cpu] --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+          # Dynamic link oneCCL and Intel MPI libraries
+          source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh
+          # Additional libraries required for pytest
+          pip install -r ./tests/requirements.txt
+
       - name: Start Ray Cluster
         run: |
           ray start --head
@@ -171,4 +180,4 @@ jobs:
       run: |
         TARGET=${{steps.target.outputs.target}}
         cid=$(docker ps -q --filter "name=${TARGET}")
-        if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
+        if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 2d6dd7001..4da092b78 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ classifiers = [
 ]
 dependencies = [
     # Base
+    "ray>=2.10",
     "ray[serve,tune]>=2.10",
     "torch>=2.2.0",
     "transformers>=4.35.0, <=4.35.2",
"torch>=2.2.0", "oneccl_bind_pt>=2.2.0" ] @@ -92,4 +83,4 @@ llm_on_ray-pretrain = "llm_on_ray.pretrain.pretrain:main" llm_on_ray-megatron_deepspeed_pretrain = "llm_on_ray.pretrain.megatron_deepspeed_pretrain:main" [tool.black] -line-length = 100 +line-length = 100 \ No newline at end of file From 3ecd87f30fe2a686aa938834c1157e9e5d5fc33b Mon Sep 17 00:00:00 2001 From: "Wu, Xiaochang" Date: Mon, 25 Mar 2024 12:33:14 +0000 Subject: [PATCH 11/12] Update dependencies and fix torch_dist.py Signed-off-by: Wu, Xiaochang --- .github/workflows/workflow_tests.yml | 18 ------------------ llm_on_ray/inference/torch_dist.py | 4 ++-- pyproject.toml | 4 +++- tests/requirements.txt | 1 - tests/test_getting_started.sh | 1 + 5 files changed, 6 insertions(+), 22 deletions(-) diff --git a/.github/workflows/workflow_tests.yml b/.github/workflows/workflow_tests.yml index c113ec8b8..3c1f6bb80 100644 --- a/.github/workflows/workflow_tests.yml +++ b/.github/workflows/workflow_tests.yml @@ -34,15 +34,6 @@ jobs: run: | python -c "import sys; print(sys.version)" - - name: Install dependencies for tests - run: | - python -m pip install --upgrade pip - pip install .[cpu] --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ - # Dynamic link oneCCL and Intel MPI libraries - source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh - # Additional libraries required for pytest - pip install -r ./tests/requirements.txt - - name: Run Test for Setup run: | ./tests/test_setup.sh CPU false @@ -73,15 +64,6 @@ jobs: run: | python -c "import sys; print(sys.version)" - - name: Install dependencies for tests - run: | - python -m pip install --upgrade pip - pip install .[cpu] --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ - # Dynamic link oneCCL and Intel MPI libraries - source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh - # Additional libraries required for pytest - pip install -r ./tests/requirements.txt - - name: Run Test for Getting Started run: | ./tests/test_getting_started.sh diff --git a/llm_on_ray/inference/torch_dist.py b/llm_on_ray/inference/torch_dist.py index b97766559..6db7dd292 100644 --- a/llm_on_ray/inference/torch_dist.py +++ b/llm_on_ray/inference/torch_dist.py @@ -43,7 +43,7 @@ import ray from ray.actor import ActorHandle from ray.train._internal.utils import get_address_and_port -from ray.air._internal.torch_utils import get_device +from ray.air._internal.torch_utils import get_devices from ray._private.accelerators.hpu import HPU_PACKAGE_AVAILABLE if HPU_PACKAGE_AVAILABLE: @@ -211,7 +211,7 @@ def _shutdown_torch_distributed(): return # Clean up cuda memory. 
From 3ecd87f30fe2a686aa938834c1157e9e5d5fc33b Mon Sep 17 00:00:00 2001
From: "Wu, Xiaochang"
Date: Mon, 25 Mar 2024 12:33:14 +0000
Subject: [PATCH 11/12] Update dependencies and fix torch_dist.py

Signed-off-by: Wu, Xiaochang
---
 .github/workflows/workflow_tests.yml | 18 ------------------
 llm_on_ray/inference/torch_dist.py   |  4 ++--
 pyproject.toml                       |  4 +++-
 tests/requirements.txt               |  1 -
 tests/test_getting_started.sh        |  1 +
 5 files changed, 6 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/workflow_tests.yml b/.github/workflows/workflow_tests.yml
index c113ec8b8..3c1f6bb80 100644
--- a/.github/workflows/workflow_tests.yml
+++ b/.github/workflows/workflow_tests.yml
@@ -34,15 +34,6 @@ jobs:
       run: |
         python -c "import sys; print(sys.version)"
 
-      - name: Install dependencies for tests
-        run: |
-          python -m pip install --upgrade pip
-          pip install .[cpu] --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
-          # Dynamic link oneCCL and Intel MPI libraries
-          source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh
-          # Additional libraries required for pytest
-          pip install -r ./tests/requirements.txt
-
       - name: Run Test for Setup
         run: |
           ./tests/test_setup.sh CPU false
@@ -73,15 +64,6 @@ jobs:
       run: |
         python -c "import sys; print(sys.version)"
 
-      - name: Install dependencies for tests
-        run: |
-          python -m pip install --upgrade pip
-          pip install .[cpu] --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
-          # Dynamic link oneCCL and Intel MPI libraries
-          source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh
-          # Additional libraries required for pytest
-          pip install -r ./tests/requirements.txt
-
       - name: Run Test for Getting Started
         run: |
           ./tests/test_getting_started.sh
diff --git a/llm_on_ray/inference/torch_dist.py b/llm_on_ray/inference/torch_dist.py
index b97766559..6db7dd292 100644
--- a/llm_on_ray/inference/torch_dist.py
+++ b/llm_on_ray/inference/torch_dist.py
@@ -43,7 +43,7 @@
 import ray
 from ray.actor import ActorHandle
 from ray.train._internal.utils import get_address_and_port
-from ray.air._internal.torch_utils import get_device
+from ray.air._internal.torch_utils import get_devices
 from ray._private.accelerators.hpu import HPU_PACKAGE_AVAILABLE
 
 if HPU_PACKAGE_AVAILABLE:
@@ -211,7 +211,7 @@ def _shutdown_torch_distributed():
         return
 
     # Clean up cuda memory.
-    devices = get_device()
+    devices = get_devices()
     for device in devices:
         with torch.cuda.device(device):
             torch.cuda.empty_cache()
diff --git a/pyproject.toml b/pyproject.toml
index 30cd002fc..1952886c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,9 @@ dependencies = [
     "peft>=0.4.0",
     "deltatuner==1.1.9",
     "py-cpuinfo",
-    "pydantic-yaml"
+    "pydantic-yaml",
+    "async_timeout",
+    "typer"
 ]
 
 [project.optional-dependencies]
diff --git a/tests/requirements.txt b/tests/requirements.txt
index cf6c10e5f..a0e765160 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,3 +1,2 @@
 pytest
 openai
-async-timeout
\ No newline at end of file
diff --git a/tests/test_getting_started.sh b/tests/test_getting_started.sh
index e5071eb56..7a5da7ba8 100755
--- a/tests/test_getting_started.sh
+++ b/tests/test_getting_started.sh
@@ -49,6 +49,7 @@ python examples/inference/api_server_openai/query_http_requests.py --model_name
 
 # 3.Using OpenAI SDK
 echo "Method 3: Using OpenAI SDK to access model"
+pip install openai
 export no_proxy="localhost,127.0.0.1"
 export OPENAI_API_BASE="http://localhost:8000/v1"
 export OPENAI_BASE_URL="http://localhost:8000/v1"
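Note on PATCH 11: the rename matters because _shutdown_torch_distributed iterates over the return value. get_devices() in ray.air._internal.torch_utils returns a list of torch.device objects, one per accelerator assigned to the worker, while the singular get_device() returned a single torch.device, which is not iterable, so the "for device in devices" loop would raise a TypeError. A sketch of the corrected cleanup pattern, assuming that private helper keeps this behavior (it is internal to Ray and may change between releases; the function name below is illustrative):

    import torch
    from ray.air._internal.torch_utils import get_devices

    def cleanup_cuda_caches() -> None:
        # Stand-in for the tail of _shutdown_torch_distributed().
        if not torch.cuda.is_available():
            return
        for device in get_devices():  # one torch.device per assigned GPU
            if device.type == "cuda":
                with torch.cuda.device(device):
                    torch.cuda.empty_cache()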
From 240a385c5e742ab13bb31f58dbfb043c031a3c98 Mon Sep 17 00:00:00 2001
From: "Wu, Xiaochang"
Date: Mon, 25 Mar 2024 12:55:40 +0000
Subject: [PATCH 12/12] Update OpenAI SDK installation and start ray cluster

Signed-off-by: Wu, Xiaochang
---
 README.md                     |  2 +-
 tests/test_getting_started.sh | 14 ++++----------
 tests/test_setup.sh           |  5 ++---
 3 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 02cd39081..0a2b8ceb2 100644
--- a/README.md
+++ b/README.md
@@ -88,7 +88,7 @@ curl $ENDPOINT_URL/chat/completions \
 python examples/inference/api_server_openai/query_http_requests.py
 
 # using OpenAI SDK
-# please install openai in current env by running: pip install openai>=1.0
+pip install openai>=1.0
 export OPENAI_BASE_URL=http://localhost:8000/v1
 export OPENAI_API_KEY="not_a_real_key"
 python examples/inference/api_server_openai/query_openai_sdk.py
diff --git a/tests/test_getting_started.sh b/tests/test_getting_started.sh
index 7a5da7ba8..6a900a553 100755
--- a/tests/test_getting_started.sh
+++ b/tests/test_getting_started.sh
@@ -1,27 +1,21 @@
 #!/bin/bash
 set -eo pipefail
 
-# Step 1: Python environment
 # Check Python version is or later than 3.9
-echo "Step 1: Python environment"
 echo "Checking Python version which should be equal or later than 3.9"
 if ! python -c 'import sys; assert sys.version_info >= (3,9)' > /dev/null; then
     exit "Python should be 3.9 or later!"
 fi
 
-# Clone repo
-echo "Cloning repo"
-cd /tmp
-git clone https://github.com/intel/llm-on-ray.git && cd llm-on-ray
+echo "Step 1: Clone the repository, install llm-on-ray and its dependencies."
+echo "Checkout already done in the workflow."
 
-# Install dependencies
-echo "Installing dependencies"
 pip install .[cpu] --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
 
 # Dynamic link oneCCL and Intel MPI libraries
 source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl; print(torch_ccl.cwd)")/env/setvars.sh
 
-# Step 2: Start ray cluster
+echo "Step 2: Start ray cluster ..."
 ray start --head
@@ -50,7 +44,7 @@
 
 # 3.Using OpenAI SDK
 echo "Method 3: Using OpenAI SDK to access model"
-pip install openai
+pip install openai>=1.0
 export no_proxy="localhost,127.0.0.1"
 export OPENAI_API_BASE="http://localhost:8000/v1"
 export OPENAI_BASE_URL="http://localhost:8000/v1"
diff --git a/tests/test_setup.sh b/tests/test_setup.sh
index 7a0120b92..589b5a320 100755
--- a/tests/test_setup.sh
+++ b/tests/test_setup.sh
@@ -9,9 +9,8 @@ if [ "$#" != 2 ]; then
     exit 1
 fi
 
-# Step 1: Clone and create conda environment
-cd /tmp
-git clone https://github.com/intel/llm-on-ray.git && cd llm-on-ray
+# Step 1: Clone the repository, install llm-on-ray and its dependencies.
+echo "Checkout already done in the workflow."
 
 # Step 2: Check CPU, GPU or Gaudi and install dependencies
 hardware=0
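Closing note on PATCH 12: the README now tells users to install the OpenAI client directly. When typing that command in a shell, quote the requirement (pip install "openai>=1.0") so the ">" is not parsed as a redirect. A sketch of what a client along the lines of query_openai_sdk.py does against the OpenAI-compatible endpoint (the script's actual contents are not part of this series, and the model name below is a placeholder):

    import os
    from openai import OpenAI

    client = OpenAI(
        base_url=os.environ.get("OPENAI_BASE_URL", "http://localhost:8000/v1"),
        api_key=os.environ.get("OPENAI_API_KEY", "not_a_real_key"),
    )
    response = client.chat.completions.create(
        model="gpt2",  # placeholder; use a model id actually served by llm-on-ray
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(response.choices[0].message.content)

The environment variables mirror the ones exported in the README and in tests/test_getting_started.sh, so the same script works in both contexts.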