intel · Deegue · Jul 17, 2024 · Jul 17, 2024 · Jul 18, 2024
diff --git a/.github/workflows/workflow_inference_gaudi2.yml b/.github/workflows/workflow_inference_gaudi2.yml
@@ -99,12 +99,11 @@ jobs:
       - name: Start Docker Container
         run: |
           TARGET=${{steps.target.outputs.target}}
-          cid=$(docker ps -q --filter "name=${TARGET}")
-          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-          # check and remove exited container
-          cid=$(docker ps -a -q --filter "name=${TARGET}")
-          if [[ ! -z "$cid" ]]; then docker rm $cid; fi
-          docker run -tid --name="${TARGET}" --hostname="${TARGET}-container" --runtime=habana -v /home/yizhong/Model-References:/root/Model-References -v ${{ inputs.code_checkout_path }}:/root/llm-on-ray -v ${{ inputs.model_cache_path }}:/root/.cache/huggingface/hub/ -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --cap-add sys_ptrace --net=host --ipc=host ${TARGET}:habana
+          code_checkout_path="${{ inputs.code_checkout_path }}"  # quoted: the input is pasted verbatim into this script
+          model_cache_path="${{ inputs.model_cache_path }}"
+          source dev/scripts/ci-functions.sh  # defines start_gaudi_docker
+          start_gaudi_docker "${TARGET}" "${code_checkout_path}" "${model_cache_path}"
+
       - name: Start Ray Cluster
         run: |
           TARGET=${{steps.target.outputs.target}}
@@ -125,15 +124,7 @@ jobs:
           EOF
           )
           docker exec "${TARGET}" python -c "$CMD"
-          if [[ ${{ matrix.model }} == "llama-2-7b-chat-hf" ]]; then
-            docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/llama-2-7b-chat-hf-hpu.yaml --keep_serve_terminal"
-          elif [[ ${{ matrix.model }} == "llama-2-70b-chat-hf" ]]; then
-            docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/llama-2-70b-chat-hf-hpu.yaml --keep_serve_terminal"
-          elif [[ ${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then
-            docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/llama-2-7b-chat-hf-vllm-hpu.yaml --keep_serve_terminal" 
-          else
-            docker exec "${TARGET}" bash -c "llm_on_ray-serve  --config_file llm_on_ray/inference/models/hpu/${{ matrix.model }}-hpu.yaml --keep_serve_terminal"
-          fi
+          docker exec "${TARGET}" bash -c "llm_on_ray-serve  --config_file llm_on_ray/inference/models/hpu/${{ matrix.model }}-hpu.yaml --keep_serve_terminal"
           echo Streaming query:
           docker exec "${TARGET}" bash -c "python examples/inference/api_server_openai/query_http_requests.py --model_name ${{ matrix.model }} --streaming_response"
 

diff --git a/dev/scripts/ci-functions.sh b/dev/scripts/ci-functions.sh
@@ -68,6 +68,19 @@ start_docker() {
     docker run -tid  "${docker_args[@]}" "${TARGET}:latest"   
 }
 
+start_gaudi_docker() {
+    # Recreate the Gaudi container: drop any container matching TARGET, then start "${TARGET}:habana".
+    # Args: $1 container/image name, $2 host checkout path, $3 host model cache path (both bind-mounted).
+    local TARGET=$1 code_checkout_path=$2 model_cache_path=$3 cid
+    # Running containers first; $cid stays unquoted because the name filter may return several IDs.
+    cid=$(docker ps -q --filter "name=${TARGET}")
+    if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid; fi
+    # check and remove exited container
+    cid=$(docker ps -a -q --filter "name=${TARGET}")
+    if [[ -n "$cid" ]]; then docker rm $cid; fi
+    docker run -tid --name="${TARGET}" --hostname="${TARGET}-container" --runtime=habana -v /home/yizhong/Model-References:/root/Model-References -v "${code_checkout_path}:/root/llm-on-ray" -v "${model_cache_path}:/root/.cache/huggingface/hub/" -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --cap-add sys_ptrace --net=host --ipc=host "${TARGET}:habana"
+}
+
 install_dependencies(){
     local TARGET=$1