From 451030f5d9770af53fa3037f19c7c2c516dcb108 Mon Sep 17 00:00:00 2001
From: Deegue
Date: Wed, 17 Jul 2024 06:50:39 +0000
Subject: [PATCH 1/2] improve gaudi workflow

---
 .../workflows/workflow_inference_gaudi2.yml | 19 ++++++-------------
 dev/scripts/ci-functions.sh                 | 13 +++++++++++++
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/workflow_inference_gaudi2.yml b/.github/workflows/workflow_inference_gaudi2.yml
index dedeb415..b7634bfb 100644
--- a/.github/workflows/workflow_inference_gaudi2.yml
+++ b/.github/workflows/workflow_inference_gaudi2.yml
@@ -89,12 +89,11 @@ jobs:
       - name: Start Docker Container
         run: |
           TARGET=${{steps.target.outputs.target}}
-          cid=$(docker ps -q --filter "name=${TARGET}")
-          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-          # check and remove exited container
-          cid=$(docker ps -a -q --filter "name=${TARGET}")
-          if [[ ! -z "$cid" ]]; then docker rm $cid; fi
-          docker run -tid --name="${TARGET}" --hostname="${TARGET}-container" --runtime=habana -v /home/yizhong/Model-References:/root/Model-References -v ${{ inputs.code_checkout_path }}:/root/llm-on-ray -v ${{ inputs.model_cache_path }}:/root/.cache/huggingface/hub/ -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --cap-add sys_ptrace --net=host --ipc=host ${TARGET}:habana
+          code_checkout_path=${{ inputs.code_checkout_path }}
+          model_cache_path=${{ inputs.model_cache_path }}
+          source dev/scripts/ci-functions.sh
+          start_gaudi_docker ${TARGET} ${code_checkout_path} ${model_cache_path}
+
       - name: Start Ray Cluster
         run: |
           TARGET=${{steps.target.outputs.target}}
@@ -118,13 +117,7 @@
           EOF
           )
           docker exec "${TARGET}" python -c "$CMD"
-          if [[ ${{ matrix.model }} == "llama-2-7b-chat-hf" ]]; then
-            docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/llama-2-7b-chat-hf-hpu.yaml --keep_serve_terminal"
-          elif [[ ${{ matrix.model }} == "llama-2-70b-chat-hf" ]]; then
-            docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/llama-2-70b-chat-hf-hpu.yaml --keep_serve_terminal"
-          elif [[ ${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then
-            docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/llama-2-7b-chat-hf-vllm-hpu.yaml --keep_serve_terminal"
-          fi
+          docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/${{ matrix.model }}-hpu.yaml --keep_serve_terminal"

           echo Streaming query:
           docker exec "${TARGET}" bash -c "python examples/inference/api_server_openai/query_http_requests.py --model_name ${{ matrix.model }} --streaming_response"
diff --git a/dev/scripts/ci-functions.sh b/dev/scripts/ci-functions.sh
index 2240a444..d6d0624b 100644
--- a/dev/scripts/ci-functions.sh
+++ b/dev/scripts/ci-functions.sh
@@ -68,6 +68,19 @@ start_docker() {
     docker run -tid "${docker_args[@]}" "${TARGET}:latest"
 }

+start_gaudi_docker() {
+    local TARGET=$1
+    local code_checkout_path=$2
+    local model_cache_path=$3
+
+    cid=$(docker ps -q --filter "name=${TARGET}")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
+    # check and remove exited container
+    cid=$(docker ps -a -q --filter "name=${TARGET}")
+    if [[ ! -z "$cid" ]]; then docker rm $cid; fi
+    docker run -tid --name="${TARGET}" --hostname="${TARGET}-container" --runtime=habana -v /home/yizhong/Model-References:/root/Model-References -v ${ code_checkout_path }:/root/llm-on-ray -v ${ model_cache_path }:/root/.cache/huggingface/hub/ -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --cap-add sys_ptrace --net=host --ipc=host ${TARGET}:habana
+}
+
 install_dependencies(){
     local TARGET=$1


From 331705b29d50db93df9d8fc332049a64641df7b5 Mon Sep 17 00:00:00 2001
From: Deegue
Date: Wed, 17 Jul 2024 08:53:19 +0000
Subject: [PATCH 2/2] nit

---
 dev/scripts/ci-functions.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/scripts/ci-functions.sh b/dev/scripts/ci-functions.sh
index d6d0624b..a418c465 100644
--- a/dev/scripts/ci-functions.sh
+++ b/dev/scripts/ci-functions.sh
@@ -78,7 +78,7 @@ start_gaudi_docker() {
     # check and remove exited container
     cid=$(docker ps -a -q --filter "name=${TARGET}")
     if [[ ! -z "$cid" ]]; then docker rm $cid; fi
-    docker run -tid --name="${TARGET}" --hostname="${TARGET}-container" --runtime=habana -v /home/yizhong/Model-References:/root/Model-References -v ${ code_checkout_path }:/root/llm-on-ray -v ${ model_cache_path }:/root/.cache/huggingface/hub/ -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --cap-add sys_ptrace --net=host --ipc=host ${TARGET}:habana
+    docker run -tid --name="${TARGET}" --hostname="${TARGET}-container" --runtime=habana -v /home/yizhong/Model-References:/root/Model-References -v ${code_checkout_path}:/root/llm-on-ray -v ${model_cache_path}:/root/.cache/huggingface/hub/ -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --cap-add sys_ptrace --net=host --ipc=host ${TARGET}:habana
 }

 install_dependencies(){