diff --git a/.github/workflows/workflow_inference_gaudi2.yml b/.github/workflows/workflow_inference_gaudi2.yml
index 6abd0381..d23e8648 100644
--- a/.github/workflows/workflow_inference_gaudi2.yml
+++ b/.github/workflows/workflow_inference_gaudi2.yml
@@ -99,12 +99,11 @@ jobs:
       - name: Start Docker Container
         run: |
           TARGET=${{steps.target.outputs.target}}
-          cid=$(docker ps -q --filter "name=${TARGET}")
-          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-          # check and remove exited container
-          cid=$(docker ps -a -q --filter "name=${TARGET}")
-          if [[ ! -z "$cid" ]]; then docker rm $cid; fi
-          docker run -tid --name="${TARGET}" --hostname="${TARGET}-container" --runtime=habana -v /home/yizhong/Model-References:/root/Model-References -v ${{ inputs.code_checkout_path }}:/root/llm-on-ray -v ${{ inputs.model_cache_path }}:/root/.cache/huggingface/hub/ -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --cap-add sys_ptrace --net=host --ipc=host ${TARGET}:habana
+          code_checkout_path=${{ inputs.code_checkout_path }}
+          model_cache_path=${{ inputs.model_cache_path }}
+          source dev/scripts/ci-functions.sh
+          start_gaudi_docker ${TARGET} ${code_checkout_path} ${model_cache_path}
+
       - name: Start Ray Cluster
         run: |
           TARGET=${{steps.target.outputs.target}}
@@ -125,15 +124,7 @@
           EOF
           )
           docker exec "${TARGET}" python -c "$CMD"
-          if [[ ${{ matrix.model }} == "llama-2-7b-chat-hf" ]]; then
-            docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/llama-2-7b-chat-hf-hpu.yaml --keep_serve_terminal"
-          elif [[ ${{ matrix.model }} == "llama-2-70b-chat-hf" ]]; then
-            docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/llama-2-70b-chat-hf-hpu.yaml --keep_serve_terminal"
-          elif [[ ${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then
-            docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/llama-2-7b-chat-hf-vllm-hpu.yaml --keep_serve_terminal"
-          else
-            docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/${{ matrix.model }}-hpu.yaml --keep_serve_terminal"
-          fi
+          docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/hpu/${{ matrix.model }}-hpu.yaml --keep_serve_terminal"
           echo Streaming query:
           docker exec "${TARGET}" bash -c "python examples/inference/api_server_openai/query_http_requests.py --model_name ${{ matrix.model }} --streaming_response"
 
diff --git a/dev/scripts/ci-functions.sh b/dev/scripts/ci-functions.sh
index 2240a444..a418c465 100644
--- a/dev/scripts/ci-functions.sh
+++ b/dev/scripts/ci-functions.sh
@@ -68,6 +68,19 @@ start_docker() {
     docker run -tid "${docker_args[@]}" "${TARGET}:latest"
 }
 
+start_gaudi_docker() {
+    local TARGET=$1
+    local code_checkout_path=$2
+    local model_cache_path=$3
+
+    cid=$(docker ps -q --filter "name=${TARGET}")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
+    # check and remove exited container
+    cid=$(docker ps -a -q --filter "name=${TARGET}")
+    if [[ ! -z "$cid" ]]; then docker rm $cid; fi
+    docker run -tid --name="${TARGET}" --hostname="${TARGET}-container" --runtime=habana -v /home/yizhong/Model-References:/root/Model-References -v ${code_checkout_path}:/root/llm-on-ray -v ${model_cache_path}:/root/.cache/huggingface/hub/ -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --cap-add sys_ptrace --net=host --ipc=host ${TARGET}:habana
+}
+
 install_dependencies(){
     local TARGET=$1