diff --git a/.github/workflows/workflow_finetune.yml b/.github/workflows/workflow_finetune.yml index 5a8e32720..854732bff 100644 --- a/.github/workflows/workflow_finetune.yml +++ b/.github/workflows/workflow_finetune.yml @@ -85,7 +85,7 @@ jobs: docker exec "finetune" bash -c "source \$(python -c 'import oneccl_bindings_for_pytorch as torch_ccl;print(torch_ccl.cwd)')/env/setvars.sh; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --head --node-ip-address 127.0.0.1 --ray-debugger-external; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --address='127.0.0.1:6379' --ray-debugger-external" CMD=$(cat << EOF import yaml - conf_path = "finetune/finetune.yaml" + conf_path = "llm_on_ray/finetune/finetune.yaml" with open(conf_path, encoding="utf-8") as reader: result = yaml.load(reader, Loader=yaml.FullLoader) result['General']['base_model'] = "${{ matrix.model }}" @@ -113,14 +113,14 @@ jobs: EOF ) docker exec "finetune" python -c "$CMD" - docker exec "finetune" bash -c "python finetune/finetune.py --config_file finetune/finetune.yaml" + docker exec "finetune" bash -c "llm_on_ray-finetune --config_file llm_on_ray/finetune/finetune.yaml" - name: Run PEFT-LoRA Test run: | docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*" CMD=$(cat << EOF import yaml - conf_path = "finetune/finetune.yaml" + conf_path = "llm_on_ray/finetune/finetune.yaml" with open(conf_path, encoding="utf-8") as reader: result = yaml.load(reader, Loader=yaml.FullLoader) result['General']['lora_config'] = { @@ -138,7 +138,7 @@ jobs: EOF ) docker exec "finetune" python -c "$CMD" - docker exec "finetune" bash -c "python finetune/finetune.py --config_file finetune/finetune.yaml" + docker exec "finetune" bash -c "llm_on_ray-finetune --config_file llm_on_ray/finetune/finetune.yaml" - name: Run Deltatuner Test on DENAS-LoRA Model run: | @@ -150,7 +150,7 @@ jobs: import os import yaml os.system("cp -r $(python -m pip show deltatuner | grep Location | cut -d: -f2)/deltatuner/conf/best_structure examples/") - conf_path = "finetune/finetune.yaml" + conf_path = "llm_on_ray/finetune/finetune.yaml" with open(conf_path, encoding="utf-8") as reader: result = yaml.load(reader, Loader=yaml.FullLoader) result['General']['lora_config'] = { @@ -168,7 +168,7 @@ jobs: yaml.dump(result, output, sort_keys=False) EOF) docker exec "finetune" python -c "$CMD" - docker exec "finetune" bash -c "python finetune/finetune.py --config_file finetune/finetune.yaml" + docker exec "finetune" bash -c "llm_on_ray-finetune --config_file llm_on_ray/finetune/finetune.yaml" fi - name: Stop Ray diff --git a/.github/workflows/workflow_inference.yml b/.github/workflows/workflow_inference.yml index 6a5617a66..7ea4359be 100644 --- a/.github/workflows/workflow_inference.yml +++ b/.github/workflows/workflow_inference.yml @@ -118,14 +118,14 @@ jobs: CMD=$(cat << EOF import yaml if ("${{ matrix.model }}" == "starcoder"): - conf_path = "inference/models/starcoder.yaml" + conf_path = "llm_on_ray/inference/models/starcoder.yaml" with open(conf_path, encoding="utf-8") as reader: result = yaml.load(reader, Loader=yaml.FullLoader) result['model_description']["config"]["use_auth_token"] = "${{ env.HF_ACCESS_TOKEN }}" with open(conf_path, 'w') as output: yaml.dump(result, output, sort_keys=False) if ("${{ matrix.model }}" == "llama-2-7b-chat-hf"): - conf_path = "inference/models/llama-2-7b-chat-hf.yaml" + conf_path = "llm_on_ray/inference/models/llama-2-7b-chat-hf.yaml" with open(conf_path, encoding="utf-8") as reader: result = yaml.load(reader, Loader=yaml.FullLoader) 
result['model_description']["config"]["use_auth_token"] = "${{ env.HF_ACCESS_TOKEN }}" @@ -135,11 +135,11 @@ jobs: ) docker exec "${TARGET}" python -c "$CMD" if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then - docker exec "${TARGET}" bash -c "python inference/serve.py --config_file inference/models/bigdl/mpt-7b-bigdl.yaml --simple" + docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/bigdl/mpt-7b-bigdl.yaml --simple" elif [[ ${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then - docker exec "${TARGET}" bash -c "python inference/serve.py --config_file .github/workflows/config/llama-2-7b-chat-hf-vllm-fp32.yaml --simple" + docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file .github/workflows/config/llama-2-7b-chat-hf-vllm-fp32.yaml --simple" else - docker exec "${TARGET}" bash -c "python inference/serve.py --simple --models ${{ matrix.model }}" + docker exec "${TARGET}" bash -c "llm_on_ray-serve --simple --models ${{ matrix.model }}" fi echo Non-streaming query: docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}" @@ -150,7 +150,7 @@ jobs: if: ${{ matrix.dtuner_model }} run: | TARGET=${{steps.target.outputs.target}} - docker exec "${TARGET}" bash -c "python inference/serve.py --config_file .github/workflows/config/mpt_deltatuner.yaml --simple" + docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file .github/workflows/config/mpt_deltatuner.yaml --simple" docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}" docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response" @@ -160,8 +160,8 @@ jobs: if [[ ${{ matrix.model }} =~ ^(gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then echo ${{ matrix.model }} is not supported! elif [[ ! ${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then - docker exec "${TARGET}" bash -c "python .github/workflows/config/update_inference_config.py --config_file inference/models/\"${{ matrix.model }}\".yaml --output_file \"${{ matrix.model }}\".yaml.deepspeed --deepspeed" - docker exec "${TARGET}" bash -c "python inference/serve.py --config_file \"${{ matrix.model }}\".yaml.deepspeed --simple" + docker exec "${TARGET}" bash -c "python .github/workflows/config/update_inference_config.py --config_file llm_on_ray/inference/models/\"${{ matrix.model }}\".yaml --output_file \"${{ matrix.model }}\".yaml.deepspeed --deepspeed" + docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file \"${{ matrix.model }}\".yaml.deepspeed --simple" docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}" docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response" fi @@ -173,7 +173,7 @@ jobs: if [[ ${{ matrix.model }} =~ ^(gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then echo ${{ matrix.model }} is not supported! 
else - docker exec "${TARGET}" bash -c "python inference/serve.py --config_file .github/workflows/config/mpt_deltatuner_deepspeed.yaml --simple" + docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file .github/workflows/config/mpt_deltatuner_deepspeed.yaml --simple" docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}" docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response" fi @@ -182,9 +182,9 @@ jobs: run: | TARGET=${{steps.target.outputs.target}} if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then - docker exec "${TARGET}" bash -c "python inference/serve.py --config_file inference/models/bigdl/mpt-7b-bigdl.yaml" + docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file llm_on_ray/inference/models/bigdl/mpt-7b-bigdl.yaml" elif [[ ! ${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then - docker exec "${TARGET}" bash -c "python inference/serve.py --models ${{ matrix.model }}" + docker exec "${TARGET}" bash -c "llm_on_ray-serve --models ${{ matrix.model }}" docker exec "${TARGET}" bash -c "python examples/inference/api_server_openai/query_http_requests.py --model_name ${{ matrix.model }}" fi @@ -202,9 +202,3 @@ jobs: TARGET=${{steps.target.outputs.target}} cid=$(docker ps -q --filter "name=${TARGET}") if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi - - - name: Test Summary - run: echo "to be continued" - - - diff --git a/.github/workflows/workflow_orders_on_merge.yml b/.github/workflows/workflow_orders_on_merge.yml index a057f8ea6..d491baca1 100644 --- a/.github/workflows/workflow_orders_on_merge.yml +++ b/.github/workflows/workflow_orders_on_merge.yml @@ -7,11 +7,11 @@ on: paths: - '.github/**' - 'docker/**' - - 'common/**' - 'dev/docker/**' - - 'finetune/**' - - 'inference/**' - - 'rlhf/**' + - 'llm_on_ray/common/**' + - 'llm_on_ray/finetune/**' + - 'llm_on_ray/inference/**' + - 'llm_on_ray/rlhf/**' - 'tools/**' - 'pyproject.toml' - 'tests/**' diff --git a/.github/workflows/workflow_orders_on_pr.yml b/.github/workflows/workflow_orders_on_pr.yml index 0fdb9bb01..9f5df5d83 100644 --- a/.github/workflows/workflow_orders_on_pr.yml +++ b/.github/workflows/workflow_orders_on_pr.yml @@ -7,11 +7,11 @@ on: paths: - '.github/**' - 'docker/**' - - 'common/**' - 'dev/docker/**' - - 'finetune/**' - - 'inference/**' - - 'rlhf/**' + - 'llm_on_ray/common/**' + - 'llm_on_ray/finetune/**' + - 'llm_on_ray/inference/**' + - 'llm_on_ray/rlhf/**' - 'tools/**' - 'pyproject.toml' - 'tests/**' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index eef34287b..c539326c1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: rev: v0.0.289 hooks: - id: ruff - args: [ --fix, --exit-non-zero-on-fix, --ignore=E402, --ignore=E501, --ignore=E731] + args: [ --fix, --exit-non-zero-on-fix, --ignore=E402, --ignore=E501, --ignore=E731, --ignore=F401] # Black needs to be ran after ruff with --fix - repo: https://github.com/psf/black diff --git a/README.md b/README.md index c544f7cc7..7c9419783 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ LLM-on-Ray's modular workflow structure is designed to comprehensively cater to This guide will assist you in setting up LLM-on-Ray on Intel CPU locally, covering the initial setup, finetuning models, and deploying them for serving. ### Setup -#### 1. 
Clone the repository and install dependencies. +#### 1. Clone the repository, install llm-on-ray and its dependencies. Software requirement: Git and Conda ```bash git clone https://github.com/intel/llm-on-ray.git @@ -62,14 +62,14 @@ ray start --head Use the following command to finetune a model using an example dataset and default configurations. The finetuned model will be stored in `/tmp/llm-ray/output` by default. To customize the base model, dataset and configurations, please see the [finetuning document](#finetune): ```bash -python finetune/finetune.py --config_file finetune/finetune.yaml +llm_on_ray-finetune --config_file llm_on_ray/finetune/finetune.yaml ``` ### Serving Deploy a model on Ray and expose an endpoint for serving. This command uses GPT2 as an example, but more model configuration examples can be found in the [inference/models](inference/models) directory: ```bash -python inference/serve.py --config_file inference/models/gpt2.yaml +llm_on_ray-serve --config_file llm_on_ray/inference/models/gpt2.yaml ``` The default served method is to provide an OpenAI-compatible API server ([OpenAI API Reference](https://platform.openai.com/docs/api-reference/chat)), you can access and test it in many ways: @@ -95,7 +95,7 @@ python examples/inference/api_server_openai/query_openai_sdk.py ``` Or you can serve specific model to a simple endpoint according to the `port` and `route_prefix` parameters in configuration file, ```bash -python inference/serve.py --config_file inference/models/gpt2.yaml --simple +llm_on_ray-serve --config_file llm_on_ray/inference/models/gpt2.yaml --simple ``` After deploying the model endpoint, you can access and test it by using the script below: ```bash diff --git a/common/__init__.py b/common/__init__.py deleted file mode 100644 index 3960d2f50..000000000 --- a/common/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -from .logging import logger -from .load import * # noqa: F403 # unable to detect undefined names -from . 
import agentenv -from .torch_config import TorchConfig # noqa: F401 -from typing import Dict, Any -import sys -from .config import Config # noqa: F401 -from .init import init # noqa: F401 - - -@load_check_decorator # noqa: F405 # may be undefined, or defined from star imports -def get_agentenv(config: Dict[str, Any]): - logger.info(f"{sys._getframe().f_code.co_name} config: {config}") - agentenv_type = config.get("type", None) - Factory = agentenv.AgentEnv.registory.get(agentenv_type) - if Factory is None: - raise ValueError(f"there is no {agentenv_type} AgentEnv.") - try: - _ = Factory(config) - except Exception as e: - logger.critical(f"{Factory.__name__} init error: {e}", exc_info=True) - exit(1) - return _ diff --git a/common/agentenv/__init__.py b/common/agentenv/__init__.py deleted file mode 100644 index fe05d28ec..000000000 --- a/common/agentenv/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -import os -from .agentenv import AgentEnv -from ..common import import_all_module - -realpath = os.path.realpath(__file__) -basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.agentenv") - -__all__ = ["AgentEnv"] diff --git a/common/dataprocesser/__init__.py b/common/dataprocesser/__init__.py deleted file mode 100644 index 7e74e6a13..000000000 --- a/common/dataprocesser/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -import os -from .dataprocesser import DataProcesser -from ..common import import_all_module - -realpath = os.path.realpath(__file__) -basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.dataprocesser") - -__all__ = ["DataProcesser"] diff --git a/common/dataset/__init__.py b/common/dataset/__init__.py deleted file mode 100644 index 9b04a188b..000000000 --- a/common/dataset/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -import os -from .dataset import Dataset -from ..common import import_all_module - -realpath = os.path.realpath(__file__) -basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.dataset") - -__all__ = ["Dataset"] diff --git a/common/initializer/__init__.py b/common/initializer/__init__.py deleted file mode 100644 index 2cdc27adb..000000000 --- a/common/initializer/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -import os -from .initializer import Initializer -from ..common import import_all_module - -realpath = os.path.realpath(__file__) -basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.initializer") - -__all__ = ["Initializer"] diff --git a/common/model/__init__.py b/common/model/__init__.py deleted file mode 100644 index df7989ceb..000000000 --- a/common/model/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -import os -from .model import Model -from ..common import import_all_module - -realpath = os.path.realpath(__file__) -basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.model") - -__all__ = ["Model"] diff --git a/common/optimizer/__init__.py b/common/optimizer/__init__.py deleted file mode 100644 index 122acc90f..000000000 --- a/common/optimizer/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -import os -from .optimizer import Optimizer -from ..common import import_all_module - -realpath = os.path.realpath(__file__) -basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.optimizer") - -__all__ = ["Optimizer"] diff --git a/common/tokenizer/__init__.py b/common/tokenizer/__init__.py deleted file mode 100644 index 63c281496..000000000 --- a/common/tokenizer/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -import os -from .tokenizer import Tokenizer -from ..common import 
import_all_module - -realpath = os.path.realpath(__file__) -basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.tokenizer") - -__all__ = ["Tokenizer"] diff --git a/common/trainer/__init__.py b/common/trainer/__init__.py deleted file mode 100644 index b33b565a5..000000000 --- a/common/trainer/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -import os -from .trainer import Trainer -from ..common import import_all_module - -realpath = os.path.realpath(__file__) -basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.trainer") - -__all__ = ["Trainer"] diff --git a/dev/docker/Dockerfile.bigdl-cpu b/dev/docker/Dockerfile.bigdl-cpu index 411449e41..3838b3382 100644 --- a/dev/docker/Dockerfile.bigdl-cpu +++ b/dev/docker/Dockerfile.bigdl-cpu @@ -27,7 +27,8 @@ RUN --mount=type=cache,target=/opt/conda/pkgs conda init bash && \ COPY ./pyproject.toml . COPY ./MANIFEST.in . -RUN mkdir ./finetune && mkdir ./inference +# create llm_on_ray package directory to bypass the following 'pip install -e' command +RUN mkdir ./llm_on_ray RUN --mount=type=cache,target=/root/.cache/pip pip install -e .[bigdl-cpu] --extra-index-url https://download.pytorch.org/whl/cpu \ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ diff --git a/dev/docker/Dockerfile.cpu_and_deepspeed b/dev/docker/Dockerfile.cpu_and_deepspeed index 5371fae78..3e4fe5ff0 100644 --- a/dev/docker/Dockerfile.cpu_and_deepspeed +++ b/dev/docker/Dockerfile.cpu_and_deepspeed @@ -27,7 +27,8 @@ RUN --mount=type=cache,target=/opt/conda/pkgs conda init bash && \ COPY ./pyproject.toml . COPY ./MANIFEST.in . -RUN mkdir ./finetune && mkdir ./inference +# create llm_on_ray package directory to bypass the following 'pip install -e' command +RUN mkdir ./llm_on_ray RUN --mount=type=cache,target=/root/.cache/pip pip install -e .[cpu,deepspeed] --extra-index-url https://download.pytorch.org/whl/cpu \ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ diff --git a/dev/docker/Dockerfile.vllm b/dev/docker/Dockerfile.vllm index 4585ccef0..3f298ba69 100644 --- a/dev/docker/Dockerfile.vllm +++ b/dev/docker/Dockerfile.vllm @@ -28,7 +28,8 @@ COPY ./pyproject.toml . COPY ./MANIFEST.in . COPY ./dev/scripts/install-vllm-cpu.sh . -RUN mkdir ./finetune && mkdir ./inference +# create llm_on_ray package directory to bypass the following 'pip install -e' command +RUN mkdir ./llm_on_ray RUN --mount=type=cache,target=/root/.cache/pip pip install -e .[cpu] --extra-index-url https://download.pytorch.org/whl/cpu \ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ diff --git a/docs/finetune.md b/docs/finetune.md index dda3505f2..ee05cc52e 100755 --- a/docs/finetune.md +++ b/docs/finetune.md @@ -65,5 +65,5 @@ The following models have been verified on Intel CPUs or GPUs. ## Finetune the model To finetune your model, execute the following command. The finetuned model will be saved in /tmp/llm-ray/output by default. ``` bash -python finetune/finetune.py --config_file +llm_on_ray-finetune --config_file ``` diff --git a/docs/pretrain.md b/docs/pretrain.md index 2b3667523..cf4a5931d 100644 --- a/docs/pretrain.md +++ b/docs/pretrain.md @@ -122,28 +122,28 @@ Set up `megatron_deepspeed_path` in the configuration. 
```bash cd /home/user/workspace/llm-on-ray -#Bloom-7B -python pretrain/megatron_deepspeed_pretrain.py --config_file pretrain/config/bloom_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf -#llama-7B -python pretrain/megatron_deepspeed_pretrain.py --config_file pretrain/config/llama_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf +# Bloom-7B +llm_on_ray-megatron_deepspeed_pretrain --config_file llm_on_ray/pretrain/config/bloom_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf +# llama-7B +llm_on_ray-megatron_deepspeed_pretrain --config_file llm_on_ray/pretrain/config/llama_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf ``` ##### Huggingface Trainer ```bash cd /home/user/workspace/llm-on-ray -#llama-7B -python pretrain/pretrain.py --config_file pretrain/config/llama_7b_8Guadi_pretrain.conf +# llama-7B +llm_on_ray-pretrain --config_file llm_on_ray/pretrain/config/llama_7b_8Guadi_pretrain.conf ``` ##### Nvidia GPU: ###### Megatron-DeepSpeed ```bash cd /home/user/workspace/llm-on-ray -#llama2-7B -python pretrain/megatron_deepspeed_pretrain.py --config_file pretrain/config/llama2_3b_megatron_deepspeed_zs0_8gpus_pretrain.conf +# llama2-7B +llm_on_ray-megatron_deepspeed_pretrain --config_file llm_on_ray/pretrain/config/llama2_3b_megatron_deepspeed_zs0_8gpus_pretrain.conf ``` ##### Huggingface Trainer ```bash cd /home/user/workspace/llm-on-ray -#llama-7B -python pretrain/pretrain.py --config_file pretrain/config/llama_7b_8gpu_pretrain.conf +# llama-7B +llm_on_ray-pretrain --config_file llm_on_ray/pretrain/config/llama_7b_8gpu_pretrain.conf ``` \ No newline at end of file diff --git a/docs/serve.md b/docs/serve.md index 2beed2b18..831774b6c 100644 --- a/docs/serve.md +++ b/docs/serve.md @@ -30,22 +30,22 @@ LLM-on-Ray also supports serving with [Deepspeed](serve_deepspeed.md) for AutoTP We support three methods to specify the models to be served, and they have the following priorities. 1. Use inference configuration file if config_file is set. ``` -python inference/serve.py --config_file inference/models/gpt2.yaml +llm_on_ray-serve --config_file llm_on_ray/inference/models/gpt2.yaml ``` 2. Use relevant configuration parameters if model_id_or_path is set. ``` -python inference/serve.py --model_id_or_path gpt2 [--tokenizer_id_or_path gpt2 --port 8000 --route_prefix ...] +llm_on_ray-serve --model_id_or_path gpt2 [--tokenizer_id_or_path gpt2 --port 8000 --route_prefix ...] ``` 3. If --config_file and --model_id_or_path are both None, it will serve all pre-defined models in inference/models/*.yaml, or part of them if models is set. ``` -python inference/serve.py --models gpt2 gpt-j-6b +llm_on_ray-serve --models gpt2 gpt-j-6b ``` ### OpenAI-compatible API To deploy your model, execute the following command with the model's configuration file. This will create an OpenAI-compatible API ([OpenAI API Reference](https://platform.openai.com/docs/api-reference/chat)) for serving. ```bash -python inference/serve.py --config_file +llm_on_ray-serve --config_file ``` -To deploy and serve multiple models concurrently, place all models' configuration files under `inference/models` and directly run `python inference/serve.py` without passing any conf file. +To deploy and serve multiple models concurrently, place all models' configuration files under `llm_on_ray/inference/models` and directly run `llm_on_ray-serve` without passing any conf file. 
After deploying the model, you can access and test it in many ways: ```bash @@ -71,7 +71,7 @@ python examples/inference/api_server_openai/query_openai_sdk.py ### Serving Model to a Simple Endpoint This will create a simple endpoint for serving according to the `port` and `route_prefix` parameters in conf file, for example: http://127.0.0.1:8000/gpt2. ```bash -python inference/serve.py --config_file --simple +llm_on_ray-serve --config_file --simple ``` After deploying the model endpoint, you can access and test it by using the script below: ```bash diff --git a/docs/vllm.md b/docs/vllm.md index 58393a9ae..426d2c305 100644 --- a/docs/vllm.md +++ b/docs/vllm.md @@ -9,7 +9,7 @@ vLLM for CPU currently supports Intel® 4th Gen Xeon® Scalable Performance proc Please run the following script to install vLLM for CPU into your current environment. Currently a GNU C++ compiler with >=12.3 version is required to build and install. ```bash -$ dev/scripts/install-vllm-cpu.sh +dev/scripts/install-vllm-cpu.sh ``` ## Setup @@ -23,7 +23,7 @@ Please follow [Deploying and Serving LLMs on Intel CPU/GPU/Gaudi](serve.md) docu To serve model with vLLM, run the following: ```bash -$ python serve.py --config_file inference/models/vllm/llama-2-7b-chat-hf-vllm.yaml --simple --keep_serve_terminal +llm_on_ray-serve --config_file llm_on_ray/inference/models/vllm/llama-2-7b-chat-hf-vllm.yaml --simple --keep_serve_terminal ``` In the above example, `vllm` property is set to `true` in the config file for enabling vLLM. @@ -33,11 +33,11 @@ In the above example, `vllm` property is set to `true` in the config file for en To start a non-streaming query, run the following: ```bash -$ python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/llama-2-7b-chat-hf +python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/llama-2-7b-chat-hf ``` To start a streaming query, run the following: ```bash -$ python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/llama-2-7b-chat-hf --streaming_response +python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/llama-2-7b-chat-hf --streaming_response ``` \ No newline at end of file diff --git a/docs/web_ui.md b/docs/web_ui.md index db0c0824f..5207c736f 100644 --- a/docs/web_ui.md +++ b/docs/web_ui.md @@ -8,13 +8,13 @@ Please follow [setup.md](setup.md) to setup the base environment first. ## Setup UI Environment After activating the environment installed from the previous step, please run the following script to install environment for Web UI. ```bash -$ dev/scripts/install-ui.sh +dev/scripts/install-ui.sh ``` ## Start Web UI ```bash -python -u ui/start_ui.py --node_user_name $user --conda_env_name $conda_env --master_ip_port "$node_ip:6379" +python -m llm_on_ray.ui.start_ui --node_user_name $user --conda_env_name $conda_env --master_ip_port "$node_ip:6379" ``` You will get URL from the command line output (E.g. http://0.0.0.0:8080 for local network and https://180cd5f7c31a1cfd3c.gradio.live for public network) and use the web browser to open it. 
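
The hunks above update the workflows, Dockerfiles and docs to call the new `llm_on_ray-finetune` / `llm_on_ray-serve` console commands and the `llm_on_ray/...` config paths; the hunks below perform the underlying move of `common/`, `finetune/`, `inference/` and `pretrain/` into an installable `llm_on_ray` package with absolute imports. As a minimal sketch (not part of the patch), this is roughly how the relocated modules are used after `pip install -e .`; the re-exported names are taken from the new `__init__.py` files below, while `InferenceConfig().port` is an assumption based on the model YAMLs, so treat the snippet as illustrative:

```python
# Illustrative only: package-absolute imports after this refactor. Previously,
# `import common` or `from inference.inference_config import ...` resolved only
# when running scripts from the repository root (or via the sys.path.insert hack
# that this PR removes from finetune.py).
from llm_on_ray import common                                  # re-exports logger, Config, init, TorchConfig
from llm_on_ray.common.trainer import Trainer                  # base classes now live under llm_on_ray.common.*
from llm_on_ray.inference.inference_config import InferenceConfig

infer_conf = InferenceConfig()                                 # defaults, as in export_inference_config_to_yaml.py
common.logger.info(f"default serving port: {infer_conf.port}")  # `port` assumed to match the model YAMLs (port: 8000)
```

Packaging the code this way is also what backs the `llm_on_ray-*` console commands used throughout the docs and workflows; the corresponding entry-point declarations presumably live in `pyproject.toml`, which is not shown in this section.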
diff --git a/llm_on_ray/common/__init__.py b/llm_on_ray/common/__init__.py new file mode 100644 index 000000000..dadeefdda --- /dev/null +++ b/llm_on_ray/common/__init__.py @@ -0,0 +1,5 @@ +from llm_on_ray.common.logging import logger +from llm_on_ray.common.torch_config import TorchConfig +from llm_on_ray.common.config import Config +from llm_on_ray.common.init import init +from llm_on_ray.common import agentenv, dataset, initializer, model, optimizer, tokenizer, trainer diff --git a/llm_on_ray/common/agentenv/__init__.py b/llm_on_ray/common/agentenv/__init__.py new file mode 100644 index 000000000..0ec64973b --- /dev/null +++ b/llm_on_ray/common/agentenv/__init__.py @@ -0,0 +1,5 @@ +from llm_on_ray.common.agentenv.agentenv import AgentEnv +from llm_on_ray.common.agentenv.rlhf_env import RLHFEnv + + +__all__ = ["AgentEnv"] diff --git a/common/agentenv/agentenv.py b/llm_on_ray/common/agentenv/agentenv.py similarity index 100% rename from common/agentenv/agentenv.py rename to llm_on_ray/common/agentenv/agentenv.py diff --git a/common/agentenv/rlhf_env.py b/llm_on_ray/common/agentenv/rlhf_env.py similarity index 98% rename from common/agentenv/rlhf_env.py rename to llm_on_ray/common/agentenv/rlhf_env.py index d4b0a5833..7c3e08ca1 100644 --- a/common/agentenv/rlhf_env.py +++ b/llm_on_ray/common/agentenv/rlhf_env.py @@ -7,8 +7,8 @@ from ray.rllib.utils.spaces.repeated import Repeated import gymnasium.spaces as sp -from .agentenv import AgentEnv -from ..load import load_dataset, load_model, load_tokenizer +from llm_on_ray.common.agentenv import AgentEnv +from llm_on_ray.common.load import load_dataset, load_model, load_tokenizer def generate_response( diff --git a/common/common.py b/llm_on_ray/common/common.py similarity index 74% rename from common/common.py rename to llm_on_ray/common/common.py index b846ea75a..590c5e4eb 100644 --- a/common/common.py +++ b/llm_on_ray/common/common.py @@ -2,10 +2,10 @@ import glob import importlib -from .logging import logger +from llm_on_ray.common.logging import logger -def import_all_module(basedir, prefix=None): +def import_all_modules(basedir, prefix=None): all_py_files = glob.glob(basedir + "/*.py") modules = [os.path.basename(f) for f in all_py_files] @@ -19,4 +19,4 @@ def import_all_module(basedir, prefix=None): try: importlib.import_module(module_name) except Exception: - logger.warning(f"import {module_name} erro", exc_info=True) + logger.warning(f"import {module_name} error", exc_info=True) diff --git a/common/config.py b/llm_on_ray/common/config.py similarity index 100% rename from common/config.py rename to llm_on_ray/common/config.py diff --git a/llm_on_ray/common/dataprocesser/__init__.py b/llm_on_ray/common/dataprocesser/__init__.py new file mode 100644 index 000000000..99ff999fd --- /dev/null +++ b/llm_on_ray/common/dataprocesser/__init__.py @@ -0,0 +1,6 @@ +from llm_on_ray.common.dataprocesser.dataprocesser import DataProcesser +from llm_on_ray.common.dataprocesser.general_processer import GeneralProcesser +from llm_on_ray.common.dataprocesser.rm_dataprocesser import RMDataProcesser + + +__all__ = ["DataProcesser"] diff --git a/common/dataprocesser/dataprocesser.py b/llm_on_ray/common/dataprocesser/dataprocesser.py similarity index 100% rename from common/dataprocesser/dataprocesser.py rename to llm_on_ray/common/dataprocesser/dataprocesser.py diff --git a/common/dataprocesser/general_processer.py b/llm_on_ray/common/dataprocesser/general_processer.py similarity index 99% rename from common/dataprocesser/general_processer.py rename to 
llm_on_ray/common/dataprocesser/general_processer.py index 4873b4594..cd09064a6 100644 --- a/common/dataprocesser/general_processer.py +++ b/llm_on_ray/common/dataprocesser/general_processer.py @@ -5,7 +5,7 @@ import datasets import transformers -from .dataprocesser import DataProcesser +from llm_on_ray.common.dataprocesser import DataProcesser INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request." INSTRUCTION_KEY = "### Instruction:" diff --git a/common/dataprocesser/rm_dataprocesser.py b/llm_on_ray/common/dataprocesser/rm_dataprocesser.py similarity index 96% rename from common/dataprocesser/rm_dataprocesser.py rename to llm_on_ray/common/dataprocesser/rm_dataprocesser.py index 36ead7d8b..10bfea6ff 100644 --- a/common/dataprocesser/rm_dataprocesser.py +++ b/llm_on_ray/common/dataprocesser/rm_dataprocesser.py @@ -1,8 +1,8 @@ import torch import transformers -from .dataprocesser import DataProcesser -from ..logging import logger +from llm_on_ray.common.dataprocesser import DataProcesser +from llm_on_ray.common.logging import logger class RMDataProcesser(DataProcesser): diff --git a/llm_on_ray/common/dataset/__init__.py b/llm_on_ray/common/dataset/__init__.py new file mode 100644 index 000000000..00477c05b --- /dev/null +++ b/llm_on_ray/common/dataset/__init__.py @@ -0,0 +1,5 @@ +from llm_on_ray.common.dataset.dataset import Dataset +from llm_on_ray.common.dataset.huggingface_dataset import HuggingfaceDataset + + +__all__ = ["Dataset"] diff --git a/common/dataset/dataset.py b/llm_on_ray/common/dataset/dataset.py similarity index 100% rename from common/dataset/dataset.py rename to llm_on_ray/common/dataset/dataset.py diff --git a/common/dataset/huggingface_dataset.py b/llm_on_ray/common/dataset/huggingface_dataset.py similarity index 97% rename from common/dataset/huggingface_dataset.py rename to llm_on_ray/common/dataset/huggingface_dataset.py index 3b9214aaf..dddcc995e 100644 --- a/common/dataset/huggingface_dataset.py +++ b/llm_on_ray/common/dataset/huggingface_dataset.py @@ -1,7 +1,7 @@ import os import datasets -from .dataset import Dataset +from llm_on_ray.common.dataset import Dataset def local_load(name, **load_config): diff --git a/common/init.py b/llm_on_ray/common/init.py similarity index 95% rename from common/init.py rename to llm_on_ray/common/init.py index 63715f18f..6ee077b0c 100644 --- a/common/init.py +++ b/llm_on_ray/common/init.py @@ -1,7 +1,7 @@ import torch import accelerate -from .logging import logger +from llm_on_ray.common.logging import logger def check_config(config): diff --git a/llm_on_ray/common/initializer/__init__.py b/llm_on_ray/common/initializer/__init__.py new file mode 100644 index 000000000..e1f5b0613 --- /dev/null +++ b/llm_on_ray/common/initializer/__init__.py @@ -0,0 +1,4 @@ +from llm_on_ray.common.initializer.initializer import Initializer + + +__all__ = ["Initializer"] diff --git a/common/initializer/initializer.py b/llm_on_ray/common/initializer/initializer.py similarity index 100% rename from common/initializer/initializer.py rename to llm_on_ray/common/initializer/initializer.py diff --git a/common/load.py b/llm_on_ray/common/load.py similarity index 83% rename from common/load.py rename to llm_on_ray/common/load.py index 16fcfd1c5..e598e5534 100644 --- a/common/load.py +++ b/llm_on_ray/common/load.py @@ -1,13 +1,8 @@ import sys from typing import Any, Dict -from .logging import logger -from . import dataset -from . import tokenizer -from . import model -from . 
import optimizer -from . import trainer -from . import initializer +from llm_on_ray.common import logger +from llm_on_ray.common import agentenv, dataset, initializer, model, optimizer, tokenizer, trainer def load_check_decorator(func): @@ -120,3 +115,18 @@ def get_initializer(config: Dict[str, Any]): logger.critical(f"{Factory.__name__} init error: {e}", exc_info=True) exit(1) return _ + + +@load_check_decorator # type: ignore # noqa: F405 # may be undefined, or defined from star imports +def get_agentenv(config: Dict[str, Any]): + logger.info(f"{sys._getframe().f_code.co_name} config: {config}") + agentenv_type = config.get("type", None) + Factory = agentenv.AgentEnv.registory.get(agentenv_type) + if Factory is None: + raise ValueError(f"there is no {agentenv_type} AgentEnv.") + try: + _ = Factory(config) + except Exception as e: + logger.critical(f"{Factory.__name__} init error: {e}", exc_info=True) + exit(1) + return _ diff --git a/common/logging.py b/llm_on_ray/common/logging.py similarity index 100% rename from common/logging.py rename to llm_on_ray/common/logging.py diff --git a/llm_on_ray/common/model/__init__.py b/llm_on_ray/common/model/__init__.py new file mode 100644 index 000000000..6f41c8214 --- /dev/null +++ b/llm_on_ray/common/model/__init__.py @@ -0,0 +1,6 @@ +from llm_on_ray.common.model.model import Model +from llm_on_ray.common.model.huggingface_model_for_causal_lm import HuggingFaceModelForCausalLM +from llm_on_ray.common.model.reward_model import HuggingFaceRewardModel + + +__all__ = ["Model"] diff --git a/common/model/huggingface_model_for_causal_lm.py b/llm_on_ray/common/model/huggingface_model_for_causal_lm.py similarity index 96% rename from common/model/huggingface_model_for_causal_lm.py rename to llm_on_ray/common/model/huggingface_model_for_causal_lm.py index cc2ce6234..2716ec897 100644 --- a/common/model/huggingface_model_for_causal_lm.py +++ b/llm_on_ray/common/model/huggingface_model_for_causal_lm.py @@ -1,6 +1,6 @@ import transformers -from .model import Model +from llm_on_ray.common.model import Model from peft import get_peft_model, LoraConfig import deltatuner diff --git a/common/model/model.py b/llm_on_ray/common/model/model.py similarity index 100% rename from common/model/model.py rename to llm_on_ray/common/model/model.py diff --git a/common/model/reward_model.py b/llm_on_ray/common/model/reward_model.py similarity index 98% rename from common/model/reward_model.py rename to llm_on_ray/common/model/reward_model.py index a4aa237ef..eaf5501d1 100644 --- a/common/model/reward_model.py +++ b/llm_on_ray/common/model/reward_model.py @@ -4,7 +4,7 @@ import torch import torch.nn as nn -from .model import Model +from llm_on_ray.common.model import Model class HuggingFaceRewardModel(Model): diff --git a/llm_on_ray/common/optimizer/__init__.py b/llm_on_ray/common/optimizer/__init__.py new file mode 100644 index 000000000..f71a85785 --- /dev/null +++ b/llm_on_ray/common/optimizer/__init__.py @@ -0,0 +1,6 @@ +from llm_on_ray.common.optimizer.optimizer import Optimizer +from llm_on_ray.common.optimizer.default_optimizer import DefaultOptimizer +from llm_on_ray.common.optimizer.group_optimizer import GroupOptimizer + + +__all__ = ["Optimizer"] diff --git a/common/optimizer/default_optimizer.py b/llm_on_ray/common/optimizer/default_optimizer.py similarity index 89% rename from common/optimizer/default_optimizer.py rename to llm_on_ray/common/optimizer/default_optimizer.py index dab5803a2..fef023e62 100644 --- a/common/optimizer/default_optimizer.py +++ 
b/llm_on_ray/common/optimizer/default_optimizer.py @@ -1,5 +1,5 @@ -import torch # noqa: F401 -from .optimizer import Optimizer +import torch +from llm_on_ray.common.optimizer import Optimizer class DefaultOptimizer(Optimizer): diff --git a/common/optimizer/group_optimizer.py b/llm_on_ray/common/optimizer/group_optimizer.py similarity index 94% rename from common/optimizer/group_optimizer.py rename to llm_on_ray/common/optimizer/group_optimizer.py index 0e07878db..5816639a9 100644 --- a/common/optimizer/group_optimizer.py +++ b/llm_on_ray/common/optimizer/group_optimizer.py @@ -1,5 +1,5 @@ -import torch # noqa: F401 -from .optimizer import Optimizer +import torch +from llm_on_ray.common.optimizer import Optimizer class GroupOptimizer(Optimizer): diff --git a/common/optimizer/optimizer.py b/llm_on_ray/common/optimizer/optimizer.py similarity index 100% rename from common/optimizer/optimizer.py rename to llm_on_ray/common/optimizer/optimizer.py diff --git a/llm_on_ray/common/tokenizer/__init__.py b/llm_on_ray/common/tokenizer/__init__.py new file mode 100644 index 000000000..3f2c40136 --- /dev/null +++ b/llm_on_ray/common/tokenizer/__init__.py @@ -0,0 +1,6 @@ +from llm_on_ray.common.tokenizer.tokenizer import Tokenizer +from llm_on_ray.common.tokenizer.empty_tokenizer import EmptyTokenizer +from llm_on_ray.common.tokenizer.huggingface_tokenizer import HuggingFaceTokenizer + + +__all__ = ["Tokenizer"] diff --git a/common/tokenizer/empty_tokenizer.py b/llm_on_ray/common/tokenizer/empty_tokenizer.py similarity index 86% rename from common/tokenizer/empty_tokenizer.py rename to llm_on_ray/common/tokenizer/empty_tokenizer.py index c2684aca0..50f5ca6f2 100644 --- a/common/tokenizer/empty_tokenizer.py +++ b/llm_on_ray/common/tokenizer/empty_tokenizer.py @@ -1,4 +1,4 @@ -from .tokenizer import Tokenizer +from llm_on_ray.common.tokenizer import Tokenizer class _EmptyTokenizer: diff --git a/common/tokenizer/huggingface_tokenizer.py b/llm_on_ray/common/tokenizer/huggingface_tokenizer.py similarity index 85% rename from common/tokenizer/huggingface_tokenizer.py rename to llm_on_ray/common/tokenizer/huggingface_tokenizer.py index a6a60bc7f..59905aef7 100644 --- a/common/tokenizer/huggingface_tokenizer.py +++ b/llm_on_ray/common/tokenizer/huggingface_tokenizer.py @@ -1,6 +1,6 @@ import transformers -from .tokenizer import Tokenizer +from llm_on_ray.common.tokenizer import Tokenizer class HuggingFaceTokenizer(Tokenizer): diff --git a/common/tokenizer/tokenizer.py b/llm_on_ray/common/tokenizer/tokenizer.py similarity index 100% rename from common/tokenizer/tokenizer.py rename to llm_on_ray/common/tokenizer/tokenizer.py diff --git a/common/torch_config.py b/llm_on_ray/common/torch_config.py similarity index 91% rename from common/torch_config.py rename to llm_on_ray/common/torch_config.py index 5a63ab565..a051de56f 100644 --- a/common/torch_config.py +++ b/llm_on_ray/common/torch_config.py @@ -26,15 +26,15 @@ def backend_cls(self): def libs_import(): """try to import IPEX and oneCCL.""" try: - import intel_extension_for_pytorch # noqa: F401 + import intel_extension_for_pytorch except ImportError: raise ImportError("Please install intel_extension_for_pytorch") try: ccl_version = importlib_metadata.version("oneccl_bind_pt") if ccl_version >= "1.12": - import oneccl_bindings_for_pytorch # noqa: F401 + import oneccl_bindings_for_pytorch else: - import torch_ccl # noqa: F401 + import torch_ccl except ImportError as ccl_not_exist: raise ImportError("Please install torch-ccl") from ccl_not_exist diff --git 
a/llm_on_ray/common/trainer/__init__.py b/llm_on_ray/common/trainer/__init__.py new file mode 100644 index 000000000..71ff4b808 --- /dev/null +++ b/llm_on_ray/common/trainer/__init__.py @@ -0,0 +1,6 @@ +from llm_on_ray.common.trainer.trainer import Trainer +from llm_on_ray.common.trainer.default_trainer import DefaultTrainer +from llm_on_ray.common.trainer.rm_trainer import RMTrainer + + +__all__ = ["Trainer"] diff --git a/common/trainer/default_trainer.py b/llm_on_ray/common/trainer/default_trainer.py similarity index 98% rename from common/trainer/default_trainer.py rename to llm_on_ray/common/trainer/default_trainer.py index f266a9cb9..18582caaf 100644 --- a/common/trainer/default_trainer.py +++ b/llm_on_ray/common/trainer/default_trainer.py @@ -10,10 +10,9 @@ from ray.train import report, Checkpoint -from .. import dataprocesser -from .trainer import Trainer - -from ..logging import logger +from llm_on_ray.common import dataprocesser +from llm_on_ray.common.trainer import Trainer +from llm_on_ray.common.logging import logger class DefaultTrainer(Trainer): diff --git a/common/trainer/rm_trainer.py b/llm_on_ray/common/trainer/rm_trainer.py similarity index 97% rename from common/trainer/rm_trainer.py rename to llm_on_ray/common/trainer/rm_trainer.py index 1cc64d93e..83bf0a673 100644 --- a/common/trainer/rm_trainer.py +++ b/llm_on_ray/common/trainer/rm_trainer.py @@ -4,8 +4,8 @@ import math import time -from .default_trainer import DefaultTrainer -from ..logging import logger +from llm_on_ray.common.trainer.default_trainer import DefaultTrainer +from llm_on_ray.common.logging import logger class RMTrainer(DefaultTrainer): diff --git a/common/trainer/trainer.py b/llm_on_ray/common/trainer/trainer.py similarity index 100% rename from common/trainer/trainer.py rename to llm_on_ray/common/trainer/trainer.py diff --git a/finetune/__init__.py b/llm_on_ray/finetune/__init__.py similarity index 100% rename from finetune/__init__.py rename to llm_on_ray/finetune/__init__.py diff --git a/finetune/finetune.py b/llm_on_ray/finetune/finetune.py similarity index 98% rename from finetune/finetune.py rename to llm_on_ray/finetune/finetune.py index 3557b25a4..e38596915 100644 --- a/finetune/finetune.py +++ b/llm_on_ray/finetune/finetune.py @@ -21,11 +21,8 @@ FullStateDictConfig, ) -import sys - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) -import common -from finetune.finetune_config import FinetuneConfig +from llm_on_ray import common +from llm_on_ray.finetune.finetune_config import FinetuneConfig def get_accelerate_environment_variable(mode: str, config: Union[Dict[str, Any], None]) -> dict: @@ -315,8 +312,8 @@ def main(external_config=None): run_config=run_config, ) results = trainer.fit() - - return results + if external_config is not None: + return results if __name__ == "__main__": diff --git a/finetune/finetune.yaml b/llm_on_ray/finetune/finetune.yaml similarity index 100% rename from finetune/finetune.yaml rename to llm_on_ray/finetune/finetune.yaml diff --git a/finetune/finetune_config.py b/llm_on_ray/finetune/finetune_config.py similarity index 100% rename from finetune/finetune_config.py rename to llm_on_ray/finetune/finetune_config.py diff --git a/finetune/models/bloom-560m.yaml b/llm_on_ray/finetune/models/bloom-560m.yaml similarity index 100% rename from finetune/models/bloom-560m.yaml rename to llm_on_ray/finetune/models/bloom-560m.yaml diff --git a/finetune/models/finetune_config_template.yaml b/llm_on_ray/finetune/models/finetune_config_template.yaml similarity index 
100% rename from finetune/models/finetune_config_template.yaml rename to llm_on_ray/finetune/models/finetune_config_template.yaml diff --git a/finetune/models/gpt-j-6b.yaml b/llm_on_ray/finetune/models/gpt-j-6b.yaml similarity index 100% rename from finetune/models/gpt-j-6b.yaml rename to llm_on_ray/finetune/models/gpt-j-6b.yaml diff --git a/finetune/models/gpt2.yaml b/llm_on_ray/finetune/models/gpt2.yaml similarity index 100% rename from finetune/models/gpt2.yaml rename to llm_on_ray/finetune/models/gpt2.yaml diff --git a/finetune/models/llama-2-7b-chat-hf.yaml b/llm_on_ray/finetune/models/llama-2-7b-chat-hf.yaml similarity index 100% rename from finetune/models/llama-2-7b-chat-hf.yaml rename to llm_on_ray/finetune/models/llama-2-7b-chat-hf.yaml diff --git a/finetune/models/llama-7b.yaml b/llm_on_ray/finetune/models/llama-7b.yaml similarity index 100% rename from finetune/models/llama-7b.yaml rename to llm_on_ray/finetune/models/llama-7b.yaml diff --git a/finetune/models/mistral-7b-v0.1.yaml b/llm_on_ray/finetune/models/mistral-7b-v0.1.yaml similarity index 100% rename from finetune/models/mistral-7b-v0.1.yaml rename to llm_on_ray/finetune/models/mistral-7b-v0.1.yaml diff --git a/finetune/models/mpt-7b-chat.yaml b/llm_on_ray/finetune/models/mpt-7b-chat.yaml similarity index 100% rename from finetune/models/mpt-7b-chat.yaml rename to llm_on_ray/finetune/models/mpt-7b-chat.yaml diff --git a/finetune/models/opt-125m.yaml b/llm_on_ray/finetune/models/opt-125m.yaml similarity index 100% rename from finetune/models/opt-125m.yaml rename to llm_on_ray/finetune/models/opt-125m.yaml diff --git a/inference/__init__.py b/llm_on_ray/inference/__init__.py similarity index 100% rename from inference/__init__.py rename to llm_on_ray/inference/__init__.py diff --git a/inference/api_openai_backend/openai_protocol.py b/llm_on_ray/inference/api_openai_backend/openai_protocol.py similarity index 100% rename from inference/api_openai_backend/openai_protocol.py rename to llm_on_ray/inference/api_openai_backend/openai_protocol.py diff --git a/inference/api_openai_backend/query_client.py b/llm_on_ray/inference/api_openai_backend/query_client.py similarity index 95% rename from inference/api_openai_backend/query_client.py rename to llm_on_ray/inference/api_openai_backend/query_client.py index fbfbb65b1..9e8c6656e 100644 --- a/inference/api_openai_backend/query_client.py +++ b/llm_on_ray/inference/api_openai_backend/query_client.py @@ -34,8 +34,8 @@ from typing import Dict from fastapi import HTTPException -from .openai_protocol import ModelCard, Prompt -from .request_handler import handle_request +from llm_on_ray.inference.api_openai_backend.openai_protocol import ModelCard, Prompt +from llm_on_ray.inference.api_openai_backend.request_handler import handle_request class RouterQueryClient: diff --git a/inference/api_openai_backend/request_handler.py b/llm_on_ray/inference/api_openai_backend/request_handler.py similarity index 96% rename from inference/api_openai_backend/request_handler.py rename to llm_on_ray/inference/api_openai_backend/request_handler.py index dd5a1189d..202f92538 100644 --- a/inference/api_openai_backend/request_handler.py +++ b/llm_on_ray/inference/api_openai_backend/request_handler.py @@ -38,8 +38,13 @@ from fastapi import status, HTTPException, Request from starlette.responses import JSONResponse from pydantic import ValidationError as PydanticValidationError -from inference.logger import get_logger -from .openai_protocol import Prompt, ModelResponse, ErrorResponse, FinishReason +from 
llm_on_ray.inference.logger import get_logger +from llm_on_ray.inference.api_openai_backend.openai_protocol import ( + Prompt, + ModelResponse, + ErrorResponse, + FinishReason, +) logger = get_logger(__name__) diff --git a/inference/api_openai_backend/router_app.py b/llm_on_ray/inference/api_openai_backend/router_app.py similarity index 97% rename from inference/api_openai_backend/router_app.py rename to llm_on_ray/inference/api_openai_backend/router_app.py index 236819392..ecaed39b8 100644 --- a/inference/api_openai_backend/router_app.py +++ b/llm_on_ray/inference/api_openai_backend/router_app.py @@ -40,16 +40,17 @@ from fastapi import Response as FastAPIResponse from fastapi.middleware.cors import CORSMiddleware from starlette.responses import Response, StreamingResponse -from inference.logger import get_logger -from .request_handler import OpenAIHTTPException, openai_exception_handler -from .query_client import RouterQueryClient -from .openai_protocol import ( +from llm_on_ray.inference.logger import get_logger +from llm_on_ray.inference.api_openai_backend.request_handler import ( + OpenAIHTTPException, + openai_exception_handler, +) +from llm_on_ray.inference.api_openai_backend.query_client import RouterQueryClient +from llm_on_ray.inference.api_openai_backend.openai_protocol import ( Prompt, ModelResponse, CompletionRequest, ChatCompletionRequest, -) -from .openai_protocol import ( ChatCompletionResponse, CompletionResponse, DeltaChoices, diff --git a/inference/api_server_openai.py b/llm_on_ray/inference/api_server_openai.py similarity index 94% rename from inference/api_server_openai.py rename to llm_on_ray/inference/api_server_openai.py index 77831a9d2..2ba821075 100644 --- a/inference/api_server_openai.py +++ b/llm_on_ray/inference/api_server_openai.py @@ -34,8 +34,8 @@ import os from ray import serve -from inference.api_openai_backend.query_client import RouterQueryClient -from inference.api_openai_backend.router_app import Router, router_app +from llm_on_ray.inference.api_openai_backend.query_client import RouterQueryClient +from llm_on_ray.inference.api_openai_backend.router_app import Router, router_app def router_application(deployments): diff --git a/inference/api_server_simple.py b/llm_on_ray/inference/api_server_simple.py similarity index 100% rename from inference/api_server_simple.py rename to llm_on_ray/inference/api_server_simple.py diff --git a/inference/chat_process.py b/llm_on_ray/inference/chat_process.py similarity index 100% rename from inference/chat_process.py rename to llm_on_ray/inference/chat_process.py diff --git a/inference/deepspeed_predictor.py b/llm_on_ray/inference/deepspeed_predictor.py similarity index 98% rename from inference/deepspeed_predictor.py rename to llm_on_ray/inference/deepspeed_predictor.py index ef75c6118..dbdbca06f 100644 --- a/inference/deepspeed_predictor.py +++ b/llm_on_ray/inference/deepspeed_predictor.py @@ -12,9 +12,9 @@ from ray.air import ScalingConfig from typing import List import os -from predictor import Predictor -from inference.utils import get_torch_dtype -from inference.inference_config import ( +from llm_on_ray.inference.predictor import Predictor +from llm_on_ray.inference.utils import get_torch_dtype +from llm_on_ray.inference.inference_config import ( InferenceConfig, GenerateResult, DEVICE_CPU, diff --git a/inference/inference_config.py b/llm_on_ray/inference/inference_config.py similarity index 97% rename from inference/inference_config.py rename to llm_on_ray/inference/inference_config.py index 
631b0eea0..57c1f54ac 100644 --- a/inference/inference_config.py +++ b/llm_on_ray/inference/inference_config.py @@ -161,8 +161,3 @@ def _check_workers_per_group(cls, v: int): _models[m.name] = m all_models = _models.copy() - -_gpt2_key = "gpt2" -_gpt_j_6b = "gpt-j-6b" -base_models[_gpt2_key] = _models[_gpt2_key] -base_models[_gpt_j_6b] = _models[_gpt_j_6b] diff --git a/inference/logger.py b/llm_on_ray/inference/logger.py similarity index 100% rename from inference/logger.py rename to llm_on_ray/inference/logger.py diff --git a/inference/mllm_predictor.py b/llm_on_ray/inference/mllm_predictor.py similarity index 94% rename from inference/mllm_predictor.py rename to llm_on_ray/inference/mllm_predictor.py index a50db97e5..895e00514 100644 --- a/inference/mllm_predictor.py +++ b/llm_on_ray/inference/mllm_predictor.py @@ -1,9 +1,8 @@ import torch from transformers import TextIteratorStreamer -from inference.inference_config import InferenceConfig, GenerateResult, PRECISION_BF16 -from predictor import Predictor -from inference.utils import module_import -from inference.utils import get_torch_dtype +from llm_on_ray.inference.inference_config import InferenceConfig, GenerateResult, PRECISION_BF16 +from llm_on_ray.inference.utils import get_torch_dtype, module_import +from llm_on_ray.inference.predictor import Predictor class MllmPredictor(Predictor): diff --git a/inference/models/CodeLlama-7b-hf.yaml b/llm_on_ray/inference/models/CodeLlama-7b-hf.yaml similarity index 100% rename from inference/models/CodeLlama-7b-hf.yaml rename to llm_on_ray/inference/models/CodeLlama-7b-hf.yaml diff --git a/inference/models/bigdl/mistral-7b-v0.1-bigdl.yaml b/llm_on_ray/inference/models/bigdl/mistral-7b-v0.1-bigdl.yaml similarity index 100% rename from inference/models/bigdl/mistral-7b-v0.1-bigdl.yaml rename to llm_on_ray/inference/models/bigdl/mistral-7b-v0.1-bigdl.yaml diff --git a/inference/models/bigdl/mpt-7b-bigdl.yaml b/llm_on_ray/inference/models/bigdl/mpt-7b-bigdl.yaml similarity index 100% rename from inference/models/bigdl/mpt-7b-bigdl.yaml rename to llm_on_ray/inference/models/bigdl/mpt-7b-bigdl.yaml diff --git a/inference/models/bloom-560m.yaml b/llm_on_ray/inference/models/bloom-560m.yaml similarity index 95% rename from inference/models/bloom-560m.yaml rename to llm_on_ray/inference/models/bloom-560m.yaml index 8bc661557..19a5a7deb 100644 --- a/inference/models/bloom-560m.yaml +++ b/llm_on_ray/inference/models/bloom-560m.yaml @@ -2,7 +2,7 @@ port: 8000 name: bloom-560m route_prefix: /bloom-560m num_replicas: 1 -cpus_per_worker: 10 +cpus_per_worker: 24 gpus_per_worker: 0 deepspeed: false workers_per_group: 2 diff --git a/inference/models/deplot.yaml b/llm_on_ray/inference/models/deplot.yaml similarity index 97% rename from inference/models/deplot.yaml rename to llm_on_ray/inference/models/deplot.yaml index e293bed54..ac6451c16 100644 --- a/inference/models/deplot.yaml +++ b/llm_on_ray/inference/models/deplot.yaml @@ -1,6 +1,7 @@ port: 8000 name: deplot route_prefix: /deplot +num_replicas: 1 cpus_per_worker: 24 gpus_per_worker: 0 deepspeed: false diff --git a/inference/models/falcon-7b.yaml b/llm_on_ray/inference/models/falcon-7b.yaml similarity index 100% rename from inference/models/falcon-7b.yaml rename to llm_on_ray/inference/models/falcon-7b.yaml diff --git a/inference/models/fuyu8b.yaml b/llm_on_ray/inference/models/fuyu8b.yaml similarity index 96% rename from inference/models/fuyu8b.yaml rename to llm_on_ray/inference/models/fuyu8b.yaml index 1ad9faa98..561114cd0 100644 --- 
a/inference/models/fuyu8b.yaml +++ b/llm_on_ray/inference/models/fuyu8b.yaml @@ -1,6 +1,7 @@ port: 8000 name: fuyu-8b route_prefix: /fuyu-8b +num_replicas: 1 cpus_per_worker: 24 gpus_per_worker: 0 deepspeed: false diff --git a/inference/models/gpt-j-6b.yaml b/llm_on_ray/inference/models/gpt-j-6b.yaml similarity index 100% rename from inference/models/gpt-j-6b.yaml rename to llm_on_ray/inference/models/gpt-j-6b.yaml diff --git a/inference/models/gpt2.yaml b/llm_on_ray/inference/models/gpt2.yaml similarity index 100% rename from inference/models/gpt2.yaml rename to llm_on_ray/inference/models/gpt2.yaml diff --git a/inference/models/llama-2-7b-chat-hf.yaml b/llm_on_ray/inference/models/llama-2-7b-chat-hf.yaml similarity index 100% rename from inference/models/llama-2-7b-chat-hf.yaml rename to llm_on_ray/inference/models/llama-2-7b-chat-hf.yaml diff --git a/inference/models/mistral-7b-v0.1.yaml b/llm_on_ray/inference/models/mistral-7b-v0.1.yaml similarity index 100% rename from inference/models/mistral-7b-v0.1.yaml rename to llm_on_ray/inference/models/mistral-7b-v0.1.yaml diff --git a/inference/models/mpt-7b.yaml b/llm_on_ray/inference/models/mpt-7b.yaml similarity index 100% rename from inference/models/mpt-7b.yaml rename to llm_on_ray/inference/models/mpt-7b.yaml diff --git a/inference/models/neural-chat-7b-v3-1.yaml b/llm_on_ray/inference/models/neural-chat-7b-v3-1.yaml similarity index 100% rename from inference/models/neural-chat-7b-v3-1.yaml rename to llm_on_ray/inference/models/neural-chat-7b-v3-1.yaml diff --git a/inference/models/opt-125m.yaml b/llm_on_ray/inference/models/opt-125m.yaml similarity index 100% rename from inference/models/opt-125m.yaml rename to llm_on_ray/inference/models/opt-125m.yaml diff --git a/inference/models/starcoder.yaml b/llm_on_ray/inference/models/starcoder.yaml similarity index 100% rename from inference/models/starcoder.yaml rename to llm_on_ray/inference/models/starcoder.yaml diff --git a/inference/models/template/export_inference_config_to_yaml.py b/llm_on_ray/inference/models/template/export_inference_config_to_yaml.py similarity index 72% rename from inference/models/template/export_inference_config_to_yaml.py rename to llm_on_ray/inference/models/template/export_inference_config_to_yaml.py index 62cfd4b75..493d3b6f5 100644 --- a/inference/models/template/export_inference_config_to_yaml.py +++ b/llm_on_ray/inference/models/template/export_inference_config_to_yaml.py @@ -1,6 +1,6 @@ import yaml import os -from inference.inference_config import InferenceConfig +from llm_on_ray.inference.inference_config import InferenceConfig ic = InferenceConfig() diff --git a/inference/models/template/inference_config_template.yaml b/llm_on_ray/inference/models/template/inference_config_template.yaml similarity index 100% rename from inference/models/template/inference_config_template.yaml rename to llm_on_ray/inference/models/template/inference_config_template.yaml diff --git a/inference/models/vllm/llama-2-7b-chat-hf-vllm.yaml b/llm_on_ray/inference/models/vllm/llama-2-7b-chat-hf-vllm.yaml similarity index 100% rename from inference/models/vllm/llama-2-7b-chat-hf-vllm.yaml rename to llm_on_ray/inference/models/vllm/llama-2-7b-chat-hf-vllm.yaml diff --git a/inference/predictor.py b/llm_on_ray/inference/predictor.py similarity index 96% rename from inference/predictor.py rename to llm_on_ray/inference/predictor.py index a69a9407e..d0f5daeee 100644 --- a/inference/predictor.py +++ b/llm_on_ray/inference/predictor.py @@ -1,9 +1,9 @@ import re import torch from 
transformers import AutoTokenizer, StoppingCriteriaList -from inference.inference_config import InferenceConfig, GenerateResult -from inference.utils import StoppingCriteriaSub from typing import List, AsyncGenerator, Union +from llm_on_ray.inference.inference_config import InferenceConfig, GenerateResult +from llm_on_ray.inference.utils import StoppingCriteriaSub class Predictor: diff --git a/inference/predictor_deployment.py b/llm_on_ray/inference/predictor_deployment.py similarity index 94% rename from inference/predictor_deployment.py rename to llm_on_ray/inference/predictor_deployment.py index a16cea7c5..094e41a56 100644 --- a/inference/predictor_deployment.py +++ b/llm_on_ray/inference/predictor_deployment.py @@ -19,16 +19,16 @@ import asyncio import functools from ray import serve -from starlette.requests import Request from queue import Empty import torch from transformers import TextIteratorStreamer -from inference.inference_config import InferenceConfig from typing import Union, Dict, Any +from starlette.requests import Request from starlette.responses import StreamingResponse, JSONResponse from fastapi import HTTPException -from inference.api_openai_backend.openai_protocol import ModelResponse -from inference.utils import get_prompt_format, PromptFormat +from llm_on_ray.inference.inference_config import InferenceConfig +from llm_on_ray.inference.api_openai_backend.openai_protocol import ModelResponse +from llm_on_ray.inference.utils import get_prompt_format, PromptFormat @serve.deployment @@ -59,20 +59,20 @@ def __init__(self, infer_conf: InferenceConfig): self.is_mllm = True if chat_processor_name in ["ChatModelwithImage"] else False if self.use_deepspeed: - from deepspeed_predictor import DeepSpeedPredictor + from llm_on_ray.inference.deepspeed_predictor import DeepSpeedPredictor self.predictor = DeepSpeedPredictor(infer_conf) self.streamer = self.predictor.get_streamer() elif self.use_vllm: - from vllm_predictor import VllmPredictor + from llm_on_ray.inference.vllm_predictor import VllmPredictor self.predictor = VllmPredictor(infer_conf) elif self.is_mllm: - from mllm_predictor import MllmPredictor + from llm_on_ray.inference.mllm_predictor import MllmPredictor self.predictor = MllmPredictor(infer_conf) else: - from transformer_predictor import TransformerPredictor + from llm_on_ray.inference.transformer_predictor import TransformerPredictor self.predictor = TransformerPredictor(infer_conf) self.loop = asyncio.get_running_loop() diff --git a/inference/serve.py b/llm_on_ray/inference/serve.py similarity index 94% rename from inference/serve.py rename to llm_on_ray/inference/serve.py index 598ab247c..6d87bd247 100644 --- a/inference/serve.py +++ b/llm_on_ray/inference/serve.py @@ -16,12 +16,12 @@ import ray import sys -from inference.utils import get_deployment_actor_options from pydantic_yaml import parse_yaml_raw_as -from api_server_simple import serve_run -from api_server_openai import openai_serve_run -from predictor_deployment import PredictorDeployment -from inference.inference_config import ModelDescription, InferenceConfig, all_models +from llm_on_ray.inference.utils import get_deployment_actor_options +from llm_on_ray.inference.api_server_simple import serve_run +from llm_on_ray.inference.api_server_openai import openai_serve_run +from llm_on_ray.inference.predictor_deployment import PredictorDeployment +from llm_on_ray.inference.inference_config import ModelDescription, InferenceConfig, all_models def get_deployed_models(args): diff --git 
a/inference/transformer_predictor.py b/llm_on_ray/inference/transformer_predictor.py similarity index 94% rename from inference/transformer_predictor.py rename to llm_on_ray/inference/transformer_predictor.py index c1e83e432..8c1b74f08 100644 --- a/inference/transformer_predictor.py +++ b/llm_on_ray/inference/transformer_predictor.py @@ -1,9 +1,8 @@ import torch -from transformers import AutoModelForCausalLM, AutoConfig -from transformers import TextIteratorStreamer -from inference.inference_config import InferenceConfig, GenerateResult, PRECISION_BF16 -from inference.utils import get_torch_dtype -from predictor import Predictor +from transformers import AutoModelForCausalLM, AutoConfig, TextIteratorStreamer +from llm_on_ray.inference.inference_config import InferenceConfig, GenerateResult, PRECISION_BF16 +from llm_on_ray.inference.utils import get_torch_dtype +from llm_on_ray.inference.predictor import Predictor class TransformerPredictor(Predictor): diff --git a/inference/utils.py b/llm_on_ray/inference/utils.py similarity index 96% rename from inference/utils.py rename to llm_on_ray/inference/utils.py index 07d928449..855916d29 100644 --- a/inference/utils.py +++ b/llm_on_ray/inference/utils.py @@ -16,10 +16,10 @@ from transformers import StoppingCriteria import torch -from inference.inference_config import InferenceConfig, DEVICE_CPU -from inference.api_openai_backend.openai_protocol import ChatMessage from typing import Dict, Any, List, Union from enum import Enum +from llm_on_ray.inference.inference_config import InferenceConfig, DEVICE_CPU +from llm_on_ray.inference.api_openai_backend.openai_protocol import ChatMessage def get_deployment_actor_options(infer_conf: InferenceConfig): diff --git a/inference/vllm_predictor.py b/llm_on_ray/inference/vllm_predictor.py similarity index 96% rename from inference/vllm_predictor.py rename to llm_on_ray/inference/vllm_predictor.py index 54ec4c110..d4ab10c44 100644 --- a/inference/vllm_predictor.py +++ b/llm_on_ray/inference/vllm_predictor.py @@ -1,11 +1,11 @@ import asyncio from typing import AsyncGenerator, List, Union -from predictor import Predictor -from inference.inference_config import InferenceConfig, GenerateResult, PRECISION_BF16 from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.sampling_params import SamplingParams from vllm.utils import random_uuid +from llm_on_ray.inference.predictor import Predictor +from llm_on_ray.inference.inference_config import InferenceConfig, GenerateResult, PRECISION_BF16 class VllmPredictor(Predictor): diff --git a/pretrain/__init__.py b/llm_on_ray/pretrain/__init__.py similarity index 100% rename from pretrain/__init__.py rename to llm_on_ray/pretrain/__init__.py diff --git a/pretrain/backend/deepspeed_backend.py b/llm_on_ray/pretrain/backend/deepspeed_backend.py similarity index 100% rename from pretrain/backend/deepspeed_backend.py rename to llm_on_ray/pretrain/backend/deepspeed_backend.py diff --git a/pretrain/backend/habana_backend.py b/llm_on_ray/pretrain/backend/habana_backend.py similarity index 93% rename from pretrain/backend/habana_backend.py rename to llm_on_ray/pretrain/backend/habana_backend.py index 125987ba2..ca1240577 100644 --- a/pretrain/backend/habana_backend.py +++ b/llm_on_ray/pretrain/backend/habana_backend.py @@ -13,7 +13,7 @@ def backend_cls(self): def habana_import(): try: - import habana_frameworks.torch # noqa: F401 + import habana_frameworks.torch except ImportError as habana_not_exist: raise ImportError("Please 
install habana_frameworks") from habana_not_exist diff --git a/pretrain/config/bloom1b7_8gpus_pretrain.conf b/llm_on_ray/pretrain/config/bloom1b7_8gpus_pretrain.conf similarity index 100% rename from pretrain/config/bloom1b7_8gpus_pretrain.conf rename to llm_on_ray/pretrain/config/bloom1b7_8gpus_pretrain.conf diff --git a/pretrain/config/bloom_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf b/llm_on_ray/pretrain/config/bloom_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf similarity index 100% rename from pretrain/config/bloom_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf rename to llm_on_ray/pretrain/config/bloom_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf diff --git a/pretrain/config/llama2_7b_megatron_deepspeed_zs0_8gpus_pretrain.conf b/llm_on_ray/pretrain/config/llama2_7b_megatron_deepspeed_zs0_8gpus_pretrain.conf similarity index 100% rename from pretrain/config/llama2_7b_megatron_deepspeed_zs0_8gpus_pretrain.conf rename to llm_on_ray/pretrain/config/llama2_7b_megatron_deepspeed_zs0_8gpus_pretrain.conf diff --git a/pretrain/config/llama2_7b_megatron_deepspeed_zs3_8gpus_pretrain.conf b/llm_on_ray/pretrain/config/llama2_7b_megatron_deepspeed_zs3_8gpus_pretrain.conf similarity index 100% rename from pretrain/config/llama2_7b_megatron_deepspeed_zs3_8gpus_pretrain.conf rename to llm_on_ray/pretrain/config/llama2_7b_megatron_deepspeed_zs3_8gpus_pretrain.conf diff --git a/pretrain/config/llama_7b_8Guadi_pretrain.conf b/llm_on_ray/pretrain/config/llama_7b_8Guadi_pretrain.conf similarity index 100% rename from pretrain/config/llama_7b_8Guadi_pretrain.conf rename to llm_on_ray/pretrain/config/llama_7b_8Guadi_pretrain.conf diff --git a/pretrain/config/llama_7b_8gpu_pretrain.conf b/llm_on_ray/pretrain/config/llama_7b_8gpu_pretrain.conf similarity index 100% rename from pretrain/config/llama_7b_8gpu_pretrain.conf rename to llm_on_ray/pretrain/config/llama_7b_8gpu_pretrain.conf diff --git a/pretrain/config/llama_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf b/llm_on_ray/pretrain/config/llama_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf similarity index 100% rename from pretrain/config/llama_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf rename to llm_on_ray/pretrain/config/llama_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf diff --git a/pretrain/docker/Dockerfile.megatron.habana b/llm_on_ray/pretrain/docker/Dockerfile.megatron.habana similarity index 100% rename from pretrain/docker/Dockerfile.megatron.habana rename to llm_on_ray/pretrain/docker/Dockerfile.megatron.habana diff --git a/pretrain/docker/Dockerfile.nvidia b/llm_on_ray/pretrain/docker/Dockerfile.nvidia similarity index 100% rename from pretrain/docker/Dockerfile.nvidia rename to llm_on_ray/pretrain/docker/Dockerfile.nvidia diff --git a/pretrain/docker/Dockerfile.optimum.habana b/llm_on_ray/pretrain/docker/Dockerfile.optimum.habana similarity index 100% rename from pretrain/docker/Dockerfile.optimum.habana rename to llm_on_ray/pretrain/docker/Dockerfile.optimum.habana diff --git a/pretrain/docker/build-image.sh b/llm_on_ray/pretrain/docker/build-image.sh similarity index 100% rename from pretrain/docker/build-image.sh rename to llm_on_ray/pretrain/docker/build-image.sh diff --git a/pretrain/megatron_deepspeed_pretrain.py b/llm_on_ray/pretrain/megatron_deepspeed_pretrain.py similarity index 96% rename from pretrain/megatron_deepspeed_pretrain.py rename to llm_on_ray/pretrain/megatron_deepspeed_pretrain.py index aa5002711..6190974be 100644 --- a/pretrain/megatron_deepspeed_pretrain.py +++ 
b/llm_on_ray/pretrain/megatron_deepspeed_pretrain.py @@ -7,14 +7,13 @@ from ray.air.config import ScalingConfig from ray.air import RunConfig, FailureConfig -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) -import common +from llm_on_ray import common import importlib loader = importlib.util.find_spec("habana_frameworks") if loader is not None: - from backend.habana_backend import TorchConfig + from llm_on_ray.pretrain.backend.habana_backend import TorchConfig else: from ray.train.torch import TorchConfig diff --git a/pretrain/patch/gpu/0001-Add-init.py-to-include-the-megatron.model.vision-int.patch b/llm_on_ray/pretrain/patch/gpu/0001-Add-init.py-to-include-the-megatron.model.vision-int.patch similarity index 100% rename from pretrain/patch/gpu/0001-Add-init.py-to-include-the-megatron.model.vision-int.patch rename to llm_on_ray/pretrain/patch/gpu/0001-Add-init.py-to-include-the-megatron.model.vision-int.patch diff --git a/pretrain/patch/gpu/0001-Change-the-sample-s-column-name.patch b/llm_on_ray/pretrain/patch/gpu/0001-Change-the-sample-s-column-name.patch similarity index 100% rename from pretrain/patch/gpu/0001-Change-the-sample-s-column-name.patch rename to llm_on_ray/pretrain/patch/gpu/0001-Change-the-sample-s-column-name.patch diff --git a/pretrain/patch/gpu/0001-hot-fix-for-megatron-deepspeed-for-gpu-version.patch b/llm_on_ray/pretrain/patch/gpu/0001-hot-fix-for-megatron-deepspeed-for-gpu-version.patch similarity index 100% rename from pretrain/patch/gpu/0001-hot-fix-for-megatron-deepspeed-for-gpu-version.patch rename to llm_on_ray/pretrain/patch/gpu/0001-hot-fix-for-megatron-deepspeed-for-gpu-version.patch diff --git a/pretrain/patch/hpu/0001-Init-megatron-deepspeed-with-Ray-cluster.patch b/llm_on_ray/pretrain/patch/hpu/0001-Init-megatron-deepspeed-with-Ray-cluster.patch similarity index 100% rename from pretrain/patch/hpu/0001-Init-megatron-deepspeed-with-Ray-cluster.patch rename to llm_on_ray/pretrain/patch/hpu/0001-Init-megatron-deepspeed-with-Ray-cluster.patch diff --git a/pretrain/patch/hpu/0002-Add-the-Huggingface-tokenizer.patch b/llm_on_ray/pretrain/patch/hpu/0002-Add-the-Huggingface-tokenizer.patch similarity index 100% rename from pretrain/patch/hpu/0002-Add-the-Huggingface-tokenizer.patch rename to llm_on_ray/pretrain/patch/hpu/0002-Add-the-Huggingface-tokenizer.patch diff --git a/pretrain/plugin/group_dataset.py b/llm_on_ray/pretrain/plugin/group_dataset.py similarity index 93% rename from pretrain/plugin/group_dataset.py rename to llm_on_ray/pretrain/plugin/group_dataset.py index 93838f7bf..1d1f9a2d5 100644 --- a/pretrain/plugin/group_dataset.py +++ b/llm_on_ray/pretrain/plugin/group_dataset.py @@ -1,7 +1,7 @@ import os import datasets -from common.dataset import Dataset +from llm_on_ray.common.dataset import Dataset class GroupDataset(Dataset): diff --git a/pretrain/plugin/hf_pretrainer.py b/llm_on_ray/pretrain/plugin/hf_pretrainer.py similarity index 98% rename from pretrain/plugin/hf_pretrainer.py rename to llm_on_ray/pretrain/plugin/hf_pretrainer.py index d9aafdfc5..2c2c5d1f7 100755 --- a/pretrain/plugin/hf_pretrainer.py +++ b/llm_on_ray/pretrain/plugin/hf_pretrainer.py @@ -3,9 +3,6 @@ import logging import sys from torch.utils.data import DataLoader, Dataset -import common -from common import dataprocesser -from common.logging import logger import evaluate from typing import Optional from transformers import ( @@ -16,7 +13,10 @@ from transformers.trainer_utils import get_last_checkpoint from transformers.utils import check_min_version, 
send_example_telemetry from transformers import Trainer, TrainingArguments -from common.trainer import Trainer as RayTrainer +from llm_on_ray import common +from llm_on_ray.common import dataprocesser +from llm_on_ray.common.logging import logger +from llm_on_ray.common.trainer import Trainer as RayTrainer use_habana = True import importlib diff --git a/pretrain/plugin/huggingface_model_from_config.py b/llm_on_ray/pretrain/plugin/huggingface_model_from_config.py similarity index 99% rename from pretrain/plugin/huggingface_model_from_config.py rename to llm_on_ray/pretrain/plugin/huggingface_model_from_config.py index 5ce38da8f..fe5c9608f 100644 --- a/pretrain/plugin/huggingface_model_from_config.py +++ b/llm_on_ray/pretrain/plugin/huggingface_model_from_config.py @@ -1,7 +1,7 @@ import torch import math import transformers -from common.model.model import Model +from llm_on_ray.common.model import Model # for huggingface model weight random initialization diff --git a/pretrain/plugin/megatron_dataset.py b/llm_on_ray/pretrain/plugin/megatron_dataset.py similarity index 96% rename from pretrain/plugin/megatron_dataset.py rename to llm_on_ray/pretrain/plugin/megatron_dataset.py index 944c6b53b..0d74906de 100644 --- a/pretrain/plugin/megatron_dataset.py +++ b/llm_on_ray/pretrain/plugin/megatron_dataset.py @@ -2,7 +2,7 @@ from megatron.training import build_train_valid_test_datasets, update_train_iters from megatron.data import gpt_dataset -from common.dataset import Dataset +from llm_on_ray.common.dataset import Dataset class MegatronDataset(Dataset): diff --git a/pretrain/plugin/megatron_pretrainer.py b/llm_on_ray/pretrain/plugin/megatron_pretrainer.py similarity index 98% rename from pretrain/plugin/megatron_pretrainer.py rename to llm_on_ray/pretrain/plugin/megatron_pretrainer.py index 4ee76bfa3..30e6cb815 100644 --- a/pretrain/plugin/megatron_pretrainer.py +++ b/llm_on_ray/pretrain/plugin/megatron_pretrainer.py @@ -11,9 +11,9 @@ from ray.train import Checkpoint from ray.train.torch import TorchCheckpoint -from common import dataprocesser -from .pretrainer import PreTrainer -from common.logging import logger +from llm_on_ray.common import dataprocesser +from llm_on_ray.pretrain.plugin.pretrainer import PreTrainer +from llm_on_ray.common.logging import logger class MegatronPreTrainer(PreTrainer): diff --git a/pretrain/plugin/megatron_processer.py b/llm_on_ray/pretrain/plugin/megatron_processer.py similarity index 96% rename from pretrain/plugin/megatron_processer.py rename to llm_on_ray/pretrain/plugin/megatron_processer.py index 178256ad5..455138399 100644 --- a/pretrain/plugin/megatron_processer.py +++ b/llm_on_ray/pretrain/plugin/megatron_processer.py @@ -2,7 +2,7 @@ from megatron.core import mpu from megatron.data.data_samplers import build_pretraining_data_loader -from common.dataprocesser import DataProcesser +from llm_on_ray.common.dataprocesser import DataProcesser class MegatronProcesser(DataProcesser): diff --git a/pretrain/plugin/megtron_initializer.py b/llm_on_ray/pretrain/plugin/megtron_initializer.py similarity index 85% rename from pretrain/plugin/megtron_initializer.py rename to llm_on_ray/pretrain/plugin/megtron_initializer.py index cad268603..9aad0d402 100644 --- a/pretrain/plugin/megtron_initializer.py +++ b/llm_on_ray/pretrain/plugin/megtron_initializer.py @@ -1,6 +1,6 @@ from megatron.initialize import initialize_megatron -from common.initializer import Initializer -from common.logging import logger +from llm_on_ray.common.initializer import Initializer +from 
llm_on_ray.common.logging import logger class MegatronInitializer(Initializer): diff --git a/pretrain/plugin/plain_id_processer.py b/llm_on_ray/pretrain/plugin/plain_id_processer.py similarity index 94% rename from pretrain/plugin/plain_id_processer.py rename to llm_on_ray/pretrain/plugin/plain_id_processer.py index 20117cdcf..50faa5e15 100644 --- a/pretrain/plugin/plain_id_processer.py +++ b/llm_on_ray/pretrain/plugin/plain_id_processer.py @@ -1,7 +1,7 @@ import torch import transformers -from common.dataprocesser import DataProcesser +from llm_on_ray.common.dataprocesser import DataProcesser class PlainIDProcesser(DataProcesser): diff --git a/pretrain/plugin/pretrainer.py b/llm_on_ray/pretrain/plugin/pretrainer.py similarity index 99% rename from pretrain/plugin/pretrainer.py rename to llm_on_ray/pretrain/plugin/pretrainer.py index 1bde38f62..1e48232c7 100755 --- a/pretrain/plugin/pretrainer.py +++ b/llm_on_ray/pretrain/plugin/pretrainer.py @@ -12,9 +12,9 @@ from ray.train.torch import TorchCheckpoint from pathlib import Path -from common import dataprocesser -from common.trainer import Trainer -from common.logging import logger +from llm_on_ray.common import dataprocesser +from llm_on_ray.common.trainer import Trainer +from llm_on_ray.common.logging import logger class PreTrainer(Trainer): diff --git a/pretrain/pretrain.py b/llm_on_ray/pretrain/pretrain.py similarity index 94% rename from pretrain/pretrain.py rename to llm_on_ray/pretrain/pretrain.py index 3e045c19d..56680b74d 100644 --- a/pretrain/pretrain.py +++ b/llm_on_ray/pretrain/pretrain.py @@ -10,31 +10,28 @@ from ray.air.config import ScalingConfig from ray.air import RunConfig, FailureConfig -import sys - -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) -import common +from llm_on_ray import common from importlib import util use_habana = False loader = util.find_spec("habana_frameworks") if loader is not None: - from backend.habana_backend import TorchConfig + from llm_on_ray.pretrain.backend.habana_backend import TorchConfig use_habana = True else: from ray.train.torch import TorchConfig - from backend.deepspeed_backend import TorchConfig as DeepSpeedTorchConfig + from llm_on_ray.pretrain.backend.deepspeed_backend import TorchConfig as DeepSpeedTorchConfig def train_func(config: Dict[str, Any]): cwd = config.get("cwd") if cwd: os.chdir(cwd) - from common.common import import_all_module + from llm_on_ray.common import import_all_modules - import_all_module(f"{os.path.dirname(os.path.realpath(__file__))}/plugin", "plugin") + import_all_modules(f"{os.path.dirname(os.path.realpath(__file__))}/plugin", "plugin") common.init(config) # type: ignore initializer_config = config.get("initializer") if initializer_config: diff --git a/pretrain/pretrain_template.conf b/llm_on_ray/pretrain/pretrain_template.conf similarity index 100% rename from pretrain/pretrain_template.conf rename to llm_on_ray/pretrain/pretrain_template.conf diff --git a/pretrain/pretrain_template_megatron_dataset.conf b/llm_on_ray/pretrain/pretrain_template_megatron_dataset.conf similarity index 100% rename from pretrain/pretrain_template_megatron_dataset.conf rename to llm_on_ray/pretrain/pretrain_template_megatron_dataset.conf diff --git a/pretrain/requirements.optimum-habana.txt b/llm_on_ray/pretrain/requirements.optimum-habana.txt similarity index 100% rename from pretrain/requirements.optimum-habana.txt rename to llm_on_ray/pretrain/requirements.optimum-habana.txt diff --git a/pretrain/requirements.txt b/llm_on_ray/pretrain/requirements.txt 
similarity index 100% rename from pretrain/requirements.txt rename to llm_on_ray/pretrain/requirements.txt diff --git a/rlhf/__init__.py b/llm_on_ray/rlhf/__init__.py similarity index 100% rename from rlhf/__init__.py rename to llm_on_ray/rlhf/__init__.py diff --git a/rlhf/ppo.py b/llm_on_ray/rlhf/ppo.py similarity index 93% rename from rlhf/ppo.py rename to llm_on_ray/rlhf/ppo.py index cc9fab6ae..821e9b4c0 100644 --- a/rlhf/ppo.py +++ b/llm_on_ray/rlhf/ppo.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -import os import ray from ray import air, tune @@ -8,16 +7,11 @@ from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec from ray.rllib.algorithms.ppo import PPOConfig -from rl_algo.ppo.ppo_rlhf import PPORLHF -from rl_algo.ppo.rlhf_ppo_module import RLHFPPOTorchRLModule -from rl_algo.ppo.rlhf_ppo_torch_learner import RLHFPPOTorchLearner - -import sys - -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) - -import common -from common.agentenv.rlhf_env import RLHFEnv +from llm_on_ray import common +from llm_on_ray.rlhf.rl_algo.ppo.ppo_rlhf import PPORLHF +from llm_on_ray.rlhf.rl_algo.ppo.rlhf_ppo_module import RLHFPPOTorchRLModule +from llm_on_ray.rlhf.rl_algo.ppo.rlhf_ppo_torch_learner import RLHFPPOTorchLearner +from llm_on_ray.common.agentenv.rlhf_env import RLHFEnv class ValueFunctionInitializerCallback(DefaultCallbacks): diff --git a/rlhf/ppo.yaml b/llm_on_ray/rlhf/ppo.yaml similarity index 100% rename from rlhf/ppo.yaml rename to llm_on_ray/rlhf/ppo.yaml diff --git a/rlhf/reward.py b/llm_on_ray/rlhf/reward.py similarity index 98% rename from rlhf/reward.py rename to llm_on_ray/rlhf/reward.py index 7045a6c44..a88e3cb3f 100644 --- a/rlhf/reward.py +++ b/llm_on_ray/rlhf/reward.py @@ -10,11 +10,7 @@ from ray.air.config import ScalingConfig from ray.air import RunConfig, FailureConfig -import sys - -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) - -import common +from llm_on_ray import common def train_func(config: Dict[str, Any]): diff --git a/rlhf/reward.yaml b/llm_on_ray/rlhf/reward.yaml similarity index 100% rename from rlhf/reward.yaml rename to llm_on_ray/rlhf/reward.yaml diff --git a/rlhf/rl_algo/ppo/ppo_rlhf.py b/llm_on_ray/rlhf/rl_algo/ppo/ppo_rlhf.py similarity index 96% rename from rlhf/rl_algo/ppo/ppo_rlhf.py rename to llm_on_ray/rlhf/rl_algo/ppo/ppo_rlhf.py index 55657a507..10e43cd90 100644 --- a/rlhf/rl_algo/ppo/ppo_rlhf.py +++ b/llm_on_ray/rlhf/rl_algo/ppo/ppo_rlhf.py @@ -12,13 +12,9 @@ ) from ray.rllib.evaluation.metrics import RolloutMetrics -import os -import sys -sys.path.append(os.path.join(os.path.dirname(__file__), "../../../")) - -from common.agentenv.rlhf_env import generate_response -from .rlhf_buffer import Buffer, BufferItem +from llm_on_ray.common.agentenv.rlhf_env import generate_response +from llm_on_ray.rlhf.rl_algo.ppo.rlhf_buffer import Buffer, BufferItem class RLHFSampler: diff --git a/rlhf/rl_algo/ppo/rlhf_buffer.py b/llm_on_ray/rlhf/rl_algo/ppo/rlhf_buffer.py similarity index 100% rename from rlhf/rl_algo/ppo/rlhf_buffer.py rename to llm_on_ray/rlhf/rl_algo/ppo/rlhf_buffer.py diff --git a/rlhf/rl_algo/ppo/rlhf_ppo_module.py b/llm_on_ray/rlhf/rl_algo/ppo/rlhf_ppo_module.py similarity index 100% rename from rlhf/rl_algo/ppo/rlhf_ppo_module.py rename to llm_on_ray/rlhf/rl_algo/ppo/rlhf_ppo_module.py diff --git a/rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py b/llm_on_ray/rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py similarity index 98% rename from rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py rename to 
llm_on_ray/rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py index 733863703..7c841c1f9 100644 --- a/rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py +++ b/llm_on_ray/rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py @@ -11,7 +11,7 @@ from ray.rllib.models.torch.torch_distributions import TorchCategorical -from .util import masked_mean +from llm_on_ray.rlhf.rl_algo.ppo.util import masked_mean torch, nn = try_import_torch() diff --git a/rlhf/rl_algo/ppo/util.py b/llm_on_ray/rlhf/rl_algo/ppo/util.py similarity index 100% rename from rlhf/rl_algo/ppo/util.py rename to llm_on_ray/rlhf/rl_algo/ppo/util.py diff --git a/ui/html_format.py b/llm_on_ray/ui/html_format.py similarity index 100% rename from ui/html_format.py rename to llm_on_ray/ui/html_format.py diff --git a/ui/images/Picture1.png b/llm_on_ray/ui/images/Picture1.png similarity index 100% rename from ui/images/Picture1.png rename to llm_on_ray/ui/images/Picture1.png diff --git a/ui/images/Picture2.png b/llm_on_ray/ui/images/Picture2.png similarity index 100% rename from ui/images/Picture2.png rename to llm_on_ray/ui/images/Picture2.png diff --git a/ui/images/logo.png b/llm_on_ray/ui/images/logo.png similarity index 100% rename from ui/images/logo.png rename to llm_on_ray/ui/images/logo.png diff --git a/ui/start_ui.py b/llm_on_ray/ui/start_ui.py similarity index 98% rename from ui/start_ui.py rename to llm_on_ray/ui/start_ui.py index 420a6fdcd..4b1f0363c 100644 --- a/ui/start_ui.py +++ b/llm_on_ray/ui/start_ui.py @@ -18,24 +18,26 @@ import time import os import sys - -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) -from inference.inference_config import all_models, ModelDescription, Prompt -from inference.inference_config import InferenceConfig as FinetunedConfig -from inference.chat_process import ChatModelGptJ, ChatModelLLama, ChatModelwithImage # noqa: F401 -from inference.predictor_deployment import PredictorDeployment -from ray import serve -import ray import gradio as gr import argparse +import paramiko +from multiprocessing import Process, Queue +from typing import Dict, List, Any +import ray +from ray import serve from ray.tune import Stopper from ray.train.base_trainer import TrainingFailedError from ray.tune.logger import LoggerCallback -from multiprocessing import Process, Queue from ray.util import queue -import paramiko -from html_format import cpu_memory_html, ray_status_html, custom_css -from typing import Dict, List, Any +from llm_on_ray.inference.inference_config import all_models, ModelDescription, Prompt +from llm_on_ray.inference.inference_config import InferenceConfig as FinetunedConfig +from llm_on_ray.inference.chat_process import ( + ChatModelGptJ, + ChatModelLLama, + ChatModelwithImage, +) +from llm_on_ray.inference.predictor_deployment import PredictorDeployment +from llm_on_ray.ui.html_format import cpu_memory_html, ray_status_html, custom_css from langchain.vectorstores import FAISS from langchain.embeddings import HuggingFaceEmbeddings from pyrecdp.LLM import TextPipeline @@ -617,7 +619,7 @@ def finetune( if max_train_step != 0: finetune_config["Training"]["max_train_steps"] = max_train_step - from finetune.finetune import main + from llm_on_ray.finetune.finetune import main finetune_config["total_epochs"] = queue.Queue( actor_options={"resources": {"queue_hardware": 1}} @@ -925,13 +927,16 @@ def _init_ui(self): title = "Manage LLM Lifecycle" with gr.Blocks(css=custom_css, title=title) as gr_chat: + logo_path = os.path.join(self.repo_code_path, "ui/images/logo.png") head_content = """
-
+

Manage LLM Lifecycle

Fine-Tune LLMs using workflow on Ray, Deploy and Inference

- """ + """.format( + logo_path=logo_path + ) foot_content = """
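
The change repeated across nearly every hunk above is mechanical: modules that previously reached their siblings through sys.path.append(...) hacks now use absolute imports rooted at the llm_on_ray package. A minimal before/after sketch, assuming the package has been installed so that llm_on_ray is importable (for example via an editable install):

    # Old pattern, removed throughout this diff: it only worked when the script was
    # launched from a directory where the relative path happened to resolve.
    #   import os, sys
    #   sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
    #   import common

    # New pattern: plain absolute imports (assumes the llm_on_ray package is installed).
    from llm_on_ray import common
    from llm_on_ray.common.logging import logger
    from llm_on_ray.inference.inference_config import InferenceConfig, all_models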
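
predictor_deployment.py keeps its backend imports lazy but now spells them out as absolute package paths. A condensed sketch of that dispatch follows; the helper name and boolean parameters are illustrative, since the real deployment derives these flags from its InferenceConfig and instantiates the predictor directly:

    def select_predictor_cls(use_deepspeed: bool, use_vllm: bool, is_mllm: bool):
        # Illustrative helper, not part of the codebase: each backend module is
        # imported inside its own branch, so a plain CPU deployment never needs
        # deepspeed or vllm to be importable at all.
        if use_deepspeed:
            from llm_on_ray.inference.deepspeed_predictor import DeepSpeedPredictor
            return DeepSpeedPredictor
        if use_vllm:
            from llm_on_ray.inference.vllm_predictor import VllmPredictor
            return VllmPredictor
        if is_mllm:
            from llm_on_ray.inference.mllm_predictor import MllmPredictor
            return MllmPredictor
        from llm_on_ray.inference.transformer_predictor import TransformerPredictor
        return TransformerPredictor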
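
serve.py still parses per-model YAML files with pydantic_yaml; only the import paths and the config locations move. A small sketch of loading one of the relocated configs, assuming the repository root as the working directory and that InferenceConfig fields mirror the YAML keys shown above:

    from pydantic_yaml import parse_yaml_raw_as
    from llm_on_ray.inference.inference_config import InferenceConfig

    # Path assumes the repository root as the working directory.
    with open("llm_on_ray/inference/models/bloom-560m.yaml", encoding="utf-8") as reader:
        infer_conf = parse_yaml_raw_as(InferenceConfig, reader.read())

    print(infer_conf.name)  # expected: bloom-560m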
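
Two model configs change content as well as location: bloom-560m.yaml raises cpus_per_worker from 10 to 24, and deplot.yaml and fuyu8b.yaml gain an explicit num_replicas: 1. A quick sanity check of the edited fields, using only the keys visible in the hunks above (the model_description section is omitted):

    import yaml

    # Head of the updated bloom-560m.yaml as shown in the hunk; trailing sections omitted.
    bloom_conf_head = (
        "port: 8000\n"
        "name: bloom-560m\n"
        "route_prefix: /bloom-560m\n"
        "num_replicas: 1\n"
        "cpus_per_worker: 24\n"
        "gpus_per_worker: 0\n"
        "deepspeed: false\n"
        "workers_per_group: 2\n"
    )

    conf = yaml.safe_load(bloom_conf_head)
    assert conf["cpus_per_worker"] == 24 and conf["num_replicas"] == 1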
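
pretrain.py also switches to the renamed plugin loader: import_all_module becomes import_all_modules, imported from llm_on_ray.common rather than common.common. The helper's implementation is not part of this diff; a rough, hypothetical stand-in for what such a loader does, walking a directory and importing each module under a package prefix so plugin classes register themselves on import, would be:

    import importlib
    import pkgutil

    def import_all_modules(basedir: str, prefix: str) -> None:
        # Hypothetical stand-in, not the actual llm_on_ray.common helper: import
        # every module found in basedir under the given package prefix.
        for mod in pkgutil.iter_modules([basedir]):
            importlib.import_module(f"{prefix}.{mod.name}")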
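
Finally, start_ui.py resolves the logo against self.repo_code_path and substitutes it into the header HTML with str.format, which is why the string literal now ends in .format(logo_path=logo_path). The pattern, shown with stand-in markup and a stand-in path (the real head_content block and repo path differ):

    import os

    repo_code_path = "/path/to/llm-on-ray"  # stand-in; the UI derives this at runtime
    logo_path = os.path.join(repo_code_path, "ui/images/logo.png")

    # Stand-in markup; the actual head_content block is longer.
    head_content = '<img src="{logo_path}" alt="logo"/>'.format(logo_path=logo_path)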