From 0cbba3ea71304e093dc6124cbaf755da1c91e69d Mon Sep 17 00:00:00 2001
From: KepingYan
Date: Fri, 19 Jan 2024 16:12:46 +0800
Subject: [PATCH 1/5] modify logging level, set ui packages

---
 common/logging.py |  6 +++---
 pyproject.toml    | 11 +++++++++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/common/logging.py b/common/logging.py
index f181ba915..c71620623 100644
--- a/common/logging.py
+++ b/common/logging.py
@@ -11,9 +11,9 @@
 logging_config = {
     "version": 1,
     "loggers": {
-        "root": {"level": "DEBUG", "handlers": ["consoleHandler"]},
+        "root": {"level": "INFO", "handlers": ["consoleHandler"]},
         "common": {
-            "level": "DEBUG",
+            "level": "INFO",
             "handlers": ["consoleHandler"],
             "qualname": "common",
             "propagate": 0,
@@ -22,7 +22,7 @@
     "handlers": {
         "consoleHandler": {
             "class": "logging.StreamHandler",
-            "level": "DEBUG",
+            "level": "INFO",
             "formatter": "standardFormatter",
         },
     },
diff --git a/pyproject.toml b/pyproject.toml
index d339dc30f..16e26e76e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,7 +27,6 @@ dependencies = [
     "fastapi<=0.108.0",
     "ray[tune]",
     "ray[serve]",
-    "gradio==4.11.0",
     "gymnasium",
     "dm-tree",
     "tensorboard",
@@ -37,7 +36,6 @@ dependencies = [
     "deltatuner==1.1.9",
     "py-cpuinfo",
     "pydantic-yaml",
-    "paramiko==3.4.0",
 ]
 
 [project.optional-dependencies]
@@ -69,6 +67,15 @@
 bigdl-cpu = [
     "bigdl-llm[all]"
 ]
 
+ui = [
+    "gradio<=3.36.1",
+    "langchain<=0.0.329",
+    "langchain_community<=0.0.13",
+    "paramiko<=3.4.0",
+    "sentence-transformers",
+    "faiss-cpu",
+]
+
 [tool.setuptools]
 packages = ["finetune", "inference"]
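A note on the hunk above: the dict follows the standard `logging.config.dictConfig` schema, so lowering the levels from DEBUG to INFO silences every `logger.debug(...)` call routed through this config. A minimal, self-contained sketch of that behavior (the formatter section here is an assumption; the hunk does not show the project's real one, and the call site in common/logging.py is not part of this diff):

```python
import logging
import logging.config

# Sketch of a dictConfig-style config like the one patched above.
logging_config = {
    "version": 1,
    "loggers": {
        "common": {
            "level": "INFO",
            "handlers": ["consoleHandler"],
            "propagate": 0,
        },
    },
    "handlers": {
        "consoleHandler": {
            "class": "logging.StreamHandler",
            "level": "INFO",
            "formatter": "standardFormatter",
        },
    },
    # Assumed formatter, included only so this sketch runs on its own.
    "formatters": {
        "standardFormatter": {
            "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        },
    },
}

logging.config.dictConfig(logging_config)
logger = logging.getLogger("common")
logger.debug("dropped: below the new INFO threshold")
logger.info("emitted to the console handler")
```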
From 9005d7e5dab530fcd29ab3c5bdb0a97d5a65a6aa Mon Sep 17 00:00:00 2001
From: KepingYan
Date: Mon, 22 Jan 2024 13:49:38 +0800
Subject: [PATCH 2/5] remove relative import

---
 finetune/finetune.py                                   | 4 ++--
 inference/api_server_openai.py                         | 4 ++--
 inference/deepspeed_predictor.py                       | 2 +-
 .../models/template/export_inference_config_to_yaml.py | 2 +-
 inference/predictor.py                                 | 2 +-
 inference/predictor_deployment.py                      | 4 ++--
 inference/serve.py                                     | 2 +-
 inference/transformer_predictor.py                     | 2 +-
 inference/utils.py                                     | 2 +-
 inference/vllm_predictor.py                            | 2 +-
 ui/start_ui.py                                         | 8 ++++----
 11 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/finetune/finetune.py b/finetune/finetune.py
index 0815dabfe..8ed94ba54 100644
--- a/finetune/finetune.py
+++ b/finetune/finetune.py
@@ -22,9 +22,9 @@
 
 import sys
 
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
 import common
-from finetune.finetune_config import FinetuneConfig
+from finetune_config import FinetuneConfig
 
 
 def get_accelerate_environment_variable(mode: str, config: Union[Dict[str, Any], None]) -> dict:
diff --git a/inference/api_server_openai.py b/inference/api_server_openai.py
index 77831a9d2..6572059d1 100644
--- a/inference/api_server_openai.py
+++ b/inference/api_server_openai.py
@@ -34,8 +34,8 @@
 import os
 
 from ray import serve
-from inference.api_openai_backend.query_client import RouterQueryClient
-from inference.api_openai_backend.router_app import Router, router_app
+from api_openai_backend.query_client import RouterQueryClient
+from api_openai_backend.router_app import Router, router_app
 
 
 def router_application(deployments):
diff --git a/inference/deepspeed_predictor.py b/inference/deepspeed_predictor.py
index 506137ca4..350a81417 100644
--- a/inference/deepspeed_predictor.py
+++ b/inference/deepspeed_predictor.py
@@ -14,7 +14,7 @@
 import os
 from predictor import Predictor
 from utils import get_torch_dtype
-from inference.inference_config import (
+from inference_config import (
     InferenceConfig,
     DEVICE_CPU,
     DEVICE_XPU,
diff --git a/inference/models/template/export_inference_config_to_yaml.py b/inference/models/template/export_inference_config_to_yaml.py
index 62cfd4b75..0f2a5f9f7 100644
--- a/inference/models/template/export_inference_config_to_yaml.py
+++ b/inference/models/template/export_inference_config_to_yaml.py
@@ -1,6 +1,6 @@
 import yaml
 import os
-from inference.inference_config import InferenceConfig
+from inference_config import InferenceConfig
 
 ic = InferenceConfig()
 
diff --git a/inference/predictor.py b/inference/predictor.py
index 4f7c9d3af..b6f126b1c 100644
--- a/inference/predictor.py
+++ b/inference/predictor.py
@@ -1,7 +1,7 @@
 import re
 import torch
 from transformers import AutoTokenizer, StoppingCriteriaList
-from inference.inference_config import InferenceConfig
+from inference_config import InferenceConfig
 from utils import StoppingCriteriaSub
 from typing import List, AsyncGenerator, Union
 
diff --git a/inference/predictor_deployment.py b/inference/predictor_deployment.py
index 2828931d5..79913a639 100644
--- a/inference/predictor_deployment.py
+++ b/inference/predictor_deployment.py
@@ -23,10 +23,10 @@
 from queue import Empty
 import torch
 from transformers import TextIteratorStreamer
-from inference.inference_config import InferenceConfig
+from inference_config import InferenceConfig
 from typing import Union, Dict, Any
 from starlette.responses import StreamingResponse
-from inference.api_openai_backend.openai_protocol import ModelResponse
+from api_openai_backend.openai_protocol import ModelResponse
 
 
 @serve.deployment
diff --git a/inference/serve.py b/inference/serve.py
index e73397a79..c8e52c4c6 100644
--- a/inference/serve.py
+++ b/inference/serve.py
@@ -21,7 +21,7 @@
 from api_server_simple import serve_run
 from api_server_openai import openai_serve_run
 from predictor_deployment import PredictorDeployment
-from inference.inference_config import ModelDescription, InferenceConfig, all_models
+from inference_config import ModelDescription, InferenceConfig, all_models
 
 
 def get_deployed_models(args):
diff --git a/inference/transformer_predictor.py b/inference/transformer_predictor.py
index 70e90ebe6..97811d79a 100644
--- a/inference/transformer_predictor.py
+++ b/inference/transformer_predictor.py
@@ -1,7 +1,7 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoConfig
 from transformers import TextIteratorStreamer
-from inference.inference_config import InferenceConfig, PRECISION_BF16
+from inference_config import InferenceConfig, PRECISION_BF16
 from predictor import Predictor
 from utils import get_torch_dtype
 
diff --git a/inference/utils.py b/inference/utils.py
index c0bd3b14a..a9cf5f2b6 100644
--- a/inference/utils.py
+++ b/inference/utils.py
@@ -16,7 +16,7 @@
 
 from transformers import StoppingCriteria
 import torch
-from inference.inference_config import InferenceConfig, DEVICE_CPU
+from inference_config import InferenceConfig, DEVICE_CPU
 from typing import Dict, Any
 
 
diff --git a/inference/vllm_predictor.py b/inference/vllm_predictor.py
index 6123b3906..d39105b12 100644
--- a/inference/vllm_predictor.py
+++ b/inference/vllm_predictor.py
@@ -1,7 +1,7 @@
 import asyncio
 from typing import AsyncGenerator, List, Union
 from predictor import Predictor
-from inference.inference_config import InferenceConfig, PRECISION_BF16
+from inference_config import InferenceConfig, PRECISION_BF16
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.sampling_params import SamplingParams
diff --git a/ui/start_ui.py b/ui/start_ui.py
index d4c05e4a5..257f64667 100644
--- a/ui/start_ui.py
+++ b/ui/start_ui.py
@@ -20,10 +20,10 @@ import os
 import sys
 
 sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
-from inference.inference_config import all_models, ModelDescription, Prompt
-from inference.inference_config import InferenceConfig as FinetunedConfig
-from inference.chat_process import ChatModelGptJ, ChatModelLLama  # noqa: F401
-from inference.predictor_deployment import PredictorDeployment
+from inference_config import all_models, ModelDescription, Prompt
+from inference_config import InferenceConfig as FinetunedConfig
+from chat_process import ChatModelGptJ, ChatModelLLama  # noqa: F401
+from predictor_deployment import PredictorDeployment
 from ray import serve
 import ray
 import gradio as gr
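The theme of this patch: package-qualified imports (`inference.inference_config`) become flat imports (`inference_config`), which only resolve when the directory holding the module is itself on `sys.path`. A minimal sketch of that assumption, with an illustrative layout (note that `append` searches the path last; PATCH 3/5 below switches finetune.py back to `insert(0, ...)` so the repo's own modules win over any same-named installed package):

```python
import os
import sys

# Flat imports resolve against sys.path entries, so the directory that
# contains inference_config.py must be importable on its own.
repo_root = os.path.dirname(os.path.abspath(__file__))  # illustrative script location
sys.path.append(os.path.join(repo_root, "inference"))

# Now the module is visible as a top-level name (sketch; requires the repo layout).
from inference_config import InferenceConfig  # noqa: E402

print(InferenceConfig())
```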
From a684ce3528bb5771d06018097976587254b65d9a Mon Sep 17 00:00:00 2001
From: KepingYan
Date: Mon, 22 Jan 2024 16:11:48 +0800
Subject: [PATCH 3/5] fix import error

---
 dev/scripts/install-ui.sh |  7 ++++
 finetune/finetune.py      |  2 +-
 pyproject.toml            |  9 -----
 ui/start_ui.py            | 76 ++++++++++++++++++---------------------
 4 files changed, 42 insertions(+), 52 deletions(-)
 create mode 100644 dev/scripts/install-ui.sh

diff --git a/dev/scripts/install-ui.sh b/dev/scripts/install-ui.sh
new file mode 100644
index 000000000..67ae06a82
--- /dev/null
+++ b/dev/scripts/install-ui.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+# install dependencies
+pip install "gradio<=3.36.1" "langchain<=0.0.329" "langchain_community<=0.0.13" "paramiko<=3.4.0" "sentence-transformers" "faiss-cpu"
+
+# install pyrecdp from source
+pip install 'git+https://github.com/intel/e2eAIOK.git#egg=pyrecdp&subdirectory=RecDP'
diff --git a/finetune/finetune.py b/finetune/finetune.py
index 8ed94ba54..5b2c62a6d 100644
--- a/finetune/finetune.py
+++ b/finetune/finetune.py
@@ -22,7 +22,7 @@
 
 import sys
 
-sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 import common
 from finetune_config import FinetuneConfig
 
diff --git a/pyproject.toml b/pyproject.toml
index 16e26e76e..3cfac277a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,15 +67,6 @@
 bigdl-cpu = [
     "bigdl-llm[all]"
 ]
 
-ui = [
-    "gradio<=3.36.1",
-    "langchain<=0.0.329",
-    "langchain_community<=0.0.13",
-    "paramiko<=3.4.0",
-    "sentence-transformers",
-    "faiss-cpu",
-]
-
 [tool.setuptools]
 packages = ["finetune", "inference"]
diff --git a/ui/start_ui.py b/ui/start_ui.py
index 257f64667..85b78dd34 100644
--- a/ui/start_ui.py
+++ b/ui/start_ui.py
@@ -19,11 +19,13 @@
 import os
 import sys
 
-sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+ui_folder = os.path.dirname(__file__)
+sys.path.append(os.path.join(ui_folder, ".."))
+sys.path.append(os.path.join(ui_folder, "../inference"))
+from predictor_deployment import PredictorDeployment
+from chat_process import ChatModelGptJ, ChatModelLLama  # noqa: F401
 from inference_config import all_models, ModelDescription, Prompt
 from inference_config import InferenceConfig as FinetunedConfig
-from chat_process import ChatModelGptJ, ChatModelLLama  # noqa: F401
-from predictor_deployment import PredictorDeployment
 from ray import serve
 import ray
 import gradio as gr
@@ -31,8 +33,9 @@
 from ray.tune import Stopper
 from ray.train.base_trainer import TrainingFailedError
 from ray.tune.logger import LoggerCallback
-from multiprocessing import Process, Queue
 from ray.util import queue
+from ray.job_config import JobConfig
+from multiprocessing import Process, Queue
 import paramiko
 from html_format import cpu_memory_html, ray_status_html, custom_css
 from typing import Dict, List, Any
@@ -109,6 +112,7 @@ def __init__(
         default_data_path: str,
         default_rag_path: str,
         config: dict,
+        ray_init_config: dict,
         head_node_ip: str,
         node_port: str,
         node_user_name: str,
@@ -122,6 +126,7 @@
         self.repo_code_path = repo_code_path
         self.default_data_path = default_data_path
         self.config = config
+        self.ray_init_config = ray_init_config
        self.head_node_ip = head_node_ip
         self.node_port = node_port
         self.user_name = node_user_name
@@ -455,7 +460,7 @@
         origin_model_path = model_desc.model_id_or_path
         tokenizer_path = model_desc.tokenizer_name_or_path
         gpt_base_model = model_desc.gpt_base_model
-        last_gpt_base_model = False
+
         finetuned_model_path = os.path.join(self.finetuned_model_path, model_name, new_model_name)
         finetuned_checkpoint_path = (
             os.path.join(self.finetuned_checkpoint_path, model_name, new_model_name)
@@ -464,46 +469,24 @@
         )
 
         finetune_config = self.config.copy()
-        training_config = finetune_config.get("Training")
-        exist_worker = int(training_config["num_training_workers"])
-        exist_cpus_per_worker_ftn = int(training_config["resources_per_worker"]["CPU"])
+        new_ray_init_config = self.ray_init_config.copy()
 
         ray_resources = ray.available_resources()
         if "CPU" not in ray_resources or cpus_per_worker_ftn * worker_num + 1 > int(
             ray.available_resources()["CPU"]
         ):
             raise gr.Error("Resources are not meeting the demand")
 
-        if (
-            worker_num != exist_worker
-            or cpus_per_worker_ftn != exist_cpus_per_worker_ftn
-            or not (gpt_base_model and last_gpt_base_model)
-        ):
-            ray.shutdown()
-            new_ray_init_config = {
-                "runtime_env": {
-                    "env_vars": {
-                        "OMP_NUM_THREADS": str(cpus_per_worker_ftn),
-                        "ACCELERATE_USE_CPU": "True",
-                        "ACCELERATE_MIXED_PRECISION": "no",
-                        "CCL_WORKER_COUNT": "1",
-                        "CCL_LOG_LEVEL": "info",
-                        "WORLD_SIZE": str(worker_num),
-                    }
-                },
-                "address": "auto",
-                "_node_ip_address": "127.0.0.1",
-            }
-            if gpt_base_model:
-                new_ray_init_config["runtime_env"]["pip"] = ["transformers==4.26.0"]
-            else:
-                new_ray_init_config["runtime_env"]["pip"] = ["transformers==4.31.0"]
-            last_gpt_base_model = gpt_base_model
-            finetune_config["Training"]["num_training_workers"] = int(worker_num)
-            finetune_config["Training"]["resources_per_worker"]["CPU"] = int(cpus_per_worker_ftn)
-            ray.init(**new_ray_init_config)
-            exist_worker = worker_num
-            exist_cpus_per_worker_ftn = cpus_per_worker_ftn
+        ray.shutdown()
+        if gpt_base_model:
+            new_ray_init_config["runtime_env"]["pip"] = ["transformers==4.26.0"]
+        else:
+            new_ray_init_config["runtime_env"]["pip"] = ["transformers==4.31.0"]
+        new_ray_init_config["runtime_env"]["env_vars"]["WORLD_SIZE"] = str(worker_num)
+        finetune_config["Training"]["num_training_workers"] = int(worker_num)
+        finetune_config["Training"]["resources_per_worker"]["CPU"] = int(cpus_per_worker_ftn)
+
+        ray.init(**new_ray_init_config)
 
         finetune_config["Dataset"]["train_file"] = dataset
         finetune_config["General"]["base_model"] = origin_model_path
@@ -617,6 +600,16 @@ def finetune_progress(self, progress=gr.Progress()):
 
     def deploy_func(self, model_name: str, replica_num: int, cpus_per_worker_deploy: int):
         self.shutdown_deploy()
+        ray.shutdown()
+        new_ray_init_config = self.ray_init_config.copy()
+        new_ray_init_config["job_config"] = JobConfig(
+            code_search_path=[
+                os.path.join(ui_folder, "../finetune"),
+                os.path.join(ui_folder, "../inference"),
+            ]
+        )
+        ray.init(**new_ray_init_config)
+
         if cpus_per_worker_deploy * replica_num > int(ray.available_resources()["CPU"]):
             raise gr.Error("Resources are not meeting the demand")
 
@@ -837,7 +830,7 @@ def _init_ui(self):
                 base_models_list.append("specify other models")
                 base_model_dropdown = gr.Dropdown(
                     base_models_list,
-                    value=base_models_list[2],
+                    value=base_models_list[4],
                     label="Select Base Model",
                     allow_custom_value=True,
                 )
@@ -936,7 +929,7 @@ def _init_ui(self):
                         all_models_list = list(self._all_models.keys())
                         all_model_dropdown = gr.Dropdown(
                             all_models_list,
-                            value=all_models_list[3],
+                            value=all_models_list[5],
                             label="Select Model to Deploy",
                             elem_classes="disable_status",
                             allow_custom_value=True,
@@ -1563,8 +1556,6 @@ def _init_ui(self):
     default_data_path = os.path.abspath(
         infer_path + os.path.sep + "../examples/data/sample_finetune_data.jsonl"
     )
-
-    sys.path.append(repo_path)
     from finetune.finetune import get_accelerate_environment_variable
 
     finetune_config: Dict[str, Any] = {
@@ -1617,6 +1608,7 @@
         default_data_path,
         default_rag_path,
         finetune_config,
+        ray_init_config,
         head_node_ip,
         args.node_port,
         args.node_user_name,
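The `JobConfig(code_search_path=...)` addition in `deploy_func` above is what actually fixes the import error on remote workers: Ray distributes those directories and puts them on `sys.path` in each worker process, so the flat imports from PATCH 2/5 keep resolving inside Serve replicas. A standalone sketch of the pattern (the paths and the env var are illustrative, not the project's exact values):

```python
import os

import ray
from ray.job_config import JobConfig

# Re-initialize Ray so that worker processes can import the repo's flat
# modules (predictor_deployment, inference_config, ...) without packaging
# them. code_search_path directories are shipped to workers and added to
# sys.path there.
ray.shutdown()
ray.init(
    address="auto",
    runtime_env={"env_vars": {"OMP_NUM_THREADS": "24"}},  # illustrative
    job_config=JobConfig(
        code_search_path=[
            os.path.abspath("finetune"),
            os.path.abspath("inference"),
        ]
    ),
)
```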
From 2b9a23fa4e36c01182f9a62796922e5386ce0226 Mon Sep 17 00:00:00 2001
From: KepingYan
Date: Mon, 22 Jan 2024 16:26:30 +0800
Subject: [PATCH 4/5] update docs

---
 docs/web_ui.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/docs/web_ui.md b/docs/web_ui.md
index 3e247f06f..ae2e46a83 100644
--- a/docs/web_ui.md
+++ b/docs/web_ui.md
@@ -2,8 +2,14 @@
 
 LLM-on-Ray introduces a Web UI, allowing users to easily finetune and deploy LLMs through a user-friendly interface. Additionally, the UI includes a chatbot application, enabling users to immediately test and refine the models.
 
-## Setup
-Please follow [setup.md](setup.md) to setup the environment first.
+## Setup Base Environment
+Please follow [setup.md](setup.md) to set up the base environment first.
+
+## Setup UI Environment
+After activating the environment installed in previous step, please run the following script to install environment for ui.
+```bash
+$ dev/scripts/install-ui.sh
+```
 
 ## Start Web UI
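Since PATCH 3/5 moved the UI dependencies out of `pyproject.toml` and into this script, a quick way to confirm the install worked is to import the pinned packages in the activated environment. This check is a suggestion only, not part of the patch:

```python
# Hypothetical post-install check for the packages pinned in install-ui.sh.
import importlib

for name in ("gradio", "langchain", "paramiko", "sentence_transformers", "faiss"):
    module = importlib.import_module(name)  # raises ImportError if missing
    print(name, getattr(module, "__version__", "unknown"))
```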
From d1824e72b12a5e9124e86c0c984af8dd544c43b2 Mon Sep 17 00:00:00 2001
From: KepingYan
Date: Thu, 1 Feb 2024 12:50:46 +0800
Subject: [PATCH 5/5] modify default model selection and docs

---
 docs/web_ui.md |  6 ++----
 ui/start_ui.py | 15 +++++++++++++--
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/docs/web_ui.md b/docs/web_ui.md
index ae2e46a83..b1a4e0e1b 100644
--- a/docs/web_ui.md
+++ b/docs/web_ui.md
@@ -6,7 +6,7 @@ LLM-on-Ray introduces a Web UI, allowing users to easily finetune and deploy LLMs through a user-friendly interface. Additionally, the UI includes a chatbot application, enabling users to immediately test and refine the models.
 ## Setup Base Environment
 Please follow [setup.md](setup.md) to set up the base environment first.
 
 ## Setup UI Environment
-After activating the environment installed in previous step, please run the following script to install environment for ui.
+After activating the environment installed from the previous step, please run the following script to install the environment for the Web UI.
 ```bash
 $ dev/scripts/install-ui.sh
 ```
@@ -15,10 +15,8 @@
 ```bash
 python -u ui/start_ui.py --node_user_name $user --conda_env_name $conda_env --master_ip_port "$node_ip:6379"
-# Get urls from the log
-# Running on local URL: http://0.0.0.0:8080
-# Running on public URL: https://180cd5f7c31a1cfd3c.gradio.live
 ```
+You will get URLs from the command line output (e.g. http://0.0.0.0:8080 for the local network and https://180cd5f7c31a1cfd3c.gradio.live for the public network); open one in a web browser.
 
 ## Finetune LLMs
 On the `Finetune` tab, you can configure the base model, finetuning parameters, the dataset path and the new model name. Click `Start To Finetune` to start finetuning.
diff --git a/ui/start_ui.py b/ui/start_ui.py
index 85b78dd34..6621cf77c 100644
--- a/ui/start_ui.py
+++ b/ui/start_ui.py
@@ -22,6 +22,7 @@
 ui_folder = os.path.dirname(__file__)
 sys.path.append(os.path.join(ui_folder, ".."))
 sys.path.append(os.path.join(ui_folder, "../inference"))
+
 from predictor_deployment import PredictorDeployment
 from chat_process import ChatModelGptJ, ChatModelLLama  # noqa: F401
 from inference_config import all_models, ModelDescription, Prompt
@@ -827,10 +828,14 @@ def _init_ui(self):
             gr.HTML("<h3>" + step1 + "</h3>")
             with gr.Group():
                 base_models_list = list(self._base_models.keys())
+                # set the default value of finetuning to gpt2
+                model_index = (
+                    base_models_list.index("gpt2") if "gpt2" in base_models_list else 0
+                )
                 base_models_list.append("specify other models")
                 base_model_dropdown = gr.Dropdown(
                     base_models_list,
-                    value=base_models_list[4],
+                    value=base_models_list[model_index],
                     label="Select Base Model",
                     allow_custom_value=True,
                 )
@@ -927,9 +932,15 @@ def _init_ui(self):
                 with gr.Row():
                     with gr.Column(scale=0.8):
                         all_models_list = list(self._all_models.keys())
+                        # set the default value of deployment to llama-2-7b-chat-hf
+                        model_index = (
+                            all_models_list.index("llama-2-7b-chat-hf")
+                            if "llama-2-7b-chat-hf" in all_models_list
+                            else 0
+                        )
                         all_model_dropdown = gr.Dropdown(
                             all_models_list,
-                            value=all_models_list[5],
+                            value=all_models_list[model_index],
                             label="Select Model to Deploy",
                             elem_classes="disable_status",
                             allow_custom_value=True,