6 changes: 3 additions & 3 deletions common/logging.py
@@ -11,9 +11,9 @@
logging_config = {
"version": 1,
"loggers": {
"root": {"level": "DEBUG", "handlers": ["consoleHandler"]},
"root": {"level": "INFO", "handlers": ["consoleHandler"]},
"common": {
"level": "DEBUG",
"level": "INFO",
"handlers": ["consoleHandler"],
"qualname": "common",
"propagate": 0,
@@ -22,7 +22,7 @@
"handlers": {
"consoleHandler": {
"class": "logging.StreamHandler",
"level": "DEBUG",
"level": "INFO",
"formatter": "standardFormatter",
},
},
7 changes: 7 additions & 0 deletions dev/scripts/install-ui.sh
@@ -0,0 +1,7 @@
#!/usr/bin/env bash

# install dependencies
pip install "gradio<=3.36.1" "langchain<=0.0.329" "langchain_community<=0.0.13" "paramiko<=3.4.0" "sentence-transformers" "faiss-cpu"

# install pyrecdp from source
pip install 'git+https://github.com/intel/e2eAIOK.git#egg=pyrecdp&subdirectory=RecDP'
14 changes: 9 additions & 5 deletions docs/web_ui.md
@@ -2,17 +2,21 @@

LLM-on-Ray introduces a Web UI, allowing users to easily finetune and deploy LLMs through a user-friendly interface. Additionally, the UI includes a chatbot application, enabling users to immediately test and refine the models.

## Setup
Please follow [setup.md](setup.md) to setup the environment first.
## Setup Base Environment
Please follow [setup.md](setup.md) to set up the base environment first.

## Setup UI Environment
After activating the environment installed in the previous step, please run the following script to install the environment for the Web UI.
```bash
$ dev/scripts/install-ui.sh
```

## Start Web UI

```bash
python -u ui/start_ui.py --node_user_name $user --conda_env_name $conda_env --master_ip_port "$node_ip:6379"
# Get urls from the log
# Running on local URL: http://0.0.0.0:8080
# Running on public URL: https://180cd5f7c31a1cfd3c.gradio.live
```
You will get the URLs from the command-line output (e.g. http://0.0.0.0:8080 for the local network and https://180cd5f7c31a1cfd3c.gradio.live for the public network); open one of them in a web browser.

## Finetune LLMs
On the `Finetune` tab, you can configure the base model, finetuning parameters, the dataset path and the new model name. Click `Start To Finetune` to start finetuning.
2 changes: 1 addition & 1 deletion finetune/finetune.py
@@ -24,7 +24,7 @@

sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import common
from finetune.finetune_config import FinetuneConfig
from finetune_config import FinetuneConfig


def get_accelerate_environment_variable(mode: str, config: Union[Dict[str, Any], None]) -> dict:
4 changes: 2 additions & 2 deletions inference/api_server_openai.py
@@ -34,8 +34,8 @@

import os
from ray import serve
from inference.api_openai_backend.query_client import RouterQueryClient
from inference.api_openai_backend.router_app import Router, router_app
from api_openai_backend.query_client import RouterQueryClient
from api_openai_backend.router_app import Router, router_app


def router_application(deployments):
2 changes: 1 addition & 1 deletion inference/deepspeed_predictor.py
@@ -14,7 +14,7 @@
import os
from predictor import Predictor
from utils import get_torch_dtype
from inference.inference_config import (
from inference_config import (
InferenceConfig,
DEVICE_CPU,
DEVICE_XPU,
(changes to an additional file; filename not shown in this view)
@@ -1,6 +1,6 @@
import yaml
import os
from inference.inference_config import InferenceConfig
from inference_config import InferenceConfig

ic = InferenceConfig()

2 changes: 1 addition & 1 deletion inference/predictor.py
@@ -1,7 +1,7 @@
import re
import torch
from transformers import AutoTokenizer, StoppingCriteriaList
from inference.inference_config import InferenceConfig
from inference_config import InferenceConfig
from utils import StoppingCriteriaSub
from typing import List, AsyncGenerator, Union

4 changes: 2 additions & 2 deletions inference/predictor_deployment.py
@@ -23,10 +23,10 @@
from queue import Empty
import torch
from transformers import TextIteratorStreamer
from inference.inference_config import InferenceConfig
from inference_config import InferenceConfig
from typing import Union, Dict, Any
from starlette.responses import StreamingResponse, JSONResponse
from inference.api_openai_backend.openai_protocol import ModelResponse
from api_openai_backend.openai_protocol import ModelResponse
from utils import get_input_format


2 changes: 1 addition & 1 deletion inference/serve.py
@@ -21,7 +21,7 @@
from api_server_simple import serve_run
from api_server_openai import openai_serve_run
from predictor_deployment import PredictorDeployment
from inference.inference_config import ModelDescription, InferenceConfig, all_models
from inference_config import ModelDescription, InferenceConfig, all_models


def get_deployed_models(args):
2 changes: 1 addition & 1 deletion inference/transformer_predictor.py
@@ -1,7 +1,7 @@
import torch
from transformers import AutoModelForCausalLM, AutoConfig
from transformers import TextIteratorStreamer
from inference.inference_config import InferenceConfig, PRECISION_BF16
from inference_config import InferenceConfig, PRECISION_BF16
from predictor import Predictor
from utils import get_torch_dtype

2 changes: 1 addition & 1 deletion inference/utils.py
@@ -16,7 +16,7 @@

from transformers import StoppingCriteria
import torch
from inference.inference_config import InferenceConfig, DEVICE_CPU
from inference_config import InferenceConfig, DEVICE_CPU
from typing import Dict, Any, List, Union


2 changes: 1 addition & 1 deletion inference/vllm_predictor.py
@@ -1,7 +1,7 @@
import asyncio
from typing import AsyncGenerator, List, Union
from predictor import Predictor
from inference.inference_config import InferenceConfig, PRECISION_BF16
from inference_config import InferenceConfig, PRECISION_BF16
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.sampling_params import SamplingParams
91 changes: 47 additions & 44 deletions ui/start_ui.py
@@ -19,20 +19,24 @@
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from inference.inference_config import all_models, ModelDescription, Prompt
from inference.inference_config import InferenceConfig as FinetunedConfig
from inference.chat_process import ChatModelGptJ, ChatModelLLama # noqa: F401
from inference.predictor_deployment import PredictorDeployment
ui_folder = os.path.dirname(__file__)
sys.path.append(os.path.join(ui_folder, ".."))
sys.path.append(os.path.join(ui_folder, "../inference"))

from predictor_deployment import PredictorDeployment
from chat_process import ChatModelGptJ, ChatModelLLama # noqa: F401
from inference_config import all_models, ModelDescription, Prompt
from inference_config import InferenceConfig as FinetunedConfig
from ray import serve
import ray
import gradio as gr
import argparse
from ray.tune import Stopper
from ray.train.base_trainer import TrainingFailedError
from ray.tune.logger import LoggerCallback
from multiprocessing import Process, Queue
from ray.util import queue
from ray.job_config import JobConfig
from multiprocessing import Process, Queue
import paramiko
from html_format import cpu_memory_html, ray_status_html, custom_css
from typing import Dict, List, Any
@@ -109,6 +113,7 @@ def __init__(
default_data_path: str,
default_rag_path: str,
config: dict,
ray_init_config: dict,
head_node_ip: str,
node_port: str,
node_user_name: str,
@@ -122,6 +127,7 @@ def __init__(
self.repo_code_path = repo_code_path
self.default_data_path = default_data_path
self.config = config
self.ray_init_config = ray_init_config
self.head_node_ip = head_node_ip
self.node_port = node_port
self.user_name = node_user_name
@@ -455,7 +461,7 @@ def finetune(
origin_model_path = model_desc.model_id_or_path
tokenizer_path = model_desc.tokenizer_name_or_path
gpt_base_model = model_desc.gpt_base_model
last_gpt_base_model = False

finetuned_model_path = os.path.join(self.finetuned_model_path, model_name, new_model_name)
finetuned_checkpoint_path = (
os.path.join(self.finetuned_checkpoint_path, model_name, new_model_name)
@@ -464,46 +470,24 @@
)

finetune_config = self.config.copy()
training_config = finetune_config.get("Training")
exist_worker = int(training_config["num_training_workers"])
exist_cpus_per_worker_ftn = int(training_config["resources_per_worker"]["CPU"])
new_ray_init_config = self.ray_init_config.copy()

ray_resources = ray.available_resources()
if "CPU" not in ray_resources or cpus_per_worker_ftn * worker_num + 1 > int(
ray.available_resources()["CPU"]
):
raise gr.Error("Resources are not meeting the demand")
if (
worker_num != exist_worker
or cpus_per_worker_ftn != exist_cpus_per_worker_ftn
or not (gpt_base_model and last_gpt_base_model)
):
ray.shutdown()
new_ray_init_config = {
"runtime_env": {
"env_vars": {
"OMP_NUM_THREADS": str(cpus_per_worker_ftn),
"ACCELERATE_USE_CPU": "True",
"ACCELERATE_MIXED_PRECISION": "no",
"CCL_WORKER_COUNT": "1",
"CCL_LOG_LEVEL": "info",
"WORLD_SIZE": str(worker_num),
}
},
"address": "auto",
"_node_ip_address": "127.0.0.1",
}
if gpt_base_model:
new_ray_init_config["runtime_env"]["pip"] = ["transformers==4.26.0"]
else:
new_ray_init_config["runtime_env"]["pip"] = ["transformers==4.31.0"]
last_gpt_base_model = gpt_base_model
finetune_config["Training"]["num_training_workers"] = int(worker_num)
finetune_config["Training"]["resources_per_worker"]["CPU"] = int(cpus_per_worker_ftn)

ray.init(**new_ray_init_config)
exist_worker = worker_num
exist_cpus_per_worker_ftn = cpus_per_worker_ftn
ray.shutdown()
if gpt_base_model:
new_ray_init_config["runtime_env"]["pip"] = ["transformers==4.26.0"]
else:
new_ray_init_config["runtime_env"]["pip"] = ["transformers==4.31.0"]
new_ray_init_config["runtime_env"]["env_vars"]["WORLD_SIZE"] = str(worker_num)
finetune_config["Training"]["num_training_workers"] = int(worker_num)
finetune_config["Training"]["resources_per_worker"]["CPU"] = int(cpus_per_worker_ftn)

ray.init(**new_ray_init_config)

finetune_config["Dataset"]["train_file"] = dataset
finetune_config["General"]["base_model"] = origin_model_path
@@ -617,6 +601,16 @@ def finetune_progress(self, progress=gr.Progress()):

def deploy_func(self, model_name: str, replica_num: int, cpus_per_worker_deploy: int):
self.shutdown_deploy()
ray.shutdown()
new_ray_init_config = self.ray_init_config.copy()
new_ray_init_config["job_config"] = JobConfig(
code_search_path=[
os.path.join(ui_folder, "../finetune"),
os.path.join(ui_folder, "../inference"),
]
)
ray.init(**new_ray_init_config)

if cpus_per_worker_deploy * replica_num > int(ray.available_resources()["CPU"]):
raise gr.Error("Resources are not meeting the demand")

@@ -834,10 +828,14 @@ def _init_ui(self):
gr.HTML("<h3 style='text-align: left; margin-bottom: 1rem'>" + step1 + "</h3>")
with gr.Group():
base_models_list = list(self._base_models.keys())
# set the default base model for finetuning to gpt2
model_index = (
base_models_list.index("gpt2") if "gpt2" in base_models_list else 0
)
base_models_list.append("specify other models")
base_model_dropdown = gr.Dropdown(
base_models_list,
value=base_models_list[2],
value=base_models_list[model_index],
label="Select Base Model",
allow_custom_value=True,
)
@@ -934,9 +932,15 @@ def _init_ui(self):
with gr.Row():
with gr.Column(scale=0.8):
all_models_list = list(self._all_models.keys())
# set the default model for deployment to llama-2-7b-chat-hf
model_index = (
all_models_list.index("llama-2-7b-chat-hf")
if "llama-2-7b-chat-hf" in all_models_list
else 0
)
all_model_dropdown = gr.Dropdown(
all_models_list,
value=all_models_list[3],
value=all_models_list[model_index],
label="Select Model to Deploy",
elem_classes="disable_status",
allow_custom_value=True,
@@ -1563,8 +1567,6 @@ def _init_ui(self):
default_data_path = os.path.abspath(
infer_path + os.path.sep + "../examples/data/sample_finetune_data.jsonl"
)

sys.path.append(repo_path)
from finetune.finetune import get_accelerate_environment_variable

finetune_config: Dict[str, Any] = {
@@ -1617,6 +1619,7 @@ def _init_ui(self):
default_data_path,
default_rag_path,
finetune_config,
ray_init_config,
head_node_ip,
args.node_port,
args.node_user_name,