6 changes: 3 additions & 3 deletions common/logging.py
@@ -11,9 +11,9 @@
 logging_config = {
     "version": 1,
     "loggers": {
-        "root": {"level": "DEBUG", "handlers": ["consoleHandler"]},
+        "root": {"level": "INFO", "handlers": ["consoleHandler"]},
         "common": {
-            "level": "DEBUG",
+            "level": "INFO",
             "handlers": ["consoleHandler"],
             "qualname": "common",
             "propagate": 0,
@@ -22,7 +22,7 @@
     "handlers": {
         "consoleHandler": {
             "class": "logging.StreamHandler",
-            "level": "DEBUG",
+            "level": "INFO",
             "formatter": "standardFormatter",
         },
     },
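Net effect of this change: DEBUG-level records are suppressed at the root logger, the `common` logger, and the console handler alike. A minimal, self-contained sketch of how a dictConfig of this shape behaves, assuming a `standardFormatter` definition that sits outside the visible hunk (the format string below is illustrative):

```python
# Minimal sketch: apply the post-change config and show that DEBUG
# records are dropped while INFO records pass through.
import logging
import logging.config

logging_config = {
    "version": 1,
    "formatters": {
        # Assumed shape; the real "standardFormatter" is outside this hunk.
        "standardFormatter": {"format": "%(asctime)s %(name)s [%(levelname)s] %(message)s"},
    },
    "handlers": {
        "consoleHandler": {
            "class": "logging.StreamHandler",
            "level": "INFO",
            "formatter": "standardFormatter",
        },
    },
    "loggers": {
        "common": {
            "level": "INFO",
            "handlers": ["consoleHandler"],
            "propagate": 0,
        },
    },
}

logging.config.dictConfig(logging_config)
log = logging.getLogger("common")
log.debug("not emitted: below the INFO threshold")
log.info("emitted: at or above the INFO threshold")
```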
7 changes: 7 additions & 0 deletions dev/scripts/install-ui.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+# install dependencies
+pip install "gradio<=3.36.1" "langchain<=0.0.329" "langchain_community<=0.0.13" "paramiko<=3.4.0" "sentence-transformers" "faiss-cpu"
+
+# install pyrecdp from source
+pip install 'git+https://github.com/intel/e2eAIOK.git#egg=pyrecdp&subdirectory=RecDP'
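The upper-bound pins keep the UI stack on versions known to work together. A hedged sketch, using only the standard library, for sanity-checking what actually got installed after running the script:

```python
# Sketch: report the installed versions of the pinned UI dependencies.
from importlib.metadata import version, PackageNotFoundError

for pkg in ["gradio", "langchain", "langchain_community", "paramiko"]:
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg} is not installed")
```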
12 changes: 8 additions & 4 deletions docs/web_ui.md
@@ -3,16 +3,20 @@
 LLM-on-Ray introduces a Web UI, allowing users to easily finetune and deploy LLMs through a user-friendly interface. Additionally, the UI includes a chatbot application, enabling users to immediately test and refine the models.
 
 ## Setup
-Please follow [setup.md](setup.md) to set up the environment first.
+Please follow [setup.md](setup.md) to set up the base environment first.
 
+## Setup UI Environment
+After activating the environment installed in the previous step, run the following script to install the environment for the Web UI.
+```bash
+$ dev/scripts/install-ui.sh
+```
 
 ## Start Web UI
 
 ```bash
 python -u ui/start_ui.py --node_user_name $user --conda_env_name $conda_env --master_ip_port "$node_ip:6379"
+# Get URLs from the log
+# Running on local URL: http://0.0.0.0:8080
+# Running on public URL: https://180cd5f7c31a1cfd3c.gradio.live
 ```
 You will get the URLs from the command-line output (e.g. http://0.0.0.0:8080 for the local network and https://180cd5f7c31a1cfd3c.gradio.live for the public network); open one in a web browser.
 
 ## Finetune LLMs
 On the `Finetune` tab, you can configure the base model, finetuning parameters, the dataset path and the new model name. Click `Start To Finetune` to start finetuning.
2 changes: 1 addition & 1 deletion inference/predictor.py
@@ -2,7 +2,7 @@
 import torch
 from transformers import AutoTokenizer, StoppingCriteriaList
 from inference.inference_config import InferenceConfig, GenerateResult
-from utils import StoppingCriteriaSub
+from inference.utils import StoppingCriteriaSub
 from typing import List, AsyncGenerator, Union
 
 
2 changes: 1 addition & 1 deletion inference/predictor_deployment.py
@@ -28,7 +28,7 @@
 from starlette.responses import StreamingResponse, JSONResponse
 from fastapi import HTTPException
 from inference.api_openai_backend.openai_protocol import ModelResponse
-from utils import get_prompt_format, PromptFormat
+from inference.utils import get_prompt_format, PromptFormat
 
 
 @serve.deployment
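Both files get the same fix: the bare `from utils import ...` only resolves when the `inference/` directory itself is on `sys.path`, whereas the package-qualified `from inference.utils import ...` works wherever the project root is importable. For orientation only, a stop-word criterion like `StoppingCriteriaSub` is typically a `transformers.StoppingCriteria` subclass along these lines; the sketch below is hypothetical and the real implementation in `inference/utils.py` may differ:

```python
# Hypothetical sketch of a stop-word stopping criterion; illustrative only.
import torch
from transformers import StoppingCriteria

class StopOnTokenSequences(StoppingCriteria):  # name is ours, not the repo's
    def __init__(self, stop_token_id_seqs):
        self.stop_token_id_seqs = [torch.tensor(s) for s in stop_token_id_seqs]

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Stop as soon as the generated ids end with any stop sequence.
        for seq in self.stop_token_id_seqs:
            n = len(seq)
            if input_ids.shape[1] >= n and torch.equal(input_ids[0, -n:].cpu(), seq):
                return True
        return False
```

A criterion like this would then be passed to generation via `model.generate(..., stopping_criteria=StoppingCriteriaList([...]))`, which matches the `StoppingCriteriaList` import already present in `predictor.py`.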
4 changes: 0 additions & 4 deletions pyproject.toml
@@ -67,10 +67,6 @@ bigdl-cpu = [
     "bigdl-llm[all]"
 ]
 
-ui = [
-    "gradio==4.11.0",
-    "paramiko==3.4.0",
-]
 
 [tool.setuptools]
 # with MANIFEST.in, the configs below work in both baremetal and container
16 changes: 12 additions & 4 deletions ui/start_ui.py
@@ -622,7 +622,6 @@ def deploy_func(self, model_name: str, replica_num: int, cpus_per_worker_deploy:
 
         print("Deploying model:" + model_name)
 
-        stop_words = ["### Instruction", "# Instruction", "### Question", "##", " ="]
         finetuned = self._all_models[model_name]
         model_desc = finetuned.model_description
         prompt = model_desc.prompt
@@ -642,7 +641,6 @@
         finetuned_deploy = finetuned.copy(deep=True)
         finetuned_deploy.device = "cpu"
         finetuned_deploy.ipex.precision = "bf16"
-        finetuned_deploy.model_description.prompt.stop_words = stop_words
         finetuned_deploy.cpus_per_worker = cpus_per_worker_deploy
         # transformers 4.35 is needed for neural-chat-7b-v3-1, will be fixed later
         if "neural-chat" in model_name:
@@ -834,10 +832,14 @@ def _init_ui(self):
                 gr.HTML("<h3 style='text-align: left; margin-bottom: 1rem'>" + step1 + "</h3>")
                 with gr.Group():
                     base_models_list = list(self._base_models.keys())
+                    # set the default value of finetuning to gpt2
+                    model_index = (
+                        base_models_list.index("gpt2") if "gpt2" in base_models_list else 0
+                    )
                     base_models_list.append("specify other models")
                     base_model_dropdown = gr.Dropdown(
                         base_models_list,
-                        value=base_models_list[2],
+                        value=base_models_list[model_index],
                         label="Select Base Model",
                         allow_custom_value=True,
                     )
@@ -934,9 +936,15 @@ def _init_ui(self):
                 with gr.Row():
                     with gr.Column(scale=0.8):
                         all_models_list = list(self._all_models.keys())
+                        # set the default value of deployment to llama-2-7b-chat-hf
+                        model_index = (
+                            all_models_list.index("llama-2-7b-chat-hf")
+                            if "llama-2-7b-chat-hf" in all_models_list
+                            else 0
+                        )
                         all_model_dropdown = gr.Dropdown(
                             all_models_list,
-                            value=all_models_list[3],
+                            value=all_models_list[model_index],
                             label="Select Model to Deploy",
                             elem_classes="disable_status",
                             allow_custom_value=True,
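Both dropdown fixes replace a hard-coded index (`[2]`, `[3]`) with a lookup that falls back to the first entry when the preferred model is absent, so the UI no longer crashes or picks an arbitrary model when the model list changes. The same pattern, extracted into a tiny helper for illustration (the helper name is ours, not the repo's):

```python
def default_index(options: list, preferred: str) -> int:
    """Index of the preferred option, or 0 if it is not present."""
    return options.index(preferred) if preferred in options else 0

models = ["gpt2", "llama-2-7b-chat-hf"]
assert default_index(models, "llama-2-7b-chat-hf") == 1
assert default_index(models, "opt-125m") == 0  # graceful fallback
```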