6 changes: 3 additions & 3 deletions common/logging.py
@@ -11,9 +11,9 @@
 logging_config = {
     "version": 1,
     "loggers": {
-        "root": {"level": "DEBUG", "handlers": ["consoleHandler"]},
+        "root": {"level": "INFO", "handlers": ["consoleHandler"]},
         "common": {
-            "level": "DEBUG",
+            "level": "INFO",
             "handlers": ["consoleHandler"],
             "qualname": "common",
             "propagate": 0,
@@ -22,7 +22,7 @@
     "handlers": {
         "consoleHandler": {
             "class": "logging.StreamHandler",
-            "level": "DEBUG",
+            "level": "INFO",
             "formatter": "standardFormatter",
         },
     },
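Net effect of this change: DEBUG-level records are suppressed at the root logger, the `common` logger, and the console handler alike. A minimal, self-contained sketch of how a dictConfig of this shape behaves, assuming a `standardFormatter` definition that sits outside the visible hunk (the format string below is illustrative):

```python
# Minimal sketch: apply the post-change config and show that DEBUG
# records are dropped while INFO records pass through.
import logging
import logging.config

logging_config = {
    "version": 1,
    "formatters": {
        # Assumed shape; the real "standardFormatter" is outside this hunk.
        "standardFormatter": {"format": "%(asctime)s %(name)s [%(levelname)s] %(message)s"},
    },
    "handlers": {
        "consoleHandler": {
            "class": "logging.StreamHandler",
            "level": "INFO",
            "formatter": "standardFormatter",
        },
    },
    "loggers": {
        "common": {
            "level": "INFO",
            "handlers": ["consoleHandler"],
            "propagate": 0,
        },
    },
}

logging.config.dictConfig(logging_config)
log = logging.getLogger("common")
log.debug("not emitted: below the INFO threshold")
log.info("emitted: at or above the INFO threshold")
```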
7 changes: 7 additions & 0 deletions dev/scripts/install-ui.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+# install dependencies
+pip install "gradio<=3.36.1" "langchain<=0.0.329" "langchain_community<=0.0.13" "paramiko<=3.4.0" "sentence-transformers" "faiss-cpu"
+
+# install pyrecdp from source
+pip install 'git+https://github.com/intel/e2eAIOK.git#egg=pyrecdp&subdirectory=RecDP'
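The upper-bound pins keep the UI stack on versions known to work together. A hedged sketch, using only the standard library, for sanity-checking what actually got installed after running the script:

```python
# Sketch: report the installed versions of the pinned UI dependencies.
from importlib.metadata import version, PackageNotFoundError

for pkg in ["gradio", "langchain", "langchain_community", "paramiko"]:
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg} is not installed")
```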
12 changes: 8 additions & 4 deletions docs/web_ui.md
@@ -3,16 +3,20 @@
 LLM-on-Ray introduces a Web UI, allowing users to easily finetune and deploy LLMs through a user-friendly interface. Additionally, the UI includes a chatbot application, enabling users to immediately test and refine the models.
 
 ## Setup
-Please follow [setup.md](setup.md) to set up the environment first.
+Please follow [setup.md](setup.md) to set up the base environment first.
 
+## Setup UI Environment
+After activating the environment installed in the previous step, run the following script to install the environment for the Web UI.
+```bash
+$ dev/scripts/install-ui.sh
+```
 
 ## Start Web UI
 
 ```bash
 python -u ui/start_ui.py --node_user_name $user --conda_env_name $conda_env --master_ip_port "$node_ip:6379"
+# Get URLs from the log
+# Running on local URL: http://0.0.0.0:8080
+# Running on public URL: https://180cd5f7c31a1cfd3c.gradio.live
 ```
 You will get the URLs from the command-line output (e.g. http://0.0.0.0:8080 for the local network and https://180cd5f7c31a1cfd3c.gradio.live for the public network); open one in a web browser.
 
 ## Finetune LLMs
 On the `Finetune` tab, you can configure the base model, finetuning parameters, the dataset path and the new model name. Click `Start To Finetune` to start finetuning.
2 changes: 1 addition & 1 deletion inference/predictor.py
@@ -2,7 +2,7 @@
 import torch
 from transformers import AutoTokenizer, StoppingCriteriaList
 from inference.inference_config import InferenceConfig, GenerateResult
-from utils import StoppingCriteriaSub
+from inference.utils import StoppingCriteriaSub
 from typing import List, AsyncGenerator, Union
 
 
2 changes: 1 addition & 1 deletion inference/predictor_deployment.py
@@ -28,7 +28,7 @@
 from starlette.responses import StreamingResponse, JSONResponse
 from fastapi import HTTPException
 from inference.api_openai_backend.openai_protocol import ModelResponse
-from utils import get_prompt_format, PromptFormat
+from inference.utils import get_prompt_format, PromptFormat
 
 
 @serve.deployment
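Both files get the same fix: the bare `from utils import ...` only resolves when the `inference/` directory itself is on `sys.path`, whereas the package-qualified `from inference.utils import ...` works wherever the project root is importable. For orientation only, a stop-word criterion like `StoppingCriteriaSub` is typically a `transformers.StoppingCriteria` subclass along these lines; the sketch below is hypothetical and the real implementation in `inference/utils.py` may differ:

```python
# Hypothetical sketch of a stop-word stopping criterion; illustrative only.
import torch
from transformers import StoppingCriteria

class StopOnTokenSequences(StoppingCriteria):  # name is ours, not the repo's
    def __init__(self, stop_token_id_seqs):
        self.stop_token_id_seqs = [torch.tensor(s) for s in stop_token_id_seqs]

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Stop as soon as the generated ids end with any stop sequence.
        for seq in self.stop_token_id_seqs:
            n = len(seq)
            if input_ids.shape[1] >= n and torch.equal(input_ids[0, -n:].cpu(), seq):
                return True
        return False
```

A criterion like this would then be passed to generation via `model.generate(..., stopping_criteria=StoppingCriteriaList([...]))`, which matches the `StoppingCriteriaList` import already present in `predictor.py`.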
4 changes: 0 additions & 4 deletions pyproject.toml
@@ -67,10 +67,6 @@ bigdl-cpu = [
     "bigdl-llm[all]"
 ]
 
-ui = [
-    "gradio==4.11.0",
-    "paramiko==3.4.0",
-]
 
 [tool.setuptools]
 # with MANIFEST.in, the configs below work in both baremetal and container
16 changes: 12 additions & 4 deletions ui/start_ui.py
@@ -622,7 +622,6 @@ def deploy_func(self, model_name: str, replica_num: int, cpus_per_worker_deploy:
 
         print("Deploying model:" + model_name)
 
-        stop_words = ["### Instruction", "# Instruction", "### Question", "##", " ="]
         finetuned = self._all_models[model_name]
         model_desc = finetuned.model_description
         prompt = model_desc.prompt
@@ -642,7 +641,6 @@
         finetuned_deploy = finetuned.copy(deep=True)
         finetuned_deploy.device = "cpu"
         finetuned_deploy.ipex.precision = "bf16"
-        finetuned_deploy.model_description.prompt.stop_words = stop_words
         finetuned_deploy.cpus_per_worker = cpus_per_worker_deploy
         # transformers 4.35 is needed for neural-chat-7b-v3-1, will be fixed later
         if "neural-chat" in model_name:
@@ -834,10 +832,14 @@ def _init_ui(self):
                 gr.HTML("<h3 style='text-align: left; margin-bottom: 1rem'>" + step1 + "</h3>")
                 with gr.Group():
                     base_models_list = list(self._base_models.keys())
+                    # set the default value of finetuning to gpt2
+                    model_index = (
+                        base_models_list.index("gpt2") if "gpt2" in base_models_list else 0
+                    )
                     base_models_list.append("specify other models")
                     base_model_dropdown = gr.Dropdown(
                         base_models_list,
-                        value=base_models_list[2],
+                        value=base_models_list[model_index],
                         label="Select Base Model",
                         allow_custom_value=True,
                     )
@@ -934,9 +936,15 @@ def _init_ui(self):
                 with gr.Row():
                     with gr.Column(scale=0.8):
                         all_models_list = list(self._all_models.keys())
+                        # set the default value of deployment to llama-2-7b-chat-hf
+                        model_index = (
+                            all_models_list.index("llama-2-7b-chat-hf")
+                            if "llama-2-7b-chat-hf" in all_models_list
+                            else 0
+                        )
                         all_model_dropdown = gr.Dropdown(
                             all_models_list,
-                            value=all_models_list[3],
+                            value=all_models_list[model_index],
                             label="Select Model to Deploy",
                             elem_classes="disable_status",
                             allow_custom_value=True,
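Both dropdown fixes replace a hard-coded index (`[2]`, `[3]`) with a lookup that falls back to the first entry when the preferred model is absent, so the UI no longer crashes or picks an arbitrary model when the model list changes. The same pattern, extracted into a tiny helper for illustration (the helper name is ours, not the repo's):

```python
def default_index(options: list, preferred: str) -> int:
    """Index of the preferred option, or 0 if it is not present."""
    return options.index(preferred) if preferred in options else 0

models = ["gpt2", "llama-2-7b-chat-hf"]
assert default_index(models, "llama-2-7b-chat-hf") == 1
assert default_index(models, "opt-125m") == 0  # graceful fallback
```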