6 changes: 3 additions & 3 deletions common/logging.py
@@ -11,9 +11,9 @@
logging_config = {
"version": 1,
"loggers": {
"root": {"level": "DEBUG", "handlers": ["consoleHandler"]},
"root": {"level": "INFO", "handlers": ["consoleHandler"]},
"common": {
"level": "DEBUG",
"level": "INFO",
"handlers": ["consoleHandler"],
"qualname": "common",
"propagate": 0,
@@ -22,7 +22,7 @@
"handlers": {
"consoleHandler": {
"class": "logging.StreamHandler",
"level": "DEBUG",
"level": "INFO",
"formatter": "standardFormatter",
},
},
7 changes: 7 additions & 0 deletions dev/scripts/install-ui.sh
@@ -0,0 +1,7 @@
#!/usr/bin/env bash

# install dependencies
pip install "gradio<=3.36.1" "langchain<=0.0.329" "langchain_community<=0.0.13" "paramiko<=3.4.0" "sentence-transformers" "faiss-cpu"

# install pyrecdp from source
pip install 'git+https://github.com/intel/e2eAIOK.git#egg=pyrecdp&subdirectory=RecDP'
14 changes: 9 additions & 5 deletions docs/web_ui.md
@@ -2,17 +2,21 @@

LLM-on-Ray introduces a Web UI, allowing users to easily finetune and deploy LLMs through a user-friendly interface. Additionally, the UI includes a chatbot application, enabling users to immediately test and refine the models.

## Setup
Please follow [setup.md](setup.md) to setup the environment first.
## Setup Base Environment
Please follow [setup.md](setup.md) to set up the base environment first.

## Setup UI Environment
After activating the environment installed in the previous step, please run the following script to install the environment for the Web UI.
```bash
$ dev/scripts/install-ui.sh
```

## Start Web UI

```bash
python -u ui/start_ui.py --node_user_name $user --conda_env_name $conda_env --master_ip_port "$node_ip:6379"
# Get urls from the log
# Running on local URL: http://0.0.0.0:8080
# Running on public URL: https://180cd5f7c31a1cfd3c.gradio.live
```
You will get the URLs from the command-line output (e.g. http://0.0.0.0:8080 for the local network and https://180cd5f7c31a1cfd3c.gradio.live for the public network); open one of them in a web browser.

## Finetune LLMs
On the `Finetune` tab, you can configure the base model, finetuning parameters, the dataset path and the new model name. Click `Start To Finetune` to start finetuning.
2 changes: 1 addition & 1 deletion finetune/finetune.py
@@ -24,7 +24,7 @@

sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import common
from finetune.finetune_config import FinetuneConfig
from finetune_config import FinetuneConfig


def get_accelerate_environment_variable(mode: str, config: Union[Dict[str, Any], None]) -> dict:
4 changes: 2 additions & 2 deletions inference/api_server_openai.py
@@ -34,8 +34,8 @@

import os
from ray import serve
from inference.api_openai_backend.query_client import RouterQueryClient
from inference.api_openai_backend.router_app import Router, router_app
from api_openai_backend.query_client import RouterQueryClient
from api_openai_backend.router_app import Router, router_app


def router_application(deployments):
2 changes: 1 addition & 1 deletion inference/deepspeed_predictor.py
@@ -14,7 +14,7 @@
import os
from predictor import Predictor
from utils import get_torch_dtype
from inference.inference_config import (
from inference_config import (
InferenceConfig,
DEVICE_CPU,
DEVICE_XPU,
(changes to an additional file; filename not shown in this view)
@@ -1,6 +1,6 @@
import yaml
import os
from inference.inference_config import InferenceConfig
from inference_config import InferenceConfig

ic = InferenceConfig()

2 changes: 1 addition & 1 deletion inference/predictor.py
@@ -1,7 +1,7 @@
import re
import torch
from transformers import AutoTokenizer, StoppingCriteriaList
from inference.inference_config import InferenceConfig
from inference_config import InferenceConfig
from utils import StoppingCriteriaSub
from typing import List, AsyncGenerator, Union

4 changes: 2 additions & 2 deletions inference/predictor_deployment.py
@@ -23,10 +23,10 @@
from queue import Empty
import torch
from transformers import TextIteratorStreamer
from inference.inference_config import InferenceConfig
from inference_config import InferenceConfig
from typing import Union, Dict, Any
from starlette.responses import StreamingResponse, JSONResponse
from inference.api_openai_backend.openai_protocol import ModelResponse
from api_openai_backend.openai_protocol import ModelResponse
from utils import get_input_format


2 changes: 1 addition & 1 deletion inference/serve.py
@@ -21,7 +21,7 @@
from api_server_simple import serve_run
from api_server_openai import openai_serve_run
from predictor_deployment import PredictorDeployment
from inference.inference_config import ModelDescription, InferenceConfig, all_models
from inference_config import ModelDescription, InferenceConfig, all_models


def get_deployed_models(args):
2 changes: 1 addition & 1 deletion inference/transformer_predictor.py
@@ -1,7 +1,7 @@
import torch
from transformers import AutoModelForCausalLM, AutoConfig
from transformers import TextIteratorStreamer
from inference.inference_config import InferenceConfig, PRECISION_BF16
from inference_config import InferenceConfig, PRECISION_BF16
from predictor import Predictor
from utils import get_torch_dtype

2 changes: 1 addition & 1 deletion inference/utils.py
@@ -16,7 +16,7 @@

from transformers import StoppingCriteria
import torch
from inference.inference_config import InferenceConfig, DEVICE_CPU
from inference_config import InferenceConfig, DEVICE_CPU
from typing import Dict, Any, List, Union


2 changes: 1 addition & 1 deletion inference/vllm_predictor.py
@@ -1,7 +1,7 @@
import asyncio
from typing import AsyncGenerator, List, Union
from predictor import Predictor
from inference.inference_config import InferenceConfig, PRECISION_BF16
from inference_config import InferenceConfig, PRECISION_BF16
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.sampling_params import SamplingParams
91 changes: 47 additions & 44 deletions ui/start_ui.py
@@ -19,20 +19,24 @@
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from inference.inference_config import all_models, ModelDescription, Prompt
from inference.inference_config import InferenceConfig as FinetunedConfig
from inference.chat_process import ChatModelGptJ, ChatModelLLama # noqa: F401
from inference.predictor_deployment import PredictorDeployment
ui_folder = os.path.dirname(__file__)
sys.path.append(os.path.join(ui_folder, ".."))
sys.path.append(os.path.join(ui_folder, "../inference"))

from predictor_deployment import PredictorDeployment
from chat_process import ChatModelGptJ, ChatModelLLama # noqa: F401
from inference_config import all_models, ModelDescription, Prompt
from inference_config import InferenceConfig as FinetunedConfig
from ray import serve
import ray
import gradio as gr
import argparse
from ray.tune import Stopper
from ray.train.base_trainer import TrainingFailedError
from ray.tune.logger import LoggerCallback
from multiprocessing import Process, Queue
from ray.util import queue
from ray.job_config import JobConfig
from multiprocessing import Process, Queue
import paramiko
from html_format import cpu_memory_html, ray_status_html, custom_css
from typing import Dict, List, Any
@@ -109,6 +113,7 @@ def __init__(
default_data_path: str,
default_rag_path: str,
config: dict,
ray_init_config: dict,
head_node_ip: str,
node_port: str,
node_user_name: str,
@@ -122,6 +127,7 @@ def __init__(
self.repo_code_path = repo_code_path
self.default_data_path = default_data_path
self.config = config
self.ray_init_config = ray_init_config
self.head_node_ip = head_node_ip
self.node_port = node_port
self.user_name = node_user_name
@@ -455,7 +461,7 @@ def finetune(
origin_model_path = model_desc.model_id_or_path
tokenizer_path = model_desc.tokenizer_name_or_path
gpt_base_model = model_desc.gpt_base_model
last_gpt_base_model = False

finetuned_model_path = os.path.join(self.finetuned_model_path, model_name, new_model_name)
finetuned_checkpoint_path = (
os.path.join(self.finetuned_checkpoint_path, model_name, new_model_name)
@@ -464,46 +470,24 @@
)

finetune_config = self.config.copy()
training_config = finetune_config.get("Training")
exist_worker = int(training_config["num_training_workers"])
exist_cpus_per_worker_ftn = int(training_config["resources_per_worker"]["CPU"])
new_ray_init_config = self.ray_init_config.copy()

ray_resources = ray.available_resources()
if "CPU" not in ray_resources or cpus_per_worker_ftn * worker_num + 1 > int(
ray.available_resources()["CPU"]
):
raise gr.Error("Resources are not meeting the demand")
if (
worker_num != exist_worker
or cpus_per_worker_ftn != exist_cpus_per_worker_ftn
or not (gpt_base_model and last_gpt_base_model)
):
ray.shutdown()
new_ray_init_config = {
"runtime_env": {
"env_vars": {
"OMP_NUM_THREADS": str(cpus_per_worker_ftn),
"ACCELERATE_USE_CPU": "True",
"ACCELERATE_MIXED_PRECISION": "no",
"CCL_WORKER_COUNT": "1",
"CCL_LOG_LEVEL": "info",
"WORLD_SIZE": str(worker_num),
}
},
"address": "auto",
"_node_ip_address": "127.0.0.1",
}
if gpt_base_model:
new_ray_init_config["runtime_env"]["pip"] = ["transformers==4.26.0"]
else:
new_ray_init_config["runtime_env"]["pip"] = ["transformers==4.31.0"]
last_gpt_base_model = gpt_base_model
finetune_config["Training"]["num_training_workers"] = int(worker_num)
finetune_config["Training"]["resources_per_worker"]["CPU"] = int(cpus_per_worker_ftn)

ray.init(**new_ray_init_config)
exist_worker = worker_num
exist_cpus_per_worker_ftn = cpus_per_worker_ftn
ray.shutdown()
if gpt_base_model:
new_ray_init_config["runtime_env"]["pip"] = ["transformers==4.26.0"]
else:
new_ray_init_config["runtime_env"]["pip"] = ["transformers==4.31.0"]
new_ray_init_config["runtime_env"]["env_vars"]["WORLD_SIZE"] = str(worker_num)
finetune_config["Training"]["num_training_workers"] = int(worker_num)
finetune_config["Training"]["resources_per_worker"]["CPU"] = int(cpus_per_worker_ftn)

ray.init(**new_ray_init_config)

finetune_config["Dataset"]["train_file"] = dataset
finetune_config["General"]["base_model"] = origin_model_path
@@ -617,6 +601,16 @@ def finetune_progress(self, progress=gr.Progress()):

def deploy_func(self, model_name: str, replica_num: int, cpus_per_worker_deploy: int):
self.shutdown_deploy()
ray.shutdown()
new_ray_init_config = self.ray_init_config.copy()
new_ray_init_config["job_config"] = JobConfig(
code_search_path=[
os.path.join(ui_folder, "../finetune"),
os.path.join(ui_folder, "../inference"),
]
)
ray.init(**new_ray_init_config)

if cpus_per_worker_deploy * replica_num > int(ray.available_resources()["CPU"]):
raise gr.Error("Resources are not meeting the demand")

@@ -834,10 +828,14 @@ def _init_ui(self):
gr.HTML("<h3 style='text-align: left; margin-bottom: 1rem'>" + step1 + "</h3>")
with gr.Group():
base_models_list = list(self._base_models.keys())
# set the default base model for finetuning to gpt2
model_index = (
base_models_list.index("gpt2") if "gpt2" in base_models_list else 0
)
base_models_list.append("specify other models")
base_model_dropdown = gr.Dropdown(
base_models_list,
value=base_models_list[2],
value=base_models_list[model_index],
label="Select Base Model",
allow_custom_value=True,
)
@@ -934,9 +932,15 @@ def _init_ui(self):
with gr.Row():
with gr.Column(scale=0.8):
all_models_list = list(self._all_models.keys())
# set the default model for deployment to llama-2-7b-chat-hf
model_index = (
all_models_list.index("llama-2-7b-chat-hf")
if "llama-2-7b-chat-hf" in all_models_list
else 0
)
all_model_dropdown = gr.Dropdown(
all_models_list,
value=all_models_list[3],
value=all_models_list[model_index],
label="Select Model to Deploy",
elem_classes="disable_status",
allow_custom_value=True,
@@ -1563,8 +1567,6 @@ def _init_ui(self):
default_data_path = os.path.abspath(
infer_path + os.path.sep + "../examples/data/sample_finetune_data.jsonl"
)

sys.path.append(repo_path)
from finetune.finetune import get_accelerate_environment_variable

finetune_config: Dict[str, Any] = {
@@ -1617,6 +1619,7 @@ def _init_ui(self):
default_data_path,
default_rag_path,
finetune_config,
ray_init_config,
head_node_ip,
args.node_port,
args.node_user_name,