From 3de60163210ce347e1c9a6a38cd826af0a5ee315 Mon Sep 17 00:00:00 2001 From: KepingYan Date: Thu, 8 Feb 2024 18:12:24 +0800 Subject: [PATCH 01/29] mv path --- {common => llmonray/common}/__init__.py | 0 {common => llmonray/common}/agentenv/__init__.py | 0 {common => llmonray/common}/agentenv/agentenv.py | 0 {common => llmonray/common}/agentenv/rlhf_env.py | 0 {common => llmonray/common}/common.py | 0 {common => llmonray/common}/config.py | 0 .../common}/dataprocesser/__init__.py | 0 .../common}/dataprocesser/dataprocesser.py | 0 .../common}/dataprocesser/general_processer.py | 0 .../common}/dataprocesser/rm_dataprocesser.py | 0 {common => llmonray/common}/dataset/__init__.py | 0 {common => llmonray/common}/dataset/dataset.py | 0 .../common}/dataset/huggingface_dataset.py | 0 {common => llmonray/common}/init.py | 0 {common => llmonray/common}/initializer/__init__.py | 0 .../common}/initializer/initializer.py | 0 {common => llmonray/common}/load.py | 0 {common => llmonray/common}/logging.py | 0 {common => llmonray/common}/model/__init__.py | 0 .../model/huggingface_model_for_causal_lm.py | 0 {common => llmonray/common}/model/model.py | 0 {common => llmonray/common}/model/reward_model.py | 0 {common => llmonray/common}/optimizer/__init__.py | 0 .../common}/optimizer/default_optimizer.py | 0 .../common}/optimizer/group_optimizer.py | 0 {common => llmonray/common}/optimizer/optimizer.py | 0 {common => llmonray/common}/tokenizer/__init__.py | 0 .../common}/tokenizer/empty_tokenizer.py | 0 .../common}/tokenizer/huggingface_tokenizer.py | 0 {common => llmonray/common}/tokenizer/tokenizer.py | 0 {common => llmonray/common}/torch_config.py | 0 {common => llmonray/common}/trainer/__init__.py | 0 .../common}/trainer/default_trainer.py | 0 {common => llmonray/common}/trainer/rm_trainer.py | 0 {common => llmonray/common}/trainer/trainer.py | 0 {finetune => llmonray/finetune}/__init__.py | 0 {finetune => llmonray/finetune}/finetune.py | 0 {finetune => 
llmonray/finetune}/finetune.yaml | 0 {finetune => llmonray/finetune}/finetune_config.py | 0 .../finetune}/models/bloom-560m.yaml | 0 .../finetune}/models/finetune_config_template.yaml | 0 .../finetune}/models/gpt-j-6b.yaml | 0 {finetune => llmonray/finetune}/models/gpt2.yaml | 0 .../finetune}/models/llama-2-7b-chat-hf.yaml | 0 .../finetune}/models/llama-7b.yaml | 0 .../finetune}/models/mistral-7b-v0.1.yaml | 0 .../finetune}/models/mpt-7b-chat.yaml | 0 .../finetune}/models/opt-125m.yaml | 0 {inference => llmonray/inference}/__init__.py | 0 .../api_openai_backend/openai_protocol.py | 0 .../inference}/api_openai_backend/query_client.py | 0 .../api_openai_backend/request_handler.py | 0 .../inference}/api_openai_backend/router_app.py | 0 .../inference}/api_server_openai.py | 0 .../inference}/api_server_simple.py | 0 {inference => llmonray/inference}/chat_process.py | 0 .../inference}/deepspeed_predictor.py | 0 .../inference}/inference_config.py | 0 {inference => llmonray/inference}/logger.py | 0 .../inference}/models/CodeLlama-7b-hf.yaml | 0 .../models/bigdl/mistral-7b-v0.1-bigdl.yaml | 0 .../inference}/models/bigdl/mpt-7b-bigdl.yaml | 0 .../inference}/models/bloom-560m.yaml | 0 .../inference}/models/falcon-7b.yaml | 0 .../inference}/models/gpt-j-6b.yaml | 0 {inference => llmonray/inference}/models/gpt2.yaml | 0 .../inference}/models/llama-2-7b-chat-hf.yaml | 0 .../inference}/models/mistral-7b-v0.1.yaml | 0 .../inference}/models/mpt-7b.yaml | 0 .../inference}/models/neural-chat-7b-v3-1.yaml | 0 .../inference}/models/opt-125m.yaml | 0 .../inference}/models/starcoder.yaml | 0 .../template/export_inference_config_to_yaml.py | 0 .../models/template/inference_config_template.yaml | 0 .../models/vllm/llama-2-7b-chat-hf-vllm.yaml | 0 {inference => llmonray/inference}/predictor.py | 0 .../inference}/predictor_deployment.py | 0 {inference => llmonray/inference}/serve.py | 0 .../inference}/transformer_predictor.py | 0 {inference => llmonray/inference}/utils.py | 0 {inference => 
llmonray/inference}/vllm_predictor.py | 0 {pretrain => llmonray/pretrain}/__init__.py | 0 .../pretrain}/backend/deepspeed_backend.py | 0 .../pretrain}/backend/habana_backend.py | 0 .../pretrain}/config/bloom1b7_8gpus_pretrain.conf | 0 ...m_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf | 0 ...a2_7b_megatron_deepspeed_zs0_8gpus_pretrain.conf | 0 ...a2_7b_megatron_deepspeed_zs3_8gpus_pretrain.conf | 0 .../pretrain}/config/llama_7b_8Guadi_pretrain.conf | 0 .../pretrain}/config/llama_7b_8gpu_pretrain.conf | 0 ...a_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf | 0 .../pretrain}/docker/Dockerfile.megatron.habana | 0 .../pretrain}/docker/Dockerfile.nvidia | 0 .../pretrain}/docker/Dockerfile.optimum.habana | 0 .../pretrain}/docker/build-image.sh | 0 .../pretrain}/megatron_deepspeed_pretrain.py | 0 ...y-to-include-the-megatron.model.vision-int.patch | 0 .../gpu/0001-Change-the-sample-s-column-name.patch | 0 ...fix-for-megatron-deepspeed-for-gpu-version.patch | 0 ...1-Init-megatron-deepspeed-with-Ray-cluster.patch | 0 .../hpu/0002-Add-the-Huggingface-tokenizer.patch | 0 .../pretrain}/plugin/group_dataset.py | 0 .../pretrain}/plugin/hf_pretrainer.py | 0 .../plugin/huggingface_model_from_config.py | 0 .../pretrain}/plugin/megatron_dataset.py | 0 .../pretrain}/plugin/megatron_pretrainer.py | 0 .../pretrain}/plugin/megatron_processer.py | 0 .../pretrain}/plugin/megtron_initializer.py | 0 .../pretrain}/plugin/plain_id_processer.py | 0 .../pretrain}/plugin/pretrainer.py | 0 {pretrain => llmonray/pretrain}/pretrain.py | 0 .../pretrain}/pretrain_template.conf | 0 .../pretrain_template_megatron_dataset.conf | 0 .../pretrain}/requirements.optimum-habana.txt | 0 {pretrain => llmonray/pretrain}/requirements.txt | 0 {rlhf => llmonray/rlhf}/__init__.py | 0 {rlhf => llmonray/rlhf}/ppo.py | 0 {rlhf => llmonray/rlhf}/ppo.yaml | 0 {rlhf => llmonray/rlhf}/reward.py | 0 {rlhf => llmonray/rlhf}/reward.yaml | 0 {rlhf => llmonray/rlhf}/rl_algo/ppo/ppo_rlhf.py | 0 {rlhf => 
llmonray/rlhf}/rl_algo/ppo/rlhf_buffer.py | 0 .../rlhf}/rl_algo/ppo/rlhf_ppo_module.py | 0 .../rlhf}/rl_algo/ppo/rlhf_ppo_torch_learner.py | 0 {rlhf => llmonray/rlhf}/rl_algo/ppo/util.py | 0 {ui => llmonray/ui}/html_format.py | 0 {ui => llmonray/ui}/images/Picture1.png | Bin {ui => llmonray/ui}/images/Picture2.png | Bin {ui => llmonray/ui}/images/logo.png | Bin {ui => llmonray/ui}/start_ui.py | 0 130 files changed, 0 insertions(+), 0 deletions(-) rename {common => llmonray/common}/__init__.py (100%) rename {common => llmonray/common}/agentenv/__init__.py (100%) rename {common => llmonray/common}/agentenv/agentenv.py (100%) rename {common => llmonray/common}/agentenv/rlhf_env.py (100%) rename {common => llmonray/common}/common.py (100%) rename {common => llmonray/common}/config.py (100%) rename {common => llmonray/common}/dataprocesser/__init__.py (100%) rename {common => llmonray/common}/dataprocesser/dataprocesser.py (100%) rename {common => llmonray/common}/dataprocesser/general_processer.py (100%) rename {common => llmonray/common}/dataprocesser/rm_dataprocesser.py (100%) rename {common => llmonray/common}/dataset/__init__.py (100%) rename {common => llmonray/common}/dataset/dataset.py (100%) rename {common => llmonray/common}/dataset/huggingface_dataset.py (100%) rename {common => llmonray/common}/init.py (100%) rename {common => llmonray/common}/initializer/__init__.py (100%) rename {common => llmonray/common}/initializer/initializer.py (100%) rename {common => llmonray/common}/load.py (100%) rename {common => llmonray/common}/logging.py (100%) rename {common => llmonray/common}/model/__init__.py (100%) rename {common => llmonray/common}/model/huggingface_model_for_causal_lm.py (100%) rename {common => llmonray/common}/model/model.py (100%) rename {common => llmonray/common}/model/reward_model.py (100%) rename {common => llmonray/common}/optimizer/__init__.py (100%) rename {common => llmonray/common}/optimizer/default_optimizer.py (100%) rename {common => 
llmonray/common}/optimizer/group_optimizer.py (100%) rename {common => llmonray/common}/optimizer/optimizer.py (100%) rename {common => llmonray/common}/tokenizer/__init__.py (100%) rename {common => llmonray/common}/tokenizer/empty_tokenizer.py (100%) rename {common => llmonray/common}/tokenizer/huggingface_tokenizer.py (100%) rename {common => llmonray/common}/tokenizer/tokenizer.py (100%) rename {common => llmonray/common}/torch_config.py (100%) rename {common => llmonray/common}/trainer/__init__.py (100%) rename {common => llmonray/common}/trainer/default_trainer.py (100%) rename {common => llmonray/common}/trainer/rm_trainer.py (100%) rename {common => llmonray/common}/trainer/trainer.py (100%) rename {finetune => llmonray/finetune}/__init__.py (100%) rename {finetune => llmonray/finetune}/finetune.py (100%) rename {finetune => llmonray/finetune}/finetune.yaml (100%) rename {finetune => llmonray/finetune}/finetune_config.py (100%) rename {finetune => llmonray/finetune}/models/bloom-560m.yaml (100%) rename {finetune => llmonray/finetune}/models/finetune_config_template.yaml (100%) rename {finetune => llmonray/finetune}/models/gpt-j-6b.yaml (100%) rename {finetune => llmonray/finetune}/models/gpt2.yaml (100%) rename {finetune => llmonray/finetune}/models/llama-2-7b-chat-hf.yaml (100%) rename {finetune => llmonray/finetune}/models/llama-7b.yaml (100%) rename {finetune => llmonray/finetune}/models/mistral-7b-v0.1.yaml (100%) rename {finetune => llmonray/finetune}/models/mpt-7b-chat.yaml (100%) rename {finetune => llmonray/finetune}/models/opt-125m.yaml (100%) rename {inference => llmonray/inference}/__init__.py (100%) rename {inference => llmonray/inference}/api_openai_backend/openai_protocol.py (100%) rename {inference => llmonray/inference}/api_openai_backend/query_client.py (100%) rename {inference => llmonray/inference}/api_openai_backend/request_handler.py (100%) rename {inference => llmonray/inference}/api_openai_backend/router_app.py (100%) rename 
{inference => llmonray/inference}/api_server_openai.py (100%) rename {inference => llmonray/inference}/api_server_simple.py (100%) rename {inference => llmonray/inference}/chat_process.py (100%) rename {inference => llmonray/inference}/deepspeed_predictor.py (100%) rename {inference => llmonray/inference}/inference_config.py (100%) rename {inference => llmonray/inference}/logger.py (100%) rename {inference => llmonray/inference}/models/CodeLlama-7b-hf.yaml (100%) rename {inference => llmonray/inference}/models/bigdl/mistral-7b-v0.1-bigdl.yaml (100%) rename {inference => llmonray/inference}/models/bigdl/mpt-7b-bigdl.yaml (100%) rename {inference => llmonray/inference}/models/bloom-560m.yaml (100%) rename {inference => llmonray/inference}/models/falcon-7b.yaml (100%) rename {inference => llmonray/inference}/models/gpt-j-6b.yaml (100%) rename {inference => llmonray/inference}/models/gpt2.yaml (100%) rename {inference => llmonray/inference}/models/llama-2-7b-chat-hf.yaml (100%) rename {inference => llmonray/inference}/models/mistral-7b-v0.1.yaml (100%) rename {inference => llmonray/inference}/models/mpt-7b.yaml (100%) rename {inference => llmonray/inference}/models/neural-chat-7b-v3-1.yaml (100%) rename {inference => llmonray/inference}/models/opt-125m.yaml (100%) rename {inference => llmonray/inference}/models/starcoder.yaml (100%) rename {inference => llmonray/inference}/models/template/export_inference_config_to_yaml.py (100%) rename {inference => llmonray/inference}/models/template/inference_config_template.yaml (100%) rename {inference => llmonray/inference}/models/vllm/llama-2-7b-chat-hf-vllm.yaml (100%) rename {inference => llmonray/inference}/predictor.py (100%) rename {inference => llmonray/inference}/predictor_deployment.py (100%) rename {inference => llmonray/inference}/serve.py (100%) rename {inference => llmonray/inference}/transformer_predictor.py (100%) rename {inference => llmonray/inference}/utils.py (100%) rename {inference => 
llmonray/inference}/vllm_predictor.py (100%) rename {pretrain => llmonray/pretrain}/__init__.py (100%) rename {pretrain => llmonray/pretrain}/backend/deepspeed_backend.py (100%) rename {pretrain => llmonray/pretrain}/backend/habana_backend.py (100%) rename {pretrain => llmonray/pretrain}/config/bloom1b7_8gpus_pretrain.conf (100%) rename {pretrain => llmonray/pretrain}/config/bloom_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf (100%) rename {pretrain => llmonray/pretrain}/config/llama2_7b_megatron_deepspeed_zs0_8gpus_pretrain.conf (100%) rename {pretrain => llmonray/pretrain}/config/llama2_7b_megatron_deepspeed_zs3_8gpus_pretrain.conf (100%) rename {pretrain => llmonray/pretrain}/config/llama_7b_8Guadi_pretrain.conf (100%) rename {pretrain => llmonray/pretrain}/config/llama_7b_8gpu_pretrain.conf (100%) rename {pretrain => llmonray/pretrain}/config/llama_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf (100%) rename {pretrain => llmonray/pretrain}/docker/Dockerfile.megatron.habana (100%) rename {pretrain => llmonray/pretrain}/docker/Dockerfile.nvidia (100%) rename {pretrain => llmonray/pretrain}/docker/Dockerfile.optimum.habana (100%) rename {pretrain => llmonray/pretrain}/docker/build-image.sh (100%) rename {pretrain => llmonray/pretrain}/megatron_deepspeed_pretrain.py (100%) rename {pretrain => llmonray/pretrain}/patch/gpu/0001-Add-init.py-to-include-the-megatron.model.vision-int.patch (100%) rename {pretrain => llmonray/pretrain}/patch/gpu/0001-Change-the-sample-s-column-name.patch (100%) rename {pretrain => llmonray/pretrain}/patch/gpu/0001-hot-fix-for-megatron-deepspeed-for-gpu-version.patch (100%) rename {pretrain => llmonray/pretrain}/patch/hpu/0001-Init-megatron-deepspeed-with-Ray-cluster.patch (100%) rename {pretrain => llmonray/pretrain}/patch/hpu/0002-Add-the-Huggingface-tokenizer.patch (100%) rename {pretrain => llmonray/pretrain}/plugin/group_dataset.py (100%) rename {pretrain => llmonray/pretrain}/plugin/hf_pretrainer.py (100%) rename {pretrain => 
llmonray/pretrain}/plugin/huggingface_model_from_config.py (100%) rename {pretrain => llmonray/pretrain}/plugin/megatron_dataset.py (100%) rename {pretrain => llmonray/pretrain}/plugin/megatron_pretrainer.py (100%) rename {pretrain => llmonray/pretrain}/plugin/megatron_processer.py (100%) rename {pretrain => llmonray/pretrain}/plugin/megtron_initializer.py (100%) rename {pretrain => llmonray/pretrain}/plugin/plain_id_processer.py (100%) rename {pretrain => llmonray/pretrain}/plugin/pretrainer.py (100%) rename {pretrain => llmonray/pretrain}/pretrain.py (100%) rename {pretrain => llmonray/pretrain}/pretrain_template.conf (100%) rename {pretrain => llmonray/pretrain}/pretrain_template_megatron_dataset.conf (100%) rename {pretrain => llmonray/pretrain}/requirements.optimum-habana.txt (100%) rename {pretrain => llmonray/pretrain}/requirements.txt (100%) rename {rlhf => llmonray/rlhf}/__init__.py (100%) rename {rlhf => llmonray/rlhf}/ppo.py (100%) rename {rlhf => llmonray/rlhf}/ppo.yaml (100%) rename {rlhf => llmonray/rlhf}/reward.py (100%) rename {rlhf => llmonray/rlhf}/reward.yaml (100%) rename {rlhf => llmonray/rlhf}/rl_algo/ppo/ppo_rlhf.py (100%) rename {rlhf => llmonray/rlhf}/rl_algo/ppo/rlhf_buffer.py (100%) rename {rlhf => llmonray/rlhf}/rl_algo/ppo/rlhf_ppo_module.py (100%) rename {rlhf => llmonray/rlhf}/rl_algo/ppo/rlhf_ppo_torch_learner.py (100%) rename {rlhf => llmonray/rlhf}/rl_algo/ppo/util.py (100%) rename {ui => llmonray/ui}/html_format.py (100%) rename {ui => llmonray/ui}/images/Picture1.png (100%) rename {ui => llmonray/ui}/images/Picture2.png (100%) rename {ui => llmonray/ui}/images/logo.png (100%) rename {ui => llmonray/ui}/start_ui.py (100%) diff --git a/common/__init__.py b/llmonray/common/__init__.py similarity index 100% rename from common/__init__.py rename to llmonray/common/__init__.py diff --git a/common/agentenv/__init__.py b/llmonray/common/agentenv/__init__.py similarity index 100% rename from common/agentenv/__init__.py rename to 
llmonray/common/agentenv/__init__.py diff --git a/common/agentenv/agentenv.py b/llmonray/common/agentenv/agentenv.py similarity index 100% rename from common/agentenv/agentenv.py rename to llmonray/common/agentenv/agentenv.py diff --git a/common/agentenv/rlhf_env.py b/llmonray/common/agentenv/rlhf_env.py similarity index 100% rename from common/agentenv/rlhf_env.py rename to llmonray/common/agentenv/rlhf_env.py diff --git a/common/common.py b/llmonray/common/common.py similarity index 100% rename from common/common.py rename to llmonray/common/common.py diff --git a/common/config.py b/llmonray/common/config.py similarity index 100% rename from common/config.py rename to llmonray/common/config.py diff --git a/common/dataprocesser/__init__.py b/llmonray/common/dataprocesser/__init__.py similarity index 100% rename from common/dataprocesser/__init__.py rename to llmonray/common/dataprocesser/__init__.py diff --git a/common/dataprocesser/dataprocesser.py b/llmonray/common/dataprocesser/dataprocesser.py similarity index 100% rename from common/dataprocesser/dataprocesser.py rename to llmonray/common/dataprocesser/dataprocesser.py diff --git a/common/dataprocesser/general_processer.py b/llmonray/common/dataprocesser/general_processer.py similarity index 100% rename from common/dataprocesser/general_processer.py rename to llmonray/common/dataprocesser/general_processer.py diff --git a/common/dataprocesser/rm_dataprocesser.py b/llmonray/common/dataprocesser/rm_dataprocesser.py similarity index 100% rename from common/dataprocesser/rm_dataprocesser.py rename to llmonray/common/dataprocesser/rm_dataprocesser.py diff --git a/common/dataset/__init__.py b/llmonray/common/dataset/__init__.py similarity index 100% rename from common/dataset/__init__.py rename to llmonray/common/dataset/__init__.py diff --git a/common/dataset/dataset.py b/llmonray/common/dataset/dataset.py similarity index 100% rename from common/dataset/dataset.py rename to llmonray/common/dataset/dataset.py diff 
--git a/common/dataset/huggingface_dataset.py b/llmonray/common/dataset/huggingface_dataset.py similarity index 100% rename from common/dataset/huggingface_dataset.py rename to llmonray/common/dataset/huggingface_dataset.py diff --git a/common/init.py b/llmonray/common/init.py similarity index 100% rename from common/init.py rename to llmonray/common/init.py diff --git a/common/initializer/__init__.py b/llmonray/common/initializer/__init__.py similarity index 100% rename from common/initializer/__init__.py rename to llmonray/common/initializer/__init__.py diff --git a/common/initializer/initializer.py b/llmonray/common/initializer/initializer.py similarity index 100% rename from common/initializer/initializer.py rename to llmonray/common/initializer/initializer.py diff --git a/common/load.py b/llmonray/common/load.py similarity index 100% rename from common/load.py rename to llmonray/common/load.py diff --git a/common/logging.py b/llmonray/common/logging.py similarity index 100% rename from common/logging.py rename to llmonray/common/logging.py diff --git a/common/model/__init__.py b/llmonray/common/model/__init__.py similarity index 100% rename from common/model/__init__.py rename to llmonray/common/model/__init__.py diff --git a/common/model/huggingface_model_for_causal_lm.py b/llmonray/common/model/huggingface_model_for_causal_lm.py similarity index 100% rename from common/model/huggingface_model_for_causal_lm.py rename to llmonray/common/model/huggingface_model_for_causal_lm.py diff --git a/common/model/model.py b/llmonray/common/model/model.py similarity index 100% rename from common/model/model.py rename to llmonray/common/model/model.py diff --git a/common/model/reward_model.py b/llmonray/common/model/reward_model.py similarity index 100% rename from common/model/reward_model.py rename to llmonray/common/model/reward_model.py diff --git a/common/optimizer/__init__.py b/llmonray/common/optimizer/__init__.py similarity index 100% rename from 
common/optimizer/__init__.py rename to llmonray/common/optimizer/__init__.py diff --git a/common/optimizer/default_optimizer.py b/llmonray/common/optimizer/default_optimizer.py similarity index 100% rename from common/optimizer/default_optimizer.py rename to llmonray/common/optimizer/default_optimizer.py diff --git a/common/optimizer/group_optimizer.py b/llmonray/common/optimizer/group_optimizer.py similarity index 100% rename from common/optimizer/group_optimizer.py rename to llmonray/common/optimizer/group_optimizer.py diff --git a/common/optimizer/optimizer.py b/llmonray/common/optimizer/optimizer.py similarity index 100% rename from common/optimizer/optimizer.py rename to llmonray/common/optimizer/optimizer.py diff --git a/common/tokenizer/__init__.py b/llmonray/common/tokenizer/__init__.py similarity index 100% rename from common/tokenizer/__init__.py rename to llmonray/common/tokenizer/__init__.py diff --git a/common/tokenizer/empty_tokenizer.py b/llmonray/common/tokenizer/empty_tokenizer.py similarity index 100% rename from common/tokenizer/empty_tokenizer.py rename to llmonray/common/tokenizer/empty_tokenizer.py diff --git a/common/tokenizer/huggingface_tokenizer.py b/llmonray/common/tokenizer/huggingface_tokenizer.py similarity index 100% rename from common/tokenizer/huggingface_tokenizer.py rename to llmonray/common/tokenizer/huggingface_tokenizer.py diff --git a/common/tokenizer/tokenizer.py b/llmonray/common/tokenizer/tokenizer.py similarity index 100% rename from common/tokenizer/tokenizer.py rename to llmonray/common/tokenizer/tokenizer.py diff --git a/common/torch_config.py b/llmonray/common/torch_config.py similarity index 100% rename from common/torch_config.py rename to llmonray/common/torch_config.py diff --git a/common/trainer/__init__.py b/llmonray/common/trainer/__init__.py similarity index 100% rename from common/trainer/__init__.py rename to llmonray/common/trainer/__init__.py diff --git a/common/trainer/default_trainer.py 
b/llmonray/common/trainer/default_trainer.py similarity index 100% rename from common/trainer/default_trainer.py rename to llmonray/common/trainer/default_trainer.py diff --git a/common/trainer/rm_trainer.py b/llmonray/common/trainer/rm_trainer.py similarity index 100% rename from common/trainer/rm_trainer.py rename to llmonray/common/trainer/rm_trainer.py diff --git a/common/trainer/trainer.py b/llmonray/common/trainer/trainer.py similarity index 100% rename from common/trainer/trainer.py rename to llmonray/common/trainer/trainer.py diff --git a/finetune/__init__.py b/llmonray/finetune/__init__.py similarity index 100% rename from finetune/__init__.py rename to llmonray/finetune/__init__.py diff --git a/finetune/finetune.py b/llmonray/finetune/finetune.py similarity index 100% rename from finetune/finetune.py rename to llmonray/finetune/finetune.py diff --git a/finetune/finetune.yaml b/llmonray/finetune/finetune.yaml similarity index 100% rename from finetune/finetune.yaml rename to llmonray/finetune/finetune.yaml diff --git a/finetune/finetune_config.py b/llmonray/finetune/finetune_config.py similarity index 100% rename from finetune/finetune_config.py rename to llmonray/finetune/finetune_config.py diff --git a/finetune/models/bloom-560m.yaml b/llmonray/finetune/models/bloom-560m.yaml similarity index 100% rename from finetune/models/bloom-560m.yaml rename to llmonray/finetune/models/bloom-560m.yaml diff --git a/finetune/models/finetune_config_template.yaml b/llmonray/finetune/models/finetune_config_template.yaml similarity index 100% rename from finetune/models/finetune_config_template.yaml rename to llmonray/finetune/models/finetune_config_template.yaml diff --git a/finetune/models/gpt-j-6b.yaml b/llmonray/finetune/models/gpt-j-6b.yaml similarity index 100% rename from finetune/models/gpt-j-6b.yaml rename to llmonray/finetune/models/gpt-j-6b.yaml diff --git a/finetune/models/gpt2.yaml b/llmonray/finetune/models/gpt2.yaml similarity index 100% rename from 
finetune/models/gpt2.yaml rename to llmonray/finetune/models/gpt2.yaml diff --git a/finetune/models/llama-2-7b-chat-hf.yaml b/llmonray/finetune/models/llama-2-7b-chat-hf.yaml similarity index 100% rename from finetune/models/llama-2-7b-chat-hf.yaml rename to llmonray/finetune/models/llama-2-7b-chat-hf.yaml diff --git a/finetune/models/llama-7b.yaml b/llmonray/finetune/models/llama-7b.yaml similarity index 100% rename from finetune/models/llama-7b.yaml rename to llmonray/finetune/models/llama-7b.yaml diff --git a/finetune/models/mistral-7b-v0.1.yaml b/llmonray/finetune/models/mistral-7b-v0.1.yaml similarity index 100% rename from finetune/models/mistral-7b-v0.1.yaml rename to llmonray/finetune/models/mistral-7b-v0.1.yaml diff --git a/finetune/models/mpt-7b-chat.yaml b/llmonray/finetune/models/mpt-7b-chat.yaml similarity index 100% rename from finetune/models/mpt-7b-chat.yaml rename to llmonray/finetune/models/mpt-7b-chat.yaml diff --git a/finetune/models/opt-125m.yaml b/llmonray/finetune/models/opt-125m.yaml similarity index 100% rename from finetune/models/opt-125m.yaml rename to llmonray/finetune/models/opt-125m.yaml diff --git a/inference/__init__.py b/llmonray/inference/__init__.py similarity index 100% rename from inference/__init__.py rename to llmonray/inference/__init__.py diff --git a/inference/api_openai_backend/openai_protocol.py b/llmonray/inference/api_openai_backend/openai_protocol.py similarity index 100% rename from inference/api_openai_backend/openai_protocol.py rename to llmonray/inference/api_openai_backend/openai_protocol.py diff --git a/inference/api_openai_backend/query_client.py b/llmonray/inference/api_openai_backend/query_client.py similarity index 100% rename from inference/api_openai_backend/query_client.py rename to llmonray/inference/api_openai_backend/query_client.py diff --git a/inference/api_openai_backend/request_handler.py b/llmonray/inference/api_openai_backend/request_handler.py similarity index 100% rename from 
inference/api_openai_backend/request_handler.py rename to llmonray/inference/api_openai_backend/request_handler.py diff --git a/inference/api_openai_backend/router_app.py b/llmonray/inference/api_openai_backend/router_app.py similarity index 100% rename from inference/api_openai_backend/router_app.py rename to llmonray/inference/api_openai_backend/router_app.py diff --git a/inference/api_server_openai.py b/llmonray/inference/api_server_openai.py similarity index 100% rename from inference/api_server_openai.py rename to llmonray/inference/api_server_openai.py diff --git a/inference/api_server_simple.py b/llmonray/inference/api_server_simple.py similarity index 100% rename from inference/api_server_simple.py rename to llmonray/inference/api_server_simple.py diff --git a/inference/chat_process.py b/llmonray/inference/chat_process.py similarity index 100% rename from inference/chat_process.py rename to llmonray/inference/chat_process.py diff --git a/inference/deepspeed_predictor.py b/llmonray/inference/deepspeed_predictor.py similarity index 100% rename from inference/deepspeed_predictor.py rename to llmonray/inference/deepspeed_predictor.py diff --git a/inference/inference_config.py b/llmonray/inference/inference_config.py similarity index 100% rename from inference/inference_config.py rename to llmonray/inference/inference_config.py diff --git a/inference/logger.py b/llmonray/inference/logger.py similarity index 100% rename from inference/logger.py rename to llmonray/inference/logger.py diff --git a/inference/models/CodeLlama-7b-hf.yaml b/llmonray/inference/models/CodeLlama-7b-hf.yaml similarity index 100% rename from inference/models/CodeLlama-7b-hf.yaml rename to llmonray/inference/models/CodeLlama-7b-hf.yaml diff --git a/inference/models/bigdl/mistral-7b-v0.1-bigdl.yaml b/llmonray/inference/models/bigdl/mistral-7b-v0.1-bigdl.yaml similarity index 100% rename from inference/models/bigdl/mistral-7b-v0.1-bigdl.yaml rename to 
llmonray/inference/models/bigdl/mistral-7b-v0.1-bigdl.yaml diff --git a/inference/models/bigdl/mpt-7b-bigdl.yaml b/llmonray/inference/models/bigdl/mpt-7b-bigdl.yaml similarity index 100% rename from inference/models/bigdl/mpt-7b-bigdl.yaml rename to llmonray/inference/models/bigdl/mpt-7b-bigdl.yaml diff --git a/inference/models/bloom-560m.yaml b/llmonray/inference/models/bloom-560m.yaml similarity index 100% rename from inference/models/bloom-560m.yaml rename to llmonray/inference/models/bloom-560m.yaml diff --git a/inference/models/falcon-7b.yaml b/llmonray/inference/models/falcon-7b.yaml similarity index 100% rename from inference/models/falcon-7b.yaml rename to llmonray/inference/models/falcon-7b.yaml diff --git a/inference/models/gpt-j-6b.yaml b/llmonray/inference/models/gpt-j-6b.yaml similarity index 100% rename from inference/models/gpt-j-6b.yaml rename to llmonray/inference/models/gpt-j-6b.yaml diff --git a/inference/models/gpt2.yaml b/llmonray/inference/models/gpt2.yaml similarity index 100% rename from inference/models/gpt2.yaml rename to llmonray/inference/models/gpt2.yaml diff --git a/inference/models/llama-2-7b-chat-hf.yaml b/llmonray/inference/models/llama-2-7b-chat-hf.yaml similarity index 100% rename from inference/models/llama-2-7b-chat-hf.yaml rename to llmonray/inference/models/llama-2-7b-chat-hf.yaml diff --git a/inference/models/mistral-7b-v0.1.yaml b/llmonray/inference/models/mistral-7b-v0.1.yaml similarity index 100% rename from inference/models/mistral-7b-v0.1.yaml rename to llmonray/inference/models/mistral-7b-v0.1.yaml diff --git a/inference/models/mpt-7b.yaml b/llmonray/inference/models/mpt-7b.yaml similarity index 100% rename from inference/models/mpt-7b.yaml rename to llmonray/inference/models/mpt-7b.yaml diff --git a/inference/models/neural-chat-7b-v3-1.yaml b/llmonray/inference/models/neural-chat-7b-v3-1.yaml similarity index 100% rename from inference/models/neural-chat-7b-v3-1.yaml rename to 
llmonray/inference/models/neural-chat-7b-v3-1.yaml diff --git a/inference/models/opt-125m.yaml b/llmonray/inference/models/opt-125m.yaml similarity index 100% rename from inference/models/opt-125m.yaml rename to llmonray/inference/models/opt-125m.yaml diff --git a/inference/models/starcoder.yaml b/llmonray/inference/models/starcoder.yaml similarity index 100% rename from inference/models/starcoder.yaml rename to llmonray/inference/models/starcoder.yaml diff --git a/inference/models/template/export_inference_config_to_yaml.py b/llmonray/inference/models/template/export_inference_config_to_yaml.py similarity index 100% rename from inference/models/template/export_inference_config_to_yaml.py rename to llmonray/inference/models/template/export_inference_config_to_yaml.py diff --git a/inference/models/template/inference_config_template.yaml b/llmonray/inference/models/template/inference_config_template.yaml similarity index 100% rename from inference/models/template/inference_config_template.yaml rename to llmonray/inference/models/template/inference_config_template.yaml diff --git a/inference/models/vllm/llama-2-7b-chat-hf-vllm.yaml b/llmonray/inference/models/vllm/llama-2-7b-chat-hf-vllm.yaml similarity index 100% rename from inference/models/vllm/llama-2-7b-chat-hf-vllm.yaml rename to llmonray/inference/models/vllm/llama-2-7b-chat-hf-vllm.yaml diff --git a/inference/predictor.py b/llmonray/inference/predictor.py similarity index 100% rename from inference/predictor.py rename to llmonray/inference/predictor.py diff --git a/inference/predictor_deployment.py b/llmonray/inference/predictor_deployment.py similarity index 100% rename from inference/predictor_deployment.py rename to llmonray/inference/predictor_deployment.py diff --git a/inference/serve.py b/llmonray/inference/serve.py similarity index 100% rename from inference/serve.py rename to llmonray/inference/serve.py diff --git a/inference/transformer_predictor.py b/llmonray/inference/transformer_predictor.py 
similarity index 100% rename from inference/transformer_predictor.py rename to llmonray/inference/transformer_predictor.py diff --git a/inference/utils.py b/llmonray/inference/utils.py similarity index 100% rename from inference/utils.py rename to llmonray/inference/utils.py diff --git a/inference/vllm_predictor.py b/llmonray/inference/vllm_predictor.py similarity index 100% rename from inference/vllm_predictor.py rename to llmonray/inference/vllm_predictor.py diff --git a/pretrain/__init__.py b/llmonray/pretrain/__init__.py similarity index 100% rename from pretrain/__init__.py rename to llmonray/pretrain/__init__.py diff --git a/pretrain/backend/deepspeed_backend.py b/llmonray/pretrain/backend/deepspeed_backend.py similarity index 100% rename from pretrain/backend/deepspeed_backend.py rename to llmonray/pretrain/backend/deepspeed_backend.py diff --git a/pretrain/backend/habana_backend.py b/llmonray/pretrain/backend/habana_backend.py similarity index 100% rename from pretrain/backend/habana_backend.py rename to llmonray/pretrain/backend/habana_backend.py diff --git a/pretrain/config/bloom1b7_8gpus_pretrain.conf b/llmonray/pretrain/config/bloom1b7_8gpus_pretrain.conf similarity index 100% rename from pretrain/config/bloom1b7_8gpus_pretrain.conf rename to llmonray/pretrain/config/bloom1b7_8gpus_pretrain.conf diff --git a/pretrain/config/bloom_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf b/llmonray/pretrain/config/bloom_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf similarity index 100% rename from pretrain/config/bloom_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf rename to llmonray/pretrain/config/bloom_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf diff --git a/pretrain/config/llama2_7b_megatron_deepspeed_zs0_8gpus_pretrain.conf b/llmonray/pretrain/config/llama2_7b_megatron_deepspeed_zs0_8gpus_pretrain.conf similarity index 100% rename from pretrain/config/llama2_7b_megatron_deepspeed_zs0_8gpus_pretrain.conf rename to 
llmonray/pretrain/config/llama2_7b_megatron_deepspeed_zs0_8gpus_pretrain.conf diff --git a/pretrain/config/llama2_7b_megatron_deepspeed_zs3_8gpus_pretrain.conf b/llmonray/pretrain/config/llama2_7b_megatron_deepspeed_zs3_8gpus_pretrain.conf similarity index 100% rename from pretrain/config/llama2_7b_megatron_deepspeed_zs3_8gpus_pretrain.conf rename to llmonray/pretrain/config/llama2_7b_megatron_deepspeed_zs3_8gpus_pretrain.conf diff --git a/pretrain/config/llama_7b_8Guadi_pretrain.conf b/llmonray/pretrain/config/llama_7b_8Guadi_pretrain.conf similarity index 100% rename from pretrain/config/llama_7b_8Guadi_pretrain.conf rename to llmonray/pretrain/config/llama_7b_8Guadi_pretrain.conf diff --git a/pretrain/config/llama_7b_8gpu_pretrain.conf b/llmonray/pretrain/config/llama_7b_8gpu_pretrain.conf similarity index 100% rename from pretrain/config/llama_7b_8gpu_pretrain.conf rename to llmonray/pretrain/config/llama_7b_8gpu_pretrain.conf diff --git a/pretrain/config/llama_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf b/llmonray/pretrain/config/llama_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf similarity index 100% rename from pretrain/config/llama_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf rename to llmonray/pretrain/config/llama_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf diff --git a/pretrain/docker/Dockerfile.megatron.habana b/llmonray/pretrain/docker/Dockerfile.megatron.habana similarity index 100% rename from pretrain/docker/Dockerfile.megatron.habana rename to llmonray/pretrain/docker/Dockerfile.megatron.habana diff --git a/pretrain/docker/Dockerfile.nvidia b/llmonray/pretrain/docker/Dockerfile.nvidia similarity index 100% rename from pretrain/docker/Dockerfile.nvidia rename to llmonray/pretrain/docker/Dockerfile.nvidia diff --git a/pretrain/docker/Dockerfile.optimum.habana b/llmonray/pretrain/docker/Dockerfile.optimum.habana similarity index 100% rename from pretrain/docker/Dockerfile.optimum.habana rename to 
llmonray/pretrain/docker/Dockerfile.optimum.habana diff --git a/pretrain/docker/build-image.sh b/llmonray/pretrain/docker/build-image.sh similarity index 100% rename from pretrain/docker/build-image.sh rename to llmonray/pretrain/docker/build-image.sh diff --git a/pretrain/megatron_deepspeed_pretrain.py b/llmonray/pretrain/megatron_deepspeed_pretrain.py similarity index 100% rename from pretrain/megatron_deepspeed_pretrain.py rename to llmonray/pretrain/megatron_deepspeed_pretrain.py diff --git a/pretrain/patch/gpu/0001-Add-init.py-to-include-the-megatron.model.vision-int.patch b/llmonray/pretrain/patch/gpu/0001-Add-init.py-to-include-the-megatron.model.vision-int.patch similarity index 100% rename from pretrain/patch/gpu/0001-Add-init.py-to-include-the-megatron.model.vision-int.patch rename to llmonray/pretrain/patch/gpu/0001-Add-init.py-to-include-the-megatron.model.vision-int.patch diff --git a/pretrain/patch/gpu/0001-Change-the-sample-s-column-name.patch b/llmonray/pretrain/patch/gpu/0001-Change-the-sample-s-column-name.patch similarity index 100% rename from pretrain/patch/gpu/0001-Change-the-sample-s-column-name.patch rename to llmonray/pretrain/patch/gpu/0001-Change-the-sample-s-column-name.patch diff --git a/pretrain/patch/gpu/0001-hot-fix-for-megatron-deepspeed-for-gpu-version.patch b/llmonray/pretrain/patch/gpu/0001-hot-fix-for-megatron-deepspeed-for-gpu-version.patch similarity index 100% rename from pretrain/patch/gpu/0001-hot-fix-for-megatron-deepspeed-for-gpu-version.patch rename to llmonray/pretrain/patch/gpu/0001-hot-fix-for-megatron-deepspeed-for-gpu-version.patch diff --git a/pretrain/patch/hpu/0001-Init-megatron-deepspeed-with-Ray-cluster.patch b/llmonray/pretrain/patch/hpu/0001-Init-megatron-deepspeed-with-Ray-cluster.patch similarity index 100% rename from pretrain/patch/hpu/0001-Init-megatron-deepspeed-with-Ray-cluster.patch rename to llmonray/pretrain/patch/hpu/0001-Init-megatron-deepspeed-with-Ray-cluster.patch diff --git 
a/pretrain/patch/hpu/0002-Add-the-Huggingface-tokenizer.patch b/llmonray/pretrain/patch/hpu/0002-Add-the-Huggingface-tokenizer.patch similarity index 100% rename from pretrain/patch/hpu/0002-Add-the-Huggingface-tokenizer.patch rename to llmonray/pretrain/patch/hpu/0002-Add-the-Huggingface-tokenizer.patch diff --git a/pretrain/plugin/group_dataset.py b/llmonray/pretrain/plugin/group_dataset.py similarity index 100% rename from pretrain/plugin/group_dataset.py rename to llmonray/pretrain/plugin/group_dataset.py diff --git a/pretrain/plugin/hf_pretrainer.py b/llmonray/pretrain/plugin/hf_pretrainer.py similarity index 100% rename from pretrain/plugin/hf_pretrainer.py rename to llmonray/pretrain/plugin/hf_pretrainer.py diff --git a/pretrain/plugin/huggingface_model_from_config.py b/llmonray/pretrain/plugin/huggingface_model_from_config.py similarity index 100% rename from pretrain/plugin/huggingface_model_from_config.py rename to llmonray/pretrain/plugin/huggingface_model_from_config.py diff --git a/pretrain/plugin/megatron_dataset.py b/llmonray/pretrain/plugin/megatron_dataset.py similarity index 100% rename from pretrain/plugin/megatron_dataset.py rename to llmonray/pretrain/plugin/megatron_dataset.py diff --git a/pretrain/plugin/megatron_pretrainer.py b/llmonray/pretrain/plugin/megatron_pretrainer.py similarity index 100% rename from pretrain/plugin/megatron_pretrainer.py rename to llmonray/pretrain/plugin/megatron_pretrainer.py diff --git a/pretrain/plugin/megatron_processer.py b/llmonray/pretrain/plugin/megatron_processer.py similarity index 100% rename from pretrain/plugin/megatron_processer.py rename to llmonray/pretrain/plugin/megatron_processer.py diff --git a/pretrain/plugin/megtron_initializer.py b/llmonray/pretrain/plugin/megtron_initializer.py similarity index 100% rename from pretrain/plugin/megtron_initializer.py rename to llmonray/pretrain/plugin/megtron_initializer.py diff --git a/pretrain/plugin/plain_id_processer.py 
b/llmonray/pretrain/plugin/plain_id_processer.py similarity index 100% rename from pretrain/plugin/plain_id_processer.py rename to llmonray/pretrain/plugin/plain_id_processer.py diff --git a/pretrain/plugin/pretrainer.py b/llmonray/pretrain/plugin/pretrainer.py similarity index 100% rename from pretrain/plugin/pretrainer.py rename to llmonray/pretrain/plugin/pretrainer.py diff --git a/pretrain/pretrain.py b/llmonray/pretrain/pretrain.py similarity index 100% rename from pretrain/pretrain.py rename to llmonray/pretrain/pretrain.py diff --git a/pretrain/pretrain_template.conf b/llmonray/pretrain/pretrain_template.conf similarity index 100% rename from pretrain/pretrain_template.conf rename to llmonray/pretrain/pretrain_template.conf diff --git a/pretrain/pretrain_template_megatron_dataset.conf b/llmonray/pretrain/pretrain_template_megatron_dataset.conf similarity index 100% rename from pretrain/pretrain_template_megatron_dataset.conf rename to llmonray/pretrain/pretrain_template_megatron_dataset.conf diff --git a/pretrain/requirements.optimum-habana.txt b/llmonray/pretrain/requirements.optimum-habana.txt similarity index 100% rename from pretrain/requirements.optimum-habana.txt rename to llmonray/pretrain/requirements.optimum-habana.txt diff --git a/pretrain/requirements.txt b/llmonray/pretrain/requirements.txt similarity index 100% rename from pretrain/requirements.txt rename to llmonray/pretrain/requirements.txt diff --git a/rlhf/__init__.py b/llmonray/rlhf/__init__.py similarity index 100% rename from rlhf/__init__.py rename to llmonray/rlhf/__init__.py diff --git a/rlhf/ppo.py b/llmonray/rlhf/ppo.py similarity index 100% rename from rlhf/ppo.py rename to llmonray/rlhf/ppo.py diff --git a/rlhf/ppo.yaml b/llmonray/rlhf/ppo.yaml similarity index 100% rename from rlhf/ppo.yaml rename to llmonray/rlhf/ppo.yaml diff --git a/rlhf/reward.py b/llmonray/rlhf/reward.py similarity index 100% rename from rlhf/reward.py rename to llmonray/rlhf/reward.py diff --git 
a/rlhf/reward.yaml b/llmonray/rlhf/reward.yaml similarity index 100% rename from rlhf/reward.yaml rename to llmonray/rlhf/reward.yaml diff --git a/rlhf/rl_algo/ppo/ppo_rlhf.py b/llmonray/rlhf/rl_algo/ppo/ppo_rlhf.py similarity index 100% rename from rlhf/rl_algo/ppo/ppo_rlhf.py rename to llmonray/rlhf/rl_algo/ppo/ppo_rlhf.py diff --git a/rlhf/rl_algo/ppo/rlhf_buffer.py b/llmonray/rlhf/rl_algo/ppo/rlhf_buffer.py similarity index 100% rename from rlhf/rl_algo/ppo/rlhf_buffer.py rename to llmonray/rlhf/rl_algo/ppo/rlhf_buffer.py diff --git a/rlhf/rl_algo/ppo/rlhf_ppo_module.py b/llmonray/rlhf/rl_algo/ppo/rlhf_ppo_module.py similarity index 100% rename from rlhf/rl_algo/ppo/rlhf_ppo_module.py rename to llmonray/rlhf/rl_algo/ppo/rlhf_ppo_module.py diff --git a/rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py b/llmonray/rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py similarity index 100% rename from rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py rename to llmonray/rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py diff --git a/rlhf/rl_algo/ppo/util.py b/llmonray/rlhf/rl_algo/ppo/util.py similarity index 100% rename from rlhf/rl_algo/ppo/util.py rename to llmonray/rlhf/rl_algo/ppo/util.py diff --git a/ui/html_format.py b/llmonray/ui/html_format.py similarity index 100% rename from ui/html_format.py rename to llmonray/ui/html_format.py diff --git a/ui/images/Picture1.png b/llmonray/ui/images/Picture1.png similarity index 100% rename from ui/images/Picture1.png rename to llmonray/ui/images/Picture1.png diff --git a/ui/images/Picture2.png b/llmonray/ui/images/Picture2.png similarity index 100% rename from ui/images/Picture2.png rename to llmonray/ui/images/Picture2.png diff --git a/ui/images/logo.png b/llmonray/ui/images/logo.png similarity index 100% rename from ui/images/logo.png rename to llmonray/ui/images/logo.png diff --git a/ui/start_ui.py b/llmonray/ui/start_ui.py similarity index 100% rename from ui/start_ui.py rename to llmonray/ui/start_ui.py From 
4c95129a05cce8cd91c102b9d1d5a5d44c42bc53 Mon Sep 17 00:00:00 2001 From: KepingYan Date: Thu, 8 Feb 2024 18:29:19 +0800 Subject: [PATCH 02/29] modify import path --- llmonray/common/__init__.py | 10 ++++---- llmonray/common/agentenv/__init__.py | 6 ++--- llmonray/common/agentenv/rlhf_env.py | 4 +-- llmonray/common/common.py | 2 +- llmonray/common/dataprocesser/__init__.py | 6 ++--- .../common/dataprocesser/general_processer.py | 2 +- .../common/dataprocesser/rm_dataprocesser.py | 4 +-- llmonray/common/dataset/__init__.py | 6 ++--- .../common/dataset/huggingface_dataset.py | 2 +- llmonray/common/init.py | 2 +- llmonray/common/initializer/__init__.py | 6 ++--- llmonray/common/load.py | 14 +++++------ llmonray/common/model/__init__.py | 6 ++--- .../model/huggingface_model_for_causal_lm.py | 2 +- llmonray/common/model/reward_model.py | 2 +- llmonray/common/optimizer/__init__.py | 6 ++--- .../common/optimizer/default_optimizer.py | 2 +- llmonray/common/optimizer/group_optimizer.py | 2 +- llmonray/common/tokenizer/__init__.py | 6 ++--- llmonray/common/tokenizer/empty_tokenizer.py | 2 +- .../common/tokenizer/huggingface_tokenizer.py | 2 +- llmonray/common/trainer/__init__.py | 6 ++--- llmonray/common/trainer/default_trainer.py | 7 +++--- llmonray/common/trainer/rm_trainer.py | 4 +-- llmonray/finetune/finetune.py | 7 ++---- .../api_openai_backend/query_client.py | 4 +-- .../api_openai_backend/request_handler.py | 9 +++++-- .../api_openai_backend/router_app.py | 13 +++++----- llmonray/inference/api_server_openai.py | 4 +-- llmonray/inference/deepspeed_predictor.py | 6 ++--- llmonray/inference/inference_config.py | 5 ---- .../export_inference_config_to_yaml.py | 2 +- llmonray/inference/predictor.py | 4 +-- llmonray/inference/predictor_deployment.py | 8 +++--- llmonray/inference/serve.py | 10 ++++---- llmonray/inference/transformer_predictor.py | 9 +++---- llmonray/inference/utils.py | 4 +-- llmonray/inference/vllm_predictor.py | 4 +-- 
.../pretrain/megatron_deepspeed_pretrain.py | 3 +-- llmonray/pretrain/plugin/group_dataset.py | 2 +- llmonray/pretrain/plugin/hf_pretrainer.py | 8 +++--- .../plugin/huggingface_model_from_config.py | 2 +- llmonray/pretrain/plugin/megatron_dataset.py | 2 +- .../pretrain/plugin/megatron_pretrainer.py | 6 ++--- .../pretrain/plugin/megatron_processer.py | 2 +- .../pretrain/plugin/megtron_initializer.py | 4 +-- .../pretrain/plugin/plain_id_processer.py | 2 +- llmonray/pretrain/plugin/pretrainer.py | 6 ++--- llmonray/pretrain/pretrain.py | 7 ++---- llmonray/rlhf/ppo.py | 16 ++++-------- llmonray/rlhf/reward.py | 6 +---- llmonray/rlhf/rl_algo/ppo/ppo_rlhf.py | 8 ++---- .../rl_algo/ppo/rlhf_ppo_torch_learner.py | 2 +- llmonray/ui/start_ui.py | 25 ++++++++----------- 54 files changed, 138 insertions(+), 163 deletions(-) diff --git a/llmonray/common/__init__.py b/llmonray/common/__init__.py index 0c882ef13..ff87eae24 100644 --- a/llmonray/common/__init__.py +++ b/llmonray/common/__init__.py @@ -1,12 +1,12 @@ -from .logging import logger -from .load import * # noqa: F403 # unable to detect undefined names -from . 
import agentenv -from .torch_config import TorchConfig # noqa: F401 +from llmonray.common.logging import logger +from llmonray.common.load import * # noqa: F403 # unable to detect undefined names +from llmonray.common import agentenv +from llmonray.common.torch_config import TorchConfig # noqa: F401 from typing import Dict, Any import sys -@load_check_decorator # noqa: F405 # may be undefined, or defined from star imports +@load_check_decorator # type: ignore # noqa: F405 # may be undefined, or defined from star imports def get_agentenv(config: Dict[str, Any]): logger.info(f"{sys._getframe().f_code.co_name} config: {config}") agentenv_type = config.get("type", None) diff --git a/llmonray/common/agentenv/__init__.py b/llmonray/common/agentenv/__init__.py index fe05d28ec..a5c2089a8 100644 --- a/llmonray/common/agentenv/__init__.py +++ b/llmonray/common/agentenv/__init__.py @@ -1,9 +1,9 @@ import os -from .agentenv import AgentEnv -from ..common import import_all_module +from llmonray.common.agentenv.agentenv import AgentEnv +from llmonray.common.common import import_all_module realpath = os.path.realpath(__file__) basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.agentenv") +import_all_module(basedir, "llmonray.common.agentenv") __all__ = ["AgentEnv"] diff --git a/llmonray/common/agentenv/rlhf_env.py b/llmonray/common/agentenv/rlhf_env.py index d4b0a5833..eccddcd0b 100644 --- a/llmonray/common/agentenv/rlhf_env.py +++ b/llmonray/common/agentenv/rlhf_env.py @@ -7,8 +7,8 @@ from ray.rllib.utils.spaces.repeated import Repeated import gymnasium.spaces as sp -from .agentenv import AgentEnv -from ..load import load_dataset, load_model, load_tokenizer +from llmonray.common.agentenv.agentenv import AgentEnv +from llmonray.common.load import load_dataset, load_model, load_tokenizer def generate_response( diff --git a/llmonray/common/common.py b/llmonray/common/common.py index b846ea75a..e70a730ee 100644 --- a/llmonray/common/common.py +++ 
b/llmonray/common/common.py @@ -2,7 +2,7 @@ import glob import importlib -from .logging import logger +from llmonray.common.logging import logger def import_all_module(basedir, prefix=None): diff --git a/llmonray/common/dataprocesser/__init__.py b/llmonray/common/dataprocesser/__init__.py index 7e74e6a13..becc562ff 100644 --- a/llmonray/common/dataprocesser/__init__.py +++ b/llmonray/common/dataprocesser/__init__.py @@ -1,9 +1,9 @@ import os -from .dataprocesser import DataProcesser -from ..common import import_all_module +from llmonray.common.dataprocesser.dataprocesser import DataProcesser +from llmonray.common.common import import_all_module realpath = os.path.realpath(__file__) basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.dataprocesser") +import_all_module(basedir, "llmonray.common.dataprocesser") __all__ = ["DataProcesser"] diff --git a/llmonray/common/dataprocesser/general_processer.py b/llmonray/common/dataprocesser/general_processer.py index 4873b4594..639ac6cb6 100644 --- a/llmonray/common/dataprocesser/general_processer.py +++ b/llmonray/common/dataprocesser/general_processer.py @@ -5,7 +5,7 @@ import datasets import transformers -from .dataprocesser import DataProcesser +from llmonray.common.dataprocesser.dataprocesser import DataProcesser INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request." 
INSTRUCTION_KEY = "### Instruction:" diff --git a/llmonray/common/dataprocesser/rm_dataprocesser.py b/llmonray/common/dataprocesser/rm_dataprocesser.py index 36ead7d8b..25ad44441 100644 --- a/llmonray/common/dataprocesser/rm_dataprocesser.py +++ b/llmonray/common/dataprocesser/rm_dataprocesser.py @@ -1,8 +1,8 @@ import torch import transformers -from .dataprocesser import DataProcesser -from ..logging import logger +from llmonray.common.dataprocesser.dataprocesser import DataProcesser +from llmonray.common.logging import logger class RMDataProcesser(DataProcesser): diff --git a/llmonray/common/dataset/__init__.py b/llmonray/common/dataset/__init__.py index 9b04a188b..8e3161182 100644 --- a/llmonray/common/dataset/__init__.py +++ b/llmonray/common/dataset/__init__.py @@ -1,9 +1,9 @@ import os -from .dataset import Dataset -from ..common import import_all_module +from llmonray.common.dataset.dataset import Dataset +from llmonray.common.common import import_all_module realpath = os.path.realpath(__file__) basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.dataset") +import_all_module(basedir, "llmonray.common.dataset") __all__ = ["Dataset"] diff --git a/llmonray/common/dataset/huggingface_dataset.py b/llmonray/common/dataset/huggingface_dataset.py index 9173e067f..92191a012 100644 --- a/llmonray/common/dataset/huggingface_dataset.py +++ b/llmonray/common/dataset/huggingface_dataset.py @@ -1,7 +1,7 @@ import os import datasets -from .dataset import Dataset +from llmonray.common.dataset.dataset import Dataset def local_load(name, **load_config): diff --git a/llmonray/common/init.py b/llmonray/common/init.py index 63715f18f..63c36272a 100644 --- a/llmonray/common/init.py +++ b/llmonray/common/init.py @@ -1,7 +1,7 @@ import torch import accelerate -from .logging import logger +from llmonray.common.logging import logger def check_config(config): diff --git a/llmonray/common/initializer/__init__.py b/llmonray/common/initializer/__init__.py index 
2cdc27adb..90c43bf1c 100644 --- a/llmonray/common/initializer/__init__.py +++ b/llmonray/common/initializer/__init__.py @@ -1,9 +1,9 @@ import os -from .initializer import Initializer -from ..common import import_all_module +from llmonray.common.initializer.initializer import Initializer +from llmonray.common.common import import_all_module realpath = os.path.realpath(__file__) basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.initializer") +import_all_module(basedir, "llmonray.common.initializer") __all__ = ["Initializer"] diff --git a/llmonray/common/load.py b/llmonray/common/load.py index 16fcfd1c5..c62381577 100644 --- a/llmonray/common/load.py +++ b/llmonray/common/load.py @@ -1,13 +1,13 @@ import sys from typing import Any, Dict -from .logging import logger -from . import dataset -from . import tokenizer -from . import model -from . import optimizer -from . import trainer -from . import initializer +from llmonray.common.logging import logger +from llmonray.common import dataset +from llmonray.common import tokenizer +from llmonray.common import model +from llmonray.common import optimizer +from llmonray.common import trainer +from llmonray.common import initializer def load_check_decorator(func): diff --git a/llmonray/common/model/__init__.py b/llmonray/common/model/__init__.py index df7989ceb..06271185a 100644 --- a/llmonray/common/model/__init__.py +++ b/llmonray/common/model/__init__.py @@ -1,9 +1,9 @@ import os -from .model import Model -from ..common import import_all_module +from llmonray.common.model.model import Model +from llmonray.common.common import import_all_module realpath = os.path.realpath(__file__) basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.model") +import_all_module(basedir, "llmonray.common.model") __all__ = ["Model"] diff --git a/llmonray/common/model/huggingface_model_for_causal_lm.py b/llmonray/common/model/huggingface_model_for_causal_lm.py index cc2ce6234..3a4804647 100644 --- 
a/llmonray/common/model/huggingface_model_for_causal_lm.py +++ b/llmonray/common/model/huggingface_model_for_causal_lm.py @@ -1,6 +1,6 @@ import transformers -from .model import Model +from llmonray.common.model.model import Model from peft import get_peft_model, LoraConfig import deltatuner diff --git a/llmonray/common/model/reward_model.py b/llmonray/common/model/reward_model.py index a4aa237ef..c3cdea3c9 100644 --- a/llmonray/common/model/reward_model.py +++ b/llmonray/common/model/reward_model.py @@ -4,7 +4,7 @@ import torch import torch.nn as nn -from .model import Model +from llmonray.common.model.model import Model class HuggingFaceRewardModel(Model): diff --git a/llmonray/common/optimizer/__init__.py b/llmonray/common/optimizer/__init__.py index 122acc90f..00424606b 100644 --- a/llmonray/common/optimizer/__init__.py +++ b/llmonray/common/optimizer/__init__.py @@ -1,9 +1,9 @@ import os -from .optimizer import Optimizer -from ..common import import_all_module +from llmonray.common.optimizer.optimizer import Optimizer +from llmonray.common.common import import_all_module realpath = os.path.realpath(__file__) basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.optimizer") +import_all_module(basedir, "llmonray.common.optimizer") __all__ = ["Optimizer"] diff --git a/llmonray/common/optimizer/default_optimizer.py b/llmonray/common/optimizer/default_optimizer.py index dab5803a2..753c78ef3 100644 --- a/llmonray/common/optimizer/default_optimizer.py +++ b/llmonray/common/optimizer/default_optimizer.py @@ -1,5 +1,5 @@ import torch # noqa: F401 -from .optimizer import Optimizer +from llmonray.common.optimizer.optimizer import Optimizer class DefaultOptimizer(Optimizer): diff --git a/llmonray/common/optimizer/group_optimizer.py b/llmonray/common/optimizer/group_optimizer.py index 0e07878db..15c4bb279 100644 --- a/llmonray/common/optimizer/group_optimizer.py +++ b/llmonray/common/optimizer/group_optimizer.py @@ -1,5 +1,5 @@ import torch # noqa: F401 
-from .optimizer import Optimizer +from llmonray.common.optimizer.optimizer import Optimizer class GroupOptimizer(Optimizer): diff --git a/llmonray/common/tokenizer/__init__.py b/llmonray/common/tokenizer/__init__.py index 63c281496..27c5631f9 100644 --- a/llmonray/common/tokenizer/__init__.py +++ b/llmonray/common/tokenizer/__init__.py @@ -1,9 +1,9 @@ import os -from .tokenizer import Tokenizer -from ..common import import_all_module +from llmonray.common.tokenizer.tokenizer import Tokenizer +from llmonray.common.common import import_all_module realpath = os.path.realpath(__file__) basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.tokenizer") +import_all_module(basedir, "llmonray.common.tokenizer") __all__ = ["Tokenizer"] diff --git a/llmonray/common/tokenizer/empty_tokenizer.py b/llmonray/common/tokenizer/empty_tokenizer.py index c2684aca0..c23b5c947 100644 --- a/llmonray/common/tokenizer/empty_tokenizer.py +++ b/llmonray/common/tokenizer/empty_tokenizer.py @@ -1,4 +1,4 @@ -from .tokenizer import Tokenizer +from llmonray.common.tokenizer.tokenizer import Tokenizer class _EmptyTokenizer: diff --git a/llmonray/common/tokenizer/huggingface_tokenizer.py b/llmonray/common/tokenizer/huggingface_tokenizer.py index a6a60bc7f..a3fd313bc 100644 --- a/llmonray/common/tokenizer/huggingface_tokenizer.py +++ b/llmonray/common/tokenizer/huggingface_tokenizer.py @@ -1,6 +1,6 @@ import transformers -from .tokenizer import Tokenizer +from llmonray.common.tokenizer.tokenizer import Tokenizer class HuggingFaceTokenizer(Tokenizer): diff --git a/llmonray/common/trainer/__init__.py b/llmonray/common/trainer/__init__.py index b33b565a5..c1cfb967f 100644 --- a/llmonray/common/trainer/__init__.py +++ b/llmonray/common/trainer/__init__.py @@ -1,9 +1,9 @@ import os -from .trainer import Trainer -from ..common import import_all_module +from llmonray.common.trainer.trainer import Trainer +from llmonray.common.common import import_all_module realpath = 
os.path.realpath(__file__) basedir = os.path.dirname(realpath) -import_all_module(basedir, "common.trainer") +import_all_module(basedir, "llmonray.common.trainer") __all__ = ["Trainer"] diff --git a/llmonray/common/trainer/default_trainer.py b/llmonray/common/trainer/default_trainer.py index f05c6317f..2dec917dd 100644 --- a/llmonray/common/trainer/default_trainer.py +++ b/llmonray/common/trainer/default_trainer.py @@ -9,10 +9,9 @@ from ray.train import report, Checkpoint -from .. import dataprocesser -from .trainer import Trainer - -from ..logging import logger +from llmonray.common import dataprocesser +from llmonray.common.trainer.trainer import Trainer +from llmonray.common.logging import logger class DefaultTrainer(Trainer): diff --git a/llmonray/common/trainer/rm_trainer.py b/llmonray/common/trainer/rm_trainer.py index 1cc64d93e..c9643217d 100644 --- a/llmonray/common/trainer/rm_trainer.py +++ b/llmonray/common/trainer/rm_trainer.py @@ -4,8 +4,8 @@ import math import time -from .default_trainer import DefaultTrainer -from ..logging import logger +from llmonray.common.trainer.default_trainer import DefaultTrainer +from llmonray.common.logging import logger class RMTrainer(DefaultTrainer): diff --git a/llmonray/finetune/finetune.py b/llmonray/finetune/finetune.py index 7ab0183db..039ccda9b 100644 --- a/llmonray/finetune/finetune.py +++ b/llmonray/finetune/finetune.py @@ -21,11 +21,8 @@ FullStateDictConfig, ) -import sys - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) -import common -from finetune.finetune_config import FinetuneConfig +import llmonray.common as common +from llmonray.finetune.finetune_config import FinetuneConfig def get_accelerate_environment_variable(mode: str, config: Union[Dict[str, Any], None]) -> dict: diff --git a/llmonray/inference/api_openai_backend/query_client.py b/llmonray/inference/api_openai_backend/query_client.py index fbfbb65b1..a01520209 100644 --- a/llmonray/inference/api_openai_backend/query_client.py +++ 
b/llmonray/inference/api_openai_backend/query_client.py @@ -34,8 +34,8 @@ from typing import Dict from fastapi import HTTPException -from .openai_protocol import ModelCard, Prompt -from .request_handler import handle_request +from llmonray.inference.api_openai_backend.openai_protocol import ModelCard, Prompt +from llmonray.inference.api_openai_backend.request_handler import handle_request class RouterQueryClient: diff --git a/llmonray/inference/api_openai_backend/request_handler.py b/llmonray/inference/api_openai_backend/request_handler.py index 126943e22..64d1cc966 100644 --- a/llmonray/inference/api_openai_backend/request_handler.py +++ b/llmonray/inference/api_openai_backend/request_handler.py @@ -38,8 +38,13 @@ from fastapi import status, HTTPException, Request from starlette.responses import JSONResponse from pydantic import ValidationError as PydanticValidationError -from logger import get_logger -from .openai_protocol import Prompt, ModelResponse, ErrorResponse, FinishReason +from llmonray.inference.logger import get_logger +from llmonray.inference.api_openai_backend.openai_protocol import ( + Prompt, + ModelResponse, + ErrorResponse, + FinishReason, +) logger = get_logger(__name__) diff --git a/llmonray/inference/api_openai_backend/router_app.py b/llmonray/inference/api_openai_backend/router_app.py index f622e1275..e65831287 100644 --- a/llmonray/inference/api_openai_backend/router_app.py +++ b/llmonray/inference/api_openai_backend/router_app.py @@ -40,16 +40,17 @@ from fastapi import Response as FastAPIResponse from fastapi.middleware.cors import CORSMiddleware from starlette.responses import Response, StreamingResponse -from logger import get_logger -from .request_handler import OpenAIHTTPException, openai_exception_handler -from .query_client import RouterQueryClient -from .openai_protocol import ( +from llmonray.inference.logger import get_logger +from llmonray.inference.api_openai_backend.request_handler import ( + OpenAIHTTPException, + 
openai_exception_handler, +) +from llmonray.inference.api_openai_backend.query_client import RouterQueryClient +from llmonray.inference.api_openai_backend.openai_protocol import ( Prompt, ModelResponse, CompletionRequest, ChatCompletionRequest, -) -from .openai_protocol import ( ChatCompletionResponse, CompletionResponse, DeltaChoices, diff --git a/llmonray/inference/api_server_openai.py b/llmonray/inference/api_server_openai.py index 77831a9d2..a9b123a13 100644 --- a/llmonray/inference/api_server_openai.py +++ b/llmonray/inference/api_server_openai.py @@ -34,8 +34,8 @@ import os from ray import serve -from inference.api_openai_backend.query_client import RouterQueryClient -from inference.api_openai_backend.router_app import Router, router_app +from llmonray.inference.api_openai_backend.query_client import RouterQueryClient +from llmonray.inference.api_openai_backend.router_app import Router, router_app def router_application(deployments): diff --git a/llmonray/inference/deepspeed_predictor.py b/llmonray/inference/deepspeed_predictor.py index 464c81506..ef8395ae7 100644 --- a/llmonray/inference/deepspeed_predictor.py +++ b/llmonray/inference/deepspeed_predictor.py @@ -12,9 +12,9 @@ from ray.air import ScalingConfig from typing import List import os -from predictor import Predictor -from utils import get_torch_dtype -from inference.inference_config import ( +from llmonray.inference.predictor import Predictor +from llmonray.inference.utils import get_torch_dtype +from llmonray.inference.inference_config import ( InferenceConfig, GenerateResult, DEVICE_CPU, diff --git a/llmonray/inference/inference_config.py b/llmonray/inference/inference_config.py index 7e1798b82..ca20df024 100644 --- a/llmonray/inference/inference_config.py +++ b/llmonray/inference/inference_config.py @@ -156,8 +156,3 @@ def _check_workers_per_group(cls, v: int): _models[m.name] = m all_models = _models.copy() - -_gpt2_key = "gpt2" -_gpt_j_6b = "gpt-j-6b" -base_models[_gpt2_key] = _models[_gpt2_key] 
-base_models[_gpt_j_6b] = _models[_gpt_j_6b] diff --git a/llmonray/inference/models/template/export_inference_config_to_yaml.py b/llmonray/inference/models/template/export_inference_config_to_yaml.py index 62cfd4b75..89b14a507 100644 --- a/llmonray/inference/models/template/export_inference_config_to_yaml.py +++ b/llmonray/inference/models/template/export_inference_config_to_yaml.py @@ -1,6 +1,6 @@ import yaml import os -from inference.inference_config import InferenceConfig +from llmonray.inference.inference_config import InferenceConfig ic = InferenceConfig() diff --git a/llmonray/inference/predictor.py b/llmonray/inference/predictor.py index a69a9407e..1019c2878 100644 --- a/llmonray/inference/predictor.py +++ b/llmonray/inference/predictor.py @@ -1,9 +1,9 @@ import re import torch from transformers import AutoTokenizer, StoppingCriteriaList -from inference.inference_config import InferenceConfig, GenerateResult -from inference.utils import StoppingCriteriaSub from typing import List, AsyncGenerator, Union +from llmonray.inference.inference_config import InferenceConfig, GenerateResult +from llmonray.inference.utils import StoppingCriteriaSub class Predictor: diff --git a/llmonray/inference/predictor_deployment.py b/llmonray/inference/predictor_deployment.py index 447f48731..c22e8e836 100644 --- a/llmonray/inference/predictor_deployment.py +++ b/llmonray/inference/predictor_deployment.py @@ -19,16 +19,16 @@ import asyncio import functools from ray import serve -from starlette.requests import Request from queue import Empty import torch from transformers import TextIteratorStreamer -from inference.inference_config import InferenceConfig from typing import Union, Dict, Any +from starlette.requests import Request from starlette.responses import StreamingResponse, JSONResponse from fastapi import HTTPException -from inference.api_openai_backend.openai_protocol import ModelResponse -from inference.utils import get_prompt_format, PromptFormat +from 
llmonray.inference.inference_config import InferenceConfig +from llmonray.inference.api_openai_backend.openai_protocol import ModelResponse +from llmonray.inference.utils import get_prompt_format, PromptFormat @serve.deployment diff --git a/llmonray/inference/serve.py b/llmonray/inference/serve.py index e73397a79..f7ee0b124 100644 --- a/llmonray/inference/serve.py +++ b/llmonray/inference/serve.py @@ -16,12 +16,12 @@ import ray import sys -from utils import get_deployment_actor_options from pydantic_yaml import parse_yaml_raw_as -from api_server_simple import serve_run -from api_server_openai import openai_serve_run -from predictor_deployment import PredictorDeployment -from inference.inference_config import ModelDescription, InferenceConfig, all_models +from llmonray.inference.utils import get_deployment_actor_options +from llmonray.inference.api_server_simple import serve_run +from llmonray.inference.api_server_openai import openai_serve_run +from llmonray.inference.predictor_deployment import PredictorDeployment +from llmonray.inference.inference_config import ModelDescription, InferenceConfig, all_models def get_deployed_models(args): diff --git a/llmonray/inference/transformer_predictor.py b/llmonray/inference/transformer_predictor.py index c1e83e432..983df8c72 100644 --- a/llmonray/inference/transformer_predictor.py +++ b/llmonray/inference/transformer_predictor.py @@ -1,9 +1,8 @@ import torch -from transformers import AutoModelForCausalLM, AutoConfig -from transformers import TextIteratorStreamer -from inference.inference_config import InferenceConfig, GenerateResult, PRECISION_BF16 -from inference.utils import get_torch_dtype -from predictor import Predictor +from transformers import AutoModelForCausalLM, AutoConfig, TextIteratorStreamer +from llmonray.inference.inference_config import InferenceConfig, GenerateResult, PRECISION_BF16 +from llmonray.inference.utils import get_torch_dtype +from llmonray.inference.predictor import Predictor class 
TransformerPredictor(Predictor): diff --git a/llmonray/inference/utils.py b/llmonray/inference/utils.py index 6cdd2be85..ea98dd3a9 100644 --- a/llmonray/inference/utils.py +++ b/llmonray/inference/utils.py @@ -16,10 +16,10 @@ from transformers import StoppingCriteria import torch -from inference.inference_config import InferenceConfig, DEVICE_CPU -from inference.api_openai_backend.openai_protocol import ChatMessage from typing import Dict, Any, List, Union from enum import Enum +from llmonray.inference.inference_config import InferenceConfig, DEVICE_CPU +from llmonray.inference.api_openai_backend.openai_protocol import ChatMessage def get_deployment_actor_options(infer_conf: InferenceConfig): diff --git a/llmonray/inference/vllm_predictor.py b/llmonray/inference/vllm_predictor.py index 54ec4c110..26e87a2e3 100644 --- a/llmonray/inference/vllm_predictor.py +++ b/llmonray/inference/vllm_predictor.py @@ -1,11 +1,11 @@ import asyncio from typing import AsyncGenerator, List, Union -from predictor import Predictor -from inference.inference_config import InferenceConfig, GenerateResult, PRECISION_BF16 from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.sampling_params import SamplingParams from vllm.utils import random_uuid +from llmonray.inference.predictor import Predictor +from llmonray.inference.inference_config import InferenceConfig, GenerateResult, PRECISION_BF16 class VllmPredictor(Predictor): diff --git a/llmonray/pretrain/megatron_deepspeed_pretrain.py b/llmonray/pretrain/megatron_deepspeed_pretrain.py index aa5002711..120c46294 100644 --- a/llmonray/pretrain/megatron_deepspeed_pretrain.py +++ b/llmonray/pretrain/megatron_deepspeed_pretrain.py @@ -7,8 +7,7 @@ from ray.air.config import ScalingConfig from ray.air import RunConfig, FailureConfig -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) -import common +import llmonray.common as common import importlib diff --git 
a/llmonray/pretrain/plugin/group_dataset.py b/llmonray/pretrain/plugin/group_dataset.py index 93838f7bf..ac1281984 100644 --- a/llmonray/pretrain/plugin/group_dataset.py +++ b/llmonray/pretrain/plugin/group_dataset.py @@ -1,7 +1,7 @@ import os import datasets -from common.dataset import Dataset +from llmonray.common.dataset import Dataset class GroupDataset(Dataset): diff --git a/llmonray/pretrain/plugin/hf_pretrainer.py b/llmonray/pretrain/plugin/hf_pretrainer.py index d9aafdfc5..0a255770f 100755 --- a/llmonray/pretrain/plugin/hf_pretrainer.py +++ b/llmonray/pretrain/plugin/hf_pretrainer.py @@ -3,9 +3,9 @@ import logging import sys from torch.utils.data import DataLoader, Dataset -import common -from common import dataprocesser -from common.logging import logger +import llmonray.common as common +from llmonray.common import dataprocesser +from llmonray.common.logging import logger import evaluate from typing import Optional from transformers import ( @@ -16,7 +16,7 @@ from transformers.trainer_utils import get_last_checkpoint from transformers.utils import check_min_version, send_example_telemetry from transformers import Trainer, TrainingArguments -from common.trainer import Trainer as RayTrainer +from llmonray.common.trainer import Trainer as RayTrainer use_habana = True import importlib diff --git a/llmonray/pretrain/plugin/huggingface_model_from_config.py b/llmonray/pretrain/plugin/huggingface_model_from_config.py index 5ce38da8f..51a332701 100644 --- a/llmonray/pretrain/plugin/huggingface_model_from_config.py +++ b/llmonray/pretrain/plugin/huggingface_model_from_config.py @@ -1,7 +1,7 @@ import torch import math import transformers -from common.model.model import Model +from llmonray.common.model.model import Model # for huggingface model weight random initialization diff --git a/llmonray/pretrain/plugin/megatron_dataset.py b/llmonray/pretrain/plugin/megatron_dataset.py index 944c6b53b..ac763203a 100644 --- a/llmonray/pretrain/plugin/megatron_dataset.py +++ 
b/llmonray/pretrain/plugin/megatron_dataset.py @@ -2,7 +2,7 @@ from megatron.training import build_train_valid_test_datasets, update_train_iters from megatron.data import gpt_dataset -from common.dataset import Dataset +from llmonray.common.dataset import Dataset class MegatronDataset(Dataset): diff --git a/llmonray/pretrain/plugin/megatron_pretrainer.py b/llmonray/pretrain/plugin/megatron_pretrainer.py index 4ee76bfa3..6199c20ac 100644 --- a/llmonray/pretrain/plugin/megatron_pretrainer.py +++ b/llmonray/pretrain/plugin/megatron_pretrainer.py @@ -11,9 +11,9 @@ from ray.train import Checkpoint from ray.train.torch import TorchCheckpoint -from common import dataprocesser -from .pretrainer import PreTrainer -from common.logging import logger +from llmonray.common import dataprocesser +from llmonray.pretrain.plugin.pretrainer import PreTrainer +from llmonray.common.logging import logger class MegatronPreTrainer(PreTrainer): diff --git a/llmonray/pretrain/plugin/megatron_processer.py b/llmonray/pretrain/plugin/megatron_processer.py index 178256ad5..b507191f9 100644 --- a/llmonray/pretrain/plugin/megatron_processer.py +++ b/llmonray/pretrain/plugin/megatron_processer.py @@ -2,7 +2,7 @@ from megatron.core import mpu from megatron.data.data_samplers import build_pretraining_data_loader -from common.dataprocesser import DataProcesser +from llmonray.common.dataprocesser import DataProcesser class MegatronProcesser(DataProcesser): diff --git a/llmonray/pretrain/plugin/megtron_initializer.py b/llmonray/pretrain/plugin/megtron_initializer.py index cad268603..b089b7eea 100644 --- a/llmonray/pretrain/plugin/megtron_initializer.py +++ b/llmonray/pretrain/plugin/megtron_initializer.py @@ -1,6 +1,6 @@ from megatron.initialize import initialize_megatron -from common.initializer import Initializer -from common.logging import logger +from llmonray.common.initializer import Initializer +from llmonray.common.logging import logger class MegatronInitializer(Initializer): diff --git 
a/llmonray/pretrain/plugin/plain_id_processer.py b/llmonray/pretrain/plugin/plain_id_processer.py index 20117cdcf..175ca7606 100644 --- a/llmonray/pretrain/plugin/plain_id_processer.py +++ b/llmonray/pretrain/plugin/plain_id_processer.py @@ -1,7 +1,7 @@ import torch import transformers -from common.dataprocesser import DataProcesser +from llmonray.common.dataprocesser import DataProcesser class PlainIDProcesser(DataProcesser): diff --git a/llmonray/pretrain/plugin/pretrainer.py b/llmonray/pretrain/plugin/pretrainer.py index 1bde38f62..76362c7f4 100755 --- a/llmonray/pretrain/plugin/pretrainer.py +++ b/llmonray/pretrain/plugin/pretrainer.py @@ -12,9 +12,9 @@ from ray.train.torch import TorchCheckpoint from pathlib import Path -from common import dataprocesser -from common.trainer import Trainer -from common.logging import logger +from llmonray.common import dataprocesser +from llmonray.common.trainer import Trainer +from llmonray.common.logging import logger class PreTrainer(Trainer): diff --git a/llmonray/pretrain/pretrain.py b/llmonray/pretrain/pretrain.py index 3e045c19d..18b66ddb0 100644 --- a/llmonray/pretrain/pretrain.py +++ b/llmonray/pretrain/pretrain.py @@ -10,10 +10,7 @@ from ray.air.config import ScalingConfig from ray.air import RunConfig, FailureConfig -import sys - -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) -import common +import llmonray.common as common from importlib import util @@ -32,7 +29,7 @@ def train_func(config: Dict[str, Any]): cwd = config.get("cwd") if cwd: os.chdir(cwd) - from common.common import import_all_module + from llmonray.common.common import import_all_module import_all_module(f"{os.path.dirname(os.path.realpath(__file__))}/plugin", "plugin") common.init(config) # type: ignore diff --git a/llmonray/rlhf/ppo.py b/llmonray/rlhf/ppo.py index cc9fab6ae..aca80f432 100644 --- a/llmonray/rlhf/ppo.py +++ b/llmonray/rlhf/ppo.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -import os import ray from ray import air, tune @@ 
-8,16 +7,11 @@ from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec from ray.rllib.algorithms.ppo import PPOConfig -from rl_algo.ppo.ppo_rlhf import PPORLHF -from rl_algo.ppo.rlhf_ppo_module import RLHFPPOTorchRLModule -from rl_algo.ppo.rlhf_ppo_torch_learner import RLHFPPOTorchLearner - -import sys - -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) - -import common -from common.agentenv.rlhf_env import RLHFEnv +from llmonray.rlhf.rl_algo.ppo.ppo_rlhf import PPORLHF +from llmonray.rlhf.rl_algo.ppo.rlhf_ppo_module import RLHFPPOTorchRLModule +from llmonray.rlhf.rl_algo.ppo.rlhf_ppo_torch_learner import RLHFPPOTorchLearner +import llmonray.common as common +from llmonray.common.agentenv.rlhf_env import RLHFEnv class ValueFunctionInitializerCallback(DefaultCallbacks): diff --git a/llmonray/rlhf/reward.py b/llmonray/rlhf/reward.py index 7045a6c44..f27e91aef 100644 --- a/llmonray/rlhf/reward.py +++ b/llmonray/rlhf/reward.py @@ -10,11 +10,7 @@ from ray.air.config import ScalingConfig from ray.air import RunConfig, FailureConfig -import sys - -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) - -import common +import llmonray.common as common def train_func(config: Dict[str, Any]): diff --git a/llmonray/rlhf/rl_algo/ppo/ppo_rlhf.py b/llmonray/rlhf/rl_algo/ppo/ppo_rlhf.py index 55657a507..1c8c56d2e 100644 --- a/llmonray/rlhf/rl_algo/ppo/ppo_rlhf.py +++ b/llmonray/rlhf/rl_algo/ppo/ppo_rlhf.py @@ -12,13 +12,9 @@ ) from ray.rllib.evaluation.metrics import RolloutMetrics -import os -import sys -sys.path.append(os.path.join(os.path.dirname(__file__), "../../../")) - -from common.agentenv.rlhf_env import generate_response -from .rlhf_buffer import Buffer, BufferItem +from llmonray.common.agentenv.rlhf_env import generate_response +from llmonray.rlhf.rl_algo.ppo.rlhf_buffer import Buffer, BufferItem class RLHFSampler: diff --git a/llmonray/rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py b/llmonray/rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py 
index 733863703..a82fa700d 100644 --- a/llmonray/rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py +++ b/llmonray/rlhf/rl_algo/ppo/rlhf_ppo_torch_learner.py @@ -11,7 +11,7 @@ from ray.rllib.models.torch.torch_distributions import TorchCategorical -from .util import masked_mean +from llmonray.rlhf.rl_algo.ppo.util import masked_mean torch, nn = try_import_torch() diff --git a/llmonray/ui/start_ui.py b/llmonray/ui/start_ui.py index 4377531b7..70d82254f 100644 --- a/llmonray/ui/start_ui.py +++ b/llmonray/ui/start_ui.py @@ -18,24 +18,22 @@ import time import os import sys - -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) -from inference.inference_config import all_models, ModelDescription, Prompt -from inference.inference_config import InferenceConfig as FinetunedConfig -from inference.chat_process import ChatModelGptJ, ChatModelLLama # noqa: F401 -from inference.predictor_deployment import PredictorDeployment -from ray import serve -import ray import gradio as gr import argparse +import paramiko +from multiprocessing import Process, Queue +from typing import Dict, List, Any +import ray +from ray import serve from ray.tune import Stopper from ray.train.base_trainer import TrainingFailedError from ray.tune.logger import LoggerCallback -from multiprocessing import Process, Queue from ray.util import queue -import paramiko -from html_format import cpu_memory_html, ray_status_html, custom_css -from typing import Dict, List, Any +from llmonray.inference.inference_config import all_models, ModelDescription, Prompt +from llmonray.inference.inference_config import InferenceConfig as FinetunedConfig +from llmonray.inference.chat_process import ChatModelGptJ, ChatModelLLama # noqa: F401 +from llmonray.inference.predictor_deployment import PredictorDeployment +from llmonray.ui.html_format import cpu_memory_html, ray_status_html, custom_css from langchain.vectorstores import FAISS from langchain.embeddings import HuggingFaceEmbeddings from pyrecdp.LLM import TextPipeline 
@@ -1572,8 +1570,7 @@ def _init_ui(self): infer_path + os.path.sep + "../examples/data/sample_finetune_data.jsonl" ) - sys.path.append(repo_path) - from finetune.finetune import get_accelerate_environment_variable + from llmonray.finetune.finetune import get_accelerate_environment_variable finetune_config: Dict[str, Any] = { "General": {"config": {}}, From 19d7fc2b666b33f118bc11658f20fed5dd2037c4 Mon Sep 17 00:00:00 2001 From: KepingYan Date: Thu, 8 Feb 2024 18:47:58 +0800 Subject: [PATCH 03/29] modify package name --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ef7d280f8..36126410f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ bigdl-cpu = [ [tool.setuptools] # with MANIFEST.in, the configs below work in both baremetal and container -package-dir = {"inference" = "inference", "finetune" = "finetune"} +package-dir = {"llmonray" = "llmonray"} include-package-data = true [project.urls] From c1394d475ab1590113f5cc253c2da53d6ba292bb Mon Sep 17 00:00:00 2001 From: KepingYan Date: Tue, 20 Feb 2024 15:55:56 +0800 Subject: [PATCH 04/29] update path --- .github/workflows/workflow_finetune.yml | 6 ++--- .github/workflows/workflow_inference.yml | 22 +++++++++---------- .../workflows/workflow_orders_on_merge.yml | 8 +++---- .github/workflows/workflow_orders_on_pr.yml | 8 +++---- README.md | 6 ++--- docs/finetune.md | 2 +- docs/pretrain.md | 10 ++++----- docs/serve.md | 12 +++++----- docs/vllm.md | 2 +- docs/web_ui.md | 2 +- 10 files changed, 39 insertions(+), 39 deletions(-) diff --git a/.github/workflows/workflow_finetune.yml b/.github/workflows/workflow_finetune.yml index 5a8e32720..eefc28c04 100644 --- a/.github/workflows/workflow_finetune.yml +++ b/.github/workflows/workflow_finetune.yml @@ -113,7 +113,7 @@ jobs: EOF ) docker exec "finetune" python -c "$CMD" - docker exec "finetune" bash -c "python finetune/finetune.py --config_file finetune/finetune.yaml" + docker exec "finetune" 
bash -c "python -m llmonray.finetune.finetune --config_file llmonray/finetune/finetune.yaml" - name: Run PEFT-LoRA Test run: | @@ -138,7 +138,7 @@ jobs: EOF ) docker exec "finetune" python -c "$CMD" - docker exec "finetune" bash -c "python finetune/finetune.py --config_file finetune/finetune.yaml" + docker exec "finetune" bash -c "python -m llmonray.finetune.finetune --config_file llmonray/finetune/finetune.yaml" - name: Run Deltatuner Test on DENAS-LoRA Model run: | @@ -168,7 +168,7 @@ jobs: yaml.dump(result, output, sort_keys=False) EOF) docker exec "finetune" python -c "$CMD" - docker exec "finetune" bash -c "python finetune/finetune.py --config_file finetune/finetune.yaml" + docker exec "finetune" bash -c "python -m llmonray.finetune.finetune --config_file llmonray/finetune/finetune.yaml" fi - name: Stop Ray diff --git a/.github/workflows/workflow_inference.yml b/.github/workflows/workflow_inference.yml index 6a5617a66..b8082539c 100644 --- a/.github/workflows/workflow_inference.yml +++ b/.github/workflows/workflow_inference.yml @@ -118,14 +118,14 @@ jobs: CMD=$(cat << EOF import yaml if ("${{ matrix.model }}" == "starcoder"): - conf_path = "inference/models/starcoder.yaml" + conf_path = "llmonray/inference/models/starcoder.yaml" with open(conf_path, encoding="utf-8") as reader: result = yaml.load(reader, Loader=yaml.FullLoader) result['model_description']["config"]["use_auth_token"] = "${{ env.HF_ACCESS_TOKEN }}" with open(conf_path, 'w') as output: yaml.dump(result, output, sort_keys=False) if ("${{ matrix.model }}" == "llama-2-7b-chat-hf"): - conf_path = "inference/models/llama-2-7b-chat-hf.yaml" + conf_path = "llmonray/inference/models/llama-2-7b-chat-hf.yaml" with open(conf_path, encoding="utf-8") as reader: result = yaml.load(reader, Loader=yaml.FullLoader) result['model_description']["config"]["use_auth_token"] = "${{ env.HF_ACCESS_TOKEN }}" @@ -135,11 +135,11 @@ jobs: ) docker exec "${TARGET}" python -c "$CMD" if [[ ${{ matrix.model }} == "mpt-7b-bigdl" 
]]; then - docker exec "${TARGET}" bash -c "python inference/serve.py --config_file inference/models/bigdl/mpt-7b-bigdl.yaml --simple" + docker exec "${TARGET}" bash -c "python -m llmonray.inference.serve --config_file llmonray/inference/models/bigdl/mpt-7b-bigdl.yaml --simple" elif [[ ${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then - docker exec "${TARGET}" bash -c "python inference/serve.py --config_file .github/workflows/config/llama-2-7b-chat-hf-vllm-fp32.yaml --simple" + docker exec "${TARGET}" bash -c "python -m llmonray.inference.serve --config_file .github/workflows/config/llama-2-7b-chat-hf-vllm-fp32.yaml --simple" else - docker exec "${TARGET}" bash -c "python inference/serve.py --simple --models ${{ matrix.model }}" + docker exec "${TARGET}" bash -c "python -m llmonray.inference.serve --simple --models ${{ matrix.model }}" fi echo Non-streaming query: docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}" @@ -150,7 +150,7 @@ jobs: if: ${{ matrix.dtuner_model }} run: | TARGET=${{steps.target.outputs.target}} - docker exec "${TARGET}" bash -c "python inference/serve.py --config_file .github/workflows/config/mpt_deltatuner.yaml --simple" + docker exec "${TARGET}" bash -c "python -m llmonray.inference.serve --config_file .github/workflows/config/mpt_deltatuner.yaml --simple" docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}" docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response" @@ -160,8 +160,8 @@ jobs: if [[ ${{ matrix.model }} =~ ^(gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then echo ${{ matrix.model }} is not supported! elif [[ ! 
${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then - docker exec "${TARGET}" bash -c "python .github/workflows/config/update_inference_config.py --config_file inference/models/\"${{ matrix.model }}\".yaml --output_file \"${{ matrix.model }}\".yaml.deepspeed --deepspeed" - docker exec "${TARGET}" bash -c "python inference/serve.py --config_file \"${{ matrix.model }}\".yaml.deepspeed --simple" + docker exec "${TARGET}" bash -c "python .github/workflows/config/update_inference_config.py --config_file llmonray/inference/models/\"${{ matrix.model }}\".yaml --output_file \"${{ matrix.model }}\".yaml.deepspeed --deepspeed" + docker exec "${TARGET}" bash -c "python -m llmonray.inference.serve --config_file \"${{ matrix.model }}\".yaml.deepspeed --simple" docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}" docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response" fi @@ -173,7 +173,7 @@ jobs: if [[ ${{ matrix.model }} =~ ^(gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then echo ${{ matrix.model }} is not supported! 
else - docker exec "${TARGET}" bash -c "python inference/serve.py --config_file .github/workflows/config/mpt_deltatuner_deepspeed.yaml --simple" + docker exec "${TARGET}" bash -c "python -m llmonray.inference.serve --config_file .github/workflows/config/mpt_deltatuner_deepspeed.yaml --simple" docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}" docker exec "${TARGET}" bash -c "python examples/inference/api_server_simple/query_single.py --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response" fi @@ -182,9 +182,9 @@ jobs: run: | TARGET=${{steps.target.outputs.target}} if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then - docker exec "${TARGET}" bash -c "python inference/serve.py --config_file inference/models/bigdl/mpt-7b-bigdl.yaml" + docker exec "${TARGET}" bash -c "python -m llmonray.inference.serve --config_file llmonray/inference/models/bigdl/mpt-7b-bigdl.yaml" elif [[ ! 
${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then - docker exec "${TARGET}" bash -c "python inference/serve.py --models ${{ matrix.model }}" + docker exec "${TARGET}" bash -c "python -m llmonray.inference.serve --models ${{ matrix.model }}" docker exec "${TARGET}" bash -c "python examples/inference/api_server_openai/query_http_requests.py --model_name ${{ matrix.model }}" fi diff --git a/.github/workflows/workflow_orders_on_merge.yml b/.github/workflows/workflow_orders_on_merge.yml index a057f8ea6..812b83b73 100644 --- a/.github/workflows/workflow_orders_on_merge.yml +++ b/.github/workflows/workflow_orders_on_merge.yml @@ -7,11 +7,11 @@ on: paths: - '.github/**' - 'docker/**' - - 'common/**' - 'dev/docker/**' - - 'finetune/**' - - 'inference/**' - - 'rlhf/**' + - 'llmonray/common/**' + - 'llmonray/finetune/**' + - 'llmonray/inference/**' + - 'llmonray/rlhf/**' - 'tools/**' - 'pyproject.toml' - 'tests/**' diff --git a/.github/workflows/workflow_orders_on_pr.yml b/.github/workflows/workflow_orders_on_pr.yml index 0fdb9bb01..6625bbc0a 100644 --- a/.github/workflows/workflow_orders_on_pr.yml +++ b/.github/workflows/workflow_orders_on_pr.yml @@ -7,11 +7,11 @@ on: paths: - '.github/**' - 'docker/**' - - 'common/**' - 'dev/docker/**' - - 'finetune/**' - - 'inference/**' - - 'rlhf/**' + - 'llmonray/common/**' + - 'llmonray/finetune/**' + - 'llmonray/inference/**' + - 'llmonray/rlhf/**' - 'tools/**' - 'pyproject.toml' - 'tests/**' diff --git a/README.md b/README.md index deda1b1fe..350b3bdd2 100644 --- a/README.md +++ b/README.md @@ -59,14 +59,14 @@ ray start --head Use the following command to finetune a model using an example dataset and default configurations. The finetuned model will be stored in `/tmp/llm-ray/output` by default. 
To customize the base model, dataset and configurations, please see the [finetuning document](#finetune): ```bash -python finetune/finetune.py --config_file finetune/finetune.yaml +python -m llmonray.finetune.finetune --config_file llmonray/finetune/finetune.yaml ``` ### Serving Deploy a model on Ray and expose an endpoint for serving. This command uses GPT2 as an example, but more model configuration examples can be found in the [inference/models](inference/models) directory: ```bash -python inference/serve.py --config_file inference/models/gpt2.yaml +python -m llmonray.inference.serve --config_file llmonray/inference/models/gpt2.yaml ``` The default served method is to provide an OpenAI-compatible API server ([OpenAI API Reference](https://platform.openai.com/docs/api-reference/chat)), you can access and test it in many ways: @@ -92,7 +92,7 @@ python examples/inference/api_server_openai/query_openai_sdk.py ``` Or you can serve specific model to a simple endpoint according to the `port` and `route_prefix` parameters in configuration file, ```bash -python inference/serve.py --config_file inference/models/gpt2.yaml --simple +python -m llmonray.inference.serve --config_file llmonray/inference/models/gpt2.yaml --simple ``` After deploying the model endpoint, you can access and test it by using the script below: ```bash diff --git a/docs/finetune.md b/docs/finetune.md index dda3505f2..129ec22bc 100755 --- a/docs/finetune.md +++ b/docs/finetune.md @@ -65,5 +65,5 @@ The following models have been verified on Intel CPUs or GPUs. ## Finetune the model To finetune your model, execute the following command. The finetuned model will be saved in /tmp/llm-ray/output by default. 
``` bash -python finetune/finetune.py --config_file +python -m llmonray.finetune.finetune --config_file ``` diff --git a/docs/pretrain.md b/docs/pretrain.md index 2b3667523..670b118c3 100644 --- a/docs/pretrain.md +++ b/docs/pretrain.md @@ -123,27 +123,27 @@ Set up `megatron_deepspeed_path` in the configuration. ```bash cd /home/user/workspace/llm-on-ray #Bloom-7B -python pretrain/megatron_deepspeed_pretrain.py --config_file pretrain/config/bloom_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf +python -m llmonray.pretrain.megatron_deepspeed_pretrain --config_file llmonray/pretrain/config/bloom_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf #llama-7B -python pretrain/megatron_deepspeed_pretrain.py --config_file pretrain/config/llama_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf +python -m llmonray.pretrain.megatron_deepspeed_pretrain --config_file llmonray/pretrain/config/llama_7b_megatron_deepspeed_zs0_8Gaudi_pretrain.conf ``` ##### Huggingface Trainer ```bash cd /home/user/workspace/llm-on-ray #llama-7B -python pretrain/pretrain.py --config_file pretrain/config/llama_7b_8Guadi_pretrain.conf +python -m llmonray.pretrain.pretrain --config_file llmonray/pretrain/config/llama_7b_8Guadi_pretrain.conf ``` ##### Nvidia GPU: ###### Megatron-DeepSpeed ```bash cd /home/user/workspace/llm-on-ray #llama2-7B -python pretrain/megatron_deepspeed_pretrain.py --config_file pretrain/config/llama2_3b_megatron_deepspeed_zs0_8gpus_pretrain.conf +python -m llmonray.pretrain.megatron_deepspeed_pretrain --config_file llmonray/pretrain/config/llama2_3b_megatron_deepspeed_zs0_8gpus_pretrain.conf ``` ##### Huggingface Trainer ```bash cd /home/user/workspace/llm-on-ray #llama-7B -python pretrain/pretrain.py --config_file pretrain/config/llama_7b_8gpu_pretrain.conf +python -m llmonray.pretrain.pretrain --config_file llmonray/pretrain/config/llama_7b_8gpu_pretrain.conf ``` \ No newline at end of file diff --git a/docs/serve.md b/docs/serve.md index 0611f60e1..00d63e7ec 100644 --- 
a/docs/serve.md +++ b/docs/serve.md @@ -30,22 +30,22 @@ LLM-on-Ray also supports serving with [Deepspeed](serve_deepspeed.md) for AutoTP We support three methods to specify the models to be served, and they have the following priorities. 1. Use inference configuration file if config_file is set. ``` -python inference/serve.py --config_file inference/models/gpt2.yaml +python -m llmonray.inference.serve --config_file llmonray/inference/models/gpt2.yaml ``` 2. Use relevant configuration parameters if model_id_or_path is set. ``` -python inference/serve.py --model_id_or_path gpt2 [--tokenizer_id_or_path gpt2 --port 8000 --route_prefix ...] +python -m llmonray.inference.serve --model_id_or_path gpt2 [--tokenizer_id_or_path gpt2 --port 8000 --route_prefix ...] ``` 3. If --config_file and --model_id_or_path are both None, it will serve all pre-defined models in inference/models/*.yaml, or part of them if models is set. ``` -python inference/serve.py --models gpt2 gpt-j-6b +python -m llmonray.inference.serve --models gpt2 gpt-j-6b ``` ### OpenAI-compatible API To deploy your model, execute the following command with the model's configuration file. This will create an OpenAI-compatible API ([OpenAI API Reference](https://platform.openai.com/docs/api-reference/chat)) for serving. ```bash -python inference/serve.py --config_file +python -m llmonray.inference.serve --config_file ``` -To deploy and serve multiple models concurrently, place all models' configuration files under `inference/models` and directly run `python inference/serve.py` without passing any conf file. +To deploy and serve multiple models concurrently, place all models' configuration files under `llmonray/inference/models` and directly run `python -m llmonray.inference.serve` without passing any conf file. 
After deploying the model, you can access and test it in many ways: ```bash @@ -71,7 +71,7 @@ python examples/inference/api_server_openai/query_openai_sdk.py ### Serving Model to a Simple Endpoint This will create a simple endpoint for serving according to the `port` and `route_prefix` parameters in conf file, for example: http://127.0.0.1:8000/gpt2. ```bash -python inference/serve.py --config_file --simple +python -m llmonray.inference.serve --config_file --simple ``` After deploying the model endpoint, you can access and test it by using the script below: ```bash diff --git a/docs/vllm.md b/docs/vllm.md index 58393a9ae..919ee68d1 100644 --- a/docs/vllm.md +++ b/docs/vllm.md @@ -23,7 +23,7 @@ Please follow [Deploying and Serving LLMs on Intel CPU/GPU/Gaudi](serve.md) docu To serve model with vLLM, run the following: ```bash -$ python serve.py --config_file inference/models/vllm/llama-2-7b-chat-hf-vllm.yaml --simple --keep_serve_terminal +$ python -m llmonray.inference.serve --config_file llmonray/inference/models/vllm/llama-2-7b-chat-hf-vllm.yaml --simple --keep_serve_terminal ``` In the above example, `vllm` property is set to `true` in the config file for enabling vLLM. diff --git a/docs/web_ui.md b/docs/web_ui.md index db0c0824f..b8d7cbd0a 100644 --- a/docs/web_ui.md +++ b/docs/web_ui.md @@ -14,7 +14,7 @@ $ dev/scripts/install-ui.sh ## Start Web UI ```bash -python -u ui/start_ui.py --node_user_name $user --conda_env_name $conda_env --master_ip_port "$node_ip:6379" +python -m llmonray.ui.start_ui --node_user_name $user --conda_env_name $conda_env --master_ip_port "$node_ip:6379" ``` You will get URL from the command line output (E.g. http://0.0.0.0:8080 for local network and https://180cd5f7c31a1cfd3c.gradio.live for public network) and use the web browser to open it. 
From ad9a55ae109a358cff5cc35be3ef262649dbf2a0 Mon Sep 17 00:00:00 2001 From: KepingYan Date: Tue, 20 Feb 2024 16:06:26 +0800 Subject: [PATCH 05/29] update --- .../workflows/config/update_finetune_config_on_intel_gpu.py | 2 +- .github/workflows/workflow_finetune.yml | 6 +++--- llmonray/inference/predictor_deployment.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/config/update_finetune_config_on_intel_gpu.py b/.github/workflows/config/update_finetune_config_on_intel_gpu.py index e46dda811..3cde39664 100644 --- a/.github/workflows/config/update_finetune_config_on_intel_gpu.py +++ b/.github/workflows/config/update_finetune_config_on_intel_gpu.py @@ -3,7 +3,7 @@ def update_finetune_config(base_model): - conf_file = "finetune/finetune.yaml" + conf_file = "llmonray/finetune/finetune.yaml" with open(conf_file) as f: config = yaml.load(f, Loader=yaml.FullLoader) # due to compute node can't connect network diff --git a/.github/workflows/workflow_finetune.yml b/.github/workflows/workflow_finetune.yml index eefc28c04..995d1e98b 100644 --- a/.github/workflows/workflow_finetune.yml +++ b/.github/workflows/workflow_finetune.yml @@ -85,7 +85,7 @@ jobs: docker exec "finetune" bash -c "source \$(python -c 'import oneccl_bindings_for_pytorch as torch_ccl;print(torch_ccl.cwd)')/env/setvars.sh; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --head --node-ip-address 127.0.0.1 --ray-debugger-external; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --address='127.0.0.1:6379' --ray-debugger-external" CMD=$(cat << EOF import yaml - conf_path = "finetune/finetune.yaml" + conf_path = "llmonray/finetune/finetune.yaml" with open(conf_path, encoding="utf-8") as reader: result = yaml.load(reader, Loader=yaml.FullLoader) result['General']['base_model'] = "${{ matrix.model }}" @@ -120,7 +120,7 @@ jobs: docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*" CMD=$(cat << EOF import yaml - conf_path = "finetune/finetune.yaml" + conf_path = 
"llmonray/finetune/finetune.yaml" with open(conf_path, encoding="utf-8") as reader: result = yaml.load(reader, Loader=yaml.FullLoader) result['General']['lora_config'] = { @@ -150,7 +150,7 @@ jobs: import os import yaml os.system("cp -r $(python -m pip show deltatuner | grep Location | cut -d: -f2)/deltatuner/conf/best_structure examples/") - conf_path = "finetune/finetune.yaml" + conf_path = "llmonray/finetune/finetune.yaml" with open(conf_path, encoding="utf-8") as reader: result = yaml.load(reader, Loader=yaml.FullLoader) result['General']['lora_config'] = { diff --git a/llmonray/inference/predictor_deployment.py b/llmonray/inference/predictor_deployment.py index c22e8e836..46d1fdfa9 100644 --- a/llmonray/inference/predictor_deployment.py +++ b/llmonray/inference/predictor_deployment.py @@ -58,16 +58,16 @@ def __init__(self, infer_conf: InferenceConfig): self.use_vllm = infer_conf.vllm.enabled if self.use_deepspeed: - from deepspeed_predictor import DeepSpeedPredictor + from llmonray.inference.deepspeed_predictor import DeepSpeedPredictor self.predictor = DeepSpeedPredictor(infer_conf) self.streamer = self.predictor.get_streamer() elif self.use_vllm: - from vllm_predictor import VllmPredictor + from llmonray.inference.vllm_predictor import VllmPredictor self.predictor = VllmPredictor(infer_conf) else: - from transformer_predictor import TransformerPredictor + from llmonray.inference.transformer_predictor import TransformerPredictor self.predictor = TransformerPredictor(infer_conf) self.loop = asyncio.get_running_loop() From 4724b44a1ca2a90c84ad4fe3e670ee91c75128e0 Mon Sep 17 00:00:00 2001 From: KepingYan Date: Wed, 21 Feb 2024 09:13:12 +0800 Subject: [PATCH 06/29] disable mpt-7b-bigdl --- .github/workflows/workflow_inference.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/workflow_inference.yml b/.github/workflows/workflow_inference.yml index b8082539c..f36ede259 100644 --- a/.github/workflows/workflow_inference.yml +++ 
b/.github/workflows/workflow_inference.yml @@ -44,7 +44,8 @@ jobs: include: - { model: "gpt-j-6b"} - { model: "mistral-7b-v0.1"} - - { model: "mpt-7b-bigdl"} + # restore after https://github.com/intel-analytics/BigDL/issues/10177 is resolved + # - { model: "mpt-7b-bigdl"} - { model: "llama-2-7b-chat-hf-vllm"} - dtuner_model: nathan0/mpt-7b-deltatuner-model model: mpt-7b From 3024036b2d1b85bc4aa4aea31864eaed77a7322d Mon Sep 17 00:00:00 2001 From: KepingYan Date: Wed, 21 Feb 2024 09:39:43 +0800 Subject: [PATCH 07/29] update --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 350b3bdd2..0e9796c7f 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ python examples/inference/api_server_openai/query_openai_sdk.py ``` Or you can serve specific model to a simple endpoint according to the `port` and `route_prefix` parameters in configuration file, ```bash -python llmonray.inference.serve --config_file llmonray/inference/models/gpt2.yaml --simple +python -m llmonray.inference.serve --config_file llmonray/inference/models/gpt2.yaml --simple ``` After deploying the model endpoint, you can access and test it by using the script below: ```bash From 188e495b857947a0bcf9a7377473adc58242e3f0 Mon Sep 17 00:00:00 2001 From: KepingYan Date: Wed, 21 Feb 2024 23:26:19 +0800 Subject: [PATCH 08/29] update for ui --- llmonray/ui/start_ui.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/llmonray/ui/start_ui.py b/llmonray/ui/start_ui.py index 70d82254f..9113b42a2 100644 --- a/llmonray/ui/start_ui.py +++ b/llmonray/ui/start_ui.py @@ -515,7 +515,7 @@ def finetune( if max_train_step != 0: finetune_config["Training"]["max_train_steps"] = max_train_step - from finetune.finetune import main + from llmonray.finetune.finetune import main finetune_config["total_epochs"] = queue.Queue( actor_options={"resources": {"queue_hardware": 1}} @@ -811,13 +811,16 @@ def _init_ui(self): title = "Manage LLM 
Lifecycle" with gr.Blocks(css=custom_css, title=title) as gr_chat: + logo_path = os.path.join(self.repo_code_path, "ui/images/logo.png") head_content = """
-
+

Manage LLM Lifecycle

Fine-Tune LLMs using workflow on Ray, Deploy and Inference

- """ + """.format( + logo_path=logo_path + ) foot_content = """