diff --git a/.github/workflows/workflow_finetune.yml b/.github/workflows/workflow_finetune.yml
index 36d61521b..f43defebf 100644
--- a/.github/workflows/workflow_finetune.yml
+++ b/.github/workflows/workflow_finetune.yml
@@ -34,7 +34,7 @@ jobs:
     name: finetune
     strategy:
       matrix:
-        model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, huggyllama/llama-7b, mistralai/Mistral-7B-v0.1 ]
+        model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, huggyllama/llama-7b, mistralai/Mistral-7B-v0.1, mistralai/Mixtral-8x7B-Instruct-v0.1 ]
         isPR:
           - ${{inputs.ci_type == 'pr'}}
@@ -44,6 +44,7 @@ jobs:
           - { model: "EleutherAI/gpt-j-6b"}
           - { model: "meta-llama/Llama-2-7b-chat-hf"}
           - { model: "mistralai/Mistral-7B-v0.1"}
+          - { model: "mistralai/Mixtral-8x7B-Instruct-v0.1"}
 
     runs-on: self-hosted
@@ -132,6 +133,8 @@ jobs:
             }
             if "${{ matrix.model }}" == "mistralai/Mistral-7B-v0.1":
                 result['General']['lora_config']['target_modules'] = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "lm_head",]
+            elif "${{ matrix.model }}" == "mistralai/Mixtral-8x7B-Instruct-v0.1":
+                result['General']['lora_config']['target_modules'] = ["k_proj", "v_proj"]
             else:
                 result['General']['lora_config']['target_modules'] = None
             with open(conf_path, 'w') as output:
@@ -143,7 +146,7 @@ jobs:
 
       - name: Run Deltatuner Test on DENAS-LoRA Model
         run: |
-          if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b-chat|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf|mistralai\/Mistral-7B-v0.1)$ ]]; then
+          if [[ ${{ matrix.model }} =~ ^(mosaicml\/mpt-7b-chat|huggyllama\/llama-7b|meta-llama\/Llama-2-7b-chat-hf|mistralai\/Mistral-7B-v0.1|mistralai\/Mixtral-8x7B-Instruct-v0.1)$ ]]; then
             echo ${{ matrix.model }} is not supported!
           else
             docker exec "finetune" bash -c "rm -rf /tmp/llm-ray/*"
diff --git a/.github/workflows/workflow_inference.yml b/.github/workflows/workflow_inference.yml
index 18f706800..208b6b884 100644
--- a/.github/workflows/workflow_inference.yml
+++ b/.github/workflows/workflow_inference.yml
@@ -34,7 +34,7 @@ jobs:
     name: inference
     strategy:
       matrix:
-        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm ]
+        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm, mixtral-8x7b-Instruct-v0.1 ]
         isPR:
           - ${{inputs.ci_type == 'pr'}}
@@ -46,6 +46,7 @@ jobs:
           - { model: "mistral-7b-v0.1"}
           - { model: "mpt-7b-bigdl"}
           - { model: "llama-2-7b-chat-hf-vllm"}
+          - { model: "mixtral-8x7b-Instruct-v0.1"}
           - dtuner_model: nathan0/mpt-7b-deltatuner-model
             model: mpt-7b
@@ -158,7 +159,7 @@ jobs:
       - name: Run Inference Test with DeepSpeed
         run: |
           TARGET=${{steps.target.outputs.target}}
-          if [[ ${{ matrix.model }} =~ ^(gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then
+          if [[ ${{ matrix.model }} =~ ^(mixtral-8x7b-Instruct-v0.1|gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then
             echo ${{ matrix.model }} is not supported!
           elif [[ ! ${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then
             docker exec "${TARGET}" bash -c "python .github/workflows/config/update_inference_config.py --config_file llm_on_ray/inference/models/\"${{ matrix.model }}\".yaml --output_file \"${{ matrix.model }}\".yaml.deepspeed --deepspeed"
@@ -171,7 +172,7 @@ jobs:
         if: ${{ matrix.dtuner_model }}
         run: |
           TARGET=${{steps.target.outputs.target}}
-          if [[ ${{ matrix.model }} =~ ^(gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then
+          if [[ ${{ matrix.model }} =~ ^(mixtral-8x7b-Instruct-v0.1|gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then
             echo ${{ matrix.model }} is not supported!
           else
             docker exec "${TARGET}" bash -c "llm_on_ray-serve --config_file .github/workflows/config/mpt_deltatuner_deepspeed.yaml --simple"
diff --git a/llm_on_ray/finetune/models/mixtral-8x7b-Instruct-v0.1.yaml b/llm_on_ray/finetune/models/mixtral-8x7b-Instruct-v0.1.yaml
new file mode 100644
index 000000000..a256c4a81
--- /dev/null
+++ b/llm_on_ray/finetune/models/mixtral-8x7b-Instruct-v0.1.yaml
@@ -0,0 +1,36 @@
+General:
+  base_model: mistralai/Mixtral-8x7B-Instruct-v0.1
+  gpt_base_model: false
+  output_dir: /tmp/llm-ray/output
+  checkpoint_dir: /tmp/llm-ray/checkpoint
+  config:
+    trust_remote_code: false
+    use_auth_token: null
+  lora_config:
+    task_type: CAUSAL_LM
+    r: 8
+    lora_alpha: 32
+    lora_dropout: 0.1
+    target_modules:
+      - q_proj
+      - v_proj
+  enable_gradient_checkpointing: false
+Dataset:
+  train_file: examples/data/sample_finetune_data_small.jsonl
+  validation_file: null
+  validation_split_percentage: 5
+Training:
+  optimizer: AdamW
+  batch_size: 2
+  epochs: 3
+  learning_rate: 1.0e-05
+  lr_scheduler: linear
+  weight_decay: 0.0
+  mixed_precision: bf16
+  device: CPU
+  num_training_workers: 2
+  resources_per_worker:
+    CPU: 2
+  accelerate_mode: CPU_DDP
+  gradient_accumulation_steps: 1
+  logging_steps: 10
diff --git a/llm_on_ray/inference/models/mixtral-8x7b-Instruct-v0.1.yaml b/llm_on_ray/inference/models/mixtral-8x7b-Instruct-v0.1.yaml
new file mode 100644
index 000000000..d83e0ea98
--- /dev/null
+++ b/llm_on_ray/inference/models/mixtral-8x7b-Instruct-v0.1.yaml
@@ -0,0 +1,22 @@
+port: 8000
+name: mixtral-8x7b-Instruct-v0.1
+route_prefix: /mixtral-8x7b-Instruct-v0.1
+num_replicas: 1
+cpus_per_worker: 24
+gpus_per_worker: 0
+deepspeed: false
+workers_per_group: 2
+device: CPU
+ipex:
+  enabled: true
+  precision: bf16
+model_description:
+  model_id_or_path: mistralai/Mixtral-8x7B-Instruct-v0.1
+  bigdl: false
+  tokenizer_name_or_path: mistralai/Mixtral-8x7B-Instruct-v0.1
+  chat_processor: ChatModelLLama
+  prompt:
+    intro: ''
+    human_id: '[INST] {msg} [/INST]'
+    bot_id: ''
+    stop_words: []
diff --git a/pyproject.toml b/pyproject.toml
index 332775d5b..16c476a80 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,7 +39,7 @@ dependencies = [
 
 [project.optional-dependencies]
 cpu = [
-    "transformers>=4.35.0, <=4.35.2",
+    "transformers>=4.36.0, <=4.38.1",
     "intel_extension_for_pytorch>=2.2.0",
     "torch>=2.2.0",
     "oneccl_bind_pt>=2.2.0"
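
Note: as a reference for reviewers, the following is a minimal standalone Python sketch of what the new elif branch in workflow_finetune.yml does for Mixtral, namely load a finetune YAML and narrow lora_config.target_modules to ["k_proj", "v_proj"]. It assumes PyYAML is installed, and the explicit conf_path value is illustrative only (in the workflow, conf_path is assigned earlier in the inline CI script).

    # Sketch of the CI config override added for mistralai/Mixtral-8x7B-Instruct-v0.1.
    import yaml

    # Illustrative path; matches the finetune config file added in this PR.
    conf_path = "llm_on_ray/finetune/models/mixtral-8x7b-Instruct-v0.1.yaml"

    with open(conf_path, encoding="utf-8") as reader:
        result = yaml.load(reader, Loader=yaml.FullLoader)

    # Restrict LoRA to the k/v attention projections, as the workflow does for Mixtral.
    result["General"]["lora_config"]["target_modules"] = ["k_proj", "v_proj"]

    with open(conf_path, "w") as output:
        yaml.dump(result, output, sort_keys=False)

Serving the new inference config would presumably follow the same pattern the workflow uses for the other models, e.g. llm_on_ray-serve --config_file llm_on_ray/inference/models/mixtral-8x7b-Instruct-v0.1.yaml --simple (not exercised by the CI changes above, since Mixtral is excluded from the DeepSpeed and deltatuner steps).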