diff --git a/docs/source/llm_recipes.md b/docs/source/llm_recipes.md index 5f04242516b..328bba3ba09 100644 --- a/docs/source/llm_recipes.md +++ b/docs/source/llm_recipes.md @@ -29,8 +29,8 @@ This document aims to publish the specific recipes we achieved for the popular L | databricks/dolly-v2-12b | ✖ | ✔ | ✖ | | EleutherAI/gpt-neox-20b | ✖ | ✔ | ✔ | | mistralai/Mistral-7B-v0.1 | ✖ | ✔ | ✔ | -| THUDM/chatglm2-6b | WIP | ✔ | ✔ | -| THUDM/chatglm3-6b | WIP | ✔ | ✔ | +| THUDM/chatglm2-6b | ✔ | ✔ | ✔ | +| THUDM/chatglm3-6b | WIP | ✔ | WIP | **Detail recipes can be found [HERE](https://github.com/intel/intel-extension-for-transformers/blob/main/examples/huggingface/pytorch/text-generation/quantization/llm_quantization_recipes.md).** @@ -40,8 +40,8 @@ This document aims to publish the specific recipes we achieved for the popular L > - The WIP recipes will be published soon. ## Large Language Models Accuracy - - + +
@@ -63,212 +63,210 @@ This document aims to publish the specific recipes we achieved for the popular L - - + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - + + + + + + - - - - + + + + - - + + - - - - - - + + + + + + - - + + - - - - + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - -
Model lambada_openaiRatio ACC Ratio
baichuan-inc/Baichuan-13B-Chat 67.57%68.23%1.009867.57%1.000067.84%1.0040NANA69.07%1.022267.55%0.999768.12%1.008166.93%0.9905
baichuan-inc/Baichuan2-13B-Chat 71.51%70.89%0.991371.53%1.000371.76%1.0035NANA75.57%1.056871.57%1.000870.81%0.9902N/AN/A
baichuan-inc/Baichuan2-7B-Chat 67.67%67.96%1.004367.59%0.998867.24%0.993667.42%0.996368.06%1.005867.61%0.999167.90%1.0034N/AN/A
bigscience/bloom-1b7 46.34% 47.99% 1.035646.38%1.000946.19%0.9968NANA46.21%0.997246.90%1.0121N/AN/A
databricks/dolly-v2-12b 64.35%NANA64.10%0.9961NANANANAN/AN/A63.92%0.9933N/AN/AN/AN/A
EleutherAI/gpt-j-6b 68.31%68.33%1.000368.23%0.998868.79%1.007068.43%1.001868.27%0.999468.27%0.999468.35%1.000668.02%0.9958
EleutherAI/gpt-neox-20b 72.33%NANA72.25%0.998971.96%0.9949NANAN/AN/A72.29%0.999471.74%0.9918N/AN/A
facebook/opt-1.3b 57.89%57.54%0.994058.08%1.003358.57%1.0117NANA57.68%0.996458.12%1.004058.26%1.0064N/AN/A
facebook/opt-30b 71.49%71.51%1.000371.51%1.000371.82%1.004672.11%1.008771.78%1.004171.53%1.000671.59%1.001471.80%1.0043
meta-llama/Llama-2-13b-hf 76.77% 76.25% 0.993276.75%0.999777.43%1.008676.75%0.999776.89%1.001677.66%1.011676.60%0.9978
meta-llama/Llama-2-70b-hf 79.64%79.55%0.998979.57%0.999179.14%0.993779.62%0.9997 80.09% 1.005779.97%1.004179.68%1.0005
meta-llama/Llama-2-7b-hf 73.92% 73.45% 0.993673.96%1.000573.45%0.993673.49%0.994273.90%0.999773.84%0.9989N/AN/A
mistralai/Mistral-7B-v0.1 75.90%NANAN/AN/A 75.80% 0.998776.13%1.003075.61%0.996276.25%1.004675.74%0.9979
THUDM/chatglm2-6b 53.23%NANA53.19%0.999252.77%0.991453.35%1.002352.86%0.993053.00%0.995752.90%0.993852.92%0.9942
THUDM/chatglm3-6b 59.09%NANA59.01%0.9986NANA58.61%0.9919N/AN/A59.03%0.9990N/AN/AN/AN/A
tiiuae/falcon-40b 77.22%77.04%0.997777.22%1.000077.94%1.009378.79%1.020376.95%0.996577.18%0.999577.55%1.004377.82%1.0078
tiiuae/falcon-7b 74.67%76.44%1.023774.77%1.001375.00%1.0044NANA76.63%1.026274.73%1.000875.06%1.005274.00%0.9910
+