Skip to content

Commit 9ecfaa5

Browse files
authored
merge main branch to alg_ext (#970)
1 parent 2448dcf commit 9ecfaa5

File tree

4 files changed

+44
-0
lines changed

4 files changed

+44
-0
lines changed

auto_round/alg_ext.abi3.so

3.97 KB
Binary file not shown.

auto_round/compressors/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2741,6 +2741,7 @@ def _quantize_blocks(
27412741
else:
27422742
logger.info("using algorithm extension for quantization.")
27432743
except (ImportError, ModuleNotFoundError):
2744+
logger.error("algorithm extension import error, fallback to default mode")
27442745
quantize_block = self._quantize_block
27452746
else:
27462747
quantize_block = self._quantize_block

test/test_cpu/test_autoround.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,9 @@ def test_alg_ext(self):
716716
ar = AutoRound(model_name, scheme="W2A16", iters=1, nsamples=1, enable_alg_ext=True)
717717
ar.quantize()
718718

719+
def test_alg_ext_import(self):
    """Check that the algorithm-extension entry point can be imported.

    The import itself is the main check: an ImportError here fails the test.
    The extra assertion makes the pass criterion explicit and keeps the
    imported name from being an unused import.
    """
    from auto_round.alg_ext import quantize_block_ext

    # NOTE(review): assumes the extension exposes quantize_block_ext as a
    # callable — confirm against auto_round/alg_ext.
    self.assertTrue(callable(quantize_block_ext))
721+
719722
def test_invalid_layer_config(self):
720723
with self.assertRaises(ValueError):
721724
layer_config = {"model.decoder.layers.2.self_attnx": {"bits": 2}}

test/test_cuda/test_alg_ext.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import shutil
2+
import sys
3+
import unittest
4+
5+
sys.path.insert(0, "../..")
6+
7+
import torch
8+
from transformers import AutoModelForCausalLM, AutoTokenizer
9+
10+
from auto_round import AutoRound, AutoRoundConfig
11+
from auto_round.eval.evaluation import simple_evaluate_user_model
12+
13+
14+
class TestAlgExt(unittest.TestCase):
    """Accuracy check for 2-bit quantization with the algorithm extension enabled.

    The model is quantized with ``enable_alg_ext=True``, saved, reloaded through
    ``transformers`` and evaluated on ``lambada_openai``.
    """

    # Reference lambada_openai accuracies recorded by the original author:
    # 0.2084 without the algorithm extension, 0.2364 with it. The threshold
    # sits between the two, so the test fails if the extension has no effect.
    MIN_ACCURACY = 0.22

    @classmethod
    def setUpClass(cls):
        """Set the model path and output folder shared by the tests."""
        cls.model_name = "/models/opt-125m"
        cls.save_folder = "./saved"

    @classmethod
    def tearDownClass(cls):
        """Remove the saved model folder and the ``runs`` directory, if present."""
        shutil.rmtree(cls.save_folder, ignore_errors=True)
        shutil.rmtree("runs", ignore_errors=True)

    def test_2bits(self):
        """Quantize to 2 bits with the extension and require accuracy above MIN_ACCURACY."""
        # Registered up front so the folder is removed even when an assertion
        # or the evaluation itself fails part-way through.
        self.addCleanup(shutil.rmtree, self.save_folder, ignore_errors=True)

        ar = AutoRound(model=self.model_name, bits=2, group_size=64, enable_alg_ext=True)
        ar.quantize_and_save(self.save_folder)

        model = AutoModelForCausalLM.from_pretrained(
            self.save_folder,
            device_map="auto",
        )
        tokenizer = AutoTokenizer.from_pretrained(self.save_folder)

        result = simple_evaluate_user_model(model, tokenizer, batch_size=64, tasks="lambada_openai")
        accuracy = result["results"]["lambada_openai"]["acc,none"]
        print(accuracy)
        self.assertGreater(accuracy, self.MIN_ACCURACY)

0 commit comments

Comments
 (0)