From 087c412bd50a5bae2eac253d973e43db9b53bb3c Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Thu, 27 Apr 2023 15:57:42 +0800 Subject: [PATCH 01/14] Remove some redundant classes for quantization, benchmark and mixed precision Signed-off-by: Cheng, Penghui --- neural_compressor/benchmark.py | 535 +++++++----------- neural_compressor/config.py | 8 +- .../data/dataloaders/dataloader.py | 9 + neural_compressor/metric/metric.py | 55 ++ neural_compressor/mix_precision.py | 387 +++---------- neural_compressor/model/model.py | 67 +++ neural_compressor/quantization.py | 476 ++++------------ neural_compressor/utils/utility.py | 29 +- test/benchmark/test_benchmark_2.x.py | 42 +- test/mixed_precision/test_mixed_precision.py | 2 +- 10 files changed, 565 insertions(+), 1045 deletions(-) diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index 0ce19ab6dd1..d9537856a8a 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -24,6 +24,8 @@ import signal import psutil from threading import Thread + +from neural_compressor.data.dataloaders.dataloader import check_dataloader from .adaptor import FRAMEWORKS from .objective import MultiObjective from .config import BenchmarkConfig, options @@ -31,8 +33,7 @@ from .utils import OPTIONS from .utils.utility import GLOBAL_STATE, MODE from .model import BaseModel -from .model import Model as NCModel -from .model.model import get_model_fwk_name +from .model.model import wrap_model_from from .utils import logger from .utils.utility import Statistics @@ -135,209 +136,31 @@ def get_bounded_threads(core_ids, threads, sockets): return res -class _Benchmark(object): - """Benchmark class can be used to evaluate the model performance. - - With the objective setting, user can get the data of what they configured in yaml. - - Args: - conf (obj): The BenchmarkConfig class containing accuracy goal, tuning objective etc. - """ - - def __init__(self, conf): - """Init a Benchmark object.""" - self.framework = None - self._model = None - self._b_dataloader = None - self._b_func = None - self._results = {} - assert isinstance(conf, BenchmarkConfig), \ - "The config object should be config.BenchmarkConfig, not {}".format(type(conf)) - self.conf = conf - if self.conf.framework is not None: - self.framework = self.conf.framework.lower() - - def __call__(self, raw_cmd=None): - """Directly call a Benchmark object. - - Args: - raw_cmd: raw command used for benchmark - """ - cfg = self.conf - assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' 
- # disable multi-instance for running bechmark on GPU device - set_all_env_var(cfg) - if cfg.device == 'gpu': - set_env_var('NC_ENV_CONF', True, overwrite_existing=True) - - logger.info("Start to run Benchmark.") - if os.environ.get('NC_ENV_CONF') == 'True': - return self.run_instance() - if raw_cmd is None: - raw_cmd = sys.executable + ' ' + ' '.join(sys.argv) - self.config_instance(raw_cmd) - self.summary_benchmark() - return None - - fit = __call__ - - def summary_benchmark(self): - """Get the summary of the benchmark.""" - if sys.platform in ['linux']: - num_of_instance = int(os.environ.get('NUM_OF_INSTANCE')) - cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) - latency_l = [] - throughput_l = [] - for i in range(0, num_of_instance): - log = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, i) - with open(log, "r") as f: - for line in f: - latency = re.search(r"[L,l]atency:\s+(\d+(\.\d+)?)", line) - latency_l.append(float(latency.group(1))) if latency and latency.group(1) else None - throughput = re.search(r"[T,t]hroughput:\s+(\d+(\.\d+)?)", line) - throughput_l.append(float(throughput.group(1))) if throughput and throughput.group(1) else None - if throughput_l and latency_l: - assert len(latency_l)==len(throughput_l)==num_of_instance, \ - "Multiple instance benchmark failed with some instance!" - - output_data = [ - ["Latency average [second/sample]", "{:.3f}".format(sum(latency_l)/len(latency_l))], - ["Throughput sum [samples/second]", "{:.3f}".format(sum(throughput_l))] - ] - logger.info("********************************************") - Statistics( - output_data, - header='Multiple Instance Benchmark Summary', - field_names=["Items", "Result"]).print_stat() - else: - # (TODO) should add summary after win32 benchmark has log - pass - - def call_one(self, cmd, log_file): - """Execute one command for one instance in one thread and dump the log (for Windows).""" - proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - shell=True) # nosec - with open(log_file, "w", 1, encoding="utf-8") as log_file: - log_file.write(f"[ COMMAND ] {cmd} \n") - for line in proc.stdout: - decoded_line = line.decode("utf-8", errors="ignore").strip() - logger.info(decoded_line) # redirect to terminal - log_file.write(decoded_line + "\n") - - def config_instance(self, raw_cmd): - """Configure the multi-instance commands and trigger benchmark with sub process. 
- - Args: - raw_cmd: raw command used for benchmark - """ - multi_instance_cmd = '' - num_of_instance = int(os.environ.get('NUM_OF_INSTANCE')) - cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) - - logger.info("num of instance: {}".format(num_of_instance)) - logger.info("cores per instance: {}".format(cores_per_instance)) - - if(sys.platform in ['linux'] and get_architecture() == 'aarch64' and int(get_threads_per_core()) > 1): - raise OSError('Currently no support on ARM with hyperthreads') - elif sys.platform in ['linux']: - bounded_threads = get_bounded_threads(get_core_ids(), get_threads(), get_physical_ids()) - - for i in range(0, num_of_instance): - if sys.platform in ['linux'] and get_architecture() == 'x86_64': - core_list_idx = np.arange(0, cores_per_instance) + i * cores_per_instance - core_list = np.array(bounded_threads)[core_list_idx] - else: - core_list = np.arange(0, cores_per_instance) + i * cores_per_instance - # bind cores only allowed in linux/mac os with numactl enabled - prefix = self.generate_prefix(core_list) - instance_cmd = '{} {}'.format(prefix, raw_cmd) - if sys.platform in ['linux']: - instance_log = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, i) - multi_instance_cmd += '{} 2>&1|tee {} & \\\n'.format( - instance_cmd, instance_log) - else: # pragma: no cover - multi_instance_cmd += '{} \n'.format(instance_cmd) - - multi_instance_cmd += 'wait' if sys.platform in ['linux'] else '' - logger.info("Running command is\n{}".format(multi_instance_cmd)) - # each instance will execute single instance - set_env_var('NC_ENV_CONF', True, overwrite_existing=True) - if sys.platform in ['linux']: - p = subprocess.Popen(multi_instance_cmd, preexec_fn=os.setsid, shell=True) # nosec - elif sys.platform in ['win32']: # pragma: no cover - cmd_list = multi_instance_cmd.split("\n")[:-1] - threads = [] - for idx, cmd in enumerate(cmd_list): - # wrap each execution of windows bat file in one thread - # write the log to the log file of the corresponding instance - logger.info('Will dump to {}_{}_{}.log'.format(num_of_instance, cores_per_instance, idx)) - threads.append(Thread(target=self.call_one, args=(cmd, - '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, idx)))) - for command_thread in threads: - command_thread.start() - logger.info("Worker threads start") - # Wait for all of them to finish - for command_thread in threads: - command_thread.join() - logger.info("Worker threads join") - return - try: - p.communicate() - except KeyboardInterrupt: - os.killpg(os.getpgid(p.pid), signal.SIGKILL) - - def generate_prefix(self, core_list): - """Generate the command prefix with numactl. - - Args: - core_list: a list of core indexes bound with specific instances - """ - if sys.platform in ['linux'] and os.system('numactl --show >/dev/null 2>&1') == 0: - return 'OMP_NUM_THREADS={} numactl --localalloc --physcpubind={}'.format(\ - len(core_list), ','.join(core_list.astype(str))) - elif sys.platform in ['win32']: # pragma: no cover - # (TODO) should we move the hw_info from ux? 
- from neural_compressor.ux.utils.hw_info import get_number_of_sockets - num_of_socket = int(get_number_of_sockets()) - cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) - cores_per_socket = int(psutil.cpu_count(logical=False)) / num_of_socket - socket_id = int(core_list[0] // cores_per_socket) - # cores per socket should integral multiple of cores per instance, else not bind core - if cores_per_socket % cores_per_instance == 0: - from functools import reduce - hex_core = hex(reduce(lambda x, y : x | y, [1 << p for p in core_list])) - return 'start /b /WAIT /node {} /affinity {} CMD /c'.format(socket_id, hex_core) - else: - return '' - - def run_instance(self): +def run_instance(model, conf, b_dataloader=None, b_func=None): """Run the instance with the configuration. Args: runs benchmarking with numactl on specific cores and instances set by user config and returns model performance """ - if self._b_func is None: - cfg = self.conf + results = {} + if b_func is None: GLOBAL_STATE.STATE = MODE.BENCHMARK - framework_specific_info = {'device': cfg.device, \ - 'approach': None, \ + framework_specific_info = {'device': conf.device, + 'approach': None, 'random_seed': options.random_seed, - 'backend': cfg.backend \ - if cfg.backend is not None else 'default', + 'backend': conf.backend if conf.backend is not None else 'default', 'format': 'default'} - framework = cfg.framework.lower() + framework = conf.framework.lower() if 'tensorflow' in framework: - framework_specific_info.update({"inputs": cfg.inputs, \ - "outputs": cfg.outputs, \ + framework_specific_info.update({"inputs": conf.inputs, \ + "outputs": conf.outputs, \ "recipes": {}, \ 'workspace_path': options.workspace}) if framework == 'keras': framework_specific_info.update({'workspace_path': options.workspace}) if framework == 'mxnet': - framework_specific_info.update({"b_dataloader": self._b_dataloader}) + framework_specific_info.update({"b_dataloader": b_dataloader}) if 'onnx' in framework: framework_specific_info.update( {'workspace_path': options.workspace, \ @@ -346,37 +169,37 @@ def run_instance(self): framework_specific_info.update({"workspace_path": options.workspace, "q_dataloader": None}) - assert isinstance(self._model, BaseModel), 'need set neural_compressor Model for quantization....' + assert isinstance(model, BaseModel), 'need set neural_compressor Model for quantization....' 
adaptor = FRAMEWORKS[framework](framework_specific_info) - assert self._b_dataloader is not None, "dataloader should not be None" + assert b_dataloader is not None, "dataloader should not be None" from neural_compressor.utils.create_obj_from_config import create_eval_func - self._b_func = create_eval_func(self.framework, \ - self._b_dataloader, \ - adaptor, \ - None) + b_func = create_eval_func(conf.framework, + b_dataloader, + adaptor, + None) - self.objectives = MultiObjective(["performance"], - {'relative': 0.1}, - is_measure=True) + objectives = MultiObjective(["performance"], + {'relative': 0.1}, + is_measure=True) - val = self.objectives.evaluate(self._b_func, self._model) + val = objectives.evaluate(b_func, model) # measurer contain info not only performance(eg, memory, model_size) # also measurer have result list among steps acc, _ = val - batch_size = self._b_dataloader.batch_size - warmup = cfg.warmup - if len(self.objectives.objectives[0].result_list()) < warmup: - if len(self.objectives.objectives[0].result_list()) > 1 and warmup != 0: + batch_size = b_dataloader.batch_size + warmup = conf.warmup + if len(objectives.objectives[0].result_list()) < warmup: + if len(objectives.objectives[0].result_list()) > 1 and warmup != 0: warmup = 1 else: warmup = 0 - result_list = self.objectives.objectives[0].result_list()[warmup:] + result_list = objectives.objectives[0].result_list()[warmup:] latency = np.array(result_list).mean() / batch_size - self._results["performance"] = acc, batch_size, result_list + results["performance"] = acc, batch_size, result_list logger.info("\nbenchmark result:") for i, res in enumerate(result_list): @@ -384,136 +207,175 @@ def run_instance(self): logger.info("Batch size = {}".format(batch_size)) logger.info("Latency: {:.3f} ms".format(latency * 1000)) logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) + return results else: - self._b_func(self._model.model) + b_func(model.model) - @property - def results(self): - """Get the results of benchmarking.""" - return self._results - @property - def b_dataloader(self): - """Get the dataloader for the benchmarking.""" - return self._b_dataloader +def generate_prefix(core_list): + """Generate the command prefix with numactl. - @b_dataloader.setter - def b_dataloader(self, dataloader): - """Set dataloader for benchmarking. + Args: + core_list: a list of core indexes bound with specific instances + """ + if sys.platform in ['linux'] and os.system('numactl --show >/dev/null 2>&1') == 0: + return 'OMP_NUM_THREADS={} numactl --localalloc --physcpubind={}'.format(\ + len(core_list), ','.join(core_list.astype(str))) + elif sys.platform in ['win32']: # pragma: no cover + # (TODO) should we move the hw_info from ux? + from neural_compressor.ux.utils.hw_info import get_number_of_sockets + num_of_socket = int(get_number_of_sockets()) + cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) + cores_per_socket = int(psutil.cpu_count(logical=False)) / num_of_socket + socket_id = int(core_list[0] // cores_per_socket) + # cores per socket should integral multiple of cores per instance, else not bind core + if cores_per_socket % cores_per_instance == 0: + from functools import reduce + hex_core = hex(reduce(lambda x, y : x | y, [1 << p for p in core_list])) + return 'start /b /WAIT /node {} /affinity {} CMD /c'.format(socket_id, hex_core) + else: + return '' - It is iterable and the batched data should consist of a tuple like (input, label) or yield (input, _). 
- When b_dataloader is set, users can configure postprocess(optional) and metric - in yaml file or set postprocess and metric cls for evaluation, - or just get performance without a label in dataloader and configure postprocess/metric. - Args: - dataloader(generator): users are supported to set a user-defined dataloader - which meet the requirements that can yield a tuple of - (input, label)/(input, _) batched data. - Another good practice is to use - neural_compressor.data.DataLoader - to initialize a neural_compressor dataloader object. - Notice neural_compressor.data.DataLoader - is just a wrapper of the information needed to - build a dataloader, it can't yield - batched data and only in this setter method - a 'real' eval_dataloader will be created, - the reason is we have to know the framework info - and only after the Quantization object is created then - framework information can be known. - Future we will support creating iterable dataloader - from neural_compressor.data.DataLoader - """ - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' - self._b_dataloader = dataloader +def call_one(cmd, log_file): + """Execute one command for one instance in one thread and dump the log (for Windows).""" + proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + shell=True) # nosec + with open(log_file, "w", 1, encoding="utf-8") as log_file: + log_file.write(f"[ COMMAND ] {cmd} \n") + for line in proc.stdout: + decoded_line = line.decode("utf-8", errors="ignore").strip() + logger.info(decoded_line) # redirect to terminal + log_file.write(decoded_line + "\n") - @property - def b_func(self): - """Not support getting b_func.""" - assert False, 'Should not try to get the value of `b_func` attribute.' - return None - @b_func.setter - def b_func(self, user_b_func): - """Eval function for benchmark. +def config_instance(raw_cmd): + """Configure the multi-instance commands and trigger benchmark with sub process. - Args: - user_b_func: This function takes "model" as input parameter - and executes the entire training process with self - contained training hyper-parameters. If train_func is set, - an evaluation process must be triggered and the user should - set eval_dataloader with metric configured or directly eval_func - to make an evaluation of the model executed. 
- """ - self._b_func = user_b_func + Args: + raw_cmd: raw command used for benchmark + """ + multi_instance_cmd = '' + num_of_instance = int(os.environ.get('NUM_OF_INSTANCE')) + cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) + + logger.info("num of instance: {}".format(num_of_instance)) + logger.info("cores per instance: {}".format(cores_per_instance)) + + if (sys.platform in ['linux'] and get_architecture() == 'aarch64' and int(get_threads_per_core()) > 1): + raise OSError('Currently no support on ARM with hyperthreads') + elif sys.platform in ['linux']: + bounded_threads = get_bounded_threads(get_core_ids(), get_threads(), get_physical_ids()) + + for i in range(0, num_of_instance): + if sys.platform in ['linux'] and get_architecture() == 'x86_64': + core_list_idx = np.arange(0, cores_per_instance) + i * cores_per_instance + core_list = np.array(bounded_threads)[core_list_idx] + else: + core_list = np.arange(0, cores_per_instance) + i * cores_per_instance + # bind cores only allowed in linux/mac os with numactl enabled + prefix = generate_prefix(core_list) + instance_cmd = '{} {}'.format(prefix, raw_cmd) + if sys.platform in ['linux']: + instance_log = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, i) + multi_instance_cmd += '{} 2>&1|tee {} & \\\n'.format( + instance_cmd, instance_log) + else: # pragma: no cover + multi_instance_cmd += '{} \n'.format(instance_cmd) + + multi_instance_cmd += 'wait' if sys.platform in ['linux'] else '' + logger.info("Running command is\n{}".format(multi_instance_cmd)) + # each instance will execute single instance + set_env_var('NC_ENV_CONF', True, overwrite_existing=True) + if sys.platform in ['linux']: + p = subprocess.Popen(multi_instance_cmd, preexec_fn=os.setsid, shell=True) # nosec + elif sys.platform in ['win32']: # pragma: no cover + cmd_list = multi_instance_cmd.split("\n")[:-1] + threads = [] + for idx, cmd in enumerate(cmd_list): + # wrap each execution of windows bat file in one thread + # write the log to the log file of the corresponding instance + logger.info('Will dump to {}_{}_{}.log'.format(num_of_instance, cores_per_instance, idx)) + threads.append(Thread(target=call_one, args=(cmd, + '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, idx)))) + for command_thread in threads: + command_thread.start() + logger.info("Worker threads start") + # Wait for all of them to finish + for command_thread in threads: + command_thread.join() + logger.info("Worker threads join") + return + try: + p.communicate() + except KeyboardInterrupt: + os.killpg(os.getpgid(p.pid), signal.SIGKILL) + + +def summary_benchmark(): + """Get the summary of the benchmark.""" + if sys.platform in ['linux']: + num_of_instance = int(os.environ.get('NUM_OF_INSTANCE')) + cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) + latency_l = [] + throughput_l = [] + for i in range(0, num_of_instance): + log = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, i) + with open(log, "r") as f: + for line in f: + latency = re.search(r"[L,l]atency:\s+(\d+(\.\d+)?)", line) + latency_l.append(float(latency.group(1))) if latency and latency.group(1) else None + throughput = re.search(r"[T,t]hroughput:\s+(\d+(\.\d+)?)", line) + throughput_l.append(float(throughput.group(1))) if throughput and throughput.group(1) else None + if throughput_l and latency_l: + assert len(latency_l)==len(throughput_l)==num_of_instance, \ + "Multiple instance benchmark failed with some instance!" 
+ + output_data = [ + ["Latency average [second/sample]", "{:.3f}".format(sum(latency_l)/len(latency_l))], + ["Throughput sum [samples/second]", "{:.3f}".format(sum(throughput_l))] + ] + logger.info("********************************************") + Statistics( + output_data, + header='Multiple Instance Benchmark Summary', + field_names=["Items", "Result"]).print_stat() + else: + # (TODO) should add summary after win32 benchmark has log + pass - @property - def model(self): - """Get the model.""" - return self._model - @model.setter - def model(self, user_model): - """Set the user model and dispatch to the framework-specific internal model object. +def benchmark_with_raw_cmd(raw_cmd, config=None): + """Benchmark the model performance with the raw commend. - Args: - user_model: users are supported to set model from the original framework model format - (eg, tensorflow frozen_pb or path to a saved model), - but not recommended. A best practice is to set from an initialized - neural_compressor.model.Model. - If tensorflow model is used, the model's inputs/outputs will be - auto inferenced, but sometimes auto inferenced - inputs/outputs will not meet your requests, so it is better to - set them manually in config yaml file. - Another corner case is the slim model of tensorflow, - be careful of the name of the model configured in the yaml file, - make sure the name is in the supported slim model list. - """ - cfg = self.conf - if cfg.framework is None: - assert not isinstance(user_model, BaseModel), \ - "Please pass an original framework model but not neural compressor model!" - self.framework = get_model_fwk_name(user_model) - if self.framework == "tensorflow": - from .model.tensorflow_model import get_model_type - if get_model_type(user_model) == 'keras' and cfg.backend == 'itex': - self.framework = 'keras' - if self.framework == "pytorch": - if cfg.backend == "default": - self.framework = "pytorch_fx" - elif cfg.backend == "ipex": - self.framework = "pytorch_ipex" - import intel_extension_for_pytorch - cfg.framework = self.framework - - if not isinstance(user_model, BaseModel): - logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in self.framework or self.framework == "keras": - self._model = NCModel(user_model, backend=self.framework, device=cfg.device) - else: - self._model = NCModel(user_model, backend=self.framework) - else: - # It is config of neural_compressor version < 2.0, no need in 2.0 - if cfg.framework == "pytorch_ipex": - from neural_compressor.model.torch_model import IPEXModel - if not isinstance(user_model, IPEXModel): - self._model = NCModel(user_model.model, framework=cfg.framework) - return - self._model = user_model - - if 'tensorflow' in self.framework: - self._model.name = cfg.model_name - self._model.output_tensor_names = cfg.outputs - self._model.input_tensor_names = cfg.inputs - self._model.workspace_path = options.workspace - - def __repr__(self): - """Get the object representation in string format.""" - return 'Benchmark' - -def fit(model, config=None, b_dataloader=None, b_func=None): + Args: + raw_cmd (string): The commend to be benchmarked. + config (BenchmarkConfig): The configuration for benchmark containing accuracy goal, + tuning objective and preferred calibration & quantization + tuning space etc. 
+ + Example:: + + # Run benchmark according to config + from neural_compressor.benchmark import fit_with_raw_cmd + + conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) + fit_with_raw_cmd("test.py", conf) + """ + if config is not None: + if config.backend == "ipex": + import intel_extension_for_pytorch + assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' + # disable multi-instance for running bechmark on GPU device + set_all_env_var(config) + + config_instance(raw_cmd) + summary_benchmark() + + +def fit(model, config, b_dataloader=None, b_func=None): """Benchmark the model performance with the configure. Args: @@ -533,11 +395,22 @@ def fit(model, config=None, b_dataloader=None, b_func=None): conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) fit(model='./int8.pb', config=conf, b_dataloader=eval_dataloader) """ - benchmarker = _Benchmark(config) - benchmarker.model = model - if b_func is not None: - benchmarker.b_func = b_func + if config.backend == "ipex": + import intel_extension_for_pytorch + + wrapped_model = wrap_model_from(model, config) + if b_dataloader is not None: - benchmarker.b_dataloader = b_dataloader - benchmarker() - return benchmarker.results + check_dataloader(b_dataloader) + + assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' + # disable multi-instance for running bechmark on GPU device + set_all_env_var(config) + if config.device == 'gpu': + set_env_var('NC_ENV_CONF', True, overwrite_existing=True) + + logger.info("Start to run Benchmark.") + if os.environ.get('NC_ENV_CONF') == 'True': + return run_instance(model=wrapped_model, conf=config, b_dataloader=b_dataloader, b_func=b_func) + raw_cmd = sys.executable + ' ' + ' '.join(sys.argv) + benchmark_with_raw_cmd(raw_cmd) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 0bdb967d70f..670f3573a67 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -1295,12 +1295,12 @@ def __init__(self, def approach(self): """Get approach.""" return self._approach - + @property def framework(self): """Get framework.""" return self._framework - + @framework.setter def framework(self, framework): """Set framework.""" @@ -1309,7 +1309,7 @@ def framework(self, framework): class WeightPruningConfig: """Config Class for Pruning. Define a single or a sequence of pruning configs. - + Args: pruning_configs (list of dicts, optional): Local pruning configs only valid to linked layers. Parameters defined out of pruning_configs are valid for all layers. @@ -2238,7 +2238,7 @@ def onnxruntime(self): def accuracy(self): """Get the accuracy object.""" return self._accuracy - + @property def tuning(self): """Get the tuning object.""" diff --git a/neural_compressor/data/dataloaders/dataloader.py b/neural_compressor/data/dataloaders/dataloader.py index 348caf40a3d..89e8cdd2ea8 100644 --- a/neural_compressor/data/dataloaders/dataloader.py +++ b/neural_compressor/data/dataloaders/dataloader.py @@ -80,6 +80,7 @@ def __new__(cls, framework, dataset, batch_size=1, collate_fn=None, shuffle=shuffle, distributed=distributed) + def _generate_common_dataloader(dataloader, framework, distributed=False): """Generate common dataloader. 
@@ -112,3 +113,11 @@ def _generate_common_dataloader(dataloader, framework, distributed=False):
         pin_memory=dataloader.pin_memory,
         shuffle=dataloader.shuffle,
         distributed=bool(dataloader.distributed or distributed))
+
+
+def check_dataloader(dataloader):
+    """Check whether the dataloader meets the requirements of neural_compressor."""
+    assert hasattr(dataloader, '__iter__') and \
+           hasattr(dataloader, 'batch_size'), \
+           'dataloader must implement __iter__ method and batch_size attribute'
+    return True
diff --git a/neural_compressor/metric/metric.py b/neural_compressor/metric/metric.py
index 91a2328390a..172dd9780f0 100644
--- a/neural_compressor/metric/metric.py
+++ b/neural_compressor/metric/metric.py
@@ -1615,3 +1615,58 @@ def result(self):
         roc_auc = sklearn.metrics.roc_auc_score(targets, scores)
         acc = sklearn.metrics.accuracy_score(targets, np.round(scores))
         return acc
+
+
+def register_customer_metric(user_metric, framework):
+    """Register a custom metric class or a dict of built-in metric configurations.
+
+    1. neural_compressor has many built-in metrics;
+    users can pass a metric configuration dict to tell neural_compressor which metric to use.
+    You can also set multiple metrics to evaluate the performance of a specific model.
+    Single metric:
+        {topk: 1}
+    Multi-metrics:
+        {topk: 1,
+        MSE: {compare_label: False},
+        weight: [0.5, 0.5],
+        higher_is_better: [True, False]
+        }
+    For the built-in metrics, please refer to the link below:
+    https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix.
+
+    2. Users can also get the built-in metrics through neural_compressor.Metric:
+        Metric(name="topk", k=1)
+    3. Users can also set a custom metric through this API. The metric class should take the outputs of the model or
+    of the postprocess (if any) as inputs; neural_compressor built-in metrics always take (predictions, labels)
+    as inputs for update, and user_metric.metric_cls should be a subclass of neural_compressor.metric.BaseMetric.
+
+    Args:
+        user_metric(neural_compressor.metric.Metric or a dict of built-in metric configurations):
+            The object of Metric or a dict of built-in metric configurations.
+
+        framework: the framework name, such as tensorflow, pytorch, etc.
+ + """ + if isinstance(user_metric, dict): + metric_cfg = user_metric + else: + if isinstance(user_metric, Metric): + if user_metric.metric_cls is None: + name = user_metric.name + metric_cls = METRICS(framework).metrics[name] + metric_cfg = {name: {**user_metric.kwargs}} + return metric_cfg + else: + name = user_metric.name + metric_cls = user_metric.metric_cls + metric_cfg = {name: {**user_metric.kwargs}} + else: + for i in ['reset', 'update', 'result']: + assert hasattr(user_metric, i), 'Please realise {} function' \ + 'in user defined metric'.format(i) + metric_cls = type(user_metric).__name__ + name = 'user_' + metric_cls + metric_cfg = {name: id(user_metric)} + metrics = METRICS(framework) + metrics.register(name, metric_cls) + return metric_cfg diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 1f5bc0445ba..714199f96f6 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -20,296 +20,15 @@ import pickle import numpy as np import random -from .utils.utility import time_limit, CpuInfo + +from neural_compressor.data.dataloaders.dataloader import check_dataloader +from neural_compressor.metric.metric import register_customer_metric +from .utils.utility import time_limit from .strategy import STRATEGIES from .config import _Config, options from .utils import logger -from .model.model import BaseModel, get_model_fwk_name, Model, MODELS - -class _MixedPrecision: - """Class used for generating low precision model. - - _MixedPrecision class automatically generates low precision model across various DL - frameworks including tensorflow, pytorch and onnxruntime. - - Example:: - - from neural_compressor.config import MixedPrecisionConfig - def eval_func(model): - ... - return accuracy - - conf = MixedPrecisionConfig() - output_model = mix_precision.fit( - model, - conf, - eval_func=eval_func, - ) - """ - def __init__(self, conf=None): - """Initialize `MixedPrecision` class. - - Args: - conf (obj): The MixedPrecisionConfig class containing accuracy goal, tuning objective etc. - """ - self.conf = _Config(mixed_precision=conf, quantization=None, benchmark=None - , pruning=None, distillation=None, nas=None) - seed = options.random_seed - random.seed(seed) - np.random.seed(seed) - - self._eval_func = None - self._eval_dataloader = None - self._model = None - self._metric = None - - def pre_process(self): - """Create strategy object for tuning.""" - strategy = 'automixedprecision' - _resume = None - # check if interrupted tuning procedure exists. if yes, it will resume the - # whole auto tune process. 
- self.resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ - if options.workspace and options.resume_from else None - if self.resume_file: - assert os.path.exists(self.resume_file), \ - "The specified resume file {} doesn't exist!".format(self.resume_file) - with open(self.resume_file, 'rb') as f: - _resume = pickle.load(f).__dict__ - - self.strategy = STRATEGIES[strategy]( - model=self.model, - conf=self.conf, - eval_func=self._eval_func, - eval_dataloader=self._eval_dataloader, - eval_metric=self.metric, - resume=_resume, - q_hooks=None) - - def execute(self): - """Execute routinue based on strategy design.""" - try: - with time_limit(self.conf.mixed_precision.tuning_criterion.timeout): - self.strategy.traverse() - except KeyboardInterrupt: - pass - except Exception as e: - logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) - import traceback - traceback.print_exc() - finally: - if self.strategy.best_qmodel: - logger.info( - "Specified timeout or max trials is reached! " - "Found a quantized model which meet accuracy goal. Exit.") - self.strategy.deploy_config() - else: - logger.error( - "Specified timeout or max trials is reached! " - "Not found any quantized model which meet accuracy goal. Exit.") - - return self.strategy.best_qmodel - - def __call__(self): - """Execute this class. - - For derived classes, an override function is required. - """ - self.pre_process() - results = self.execute() - return results - - fit = __call__ - - @property - def precisions(self): - """Get private member variable `precisions` of `_MixedPrecision` class.""" - return self._precisions - - @precisions.setter - def precisions(self, customized_precisions): - """Set private member variable `precisions` of `_MixedPrecision` class.""" - if isinstance(customized_precisions, list): - self._precisions = sorted([i.strip() for i in customized_precisions]) - elif isinstance(customized_precisions, str): - self._precisions = sorted([i.strip() for i in customized_precisions.split(',')]) - self.conf.mixed_precision.precision = self._precisions - - @property - def eval_dataloader(self): - """Get eval_dataloader.""" - return self._eval_dataloader - - @eval_dataloader.setter - def eval_dataloader(self, dataloader): - """Set Dataloader for evaluation. - - It is iterable and the batched data should consists of a tuple like (input, label), - when eval_dataloader is set, user should configure postprocess(optional) and metric - in yaml file or set postprocess and metric cls. Notice evaluation dataloader will be - used to generate data for model inference, make sure the input data can be feed to model. - - Args: - dataloader(generator): user are supported to set a user defined dataloader - which meet the requirements that can yield tuple of - (input, label)/(input, _) batched data. - Another good practice is to use neural_compressor.common.DataLoader - to initialize a neural_compressor dataloader object. - Notice neural_compressor.common.DataLoader is just a wrapper of the - information needed to build a dataloader, it can't yield - batched data and only in this setter method - a 'real' eval_dataloader will be created, - the reason is we have to know the framework info - and only after the mixed_precision object created then - framework infomation can be known. 
Future we will support - creating iterable dataloader from neural_compressor.common.DataLoader - """ - # pragma: no cover - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' +from .model.model import wrap_model_from - self._eval_dataloader = dataloader - - @property - def model(self): - """Get model.""" - return self._model - - @model.setter - def model(self, user_model): - """Set the user model and dispatch to framework specific internal model object. - - Args: - user_model: user are supported to set model from original framework model format - (eg, tensorflow frozen_pb or path to a saved model), but not recommended. - Best practice is to set from a initialized neural_compressor.common.Model. - If tensorflow model is used, model's inputs/outputs will be auto inferred, - but sometimes auto inferred inputs/outputs will not meet your requests, - set them manually in config yaml file. Another corner case is slim model - of tensorflow, be careful of the name of model configured in yaml file, - make sure the name is in supported slim model list. - """ - cfg = self.conf - if cfg.mixed_precision.framework is None: - if isinstance(user_model, BaseModel): - cfg.mixed_precision.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] - if cfg.mixed_precision.backend == "ipex": - assert cfg.mixed_precision.framework == "pytorch_ipex",\ - "Please wrap the model with correct Model class!" - if cfg.mixed_precision.backend == "itex": # pragma: no cover - from .model.tensorflow_model import get_model_type - if get_model_type(user_model.model) == 'keras': - assert cfg.mixed_precision.framework == "keras",\ - "Please wrap the model with KerasModel class!" - else: - assert cfg.mixed_precision.framework == "pytorch_itex", \ - "Please wrap the model with TensorflowModel class!" - else: - framework = get_model_fwk_name(user_model) - if framework == "tensorflow": - from .model.tensorflow_model import get_model_type - if get_model_type(user_model) == 'keras' and cfg.mixed_precision.backend == 'itex': - framework = 'keras' - if framework == "pytorch": - if cfg.mixed_precision.backend == "default": - framework = "pytorch_fx" - elif cfg.mixed_precision.backend == "ipex": - framework = "pytorch_ipex" - cfg.mixed_precision.framework = framework - - if not isinstance(user_model, BaseModel): - logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in cfg.mixed_precision.framework or cfg.mixed_precision.framework == "keras": - self._model = Model(user_model, backend=cfg.mixed_precision.framework - , device=cfg.mixed_precision.device) - else: - self._model = Model(user_model, backend=cfg.mixed_precision.framework) - else: # pragma: no cover - if cfg.mixed_precision.framework == "pytorch_ipex": - from neural_compressor.model.torch_model import IPEXModel - assert type(user_model) == IPEXModel, \ - "The backend is ipex, please wrap the model with IPEXModel class!" - elif cfg.mixed_precision.framework == "pytorch_fx": - from neural_compressor.model.torch_model import PyTorchFXModel - assert type(user_model) == PyTorchFXModel, \ - "The backend is default, please wrap the model with PyTorchFXModel class!" 
- - self._model = user_model - - if 'tensorflow' in cfg.mixed_precision.framework: - self._model.name = cfg.mixed_precision.model_name - self._model.output_tensor_names = cfg.mixed_precision.outputs - self._model.input_tensor_names = cfg.mixed_precision.inputs - self._model.workspace_path = options.workspace - - @property - def metric(self): - """Get `metric` attribute.""" - return self._metric - - @metric.setter - def metric(self, user_metric): - """Set metric class or a dict of built-in metric configures. - - 1. neural_compressor have many built-in metrics, user can pass a metric configure dict to tell neural - compressor what metric will be use. - You can set multi-metrics to evaluate the performance of a specific model. - Single metric: - {topk: 1} - - Multi-metrics: - {topk: 1, - MSE: {compare_label: False}, - weight: [0.5, 0.5], - higher_is_better: [True, False] - } - Refer to this [file](../docs/source/metric.md#supported-built-in-metric-matrix) for built-in metric list - 2. User also can set specific metric through this api. The metric class should take the outputs of the model or - postprocess(if have) as inputs, neural_compressor built-in metric always take(predictions, labels) as inputs - for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric. - - Args: - user_metric(neural_compressor.metric.Metric or a dict of built-in metric configures): - The object of Metric or a dict of built-in metric configurations. - """ - from .metric import Metric as NCMetric, METRICS - if isinstance(user_metric, dict): - metric_cfg = user_metric - else: - if isinstance(user_metric, NCMetric): - name = user_metric.name - metric_cls = user_metric.metric_cls - metric_cfg = {name: {**user_metric.kwargs}} - else: - for i in ['reset', 'update', 'result']: - assert hasattr(user_metric, i), 'Please realise {} function' \ - 'in user defined metric'.format(i) - metric_cls = type(user_metric).__name__ - name = 'user_' + metric_cls - metric_cfg = {name: id(user_metric)} - metrics = METRICS(self.conf.mixed_precision.framework) - metrics.register(name, metric_cls) - self._metric = metric_cfg - - - @property - def eval_func(self): - """Get evaluation function.""" - assert False, 'Should not try to get the value of `eval_func` attribute.' - - @eval_func.setter - def eval_func(self, user_eval_func): - """Set evaluation function provided by user. - - Args: - user_eval_func: This function takes "model" as input parameter - and executes entire evaluation process with self - contained metrics. If eval_func set, - an evaluation process must be triggered - to make evaluation of the model executed. - """ - self._eval_func = user_eval_func def fit(model, config=None, @@ -353,7 +72,7 @@ def fit(model, mixed_precision. Returns: - A _MixedPrecision object that generates low precision model across various DL frameworks. + A Mixed precision model across various DL frameworks. Raises: AssertionError. 
@@ -366,39 +85,69 @@ def fit(model, conf = MixedPrecisionConfig() converted_model = mix_precision.fit(model, config=conf) """ - converter = _MixedPrecision(config) + if eval_dataloader is not None: + check_dataloader(eval_dataloader) + if config.precision in config.excluded_precisions: - logger.warning("Target precision is in excluded_precisions, "\ - "please modify precision or excluded_precisions to make it understandable.") + logger.warning("Target precision is in excluded_precisions, " + "please modify precision or excluded_precisions to make it understandable.") sys.exit(0) - precisions = list(set(config.precision) - set(config.excluded_precisions)) - converter.conf.mixed_precision.precisions = precisions - converter.model = model - if ('bf16' in precisions or 'fp16' in precisions) and \ - converter.conf.mixed_precision.framework == "onnxruntime": # pragma: no cover - if config.device == "cpu": - logger.warning("Mix precision exits due to device isn't gpu for onnx models.") - sys.exit(0) - elif config.backend != "onnxrt_cuda_ep": - logger.warning("Mix precision exits due to backend isn't onnxrt_cuda_ep for onnx models.") - sys.exit(0) - elif 'bf16' in precisions and not CpuInfo().bf16 and \ - converter.conf.mixed_precision.framework != "onnxruntime": # pragma: no cover - if os.getenv('FORCE_BF16') == '1': - logger.warning("Mix precision will generate bf16 graph although " \ - "the hardware doesn't support bf16 instruction.") - else: - logger.warning("Mix precision exits due to the hardware " \ - "doesn't support bf16 instruction.") - sys.exit(0) - elif 'fp16' in precisions and converter.conf.mixed_precision.framework != "onnxruntime": - logger.warning("Currently mix precision only supports fp16 for onnx models.") - sys.exit(0) - if eval_func is not None: - converter.eval_func = eval_func - if eval_dataloader is not None: - converter.eval_dataloader = eval_dataloader + wrapped_model = wrap_model_from(model, config) + if eval_metric is not None: - converter.metric = eval_metric - return converter() + metric = register_customer_metric(eval_metric, config.framework) + else: + metric = None + + conf = _Config(mixed_precision=config, + quantization=None, + benchmark=None, + pruning=None, + distillation=None, + nas=None) + seed = options.random_seed + random.seed(seed) + np.random.seed(seed) + + _resume = None + # check if interrupted tuning procedure exists. if yes, it will resume the + # whole auto tune process. + resume_file = os.path.abspath(os.path.expanduser( + options.resume_from)) if options.workspace and options.resume_from else None + if resume_file: + assert os.path.exists(resume_file), \ + "The specified resume file {} doesn't exist!".format(resume_file) + with open(resume_file, 'rb') as f: + _resume = pickle.load(f).__dict__ + + strategy = STRATEGIES['automixedprecision']( + model=wrapped_model, + conf=conf, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=metric, + resume=_resume, + q_hooks=None) + + try: + with time_limit(config.tuning_criterion.timeout): + strategy.traverse() + except KeyboardInterrupt: + pass + except Exception as e: + logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) + import traceback + traceback.print_exc() + finally: + if strategy.best_qmodel: + logger.info( + "Specified timeout or max trials is reached! " + "Found a quantized model which meet accuracy goal. Exit.") + strategy.deploy_config() + else: + logger.error( + "Specified timeout or max trials is reached! 
" + "Not found any quantized model which meet accuracy goal. Exit.") + + return strategy.best_qmodel diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index 7b5e9252e74..fb408c6f8f5 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -21,6 +21,7 @@ import os import importlib import sys +from neural_compressor.config import options from neural_compressor.utils.utility import LazyImport from neural_compressor.utils import logger from neural_compressor.model.base_model import BaseModel @@ -186,3 +187,69 @@ def __new__(cls, root, **kwargs): else: model = MODELS[backend](root, **kwargs) return model + + +def wrap_model_from(user_model, conf): + """Wrap the user model and dispatch to framework specific internal model object. + + Args: + user_model: user are supported to set model from original framework model format + (eg, tensorflow frozen_pb or path to a saved model), but not recommended. + Best practice is to set from a initialized neural_compressor.common.Model. + If tensorflow model is used, model's inputs/outputs will be auto inferred, + but sometimes auto inferred inputs/outputs will not meet your requests, + set them manually in config yaml file. Another corner case is slim model + of tensorflow, be careful of the name of model configured in yaml file, + make sure the name is in supported slim model list. + conf: the instance of PostTrainingQuantConfig or QuantizationAwareTrainingConfig or MixedPrecisionConfig. + """ + if conf.framework is None: + if isinstance(user_model, BaseModel): # pragma: no cover + conf.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] + if conf.backend == "ipex": + assert conf.framework == "pytorch_ipex",\ + "Please wrap the model with correct Model class!" + if conf.backend == "itex": + if get_model_type(user_model.model) == 'keras': + assert conf.framework == "keras",\ + "Please wrap the model with KerasModel class!" + else: + assert conf.framework == "pytorch_itex", \ + "Please wrap the model with TensorflowModel class!" + else: + framework = get_model_fwk_name(user_model) + if framework == "tensorflow": + if get_model_type(user_model) == 'keras' and conf.backend == 'itex': + framework = 'keras' + if framework == "pytorch": + if conf.backend == "default": + framework = "pytorch_fx" + elif conf.backend == "ipex": + framework = "pytorch_ipex" + conf.framework = framework + + if not isinstance(user_model, BaseModel): + logger.warning("Force convert framework model to neural_compressor model.") + if "tensorflow" in conf.framework or conf.framework == "keras": + model = Model(user_model, backend=conf.framework, device=conf.device) + else: + model = Model(user_model, backend=conf.framework) + else: # pragma: no cover + if conf.framework == "pytorch_ipex": + from neural_compressor.model.torch_model import IPEXModel + assert type(user_model) == IPEXModel, \ + "The backend is ipex, please wrap the model with IPEXModel class!" + elif conf.framework == "pytorch_fx": + from neural_compressor.model.torch_model import PyTorchFXModel + assert type(user_model) == PyTorchFXModel, \ + "The backend is default, please wrap the model with PyTorchFXModel class!" 
+ + model = user_model + + if 'tensorflow' in conf.framework: + model.name = conf.model_name + model.output_tensor_names = conf.outputs + model.input_tensor_names = conf.inputs + model.workspace_path = options.workspace + + return model diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 747cfa4d38d..6e29c2d6de2 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -21,372 +21,14 @@ import random import numpy as np from .config import _Config, options -from .model.model import BaseModel, get_model_fwk_name, get_model_type, Model, MODELS +from .data.dataloaders.dataloader import check_dataloader +from .metric.metric import register_customer_metric +from .model.model import wrap_model_from from .strategy import STRATEGIES from .utils import logger from .utils.utility import time_limit, dump_class_attrs -class _PostTrainingQuant: - """Post Training Quantization class. - - It automatically searches for optimal quantization recipes for low precision model inference, - achieving best tuning objectives like inference performance within accuracy loss constraints. - Tuner abstracts out the differences of quantization APIs across various DL frameworks - and brings a unified API for automatic quantization that works on frameworks including - tensorflow, pytorch and mxnet. - Since DL use cases vary in the accuracy metrics (Top-1, MAP, ROC etc.), loss criteria - (<1% or <0.1% etc.) and tuning objectives (performance, memory footprint etc.). - - Example:: - - conf = PostTrainingQuantConfig() - quantizer = _PostTrainingQuant(conf) - quantizer.model = model - quantizer.eval_func = eval_func - quantizer.calib_dataloader = calib_dataloader - q_model = quantizer.fit() - """ - def __init__(self, conf, **kwargs): - """Initialize the parameters. - - Args: - conf (PostTrainingQuantConfig): A instance of PostTrainingQuantConfig to - specify the quantization behavior. - """ - self.conf = _Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) - seed = options.random_seed - random.seed(seed) - np.random.seed(seed) - self._train_func = None - self._calib_dataloader = None - self._eval_func = None - self._eval_dataloader = None - self._model = None - self._metric = None - self.callbacks = None - if "model" in kwargs: - self.model = kwargs["model"] - - def pre_proccess(self): - """Create strategy to optimize model.""" - cfg = self.conf - - strategy = cfg.quantization.tuning_criterion.strategy - - if cfg.quantization.quant_level == "auto": - strategy = "auto" - - elif cfg.quantization.quant_level == 0: - strategy = "conservative" - - if strategy == "mse_v2": - if not (cfg.quantization.framework.startswith("tensorflow")\ - or cfg.quantization.framework == 'pytorch_fx'): # pragma: no cover - strategy = "basic" - logger.warning(f"MSE_v2 does not support {cfg.quantization.framework} now, use basic instead.") - logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") - assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) - - logger.info(f"Start {strategy} tuning.") - _resume = None - # check if interrupted tuning procedure exists. if yes, it will resume the - # whole auto tune process. 
- self.resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ - if options.workspace and options.resume_from else None - if self.resume_file: - assert os.path.exists(self.resume_file), \ - "The specified resume file {} doesn't exist!".format(self.resume_file) - with open(self.resume_file, 'rb') as f: - _resume = pickle.load(f).__dict__ - - if self._eval_func is None and self._eval_dataloader is None: # pragma: no cover - logger.info("Quantize model without tuning!") - - self.strategy = STRATEGIES[strategy]( - model = self.model, - conf = self.conf, - q_dataloader=self._calib_dataloader, - q_func=self._train_func, - eval_func=self._eval_func, - eval_dataloader=self._eval_dataloader, - eval_metric=self.metric, - resume=_resume, - q_hooks=self.callbacks.hooks if self.callbacks is not None else None) - - def execute(self): - """Quantization execute routinue based on strategy design.""" - try: - with time_limit(self.conf.quantization.tuning_criterion.timeout): - logger.debug("Dump user configuration:") - conf_dict = {} - dump_class_attrs(self.conf, conf_dict) - logger.info(conf_dict) - self.strategy.traverse() - except KeyboardInterrupt: - pass - except Exception as e: # pragma: no cover - logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) - import traceback - traceback.print_exc() - finally: - if self.strategy.best_qmodel: - logger.info( - "Specified timeout or max trials is reached! " - "Found a quantized model which meet accuracy goal. Exit.") - self.strategy.deploy_config() - else: - logger.error( - "Specified timeout or max trials is reached! " - "Not found any quantized model which meet accuracy goal. Exit.") - - return self.strategy.best_qmodel - - def __call__(self): - """Execute this class. - - For derived classes(Pruning, Quantization, etc.), an override function is required. - """ - self.pre_proccess() - results = self.execute() - return results - - fit = __call__ - - @property - def model(self): - """Getter of model in neural_compressor.model.""" - return self._model - - @model.setter - def model(self, user_model): - """Set the user model and dispatch to framework specific internal model object. - - Args: - user_model: user are supported to set model from original framework model format - (eg, tensorflow frozen_pb or path to a saved model), - but not recommended. Best practice is to set from a initialized - neural_compressor.model.Model. - If tensorflow model is used, model's inputs/outputs will be - auto inferenced, but sometimes auto inferenced - inputs/outputs will not meet your requests, - set them manually in config yaml file. - Another corner case is slim model of tensorflow, - be careful of the name of model configured in yaml file, - make sure the name is in supported slim model list. - - """ - cfg = self.conf - if cfg.quantization.framework is None: - if isinstance(user_model, BaseModel): # pragma: no cover - cfg.quantization.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] - if cfg.quantization.backend == "ipex": - assert cfg.quantization.framework == "pytorch_ipex",\ - "Please wrap the model with correct Model class!" - if cfg.quantization.backend == "itex": - if get_model_type(user_model.model) == 'keras': - assert cfg.quantization.framework == "keras",\ - "Please wrap the model with KerasModel class!" - else: - assert cfg.quantization.framework == "pytorch_itex", \ - "Please wrap the model with TensorflowModel class!" 
- else: - framework = get_model_fwk_name(user_model) - if framework == "tensorflow": - if get_model_type(user_model) == 'keras' and cfg.quantization.backend == 'itex': - framework = 'keras' - if framework == "pytorch": - if cfg.quantization.backend == "default": - framework = "pytorch_fx" - elif cfg.quantization.backend == "ipex": - framework = "pytorch_ipex" - cfg.quantization.framework = framework - - if not isinstance(user_model, BaseModel): - logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in cfg.quantization.framework or cfg.quantization.framework == "keras": - self._model = Model(user_model, backend=cfg.quantization.framework, device=cfg.quantization.device) - else: - self._model = Model(user_model, backend=cfg.quantization.framework) - else: # pragma: no cover - if cfg.quantization.framework == "pytorch_ipex": - from neural_compressor.model.torch_model import IPEXModel - assert type(user_model) == IPEXModel, \ - "The backend is ipex, please wrap the model with IPEXModel class!" - elif cfg.quantization.framework == "pytorch_fx": - from neural_compressor.model.torch_model import PyTorchFXModel - assert type(user_model) == PyTorchFXModel, \ - "The backend is default, please wrap the model with PyTorchFXModel class!" - - self._model = user_model - - if 'tensorflow' in cfg.quantization.framework: - self._model.name = cfg.quantization.model_name - self._model.output_tensor_names = cfg.quantization.outputs - self._model.input_tensor_names = cfg.quantization.inputs - self._model.workspace_path = options.workspace - - @property - def eval_func(self): - """Not support get eval_func.""" - assert False, 'Should not try to get the value of `eval_func` attribute.' - - @eval_func.setter - def eval_func(self, user_eval_func): - """Eval function for component. - - Args: - user_eval_func: This function takes "model" as input parameter - and executes entire evaluation process with self - contained metrics. If eval_func set, - an evaluation process must be triggered - to make evaluation of the model executed. - """ - self._eval_func = user_eval_func - - @property - def eval_dataloader(self): - """Getter to eval dataloader.""" - return self._eval_dataloader - - @eval_dataloader.setter - def eval_dataloader(self, dataloader): - """Set Data loader for evaluation of component. - - It is iterable and the batched data should consists of yield (input, _). - the input in the batched data will be used for model inference, so it - should satisfy the input format of specific model. - - Args: - dataloader(generator): user are supported to set a user defined dataloader - which meet the requirements that can yield tuple of - (input, label)/(input, _) batched data. - """ - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' - - self._eval_dataloader = dataloader - - @property - def metric(self): - """Get `metric` attribute.""" - return self._metric - - @metric.setter - def metric(self, user_metric): - """Set metric class or a dict of built-in metric configures. - - 1. neural_compressor have many built-in metrics, - user can pass a metric configure dict to tell neural compressor what metric will be use. - You also can set multi-metrics to evaluate the performance of a specific model. 
- Single metric: - {topk: 1} - Multi-metrics: - {topk: 1, - MSE: {compare_label: False}, - weight: [0.5, 0.5], - higher_is_better: [True, False] - } - For the built-in metrics, please refer to below link: - https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix. - - 2. User also can get the built-in metrics by neural_compressor.Metric: - Metric(name="topk", k=1) - 3. User also can set specific metric through this api. The metric class should take the outputs of the model or - postprocess(if have) as inputs, neural_compressor built-in metric always take(predictions, labels) - as inputs for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric. - - Args: - user_metric(neural_compressor.metric.Metric or a dict of built-in metric configurations): - The object of Metric or a dict of built-in metric configurations. - - """ - from .metric import Metric as NCMetric - from .metric import METRICS - if isinstance(user_metric, dict): - metric_cfg = user_metric - else: - if isinstance(user_metric, NCMetric): - if user_metric.metric_cls is None: - name = user_metric.name - metric_cls = METRICS(self.conf.quantization.framework).metrics[name] - metric_cfg = {name: {**user_metric.kwargs}} - self._metric = metric_cfg - return - else: - name = user_metric.name - metric_cls = user_metric.metric_cls - metric_cfg = {name: {**user_metric.kwargs}} - else: - for i in ['reset', 'update', 'result']: - assert hasattr(user_metric, i), 'Please realise {} function' \ - 'in user defined metric'.format(i) - metric_cls = type(user_metric).__name__ - name = 'user_' + metric_cls - metric_cfg = {name: id(user_metric)} - metrics = METRICS(self.conf.quantization.framework) - metrics.register(name, metric_cls) - self._metric = metric_cfg - - @property - def calib_func(self): - """Not support get train_func.""" - assert False, 'Should not try to get the value of `train_func` attribute.' - - @calib_func.setter - def calib_func(self, calib_func): - """Calibrate scale and zero for quantization. - - Args: - calib_func: This function takes "model" as input parameter - and executes entire evaluation process. If calib_func set, - an evaluation process must be triggered and user should - set eval_dataloader with metric configured or directly eval_func - to make evaluation of the model executed. - """ - self._train_func = calib_func - - @property - def calib_dataloader(self): - """Get `calib_dataloader` attribute.""" - return self._calib_dataloader - - @calib_dataloader.setter - def calib_dataloader(self, dataloader): - """Set Data loader for calibration, mandatory for post-training quantization. - - If calib_func is not be set then user must set calibration dataloader, - and calibration is iterable and the batched data should consists of a tuple like - (input, label) if the calibration dataset containing label, or yield (input, _) - for label-free calibration dataset, the input in the batched data will be used for - model inference, so it should satisfy the input format of specific model. - In calibration process, label of data loader will not be used and - neither the postprocess and metric. User only need to set - calib_dataloader when calib_dataloader can not be configured from yaml file. - - Args: - dataloader(generator): user are supported to set a user defined dataloader - which meet the requirements that can yield tuple of - (input, label)/(input, _) batched data. 
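To make the __iter__/batch_size contract checked by this setter concrete, a minimal sketch of a user-defined, label-free calibration dataloader; the class name and the in-memory data source are illustrative:

    import numpy as np

    class CalibDataLoader:
        """Illustrative dataloader: only __iter__ and a batch_size attribute are required."""
        def __init__(self, samples, batch_size=1):
            self.samples = samples
            self.batch_size = batch_size

        def __iter__(self):
            for i in range(0, len(self.samples), self.batch_size):
                batch = np.stack(self.samples[i:i + self.batch_size])
                yield batch, None  # (input, _) pairs for a label-free calibration set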
Another good - practice is to use neural_compressor.data.DataLoader - to initialize a neural_compressor dataloader object. Notice - neural_compressor.data.DataLoader is just a wrapper of the - information needed to build a dataloader, it can't yield - batched data and only in this setter method - a 'real' calib_dataloader will be created, - the reason is we have to know the framework info - and only after the Quantization object created then - framework infomation can be known. - Future we will support creating iterable dataloader - from neural_compressor.data.DataLoader - """ - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' - self._calib_dataloader = dataloader - - def fit(model, conf, calib_dataloader=None, @@ -450,9 +92,29 @@ def eval_func(model): Tuner will combine model, eval_dataloader and pre-defined metrics to run evaluation process. - eval_metric (dict or obj): Set metric class or a dict of built-in metric configures, + eval_metric (dict or obj): Set metric class or a dict of built-in metric configures, and neural_compressor will initialize this class when evaluation. + 1. neural_compressor have many built-in metrics, + user can pass a metric configure dict to tell neural compressor what metric will be use. + You also can set multi-metrics to evaluate the performance of a specific model. + Single metric: + {topk: 1} + Multi-metrics: + {topk: 1, + MSE: {compare_label: False}, + weight: [0.5, 0.5], + higher_is_better: [True, False] + } + For the built-in metrics, please refer to below link: + https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix. + + 2. User also can get the built-in metrics by neural_compressor.Metric: + Metric(name="topk", k=1) + 3. User also can set specific metric through this api. The metric class should take the outputs of the model or + postprocess(if have) as inputs, neural_compressor built-in metric always take(predictions, labels) + as inputs for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric. 
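To make the third option concrete, a minimal sketch of a user-defined metric that satisfies the reset/update/result contract described above; the class name and the accuracy logic are illustrative:

    import numpy as np

    class MyAccuracy:
        """Illustrative user metric: update() receives (predictions, labels)."""
        def __init__(self):
            self.correct, self.total = 0, 0

        def reset(self):
            self.correct, self.total = 0, 0

        def update(self, predictions, labels):
            preds = np.argmax(np.asarray(predictions), axis=-1)
            labels = np.asarray(labels).reshape(preds.shape)
            self.correct += int((preds == labels).sum())
            self.total += labels.size

        def result(self):
            return self.correct / self.total if self.total else 0.0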
+ Example:: # Quantization code for PTQ @@ -474,16 +136,86 @@ def eval_func(model): # Saved quantized model in ./saved folder q_model.save("./saved") """ - quantizer = _PostTrainingQuant(conf) - quantizer.model = model - if eval_func is not None: - quantizer.eval_func = eval_func if calib_dataloader is not None: - quantizer.calib_dataloader = calib_dataloader - if calib_func is not None: - quantizer.calib_func = calib_func + check_dataloader(calib_dataloader) if eval_dataloader is not None: - quantizer.eval_dataloader = eval_dataloader + check_dataloader(eval_dataloader) + + seed = options.random_seed + random.seed(seed) + np.random.seed(seed) + wrapped_model = wrap_model_from(model, conf) + if eval_metric is not None: - quantizer.metric = eval_metric - return quantizer() + metric = register_customer_metric(eval_metric, conf.framework) + else: + metric = None + + config = _Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) + strategy_name = conf.tuning_criterion.strategy + + if conf.quant_level == "auto": + strategy_name = "auto" + elif conf.quant_level == 0: + strategy_name = "conservative" + + if strategy_name == "mse_v2": + if not (conf.framework.startswith("tensorflow")\ + or conf.framework == 'pytorch_fx'): # pragma: no cover + strategy_name = "basic" + logger.warning(f"MSE_v2 does not support {conf.framework} now, use basic instead.") + logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") + assert strategy_name in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy_name) + + logger.info(f"Start {strategy_name} tuning.") + _resume = None + # check if interrupted tuning procedure exists. if yes, it will resume the + # whole auto tune process. + resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ + if options.workspace and options.resume_from else None + if resume_file: + assert os.path.exists(resume_file), \ + "The specified resume file {} doesn't exist!".format(resume_file) + with open(resume_file, 'rb') as f: + _resume = pickle.load(f).__dict__ + + if eval_func is None and eval_dataloader is None: # pragma: no cover + logger.info("Quantize model without tuning!") + + strategy = STRATEGIES[strategy_name]( + model=wrapped_model, + conf=config, + q_dataloader=calib_dataloader, + q_func=calib_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=metric, + resume=_resume, + q_hooks=None + ) + + try: + with time_limit(conf.tuning_criterion.timeout): + logger.debug("Dump user configuration:") + conf_dict = {} + dump_class_attrs(conf, conf_dict) + logger.info(conf_dict) + strategy.traverse() + except KeyboardInterrupt: + pass + except Exception as e: # pragma: no cover + logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) + import traceback + traceback.print_exc() + finally: + if strategy.best_qmodel: + logger.info( + "Specified timeout or max trials is reached! " + "Found a quantized model which meet accuracy goal. Exit.") + strategy.deploy_config() + else: + logger.error( + "Specified timeout or max trials is reached! " + "Not found any quantized model which meet accuracy goal. 
Exit.") + + return strategy.best_qmodel diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py index 43d95c26e0d..a146eaff77b 100644 --- a/neural_compressor/utils/utility.py +++ b/neural_compressor/utils/utility.py @@ -54,22 +54,27 @@ 'mxnet': ['mxnet'], } + def version1_lt_version2(version1, version2): """Check whether version1 is less than version2.""" return parse_version(version1) < parse_version(version2) - + + def version1_gt_version2(version1, version2): """Check whether version1 is greater than version2.""" return parse_version(version1) > parse_version(version2) + def version1_eq_version2(version1, version2): """Check whether version1 is equal to version2.""" return parse_version(version1) == parse_version(version2) + def version1_gte_version2(version1, version2): """Check whether version1 is greater than version2 or is equal to it.""" return parse_version(version1) > parse_version(version2) or parse_version(version1) == parse_version(version2) + def version1_lte_version2(version1, version2): """Check whether version1 is less than version2 or is equal to it.""" return parse_version(version1) < parse_version(version2) or parse_version(version1) == parse_version(version2) @@ -109,7 +114,7 @@ def __call__(self, *args, **kwargs): def singleton(cls): """Not displayed in API Docs. - + Singleton decorater. """ instances = {} @@ -173,7 +178,7 @@ def get_size(obj, seen=None): def compute_sparsity(tensor): """Compute the sparsity. - + Args: tensor: Tensorflow or Pytorch tensor @@ -411,7 +416,6 @@ def DequantizeWeight(weight_tensor, min_filter_tensor, max_filter_tensor): weight_tensor[:,:,:,i] = weight_tensor[:,:,:,i] * ((max_filter_tensor[i] - min_filter_tensor[i])/ 127.0) - def Dequantize(data, scale_info): """Dequantize the data with the scale_info.""" import numpy as np @@ -425,7 +429,7 @@ def Dequantize(data, scale_info): class CaptureOutputToFile(object): """Not displayed in API Docs. - + Capture the output to file. """ def __init__(self, tmp_file_path, stream=sys.stderr): @@ -450,7 +454,7 @@ class Statistics(): """The statistics printer.""" def __init__(self, data, header, field_names, output_handle=logger.info): """Init a Statistics object. - + Args: data: The statistics data header: The table header @@ -498,6 +502,7 @@ class GLOBAL_STATE(): """Access the global model.""" STATE = MODE.QUANTIZATION + def load_data_from_pkl(path, filename): """Load data from local pkl file. @@ -513,6 +518,7 @@ def load_data_from_pkl(path, filename): except FileExistsError: logging.getLogger("neural_compressor").info('Can not open %s.' % path) + def dump_data_to_local(data, path, filename): """Dump data to local as pkl file. @@ -533,7 +539,6 @@ def dump_data_to_local(data, path, filename): logging.getLogger("neural_compressor").info("Dumped data to %s" % file_path) - def set_random_seed(seed: int): """Set the random seed in config.""" from neural_compressor.config import options @@ -557,6 +562,7 @@ def set_tensorboard(tensorboard: bool): from neural_compressor.config import options options.tensorboard = tensorboard + def show_memory_info(hint): """Show process full memory.""" pid = os.getpid() @@ -567,7 +573,7 @@ def show_memory_info(hint): print('{} memory used: {} MB'.format(hint, memory)) -def dump_class_attrs(obj, result = {}): +def dump_class_attrs(obj, result={}): """Dump the attributes and values of a config class. 
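A hedged usage sketch of this helper, mirroring the conf_dict = {}; dump_class_attrs(conf, conf_dict) call made by the tuning entry point earlier in this patch; the config object chosen here is only an example:

    from neural_compressor import PostTrainingQuantConfig
    from neural_compressor.utils.utility import dump_class_attrs

    conf = PostTrainingQuantConfig()
    dumped = {}
    dump_class_attrs(conf, dumped)  # fills `dumped` with the config's attribute tree
    print(dumped)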
Args: @@ -586,9 +592,7 @@ def dump_class_attrs(obj, result = {}): else: attr = attr[1:] if attr.startswith('_') else attr result[obj_name][attr] = value - - - + class DotDict(dict): """access yaml using attributes instead of using the dictionary notation. @@ -649,7 +653,6 @@ def __setstate__(self, d): __setattr__, __getattr__ = __setitem__, __getitem__ - def compare_objects(obj1, obj2, ignore_attrs): """Compare two objects and ignore the specified attributes. @@ -673,4 +676,4 @@ def compare_objects(obj1, obj2, ignore_attrs): # Compare the attributes, ignoring the specified ones for attr in attrs1 - set(ignore_attrs): if getattr(obj1, attr) != getattr(obj2, attr): - return False \ No newline at end of file + return False diff --git a/test/benchmark/test_benchmark_2.x.py b/test/benchmark/test_benchmark_2.x.py index 88a868e1a85..c812059de0f 100644 --- a/test/benchmark/test_benchmark_2.x.py +++ b/test/benchmark/test_benchmark_2.x.py @@ -8,6 +8,8 @@ import tempfile import re from neural_compressor.adaptor.tf_utils.util import write_graph +from neural_compressor.benchmark import benchmark_with_raw_cmd +from neural_compressor.config import BenchmarkConfig def build_benchmark(): @@ -57,22 +59,42 @@ def build_benchmark(): with open('fake_data_25.py', "w", encoding="utf-8") as f: f.writelines(fake_data_25) + def build_benchmark2(): seq = [ "from argparse import ArgumentParser\n", "arg_parser = ArgumentParser(description='Parse args')\n", "arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input model')\n", "args = arg_parser.parse_args()\n", - "from neural_compressor.benchmark import fit\n" + "import time\n", + "import numpy as np\n", + "from neural_compressor.benchmark import benchmark_with_raw_cmd\n", "from neural_compressor.data import Datasets\n", + "from neural_compressor.model import Model\n", "dataset = Datasets('tensorflow')['dummy']((5, 32, 32, 1), label=True)\n", "from neural_compressor.data.dataloaders.dataloader import DataLoader\n", "b_dataloader = DataLoader(framework='tensorflow', dataset=dataset)\n", - "fit(args.input_model, b_dataloader=b_dataloader)\n" + "model = Model(args.input_model)\n", + "input_tensor = model.input_tensor\n", + "output_tensor = model.output_tensor if len(model.output_tensor)>1 else model.output_tensor[0]\n", + "iteration = 10\n", + "latency_list = []\n", + "for idx, (inputs, labels) in enumerate(b_dataloader):\n", + " inputs = np.array([inputs])\n", + " feed_dict = dict(zip(input_tensor, inputs))\n", + " start = time.time()\n", + " predictions = model.sess.run(output_tensor, feed_dict)\n", + " end = time.time()\n", + " latency_list.append(end-start)\n", + " if idx + 1 == iteration:\n", + " break\n", + "latency = np.array(latency_list).mean()\n", + "print('Latency: {:.3f} ms'.format(latency * 1000))\n", + "print('Throughput: {:.3f} images/sec'.format(1. 
/ latency))\n" ] - with open('fake2.py', "w", encoding="utf-8") as f: + with open('fake_raw_cmd.py', "w", encoding="utf-8") as f: f.writelines(seq) @@ -126,14 +148,14 @@ def setUpClass(self): def tearDownClass(self): if os.path.exists('fake.py'): os.remove('fake.py') - if os.path.exists('fake2.py'): - os.remove('fake2.py') if os.path.exists('fake_data_5.py'): os.remove('fake_data_5.py') if os.path.exists('fake_data_15.py'): os.remove('fake_data_15.py') if os.path.exists('fake_data_25.py'): os.remove('fake_data_25.py') + if os.path.exists('fake_raw_cmd.py'): + os.remove('fake_raw_cmd.py') shutil.rmtree('nc_workspace', ignore_errors=True) def test_benchmark(self): @@ -172,6 +194,16 @@ def test_benchmark_data_25(self): self.assertIsNotNone(throughput) os.system("rm *.log") + def test_benchmark_raw_cmd(self): + conf = BenchmarkConfig(warmup=5, iteration=10, cores_per_instance=4, num_of_instance=2) + raw_cmd = "python fake_raw_cmd.py --input_model={}".format(self.graph_path) + benchmark_with_raw_cmd(raw_cmd, config=conf) + for i in range(2): + with open(f'2_4_{i}.log', "r") as f: + for line in f: + throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line) + self.assertIsNotNone(throughput) + if __name__ == "__main__": unittest.main() diff --git a/test/mixed_precision/test_mixed_precision.py b/test/mixed_precision/test_mixed_precision.py index 938530a7bcd..099639b0159 100644 --- a/test/mixed_precision/test_mixed_precision.py +++ b/test/mixed_precision/test_mixed_precision.py @@ -315,7 +315,7 @@ def test_mixed_precision_with_evaluation(self): eval_dataloader=self.matmul_dataloader, eval_metric=ONNXRT_QL_METRICS["MSE"]()) self.assertTrue(any([i.op_type == 'Cast' for i in output_model.nodes()])) - + def test_mixed_precision_with_evaluation_old_api(self): from neural_compressor.conf.config import MixedPrecision_Conf from neural_compressor.experimental import MixedPrecision From 130349ff5134a7e3f5b0481ff0f6cf31e77c1751 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Thu, 27 Apr 2023 16:44:40 +0800 Subject: [PATCH 02/14] Update API Signed-off-by: Cheng, Penghui --- neural_compressor/adaptor/onnxrt.py | 4 ++-- neural_compressor/adaptor/pytorch.py | 2 +- neural_compressor/config.py | 22 ++++++++++---------- neural_compressor/mix_precision.py | 2 +- test/config/test_config_2.x.py | 2 +- test/mixed_precision/test_mixed_precision.py | 4 ++-- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/neural_compressor/adaptor/onnxrt.py b/neural_compressor/adaptor/onnxrt.py index 32737b8c328..3912568274a 100644 --- a/neural_compressor/adaptor/onnxrt.py +++ b/neural_compressor/adaptor/onnxrt.py @@ -117,7 +117,7 @@ def __init__(self, framework_specific_info): static=self.static, format=self.format, local_config_file=os.path.join(os.path.dirname(__file__), config_file)) - + self.work_space = framework_specific_info["workspace_path"] self.reduce_range = framework_specific_info["reduce_range"] if \ "reduce_range" in framework_specific_info else not CpuInfo().vnni @@ -133,7 +133,7 @@ def __init__(self, framework_specific_info): continue self.quantizable_op_types += \ self.query_handler.get_op_types_by_precision(precision=precision) - + if self.backend == 'TensorrtExecutionProvider': self.recipes['add_qdq_pair_to_weight'] = True self.recipes['dedicated_qdq_pair'] = True diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 86b1e463d61..2a40c113966 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -4253,7 
+4253,7 @@ def get_quantization_capability(self, datatype='int8'): def get_quant_datatypes(self): """Got low-precision data types for quantization. - + Collects all data types for quantization, such as int8, int4. """ # TODO to handle other data types such FP8, FP8E4M3 diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 670f3573a67..88cf8a3c6ad 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -1647,7 +1647,7 @@ class MixedPrecisionConfig(object): backend (str, optional): Backend for model execution. Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', default is 'default'. - precision (str, optional): Target precision for mix precision conversion. + precisions ([str, list], optional): Target precision for mix precision conversion. Support 'bf16' and 'fp16', default is 'bf16'. inputs (list, optional): Inputs of model, default is []. outputs (list, optional): Outputs of model, default is []. @@ -1667,7 +1667,7 @@ class MixedPrecisionConfig(object): def __init__(self, device="cpu", backend="default", - precision="bf16", + precisions="bf16", model=None, model_name="", inputs=[], @@ -1683,27 +1683,27 @@ def __init__(self, self.excluded_precisions = excluded_precisions self.accuracy_criterion = accuracy_criterion self.tuning_criterion = tuning_criterion - self.precision = precision - self.use_bf16 = "bf16" in self.precision + self.precisions = precisions + self.use_bf16 = "bf16" in self.precisions self.model = model self.model_name = model_name self._framework = None @property - def precision(self): + def precisions(self): """Get precision.""" - return self._precision + return self._precisions - @precision.setter - def precision(self, precision): + @precisions.setter + def precisions(self, precision): """Set precision.""" if isinstance(precision, str): assert precision in ["fp16", "bf16"], "Only support 'fp16' and 'bf16' for mix precision." - self._precision = [precision] + self._precisions = [precision] elif isinstance(precision, list): assert all([i in ["fp16", "bf16"] for i in precision]), "Only " \ "support 'fp16' and 'bf16' for mix precision." 
- self._precision = precision + self._precisions = precision @property def model(self): @@ -2088,7 +2088,7 @@ def precisions(self, precisions): if not isinstance(precisions, list): precisions = [precisions] for pr in precisions: - _check_value('precision', pr, str, ['int8', 'uint8', 'fp32', 'bf16', 'fp16']) + _check_value('precisions', pr, str, ['int8', 'uint8', 'fp32', 'bf16', 'fp16']) self._precisions = precisions diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 714199f96f6..def66642c4c 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -88,7 +88,7 @@ def fit(model, if eval_dataloader is not None: check_dataloader(eval_dataloader) - if config.precision in config.excluded_precisions: + if config.precisions in config.excluded_precisions: logger.warning("Target precision is in excluded_precisions, " "please modify precision or excluded_precisions to make it understandable.") sys.exit(0) diff --git a/test/config/test_config_2.x.py b/test/config/test_config_2.x.py index e24b5ebc276..71700f9013f 100644 --- a/test/config/test_config_2.x.py +++ b/test/config/test_config_2.x.py @@ -55,7 +55,7 @@ def test_config(self): cfg = MixedPrecisionConfig() a = conf(mixed_precision=cfg) - self.assertEqual(a.mixed_precision.precision, ["bf16"]) + self.assertEqual(a.mixed_precision.precisions, ["bf16"]) cfg = MXNet() cfg.precisions = "bf16" diff --git a/test/mixed_precision/test_mixed_precision.py b/test/mixed_precision/test_mixed_precision.py index 099639b0159..15ba52e9163 100644 --- a/test/mixed_precision/test_mixed_precision.py +++ b/test/mixed_precision/test_mixed_precision.py @@ -274,7 +274,7 @@ def test_on_non_enabled_dtype(self): output_model = mix_precision.fit(self.onnx_model, conf) self.assertEqual(cm.exception.code, 0) - conf = MixedPrecisionConfig(precision="fp16") + conf = MixedPrecisionConfig(precisions="fp16") with self.assertRaises(SystemExit) as cm: output_model = mix_precision.fit(self.tf_model, conf) self.assertEqual(cm.exception.code, 0) @@ -309,7 +309,7 @@ def test_mixed_precision_with_evaluation(self): #self.assertTrue(any([i.op_type == 'Cast' for i in output_model.nodes()])) tuning_criterion = TuningCriterion(max_trials=3, timeout=1000000) - conf = MixedPrecisionConfig(device='gpu', tuning_criterion=tuning_criterion, backend='onnxrt_cuda_ep', precision="fp16") + conf = MixedPrecisionConfig(device='gpu', tuning_criterion=tuning_criterion, backend='onnxrt_cuda_ep', precisions="fp16") output_model = mix_precision.fit(self.onnx_model, conf, eval_dataloader=self.matmul_dataloader, From 8c6798f9979db2a64821099a22b798289b9594ef Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Thu, 27 Apr 2023 17:17:58 +0800 Subject: [PATCH 03/14] Fixed pylink error Signed-off-by: Cheng, Penghui --- neural_compressor/benchmark.py | 151 +++++++++++++++--------------- neural_compressor/model/model.py | 2 +- neural_compressor/quantization.py | 7 +- 3 files changed, 83 insertions(+), 77 deletions(-) diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index d9537856a8a..bd159b09f23 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -137,79 +137,84 @@ def get_bounded_threads(core_ids, threads, sockets): def run_instance(model, conf, b_dataloader=None, b_func=None): - """Run the instance with the configuration. 
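Given the precision -> precisions rename applied throughout this patch, a hedged usage sketch; precisions accepts a single string or a list, as the setter above enforces, and the model path is illustrative:

    from neural_compressor import MixedPrecisionConfig, mix_precision

    conf = MixedPrecisionConfig(precisions="bf16")
    # conf = MixedPrecisionConfig(precisions=["bf16", "fp16"])
    converted = mix_precision.fit("./frozen_model.pb", conf)  # illustrative model path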
- - Args: - runs benchmarking with numactl on specific cores and instances set - by user config and returns model performance - """ - results = {} - if b_func is None: - GLOBAL_STATE.STATE = MODE.BENCHMARK - framework_specific_info = {'device': conf.device, - 'approach': None, - 'random_seed': options.random_seed, - 'backend': conf.backend if conf.backend is not None else 'default', - 'format': 'default'} - framework = conf.framework.lower() - if 'tensorflow' in framework: - framework_specific_info.update({"inputs": conf.inputs, \ - "outputs": conf.outputs, \ - "recipes": {}, \ - 'workspace_path': options.workspace}) - if framework == 'keras': - framework_specific_info.update({'workspace_path': options.workspace}) - if framework == 'mxnet': - framework_specific_info.update({"b_dataloader": b_dataloader}) - if 'onnx' in framework: - framework_specific_info.update( - {'workspace_path': options.workspace, \ - 'graph_optimization': OPTIONS[framework].graph_optimization}) - if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx': - framework_specific_info.update({"workspace_path": options.workspace, - "q_dataloader": None}) - - assert isinstance(model, BaseModel), 'need set neural_compressor Model for quantization....' - - adaptor = FRAMEWORKS[framework](framework_specific_info) - - assert b_dataloader is not None, "dataloader should not be None" - - from neural_compressor.utils.create_obj_from_config import create_eval_func - b_func = create_eval_func(conf.framework, - b_dataloader, - adaptor, - None) - - objectives = MultiObjective(["performance"], - {'relative': 0.1}, - is_measure=True) - - val = objectives.evaluate(b_func, model) - # measurer contain info not only performance(eg, memory, model_size) - # also measurer have result list among steps - acc, _ = val - batch_size = b_dataloader.batch_size - warmup = conf.warmup - if len(objectives.objectives[0].result_list()) < warmup: - if len(objectives.objectives[0].result_list()) > 1 and warmup != 0: - warmup = 1 - else: - warmup = 0 - - result_list = objectives.objectives[0].result_list()[warmup:] - latency = np.array(result_list).mean() / batch_size - results["performance"] = acc, batch_size, result_list - - logger.info("\nbenchmark result:") - for i, res in enumerate(result_list): - logger.debug("Iteration {} result {}:".format(i, res)) - logger.info("Batch size = {}".format(batch_size)) - logger.info("Latency: {:.3f} ms".format(latency * 1000)) - logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) - return results - else: - b_func(model.model) + """Run the instance with the configuration. + + Args: + model (object): The model to be benchmarked. + conf (BenchmarkConfig): The configuration for benchmark containing accuracy goal, + tuning objective and preferred calibration & quantization + tuning space etc. + b_dataloader: The dataloader for frameworks. + b_func: Customized benchmark function. If user passes the dataloader, + then b_func is not needed. 
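For the functional benchmark entry points touched in this file, a hedged usage sketch assembled from the calls exercised by the updated tests; the frozen-graph path is illustrative:

    from neural_compressor.benchmark import fit
    from neural_compressor.config import BenchmarkConfig
    from neural_compressor.data import Datasets
    from neural_compressor.data.dataloaders.dataloader import DataLoader

    conf = BenchmarkConfig(warmup=5, iteration=10, cores_per_instance=4, num_of_instance=2)
    dataset = Datasets("tensorflow")["dummy"]((5, 32, 32, 1), label=True)
    b_dataloader = DataLoader(framework="tensorflow", dataset=dataset)
    fit("./frozen_model.pb", conf, b_dataloader=b_dataloader)  # illustrative model path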
+ """ + results = {} + if b_func is None: + GLOBAL_STATE.STATE = MODE.BENCHMARK + framework_specific_info = {'device': conf.device, + 'approach': None, + 'random_seed': options.random_seed, + 'backend': conf.backend if conf.backend is not None else 'default', + 'format': 'default'} + framework = conf.framework.lower() + if 'tensorflow' in framework: + framework_specific_info.update({"inputs": conf.inputs, \ + "outputs": conf.outputs, \ + "recipes": {}, \ + 'workspace_path': options.workspace}) + if framework == 'keras': + framework_specific_info.update({'workspace_path': options.workspace}) + if framework == 'mxnet': + framework_specific_info.update({"b_dataloader": b_dataloader}) + if 'onnx' in framework: + framework_specific_info.update( + {'workspace_path': options.workspace, \ + 'graph_optimization': OPTIONS[framework].graph_optimization}) + if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx': + framework_specific_info.update({"workspace_path": options.workspace, + "q_dataloader": None}) + + assert isinstance(model, BaseModel), 'need set neural_compressor Model for quantization....' + + adaptor = FRAMEWORKS[framework](framework_specific_info) + + assert b_dataloader is not None, "dataloader should not be None" + + from neural_compressor.utils.create_obj_from_config import create_eval_func + b_func = create_eval_func(conf.framework, + b_dataloader, + adaptor, + None) + + objectives = MultiObjective(["performance"], + {'relative': 0.1}, + is_measure=True) + + val = objectives.evaluate(b_func, model) + # measurer contain info not only performance(eg, memory, model_size) + # also measurer have result list among steps + acc, _ = val + batch_size = b_dataloader.batch_size + warmup = conf.warmup + if len(objectives.objectives[0].result_list()) < warmup: + if len(objectives.objectives[0].result_list()) > 1 and warmup != 0: + warmup = 1 + else: + warmup = 0 + + result_list = objectives.objectives[0].result_list()[warmup:] + latency = np.array(result_list).mean() / batch_size + results["performance"] = acc, batch_size, result_list + + logger.info("\nbenchmark result:") + for i, res in enumerate(result_list): + logger.debug("Iteration {} result {}:".format(i, res)) + logger.info("Batch size = {}".format(batch_size)) + logger.info("Latency: {:.3f} ms".format(latency * 1000)) + logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) + return results + else: + b_func(model.model) def generate_prefix(core_list): diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index fb408c6f8f5..ed110f7bd0f 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -193,7 +193,7 @@ def wrap_model_from(user_model, conf): """Wrap the user model and dispatch to framework specific internal model object. Args: - user_model: user are supported to set model from original framework model format + user_model: user are supported to set model from original framework model format (eg, tensorflow frozen_pb or path to a saved model), but not recommended. Best practice is to set from a initialized neural_compressor.common.Model. If tensorflow model is used, model's inputs/outputs will be auto inferred, diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 6e29c2d6de2..295e16dc9e3 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -111,9 +111,10 @@ def eval_func(model): 2. 
User also can get the built-in metrics by neural_compressor.Metric: Metric(name="topk", k=1) - 3. User also can set specific metric through this api. The metric class should take the outputs of the model or - postprocess(if have) as inputs, neural_compressor built-in metric always take(predictions, labels) - as inputs for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric. + 3. User also can set specific metric through this api. The metric class should take the outputs of + the model or postprocess(if have) as inputs, neural_compressor built-in metric always + take (predictions, labels) as inputs for update, and user_metric.metric_cls should be + sub_class of neural_compressor.metric.BaseMetric. Example:: From 2bc6b54d129363266784fc35276f67e484492bb5 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Thu, 4 May 2023 17:43:30 +0800 Subject: [PATCH 04/14] Removed redundant class in training.py --- docs/source/pruning.md | 2 +- .../pruning/eager/run_glue_no_trainer.py | 2 +- .../eager/run_glue_no_trainer_mixed.py | 2 +- neural_compressor/__init__.py | 5 +- neural_compressor/benchmark.py | 8 +- neural_compressor/compression/__init__.py | 2 - neural_compressor/compression/callbacks.py | 371 +-------------- .../compression/pruner/README.md | 2 +- .../compression/pruner/pruners.py | 67 ++- neural_compressor/compression/pruner/utils.py | 4 +- neural_compressor/mix_precision.py | 4 +- neural_compressor/model/model.py | 135 ++---- neural_compressor/quantization.py | 4 +- neural_compressor/strategy/strategy.py | 3 +- neural_compressor/training.py | 450 ++++++++++-------- .../test_adaptor_pytorch_2.x.py | 9 +- test/pruning_2_plus.x/test_pruning.py | 3 +- test/pruning_2_plus.x/test_pruning_block.py | 3 +- 18 files changed, 382 insertions(+), 694 deletions(-) diff --git a/docs/source/pruning.md b/docs/source/pruning.md index 1ba7d20c93a..1b7f064ca77 100644 --- a/docs/source/pruning.md +++ b/docs/source/pruning.md @@ -301,7 +301,7 @@ The following section exemplifies how to use hooks in user pass-in training func [**Experimental option** ]Modify model and optimizer. 
```python - from neural_compressor.training import prepare_pruning, WeightPruningConfig + from neural_compressor import prepare_pruning, WeightPruningConfig config = WeightPruningConfig(configs) prepare_pruning(config, model, optimizer) # modify model and optimizer for epoch in range(num_train_epochs): diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py index 0a8fe2891b1..0fd9c1ad61d 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py @@ -523,7 +523,7 @@ def preprocess_function(examples): # pruner = Pruning(config) # pruner.model = model # pruner.on_train_begin() - from neural_compressor.training import prepare_pruning + from neural_compressor import prepare_pruning prepare_pruning(configs, model, optimizer) diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py index b2c874fee49..9860d7b4e66 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py @@ -46,7 +46,7 @@ ) from transformers.file_utils import get_full_repo_name from transformers.utils.versions import require_version -from neural_compressor.training import prepare_pruning,WeightPruningConfig +from neural_compressor import prepare_pruning, WeightPruningConfig logger = logging.getLogger(__name__) diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index 977406e75a1..ba722b9db2c 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -18,11 +18,12 @@ """IntelĀ® Neural Compressor: An open-source Python library supporting popular model compression techniques.""" from .version import __version__ # we need to set a global 'NA' backend, or Model can't be used -from .utils.utility import set_random_seed, set_tensorboard, set_workspace, set_resume_from -from .utils import options +from .experimental.compression import prepare_pruning from .config import DistillationConfig, PostTrainingQuantConfig, \ WeightPruningConfig, QuantizationAwareTrainingConfig, \ MixedPrecisionConfig from .contrib import * from .model import * from .metric import * +from .utils import options +from .utils.utility import set_random_seed, set_tensorboard, set_workspace, set_resume_from diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index bd159b09f23..1d5613840e9 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -32,8 +32,7 @@ from .utils import logger from .utils import OPTIONS from .utils.utility import GLOBAL_STATE, MODE -from .model import BaseModel -from .model.model import wrap_model_from +from .model import BaseModel, Model from .utils import logger from .utils.utility import Statistics @@ -184,7 +183,8 @@ def run_instance(model, conf, b_dataloader=None, b_func=None): b_func = create_eval_func(conf.framework, b_dataloader, adaptor, - None) + None, + iteration=conf.iteration) objectives = MultiObjective(["performance"], {'relative': 0.1}, @@ -403,7 +403,7 @@ def fit(model, config, b_dataloader=None, 
b_func=None): if config.backend == "ipex": import intel_extension_for_pytorch - wrapped_model = wrap_model_from(model, config) + wrapped_model = Model(model, conf=config) if b_dataloader is not None: check_dataloader(b_dataloader) diff --git a/neural_compressor/compression/__init__.py b/neural_compressor/compression/__init__.py index 14f79b22d7c..7a1b4c9fb70 100644 --- a/neural_compressor/compression/__init__.py +++ b/neural_compressor/compression/__init__.py @@ -16,6 +16,4 @@ # limitations under the License. from .callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks -from ..experimental.compression import prepare_pruning -from .. import WeightPruningConfig from .pruner.model_slim.auto_slim import model_slim, parse_auto_slim_config diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py index 82544b9a500..ffc2dd50510 100644 --- a/neural_compressor/compression/callbacks.py +++ b/neural_compressor/compression/callbacks.py @@ -21,18 +21,11 @@ 'PruningCallbacks' and 'DistillationCallbacks'. """ -import numpy as np -import os -import pickle -import random from .distillation.criterions import Criterions -from ..adaptor import FRAMEWORKS -from ..config import _Config, options from ..utils import logger -from ..utils.utility import time_limit, LazyImport +from ..utils.utility import LazyImport from ..model import BaseModel, Model -from ..model.model import get_model_fwk_name -from ..strategy import STRATEGIES +from ..model.model import MODELS from .pruner.utils import process_config, parse_to_prune, get_sparsity_ratio from .pruner.pruners import get_pruner, PRUNERS # model auto slim related @@ -56,21 +49,13 @@ def __init__(self, conf=None, model=None): conf: A Config object which definds the compressor behavior. Just like: QuantizationAwareTrainingConfig, WeightPruningConfig \ and DistillationConfig. - model: Model to be compressed in this object. + model: Model to be compressed in this object. It should be neural compressor model. """ - self.conf = None - self.cfg = None + assert model is None or isinstance(model, BaseModel), "The model should be a instanceof BaseModel" + self.conf = conf self.framework = None - self._model = None self.model = model - self._train_func = None - self._train_dataloader = None - self._eval_func = None - self._eval_dataloader = None - self._train_distributed = False - self._evaluation_distributed = False self.adaptor = None - self._metric = None self.hooks = { 'on_train_begin': self.on_train_begin, 'on_train_end': self.on_train_end, @@ -185,281 +170,6 @@ def __repr__(self): """Represent this class.""" pass - @property - def model(self): - """Getter of model in neural_compressor.model.""" - return self._model - - @model.setter - def model(self, user_model): - """Set the user model and dispatch to framework specific internal model object. - - Args: - user_model: user are supported to set model from original framework model format - (eg, tensorflow frozen_pb or path to a saved model), - but not recommended. Best practice is to set from a initialized - neural_compressor.Model. - If tensorflow model is used, model's inputs/outputs will be - auto inferenced, but sometimes auto inferenced - inputs/outputs will not meet your requests, - set them manually in config yaml file. - Another corner case is slim model of tensorflow, - be careful of the name of model configured in yaml file, - make sure the name is in supported slim model list. 
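Because the callbacks now assert that the incoming model is already a BaseModel instance (see the __init__ change above), a minimal sketch of the wrapping step, using the bare Model(user_model) form that the teacher_model setter in this file also uses; the torch module is illustrative:

    import torch
    from neural_compressor.model import Model

    net = torch.nn.Linear(8, 2)  # illustrative torch module
    nc_model = Model(net)        # wrap before handing it to a callbacks object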
- - """ - if user_model is None: - return - - if self.framework == None: - self.framework = get_model_fwk_name( - user_model.model if isinstance(user_model, BaseModel) else user_model) - if self.framework == "tensorflow": - from ..model.tensorflow_model import get_model_type - if not isinstance(user_model, BaseModel) and get_model_type(user_model) == 'keras'\ - and self.conf.backend == 'itex': - self.framework = 'keras' - if self.framework == "pytorch": - try: - if self.conf.quantization.backend == "default": - self.framework = "pytorch_fx" - elif self.conf.quantization.backend == "ipex": - self.framework = "pytorch_ipex" - self.conf.quantization.framework = self.framework - except Exception as e: - pass - - if not isinstance(user_model, BaseModel): - logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in self.framework or self.framework == "keras": - if self.conf.quantization and self.conf.quantization.approach == "quant_aware_training": - self._model = Model(user_model, backend='tensorflow_qat', device=self.conf.device) - else: - self._model = Model(user_model, backend=self.framework, device=self.conf.device) - else: - self._model = Model(user_model, backend=self.framework) - else: - self._model = user_model - - if 'tensorflow' in self.framework: - try: - self._model.name = self.conf.quantization.model_name - self._model.output_tensor_names = self.conf.quantization.outputs - self._model.input_tensor_names = self.conf.quantization.inputs - self._model.workspace_path = options.workspace - except Exception as e: - self._model.name = None - self._model.output_tensor_names = None - self._model.input_tensor_names = None - self._model.workspace_path = None - - def pre_process(self): - """Create strategy to optimize model.""" - # Remove qat hooks if user want to tune accuracy with train function. - if self.adaptor is not None and hasattr(self.adaptor, "_pre_hook_for_qat"): - self.remove_hook("on_train_begin", self.adaptor._pre_hook_for_qat) - self.remove_hook("on_train_end", self.adaptor._post_hook_for_qat) - - strategy = self.conf.quantization.tuning_criterion.strategy.lower() - if self.conf.quantization.quant_level == 0: - strategy = "conservative" - logger.info(f"On the premise that the accuracy meets the conditions, improve the performance.") - - if strategy == "mse_v2": - if not (self.conf.quantization.framework.startswith("tensorflow") \ - or self.conf.quantization.framework == 'pytorch_fx'): # pragma: no cover - strategy = "basic" - logger.warning(f"MSE_v2 does not support \ - {self.conf.quantization.framework} now, use basic instead.") - logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") - assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) - - _resume = None - # check if interrupted tuning procedure exists. if yes, it will resume the - # whole auto tune process. 
- self.resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ - if options.workspace and options.resume_from else None - if self.resume_file: - assert os.path.exists(self.resume_file), \ - "The specified resume file {} doesn't exist!".format(self.resume_file) - with open(self.resume_file, 'rb') as f: - _resume = pickle.load(f).__dict__ - - self.strategy = STRATEGIES[strategy]( - model = self.model, - conf = self.conf, - q_dataloader=None, - q_func=self._train_func, - eval_func=self._eval_func, - eval_dataloader=self._eval_dataloader, - eval_metric=self.metric, - resume=_resume, - q_hooks=None) - - def execute(self): - """Quantization Aware Training execute routinue based on strategy design.""" - try: - with time_limit(self.conf.quantization.tuning_criterion.timeout): - logger.debug("Dump user yaml configuration:") - logger.debug(self.conf) - self.strategy.traverse() - except KeyboardInterrupt: - pass - except Exception as e: - logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) - import traceback - traceback.print_exc() - finally: - if self.strategy.best_qmodel: - logger.info( - "Specified timeout or max trials is reached! " - "Found a quantized model which meet accuracy goal. Exit.") - self.strategy.deploy_config() - else: - logger.error( - "Specified timeout or max trials is reached! " - "Not found any quantized model which meet accuracy goal. Exit.") - - return self.strategy.best_qmodel - - def __call__(self): - """Execute this class. - - For derived classes, an override function is required. - """ - self.pre_process() - results = self.execute() - return results - - fit = __call__ - - @property - def train_func(self): - """Not support get train_func.""" - assert False, 'Should not try to get the value of `train_func` attribute.' - return None - - @train_func.setter - def train_func(self, user_train_func): - """Training function. - - Args: - user_train_func: This function takes "model" as input parameter - and executes entire training process with self - contained training hyper-parameters. If training_func set, - an evaluation process must be triggered and user should - set eval_dataloader with metric configured or directly eval_func - to make evaluation of the model executed. training_func will return - a trained model. - """ - self._train_func = user_train_func - - @property - def eval_func(self): - """Not support get eval_func.""" - assert False, 'Should not try to get the value of `eval_func` attribute.' - return None - - @eval_func.setter - def eval_func(self, user_eval_func): - """Eval function for component. - - Args: - user_eval_func: This function takes "model" as input parameter - and executes entire evaluation process with self - contained metrics. If eval_func set, - an evaluation process must be triggered - to make evaluation of the model executed. - """ - self._eval_func = user_eval_func - - @property - def eval_dataloader(self): - """Getter to eval dataloader.""" - return self._eval_dataloader - - @eval_dataloader.setter - def eval_dataloader(self, dataloader): - """Set Data loader for evaluation of component. - - It is iterable and the batched data should consists of yield (input, _). - the input in the batched data will be used for model inference, so it - should satisfy the input format of specific model. - User only need to set eval_dataloader when eval_dataloader can not be - configured from yaml file. 
- - Args: - dataloader(generator): user are supported to set a user defined dataloader - which meet the requirements that can yield tuple of - (input, label)/(input, _) batched data. Another good - practice is to use neural_compressor.experimental.common.DataLoader - to initialize a neural_compressor dataloader object. Notice - neural_compressor.experimental.common.DataLoader is just a wrapper of the - information needed to build a dataloader, it can't yield - batched data and only in this setter method - a 'real' train_dataloader will be created, - the reason is we have to know the framework info - and only after the Component object created then - framework information can be known. - Future we will support creating iterable dataloader - from neural_compressor.experimental.common.DataLoader. - """ - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' - self._eval_dataloader = dataloader - - @property - def metric(self): - """Get `metric` attribute.""" - return self._metric - - @metric.setter - def metric(self, user_metric): - """Set metric class or a dict of built-in metric configures, - and neural_compressor will initialize this class when evaluation. - - 1. neural_compressor have many built-in metrics, - user can pass a metric configure dict to tell neural compressor what metric will be use. - You can set multi-metrics to evaluate the performance of a specific model. - Single metric: - {topk: 1} - Multi-metrics: - {topk: 1, - MSE: {compare_label: False}, - weight: [0.5, 0.5], - higher_is_better: [True, False] - } - For the built-in metrics, please refer to below link: - https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix. - - 2. User also can set specific metric through this api. The metric class should take the outputs of the model or - postprocess(if have) as inputs, neural_compressor built-in metric always take(predictions, labels) - as inputs for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric. - - Args: - user_metric(neural_compressor.metric.Metric or a dict of built-in metric configures): - - """ - from ..metric import Metric as NCMetric, METRICS - if isinstance(user_metric, dict): - metric_cfg = user_metric - else: - if isinstance(user_metric, NCMetric): - name = user_metric.name - metric_cls = user_metric.metric_cls - metric_cfg = {name: {**user_metric.kwargs}} - else: - for i in ['reset', 'update', 'result']: - assert hasattr(user_metric, i), 'Please realise {} function' \ - 'in user defined metric'.format(i) - metric_cls = type(user_metric).__name__ - name = 'user_' + metric_cls - metric_cfg = {name: id(user_metric)} - metrics = METRICS(self.conf.quantization.framework) - metrics.register(name, metric_cls) - self._metric = metric_cfg - def remove_hook(self, scope, hook): """Remove hooks if user want to tune accuracy with train_func.""" for registed_hook in self.hooks_dict[scope]: @@ -474,40 +184,16 @@ class QuantizationAwareTrainingCallbacks(BaseCallbacks): In this class will apply all hooks for Quantization-Aware Training. """ - def __init__(self, conf=None, model=None): + def __init__(self, conf=None, model=None, adaptor=None): """Construct all the necessary attributes for the callbacks object. Args: conf: A QuantizationAwareTrainingConfig object which definds the compressor behavior. - model: Model to be quantized in this object. 
+ model: Model to be quantized in this object. It should be neural compressor model. """ - super(QuantizationAwareTrainingCallbacks, self).__init__(conf=None) - self.conf = _Config(quantization=conf, benchmark=None,pruning=None, distillation=None, nas=None) - self.model = model - - seed = options.random_seed - random.seed(seed) - np.random.seed(seed) - - framework_specific_info = {'device': self.conf.quantization.device, - 'random_seed': options.random_seed, - 'workspace_path': options.workspace, - 'q_dataloader': None, - 'backend': self.conf.quantization.backend if \ - self.conf.quantization.backend is not None else 'default', - 'format': self.conf.quantization.quant_format if \ - self.conf.quantization.quant_format is not None else 'default'} - if self.conf.quantization.approach is not None: - framework_specific_info['approach'] = self.conf.quantization.approach - - if 'tensorflow' in self.framework: - framework_specific_info.update( - {"inputs": self.conf.quantization.inputs, \ - "outputs": self.conf.quantization.outputs}) - self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) - self.adaptor.model = self.model - self.register_hook('on_train_begin', self.adaptor._pre_hook_for_qat) - self.register_hook('on_train_end', self.adaptor._post_hook_for_qat) + super(QuantizationAwareTrainingCallbacks, self).__init__(conf=conf, model=model) + self.register_hook('on_train_begin', adaptor._pre_hook_for_qat) + self.register_hook('on_train_end', adaptor._post_hook_for_qat) def __repr__(self): """Represent this class.""" @@ -525,14 +211,10 @@ def __init__(self, conf=None, model=None): Args: conf: A WeightPruningConfig object which definds the compressor behavior. - model: Model to be Pruning in this object. + model: Model to be Pruning in this object. It should be neural compressor model. """ - super(PruningCallbacks, self).__init__(conf=None) - self.conf = _Config(pruning=conf, quantization=None, benchmark=None - , distillation=None, nas=None) - self.cfg = self.conf.pruning - self.model = model - self.pruners_info = process_config(self.cfg) + super(PruningCallbacks, self).__init__(conf=conf, model=model) + self.pruners_info = process_config(self.conf) self.pruners = [] self._generate_pruners() self.generate_hooks() @@ -541,8 +223,8 @@ def on_train_end(self): """Be called after the end of training.""" for on_train_end_hook in self.hooks_dict['on_train_end']: on_train_end_hook() - if isinstance(self._model.model, torch.nn.Module): - get_sparsity_ratio(self.pruners, self._model) + if isinstance(self.model.model, torch.nn.Module): + get_sparsity_ratio(self.pruners, self.model) def __repr__(self): """Return the class's string representation.""" @@ -557,9 +239,9 @@ def generate_hooks(self): def _generate_pruners(self): """Obtain Pruner objects.""" - if isinstance(self._model.model, torch.nn.Module): + if isinstance(self.model.model, torch.nn.Module): for info in self.pruners_info: - modules = parse_to_prune(info, self._model.model) + modules = parse_to_prune(info, self.model.model) if modules == {}: logger.warning("one pruner hooks no layers, please have a check") @@ -579,7 +261,7 @@ class DistillationCallbacks(BaseCallbacks): Args: conf: Distillation_Conf containing teacher model, distillation criterion etc. - model: Student model. + model: Student model. It should be neural compressor model. Attributes: _epoch_ran: A integer indicating how much epochs ran. 
@@ -591,11 +273,9 @@ class DistillationCallbacks(BaseCallbacks): def __init__(self, conf=None, model=None): """Initialize the attributes.""" - super(DistillationCallbacks, self).__init__() - self.conf = _Config(quantization=None, benchmark=None, pruning=None, distillation=conf, nas=None) - self.cfg = self.conf.distillation - self.model = model + super(DistillationCallbacks, self).__init__(conf=conf, model=model) + self.framework = list(MODELS.keys())[list(MODELS.values()).index(type(model))] self._teacher_model = None self._criterion = None self._epoch_ran = 0 @@ -604,9 +284,8 @@ def __init__(self, conf=None, model=None): self.best_score = 0 self.best_model = None self.hooks_registered = False - assert hasattr(self.cfg, "teacher_model"),\ - "Please assign teacher model in DistillationConfig." - self.teacher_model = self.cfg.teacher_model + assert hasattr(self.conf, "teacher_model"), "Please assign teacher model in DistillationConfig." + self.teacher_model = self.conf.teacher_model self.generate_hooks() self.create_criterion() @@ -642,7 +321,7 @@ def init_train_cfg(self): """Initialize the training configuration.""" if self._train_cfg is None: # train section of distillation section in yaml file should be configured. - self._train_cfg = self.cfg.criterion + self._train_cfg = self.conf.criterion assert self._train_cfg, "train field of distillation section in yaml file must " \ "be configured for distillation if train_func is NOT set." @@ -734,7 +413,7 @@ def teacher_model(self, user_model): """ if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") - self._teacher_model = Model(user_model, backend=self.framework) + self._teacher_model = Model(user_model) else: self._teacher_model = user_model @@ -745,7 +424,7 @@ def student_model(self): Returns: The student model used in the distillation process. """ - return self._model + return self.model @property def train_cfg(self): diff --git a/neural_compressor/compression/pruner/README.md b/neural_compressor/compression/pruner/README.md index d0029438f4a..f8b0cfaa925 100644 --- a/neural_compressor/compression/pruner/README.md +++ b/neural_compressor/compression/pruner/README.md @@ -301,7 +301,7 @@ The following section exemplifies how to use hooks in user pass-in training func [**Experimental option** ]Modify model and optimizer. ```python - from neural_compressor.training import prepare_pruning, WeightPruningConfig + from neural_compressor import prepare_pruning, WeightPruningConfig config = WeightPruningConfig(configs) prepare_pruning(config, model, optimizer) # modify model and optimizer for epoch in range(num_train_epochs): diff --git a/neural_compressor/compression/pruner/pruners.py b/neural_compressor/compression/pruner/pruners.py index b06b0b58e87..df5904a3968 100644 --- a/neural_compressor/compression/pruner/pruners.py +++ b/neural_compressor/compression/pruner/pruners.py @@ -27,6 +27,7 @@ PRUNERS = {} + def register_pruner(name): """Class decorator to register a Pruner subclass to the registry. 
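To illustrate the decorator documented here, a hedged sketch of registering a custom pruner, following the same pattern the built-in pruners below use; the registry name and class body are illustrative, and BasePruner is assumed to be the base class defined in this module:

    @register_pruner("my_custom")
    class MyCustomPruner(BasePruner):
        """Illustrative pruner that only overrides the mask update step."""
        def update_masks(self, local_step):
            pass  # no-op, for illustration only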
@@ -47,6 +48,7 @@ def register(pruner): return register + def parse_valid_pruner_types(): """Get all valid pruner names.""" valid_pruner_types = [] @@ -130,11 +132,11 @@ def __init__(self, config, modules): self.start_step = self.config['start_step'] self.end_step = self.config['end_step'] self.pruning_frequency = self.config['pruning_frequency'] - ##this is different with original code + # this is different with original code self.total_prune_cnt = (self.end_step - self.start_step + self.pruning_frequency) \ // self.pruning_frequency self.completed_pruned_cnt = 0 - self.total_prune_cnt -= 1 ## not pruning at step 0 + self.total_prune_cnt -= 1 # not pruning at step 0 if self.total_prune_cnt == 0: self.total_prune_cnt = 1 self.completed_pruned_cnt = 1 @@ -249,13 +251,13 @@ def forward(self, input): mask = self.block_mask.repeat_interleave(block_size[0], dim=0).repeat_interleave(\ block_size[1], dim=-1).to(self.weight.device) return F.linear(input, self.weight*mask, self.bias) - + for key in self.modules.keys(): if not hasattr(self.modules[key], 'block_mask'): continue # No corresponding block mask, skip. module = self.modules[key] module.forward = partial(forward, module) - + def recover_forward(self): """Restore the forward format at the end of pruning""" with torch.no_grad(): @@ -264,7 +266,7 @@ def recover_forward(self): continue # No corresponding block mask, skip. module = self.modules[key] module.forward = partial(torch.nn.Linear.forward, module) - + @register_pruner("basic") class BasicPruner(BasePruner): @@ -353,7 +355,7 @@ def on_before_optimizer_step(self): def on_after_optimizer_step(self): """Prune the model after optimization.""" - ##the order of the following three lines can't not be exchanged + # the order of the following three lines can't not be exchanged if self.global_step >= self.start_step and self.global_step <= self.end_step: self.reg.on_after_optimizer_step() self.mask_weights() @@ -420,7 +422,7 @@ class BlockMaskPruner(BasePruner): def __init__(self, config, modules): """Initialize.""" super(BlockMaskPruner, self).__init__(config, modules) - + def _init(self): """Initialize.""" self.pattern = get_pattern(self.config, self.modules) @@ -429,17 +431,17 @@ def _init(self): self.scheduler = get_scheduler(self.config) self.criterion = get_criterion(self.config, self.modules) self.reg = get_reg(self.config, self.modules, self.pattern) - + if "channel" not in self.pattern.pattern: logger.info("Enabling channel-wise pattern would be a better choice.") - + # def on_step_begin(self, local_step): # """Implement at the start of each step. - + # Update the masks at a given local_step. 
# """ # self.update_masks(local_step) - + def update_masks(self, local_step): """Update the masks at a given local step.""" if self.global_step == self.start_step: @@ -469,13 +471,13 @@ def update_masks(self, local_step): self.current_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) logger.info(f"current sparsity ratio is {self.current_sparsity_ratio}") - + def on_before_optimizer_step(self): """Implement before optimizer.step().""" if self.global_step >= self.start_step and self.global_step <= self.end_step: self.reg.on_before_optimizer_step() self.criterion.on_before_optimizer_step() - + def on_after_optimizer_step(self): """Prune the model after optimization.""" ##the order of the following four lines can't not be exchanged @@ -488,7 +490,7 @@ def on_after_optimizer_step(self): self.recover_forward() self.pattern.remove_block_masks() self.global_step += 1 - + def mask_weights(self): """Apply block masks to corresponding modules' weights. @@ -496,14 +498,14 @@ def mask_weights(self): """ with torch.no_grad(): self.pattern.mask_block_weights(self.masks) - + def update_block_masks(self, masks): """Update the block mask parameters.""" with torch.no_grad(): for key in self.masks.keys(): module = self.modules[key] module.block_mask.data = masks[key].data - + def zero_mask_grad(self): with torch.no_grad(): for key in self.modules.keys(): @@ -516,8 +518,8 @@ def zero_mask_grad(self): else: mask.grad.requires_grad_(False) mask.grad.zero_() - - + + @register_pruner('retrain_free') class RetrainFreePruner(BasePruner): """Pruning Pruner. @@ -526,7 +528,7 @@ class RetrainFreePruner(BasePruner): RetrainFreePruner supports one-shot pruning (same effect as fast retraining free) and iterative pruning. Please refer to A Fast Post-Training Pruning Framework for Transformers (https://arxiv.org/abs/2204.09656) - + 1. Defines pruning functions called at step begin/end, before/after optimize and epoch begin/end. 2. Defines the pruning criterion and fixed weight parameters. 3. Obtain block masks and its grads. @@ -545,7 +547,7 @@ class RetrainFreePruner(BasePruner): def __init__(self, config, modules): """Initialize.""" super(RetrainFreePruner, self).__init__(config, modules) - + def _init(self): """Initialize.""" self.pattern = get_pattern(self.config, self.modules) @@ -554,18 +556,18 @@ def _init(self): self.scheduler = get_scheduler(self.config) self.criterion = get_criterion(self.config, self.modules) self.reg = get_reg(self.config, self.modules, self.pattern) - + logger.warning("Retrain-free pruner fixed the weights, please DO NOT turn on gradient update.") assert "channel" in self.pattern.pattern, \ "retrain-free pruner only supports large patterns like channel-wise pruning." - + # def on_step_begin(self, local_step): # """Implement at the start of each step. - + # Update the masks at a given local_step. 
# """ # self.update_masks(local_step) - + def update_masks(self, local_step): """Update the masks at a given local step.""" if self.global_step == self.start_step: @@ -589,20 +591,20 @@ def update_masks(self, local_step): self.completed_pruned_cnt += 1 if self.criterion.scores == {}: return - ##the order of the following three lines can't not be exchanged + # the order of the following three lines can't not be exchanged self.masks = self.pattern.get_masks(self.criterion.scores, current_target_sparsity_ratio, self.masks) self.rearrange_masks(self.masks) self.update_block_masks(self.masks) self.current_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) logger.info(f"current sparsity ratio is {self.current_sparsity_ratio}") - + def on_before_optimizer_step(self): """Implement before optimizer.step().""" if self.global_step >= self.start_step and self.global_step <= self.end_step: self.reg.on_before_optimizer_step() self.criterion.on_before_optimizer_step() - + def on_after_optimizer_step(self): """Prune the model after optimization.""" ##the order of the following four lines can't not be exchanged @@ -617,7 +619,7 @@ def on_after_optimizer_step(self): self.recover_forward() self.pattern.remove_block_masks() self.global_step += 1 - + def mask_weights(self): """Apply block masks to corresponding modules' weights. @@ -625,14 +627,14 @@ def mask_weights(self): """ with torch.no_grad(): self.pattern.mask_block_weights(self.masks) - + def update_block_masks(self, masks): """Update the block mask parameters.""" with torch.no_grad(): for key in self.masks.keys(): module = self.modules[key] module.block_mask.data = masks[key].data - + def rearrange_masks(self, masks): """Rearrange the masks of each layer with constant sparsity.""" with torch.no_grad(): @@ -662,7 +664,7 @@ def rearrange_masks(self, masks): new_masks[key][masked_indicies] = 0 new_masks[key] = new_masks[key] * torch.ones_like(block_mask).to(block_mask.device) self.masks = new_masks - + def zero_mask_grad(self): with torch.no_grad(): for key in self.modules.keys(): @@ -887,6 +889,3 @@ def print_progressive_sparsity(self): """Output the progressive sparsity.""" cur_sp = self.pattern.get_sparsity_ratio_progressive(self.progressive_masks) logger.info("Step: {} -> Current progressive sparsity: {}".format(self.global_step, cur_sp)) - - - diff --git a/neural_compressor/compression/pruner/utils.py b/neural_compressor/compression/pruner/utils.py index e80213a5976..2a8e504c9b7 100644 --- a/neural_compressor/compression/pruner/utils.py +++ b/neural_compressor/compression/pruner/utils.py @@ -30,7 +30,6 @@ LazyImport('torch.nn') torch = LazyImport('torch') F = LazyImport('torch.nn.functional') - except: import torch import torch.nn.functional as F @@ -38,7 +37,6 @@ import logging logger = logging.getLogger(__name__) from .schema_check import PrunerV2 - class WeightPruningConfig: """Similiar to torch optimizer's interface.""" @@ -414,7 +412,7 @@ def parse_last_linear(model): """ from .model_slim.pattern_analyzer import ClassifierHeadSearcher searcher = ClassifierHeadSearcher(model) - layer = searcher.search(return_name = True) + layer = searcher.search(return_name=True) return layer def parse_to_prune(config, model): diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index def66642c4c..85dc4b1fbfa 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -27,7 +27,7 @@ from .strategy import STRATEGIES from .config import _Config, options from .utils import logger -from 
.model.model import wrap_model_from +from .model import Model def fit(model, @@ -93,7 +93,7 @@ def fit(model, "please modify precision or excluded_precisions to make it understandable.") sys.exit(0) - wrapped_model = wrap_model_from(model, config) + wrapped_model = Model(model, conf=config) if eval_metric is not None: metric = register_customer_metric(eval_metric, config.framework) diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index ed110f7bd0f..e9149718aa5 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -163,93 +163,54 @@ def __new__(cls, root, **kwargs): Returns: BaseModel: neural_compressor built-in model """ - backend = kwargs.get("backend", "NA") - if backend == "NA" or backend == "default": - backend_tmp = get_model_fwk_name(root) - if backend_tmp == "pytorch": - backend = "pytorch_fx" - else: - backend = backend_tmp - elif backend == "ipex": - backend = "pytorch_ipex" - - if 'tensorflow' in backend or backend == 'keras': - if kwargs.get("approach", None) == "quant_aware_training" or backend == 'tensorflow_qat': - return MODELS['tensorflow_qat'](root, **kwargs) - - if 'modelType' in kwargs: - model_type = kwargs['modelType'] - else: - model_type = get_model_type(root) - if backend == 'keras' and model_type == 'keras': - return MODELS['keras'](root, **kwargs) - model = MODELS['tensorflow'](model_type, root, **kwargs) + conf = kwargs.pop("conf", "NA") + if isinstance(root, BaseModel): + if conf != "NA" and conf.framework is None: + conf.framework = list(MODELS.keys())[list(MODELS.values()).index(type(root))] + if conf.backend == "ipex": + assert conf.framework == "pytorch_ipex",\ + "Please wrap the model with correct Model class!" + if conf.backend == "itex": + if get_model_type(root.model) == 'keras': + assert conf.framework == "keras",\ + "Please wrap the model with KerasModel class!" + else: + assert conf.framework == "tensorflow_itex", \ + "Please wrap the model with TensorflowModel class!" + if getattr(conf, "approach", None) == "quant_aware_training": + assert conf.framework == "tensorflow_qat", \ + "Please wrap the model with TensorflowQATModel class!" + return root else: - model = MODELS[backend](root, **kwargs) - return model - - -def wrap_model_from(user_model, conf): - """Wrap the user model and dispatch to framework specific internal model object. - - Args: - user_model: user are supported to set model from original framework model format - (eg, tensorflow frozen_pb or path to a saved model), but not recommended. - Best practice is to set from a initialized neural_compressor.common.Model. - If tensorflow model is used, model's inputs/outputs will be auto inferred, - but sometimes auto inferred inputs/outputs will not meet your requests, - set them manually in config yaml file. Another corner case is slim model - of tensorflow, be careful of the name of model configured in yaml file, - make sure the name is in supported slim model list. - conf: the instance of PostTrainingQuantConfig or QuantizationAwareTrainingConfig or MixedPrecisionConfig. - """ - if conf.framework is None: - if isinstance(user_model, BaseModel): # pragma: no cover - conf.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] - if conf.backend == "ipex": - assert conf.framework == "pytorch_ipex",\ - "Please wrap the model with correct Model class!" 
- if conf.backend == "itex": - if get_model_type(user_model.model) == 'keras': - assert conf.framework == "keras",\ - "Please wrap the model with KerasModel class!" - else: - assert conf.framework == "pytorch_itex", \ - "Please wrap the model with TensorflowModel class!" - else: - framework = get_model_fwk_name(user_model) - if framework == "tensorflow": - if get_model_type(user_model) == 'keras' and conf.backend == 'itex': - framework = 'keras' - if framework == "pytorch": - if conf.backend == "default": + framework = get_model_fwk_name(root) + if conf == "NA": + if framework == "pytorch": framework = "pytorch_fx" + return MODELS[framework](root, **kwargs) + else: + conf.framework = framework + if conf.backend == "default": + if framework == "pytorch": + conf.framework = "pytorch_fx" elif conf.backend == "ipex": - framework = "pytorch_ipex" - conf.framework = framework - - if not isinstance(user_model, BaseModel): - logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in conf.framework or conf.framework == "keras": - model = Model(user_model, backend=conf.framework, device=conf.device) - else: - model = Model(user_model, backend=conf.framework) - else: # pragma: no cover - if conf.framework == "pytorch_ipex": - from neural_compressor.model.torch_model import IPEXModel - assert type(user_model) == IPEXModel, \ - "The backend is ipex, please wrap the model with IPEXModel class!" - elif conf.framework == "pytorch_fx": - from neural_compressor.model.torch_model import PyTorchFXModel - assert type(user_model) == PyTorchFXModel, \ - "The backend is default, please wrap the model with PyTorchFXModel class!" - - model = user_model - - if 'tensorflow' in conf.framework: - model.name = conf.model_name - model.output_tensor_names = conf.outputs - model.input_tensor_names = conf.inputs - model.workspace_path = options.workspace - - return model + conf.framework = "pytorch_ipex" + + if 'tensorflow' in conf.framework: + if getattr(conf, "approach", None) == "quant_aware_training": + return MODELS['tensorflow_qat'](root, **kwargs) + + if 'modelType' in kwargs: + model_type = kwargs['modelType'] + else: + model_type = get_model_type(root) + if conf.backend == "itex" and model_type == 'keras': + return MODELS['keras'](root, **kwargs) + model = MODELS['tensorflow'](model_type, root, **kwargs) + else: + model = MODELS[conf.framework](root, **kwargs) + if 'tensorflow' in conf.framework: + model.name = conf.model_name + model.output_tensor_names = conf.outputs + model.input_tensor_names = conf.inputs + model.workspace_path = options.workspace + return model diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 295e16dc9e3..e30708a3eca 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -23,7 +23,7 @@ from .config import _Config, options from .data.dataloaders.dataloader import check_dataloader from .metric.metric import register_customer_metric -from .model.model import wrap_model_from +from .model import Model from .strategy import STRATEGIES from .utils import logger from .utils.utility import time_limit, dump_class_attrs @@ -145,7 +145,7 @@ def eval_func(model): seed = options.random_seed random.seed(seed) np.random.seed(seed) - wrapped_model = wrap_model_from(model, conf) + wrapped_model = Model(model, conf=conf) if eval_metric is not None: metric = register_customer_metric(eval_metric, conf.framework) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py 
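# A minimal usage sketch, assuming a toy PyTorch model and dummy calibration data
# (the network, dataset shape and dataloader helper below are illustrative
# assumptions, not part of this patch): after the change above, quantization.fit()
# wraps the raw framework model with Model(model, conf=conf) itself, so callers keep
# passing the original model plus a config object instead of pre-wrapping it.
import torch
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.data import Datasets, DATALOADERS

# toy model and calibration data, mirroring the dummy dataset used in the tests
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 8, 3),
    torch.nn.AdaptiveAvgPool2d(1),
    torch.nn.Flatten(),
    torch.nn.Linear(8, 10),
)
dataset = Datasets("pytorch")["dummy"]((16, 3, 224, 224))
calib_dataloader = DATALOADERS["pytorch"](dataset)

conf = PostTrainingQuantConfig()  # framework/backend are resolved inside Model(model, conf=conf)
q_model = quantization.fit(model, conf, calib_dataloader=calib_dataloader)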
index 0dfa474f211..9f9f4bbdce9 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -41,7 +41,7 @@ from ..version import __version__ from ..algorithm import AlgorithmScheduler, ALGORITHMS -from .utils.tuning_space import TuningSpace +from .utils.tuning_space import TuningSpace from .utils.tuning_structs import OpTuningConfig from .utils.constant import FALLBACK_RECIPES_SET @@ -210,6 +210,7 @@ def _initialize_config(self, conf): Tuning config """ config = conf.quantization + config.diagnosis = getattr(config, 'diagnosis', None) return config @abstractmethod diff --git a/neural_compressor/training.py b/neural_compressor/training.py index c9902267f24..526c8a23a0b 100644 --- a/neural_compressor/training.py +++ b/neural_compressor/training.py @@ -15,178 +15,150 @@ # See the License for the specific language governing permissions and # limitations under the License. """The configuration of the training loop.""" -import copy +import os +import pickle +import numpy as np +import random + +from .adaptor import FRAMEWORKS from .compression.callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks +from .config import _Config, options +from .metric.metric import register_customer_metric from .model.model import Model from .utils import logger +from .utils.utility import time_limit +from neural_compressor.strategy.strategy import STRATEGIES from neural_compressor import (DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig) from typing import Callable, List, Union -from .compression import prepare_pruning class CompressionManager: """CompressionManager is uesd in train loop for what user want to deal with additional. Arguments: - model: A model to be compressed. It should be neural compressor model. - callbacks: A list of Callbacks instances. - Such as: DistillationCallbbacks, QuantizationAwareTrainingCallbacks, PruningCallbacks. + model: A model to be compressed. + confs: The instance of QuantizationAwareTrainingConfig, PruningConfig and distillationConfig, or a list of + config for orchestration optimization. Examples:: import neural_compressor.training.prepare_compression - compression_manager = prepare_compression(nc_model, confs) + compression_manager = prepare_compression(model, confs) compression_manager.callbacks.on_train_begin() model = compression_manager.model - train_loop: - for epoch in range(epochs): - compression_manager.callbacks.on_epoch_begin(epoch) - for i, batch in enumerate(dataloader): - compression_manager.callbacks.on_step_begin(i) - ...... - output = model(batch) - loss = ...... - loss = compression_manager.callbacks.on_after_compute_loss(batch, output, loss) - loss.backward() - compression_manager.callbacks.on_before_optimizer_step() - optimizer.step() - compression_manager.callbacks.on_step_end() - compression_manager.callbacks.on_epoch_end() + # train_loop: + for epoch in range(epochs): + compression_manager.callbacks.on_epoch_begin(epoch) + for i, (batch, label) in enumerate(dataloader): + compression_manager.callbacks.on_step_begin(i) + ...... + output = model(batch) + loss = ...... 
+ loss = compression_manager.callbacks.on_after_compute_loss(batch, output, loss) + loss.backward() + compression_manager.callbacks.on_before_optimizer_step() + optimizer.step() + compression_manager.callbacks.on_step_end() + compression_manager.callbacks.on_epoch_end() compression_manager.callbacks.on_train_end() compression_manager.save("path_to_save") """ - def __init__(self, model, callbacks_list): + def __init__(self, model: Callable, confs: Union[Callable, List], **kwargs): """Initialize the CompressionManager's parameters. - model: A model to be compressed. It should be neural compressor model. - callbacks: A list of Callbacks instances. - Such as: DistillationCallbbacks, QuantizationAwareTrainingCallbacks, PruningCallbacks. + model: A model to be compressed. + confs: The instance of QuantizationAwareTrainingConfig, PruningConfig and distillationConfig, or a list of + config for orchestration optimization. """ - self.callbacks = CallBacks(callbacks_list) - self.model = model - self._train_func = None - self._eval_func = None - self.quantizer = None + callbacks_list = [] + self.model = None + q_conf = None + p_conf = None + d_conf = None + self.adaptor = None + + if isinstance(confs, List) and len(confs) > 1: + for conf in confs: + if isinstance(conf, QuantizationAwareTrainingConfig): + self.model = Model(model, conf=conf) + if self.model is None: + self.model = Model(model) + + for conf in confs: + if isinstance(conf, QuantizationAwareTrainingConfig): + q_conf = conf + + framework_specific_info = { + 'device': conf.device, + 'random_seed': options.random_seed, + 'workspace_path': options.workspace, + 'q_dataloader': None, + 'backend': getattr(confs, "backend", 'default'), + 'format': getattr(confs, "quant_format", 'default'), + 'approach': conf.approach, + } + if 'tensorflow' in conf.framework: + framework_specific_info.update( + {"inputs": conf.inputs, + "outputs": conf.outputs}) + self.adaptor = FRAMEWORKS[conf.framework](framework_specific_info) + self.adaptor.model = self.model + callbacks_list.append(QuantizationAwareTrainingCallbacks(conf, adaptor=self.adaptor)) + elif isinstance(conf, WeightPruningConfig): + p_conf = conf + callbacks_list.append(PruningCallbacks(conf, model=self.model)) + elif isinstance(conf, DistillationConfig): + d_conf = conf + callbacks_list.append(DistillationCallbacks(conf, model=self.model)) + else: + assert False, "Unsupported configure: {}".format(type(conf)) + self.conf = _Config(quantization=q_conf, benchmark=None, pruning=p_conf, distillation=d_conf, nas=None) + else: + if isinstance(confs, List): + confs = confs[0] + if isinstance(confs, QuantizationAwareTrainingConfig): + self.model = Model(model, conf=confs) + + framework_specific_info = { + 'device': confs.device, + 'random_seed': options.random_seed, + 'workspace_path': options.workspace, + 'q_dataloader': None, + 'backend': getattr(confs, "backend", 'default'), + 'format': getattr(confs, "quant_format", 'default'), + 'approach': confs.approach, + } + if 'tensorflow' in confs.framework: + framework_specific_info.update( + {"inputs": confs.inputs, + "outputs": confs.outputs}) + self.adaptor = FRAMEWORKS[confs.framework](framework_specific_info) + self.adaptor.model = self.model + callbacks_list.append(QuantizationAwareTrainingCallbacks(confs, adaptor=self.adaptor)) + self.conf = _Config(quantization=confs, benchmark=None, pruning=None, distillation=None, nas=None) + elif isinstance(confs, WeightPruningConfig): + self.model = Model(model) + callbacks_list.append(PruningCallbacks(confs, 
model=self.model)) + self.conf = _Config(quantization=None, benchmark=None, pruning=confs, distillation=None, nas=None) + elif isinstance(confs, DistillationConfig): + self.model = Model(model) + callbacks_list.append(DistillationCallbacks(confs, model=self.model)) + self.conf = _Config(quantization=None, benchmark=None, pruning=None, distillation=confs, nas=None) + else: + assert False, logger.error( + "confs should be one of QuantizationAwareTrainingConfig, " + "PruningConfig, DistillationConfig. not {}".format(type(confs)) + ) try: # TODO: export to ONNX model need original fp32 model now, will remove it # when int8 model can be exported to ONNX model. - self.fp32_model = model + self.fp32_model = self.model except Exception as e: # pragma: no cover logger.warning("Fail to deep copy the model due to {}.".format(repr(e))) self.fp32_model = None - for component in callbacks_list: - if isinstance(component, QuantizationAwareTrainingCallbacks): - self.quantizer = component - - @property - def train_func(self): - """Not support get train_func.""" - assert False, 'Should not try to get the value of `train_func` attribute.' - - @train_func.setter - def train_func(self, user_train_func): - """Set training function. - - Args: - user_train_func: This function takes "model" as input parameter - and executes entire training process with self - contained training hyper-parameters. If training_func set, - an evaluation process must be triggered and user should - set eval_dataloader with metric configured or directly eval_func - to make evaluation of the model executed. training_func will return - a trained model. - """ - self.quantizer.train_func = user_train_func - - @property - def eval_func(self): - """Not support get eval_func.""" - assert False, 'Should not try to get the value of `eval_func` attribute.' - return None - - @eval_func.setter - def eval_func(self, user_eval_func): - """Eval function for component. - - Args: - user_eval_func: This function takes "model" as input parameter - and executes entire evaluation process with self - contained metrics. If eval_func set, - an evaluation process must be triggered - to make evaluation of the model executed. - """ - assert self.quantizer is not None, "There is no quantizer to tune, " \ - "please pass a QuantizationAwareTrainingConfig." - self.quantizer.eval_func = user_eval_func - - @property - def eval_dataloader(self): - """Getter to eval dataloader.""" - return self.quantizer.eval_dataloader - - @eval_dataloader.setter - def eval_dataloader(self, dataloader): - """Set Data loader for evaluation of component. - - It is iterable and the batched data should consists of yield (input, _). - the input in the batched data will be used for model inference, so it - should satisfy the input format of specific model. - - Args: - dataloader(generator): user are supported to set a user defined dataloader - which meet the requirements that can yield tuple of - (input, label)/(input, _) batched data. - """ - assert self.quantizer is not None, "There is no quantizer to tune, " \ - "please pass a QuantizationAwareTrainingConfig." - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' - - self.quantizer.eval_dataloader = dataloader - - @property - def metric(self): - """Get `metric` attribute.""" - assert False, 'Should not try to get the value of `metric` attribute.' 
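# A minimal orchestration sketch, assuming a toy PyTorch student/teacher pair
# (the sparsity target, pruning schedule and loss settings below are illustrative
# assumptions only): passing a list of configs to prepare_compression() exercises
# the multi-config branch of the rewritten CompressionManager above, which builds
# one callback per config (here pruning plus distillation) around a single model.
import torch
from neural_compressor import DistillationConfig, WeightPruningConfig
from neural_compressor.config import KnowledgeDistillationLossConfig
from neural_compressor.training import prepare_compression

student = torch.nn.Sequential(torch.nn.Linear(64, 32), torch.nn.ReLU(), torch.nn.Linear(32, 10))
teacher = torch.nn.Sequential(torch.nn.Linear(64, 32), torch.nn.ReLU(), torch.nn.Linear(32, 10))

confs = [
    WeightPruningConfig([{"start_step": 0, "end_step": 10}], target_sparsity=0.5),
    DistillationConfig(teacher_model=teacher, criterion=KnowledgeDistillationLossConfig()),
]
compression_manager = prepare_compression(student, confs)
compression_manager.callbacks.on_train_begin()
model = compression_manager.model  # wrapped neural_compressor model used in the train loop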
- - @metric.setter - def metric(self, user_metric): - """Set metric class or a dict of built-in metric configures. - - 1. neural_compressor have many built-in metrics, - user can pass a metric configure dict to tell neural compressor what metric will be use. - You can set multi-metrics to evaluate the performance of a specific model. - Single metric: - {topk: 1} - - Multi-metrics: - {topk: 1, - MSE: {compare_label: False}, - weight: [0.5, 0.5], - higher_is_better: [True, False] - } - For the built-in metrics, please refer to below link: - https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix. - - 2. User also can set specific metric through this api. The metric class should take the outputs of the model or - postprocess(if have) as inputs, neural_compressor built-in metric always take(predictions, labels) - as inputs for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric. - - Args: - user_metric(neural_compressor.metric.Metric or a dict of built-in metric configurations): - The object of Metric or a dict of built-in metric configurations. - """ - assert self.quantizer is not None, "There is no quantizer to tune, " \ - "please pass a QuantizationAwareTrainingConfig." - self.quantizer.metric = user_metric - - def fit(self): - """Compress model with tuning for quantization.""" - self.model = self.quantizer.fit() - return self.model + self.callbacks = CallBacks(callbacks_list) def save(self, root=None): """Save compressed model. @@ -216,7 +188,7 @@ def fit(compression_manager, eval_dataloader=None, eval_metric=None, **kwargs): - """Compress the model with tuning for quantization. + """Compress the model with accuracy tuning for quantization. Args: compression_manager (CompressionManager): The Compression manager contains the model and @@ -251,16 +223,125 @@ def eval_func(model): process. eval_metric (dict or obj): Set metric class or a dict of built-in metric configures, and neural_compressor will initialize this class when evaluation. + + Returns: + A optimized model. + + Examples:: + + from neural_compressor.training import fit, prepare_compression + + compression_manager = prepare_compression(conf, model) + + def train_func(model): + compression_manager.callbacks.on_train_begin() + for epoch in range(epochs): + compression_manager.callbacks.on_epoch_begin(epoch) + for i, (batch, label) in enumerate(dataloader): + compression_manager.callbacks.on_step_begin(i) + ...... + output = model(batch) + loss = ...... + loss = compression_manager.callbacks.on_after_compute_loss(batch, output, loss) + loss.backward() + compression_manager.callbacks.on_before_optimizer_step() + optimizer.step() + compression_manager.callbacks.on_step_end() + compression_manager.callbacks.on_epoch_end() + compression_manager.callbacks.on_train_end() + return model + + def eval_func(model): + for i, (batch, label) in enumerate(dataloader): + output = model(batch) + # compute metric + metric = top1(output, label) + return metric.results() + + model = fit(compression_manager, train_func=train_func, eval_func=eval_func) """ - assert compression_manager.quantizer is not None, "Only quantization supports tuning with accuracy driven." 
- compression_manager.train_func = train_func - if eval_func is not None: - compression_manager.eval_func = eval_func - if eval_dataloader is not None: - compression_manager.eval_dataloader = eval_dataloader + assert compression_manager.conf.quantization is not None, "Only quantization supports tuning with accuracy driven." + seed = options.random_seed + random.seed(seed) + np.random.seed(seed) + + # Remove qat hooks if user want to tune accuracy with train function. + for callback in compression_manager.callbacks.callbacks_list: + if isinstance(callback, QuantizationAwareTrainingCallbacks): + callback.remove_hook("on_train_begin", compression_manager.adaptor._pre_hook_for_qat) + callback.remove_hook("on_train_end", compression_manager.adaptor._post_hook_for_qat) + if eval_metric is not None: - compression_manager.eval_metric = eval_metric - return compression_manager.fit() + metric = register_customer_metric(eval_metric, compression_manager.conf.quantization.framework) + else: + metric = None + + strategy_name = compression_manager.conf.quantization.tuning_criterion.strategy + + if compression_manager.conf.quantization.quant_level == "auto": + strategy_name = "auto" + elif compression_manager.conf.quantization.quant_level == 0: + strategy_name = "conservative" + + if strategy_name == "mse_v2": + if not (compression_manager.conf.quantization.framework.startswith("tensorflow") + or compression_manager.conf.quantization.framework == 'pytorch_fx'): # pragma: no cover + strategy_name = "basic" + logger.warning(f"MSE_v2 does not support {compression_manager.conf.quantization.framework} now, use basic instead.") + logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") + assert strategy_name in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy_name) + + logger.info(f"Start {strategy_name} tuning.") + _resume = None + # check if interrupted tuning procedure exists. if yes, it will resume the + # whole auto tune process. + resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ + if options.workspace and options.resume_from else None + if resume_file: + assert os.path.exists(resume_file), \ + "The specified resume file {} doesn't exist!".format(resume_file) + with open(resume_file, 'rb') as f: + _resume = pickle.load(f).__dict__ + + if eval_func is None and eval_dataloader is None: # pragma: no cover + logger.info("Quantize model without tuning!") + + strategy = STRATEGIES[strategy_name]( + model=compression_manager.model, + conf=compression_manager.conf, + q_dataloader=None, + q_func=train_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=metric, + resume=_resume, + q_hooks=None + ) + try: + with time_limit(compression_manager.conf.quantization.tuning_criterion.timeout): + logger.debug("Dump user yaml configuration:") + logger.debug(compression_manager.conf) + strategy.traverse() + except KeyboardInterrupt: + pass + except Exception as e: + logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) + import traceback + traceback.print_exc() + finally: + if strategy.best_qmodel: + logger.info( + "Specified timeout or max trials is reached! " + "Found a quantized model which meet accuracy goal. Exit.") + strategy.deploy_config() + else: + logger.error( + "Specified timeout or max trials is reached! " + "Not found any quantized model which meet accuracy goal. 
Exit.") + + compression_manager.model = strategy.best_qmodel + + return compression_manager.model def prepare_compression(model: Callable, confs: Union[Callable, List], **kwargs): @@ -277,59 +358,28 @@ def prepare_compression(model: Callable, confs: Union[Callable, List], **kwargs) Examples:: - import neural_compressor.training.prepare_compression + from neural_compressor.training import prepare_compression compression_manager = prepare_compression(conf, model) - train_loop: - compression_manager.on_train_begin() - for epoch in range(epochs): - compression_manager.on_epoch_begin(epoch) - for i, batch in enumerate(dataloader): - compression_manager.on_step_begin(i) - ...... - output = model(batch) - loss = ...... - loss = compression_manager.on_after_compute_loss(batch, output, loss) - loss.backward() - compression_manager.on_before_optimizer_step() - optimizer.step() - compression_manager.on_step_end() - compression_manager.on_epoch_end() - compression_manager.on_train_end() + model = compression_manager.model + # train_loop: + compression_manager.callbacks.on_train_begin() + for epoch in range(epochs): + compression_manager.callbacks.on_epoch_begin(epoch) + for i, (batch, label) in enumerate(dataloader): + compression_manager.callbacks.on_step_begin(i) + ...... + output = model(batch) + loss = ...... + loss = compression_manager.callbacks.on_after_compute_loss(batch, output, loss) + loss.backward() + compression_manager.callbacks.on_before_optimizer_step() + optimizer.step() + compression_manager.callbacks.on_step_end() + compression_manager.callbacks.on_epoch_end() + compression_manager.callbacks.on_train_end() """ - callbacks_list = [] - nc_model = None - if isinstance(confs, List) and len(confs) > 1: - for conf in confs: - if isinstance(conf, QuantizationAwareTrainingConfig): - nc_model = Model(model, backend=conf.backend, approach="quant_aware_training") - callbacks_list.append(QuantizationAwareTrainingCallbacks(conf, model=nc_model)) - elif isinstance(conf, WeightPruningConfig): - callbacks_list.append(PruningCallbacks(conf, model=model)) - elif isinstance(conf, DistillationConfig): - callbacks_list.append(DistillationCallbacks(conf, model=model)) - else: - assert False, "Unsupported configure: {}".format(type(conf)) - else: - if isinstance(confs, List): - confs = confs[0] - if isinstance(confs, QuantizationAwareTrainingConfig): - nc_model = Model(model, backend=confs.backend, approach="quant_aware_training") - callbacks_list.append(QuantizationAwareTrainingCallbacks(confs, model=nc_model)) - elif isinstance(confs, WeightPruningConfig): - callbacks_list.append(PruningCallbacks(confs, model=model)) - elif isinstance(confs, DistillationConfig): - callbacks_list.append(DistillationCallbacks(confs, model=model)) - else: - assert False, logger.error( - "confs should be one of QuantizationAwareTrainingConfig, " - "PruningConfig, DistillationConfig. 
not {}".format(type(confs)) - ) - - if nc_model is None: - nc_model = Model(model, backend="default") - - compression_manager = CompressionManager(nc_model, callbacks_list=callbacks_list) + compression_manager = CompressionManager(model, confs, **kwargs) return compression_manager diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py index d08eb9cac7d..7357fea20ba 100644 --- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py +++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py @@ -499,7 +499,7 @@ def test_mix_precision(self): # run fx_quant in neural_compressor and save the quantized GraphModule dataset = Datasets("pytorch")["dummy"]((100, 3, 224, 224)) dataloader = DataLoader("pytorch", dataset) - set_workspace=("./saved") + set_workspace("./saved") conf = PostTrainingQuantConfig(op_name_dict=ptq_fx_op_name_list) q_model = quantization.fit(model_origin, conf, @@ -530,8 +530,11 @@ def test_hawq_metric(self): dataset = Datasets("pytorch")["dummy"](((16, 3, 224, 224))) dataloader = DATALOADERS["pytorch"](dataset) q_model = fit(ori_model, conf=PostTrainingQuantConfig(), calib_dataloader=dataloader) - op_to_traces = hawq_top(fp32_model=pt_model, q_model=q_model, dataloader=dataloader, \ - criterion=None, enable_act=True) + op_to_traces = hawq_top(fp32_model=pt_model, + q_model=q_model, + dataloader=dataloader, + criterion=None, + enable_act=True) self.assertIsNotNone(op_to_traces) diff --git a/test/pruning_2_plus.x/test_pruning.py b/test/pruning_2_plus.x/test_pruning.py index 4208cb9540a..396d2f63b78 100644 --- a/test/pruning_2_plus.x/test_pruning.py +++ b/test/pruning_2_plus.x/test_pruning.py @@ -8,8 +8,7 @@ sys.path.insert(0, './') from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig -from neural_compressor.training import prepare_pruning +from neural_compressor import prepare_pruning, WeightPruningConfig class TestPruning(unittest.TestCase): diff --git a/test/pruning_2_plus.x/test_pruning_block.py b/test/pruning_2_plus.x/test_pruning_block.py index f28b9f78a0c..e4b10945755 100644 --- a/test/pruning_2_plus.x/test_pruning_block.py +++ b/test/pruning_2_plus.x/test_pruning_block.py @@ -7,8 +7,7 @@ sys.path.insert(0, './') from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig -from neural_compressor.training import prepare_pruning +from neural_compressor import prepare_pruning, WeightPruningConfig class TestPruning(unittest.TestCase): From 183f600c586166b068cd30ab089be93bd5ba1fc7 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Fri, 5 May 2023 10:58:55 +0800 Subject: [PATCH 05/14] fixed UT error Signed-off-by: Cheng, Penghui --- .../quantization/ptq/main.py | 2 +- .../inception_v3/quantization/ptq/main.py | 3 +- .../mobilenet_v2/quantization/ptq/main.py | 2 +- .../resnet101/quantization/ptq/main.py | 2 +- .../resnet50/quantization/ptq/main.py | 2 +- .../resnet50_fashion/quantization/ptq/main.py | 2 +- .../resnetv2_101/quantization/ptq/main.py | 2 +- .../resnetv2_50/quantization/ptq/main.py | 2 +- .../vgg16/quantization/ptq/main.py | 2 +- .../vgg19/quantization/ptq/main.py | 2 +- .../xception/quantization/ptq/main.py | 2 +- neural_compressor/model/model.py | 46 +++++++++++++++---- neural_compressor/training.py | 7 +-- 13 files changed, 53 insertions(+), 
23 deletions(-) diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py index c3ac54e2a5e..d707e88fffd 100644 --- a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py @@ -139,7 +139,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py index b8bf786ced6..67391cab8b8 100644 --- a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py @@ -139,9 +139,10 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) + if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py index ce4d5450a28..e6d77c3a130 100644 --- a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py index 77bd046a2af..b391fa67e09 100644 --- a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py @@ -145,7 +145,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py index e8c50b098b3..4f9f09b4085 100644 --- a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py @@ -130,7 +130,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model, backend='keras').model + model = Model(FLAGS.input_model).model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git 
a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py index a432587d5f6..2f6d238173b 100644 --- a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py @@ -128,7 +128,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model, backend='keras').model + model = Model(FLAGS.input_model).model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py index ce4d5450a28..e6d77c3a130 100644 --- a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py index ce4d5450a28..e6d77c3a130 100644 --- a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py index 380ee9a9223..224be9a5d54 100644 --- a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py @@ -130,7 +130,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model, backend='keras').model + model = Model(FLAGS.input_model).model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py index e8c50b098b3..4f9f09b4085 100644 --- a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py @@ -130,7 +130,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model, backend='keras').model + model = Model(FLAGS.input_model).model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/xception/quantization/ptq/main.py b/examples/keras/image_recognition/xception/quantization/ptq/main.py index 
a5b678023c1..6d0b3e2ac42 100644 --- a/examples/keras/image_recognition/xception/quantization/ptq/main.py +++ b/examples/keras/image_recognition/xception/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index e9149718aa5..2d898872119 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -175,17 +175,39 @@ def __new__(cls, root, **kwargs): assert conf.framework == "keras",\ "Please wrap the model with KerasModel class!" else: - assert conf.framework == "tensorflow_itex", \ + assert conf.framework == "tensorflow", \ "Please wrap the model with TensorflowModel class!" + conf.framework = "tensorflow_itex" if getattr(conf, "approach", None) == "quant_aware_training": assert conf.framework == "tensorflow_qat", \ "Please wrap the model with TensorflowQATModel class!" + else: + if 'tensorflow' in conf.framework: + if getattr(root, "name", None) is None: + root.name = conf.model_name + if getattr(root, "output_tensor_names", None) is None: + root.output_tensor_names = conf.outputs + if getattr(root, "input_tensor_names", None) is None: + root.input_tensor_names = conf.inputs + if getattr(root, "workspace_path", None) is None: + root.workspace_path = options.workspace return root else: framework = get_model_fwk_name(root) if conf == "NA": if framework == "pytorch": framework = "pytorch_fx" + if 'tensorflow' in framework: + if kwargs.get("approach", None) == "quant_aware_training": + return MODELS['tensorflow_qat'](root, **kwargs) + if 'modelType' in kwargs: + model_type = kwargs['modelType'] + else: + model_type = get_model_type(root) + if model_type == "keras" and kwargs.get("framework", None) != "tensorflow": + return MODELS['keras'](root, **kwargs) + else: + return MODELS[framework](model_type, root, **kwargs) return MODELS[framework](root, **kwargs) else: conf.framework = framework @@ -197,15 +219,21 @@ def __new__(cls, root, **kwargs): if 'tensorflow' in conf.framework: if getattr(conf, "approach", None) == "quant_aware_training": - return MODELS['tensorflow_qat'](root, **kwargs) - - if 'modelType' in kwargs: - model_type = kwargs['modelType'] + model = MODELS['tensorflow_qat'](root, **kwargs) else: - model_type = get_model_type(root) - if conf.backend == "itex" and model_type == 'keras': - return MODELS['keras'](root, **kwargs) - model = MODELS['tensorflow'](model_type, root, **kwargs) + if 'modelType' in kwargs: + model_type = kwargs['modelType'] + else: + model_type = get_model_type(root) + if conf.backend == "itex": + if model_type == 'keras': + conf.framework = "keras" + model = MODELS[conf.framework](root, **kwargs) + else: + conf.framework = "tensorflow_itex" + model = MODELS[conf.framework](model_type, root, **kwargs) + else: + model = MODELS['tensorflow'](model_type, root, **kwargs) else: model = MODELS[conf.framework](root, **kwargs) if 'tensorflow' in conf.framework: diff --git a/neural_compressor/training.py b/neural_compressor/training.py index 526c8a23a0b..4204955b012 100644 --- a/neural_compressor/training.py +++ b/neural_compressor/training.py @@ -166,7 +166,7 @@ def save(self, root=None): Args: root (str): path to save the model """ - 
self.model.save(root) + self.model.save(root) # pylint: disable=no-member def export( self, @@ -179,7 +179,7 @@ def export( save_path (str): The path to save the model conf (Union[Callable, List]) : The configure for onnx exportation. """ - self.model.export(save_path, conf) + self.model.export(save_path, conf) # pylint: disable=no-member def fit(compression_manager, @@ -287,7 +287,8 @@ def eval_func(model): if not (compression_manager.conf.quantization.framework.startswith("tensorflow") or compression_manager.conf.quantization.framework == 'pytorch_fx'): # pragma: no cover strategy_name = "basic" - logger.warning(f"MSE_v2 does not support {compression_manager.conf.quantization.framework} now, use basic instead.") + logger.warning(f"MSE_v2 does not support {compression_manager.conf.quantization.framework} now," + "use basic instead.") logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") assert strategy_name in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy_name) From 498180987d6210d96e35292577ea43ab60b1cef4 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Fri, 5 May 2023 17:54:04 +0800 Subject: [PATCH 06/14] Fixed UT error and update docs Signed-off-by: Cheng, Penghui --- docs/source/benchmark.md | 2 +- docs/source/mixed_precision.md | 8 ++-- examples/helloworld/tf_example5/README.md | 2 +- .../quantization/ptq/main.py | 2 +- .../inception_v3/quantization/ptq/main.py | 2 +- .../mobilenet_v2/quantization/ptq/main.py | 2 +- .../resnet101/quantization/ptq/main.py | 2 +- .../resnet50/quantization/ptq/main.py | 2 +- .../resnet50_fashion/quantization/ptq/main.py | 2 +- .../resnetv2_101/quantization/ptq/main.py | 2 +- .../resnetv2_50/quantization/ptq/main.py | 2 +- .../vgg16/quantization/ptq/main.py | 2 +- .../vgg19/quantization/ptq/main.py | 2 +- .../xception/quantization/ptq/main.py | 2 +- .../quantization/ptq_static/main.py | 2 +- .../gpt2/quantization/ptq_dynamic/gpt2.py | 2 +- .../quantization/run_diffusion.py | 2 +- .../quantization/ptq/fx/python/main.py | 4 +- .../quantization/ptq/ipex/infer.py | 2 +- .../quantization/qat/fx/ssd/main.py | 2 +- .../quantization/ptq/main.py | 2 +- .../mobilenet_v1/quantization/ptq/main.py | 2 +- .../mobilenet_v2/quantization/ptq/main.py | 2 +- .../resnet_v2/quantization/qat/main.py | 2 +- .../adaptor/tf_utils/graph_converter.py | 23 ++++++----- .../graph_rewriter/generic/pre_optimize.py | 2 +- neural_compressor/benchmark.py | 24 +++++------ neural_compressor/config.py | 4 +- neural_compressor/mix_precision.py | 40 ++++++++++++++----- neural_compressor/model/model.py | 2 +- neural_compressor/training.py | 1 + test/benchmark/test_benchmark_2.x.py | 2 +- .../test_mixed_precision_keras_model.py | 2 +- 33 files changed, 89 insertions(+), 67 deletions(-) diff --git a/docs/source/benchmark.md b/docs/source/benchmark.md index 0ab89bc3548..4b660f3ac95 100644 --- a/docs/source/benchmark.md +++ b/docs/source/benchmark.md @@ -49,7 +49,7 @@ And please make sure `cores_per_instance * num_of_instance` must be less than CP from neural_compressor.config import BenchmarkConfig from neural_compressor.benchmark import fit conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) -fit(model='./int8.pb', config=conf, b_dataloader=eval_dataloader) +fit(model='./int8.pb', conf=conf, b_dataloader=eval_dataloader) ``` ## Examples diff --git a/docs/source/mixed_precision.md b/docs/source/mixed_precision.md index 86fb3986810..e288890d745 100644 --- a/docs/source/mixed_precision.md +++ 
b/docs/source/mixed_precision.md @@ -42,8 +42,8 @@ Supported precisions for mix precision include bf16 and fp16. If users want to g from neural_compressor import mix_precision from neural_compressor.config import MixedPrecisionConfig -conf = MixedPrecisionConfig(precision='bf16') -converted_model = mix_precision.fit(model, config=conf) +conf = MixedPrecisionConfig(precisions='bf16') +converted_model = mix_precision.fit(model, conf=conf) converted_model.save('./path/to/save/') ``` @@ -56,8 +56,8 @@ from neural_compressor.config import MixedPrecisionConfig conf = MixedPrecisionConfig( backend='onnxrt_cuda_ep', device='gpu', - precision='fp16') -converted_model = mix_precision.fit(model, config=conf) + precisions='fp16') +converted_model = mix_precision.fit(model, conf=conf) converted_model.save('./path/to/save/') ``` diff --git a/examples/helloworld/tf_example5/README.md b/examples/helloworld/tf_example5/README.md index 399ebd8bfb8..e90509222f9 100644 --- a/examples/helloworld/tf_example5/README.md +++ b/examples/helloworld/tf_example5/README.md @@ -52,7 +52,7 @@ python test.py --benchmark --dataset_location=/path/to/imagenet/ ```python from neural_compressor.benchmark import fit conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=1) - fit(model='./int8.pb', config=conf, b_dataloader=eval_dataloader) + fit(model='./int8.pb', conf=conf, b_dataloader=eval_dataloader) ``` diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py index d707e88fffd..1e742e36cb6 100644 --- a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py @@ -139,7 +139,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py index 67391cab8b8..a27de78af52 100644 --- a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py @@ -139,7 +139,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py index e6d77c3a130..e5bbc475f24 100644 --- a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git 
a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py index b391fa67e09..dbad4085197 100644 --- a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py @@ -145,7 +145,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py index 4f9f09b4085..fba7c004931 100644 --- a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py @@ -130,7 +130,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model).model + model = Model(FLAGS.input_model, backend='itex').model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py index 2f6d238173b..19a5c125f03 100644 --- a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py @@ -128,7 +128,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model).model + model = Model(FLAGS.input_model, backend='itex').model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py index e6d77c3a130..e5bbc475f24 100644 --- a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py index e6d77c3a130..e5bbc475f24 100644 --- a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py 
b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py index 224be9a5d54..1396533ff13 100644 --- a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py @@ -130,7 +130,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model).model + model = Model(FLAGS.input_model, backend='itex').model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py index 4f9f09b4085..fba7c004931 100644 --- a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py @@ -130,7 +130,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model).model + model = Model(FLAGS.input_model, backend='itex').model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/xception/quantization/ptq/main.py b/examples/keras/image_recognition/xception/quantization/ptq/main.py index 6d0b3e2ac42..359d7e083c9 100644 --- a/examples/keras/image_recognition/xception/quantization/ptq/main.py +++ b/examples/keras/image_recognition/xception/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py index 9b5674a50d0..e0632a8631f 100644 --- a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py @@ -414,7 +414,7 @@ def eval_func(model): config = PostTrainingQuantConfig(approach="static", quant_format=args.quant_format, recipes={"optypes_to_exclude_output_quant": ["MatMul"]}) - q_model = quantization.fit(model, + q_model = quantization.fit(model, config, eval_func=eval_func, calib_dataloader=dataloader) diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py index f861724dea8..1d05d6955c4 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py +++ b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py @@ -272,7 +272,7 @@ def eval_func(model): accuracy_criterion.relative = 0.11 config = PostTrainingQuantConfig(approach='dynamic', accuracy_criterion=accuracy_criterion) - q_model = quantization.fit(model, + q_model = quantization.fit(model, config, eval_func=eval_func) q_model.save(args.output_model) diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py b/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py index 7392ac05b47..1dd38d7a115 100644 --- a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py +++ 
b/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py @@ -322,7 +322,7 @@ def b_func(model): from neural_compressor.config import BenchmarkConfig b_conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) - fit(model, config=b_conf, b_func=b_func) + fit(model, conf=b_conf, b_func=b_func) if args.accuracy_only: setattr(pipe, "unet", model) accuracy(pipe, generator, _rows, args) diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py b/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py index cdd072c3c27..53c084edd62 100644 --- a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py +++ b/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py @@ -623,7 +623,7 @@ def benchmark_func(model): from neural_compressor.config import BenchmarkConfig from neural_compressor import benchmark b_conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) - benchmark.fit(int8_model, config=b_conf, b_func=benchmark_func) + benchmark.fit(int8_model, conf=b_conf, b_func=benchmark_func) else: if args.accuracy: eval_func(raw_model) @@ -631,7 +631,7 @@ def benchmark_func(model): from neural_compressor.config import BenchmarkConfig from neural_compressor import benchmark b_conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) - benchmark.fit(raw_model, config=b_conf, b_func=benchmark_func) + benchmark.fit(raw_model, conf=b_conf, b_func=benchmark_func) runner.finish() lg.DestroyQSL(qsl) diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer.py b/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer.py index 312b7299d5d..3d302738143 100644 --- a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer.py +++ b/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer.py @@ -658,7 +658,7 @@ def coco_eval(model): from neural_compressor.config import BenchmarkConfig from neural_compressor import benchmark b_conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) - benchmark.fit(ssd_r34, config=b_conf, b_func=coco_eval) + benchmark.fit(ssd_r34, conf=b_conf, b_func=coco_eval) return diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/main.py b/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/main.py index 6b04cca6f12..ffa33d433ae 100644 --- a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/main.py +++ b/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/main.py @@ -424,7 +424,7 @@ def training_func_for_nc(model, dataloader=None): from neural_compressor.config import BenchmarkConfig from neural_compressor import benchmark b_conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) - benchmark.fit(new_model, config=b_conf, b_func=eval_func) + benchmark.fit(new_model, conf=b_conf, b_func=eval_func) return return False diff --git a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py index 1dcf3a00757..1da7dc46c4c 100644 --- a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py @@ -127,7 +127,7 @@ def run(self): from neural_compressor.config import BenchmarkConfig if args.mode == 
'performance': conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) - fit(model=args.input_graph, config=conf, b_func=evaluate) + fit(model=args.input_graph, conf=conf, b_func=evaluate) else: from neural_compressor.model import Model model = Model(args.input_graph).model diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py index b95d854f0bd..65617b896b5 100644 --- a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py @@ -125,7 +125,7 @@ def run(self): 'filter': None } eval_dataloader = create_dataloader('tensorflow', dataloader_args) - fit(model=args.input_graph, config=conf, b_dataloader=eval_dataloader) + fit(model=args.input_graph, conf=conf, b_dataloader=eval_dataloader) else: from neural_compressor.model import Model model = Model(args.input_graph).model diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py index b95d854f0bd..65617b896b5 100644 --- a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py @@ -125,7 +125,7 @@ def run(self): 'filter': None } eval_dataloader = create_dataloader('tensorflow', dataloader_args) - fit(model=args.input_graph, config=conf, b_dataloader=eval_dataloader) + fit(model=args.input_graph, conf=conf, b_dataloader=eval_dataloader) else: from neural_compressor.model import Model model = Model(args.input_graph).model diff --git a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py index 3c7e19a6da4..b2dc72c70ea 100644 --- a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py +++ b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py @@ -335,7 +335,7 @@ def evaluate(model): Args: model (tensorflow.Graph_def): The input model graph - + Returns: accuracy (float): evaluation result, the larger is better. 
""" diff --git a/neural_compressor/adaptor/tf_utils/graph_converter.py b/neural_compressor/adaptor/tf_utils/graph_converter.py index 18b516ed6cd..309b4828a19 100644 --- a/neural_compressor/adaptor/tf_utils/graph_converter.py +++ b/neural_compressor/adaptor/tf_utils/graph_converter.py @@ -126,7 +126,7 @@ def __init__(self, self._check_tf_version() self._check_args() - self._fp32_model = Model(self.model._model, **self.model.kwargs) + self._fp32_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") self._fp32_model.graph_def = self.model.graph_def self._fp32_model.output_tensor_names = self.output_tensor_names self._fp32_model.input_tensor_names = self.input_tensor_names @@ -145,7 +145,7 @@ def __init__(self, self.scale_info.update({'bf16_ops': self.bf16_ops}) self.scale_info.update({'fp32_ops': self.fp32_ops}) - self._sampling_model = Model(self.model._model, **self.model.kwargs) + self._sampling_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") self._sampling_model.output_tensor_names = self.output_tensor_names self._sampling_model.input_tensor_names = self.input_tensor_names @@ -154,7 +154,7 @@ def __init__(self, self._tmp_graph_def = self.model.graph_def else: self._tmp_graph_def = copy.deepcopy(self.model.graph_def) - self.new_api = new_api #bool(version1_gte_version2(tf.version.VERSION, '2.8.0')) + self.new_api = new_api # bool(version1_gte_version2(tf.version.VERSION, '2.8.0')) self.use_bf16 = use_bf16 self.exclude_node_names = [] @@ -231,7 +231,7 @@ def check_shape(tensor, data): disorder_tensors = [] disorder_inputs = [] for idx, sort_tensor in enumerate(input_tensor): - sort_input = inputs[idx] + sort_input = inputs[idx] if check_shape(sort_tensor, sort_input): feed_dict.update({sort_tensor: sort_input}) else: @@ -326,7 +326,8 @@ def _gen_tmp_filenames(self): self._tmp_model = self._fp32_model else: # to keep temp model - self._tmp_model = Model(self.model._model, **self.model.kwargs) + self._tmp_model = Model(self.model._model, **self.model.kwargs, + backend="itex" if self.itex_mode else "default") self._tmp_model.graph_def = self.model.graph_def self._tmp_model.output_tensor_names = self.output_tensor_names self._tmp_model.input_tensor_names = self.input_tensor_names @@ -495,7 +496,7 @@ def quantize(self): output_tensor_names = copy.deepcopy(self.model.output_tensor_names) sampling_graph_def = copy.deepcopy(self._fp32_model.graph_def) - # TODO: this is a workaround to make Min/Max node be completly eliminated in int8 graph + # TODO: this is a workaround to make Min/Max node be completly eliminated in int8 graph # after enabling pad+conv2d in new API. 
non_pad_ops = list(list(set(self.fp32_ops).union(set(self.bf16_ops)))) sampling_graph_def = FusePadWithFP32Conv2DOptimizer( @@ -602,7 +603,7 @@ def _generate_calibration_data(self, tmp_path, output_data, enable_kl_algo=False logger.debug("Generate calibration data and save to {}.".format(tmp_dump_file)) - model = Model(tmp_path, **self._tmp_model.kwargs) + model = Model(tmp_path, **self._tmp_model.kwargs, backend="itex" if self.itex_mode else "default") model.output_tensor_names = self.output_tensor_names model.input_tensor_names = self.input_tensor_names @@ -668,7 +669,7 @@ def _fuse_requantize_with_fused_quantized_node(self): if self.qdq_enabled: self._tmp_graph_def = FuseMatMulRequantizeNewAPITransformer( self._tmp_graph_def).do_transformation() - + self._tmp_graph_def = FuseMatMulRequantizeDequantizeNewAPITransformer( self._tmp_graph_def).do_transformation() else: @@ -677,7 +678,7 @@ def _fuse_requantize_with_fused_quantized_node(self): self._tmp_graph_def = FuseMatMulRequantizeDequantizeTransformer( self._tmp_graph_def).do_transformation() - + self._tmp_graph_def = StripUnusedNodesOptimizer( self._tmp_graph_def, self._tmp_model.input_node_names, @@ -751,7 +752,7 @@ def quantize_with_qdq_pattern(self): def _insert_qdq_pairs(self): """Insert QDQ pairs before Conv/MatMul/Pooling Ops.""" - # Fuse Pad into Conv2D, Conv3D, DepthwiseConv2dNative + # Fuse Pad into Conv2D, Conv3D, DepthwiseConv2dNative non_pad_ops = list(list(set(self.fp32_ops).union(set(self.bf16_ops)))) self._tmp_graph_def = FusePadWithConv2DOptimizer( self._tmp_graph_def, @@ -828,7 +829,7 @@ def _insert_qdq_pairs(self): # Insert QDQ pattern self._tmp_graph_def = GenerateGraphWithQDQPattern( self._tmp_graph_def, self._calibration_data, self.op_wise_config, - self.fake_quant, self.fp32_ops, self.bf16_ops, self.quantized_node_info, + self.fake_quant, self.fp32_ops, self.bf16_ops, self.quantized_node_info, self.device, self.performance_only, self.itex_mode).do_transformation() def _convert_qdq(self): diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py index 4ef09bba613..5ed17ad42e1 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py @@ -108,7 +108,7 @@ def get_optimized_model(self, itex_mode=False): """ from neural_compressor.model import Model - origin_model = Model(self.model._model, **self.model.kwargs) + origin_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") origin_model.name = self.model.name origin_model.model_type = self.model.model_type origin_model.output_tensor_names = self.model.output_tensor_names diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index 7d228ecd706..e21e5db90b5 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -352,7 +352,7 @@ def summary_benchmark(): pass -def benchmark_with_raw_cmd(raw_cmd, config=None): +def benchmark_with_raw_cmd(raw_cmd, conf=None): """Benchmark the model performance with the raw commend. 
Args: @@ -369,23 +369,23 @@ def benchmark_with_raw_cmd(raw_cmd, config=None): conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) fit_with_raw_cmd("test.py", conf) """ - if config is not None: - if config.backend == "ipex": + if conf is not None: + if conf.backend == "ipex": import intel_extension_for_pytorch assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' # disable multi-instance for running bechmark on GPU device - set_all_env_var(config) + set_all_env_var(conf) config_instance(raw_cmd) summary_benchmark() -def fit(model, config, b_dataloader=None, b_func=None): +def fit(model, conf, b_dataloader=None, b_func=None): """Benchmark the model performance with the configure. Args: model (object): The model to be benchmarked. - config (BenchmarkConfig): The configuration for benchmark containing accuracy goal, + conf (BenchmarkConfig): The configuration for benchmark containing accuracy goal, tuning objective and preferred calibration & quantization tuning space etc. b_dataloader: The dataloader for frameworks. @@ -398,24 +398,24 @@ def fit(model, config, b_dataloader=None, b_func=None): from neural_compressor.benchmark import fit conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model='./int8.pb', config=conf, b_dataloader=eval_dataloader) + fit(model='./int8.pb', conf=conf, b_dataloader=eval_dataloader) """ - if config.backend == "ipex": + if conf.backend == "ipex": import intel_extension_for_pytorch - wrapped_model = Model(model, conf=config) + wrapped_model = Model(model, conf=conf) if b_dataloader is not None: check_dataloader(b_dataloader) assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' # disable multi-instance for running bechmark on GPU device - set_all_env_var(config) - if config.device == 'gpu': + set_all_env_var(conf) + if conf.device == 'gpu': set_env_var('NC_ENV_CONF', True, overwrite_existing=True) logger.info("Start to run Benchmark.") if os.environ.get('NC_ENV_CONF') == 'True': - return run_instance(model=wrapped_model, conf=config, b_dataloader=b_dataloader, b_func=b_func) + return run_instance(model=wrapped_model, conf=conf, b_dataloader=b_dataloader, b_func=b_func) raw_cmd = sys.executable + ' ' + ' '.join(sys.argv) benchmark_with_raw_cmd(raw_cmd) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 88cf8a3c6ad..47696c70b2a 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -247,7 +247,7 @@ class BenchmarkConfig: from neural_compressor.benchmark import fit conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model='./int8.pb', config=conf, b_dataloader=eval_dataloader) + fit(model='./int8.pb', conf=conf, b_dataloader=eval_dataloader) """ def __init__(self, inputs=[], @@ -1662,7 +1662,7 @@ class MixedPrecisionConfig(object): from neural_compressor.config import MixedPrecisionConfig conf = MixedPrecisionConfig() - converted_model = mix_precision.fit(model, config=conf) + converted_model = mix_precision.fit(model, conf=conf) """ def __init__(self, device="cpu", diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 85dc4b1fbfa..386347f5943 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -23,7 +23,7 @@ from neural_compressor.data.dataloaders.dataloader import check_dataloader from neural_compressor.metric.metric import register_customer_metric -from .utils.utility import 
time_limit +from .utils.utility import time_limit, CpuInfo from .strategy import STRATEGIES from .config import _Config, options from .utils import logger @@ -31,7 +31,7 @@ def fit(model, - config=None, + conf=None, eval_func=None, eval_dataloader=None, eval_metric=None, @@ -47,7 +47,7 @@ def fit(model, to .onnx file or onnx.onnx_ml_pb2.ModelProto. For MXNet model, it's mxnet.symbol.Symbol or gluon.HybirdBlock instance. - config (MixedPrecisionConfig): The MixedPrecisionConfig class containing accuracy goal, + conf (MixedPrecisionConfig): The MixedPrecisionConfig class containing accuracy goal, tuning objective and mixed_precision tuning space etc. eval_func (function, optional): The evaluation function provided by user. This function takes model as parameter, @@ -83,24 +83,44 @@ def fit(model, from neural_compressor.config import MixedPrecisionConfig conf = MixedPrecisionConfig() - converted_model = mix_precision.fit(model, config=conf) + converted_model = mix_precision.fit(model, conf=conf) """ if eval_dataloader is not None: check_dataloader(eval_dataloader) - if config.precisions in config.excluded_precisions: + if conf.precisions in conf.excluded_precisions: logger.warning("Target precision is in excluded_precisions, " "please modify precision or excluded_precisions to make it understandable.") sys.exit(0) - wrapped_model = Model(model, conf=config) + wrapped_model = Model(model, conf=conf) + + precisions = list(set(conf.precisions) - set(conf.excluded_precisions)) + if ('bf16' in precisions or 'fp16' in precisions) and conf.framework == "onnxruntime": # pragma: no cover + if conf.device == "cpu": + logger.warning("Mix precision exits due to device isn't gpu for onnx models.") + sys.exit(0) + elif conf.backend != "onnxrt_cuda_ep": + logger.warning("Mix precision exits due to backend isn't onnxrt_cuda_ep for onnx models.") + sys.exit(0) + elif 'bf16' in precisions and not CpuInfo().bf16 and conf.framework != "onnxruntime": # pragma: no cover + if os.getenv('FORCE_BF16') == '1': + logger.warning("Mix precision will generate bf16 graph although " + "the hardware doesn't support bf16 instruction.") + else: + logger.warning("Mix precision exits due to the hardware " + "doesn't support bf16 instruction.") + sys.exit(0) + elif 'fp16' in precisions and conf.framework != "onnxruntime": + logger.warning("Currently mix precision only supports fp16 for onnx models.") + sys.exit(0) if eval_metric is not None: - metric = register_customer_metric(eval_metric, config.framework) + metric = register_customer_metric(eval_metric, conf.framework) else: metric = None - conf = _Config(mixed_precision=config, + config = _Config(mixed_precision=conf, quantization=None, benchmark=None, pruning=None, @@ -123,7 +143,7 @@ def fit(model, strategy = STRATEGIES['automixedprecision']( model=wrapped_model, - conf=conf, + conf=config, eval_func=eval_func, eval_dataloader=eval_dataloader, eval_metric=metric, @@ -131,7 +151,7 @@ def fit(model, q_hooks=None) try: - with time_limit(config.tuning_criterion.timeout): + with time_limit(conf.tuning_criterion.timeout): strategy.traverse() except KeyboardInterrupt: pass diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index 2d898872119..5df07d38d1e 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -204,7 +204,7 @@ def __new__(cls, root, **kwargs): model_type = kwargs['modelType'] else: model_type = get_model_type(root) - if model_type == "keras" and kwargs.get("framework", None) != "tensorflow": + if 
model_type == "keras" and kwargs.get("backend", None) == "itex": return MODELS['keras'](root, **kwargs) else: return MODELS[framework](model_type, root, **kwargs) diff --git a/neural_compressor/training.py b/neural_compressor/training.py index 4204955b012..eb0b17971cd 100644 --- a/neural_compressor/training.py +++ b/neural_compressor/training.py @@ -32,6 +32,7 @@ WeightPruningConfig) from typing import Callable, List, Union + class CompressionManager: """CompressionManager is uesd in train loop for what user want to deal with additional. diff --git a/test/benchmark/test_benchmark_2.x.py b/test/benchmark/test_benchmark_2.x.py index c812059de0f..ae3651b01fa 100644 --- a/test/benchmark/test_benchmark_2.x.py +++ b/test/benchmark/test_benchmark_2.x.py @@ -197,7 +197,7 @@ def test_benchmark_data_25(self): def test_benchmark_raw_cmd(self): conf = BenchmarkConfig(warmup=5, iteration=10, cores_per_instance=4, num_of_instance=2) raw_cmd = "python fake_raw_cmd.py --input_model={}".format(self.graph_path) - benchmark_with_raw_cmd(raw_cmd, config=conf) + benchmark_with_raw_cmd(raw_cmd, conf=conf) for i in range(2): with open(f'2_4_{i}.log', "r") as f: for line in f: diff --git a/test/mixed_precision/test_mixed_precision_keras_model.py b/test/mixed_precision/test_mixed_precision_keras_model.py index 16e7cab3608..4a3950ee49e 100644 --- a/test/mixed_precision/test_mixed_precision_keras_model.py +++ b/test/mixed_precision/test_mixed_precision_keras_model.py @@ -99,7 +99,7 @@ def test_mixed_precision_with_keras_model(self): config = MixedPrecisionConfig() q_model = mix_precision.fit( model='./models/saved_model', - config=config, + conf=config, eval_dataloader=dataloader, eval_metric=MyMetric()) From 82220f22a36de5f81e4871b29074fc50382a7ab3 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Fri, 5 May 2023 18:02:24 +0800 Subject: [PATCH 07/14] Fixed docstring check error Signed-off-by: Cheng, Penghui --- neural_compressor/benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index e21e5db90b5..bf20668f7b2 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -357,7 +357,7 @@ def benchmark_with_raw_cmd(raw_cmd, conf=None): Args: raw_cmd (string): The commend to be benchmarked. - config (BenchmarkConfig): The configuration for benchmark containing accuracy goal, + conf (BenchmarkConfig): The configuration for benchmark containing accuracy goal, tuning objective and preferred calibration & quantization tuning space etc. 
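For reference, a minimal usage sketch of the renamed keyword arguments that these patches introduce (`conf=` replacing `config=` in the `fit()` entry points, and `precisions` replacing `precision` in `MixedPrecisionConfig`). This sketch is not part of any patch in the series; the model paths and the benchmark body are hypothetical placeholders.

```python
# Sketch assuming the updated API from this patch series; paths and the
# benchmark body are hypothetical placeholders.
from neural_compressor import mix_precision
from neural_compressor.benchmark import fit as benchmark_fit
from neural_compressor.config import BenchmarkConfig, MixedPrecisionConfig

# Mixed precision conversion: default precision is bf16; the config is now
# passed through the 'conf' keyword.
mp_conf = MixedPrecisionConfig()
converted_model = mix_precision.fit('./fp32_model.pb', conf=mp_conf)
converted_model.save('./path/to/save/')

# Benchmark: pass the config through 'conf' and supply either b_dataloader
# or a user-defined b_func that takes the model as its only argument.
def b_func(model):
    # hypothetical evaluation loop that logs "Latency: ..." / "Throughput: ..."
    pass

b_conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=1)
benchmark_fit(model='./int8.pb', conf=b_conf, b_func=b_func)
```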
From acfede3c53bf8389b1582aefd99cccfe3e8a78e3 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Fri, 5 May 2023 19:55:18 +0800 Subject: [PATCH 08/14] Fixed UT error Signed-off-by: Cheng, Penghui --- .../adaptor/tf_utils/graph_converter.py | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/neural_compressor/adaptor/tf_utils/graph_converter.py b/neural_compressor/adaptor/tf_utils/graph_converter.py index 309b4828a19..7c322ee3bce 100644 --- a/neural_compressor/adaptor/tf_utils/graph_converter.py +++ b/neural_compressor/adaptor/tf_utils/graph_converter.py @@ -126,7 +126,10 @@ def __init__(self, self._check_tf_version() self._check_args() - self._fp32_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") + if "backend" in self.model.kwargs: + self._fp32_model = Model(self.model._model, **self.model.kwargs) + else: + self._fp32_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") self._fp32_model.graph_def = self.model.graph_def self._fp32_model.output_tensor_names = self.output_tensor_names self._fp32_model.input_tensor_names = self.input_tensor_names @@ -145,7 +148,10 @@ def __init__(self, self.scale_info.update({'bf16_ops': self.bf16_ops}) self.scale_info.update({'fp32_ops': self.fp32_ops}) - self._sampling_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") + if "backend" in self.model.kwargs: + self._sampling_model = Model(self.model._model, **self.model.kwargs) + else: + self._sampling_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") self._sampling_model.output_tensor_names = self.output_tensor_names self._sampling_model.input_tensor_names = self.input_tensor_names @@ -326,8 +332,11 @@ def _gen_tmp_filenames(self): self._tmp_model = self._fp32_model else: # to keep temp model - self._tmp_model = Model(self.model._model, **self.model.kwargs, - backend="itex" if self.itex_mode else "default") + if "backend" in self.model.kwargs: + self._tmp_model = Model(self.model._model, **self.model.kwargs) + else: + self._tmp_model = Model(self.model._model, **self.model.kwargs, + backend="itex" if self.itex_mode else "default") self._tmp_model.graph_def = self.model.graph_def self._tmp_model.output_tensor_names = self.output_tensor_names self._tmp_model.input_tensor_names = self.input_tensor_names @@ -484,7 +493,7 @@ def quantize(self): self._quantize_graph() self.quantized_node_info = [tuple(i) for i in self.quantized_node_info] - if self.fake_quant: # pragma: no cover + if self.fake_quant: # pragma: no cover self._fuse_requantize_with_fused_quantized_node() else: if self._enable_kl_op_names: @@ -603,7 +612,10 @@ def _generate_calibration_data(self, tmp_path, output_data, enable_kl_algo=False logger.debug("Generate calibration data and save to {}.".format(tmp_dump_file)) - model = Model(tmp_path, **self._tmp_model.kwargs, backend="itex" if self.itex_mode else "default") + if "backend" in self._tmp_model.kwargs: + model = Model(tmp_path, **self._tmp_model.kwargs) + else: + model = Model(tmp_path, **self._tmp_model.kwargs, backend="itex" if self.itex_mode else "default") model.output_tensor_names = self.output_tensor_names model.input_tensor_names = self.input_tensor_names From f6a0cffc6e296d018cb33dd2bc1576ce0f5299f3 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Fri, 5 May 2023 21:07:42 +0800 Subject: [PATCH 09/14] Fixed pylink error Signed-off-by: Cheng, Penghui --- 
neural_compressor/adaptor/tf_utils/graph_converter.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/neural_compressor/adaptor/tf_utils/graph_converter.py b/neural_compressor/adaptor/tf_utils/graph_converter.py index 7c322ee3bce..62c4657905b 100644 --- a/neural_compressor/adaptor/tf_utils/graph_converter.py +++ b/neural_compressor/adaptor/tf_utils/graph_converter.py @@ -129,7 +129,9 @@ def __init__(self, if "backend" in self.model.kwargs: self._fp32_model = Model(self.model._model, **self.model.kwargs) else: - self._fp32_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") + self._fp32_model = Model(self.model._model, + **self.model.kwargs, + backend="itex" if itex_mode else "default") self._fp32_model.graph_def = self.model.graph_def self._fp32_model.output_tensor_names = self.output_tensor_names self._fp32_model.input_tensor_names = self.input_tensor_names @@ -151,7 +153,9 @@ def __init__(self, if "backend" in self.model.kwargs: self._sampling_model = Model(self.model._model, **self.model.kwargs) else: - self._sampling_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") + self._sampling_model = Model(self.model._model, + **self.model.kwargs, + backend="itex" if itex_mode else "default") self._sampling_model.output_tensor_names = self.output_tensor_names self._sampling_model.input_tensor_names = self.input_tensor_names @@ -335,7 +339,8 @@ def _gen_tmp_filenames(self): if "backend" in self.model.kwargs: self._tmp_model = Model(self.model._model, **self.model.kwargs) else: - self._tmp_model = Model(self.model._model, **self.model.kwargs, + self._tmp_model = Model(self.model._model, + **self.model.kwargs, backend="itex" if self.itex_mode else "default") self._tmp_model.graph_def = self.model.graph_def self._tmp_model.output_tensor_names = self.output_tensor_names From a3ac1e4c76d6feae9bb5f113d2022769f7fdc42c Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Mon, 8 May 2023 10:29:56 +0800 Subject: [PATCH 10/14] Update code style Signed-off-by: Cheng, Penghui --- neural_compressor/benchmark.py | 2 +- neural_compressor/data/__init__.py | 2 ++ neural_compressor/metric/__init__.py | 5 +++-- neural_compressor/mix_precision.py | 6 +++--- neural_compressor/quantization.py | 4 ++-- neural_compressor/training.py | 2 +- 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index bf20668f7b2..aa64f23559d 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -25,7 +25,7 @@ import psutil from threading import Thread -from neural_compressor.data.dataloaders.dataloader import check_dataloader +from neural_compressor.data import check_dataloader from .adaptor import FRAMEWORKS from .objective import MultiObjective from .config import BenchmarkConfig, options diff --git a/neural_compressor/data/__init__.py b/neural_compressor/data/__init__.py index 11a743ec345..4aaaa57c0e5 100644 --- a/neural_compressor/data/__init__.py +++ b/neural_compressor/data/__init__.py @@ -22,6 +22,7 @@ import neural_compressor.data.transforms from .datasets import Datasets, Dataset, IterableDataset, dataset_registry, TensorflowImageRecord, COCORecordDataset from .dataloaders import DATALOADERS, DataLoader +from .dataloaders.dataloader import check_dataloader from .dataloaders.default_dataloader import DefaultDataLoader from .transforms import TRANSFORMS, BaseTransform, ComposeTransform, transform_registry, 
Postprocess from .transforms import LabelShift, BilinearImagenetTransform, TensorflowResizeCropImagenetTransform @@ -32,6 +33,7 @@ from .filters import FILTERS, Filter, filter_registry, LabelBalanceCOCORecordFilter __all__ = [ + "check_dataloader", "DataLoader", "DATALOADERS", "DefaultDataLoader", diff --git a/neural_compressor/metric/__init__.py b/neural_compressor/metric/__init__.py index 04d859d81fe..ab53b731065 100644 --- a/neural_compressor/metric/__init__.py +++ b/neural_compressor/metric/__init__.py @@ -18,7 +18,8 @@ """Intel Neural Compressor Metric.""" -from .metric import METRICS, Metric, BaseMetric, TensorflowTopK, metric_registry, COCOmAPv2, SquadF1, GeneralTopK +from .metric import (METRICS, Metric, BaseMetric, TensorflowTopK, metric_registry, COCOmAPv2, SquadF1, GeneralTopK, + register_customer_metric) from os.path import dirname, basename, isfile, join import glob @@ -30,4 +31,4 @@ __all__ = ["METRICS", "Metric", "BaseMetric", "TensorflowTopK", "metric_registry", - "COCOmAPv2", "SquadF1", "GeneralTopK"] + "COCOmAPv2", "SquadF1", "GeneralTopK", "register_customer_metric"] diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 386347f5943..1e16b615252 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -21,8 +21,8 @@ import numpy as np import random -from neural_compressor.data.dataloaders.dataloader import check_dataloader -from neural_compressor.metric.metric import register_customer_metric +from neural_compressor.data import check_dataloader +from neural_compressor.metric import register_customer_metric from .utils.utility import time_limit, CpuInfo from .strategy import STRATEGIES from .config import _Config, options @@ -31,7 +31,7 @@ def fit(model, - conf=None, + conf, eval_func=None, eval_dataloader=None, eval_metric=None, diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index e30708a3eca..d9ee2f6c953 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -21,8 +21,8 @@ import random import numpy as np from .config import _Config, options -from .data.dataloaders.dataloader import check_dataloader -from .metric.metric import register_customer_metric +from .data import check_dataloader +from .metric import register_customer_metric from .model import Model from .strategy import STRATEGIES from .utils import logger diff --git a/neural_compressor/training.py b/neural_compressor/training.py index eb0b17971cd..1ed7549eb27 100644 --- a/neural_compressor/training.py +++ b/neural_compressor/training.py @@ -23,7 +23,7 @@ from .adaptor import FRAMEWORKS from .compression.callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks from .config import _Config, options -from .metric.metric import register_customer_metric +from .metric import register_customer_metric from .model.model import Model from .utils import logger from .utils.utility import time_limit From 5a11a289b40d600c3ab26fdda324755037d5e187 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Mon, 8 May 2023 10:36:55 +0800 Subject: [PATCH 11/14] Update docs Signed-off-by: Cheng, Penghui --- docs/source/mixed_precision.md | 2 +- neural_compressor/quantization.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/source/mixed_precision.md b/docs/source/mixed_precision.md index e288890d745..fa134e6c0b9 100644 --- a/docs/source/mixed_precision.md +++ b/docs/source/mixed_precision.md @@ -42,7 +42,7 @@ Supported precisions for 
mix precision include bf16 and fp16. If users want to g from neural_compressor import mix_precision from neural_compressor.config import MixedPrecisionConfig -conf = MixedPrecisionConfig(precisions='bf16') +conf = MixedPrecisionConfig() # default precision is bf16 converted_model = mix_precision.fit(model, conf=conf) converted_model.save('./path/to/save/') ``` diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index d9ee2f6c953..e03c8cb43e9 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -61,9 +61,7 @@ def fit(model, calib_func (function, optional): Calibration function for post-training static quantization. It is optional. This function takes "model" as input parameter - and executes entire inference process. If this - parameter specified, calib_dataloader is also needed - for FX trace if PyTorch >= 1.13. + and executes entire inference process. eval_func (function, optional): The evaluation function provided by user. This function takes model as parameter, and evaluation dataset and metrics should be From d54c6510ea35c084c79a093fd0df2cc36bf468a8 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Mon, 8 May 2023 12:15:00 +0800 Subject: [PATCH 12/14] Fixed import error Signed-off-by: Cheng, Penghui --- docs/source/pruning.md | 3 ++- .../text-classification/pruning/eager/run_glue_no_trainer.py | 2 +- .../pruning/eager/run_glue_no_trainer_mixed.py | 3 ++- neural_compressor/__init__.py | 1 - neural_compressor/compression/__init__.py | 2 +- neural_compressor/compression/pruner/README.md | 3 ++- test/pruning_2_plus.x/test_pruning.py | 3 ++- test/pruning_2_plus.x/test_pruning_block.py | 3 ++- 8 files changed, 12 insertions(+), 8 deletions(-) diff --git a/docs/source/pruning.md b/docs/source/pruning.md index 1b7f064ca77..21e5b34663a 100644 --- a/docs/source/pruning.md +++ b/docs/source/pruning.md @@ -301,7 +301,8 @@ The following section exemplifies how to use hooks in user pass-in training func [**Experimental option** ]Modify model and optimizer. 
```python - from neural_compressor import prepare_pruning, WeightPruningConfig + from neural_compressor import WeightPruningConfig + from neural_compressor.experimental.compression import prepare_pruning config = WeightPruningConfig(configs) prepare_pruning(config, model, optimizer) # modify model and optimizer for epoch in range(num_train_epochs): diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py index 0fd9c1ad61d..ea566c5e3e3 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py @@ -523,7 +523,7 @@ def preprocess_function(examples): # pruner = Pruning(config) # pruner.model = model # pruner.on_train_begin() - from neural_compressor import prepare_pruning + from neural_compressor.experimental.compression import prepare_pruning prepare_pruning(configs, model, optimizer) diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py index 9860d7b4e66..784a46ed698 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py @@ -46,7 +46,8 @@ ) from transformers.file_utils import get_full_repo_name from transformers.utils.versions import require_version -from neural_compressor import prepare_pruning, WeightPruningConfig +from neural_compressor import WeightPruningConfig +from neural_compressor.experimental.compression import prepare_pruning logger = logging.getLogger(__name__) diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index ba722b9db2c..2cf56182e99 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -18,7 +18,6 @@ """IntelĀ® Neural Compressor: An open-source Python library supporting popular model compression techniques.""" from .version import __version__ # we need to set a global 'NA' backend, or Model can't be used -from .experimental.compression import prepare_pruning from .config import DistillationConfig, PostTrainingQuantConfig, \ WeightPruningConfig, QuantizationAwareTrainingConfig, \ MixedPrecisionConfig diff --git a/neural_compressor/compression/__init__.py b/neural_compressor/compression/__init__.py index b9b85c89c7e..71e6a06cd29 100644 --- a/neural_compressor/compression/__init__.py +++ b/neural_compressor/compression/__init__.py @@ -15,4 +15,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks +from .callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks \ No newline at end of file diff --git a/neural_compressor/compression/pruner/README.md b/neural_compressor/compression/pruner/README.md index f8b0cfaa925..6fb3738ec51 100644 --- a/neural_compressor/compression/pruner/README.md +++ b/neural_compressor/compression/pruner/README.md @@ -301,7 +301,8 @@ The following section exemplifies how to use hooks in user pass-in training func [**Experimental option** ]Modify model and optimizer. 
```python - from neural_compressor import prepare_pruning, WeightPruningConfig + from neural_compressor import WeightPruningConfig + from neural_compressor.experimental.compression import prepare_pruning config = WeightPruningConfig(configs) prepare_pruning(config, model, optimizer) # modify model and optimizer for epoch in range(num_train_epochs): diff --git a/test/pruning_2_plus.x/test_pruning.py b/test/pruning_2_plus.x/test_pruning.py index 396d2f63b78..9a3df39a62c 100644 --- a/test/pruning_2_plus.x/test_pruning.py +++ b/test/pruning_2_plus.x/test_pruning.py @@ -8,7 +8,8 @@ sys.path.insert(0, './') from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import prepare_pruning, WeightPruningConfig +from neural_compressor import WeightPruningConfig +from neural_compressor.experimental.compression import prepare_pruning class TestPruning(unittest.TestCase): diff --git a/test/pruning_2_plus.x/test_pruning_block.py b/test/pruning_2_plus.x/test_pruning_block.py index e4b10945755..14a59dd39e4 100644 --- a/test/pruning_2_plus.x/test_pruning_block.py +++ b/test/pruning_2_plus.x/test_pruning_block.py @@ -7,7 +7,8 @@ sys.path.insert(0, './') from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import prepare_pruning, WeightPruningConfig +from neural_compressor import WeightPruningConfig +from neural_compressor.experimental.compression import prepare_pruning class TestPruning(unittest.TestCase): From 7daaabe5c80c31060edebe26e033372439aebef9 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Mon, 8 May 2023 21:18:34 +0800 Subject: [PATCH 13/14] alias parameters with old name for benchmark and mixed_precision Signed-off-by: Cheng, Penghui --- .../quantization/ptq/main.py | 2 +- .../inception_v3/quantization/ptq/main.py | 2 +- .../mobilenet_v2/quantization/ptq/main.py | 2 +- .../resnet101/quantization/ptq/main.py | 2 +- .../resnet50/quantization/ptq/main.py | 2 +- .../resnet50_fashion/quantization/ptq/main.py | 2 +- .../resnetv2_101/quantization/ptq/main.py | 2 +- .../resnetv2_50/quantization/ptq/main.py | 2 +- .../vgg16/quantization/ptq/main.py | 2 +- .../vgg19/quantization/ptq/main.py | 2 +- .../xception/quantization/ptq/main.py | 2 +- .../quantization/ptq/main.py | 2 +- .../mobilenet_v1/quantization/ptq/main.py | 2 +- .../mobilenet_v2/quantization/ptq/main.py | 2 +- .../densenet121/quantization/ptq/main.py | 2 +- .../densenet161/quantization/ptq/main.py | 2 +- .../densenet169/quantization/ptq/main.py | 2 +- .../efficientnet-b0/quantization/ptq/main.py | 2 +- .../quantization/ptq/main.py | 2 +- .../inception_v1/quantization/ptq/main.py | 2 +- .../inception_v2/quantization/ptq/main.py | 2 +- .../inception_v3/quantization/ptq/main.py | 2 +- .../inception_v4/quantization/ptq/main.py | 2 +- .../mobilenet_v1/quantization/ptq/main.py | 2 +- .../mobilenet_v2/quantization/ptq/main.py | 2 +- .../mobilenet_v3/quantization/ptq/main.py | 2 +- .../resnet101/quantization/ptq/main.py | 2 +- .../resnet50_v1/quantization/ptq/main.py | 2 +- .../resnet50_v1_5/quantization/ptq/main.py | 2 +- .../resnet_v2_101/quantization/ptq/main.py | 2 +- .../resnet_v2_152/quantization/ptq/main.py | 2 +- .../resnet_v2_50/quantization/ptq/main.py | 2 +- .../vgg16/quantization/ptq/main.py | 2 +- .../vgg19/quantization/ptq/main.py | 2 +- .../quantization/ptq/tf_benchmark.py | 2 +- .../quantization/ptq/inference.py | 2 +- 
.../quantization/ptq/run_accuracy.py | 2 +- .../quantization/ptq/style_tune.py | 2 +- neural_compressor/benchmark.py | 4 +- neural_compressor/conf/pythonic_config.py | 4 +- neural_compressor/config.py | 4 +- neural_compressor/mix_precision.py | 7 +-- neural_compressor/utils/__init__.py | 4 +- neural_compressor/utils/utility.py | 46 ++++++++++++++----- test/itex/test_keras_in_keras_out.py | 4 +- test/itex/test_tensorflow_itex_2.x.py | 2 +- 46 files changed, 89 insertions(+), 62 deletions(-) diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py index 1e742e36cb6..0889c1e312d 100644 --- a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py @@ -119,7 +119,7 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils.utility import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) config = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[20, 150]) diff --git a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py index a27de78af52..82de2a2aa37 100644 --- a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py @@ -119,7 +119,7 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils.utility import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) config = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) diff --git a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py index e5bbc475f24..ab5e7f9a137 100644 --- a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py @@ -118,7 +118,7 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils.utility import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) config = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) diff --git a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py index dbad4085197..884620679bd 100644 --- a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py @@ -125,7 +125,7 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils.utility import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9524) config = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[10, 15]) diff --git a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py 
index fba7c004931..9cf3d9c2cb3 100644 --- a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py @@ -110,7 +110,7 @@ def eval_func(dataloader, metric): return acc def main(_): - from neural_compressor.utils import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) if FLAGS.tune: from neural_compressor import quantization diff --git a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py index 19a5c125f03..6d163c39eba 100644 --- a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py @@ -100,7 +100,7 @@ def eval_func(data_loader, metric): return acc def main(_): - from neural_compressor.utils import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) if FLAGS.tune: from neural_compressor import quantization diff --git a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py index e5bbc475f24..ab5e7f9a137 100644 --- a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py @@ -118,7 +118,7 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils.utility import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) config = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) diff --git a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py index e5bbc475f24..ab5e7f9a137 100644 --- a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py @@ -118,7 +118,7 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils.utility import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) config = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) diff --git a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py index 1396533ff13..0c6694e6fac 100644 --- a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py @@ -110,7 +110,7 @@ def eval_func(dataloader, metric): return acc def main(_): - from neural_compressor.utils import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) if FLAGS.tune: from neural_compressor import quantization diff --git a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py index fba7c004931..9cf3d9c2cb3 100644 --- a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py @@ -110,7 +110,7 @@ def eval_func(dataloader, metric): return acc def main(_): - from neural_compressor.utils import set_random_seed + 
from neural_compressor import set_random_seed
         set_random_seed(9527)
         if FLAGS.tune:
             from neural_compressor import quantization
diff --git a/examples/keras/image_recognition/xception/quantization/ptq/main.py b/examples/keras/image_recognition/xception/quantization/ptq/main.py
index 359d7e083c9..7e39fc77639 100644
--- a/examples/keras/image_recognition/xception/quantization/ptq/main.py
+++ b/examples/keras/image_recognition/xception/quantization/ptq/main.py
@@ -118,7 +118,7 @@ def main(_):
     if FLAGS.tune:
         from neural_compressor.quantization import fit
         from neural_compressor.config import PostTrainingQuantConfig
-        from neural_compressor.utils.utility import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         config = PostTrainingQuantConfig(backend='itex',
                                          calibration_sampling_size=[50, 100])
diff --git a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py
index 1da7dc46c4c..8ad1b39bc7a 100644
--- a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py
@@ -100,7 +100,7 @@ def eval_func(dataloader, metric):
 
 class eval_object_detection_optimized_graph(object):
     def run(self):
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
             from neural_compressor import quantization
diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py
index 65617b896b5..529e0cfe763 100644
--- a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py
@@ -93,7 +93,7 @@ def eval_func(dataloader, metric):
 
 class eval_object_detection_optimized_graph(object):
     def run(self):
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
             from neural_compressor import quantization
diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py
index 65617b896b5..529e0cfe763 100644
--- a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py
@@ -93,7 +93,7 @@ def eval_func(dataloader, metric):
 
 class eval_object_detection_optimized_graph(object):
     def run(self):
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
             from neural_compressor import quantization
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py
index a8e25c9aa57..89b3733c41c 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py
@@ -91,7 +91,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py
index eefe7238b4f..6b8dfa20d96 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py
@@ -91,7 +91,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py
index eefe7238b4f..6b8dfa20d96 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py
@@ -91,7 +91,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py
index 9d05e80e5ea..3172d34b3dc 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py
@@ -91,7 +91,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py
index 2155ec2a821..c007b8dc18f 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py
index 51672b8872a..bc9681f70a0 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py
index 481d928498f..4595b3105a0 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py
index 95194af2bc0..26fc20b6465 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py
index aa52c8f5779..8187b01cd84 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py
index e5bb40b9382..389ed7b7c63 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py
index 4fc568d5e00..53e198bc717 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
        set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py
index 4789bfb80e8..fbcc385936c 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py
index 48b87a7b8e4..8d534299d2b 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py
@@ -89,7 +89,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py
index c2db69f0702..2236af805d0 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py
index fed33935aee..0bef19c92ff 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py
@@ -93,7 +93,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py
index 481d928498f..4595b3105a0 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py
index 4fa8a56a1f7..44b48d1a0de 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py
index 481d928498f..4595b3105a0 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py
index 1e3c3bcaf90..c9c03bdad8a 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py
@@ -89,7 +89,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py
index 1e3c3bcaf90..c9c03bdad8a 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py
@@ -89,7 +89,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/oob_models/quantization/ptq/tf_benchmark.py b/examples/tensorflow/oob_models/quantization/ptq/tf_benchmark.py
index 93519f4dc49..5aefcbe03be 100644
--- a/examples/tensorflow/oob_models/quantization/ptq/tf_benchmark.py
+++ b/examples/tensorflow/oob_models/quantization/ptq/tf_benchmark.py
@@ -335,7 +335,7 @@ def __iter__(self):
     from neural_compressor.data import DataLoader
     from neural_compressor.quantization import fit
     from neural_compressor.config import PostTrainingQuantConfig
-    from neural_compressor.utils import set_random_seed
+    from neural_compressor import set_random_seed
     set_random_seed(9527)
 
     config = PostTrainingQuantConfig(
diff --git a/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/inference.py b/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/inference.py
index 2db968beba6..a21a12b47c0 100644
--- a/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/inference.py
+++ b/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/inference.py
@@ -183,7 +183,7 @@ def auto_tune(self):
         """
         from neural_compressor.quantization import fit
         from neural_compressor.config import PostTrainingQuantConfig
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         infer_graph = load_graph(self.args.input_graph)
         set_random_seed(9527)
diff --git a/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_accuracy.py b/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_accuracy.py
index 5f039c4f925..b20ecfd9d6d 100644
--- a/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_accuracy.py
+++ b/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_accuracy.py
@@ -206,7 +206,7 @@ def __len__(self):
     from neural_compressor.data import DataLoader
     from neural_compressor.quantization import fit
     from neural_compressor.config import PostTrainingQuantConfig
-    from neural_compressor.utils import set_random_seed
+    from neural_compressor import set_random_seed
     set_random_seed(9527)
 
     config = PostTrainingQuantConfig(calibration_sampling_size=[40])
diff --git a/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_tune.py b/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_tune.py
index 018f6216cef..4c81cb05006 100644
--- a/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_tune.py
+++ b/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_tune.py
@@ -122,7 +122,7 @@ def main(args=None):
             tf.import_graph_def(frozen_graph, name='')
     from neural_compressor import quantization
     from neural_compressor.config import PostTrainingQuantConfig
-    from neural_compressor.utils import set_random_seed
+    from neural_compressor import set_random_seed
     set_random_seed(9527)
     from neural_compressor.utils.create_obj_from_config import create_dataloader
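All of the example hunks above make the same mechanical substitution: `set_random_seed` is now re-exported from the top-level `neural_compressor` package instead of being imported from `neural_compressor.utils` or `neural_compressor.utils.utility`. As a rough, non-authoritative sketch of the pattern the updated examples converge on (the FP32 model and calibration dataloader that would be fed to `quantization.fit` are omitted here, since they differ per example):

    from neural_compressor import quantization, set_random_seed
    from neural_compressor.config import PostTrainingQuantConfig

    set_random_seed(9527)  # seed helper now lives at the package root
    config = PostTrainingQuantConfig(calibration_sampling_size=[50, 100])
    # a FP32 model plus a calibration dataloader would then be passed to quantization.fit(...)
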
diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py
index aa64f23559d..d0397f4ff63 100644
--- a/neural_compressor/benchmark.py
+++ b/neural_compressor/benchmark.py
@@ -29,8 +29,7 @@
 from .adaptor import FRAMEWORKS
 from .objective import MultiObjective
 from .config import BenchmarkConfig, options
-from .utils import logger
-from .utils import OPTIONS
+from .utils import alias_param, logger, OPTIONS
 from .utils.utility import GLOBAL_STATE, MODE
 from .model import BaseModel, Model
 from .utils import logger
@@ -380,6 +379,7 @@ def benchmark_with_raw_cmd(raw_cmd, conf=None):
     summary_benchmark()
 
 
+@alias_param("conf", param_alias='config')
 def fit(model, conf, b_dataloader=None, b_func=None):
     """Benchmark the model performance with the configure.
diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py
index a2cddd25b46..ca16da078cd 100644
--- a/neural_compressor/conf/pythonic_config.py
+++ b/neural_compressor/conf/pythonic_config.py
@@ -111,12 +111,12 @@ class Options:
     Example::
 
-        from neural_compressor.utils.utility import set_random_seed, set_workspace, set_resume_from, set_tensorboard
+        from neural_compressor import set_random_seed, set_workspace, set_resume_from, set_tensorboard
         set_random_seed(2022)
         set_workspace("workspace_path")
         set_resume_from("workspace_path")
         set_tensorboard(True)
-    
+
     """
     def __init__(self, random_seed=1978, workspace=default_workspace, resume_from=None, tensorboard=False):
diff --git a/neural_compressor/config.py b/neural_compressor/config.py
index 47696c70b2a..5b7d6df397b 100644
--- a/neural_compressor/config.py
+++ b/neural_compressor/config.py
@@ -18,6 +18,7 @@
 import datetime
 import logging
 from schema import Schema, And, Optional
+from .utils import alias_param
 
 logger = logging.getLogger("neural_compressor")
 default_workspace = './nc_workspace/{}/'.format(
@@ -162,7 +163,7 @@ class Options:
     Example::
 
-        from neural_compressor.utils.utility import set_random_seed, set_workspace, set_resume_from, set_tensorboard
+        from neural_compressor import set_random_seed, set_workspace, set_resume_from, set_tensorboard
         set_random_seed(2022)
         set_workspace("workspace_path")
         set_resume_from("workspace_path")
@@ -1664,6 +1665,7 @@ class MixedPrecisionConfig(object):
         conf = MixedPrecisionConfig()
         converted_model = mix_precision.fit(model, conf=conf)
     """
+    @alias_param("precisions", param_alias="precision")
     def __init__(self,
                  device="cpu",
                  backend="default",
diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py
index 1e16b615252..057344591ab 100644
--- a/neural_compressor/mix_precision.py
+++ b/neural_compressor/mix_precision.py
@@ -23,13 +23,14 @@
 from neural_compressor.data import check_dataloader
 from neural_compressor.metric import register_customer_metric
-from .utils.utility import time_limit, CpuInfo
-from .strategy import STRATEGIES
 from .config import _Config, options
-from .utils import logger
 from .model import Model
+from .strategy import STRATEGIES
+from .utils import alias_param, logger
+from .utils.utility import time_limit, CpuInfo
 
 
+@alias_param("conf", param_alias='config')
 def fit(model,
         conf,
         eval_func=None,
diff --git a/neural_compressor/utils/__init__.py b/neural_compressor/utils/__init__.py
index d943c962138..032235e886f 100644
--- a/neural_compressor/utils/__init__.py
+++ b/neural_compressor/utils/__init__.py
@@ -20,7 +20,7 @@
 from .collect_layer_histogram import LayerHistogramCollector
 from .logger import log, info, debug, warn, warning, error, fatal
 from .options import OPTIONS
-from .utility import set_random_seed
+from .utility import alias_param
 
 __all__ = ["LayerHistogramCollector", "log", "info", "debug", "warn", "warning", "error", "fatal",
-           "OPTIONS", "set_random_seed"]
+           "OPTIONS", "alias_param"]
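Taken together, the `alias_param` applications above mean the public `fit` entry points accept `config=` as a keyword alias for their `conf` parameter, and `MixedPrecisionConfig` accepts `precision=` as an alias for `precisions`. A hedged illustration follows; the string value passed for the precision and any keyword surface beyond the aliases visible in the hunks are assumptions, not taken from the patch:

    from neural_compressor import benchmark, mix_precision
    from neural_compressor.config import BenchmarkConfig, MixedPrecisionConfig

    bench_conf = BenchmarkConfig()
    mp_conf = MixedPrecisionConfig(precision="bf16")  # remapped by the decorator to precisions="bf16"

    # Both keyword spellings now reach the same parameter on the decorated entry points,
    # e.g. (model and dataloader objects supplied by the caller):
    #   benchmark.fit(model, conf=bench_conf, b_dataloader=dataloader)
    #   benchmark.fit(model, config=bench_conf, b_dataloader=dataloader)
    #   converted_model = mix_precision.fit(model, config=mp_conf)
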
diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py
index a146eaff77b..84068bdf4a0 100644
--- a/neural_compressor/utils/utility.py
+++ b/neural_compressor/utils/utility.py
@@ -22,24 +22,26 @@ file (in yaml) and use cfg_from_file(yaml_file) to load it and
 override the default options.
 """
-import re
 import ast
-import os
-import time
-import sys
-import pickle
+import cpuinfo
 import logging
 import importlib
-from contextlib import contextmanager
-from tempfile import NamedTemporaryFile
-import os.path as osp
-import threading, _thread
-import cpuinfo
 import re
 import numpy as np
-from neural_compressor.utils import logger
+import os
+import os.path as osp
+import pickle
 import prettytable as pt
 import psutil
 import subprocess
+import sys
+import threading
+import time
+import _thread
+from contextlib import contextmanager
+from functools import wraps
+from tempfile import NamedTemporaryFile
+from neural_compressor.utils import logger
 from enum import Enum
 from pkg_resources import parse_version
@@ -677,3 +679,25 @@ def compare_objects(obj1, obj2, ignore_attrs):
     for attr in attrs1 - set(ignore_attrs):
         if getattr(obj1, attr) != getattr(obj2, attr):
             return False
+
+
+def alias_param(param_name: str, param_alias: str):
+    """
+    Decorator for aliasing a param in a function
+
+    Args:
+        param_name: name of param in function to alias
+        param_alias: alias that can be used for this param
+    Returns:
+    """
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            alias_param_value = kwargs.get(param_alias)
+            if alias_param_value:
+                kwargs[param_name] = alias_param_value
+                del kwargs[param_alias]
+            result = func(*args, **kwargs)
+            return result
+        return wrapper
+    return decorator
diff --git a/test/itex/test_keras_in_keras_out.py b/test/itex/test_keras_in_keras_out.py
index d455e3d1703..3222956e953 100644
--- a/test/itex/test_keras_in_keras_out.py
+++ b/test/itex/test_keras_in_keras_out.py
@@ -128,7 +128,7 @@ def test_keras_in_keras_out(self):
 
         from neural_compressor.quantization import fit
         from neural_compressor.config import PostTrainingQuantConfig
-        from neural_compressor.utils.utility import set_random_seed
+        from neural_compressor import set_random_seed
         from neural_compressor.data.dataloaders.dataloader import DataLoader
         set_random_seed(9527)
         config = PostTrainingQuantConfig(backend='itex')
@@ -165,7 +165,7 @@ def test_keras_model_interface(self):
 
         from neural_compressor.quantization import fit
         from neural_compressor.config import PostTrainingQuantConfig
-        from neural_compressor.utils.utility import set_random_seed
+        from neural_compressor import set_random_seed
         from neural_compressor.data.dataloaders.dataloader import DataLoader
         set_random_seed(9527)
         config = PostTrainingQuantConfig(backend='itex')
diff --git a/test/itex/test_tensorflow_itex_2.x.py b/test/itex/test_tensorflow_itex_2.x.py
index 97cbe5e50da..32c648cf496 100644
--- a/test/itex/test_tensorflow_itex_2.x.py
+++ b/test/itex/test_tensorflow_itex_2.x.py
@@ -7,7 +7,7 @@
 from neural_compressor.data.dataloaders.dataloader import DataLoader
 from neural_compressor.quantization import fit
 from neural_compressor.config import PostTrainingQuantConfig
-from neural_compressor.utils.utility import set_random_seed
+from neural_compressor import set_random_seed
 from neural_compressor.adaptor.tf_utils.util import version1_lt_version2
 import tensorflow as tf
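One behavioral detail visible in the hunk above: the wrapper looks the alias up with `kwargs.get(param_alias)` and only remaps it when the value is passed as a keyword and is truthy. A minimal usage sketch follows; the `fit` function in it is a stand-in defined purely for illustration (not the library's), and it assumes the patched package is installed:

    from neural_compressor.utils.utility import alias_param

    @alias_param("conf", param_alias="config")
    def fit(model, conf, b_dataloader=None):
        return conf

    assert fit("model", config="my-config") == "my-config"  # alias is remapped onto conf
    assert fit("model", conf="my-config") == "my-config"    # original keyword still works
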
From 5a159ddb6238dc45c4fb9c1296f24a99f2ac68b0 Mon Sep 17 00:00:00 2001
From: "Cheng, Penghui"
Date: Tue, 9 May 2023 10:59:46 +0800
Subject: [PATCH 14/14] Fixed docstring error and UT coverage error

Signed-off-by: Cheng, Penghui
---
 neural_compressor/utils/utility.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py
index 84068bdf4a0..d1b1f33240a 100644
--- a/neural_compressor/utils/utility.py
+++ b/neural_compressor/utils/utility.py
@@ -682,19 +682,17 @@ def compare_objects(obj1, obj2, ignore_attrs):
 
 
 def alias_param(param_name: str, param_alias: str):
-    """
-    Decorator for aliasing a param in a function
+    """Decorator for aliasing a param in a function.
 
     Args:
-        param_name: name of param in function to alias
-        param_alias: alias that can be used for this param
-    Returns:
+        param_name: Name of param in function to alias.
+        param_alias: Alias that can be used for this param.
     """
     def decorator(func):
         @wraps(func)
         def wrapper(*args, **kwargs):
             alias_param_value = kwargs.get(param_alias)
-            if alias_param_value:
+            if alias_param_value:  # pragma: no cover
                 kwargs[param_name] = alias_param_value
                 del kwargs[param_alias]
             result = func(*args, **kwargs)
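Since this final commit is concerned with unit-test coverage of `alias_param`, a sketch of the kind of test that would exercise the aliased-keyword path is shown below. It is not part of the patch, the module name is hypothetical, and it assumes the patched package is installed:

    # test_alias_param.py (hypothetical)
    import unittest

    from neural_compressor.utils.utility import alias_param


    class TestAliasParam(unittest.TestCase):
        def test_alias_keyword_is_remapped(self):
            @alias_param("conf", param_alias="config")
            def dummy(model, conf=None):
                return conf

            self.assertEqual(dummy("model", config=42), 42)  # alias path
            self.assertEqual(dummy("model", conf=7), 7)      # original keyword


    if __name__ == "__main__":
        unittest.main()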