From 14f42a89e8f87468ddf294fa50c54f4a283d8ace Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 8 Nov 2022 22:31:53 +0800 Subject: [PATCH 001/128] Feat(ST): add a interface for hawq(stage1) --- neural_compressor/adaptor/pytorch.py | 13 +++++++++++++ neural_compressor/strategy/basic.py | 7 ++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index bd2f250a216..1201bb21fa4 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -1072,7 +1072,20 @@ def is_fused_module(self, module): return True else: return False + + def calculate_op_sensitivity(self, model, dataloader, method_args): + """Compute the op sensitivity by the specific method. + + Args: + model(INC model): The fp32 model. + dataloader: The calibration dataloader. + method_args(Dict): The parameters for specifying the method. + Returns: + ops_sensitivity(Dict[tuple, float]): The key is (op_name, op_type), + the value is the sensitivity under the specified method + """ + pass unify_op_type_mapping = { "ConvReLU2d": "Conv2d", diff --git a/neural_compressor/strategy/basic.py b/neural_compressor/strategy/basic.py index c35398dd4bb..3cc4e38bde2 100644 --- a/neural_compressor/strategy/basic.py +++ b/neural_compressor/strategy/basic.py @@ -143,7 +143,12 @@ def next_tune_cfg(self): if fallback_items_lst: logger.info(f"Start to fallback op to {target_dtype} one by one.") self._fallback_started() - fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + #fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, + self.calib_dataloader, + method_args = {'name': 'hessian_trace'}) + fallback_items_name_lst = sorted(ops_sensitivity, key = lambda items: items[1], reverse=True) + op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], From e0ff7328bbd3628f15c429bb257bd5c648b79486 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 10 Nov 2022 14:22:12 +0800 Subject: [PATCH 002/128] hawq_metric.py --- neural_compressor/strategy/hawq_metric.py | 291 ++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 neural_compressor/strategy/hawq_metric.py diff --git a/neural_compressor/strategy/hawq_metric.py b/neural_compressor/strategy/hawq_metric.py new file mode 100644 index 00000000000..acbcd98d740 --- /dev/null +++ b/neural_compressor/strategy/hawq_metric.py @@ -0,0 +1,291 @@ +""" + Copyright (c) 2022 Intel Corporation + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" +import logging +import torch +import numpy as np +from torch.autograd import Variable +import yaml +import torchvision.transforms as transforms +import torchvision +import random +import copy +from torch.quantization import get_default_qat_qconfig, quantize_jit,get_default_qconfig +from torch.quantization.quantize_fx import prepare_fx, convert_fx,fuse_fx +from torch.quantization import default_dynamic_qconfig, float_qparams_weight_only_qconfig +import torch.quantization._numeric_suite as ns + + +def fixed_seed(seed): + """Fixed rand seed to make sure results are same in different times on different devices.Eg CPU/GPU + Args: + seed: an integer number + return: None + """ + np.random.seed(seed) #random + random.seed(seed) + torch.manual_seed(seed) #cpu + torch.cuda.manual_seed_all(seed) #parallel cpu + torch.backends.cudnn.deterministic = True #make sure results are same on cpu/gpu + torch.backends.cudnn.benchmark = True #accelerator +def calculate_params_gradients(model): + """ + get the gradients and parameters from given model + Args: + model: FP32 model specificed + return: + params: paratmeters of model + grads: gradients of model + """ + params=[] + grads=[] + for indx,(name, parm) in zip(enumerate(model.parameters()), model.named_parameters()): + logging.info('->tensor_index:', indx[0],'-->name:', name, '-->grad_requirs:',parm.requires_grad, '-->current tensor len:',parm.shape) + if not parm.requires_grad: + continue + params.append(parm) + grads.append(0. if parm.grad is None else parm.grad+0.) + return params, grads +def calculate_inner_product(list_x,list_y): + """Compute the inner product of two lists of variables list_x,list_y + Args: + list_x: input list variables + list_y: input list variables + return: + sum of inner product + """ + return sum([torch.sum(x * y) for (x, y) in zip(list_x, list_y)]) + +def calculate_vector_product(gradsH, params, v): + """compute the hessian vector product by torch.autograd.grad. 
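+    The full Hessian H is never materialized: with g denoting gradsH, the
+    identity H @ v = d(g . v)/dw lets torch.autograd.grad evaluate the
+    product directly (Pearlmutter's trick).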
+ Agrs: + gradsH: gradient at current point + params: corresponding variables + v: vector + return: + hv: hessian vector product + """ + hv=torch.autograd.grad( + gradsH, + params, + grad_outputs=v, + only_inputs=True, + retain_graph=True) + return hv +def ptq_calibrate(model, data_loader,num_cal): + """Calibrate model in post train quantization model + Args: + model: a pre_quantization model to calibrate + data_laoder: datasets + num_cal: maximization number of calibrated samples, such as images + return: + model: a calibrated model + """ + #Generate some samples to calibrate from data_loader + calibrate_samples=[] + i=0 + for inputs, targets in data_loader: + calibrate_samples.append(inputs) + i=i+1 + if i>=num_cal: + break + # model.cpu() + model.eval() + #calibration + with torch.no_grad(): + for sample in calibrate_samples: + model(sample) + return model +def calculate_perturbation(model_qnt,model_fp32)->dict: + """calculate weights quantized perturbation using L2 normal + Args: + model_qnt: quantized model + model_fp32: float model + return: + pertur_lst: dict,which contains layer_name and value + + """ + + wq_cmp_dict=ns.compare_weights(model_fp32.state_dict(), model_qnt.state_dict()) + pertur_lst=[] + for key in wq_cmp_dict: + pertur_pair={"layer_name":'',"value":0} + op_float_tensor=wq_cmp_dict[key]['float'] + op_qnt_tensor=wq_cmp_dict[key]['quantized'].dequantize() + diff_l2=(torch.norm(op_float_tensor-op_qnt_tensor,p=2)**2) #Formula: L2=||Q(w)-w||p^2 + pertur_pair['layer_name']=key + pertur_pair['value']=diff_l2 + pertur_lst.append(pertur_pair) + return pertur_lst +class Hessian(): + """This class used to compute each layer hessian trace from given FP32 model + """ + def __init__(self,model,criterion, data=None, dataloader=None,device='cpu') -> None: + """Initial parameters + Args: + model: FP32 model specificed + criterion: loss function + data: a single batch of data, including inputs and its corresponding labels + dataloader: the data loader including bunch of batches of data + device: currently only supports cpu device + """ + #make sure we either pass a single batch or a dataloader + assert (data!=None and dataloader==None ) or (data==None and dataloader!=None) + #make mode is evaluation model + self.model=model.eval() + self.criterion=criterion + self.device=device + + if data!=None: + self.data=data + self.full_dataset=False + if not self.full_dataset: + self.inputs, self.targets=self.data + outputs=self.model(self.inputs) + loss=self.criterion(outputs,self.targets) + loss.backward(create_graph=True) + params, gradSH=calculate_params_gradients(self.model) + + self.params=params + self.gradSH=gradSH + def calculate_trace(self,max_Iter=100, tolerance=1e-3): + """Compute the hessian trace based on Hutchinson algorithm + Args: + max_Inter: number of maximization iteration + tolerance: minimum relative tolerance for stopping the algorithm. + return: + avg_traces_lst: return hessian trace per layer for given model + """ + avg_traces_lst=[] + for (i_grad, i_param,(module_name, _)) in zip(self.gradSH, self.params, self.model.named_parameters()): + v=[torch.randint_like(i_param,high=2, device=self.device)] + for v_i in v: + v_i[v_i==0]=-1 + i_v=v + trace_vhv=[] + trace=0. 
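+            # Hutchinson's estimator: for a Rademacher probe v (entries +/-1),
+            # E[v^T H v] = tr(H), so the v^T H v values formed from Hessian-vector
+            # products below estimate this layer's Hessian trace; iteration stops
+            # once their running mean changes by less than `tolerance`.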
+ trace_pair={"layer_name":" ", "trace":0} + self.model.zero_grad() + for i in range(max_Iter): + hv=calculate_vector_product(i_grad,i_param,i_v) # hessian vector + trace_vhv_cur=calculate_inner_product(hv,v).cpu().item()#current point + trace_vhv.append(trace_vhv_cur) + difference=(np.mean(trace_vhv)-trace)/(abs(trace)+1e-6) + if abs(difference) None: + self.dataloader=dataloader + if yaml_trace and yaml_cpu is not None: + with open(yaml_trace) as file: + params_config=yaml.load(file) + if params_config['loss']=='CrossEntropyLoss': + self.criterion=torch.nn.CrossEntropyLoss() + self.random_seed=params_config['random_seed'] + self.max_Iteration=params_config['max_Iteration'] + self.enable_op_fuse=params_config['enable_op_fuse'] + self.tolerance=float(params_config['tolerance']) + self.max_cal_sample=float(params_config['max_cal_smaple']) + self.quantize_mode=params_config['quantize_mode'] + with open(yaml_cpu,'r') as file: + yaml_config=yaml.load(file) + str_dtype=(yaml_config[0]['precisions']['names']) + self.list_dtype = str_dtype.split(",") + else: + self.criterion=torch.nn.CrossEntropyLoss() + self.random_seed=100 + self.max_Iteration=100 + self.enable_op_fuse=True + self.tolerance=1e-6 + self.max_cal_sample=100 + self.quantize_mode='ptq' + self.list_dtype=['int8','fp32'] + logging.info("Current parameters config for Hutchinson’s algorithm as below:") + logging.info("criterion:",self.criterion,"| random_seed:",self.random_seed,"| max_Iteration:", self.max_Iteration, \ + "| tolerance:", self.tolerance,"| en_op_fuse", self.enable_op_fuse,"| max_cal_sample:", self.max_cal_sample) + fixed_seed(self.random_seed) + self.model=model + self.model.eval() + model_tmp=copy.deepcopy(model) + model_tmp.eval() + self.model_fused= fuse_fx(model_tmp) + self.model_fused.eval() + + def get_init_config(self)->dict: + """ + """ + #Load a sample from dataloader to compute graident + for inputs, targets in self.dataloader: + break + #Hessian average trace computation + with torch.enable_grad(): + if self.enable_op_fuse: + hawq_cmp=Hessian(self.model_fused,criterion=self.criterion,data=(inputs,targets)) + else: + hawq_cmp=Hessian(self.model,criterion=self.criterion,data=(inputs,targets)) + avg_traces_lst=hawq_cmp.calculate_trace(max_Iter=self.max_Iteration,tolerance=self.tolerance) + + #fiter none weight layer and save weight layer to match perturbation computation + avg_traces_lst_weight=[] + for avg_trace_i in avg_traces_lst: + if 'weight' in avg_trace_i['layer_name']: + avg_traces_lst_weight.append(avg_trace_i) + # avg_traces_lst_sorted=sorted(avg_traces_lst,key=lambda x:x["trace"], reverse=True) + if self.quantize_mode=='ptq': + #PTQ quantization + qconfig = get_default_qconfig("fbgemm") + qconfig_dict={"":qconfig} #enable all layers/tensor to quantize + #calibrate + model_prepared=prepare_fx(self.model, qconfig_dict) + model_prepared=ptq_calibrate(model_prepared,data_loader=self.dataloader,num_cal=self.max_cal_sample) + model_prepared.cpu() + model_all_qnt=convert_fx(model_prepared) + #calculate perturbation + pertu_list=calculate_perturbation(model_fp32=self.model,model_qnt=model_all_qnt) + #calculate omiga + for omiga_i in pertu_list: + for avg_trace_i in avg_traces_lst: + if avg_trace_i['layer_name']==omiga_i['layer_name']: + avg_trace_i['trace']=avg_trace_i['trace']*omiga_i['value'] + # for avg_trace_i, omiga_i in zip(avg_traces_lst_weight,pertu_list): + # omig_pair={"layer_name":" ", "value":0} + # omig_val=avg_trace_i['trace']*omiga_i['value'] + # omig_pair['layer_name']=avg_trace_i['layer_name'] 
+ # omig_pair['value']=omig_val + # omig_list.append(omig_pair) + # omig_list_sorted=sorted(omig_list,key=lambda x:x['value'],reverse=True) + omig_list_sorted=sorted(avg_traces_lst,key=lambda x:x['trace'],reverse=True) + tune_init_config_pairs=[] + # + for i in omig_list_sorted: + tune_init_config_pair={"op_name":'',"op_type":'','trace':0} + if i['layer_name']==omig_list_sorted[0]['layer_name']: + tune_init_config_pair['op_name']=i['layer_name'] + tune_init_config_pair['op_type']=self.list_dtype[-1] #setup as float op + tune_init_config_pair['trace']=float(i['trace']) + else: + tune_init_config_pair['op_name']=i['layer_name'] + tune_init_config_pair['op_type']=self.list_dtype[0] + tune_init_config_pair['trace']=float(i['trace']) + tune_init_config_pairs.append(tune_init_config_pair) + return tune_init_config_pairs From e81744e621635c8013fac83c03898a47ab121e69 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 10 Nov 2022 14:24:13 +0800 Subject: [PATCH 003/128] pytorch.py --- neural_compressor/adaptor/pytorch.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 1201bb21fa4..668c77246a6 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -30,6 +30,7 @@ from ..utils import logger from .query import QueryBackendCapability from ..experimental.data.dataloaders.base_dataloader import BaseDataLoader +from neural_compressor.strategy.hawq_metric import Hawq_top try: # pragma: no cover import intel_extension_for_pytorch as ipex IPEX = True @@ -1085,6 +1086,10 @@ def calculate_op_sensitivity(self, model, dataloader, method_args): ops_sensitivity(Dict[tuple, float]): The key is (op_name, op_type), the value is the sensitivity under the specified method """ + if method_args['name']=='hessian_trace': + Hawq_top(model=model,yaml_cpu=None,yaml_trace=None,dataloader=dataloader) + hessian_cmp=Hawq_top.get_init_config() + return hessian_cmp pass unify_op_type_mapping = { From 466ffb8bcb5a22f6cf79b5e9259de7f13bb21a5c Mon Sep 17 00:00:00 2001 From: root Date: Thu, 10 Nov 2022 15:31:11 +0800 Subject: [PATCH 004/128] disable line 33 --- neural_compressor/adaptor/pytorch.py | 2 +- test/strategy/test_hessian_trace_inc.py | 63 +++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 test/strategy/test_hessian_trace_inc.py diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 668c77246a6..097b9359f93 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -30,7 +30,7 @@ from ..utils import logger from .query import QueryBackendCapability from ..experimental.data.dataloaders.base_dataloader import BaseDataLoader -from neural_compressor.strategy.hawq_metric import Hawq_top +# from neural_compressor.strategy.hawq_metric import Hawq_top try: # pragma: no cover import intel_extension_for_pytorch as ipex IPEX = True diff --git a/test/strategy/test_hessian_trace_inc.py b/test/strategy/test_hessian_trace_inc.py new file mode 100644 index 00000000000..f05b47ca3aa --- /dev/null +++ b/test/strategy/test_hessian_trace_inc.py @@ -0,0 +1,63 @@ +import torch +import unittest +import os +import sys +import copy +import torchvision +import torchvision.transforms as transforms +from torch.utils.data import DataLoader +from neural_compressor.data import DATASETS +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from 
neural_compressor.adaptor.pytorch import TemplateAdaptor +from neural_compressor.strategy.hawq_metric import Hawq_top + +def build_hessian_trace(): + hessian_trace_config_yaml=''' + loss: + CrossEntropyLoss + random_seed: + 1 + max_Iteration: + 100 + tolerance: + 1e-3 + enable_op_fuse: + True + max_cal_smaple: + 100 + quantize_mode: + ptq + ''' + with open('./hessian_trace_config_yaml','w+',encoding="utf-8") as f: + f.write(hessian_trace_config_yaml) +class Test_hessian_trace(unittest.TestCase): + #boot up test + @classmethod + def setUpClass(cls) -> None: + build_hessian_trace() + cls.model=torchvision.models.resnet18() + #shotdown test + @classmethod + def tearDownClass(cls) -> None: + os.remove('./hessian_trace_config_yaml') + #one test case + def test_run_hessian_trace(cls): + """ + hessian_trace_top + Inputs: + model: FP32 model + dataloader: imagenet + """ + model=cls.model + datasets = DATASETS('pytorch') + dummy_dataset = datasets['dummy'](shape=(200, 3, 224, 224), low=0., high=1., label=True) + dummy_dataloader = PyTorchDataLoader(dummy_dataset) + # yaml_cpu='/home/bfang1/Projects/HAWQ_INC/frameworks.ai.lpot.intel-lpot/neural_compressor/adaptor/pytorch_cpu.yaml' + # hessian_cmp=hawq_metric.Hawq_top(model,'./hessian_trace_config_yaml',yaml_cpu,dummy_dataloader) + hessian_cmp=Hawq_top(model,yaml_cpu=None,yaml_trace=None,dataloader=dummy_dataloader) + tuning_init_config=hessian_cmp.get_init_config() + #print tuning init_config + for i in tuning_init_config: + print(i) +if __name__ == "__main__": + unittest.main() \ No newline at end of file From 3fb9a236a7a56fe625e5ff3389c901601b4fafb8 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Tue, 15 Nov 2022 11:44:48 +0800 Subject: [PATCH 005/128] add wenhuach test env --- neural_compressor/strategy/hawq.py | 311 ++++++++++++++++++++++++++++ test/strategy/test_hawq_wenhuach.py | 74 +++++++ 2 files changed, 385 insertions(+) create mode 100644 neural_compressor/strategy/hawq.py create mode 100644 test/strategy/test_hawq_wenhuach.py diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py new file mode 100644 index 00000000000..2dd0287fa2e --- /dev/null +++ b/neural_compressor/strategy/hawq.py @@ -0,0 +1,311 @@ +""" + Copyright (c) 2022 Intel Corporation + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +from ..utils import logger +import torch +import numpy as np +from torch.autograd import Variable +import yaml +import torchvision.transforms as transforms +import torchvision +import random +import copy +from torch.quantization import get_default_qat_qconfig, quantize_jit, get_default_qconfig +from torch.quantization.quantize_fx import prepare_fx, convert_fx, fuse_fx +from torch.quantization import default_dynamic_qconfig, float_qparams_weight_only_qconfig +import torch.quantization._numeric_suite as ns + + +def fix_seed(seed): + """Fixed rand seed to make sure results are same in different times on different devices.Eg CPU/GPU + Args: + seed: an integer number + return: None + """ + np.random.seed(seed) # random + random.seed(seed) + torch.manual_seed(seed) # cpu + torch.cuda.manual_seed_all(seed) # parallel cpu + torch.backends.cudnn.deterministic = True # make sure results are same on cpu/gpu + torch.backends.cudnn.benchmark = True # accelerator + + +def calculate_params_gradients(model): + """ + get the gradients and parameters from given model + Args: + model: FP32 model specificed + return: + params: paratmeters of model + grads: gradients of model + """ + params = [] + grads = [] + for indx, (name, parm) in zip(enumerate(model.parameters()), model.named_parameters()): + logger.info( + f'index:{indx[0]}-->name:{name}:{parm.shape}') + + if not parm.requires_grad: + continue + params.append(parm) + grads.append(0. if parm.grad is None else parm.grad + 0.) + return params, grads + + +def calculate_inner_product(list_x, list_y): + """Compute the inner product of two lists of variables list_x,list_y + Args: + list_x: input list variables + list_y: input list variables + return: + sum of inner product + """ + return sum([torch.sum(x * y) for (x, y) in zip(list_x, list_y)]) + + +def calculate_vector_product(gradsH, params, v): + """compute the hessian vector product by torch.autograd.grad. 
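+    Note: gradsH must come from a backward pass run with create_graph=True
+    (as done in Hessian.__init__ below), otherwise no graph is available
+    for this second differentiation.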
+ Agrs: + gradsH: gradient at current point + params: corresponding variables + v: vector + return: + hv: hessian vector product + """ + hv = torch.autograd.grad( + gradsH, + params, + grad_outputs=v, + only_inputs=True, + retain_graph=True) + return hv + + +def ptq_calibrate(model, data_loader, num_cal): + """Calibrate model in post train quantization model + Args: + model: a pre_quantization model to calibrate + data_laoder: datasets + num_cal: maximization number of calibrated samples, such as images + return: + model: a calibrated model + """ + # Generate some samples to calibrate from data_loader + calibrate_samples = [] + i = 0 + for inputs, targets in data_loader: + calibrate_samples.append(inputs) + i = i + 1 + if i >= num_cal: + break + # model.cpu() + model.eval() + # calibration + with torch.no_grad(): + for sample in calibrate_samples: + model(sample) + return model + + +def calculate_perturbation(model_qnt, model_fp32) -> dict: + """calculate weights quantized perturbation using L2 normal + Args: + model_qnt: quantized model + model_fp32: float model + return: + pertur_lst: dict,which contains layer_name and value + + """ + + wq_cmp_dict = ns.compare_weights(model_fp32.state_dict(), model_qnt.state_dict()) + pertur_lst = [] + for key in wq_cmp_dict: + pertur_pair = {"layer_name": '', "value": 0} + op_float_tensor = wq_cmp_dict[key]['float'] + op_qnt_tensor = wq_cmp_dict[key]['quantized'].dequantize() + diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) # Formula: L2=||Q(w)-w||p^2 + pertur_pair['layer_name'] = key + pertur_pair['value'] = diff_l2 + pertur_lst.append(pertur_pair) + return pertur_lst + + +class Hessian(): + """This class used to compute each layer hessian trace from given FP32 model + """ + + def __init__(self, model, criterion, data=None, dataloader=None, device='cpu') -> None: + """Initial parameters + Args: + model: FP32 model specificed + criterion: loss function + data: a single batch of data, including inputs and its corresponding labels + dataloader: the data loader including bunch of batches of data + device: currently only supports cpu device + """ + # make sure we either pass a single batch or a dataloader + assert (data != None and dataloader == None) or (data == None and dataloader != None) + # make mode is evaluation model + self.model = model.eval() + self.criterion = criterion + self.device = device + + if data != None: + self.data = data + self.full_dataset = False + if not self.full_dataset: + self.inputs, self.targets = self.data + outputs = self.model(self.inputs) + loss = self.criterion(outputs, self.targets) + loss.backward(create_graph=True) + params, gradSH = calculate_params_gradients(self.model) + + self.params = params + self.gradSH = gradSH + + def calculate_trace(self, max_Iter=100, tolerance=1e-3): + """Compute the hessian trace based on Hutchinson algorithm + Args: + max_Inter: number of maximization iteration + tolerance: minimum relative tolerance for stopping the algorithm. + return: + avg_traces_lst: return hessian trace per layer for given model + """ + avg_traces_lst = [] + for (i_grad, i_param, (module_name, _)) in zip(self.gradSH, self.params, self.model.named_parameters()): + v = [torch.randint_like(i_param, high=2, device=self.device)] + for v_i in v: + v_i[v_i == 0] = -1 + i_v = v + trace_vhv = [] + trace = 0. 
+ trace_pair = {"layer_name": " ", "trace": 0} + self.model.zero_grad() + for i in range(max_Iter): + hv = calculate_vector_product(i_grad, i_param, i_v) # hessian vector + trace_vhv_cur = calculate_inner_product(hv, v).cpu().item() # current point + trace_vhv.append(trace_vhv_cur) + difference = (np.mean(trace_vhv) - trace) / (abs(trace) + 1e-6) + if abs(difference) < tolerance: + avg_trace_vhv = np.mean(trace_vhv) + trace_pair["layer_name"] = module_name + trace_pair["trace"] = avg_trace_vhv + avg_traces_lst.append(trace_pair) + break + else: + trace = np.mean(trace_vhv) + return avg_traces_lst + + +class Hawq_top(): + """This class is a interface of hessian + """ + + def __init__(self, model, yaml_trace=None, yaml_cpu=None, dataloader=None) -> None: + self.dataloader = dataloader + if yaml_trace and yaml_cpu is not None: + with open(yaml_trace) as file: + params_config = yaml.load(file) + if params_config['loss'] == 'CrossEntropyLoss': + self.criterion = torch.nn.CrossEntropyLoss() + self.random_seed = params_config['random_seed'] + self.max_Iteration = params_config['max_Iteration'] + self.enable_op_fuse = params_config['enable_op_fuse'] + self.tolerance = float(params_config['tolerance']) + self.max_cal_sample = float(params_config['max_cal_smaple']) + self.quantize_mode = params_config['quantize_mode'] + with open(yaml_cpu, 'r') as file: + yaml_config = yaml.load(file) + str_dtype = (yaml_config[0]['precisions']['names']) + self.list_dtype = str_dtype.split(",") + else: + self.criterion = torch.nn.CrossEntropyLoss() + self.random_seed = 100 + self.max_Iteration = 100 + self.enable_op_fuse = True + self.tolerance = 1e-6 + self.max_cal_sample = 100 + self.quantize_mode = 'ptq' + self.list_dtype = ['int8', 'fp32'] + # logger.info("Current parameters config for Hutchinson’s algorithm as below:") + logger.info( + f"criterion:{self.criterion}| random_seed:{self.random_seed}| max_Iteration:self.max_Iteration| tolerance:{self.tolerance}") + # logger.info("criterion:", self.criterion, "| random_seed:", self.random_seed, "| max_Iteration:", + # self.max_Iteration, \ + # "| tolerance:", self.tolerance, "| en_op_fuse", self.enable_op_fuse, "| max_cal_sample:", + # self.max_cal_sample) + fix_seed(self.random_seed) + self.model = model + self.model.eval() + model_tmp = copy.deepcopy(model) + model_tmp.eval() + self.model_fused = fuse_fx(model_tmp) + self.model_fused.eval() + + def get_init_config(self) -> dict: + """ + """ + # Load a sample from dataloader to compute graident + for inputs, targets in self.dataloader: + break + # Hessian average trace computation + with torch.enable_grad(): + if self.enable_op_fuse: + hawq_cmp = Hessian(self.model_fused, criterion=self.criterion, data=(inputs, targets)) + else: + hawq_cmp = Hessian(self.model, criterion=self.criterion, data=(inputs, targets)) + avg_traces_lst = hawq_cmp.calculate_trace(max_Iter=self.max_Iteration, tolerance=self.tolerance) + + # fiter none weight layer and save weight layer to match perturbation computation + avg_traces_lst_weight = [] + for avg_trace_i in avg_traces_lst: + if 'weight' in avg_trace_i['layer_name']: + avg_traces_lst_weight.append(avg_trace_i) + # avg_traces_lst_sorted=sorted(avg_traces_lst,key=lambda x:x["trace"], reverse=True) + if self.quantize_mode == 'ptq': + # PTQ quantization + qconfig = get_default_qconfig("fbgemm") + qconfig_dict = {"": qconfig} # enable all layers/tensor to quantize + # calibrate + model_prepared = prepare_fx(self.model, qconfig_dict) + model_prepared = ptq_calibrate(model_prepared, 
data_loader=self.dataloader, num_cal=self.max_cal_sample) + model_prepared.cpu() + model_all_qnt = convert_fx(model_prepared) + # calculate perturbation + pertu_list = calculate_perturbation(model_fp32=self.model, model_qnt=model_all_qnt) + # calculate omiga + for omiga_i in pertu_list: + for avg_trace_i in avg_traces_lst: + if avg_trace_i['layer_name'] == omiga_i['layer_name']: + avg_trace_i['trace'] = avg_trace_i['trace'] * omiga_i['value'] + # for avg_trace_i, omiga_i in zip(avg_traces_lst_weight,pertu_list): + # omig_pair={"layer_name":" ", "value":0} + # omig_val=avg_trace_i['trace']*omiga_i['value'] + # omig_pair['layer_name']=avg_trace_i['layer_name'] + # omig_pair['value']=omig_val + # omig_list.append(omig_pair) + # omig_list_sorted=sorted(omig_list,key=lambda x:x['value'],reverse=True) + omig_list_sorted = sorted(avg_traces_lst, key=lambda x: x['trace'], reverse=True) + tune_init_config_pairs = [] + # + for i in omig_list_sorted: + tune_init_config_pair = {"op_name": '', "op_type": '', 'trace': 0} + if i['layer_name'] == omig_list_sorted[0]['layer_name']: + tune_init_config_pair['op_name'] = i['layer_name'] + tune_init_config_pair['op_type'] = self.list_dtype[-1] # setup as float op + tune_init_config_pair['trace'] = float(i['trace']) + else: + tune_init_config_pair['op_name'] = i['layer_name'] + tune_init_config_pair['op_type'] = self.list_dtype[0] + tune_init_config_pair['trace'] = float(i['trace']) + tune_init_config_pairs.append(tune_init_config_pair) + return tune_init_config_pairs diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py new file mode 100644 index 00000000000..5affedc70ca --- /dev/null +++ b/test/strategy/test_hawq_wenhuach.py @@ -0,0 +1,74 @@ +import torch +import unittest +import os +import sys +import copy +import torchvision +import torchvision.transforms as transforms +from torch.utils.data import DataLoader +from neural_compressor.data import DATASETS +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.adaptor.pytorch import TemplateAdaptor + +from neural_compressor.strategy.hawq import Hawq_top, fix_seed + +fix_seed(1) + + +def build_hessian_trace(): + hessian_trace_config_yaml = ''' + loss: + CrossEntropyLoss + random_seed: + 1 + max_Iteration: + 100 + tolerance: + 1e-3 + enable_op_fuse: + True + max_cal_smaple: + 100 + quantize_mode: + ptq + ''' + with open('./hessian_trace_config_yaml', 'w+', encoding="utf-8") as f: + f.write(hessian_trace_config_yaml) + + +class Test_hessian_trace(unittest.TestCase): + # boot up test + @classmethod + def setUpClass(cls) -> None: + build_hessian_trace() + cls.model = torchvision.models.resnet18() + + # shotdown test + @classmethod + def tearDownClass(cls) -> None: + os.remove('./hessian_trace_config_yaml') + + # one test case + def test_run_hessian_trace(cls): + """ + hessian_trace_top + Inputs: + model: FP32 model + dataloader: imagenet + """ + + model = cls.model + datasets = DATASETS('pytorch') + dummy_dataset = datasets['dummy'](shape=(200, 3, 224, 224), low=0., high=1., label=True) + dummy_dataloader = PyTorchDataLoader(dummy_dataset) + # yaml_cpu='/home/bfang1/Projects/HAWQ_INC/frameworks.ai.lpot.intel-lpot/neural_compressor/adaptor/pytorch_cpu.yaml' + # hessian_cmp=hawq_metric.Hawq_top(model,'./hessian_trace_config_yaml',yaml_cpu,dummy_dataloader) + hessian_cmp = Hawq_top(model, yaml_cpu=None, yaml_trace=None, dataloader=dummy_dataloader) + tuning_init_config = hessian_cmp.get_init_config() + # print tuning 
init_config + for i in tuning_init_config: + print(i) + + +if __name__ == "__main__": + unittest.main() From 59bd29b40094f47ac3f7ac1feefe947a50fb0a3b Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Tue, 15 Nov 2022 14:00:06 +0800 Subject: [PATCH 006/128] try to test mes strategy, have bug now --- test/strategy/test_hawq_wenhuach.py | 162 +++++++++++++++++++--------- 1 file changed, 113 insertions(+), 49 deletions(-) diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index 5affedc70ca..4443cd8d486 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -9,66 +9,130 @@ from neural_compressor.data import DATASETS from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from neural_compressor.adaptor.pytorch import TemplateAdaptor - +from neural_compressor.adaptor import FRAMEWORKS +import shutil from neural_compressor.strategy.hawq import Hawq_top, fix_seed fix_seed(1) +def build_ptq_yaml(): + fake_yaml = ''' + model: + name: imagenet + framework: pytorch + quantization: + calibration: + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: mse + accuracy_criterion: + relative: -0.1 + random_seed: 9527 + exit_policy: + max_trials: 1 + workspace: + path: saved + ''' + with open('ptq_yaml.yaml', 'w', encoding="utf-8") as f: + f.write(fake_yaml) -def build_hessian_trace(): - hessian_trace_config_yaml = ''' - loss: - CrossEntropyLoss - random_seed: - 1 - max_Iteration: - 100 - tolerance: - 1e-3 - enable_op_fuse: - True - max_cal_smaple: - 100 - quantize_mode: - ptq - ''' - with open('./hessian_trace_config_yaml', 'w+', encoding="utf-8") as f: - f.write(hessian_trace_config_yaml) - +class TestPytorchAdaptor(unittest.TestCase): + framework_specific_info = {"device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "workspace_path": None} + framework = "pytorch" + adaptor = FRAMEWORKS[framework](framework_specific_info) + model = torchvision.models.resnet18() -class Test_hessian_trace(unittest.TestCase): - # boot up test - @classmethod - def setUpClass(cls) -> None: - build_hessian_trace() - cls.model = torchvision.models.resnet18() + # model = torch.quantization.QuantWrapper(model) - # shotdown test @classmethod - def tearDownClass(cls) -> None: - os.remove('./hessian_trace_config_yaml') + def setUpClass(self): + build_ptq_yaml() - # one test case - def test_run_hessian_trace(cls): - """ - hessian_trace_top - Inputs: - model: FP32 model - dataloader: imagenet - """ - model = cls.model - datasets = DATASETS('pytorch') - dummy_dataset = datasets['dummy'](shape=(200, 3, 224, 224), low=0., high=1., label=True) - dummy_dataloader = PyTorchDataLoader(dummy_dataset) - # yaml_cpu='/home/bfang1/Projects/HAWQ_INC/frameworks.ai.lpot.intel-lpot/neural_compressor/adaptor/pytorch_cpu.yaml' - # hessian_cmp=hawq_metric.Hawq_top(model,'./hessian_trace_config_yaml',yaml_cpu,dummy_dataloader) - hessian_cmp = Hawq_top(model, yaml_cpu=None, yaml_trace=None, dataloader=dummy_dataloader) - tuning_init_config = hessian_cmp.get_init_config() - # print tuning init_config - for i in tuning_init_config: - print(i) + @classmethod + def tearDownClass(self): + os.remove('ptq_yaml.yaml') + shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree('runs', ignore_errors=True) + def test_run_hawq_one_trial(self): + from neural_compressor.experimental import Quantization, common + model = copy.deepcopy(self.model) + for fake_yaml in ['ptq_yaml.yaml']: + if fake_yaml == 'ptq_yaml.yaml': + model.eval() + quantizer = Quantization(fake_yaml) + dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + quantizer.calib_dataloader = 
common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = model + quantizer() if __name__ == "__main__": unittest.main() + +# def build_hessian_trace(): +# hessian_trace_config_yaml = ''' +# loss: +# CrossEntropyLoss +# random_seed: +# 1 +# max_Iteration: +# 100 +# tolerance: +# 1e-3 +# enable_op_fuse: +# True +# max_cal_smaple: +# 100 +# quantize_mode: +# ptq +# ''' +# with open('./hessian_trace_config_yaml', 'w+', encoding="utf-8") as f: +# f.write(hessian_trace_config_yaml) +# +# +# class Test_hessian_trace(unittest.TestCase): +# # boot up test +# @classmethod +# def setUpClass(cls) -> None: +# build_hessian_trace() +# cls.model = torchvision.models.resnet18() +# +# # shotdown test +# @classmethod +# def tearDownClass(cls) -> None: +# os.remove('./hessian_trace_config_yaml') +# +# # one test case +# def test_run_hessian_trace(cls): +# """ +# hessian_trace_top +# Inputs: +# model: FP32 model +# dataloader: imagenet +# """ +# +# model = cls.model +# datasets = DATASETS('pytorch') +# dummy_dataset = datasets['dummy'](shape=(200, 3, 224, 224), low=0., high=1., label=True) +# dummy_dataloader = PyTorchDataLoader(dummy_dataset) +# # yaml_cpu='/home/bfang1/Projects/HAWQ_INC/frameworks.ai.lpot.intel-lpot/neural_compressor/adaptor/pytorch_cpu.yaml' +# # hessian_cmp=hawq_metric.Hawq_top(model,'./hessian_trace_config_yaml',yaml_cpu,dummy_dataloader) +# hessian_cmp = Hawq_top(model, yaml_cpu=None, yaml_trace=None, dataloader=dummy_dataloader) +# tuning_init_config = hessian_cmp.get_init_config() +# # print tuning init_config +# for i in tuning_init_config: +# print(i) + + +# if __name__ == "__main__": +# unittest.main() From accec3ccfa207d8ac8a08dce39ea758161d61f68 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Tue, 15 Nov 2022 14:06:46 +0800 Subject: [PATCH 007/128] change name --- .../strategy/{hawq.py => hawq_wenhuach.py} | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) rename neural_compressor/strategy/{hawq.py => hawq_wenhuach.py} (95%) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq_wenhuach.py similarity index 95% rename from neural_compressor/strategy/hawq.py rename to neural_compressor/strategy/hawq_wenhuach.py index 2dd0287fa2e..6c74401c5fc 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq_wenhuach.py @@ -246,23 +246,25 @@ def __init__(self, model, yaml_trace=None, yaml_cpu=None, dataloader=None) -> No fix_seed(self.random_seed) self.model = model self.model.eval() - model_tmp = copy.deepcopy(model) - model_tmp.eval() - self.model_fused = fuse_fx(model_tmp) - self.model_fused.eval() + if self.enable_op_fuse: + self.model = fuse_fx(self.model) + + # model_tmp = copy.deepcopy(model) + # model_tmp.eval() + # self.model_fused = fuse_fx(model_tmp) + # self.model_fused.eval() def get_init_config(self) -> dict: """ """ # Load a sample from dataloader to compute graident - for inputs, targets in self.dataloader: - break - # Hessian average trace computation + inputs, targets = next(iter(self.dataloader)) + with torch.enable_grad(): - if self.enable_op_fuse: - hawq_cmp = Hessian(self.model_fused, criterion=self.criterion, data=(inputs, targets)) - else: - hawq_cmp = Hessian(self.model, criterion=self.criterion, data=(inputs, targets)) + # if self.enable_op_fuse: + # hawq_cmp = Hessian(self.model_fused, criterion=self.criterion, data=(inputs, targets)) + # else: + hawq_cmp = Hessian(self.model, criterion=self.criterion, data=(inputs, targets)) avg_traces_lst = hawq_cmp.calculate_trace(max_Iter=self.max_Iteration, tolerance=self.tolerance) # fiter none weight layer and save weight layer to match perturbation computation From 769cbc201fc37b1450c306b3074726990f06b875 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Tue, 15 Nov 2022 14:10:03 +0800 Subject: [PATCH 008/128] comment test --- test/strategy/test_hawq_wenhuach.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index 4443cd8d486..a470f679cf8 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -11,7 +11,7 @@ from neural_compressor.adaptor.pytorch import TemplateAdaptor from neural_compressor.adaptor import FRAMEWORKS import shutil -from neural_compressor.strategy.hawq import Hawq_top, fix_seed +from neural_compressor.strategy.hawq_wenhuach import Hawq_top, fix_seed fix_seed(1) @@ -77,7 +77,8 @@ def test_run_hawq_one_trial(self): quantizer() if __name__ == "__main__": - unittest.main() + pass + # unittest.main() # def build_hessian_trace(): # hessian_trace_config_yaml = ''' From a9fecbbe58a8c1eaac7a1c9caf7aef66f9763310 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Tue, 15 Nov 2022 14:19:56 +0800 Subject: [PATCH 009/128] add activation quantized loss eval --- neural_compressor/strategy/hawq_metric.py | 146 ++++++++++++++-------- 1 file changed, 97 insertions(+), 49 deletions(-) diff --git a/neural_compressor/strategy/hawq_metric.py b/neural_compressor/strategy/hawq_metric.py index acbcd98d740..63db277ab14 100644 --- a/neural_compressor/strategy/hawq_metric.py +++ b/neural_compressor/strategy/hawq_metric.py @@ -37,7 +37,7 @@ def fixed_seed(seed): torch.cuda.manual_seed_all(seed) #parallel cpu torch.backends.cudnn.deterministic = True #make sure results are same on cpu/gpu torch.backends.cudnn.benchmark = True #accelerator -def calculate_params_gradients(model): +def cal_params_grad(model): """ get the gradients and parameters from given model Args: @@ -55,17 +55,7 @@ def calculate_params_gradients(model): params.append(parm) grads.append(0. if parm.grad is None else parm.grad+0.) return params, grads -def calculate_inner_product(list_x,list_y): - """Compute the inner product of two lists of variables list_x,list_y - Args: - list_x: input list variables - list_y: input list variables - return: - sum of inner product - """ - return sum([torch.sum(x * y) for (x, y) in zip(list_x, list_y)]) - -def calculate_vector_product(gradsH, params, v): +def cal_vector_product(gradsH, params, v): """compute the hessian vector product by torch.autograd.grad. 
Agrs: gradsH: gradient at current point @@ -105,7 +95,7 @@ def ptq_calibrate(model, data_loader,num_cal): for sample in calibrate_samples: model(sample) return model -def calculate_perturbation(model_qnt,model_fp32)->dict: +def cal_weights_pertubation(model_qnt,model_fp32)->dict: """calculate weights quantized perturbation using L2 normal Args: model_qnt: quantized model @@ -126,6 +116,44 @@ def calculate_perturbation(model_qnt,model_fp32)->dict: pertur_pair['value']=diff_l2 pertur_lst.append(pertur_pair) return pertur_lst +def cal_act_pertubation(model_fp32,model_qnt,data_loader,num_cal=100)->dict: + """calculate weights quantized perturbation using L2 normal + Args: + model_qunt: quantized model + model_fp32: float model + data_loader: path to datasets + return: + pretur_lst: dict + + """ + ns.prepare_model_outputs(model_fp32, model_qnt) + model_fp32.cpu() + model_fp32.eval() + model_qnt.cpu() + model_qnt.eval() + obv_samples=[] + i=0 + for inputs, targets in data_loader: + obv_samples.append(inputs) + i=i+1 + if i>=num_cal: + break + with torch.no_grad(): + for image in obv_samples: + model_fp32(image) + model_qnt(image) + act_qnt_pairs=[] + act_compare_dict = ns.get_matching_activations(model_fp32, q_module=model_qnt) + for key in act_compare_dict: + op_float_tensor=(act_compare_dict[key]['float'][0]) + op_qnt_tensor=act_compare_dict[key]['quantized'][0].dequantize() + diff_l2=(torch.norm(op_float_tensor-op_qnt_tensor,p=2)**2) + pertur_pair={"layer_name":'',"value":0} + pertur_pair['layer_name']=key + pertur_pair['value']=diff_l2 + act_qnt_pairs.append(pertur_pair) + return act_qnt_pairs + class Hessian(): """This class used to compute each layer hessian trace from given FP32 model """ @@ -153,7 +181,7 @@ def __init__(self,model,criterion, data=None, dataloader=None,device='cpu') -> N outputs=self.model(self.inputs) loss=self.criterion(outputs,self.targets) loss.backward(create_graph=True) - params, gradSH=calculate_params_gradients(self.model) + params, gradSH=cal_params_grad(self.model) self.params=params self.gradSH=gradSH @@ -176,8 +204,8 @@ def calculate_trace(self,max_Iter=100, tolerance=1e-3): trace_pair={"layer_name":" ", "trace":0} self.model.zero_grad() for i in range(max_Iter): - hv=calculate_vector_product(i_grad,i_param,i_v) # hessian vector - trace_vhv_cur=calculate_inner_product(hv,v).cpu().item()#current point + hv=cal_vector_product(i_grad,i_param,i_v) # hessian vector + trace_vhv_cur=sum([torch.sum(x * y) for (x, y) in zip(hv, v)]) trace_vhv.append(trace_vhv_cur) difference=(np.mean(trace_vhv)-trace)/(abs(trace)+1e-6) if abs(difference) None: self.max_Iteration=100 self.enable_op_fuse=True self.tolerance=1e-6 - self.max_cal_sample=100 + self.max_cal_sample=1 self.quantize_mode='ptq' self.list_dtype=['int8','fp32'] logging.info("Current parameters config for Hutchinson’s algorithm as below:") @@ -230,6 +258,7 @@ def __init__(self,model,yaml_trace=None,yaml_cpu=None,dataloader=None) -> None: model_tmp.eval() self.model_fused= fuse_fx(model_tmp) self.model_fused.eval() + self.hawq_level='L3' #L1:top engievalue L2:avg_trace L3:avg_trace+pertubation def get_init_config(self)->dict: """ @@ -238,6 +267,7 @@ def get_init_config(self)->dict: for inputs, targets in self.dataloader: break #Hessian average trace computation + fixed_seed(self.random_seed) with torch.enable_grad(): if self.enable_op_fuse: hawq_cmp=Hessian(self.model_fused,criterion=self.criterion,data=(inputs,targets)) @@ -246,40 +276,58 @@ def get_init_config(self)->dict: 
avg_traces_lst=hawq_cmp.calculate_trace(max_Iter=self.max_Iteration,tolerance=self.tolerance) #fiter none weight layer and save weight layer to match perturbation computation - avg_traces_lst_weight=[] - for avg_trace_i in avg_traces_lst: - if 'weight' in avg_trace_i['layer_name']: - avg_traces_lst_weight.append(avg_trace_i) - # avg_traces_lst_sorted=sorted(avg_traces_lst,key=lambda x:x["trace"], reverse=True) - if self.quantize_mode=='ptq': - #PTQ quantization - qconfig = get_default_qconfig("fbgemm") - qconfig_dict={"":qconfig} #enable all layers/tensor to quantize - #calibrate - model_prepared=prepare_fx(self.model, qconfig_dict) - model_prepared=ptq_calibrate(model_prepared,data_loader=self.dataloader,num_cal=self.max_cal_sample) - model_prepared.cpu() - model_all_qnt=convert_fx(model_prepared) - #calculate perturbation - pertu_list=calculate_perturbation(model_fp32=self.model,model_qnt=model_all_qnt) - #calculate omiga - for omiga_i in pertu_list: - for avg_trace_i in avg_traces_lst: - if avg_trace_i['layer_name']==omiga_i['layer_name']: - avg_trace_i['trace']=avg_trace_i['trace']*omiga_i['value'] - # for avg_trace_i, omiga_i in zip(avg_traces_lst_weight,pertu_list): - # omig_pair={"layer_name":" ", "value":0} - # omig_val=avg_trace_i['trace']*omiga_i['value'] - # omig_pair['layer_name']=avg_trace_i['layer_name'] - # omig_pair['value']=omig_val - # omig_list.append(omig_pair) - # omig_list_sorted=sorted(omig_list,key=lambda x:x['value'],reverse=True) - omig_list_sorted=sorted(avg_traces_lst,key=lambda x:x['trace'],reverse=True) + if self.hawq_level=='L2': + avg_traces_lst_sorted=sorted(avg_traces_lst,key=lambda x:x["trace"], reverse=True) + logging.info("avg_traces desending sorted is:") + for i in avg_traces_lst_sorted: + logging.info(i) + list_sorted=avg_traces_lst_sorted + if self.hawq_level=='L3': + if self.quantize_mode=='ptq': + #PTQ quantization + qconfig = get_default_qconfig("fbgemm") + qconfig_dict={"":qconfig} #enable all layers/tensor to quantize + #calibrate + model_prepared=prepare_fx(self.model, qconfig_dict) + model_prepared=ptq_calibrate(model_prepared,data_loader=self.dataloader,num_cal=self.max_cal_sample) + model_prepared.cpu() + model_all_qnt=convert_fx(model_prepared) + #calculate weights quantized perturbation + weights_pertu_lst=cal_weights_pertubation(model_fp32=self.model,model_qnt=model_all_qnt) + #merge weights quantized perturbation + #generally, fused ops=quantized weights+quantized activation + avg_trace_i=0 + omigs=[] + for wct_i in weights_pertu_lst: + omig_pair={"layer_name":" ", "trace":0} + tmp_value=avg_traces_lst[avg_trace_i]['trace']*wct_i['value'] + omig_pair['layer_name']=avg_traces_lst[avg_trace_i]['layer_name'] + omig_pair['trace']=tmp_value + avg_trace_i=avg_trace_i+2 + omigs.append(omig_pair) + act_pertu_lst=cal_act_pertubation(model_fp32=self.model, model_qnt=model_all_qnt,data_loader=self.dataloader,num_cal=self.max_cal_sample) + avg_trace_i=1 + for act_i in act_pertu_lst: + omig_pair={"layer_name":" ", "trace":0} + tmp_value=avg_traces_lst[avg_trace_i]['trace']+act_i['value'] + omig_pair['layer_name']=avg_traces_lst[avg_trace_i]['layer_name'] + omig_pair['trace']=tmp_value + avg_trace_i=avg_trace_i+2 + omigs.append(omig_pair) + + # for avg_trace_i, omiga_i in zip(avg_traces_lst_weight,pertu_list): + # omig_pair={"layer_name":" ", "value":0} + # omig_val=avg_trace_i['trace']*omiga_i['value'] + # omig_pair['layer_name']=avg_trace_i['layer_name'] + # omig_pair['value']=omig_val + # omig_list.append(omig_pair) + # 
omig_list_sorted=sorted(omig_list,key=lambda x:x['value'],reverse=True) + omig_list_sorted=sorted(omigs,key=lambda x:x['trace'],reverse=True) + list_sorted=omig_list_sorted tune_init_config_pairs=[] - # - for i in omig_list_sorted: + for i in list_sorted: tune_init_config_pair={"op_name":'',"op_type":'','trace':0} - if i['layer_name']==omig_list_sorted[0]['layer_name']: + if i['layer_name']==list_sorted[0]['layer_name']: tune_init_config_pair['op_name']=i['layer_name'] tune_init_config_pair['op_type']=self.list_dtype[-1] #setup as float op tune_init_config_pair['trace']=float(i['trace']) From 8f9e355cba65346e954cf8fa34d2aad34bdc4893 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Tue, 15 Nov 2022 14:21:17 +0800 Subject: [PATCH 010/128] fixed seed for dummy datasets --- test/strategy/test_hessian_trace_inc.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/test/strategy/test_hessian_trace_inc.py b/test/strategy/test_hessian_trace_inc.py index f05b47ca3aa..5285bc619c7 100644 --- a/test/strategy/test_hessian_trace_inc.py +++ b/test/strategy/test_hessian_trace_inc.py @@ -10,7 +10,21 @@ from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from neural_compressor.adaptor.pytorch import TemplateAdaptor from neural_compressor.strategy.hawq_metric import Hawq_top - +import random +import numpy as np +def fixed_seed(seed): + """Fixed rand seed to make sure results are same in different times on different devices.Eg CPU/GPU + Args: + seed: an integer number + return: None + """ + np.random.seed(seed) #random + random.seed(seed) + torch.manual_seed(seed) #cpu + torch.cuda.manual_seed_all(seed) #parallel cpu + torch.backends.cudnn.deterministic = True #make sure results are same on cpu/gpu + torch.backends.cudnn.benchmark = True #accelerator +fixed_seed(100) def build_hessian_trace(): hessian_trace_config_yaml=''' loss: From 11c7592e3090dc724483eb62fdc824301c7d3340 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Tue, 15 Nov 2022 14:24:18 +0800 Subject: [PATCH 011/128] for independence hawq tuning strategic --- neural_compressor/strategy/hawq.py | 202 +++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 neural_compressor/strategy/hawq.py diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py new file mode 100644 index 00000000000..17231ceec9d --- /dev/null +++ b/neural_compressor/strategy/hawq.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
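+"""HAWQ tuning strategy.
+
+A variant of the basic strategy whose fallback stage orders ops by the
+Hessian-trace sensitivity returned from adaptor.calculate_op_sensitivity,
+falling back the most sensitive ops first.
+"""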
+ +import copy +import numpy as np +from collections import OrderedDict +from .strategy import strategy_registry, TuneStrategy +from ..utils import logger + +from .st_utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler +from .st_utils.tuning_structs import OpTuningConfig +from .st_utils.tuning_space import TUNING_ITEMS_LST + +@strategy_registry +class HawqTuneStrategy(TuneStrategy): + """The basic tuning strategy which tunes the low precision model with below order. + + 1. modelwise tuning for all quantizable ops. + 2. fallback tuning from bottom to top to decide the priority of which op has biggest impact + on accuracy. + 3. incremental fallback tuning by fallbacking multiple ops with the order got from #2. + + Args: + model (object): The FP32 model specified for low precision tuning. + conf (Class): The Conf class instance initialized from user yaml + config file. + q_dataloader (generator): Data loader for calibration, mandatory for + post-training quantization. + It is iterable and should yield a tuple (input, + label) for calibration dataset containing label, + or yield (input, _) for label-free calibration + dataset. The input could be a object, list, tuple or + dict, depending on user implementation, as well as + it can be taken as model input. + q_func (function, optional): Reserved for future use. + eval_dataloader (generator, optional): Data loader for evaluation. It is iterable + and should yield a tuple of (input, label). + The input could be a object, list, tuple or dict, + depending on user implementation, as well as it can + be taken as model input. The label should be able + to take as input of supported metrics. If this + parameter is not None, user needs to specify + pre-defined evaluation metrics through configuration + file and should set "eval_func" parameter as None. + Tuner will combine model, eval_dataloader and + pre-defined metrics to run evaluation process. + eval_func (function, optional): The evaluation function provided by user. + This function takes model as parameter, and + evaluation dataset and metrics should be + encapsulated in this function implementation and + outputs a higher-is-better accuracy scalar value. + + The pseudo code should be something like: + + def eval_func(model): + input, label = dataloader() + output = model(input) + accuracy = metric(output, label) + return accuracy + dicts (dict, optional): The dict containing resume information. + Defaults to None. + + """ + + def __init__(self, model, conf, q_dataloader, q_func=None, + eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None): + super( + HawqTuneStrategy, + self).__init__( + model, + conf, + q_dataloader, + q_func, + eval_dataloader, + eval_func, + dicts, + q_hooks) + + def next_tune_cfg(self): + """The generator of yielding next tuning config to traverse by concrete strategies + according to last tuning result. + + Yields: + tune_config (dict): It's a dict containing the tuning configuration to run. 
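+
+        The configs are yielded in three stages: op-type-wise tuning, an
+        optional static-to-dynamic fallback for ops supporting both, and
+        finally a per-op fallback to bf16/fp32 ordered by Hessian-trace
+        sensitivity, followed by an accumulated fallback ordered by the
+        measured accuracy impact of each op.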
+ """ + from copy import deepcopy + tuning_space = self.tuning_space + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + for calib_sampling_size in calib_sampling_size_lst: + # Initialize the tuning config for each op according to the quantization approach + op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() + # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) + early_stop_tuning = False + stage1_cnt = 0 + quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] + quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] + stage1_max = 1e9 # TODO set a more appropriate value + op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], + op_item_dtype_dict, initial_op_tuning_cfg) + for op_tuning_cfg in op_wise_tuning_sampler: + stage1_cnt += 1 + if early_stop_tuning and stage1_cnt > stage1_max: + logger.info("Early stopping the stage 1.") + break + op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + yield op_tuning_cfg + # Fallback the ops supported both static and dynamic from static to dynamic + # Tuning items: None + if self.cfg.quantization.approach == 'post_training_auto_quant': + static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if + item in tuning_space.query_items_by_quant_mode('dynamic')] + if static_dynamic_items: + logger.info("Fallback all ops that support both dynamic and static to dynamic.") + else: + logger.info("Non ops that support both dynamic") + + new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) + for item in static_dynamic_items: + new_op_tuning_cfg[item.name] = self.initial_dynamic_cfg_based_on_static_cfg( + new_op_tuning_cfg[item.name]) + new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + yield new_op_tuning_cfg + best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg) + + # Fallback + for target_dtype in ['bf16', 'fp32']: + target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) + fallback_items_lst = [item for item in quant_ops if item in target_type_lst] + if fallback_items_lst: + logger.info(f"Start to fallback op to {target_dtype} one by one.") + self._fallback_started() + #fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, + self.calib_dataloader, + method_args = {'name': 'hessian_trace'}) + fallback_items_name_lst = sorted(ops_sensitivity, key = lambda items: items[1], reverse=True) + + op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) + initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) + fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=False) + op_fallback_acc_impact = OrderedDict() + for op_index, op_tuning_cfg in enumerate(fallback_sampler): + op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + yield op_tuning_cfg + acc, _ = self.last_tune_result + op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc + + + # do accumulated fallback according to the order in the previous stage + if len(op_fallback_acc_impact) > 0: + ordered_ops = sorted(op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better) + 
op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) + logger.info(f"Start to accumulate fallback to {target_dtype}.") + initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) + fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=True) + for op_tuning_cfg in fallback_sampler: + op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + yield op_tuning_cfg + + def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig): + op_state = op_static_cfg.get_state() + op_name = op_static_cfg.op_name + op_type = op_static_cfg.op_type + op_quant_mode = 'dynamic' + tuning_space = self.tuning_space + dynamic_state = {} + for att in ['weight', 'activation']: + if att not in op_state: + continue + for item_name, item_val in op_state[att].items(): + att_item = (att, item_name) + if att_item not in TUNING_ITEMS_LST: + continue + if tuning_space.query_item_option((op_name, op_type), op_quant_mode, att_item, item_val): + dynamic_state[att_item] = item_val + else: + quant_mode_item = tuning_space.query_quant_mode_item((op_name, op_type), op_quant_mode) + tuning_item = quant_mode_item.get_option_by_name(att_item) + dynamic_state[att_item] = tuning_item.options[0] if tuning_item else None + return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) + + \ No newline at end of file From bf44c0e0be4e0f1d27d3fa8a1bc6ca1cc3ac6230 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 15 Nov 2022 14:38:19 +0800 Subject: [PATCH 012/128] add a fallback ut --- neural_compressor/strategy/basic.py | 10 +-- .../strategy/{ => st_utils}/hawq_metric.py | 0 .../strategy/{ => st_utils}/hawq_wenhuach.py | 0 test/strategy/test_basic_fallback.py | 73 +++++++++++++++++++ 4 files changed, 78 insertions(+), 5 deletions(-) rename neural_compressor/strategy/{ => st_utils}/hawq_metric.py (100%) rename neural_compressor/strategy/{ => st_utils}/hawq_wenhuach.py (100%) create mode 100644 test/strategy/test_basic_fallback.py diff --git a/neural_compressor/strategy/basic.py b/neural_compressor/strategy/basic.py index 3cc4e38bde2..184a15996f7 100644 --- a/neural_compressor/strategy/basic.py +++ b/neural_compressor/strategy/basic.py @@ -143,11 +143,11 @@ def next_tune_cfg(self): if fallback_items_lst: logger.info(f"Start to fallback op to {target_dtype} one by one.") self._fallback_started() - #fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up - ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, - self.calib_dataloader, - method_args = {'name': 'hessian_trace'}) - fallback_items_name_lst = sorted(ops_sensitivity, key = lambda items: items[1], reverse=True) + fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + # ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, + # self.calib_dataloader, + # method_args = {'name': 'hessian_trace'}) + #fallback_items_name_lst = sorted(ops_sensitivity, key = lambda items: items[1], reverse=True) op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) diff --git a/neural_compressor/strategy/hawq_metric.py b/neural_compressor/strategy/st_utils/hawq_metric.py similarity index 100% rename from neural_compressor/strategy/hawq_metric.py rename to 
neural_compressor/strategy/st_utils/hawq_metric.py diff --git a/neural_compressor/strategy/hawq_wenhuach.py b/neural_compressor/strategy/st_utils/hawq_wenhuach.py similarity index 100% rename from neural_compressor/strategy/hawq_wenhuach.py rename to neural_compressor/strategy/st_utils/hawq_wenhuach.py diff --git a/test/strategy/test_basic_fallback.py b/test/strategy/test_basic_fallback.py new file mode 100644 index 00000000000..352c81850c4 --- /dev/null +++ b/test/strategy/test_basic_fallback.py @@ -0,0 +1,73 @@ +import torch +import unittest +import os +import sys +import copy +import torchvision +import torchvision.transforms as transforms +from torch.utils.data import DataLoader +from neural_compressor.data import DATASETS +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.adaptor.pytorch import TemplateAdaptor +from neural_compressor.adaptor import FRAMEWORKS +import shutil + + +def build_ptq_yaml(): + fake_yaml = ''' + model: + name: resnet18 + framework: pytorch_fx + tuning: + strategy: + name: basic + accuracy_criterion: + absolute: -1 + exit_policy: + timeout: 0 + ''' + with open('ptq_yaml.yaml', 'w', encoding="utf-8") as f: + f.write(fake_yaml) + +class TestPytorchAdaptor(unittest.TestCase): + framework_specific_info = {"device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "workspace_path": None} + framework = "pytorch" + adaptor = FRAMEWORKS[framework](framework_specific_info) + model = torchvision.models.resnet18() + + # model = torch.quantization.QuantWrapper(model) + + @classmethod + def setUpClass(self): + self.i = 0 + build_ptq_yaml() + + + @classmethod + def tearDownClass(self): + os.remove('ptq_yaml.yaml') + shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree('runs', ignore_errors=True) + + def test_basic_fallback(self): + def eval_func(model): + self.i -= 1 + return self.i + + from neural_compressor.experimental import Quantization, common + model = copy.deepcopy(self.model) + quantizer = Quantization('ptq_yaml.yaml') + quantizer.eval_func = eval_func + dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = model + q_model = quantizer() + self.assertTrue(q_model is None) + +if __name__ == "__main__": + unittest.main() From eff50653a4679beabde6054706fd8e3c8f1360bb Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Wed, 16 Nov 2022 14:03:42 +0800 Subject: [PATCH 013/128] update test file --- test/strategy/test_hawq_wenhuach.py | 41 ++++++++++++++++++----------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index a470f679cf8..ad7939d5d84 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -11,7 +11,7 @@ from neural_compressor.adaptor.pytorch import TemplateAdaptor from neural_compressor.adaptor import FRAMEWORKS import shutil -from neural_compressor.strategy.hawq_wenhuach import Hawq_top, fix_seed +from neural_compressor.strategy.st_utils.hawq_wenhuach import Hawq_top, fix_seed fix_seed(1) @@ -19,7 +19,7 @@ def build_ptq_yaml(): fake_yaml = ''' model: name: imagenet - framework: pytorch + framework: pytorch_fx quantization: calibration: evaluation: @@ -28,12 +28,12 @@ def build_ptq_yaml(): topk: 1 tuning: strategy: - name: mse + name: hawq accuracy_criterion: relative: -0.1 random_seed: 9527 exit_policy: - max_trials: 1 + max_trials: 3 workspace: path: saved ''' @@ -50,10 +50,17 @@ class TestPytorchAdaptor(unittest.TestCase): adaptor = FRAMEWORKS[framework](framework_specific_info) model = torchvision.models.resnet18() + # from collections import OrderedDict + # model = torch.nn.Sequential(OrderedDict([ + # ('conv1', torch.nn.Conv2d(3, 2, 1, 1)), + # ('conv2', torch.nn.Conv2d(2, 1, 1, 1)), + # ('flat', torch.nn.Flatten()), + # ])) # model = torch.quantization.QuantWrapper(model) @classmethod def setUpClass(self): + self.i = 0 build_ptq_yaml() @@ -63,22 +70,26 @@ def tearDownClass(self): shutil.rmtree('./saved', ignore_errors=True) shutil.rmtree('runs', ignore_errors=True) + + def test_run_hawq_one_trial(self): + def eval_func(model): + self.i -= 1 + return self.i from neural_compressor.experimental import Quantization, common model = copy.deepcopy(self.model) - for fake_yaml in ['ptq_yaml.yaml']: - if fake_yaml == 'ptq_yaml.yaml': - model.eval() - quantizer = Quantization(fake_yaml) - dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = model - quantizer() + + quantizer = Quantization('ptq_yaml.yaml') + quantizer.eval_func = eval_func + dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = model + quantizer() if __name__ == "__main__": - pass - # unittest.main() + + unittest.main() # def build_hessian_trace(): # hessian_trace_config_yaml = ''' From ed6a1fcd5eb2cf94da2f1f3483de4747166bd0be Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Thu, 17 Nov 2022 15:04:32 +0800 Subject: [PATCH 014/128] tiny update --- neural_compressor/strategy/hawq.py | 329 +++++++++++++++++++++------- neural_compressor/strategy/mse.py | 3 +- test/strategy/test_hawq_wenhuach.py | 10 +- 3 files changed, 263 insertions(+), 79 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 17231ceec9d..3db5cf0aed5 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -18,6 +18,9 @@ import copy import numpy as np from collections import OrderedDict + +import torch.nn + from .strategy import strategy_registry, TuneStrategy from ..utils import logger @@ -25,6 +28,154 @@ from .st_utils.tuning_structs import OpTuningConfig from .st_utils.tuning_space import TUNING_ITEMS_LST + +class HessianTrace: + def __init__(self, model, conf, adaptor, op_cfgs_list, dataloader): + self.model = model + self.conf = conf ##config + self.op_cfgs_list = op_cfgs_list ##op to get + self.dataloader = dataloader + self.adaptor = adaptor + self.max_iter = 500 + self.tolerance = 1e-5 + self.eps = 1e-6 + self.index = 0 + + # def apply_init(self): + # trace_per_op = self._cal_trace() + # if not trace_per_op: + # raise RuntimeError('Failed to calculate hessian traces!') + # + # perturbations = self._calc_quantization_noise() + # configuration_metric = self._calc_hawq_metric_per_configuration( + # perturbations, trace_per_op) + # config_index = self.choose_configuration(configuration_metric) + # chosen_config = self.op_cfgs_list[config_index] + # return chosen_config, trace_per_op + + def get_device(self, model: torch.nn.Module): + for n, p in model.named_parameters(): + return p.data.device + + def get_gradient(self, model, data, criterion, op_list, device="cpu", retrain_graph=False): + model.zero_grad() + input = data[0] + target = data[1] + output = model(input) + loss = criterion(output, target) + loss.backward(retain_graph=retrain_graph) + gradients = {} + for n, p in model.named_parameters(): + if n in op_list: + continue + gradients[n] = 0 + if p.grad != None: + gradients[n] = p.grad + return gradients + + def get_avg_trace(self, num_batches=2): + """ + Estimates average hessian trace for each parameter + """ + assert num_batches > 0 + ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] + ##num_all_data = num_data_iter * self.dataloader.batch_size + op_list = [item.name for item in self.op_cfgs_list] + criterion = torch.nn.CrossEntropyLoss() ##TODO setting this in config + device = self.get_device(self.model) + + for step, batch in enumerate(self.dataloader): + gradient_dict = self.get_gradient(self.model, batch,criterion, op_list, device=device, retrain_graph=True) + tmp = 1 + if step == num_batches - 1: + break + + + weight_vhp = [] + w_avg_total_trace = 0. + w_avg_traces_per_iter = [] + mean_avg_traces_per_param = None + act_vhp = [] + a_avg_total_trace = 0. + a_avg_traces_per_iter = [] + mean_avg_traces_per_act = None + + for i in range(max_iter): + weight_vhp_list, w_v, \ + act_vhp_list, a_v, op_act_grad = self.adaptor.get_2order_grad(self.model, + criterion, + self.dataloader, + num_data_iter, + qop_list) + if not weight_vhp: + weight_vhp = [np.random.randn(*p.shape) for p in w_v] + for vhp_curr in weight_vhp_list: + weight_vhp = [a + b * float(self.dataloader.batch_size) + 0. 
\ + for a, b in zip(weight_vhp, vhp_curr)] + weight_vhp = [a / float(num_all_data) for a in weight_vhp] + avg_traces_per_param = [np.sum(a * b) / a.size for (a, b) in zip(weight_vhp, w_v)] + w_avg_traces_per_iter.append(avg_traces_per_param) + mean_avg_traces_per_param = np.mean(w_avg_traces_per_iter, axis=0) + w_mean_avg_total_trace = np.sum(mean_avg_traces_per_param) + + w_diff_avg = abs(w_mean_avg_total_trace - w_avg_total_trace) / \ + (w_avg_total_trace + diff_eps) + w_avg_total_trace = w_mean_avg_total_trace + logger.info( + '{}# weights difference_avg={} avg_trace={}'.format( + i, w_diff_avg, w_avg_total_trace)) + + if not act_vhp: + act_vhp = [np.random.randn(*p.shape) for p in a_v] + for vhp_curr in act_vhp_list: + act_vhp = [a + b * float(self.dataloader.batch_size) + 0. \ + for a, b in zip(act_vhp, vhp_curr)] + act_vhp = [a / float(num_all_data) for a in act_vhp] + avg_traces_per_act = [np.sum(a * b) / a.size for (a, b) in zip(act_vhp, a_v)] + a_avg_traces_per_iter.append(avg_traces_per_act) + mean_avg_traces_per_act = np.mean(a_avg_traces_per_iter, axis=0) + a_mean_avg_total_trace = np.sum(mean_avg_traces_per_act) + + a_diff_avg = abs(a_mean_avg_total_trace - a_avg_total_trace) / \ + (a_avg_total_trace + diff_eps) + a_avg_total_trace = a_mean_avg_total_trace + logger.info( + '{}# activation difference_avg={} avg_trace={}'.format( + i, a_diff_avg, a_avg_total_trace)) + + if w_diff_avg < tolerance and a_diff_avg < tolerance: + return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad + + return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad + + def _cal_trace(self): + """ + Calculate the trace for both weight and activation per layer + """ + pass + # trace_estimator = HessianTraceEstimator(self.model, + # self.conf, + # self.adaptor, + # self.op_cfgs_list, + # self.dataloader) + # w_avg_trace, a_avg_trace, op_act_grad = trace_estimator.get_avg_trace() + # + # # mapping trace to op per op_weight_mapping + # weights_name = self.adaptor.get_all_weight_names(self.model) + # op_weight_mapping = self.get_op_weight_mapping() + # trace_per_op = OrderedDict() + # w_op_trace_info = np.zeros(len(op_weight_mapping)) + # for i, (op_name, w_name) in enumerate(op_weight_mapping.items()): + # index = weights_name.index(w_name) + # w_op_trace_info[i] = w_avg_trace[index] + # act_trace = 0.0 + # if op_name in op_act_grad: + # a_index = op_act_grad.index(op_name) + # act_trace = a_avg_trace[a_index] + # trace_per_op[op_name] = (w_avg_trace[index], act_trace) + # return trace_per_op + + @strategy_registry class HawqTuneStrategy(TuneStrategy): """The basic tuning strategy which tunes the low precision model with below order. 
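The HessianTrace helper added above follows Hutchinson's estimator: draw a Rademacher probe v, obtain the Hessian-vector product Hv by differentiating the gradients a second time with autograd, and average v*Hv over probes until the running per-layer trace estimate stabilizes. A minimal standalone sketch of that computation in plain PyTorch follows; model, loss_fn and data_iter are placeholders, and this is an illustration of the technique rather than the class above:

    import torch

    def hutchinson_layer_traces(model, loss_fn, data_iter, n_probes=8, num_batches=2):
        """Rough per-parameter Hessian-trace estimates via Hutchinson's method."""
        params = [p for p in model.parameters() if p.requires_grad]
        probe_results = []
        for _ in range(n_probes):
            # Rademacher probe: entries are +1/-1 with equal probability
            v = [torch.randint_like(p, high=2).mul_(2.0).sub_(1.0) for p in params]
            hv_sum = [torch.zeros_like(p) for p in params]
            seen = 0
            for step, (x, y) in enumerate(data_iter):
                loss = loss_fn(model(x), y)
                grads = torch.autograd.grad(loss, params, create_graph=True)
                # differentiating g.v once more gives the Hessian-vector product H v
                hv = torch.autograd.grad(grads, params, grad_outputs=v)
                hv_sum = [a + h * x.shape[0] for a, h in zip(hv_sum, hv)]
                seen += x.shape[0]
                if step == num_batches - 1:
                    break
            # E[v^T H v] over Rademacher v equals tr(H); the per-element mean mirrors the
            # normalization chosen in hutchinson_one_step above
            probe_results.append(torch.stack(
                [torch.mean((h / seen) * vi) for h, vi in zip(hv_sum, v)]))
        return torch.stack(probe_results).mean(dim=0)  # one estimate per parameter tensor

The resulting per-tensor traces can then be mapped back to ops (for example through a weight-name to op-name table like the fused-module mapping used in this series) and serve as the fallback ordering.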
@@ -91,6 +242,37 @@ def __init__(self, model, conf, q_dataloader, q_func=None, q_hooks) def next_tune_cfg(self): + from copy import deepcopy + tuning_space = self.tuning_space + calib_size = tuning_space.root_item.get_option_by_name('calib_sampling_size').options[0] ##TODO suppoprt list + + # Initialize the tuning config for each op according to the quantization approach + op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() + + quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] + quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] + + target_dtype = "fp32" ##TODO support bf16 + target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) + fp_op_list = [item for item in quant_ops if item in target_type_lst] + orig_eval = True + if self._fp32_model.training: + orig_eval = False + self._fp32_model.train() + ht = HessianTrace(self._fp32_model, self.cfg, self.adaptor, fp_op_list, self.calib_dataloader) + ht.get_avg_trace() + # if orig_eval: + # self._fp32_model.eval() + # ht.get_avg_trace() + # tmp = 1 + # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + # ops_sensitivity = self.adaptor.get_hessian_trace(self._fp32_model, + # self.calib_dataloader, + # self. + # method_args={'name': 'hessian_trace'}) + # tmp = 1 + + def next_tune_cfg_bk(self): """The generator of yielding next tuning config to traverse by concrete strategies according to last tuning result. @@ -100,84 +282,85 @@ def next_tune_cfg(self): from copy import deepcopy tuning_space = self.tuning_space calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options - for calib_sampling_size in calib_sampling_size_lst: - # Initialize the tuning config for each op according to the quantization approach - op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) - early_stop_tuning = False - stage1_cnt = 0 - quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] - quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] - stage1_max = 1e9 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) - for op_tuning_cfg in op_wise_tuning_sampler: - stage1_cnt += 1 - if early_stop_tuning and stage1_cnt > stage1_max: - logger.info("Early stopping the stage 1.") - break + + calib_sampling_size = calib_sampling_size_lst[0] + # Initialize the tuning config for each op according to the quantization approach + op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() + # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) + early_stop_tuning = False + stage1_cnt = 0 + quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] + quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] + stage1_max = 1e9 # TODO set a more appropriate value + op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], + op_item_dtype_dict, initial_op_tuning_cfg) + # for op_tuning_cfg in op_wise_tuning_sampler: + # stage1_cnt += 1 + # if early_stop_tuning and stage1_cnt > stage1_max: + # logger.info("Early stopping the stage 
1.") + # break + # op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + # yield op_tuning_cfg + # Fallback the ops supported both static and dynamic from static to dynamic + # Tuning items: None + # if self.cfg.quantization.approach == 'post_training_auto_quant': + # static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if + # item in tuning_space.query_items_by_quant_mode('dynamic')] + # if static_dynamic_items: + # logger.info("Fallback all ops that support both dynamic and static to dynamic.") + # else: + # logger.info("Non ops that support both dynamic") + # + # new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) + # for item in static_dynamic_items: + # new_op_tuning_cfg[item.name] = self.initial_dynamic_cfg_based_on_static_cfg( + # new_op_tuning_cfg[item.name]) + # new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + # yield new_op_tuning_cfg + best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg) + + # Fallback + for target_dtype in ['bf16', 'fp32']: + target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) + fallback_items_lst = [item for item in quant_ops if item in target_type_lst] + if fallback_items_lst: + logger.info(f"Start to fallback op to {target_dtype} one by one.") + self._fallback_started() + # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, + self.calib_dataloader, + method_args={'name': 'hessian_trace'}) + + fallback_items_name_lst = sorted(ops_sensitivity, key=lambda items: items[1], reverse=True) + + op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) + initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) + fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=False) + op_fallback_acc_impact = OrderedDict() + for op_index, op_tuning_cfg in enumerate(fallback_sampler): op_tuning_cfg['calib_sampling_size'] = calib_sampling_size yield op_tuning_cfg - # Fallback the ops supported both static and dynamic from static to dynamic - # Tuning items: None - if self.cfg.quantization.approach == 'post_training_auto_quant': - static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if - item in tuning_space.query_items_by_quant_mode('dynamic')] - if static_dynamic_items: - logger.info("Fallback all ops that support both dynamic and static to dynamic.") - else: - logger.info("Non ops that support both dynamic") - - new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) - for item in static_dynamic_items: - new_op_tuning_cfg[item.name] = self.initial_dynamic_cfg_based_on_static_cfg( - new_op_tuning_cfg[item.name]) - new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - yield new_op_tuning_cfg - best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg) - - # Fallback - for target_dtype in ['bf16', 'fp32']: - target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) - fallback_items_lst = [item for item in quant_ops if item in target_type_lst] - if fallback_items_lst: - logger.info(f"Start to fallback op to {target_dtype} one by one.") - self._fallback_started() - #fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up - ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, - 
self.calib_dataloader, - method_args = {'name': 'hessian_trace'}) - fallback_items_name_lst = sorted(ops_sensitivity, key = lambda items: items[1], reverse=True) - - op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) + acc, _ = self.last_tune_result + op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc + + # do accumulated fallback according to the order in the previous stage + if len(op_fallback_acc_impact) > 0: + ordered_ops = sorted(op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better) + op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) + logger.info(f"Start to accumulate fallback to {target_dtype}.") initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) - op_fallback_acc_impact = OrderedDict() - for op_index, op_tuning_cfg in enumerate(fallback_sampler): + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=True) + for op_tuning_cfg in fallback_sampler: op_tuning_cfg['calib_sampling_size'] = calib_sampling_size yield op_tuning_cfg - acc, _ = self.last_tune_result - op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc - - - # do accumulated fallback according to the order in the previous stage - if len(op_fallback_acc_impact) > 0: - ordered_ops = sorted(op_fallback_acc_impact.keys(), - key=lambda key: op_fallback_acc_impact[key], - reverse=self.higher_is_better) - op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) - logger.info(f"Start to accumulate fallback to {target_dtype}.") - initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) - for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - yield op_tuning_cfg - - def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig): + + def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg: OpTuningConfig): op_state = op_static_cfg.get_state() op_name = op_static_cfg.op_name op_type = op_static_cfg.op_type @@ -198,5 +381,3 @@ def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig): tuning_item = quant_mode_item.get_option_by_name(att_item) dynamic_state[att_item] = tuning_item.options[0] if tuning_item else None return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) - - \ No newline at end of file diff --git a/neural_compressor/strategy/mse.py b/neural_compressor/strategy/mse.py index 614984359ba..8dafa35759d 100644 --- a/neural_compressor/strategy/mse.py +++ b/neural_compressor/strategy/mse.py @@ -194,10 +194,11 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) # step3. 
optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) - early_stop_tuning = False + early_stop_tuning = True stage1_cnt = 0 int8_ops = quant_mode_wise_items['dynamic'] + quant_mode_wise_items['static'] stage1_max = min(5, len(int8_ops)) # TODO set a more appropriate value + stage1_max=-1 op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) for op_tuning_cfg in op_wise_tuning_sampler: diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index ad7939d5d84..a6ee28b9d4a 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -12,7 +12,7 @@ from neural_compressor.adaptor import FRAMEWORKS import shutil from neural_compressor.strategy.st_utils.hawq_wenhuach import Hawq_top, fix_seed - +from torch.quantization.quantize_fx import fuse_fx fix_seed(1) def build_ptq_yaml(): @@ -41,7 +41,7 @@ def build_ptq_yaml(): f.write(fake_yaml) class TestPytorchAdaptor(unittest.TestCase): - framework_specific_info = {"device": "cpu", + framework_specific_info = {"device": "gpu", "approach": "post_training_static_quant", "random_seed": 1234, "q_dataloader": None, @@ -50,6 +50,7 @@ class TestPytorchAdaptor(unittest.TestCase): adaptor = FRAMEWORKS[framework](framework_specific_info) model = torchvision.models.resnet18() + # from collections import OrderedDict # model = torch.nn.Sequential(OrderedDict([ # ('conv1', torch.nn.Conv2d(3, 2, 1, 1)), @@ -78,10 +79,11 @@ def eval_func(model): return self.i from neural_compressor.experimental import Quantization, common model = copy.deepcopy(self.model) - + model.eval() + model = fuse_fx(model) quantizer = Quantization('ptq_yaml.yaml') quantizer.eval_func = eval_func - dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + dataset = quantizer.dataset('dummy', (32, 3, 224, 224), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = model From 883c3a4c4293ff9ac692bd2c9e2bb68ce35d50d4 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Thu, 17 Nov 2022 20:01:48 +0800 Subject: [PATCH 015/128] weight hessian trace, not finished --- neural_compressor/strategy/hawq.py | 242 +++++++++++++++++++---------- 1 file changed, 161 insertions(+), 81 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 3db5cf0aed5..4d3b9489b8f 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -27,19 +27,22 @@ from .st_utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler from .st_utils.tuning_structs import OpTuningConfig from .st_utils.tuning_space import TUNING_ITEMS_LST - +from torch.quantization.quantize_fx import fuse_fx +import torchvision class HessianTrace: - def __init__(self, model, conf, adaptor, op_cfgs_list, dataloader): + def __init__(self, model, conf, adaptor, weight_list, dataloader): self.model = model self.conf = conf ##config - self.op_cfgs_list = op_cfgs_list ##op to get + self.weight_list = weight_list ##op to get self.dataloader = dataloader self.adaptor = adaptor self.max_iter = 500 self.tolerance = 1e-5 self.eps = 1e-6 self.index = 0 + self.device = self.get_device(self.model) + self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config # def apply_init(self): # trace_per_op = self._cal_trace() @@ -57,22 +60,51 @@ def get_device(self, model: torch.nn.Module): for n, p in model.named_parameters(): return p.data.device - def get_gradient(self, model, data, criterion, op_list, device="cpu", retrain_graph=False): + def get_gradients(self, model, data, criterion, create_graph=False): model.zero_grad() - input = data[0] - target = data[1] + input = data[0].to(self.device) + target = data[1].to(self.device) output = model(input) loss = criterion(output, target) - loss.backward(retain_graph=retrain_graph) - gradients = {} + loss.backward(create_graph=create_graph) + gradients = [] for n, p in model.named_parameters(): - if n in op_list: - continue - gradients[n] = 0 if p.grad != None: - gradients[n] = p.grad + gradient = p.grad + gradients.append(gradient+0.0) ## add 0 to create a copy + model.zero_grad() return gradients + def get_params(self, model): + parameters = [p for p in model.parameters() if p.requires_grad] + return parameters + + def sample_rademacher(self, params): + samples = [] + for param in params: + r = torch.randint_like(param, high=2, device=self.device) + r.masked_fill_(r == 0, -1) + samples.append(r) + return samples + + def hutchinson_one_step(self, params, num_batches): + v = self.sample_rademacher(params) + H_v = [0] * len(v) + cnt = 0 + for step, batch in enumerate(self.dataloader): + batch_size = batch[0].shape[0] + cnt += batch_size + gradients = self.get_gradients(self.model, batch, self.criterion, create_graph=True) + H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) + H_v = [pre + cur * float(batch_size) + 0.0 for cur, pre in zip(H_v_one, H_v)] + if step == num_batches - 1: + break + if cnt > 0: + H_v = [item / cnt for item in H_v] + v_t_H_v = [torch.sum(h_v * v_t) / h_v.size().numel() for (h_v, v_t) in zip(H_v, v)] + return v_t_H_v + + def get_avg_trace(self, num_batches=2): """ Estimates average hessian trace for each parameter @@ -80,73 +112,75 @@ def get_avg_trace(self, num_batches=2): assert num_batches > 0 ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] ##num_all_data = num_data_iter * self.dataloader.batch_size - op_list = [item.name for item in self.op_cfgs_list] - criterion = 
torch.nn.CrossEntropyLoss() ##TODO setting this in config - device = self.get_device(self.model) + op_list = self.weight_list - for step, batch in enumerate(self.dataloader): - gradient_dict = self.get_gradient(self.model, batch,criterion, op_list, device=device, retrain_graph=True) - tmp = 1 - if step == num_batches - 1: - break + ##TODO setting this in config + + + params = [p for p in self.model.parameters() if p.requires_grad] + for i in range(self.max_iter): + trace_estimated = self.hutchinson_one_step(params, num_batches) - weight_vhp = [] - w_avg_total_trace = 0. - w_avg_traces_per_iter = [] - mean_avg_traces_per_param = None - act_vhp = [] - a_avg_total_trace = 0. - a_avg_traces_per_iter = [] - mean_avg_traces_per_act = None - - for i in range(max_iter): - weight_vhp_list, w_v, \ - act_vhp_list, a_v, op_act_grad = self.adaptor.get_2order_grad(self.model, - criterion, - self.dataloader, - num_data_iter, - qop_list) - if not weight_vhp: - weight_vhp = [np.random.randn(*p.shape) for p in w_v] - for vhp_curr in weight_vhp_list: - weight_vhp = [a + b * float(self.dataloader.batch_size) + 0. \ - for a, b in zip(weight_vhp, vhp_curr)] - weight_vhp = [a / float(num_all_data) for a in weight_vhp] - avg_traces_per_param = [np.sum(a * b) / a.size for (a, b) in zip(weight_vhp, w_v)] - w_avg_traces_per_iter.append(avg_traces_per_param) - mean_avg_traces_per_param = np.mean(w_avg_traces_per_iter, axis=0) - w_mean_avg_total_trace = np.sum(mean_avg_traces_per_param) - - w_diff_avg = abs(w_mean_avg_total_trace - w_avg_total_trace) / \ - (w_avg_total_trace + diff_eps) - w_avg_total_trace = w_mean_avg_total_trace - logger.info( - '{}# weights difference_avg={} avg_trace={}'.format( - i, w_diff_avg, w_avg_total_trace)) - - if not act_vhp: - act_vhp = [np.random.randn(*p.shape) for p in a_v] - for vhp_curr in act_vhp_list: - act_vhp = [a + b * float(self.dataloader.batch_size) + 0. \ - for a, b in zip(act_vhp, vhp_curr)] - act_vhp = [a / float(num_all_data) for a in act_vhp] - avg_traces_per_act = [np.sum(a * b) / a.size for (a, b) in zip(act_vhp, a_v)] - a_avg_traces_per_iter.append(avg_traces_per_act) - mean_avg_traces_per_act = np.mean(a_avg_traces_per_iter, axis=0) - a_mean_avg_total_trace = np.sum(mean_avg_traces_per_act) - - a_diff_avg = abs(a_mean_avg_total_trace - a_avg_total_trace) / \ - (a_avg_total_trace + diff_eps) - a_avg_total_trace = a_mean_avg_total_trace - logger.info( - '{}# activation difference_avg={} avg_trace={}'.format( - i, a_diff_avg, a_avg_total_trace)) - - if w_diff_avg < tolerance and a_diff_avg < tolerance: - return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad - - return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad + + tmp = 1 + # + # weight_vhp = [] + # w_avg_total_trace = 0. + # w_avg_traces_per_iter = [] + # mean_avg_traces_per_param = None + # act_vhp = [] + # a_avg_total_trace = 0. + # a_avg_traces_per_iter = [] + # mean_avg_traces_per_act = None + # + # for i in range(self.max_iter): + # weight_vhp_list, w_v, \ + # act_vhp_list, a_v, op_act_grad = self.adaptor.get_2order_grad(self.model, + # criterion, + # self.dataloader, + # num_data_iter, + # qop_list) + # if not weight_vhp: + # weight_vhp = [np.random.randn(*p.shape) for p in w_v] + # for vhp_curr in weight_vhp_list: + # weight_vhp = [a + b * float(self.dataloader.batch_size) + 0. 
\ + # for a, b in zip(weight_vhp, vhp_curr)] + # weight_vhp = [a / float(num_all_data) for a in weight_vhp] + # avg_traces_per_param = [np.sum(a * b) / a.size for (a, b) in zip(weight_vhp, w_v)] + # w_avg_traces_per_iter.append(avg_traces_per_param) + # mean_avg_traces_per_param = np.mean(w_avg_traces_per_iter, axis=0) + # w_mean_avg_total_trace = np.sum(mean_avg_traces_per_param) + # + # w_diff_avg = abs(w_mean_avg_total_trace - w_avg_total_trace) / \ + # (w_avg_total_trace + diff_eps) + # w_avg_total_trace = w_mean_avg_total_trace + # logger.info( + # '{}# weights difference_avg={} avg_trace={}'.format( + # i, w_diff_avg, w_avg_total_trace)) + # + # if not act_vhp: + # act_vhp = [np.random.randn(*p.shape) for p in a_v] + # for vhp_curr in act_vhp_list: + # act_vhp = [a + b * float(self.dataloader.batch_size) + 0. \ + # for a, b in zip(act_vhp, vhp_curr)] + # act_vhp = [a / float(num_all_data) for a in act_vhp] + # avg_traces_per_act = [np.sum(a * b) / a.size for (a, b) in zip(act_vhp, a_v)] + # a_avg_traces_per_iter.append(avg_traces_per_act) + # mean_avg_traces_per_act = np.mean(a_avg_traces_per_iter, axis=0) + # a_mean_avg_total_trace = np.sum(mean_avg_traces_per_act) + # + # a_diff_avg = abs(a_mean_avg_total_trace - a_avg_total_trace) / \ + # (a_avg_total_trace + diff_eps) + # a_avg_total_trace = a_mean_avg_total_trace + # logger.info( + # '{}# activation difference_avg={} avg_trace={}'.format( + # i, a_diff_avg, a_avg_total_trace)) + # + # if w_diff_avg < tolerance and a_diff_avg < tolerance: + # return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad + # + # return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad def _cal_trace(self): """ @@ -241,6 +275,46 @@ def __init__(self, model, conf, q_dataloader, q_func=None, dicts, q_hooks) + def is_fused_module(self, module): + """This is a helper function for `_propagate_qconfig_helper` to detecte + if this module is fused. + Args: + module (object): input module + Returns: + (bool): is fused or not + """ + op_type = str(type(module)) + if 'fused' in op_type: + return True + else: + return False + + def get_fused_mapping(self): + # tmp = self.model + # if isinstance(self._fp32_model, torch.nn.Module): + # fx_model = self._fp32_model + # + # model = copy.deepcopy(self._fp32_model) ##orig model + # model.eval() + # fx_model = fuse_fx(model) + model = self._fp32_model + weights_info = dict(model.named_parameters()) + weight_to_op = {} + + module_dict = dict(model.named_modules()) + for op_name, child in model.named_modules(): + if self.is_fused_module(child): + for name, _ in child.named_children(): + if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right + weight_to_op[op_name + "." + name + ".weight"] = op_name + # module_prefix = op_name + '.' 
+ name + # if module_prefix in module_dict: + # module_dict.pop(module_prefix) # remove sub-modules of fused modules + else: + if op_name + ".weight" in weights_info: + weight_to_op[op_name + ".weight"] = op_name + return weight_to_op + def next_tune_cfg(self): from copy import deepcopy tuning_space = self.tuning_space @@ -254,16 +328,21 @@ def next_tune_cfg(self): target_dtype = "fp32" ##TODO support bf16 target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) - fp_op_list = [item for item in quant_ops if item in target_type_lst] + fp_op_list = [item.name for item in quant_ops if item in target_type_lst] + # for n, p in self._fp32_model.named_modules(): + # print(n) + # for n, p in self._fp32_model.named_parameters(): + # print(n) + weight_to_op = self.get_fused_mapping() orig_eval = True if self._fp32_model.training: orig_eval = False self._fp32_model.train() - ht = HessianTrace(self._fp32_model, self.cfg, self.adaptor, fp_op_list, self.calib_dataloader) + ht = HessianTrace(self._fp32_model, self.cfg, self.adaptor, weight_to_op.keys(), self.calib_dataloader) ht.get_avg_trace() - # if orig_eval: - # self._fp32_model.eval() - # ht.get_avg_trace() + if orig_eval: + self._fp32_model.eval() + # tmp = 1 # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up # ops_sensitivity = self.adaptor.get_hessian_trace(self._fp32_model, @@ -338,6 +417,7 @@ def next_tune_cfg_bk(self): fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], initial_op_tuning_cfg=initial_op_tuning_cfg, op_dtypes=op_dtypes, accumulate=False) + op_fallback_acc_impact = OrderedDict() for op_index, op_tuning_cfg in enumerate(fallback_sampler): op_tuning_cfg['calib_sampling_size'] = calib_sampling_size From a50cc143a84d1b35ab0a0baf16d05c819e7b6164 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Fri, 18 Nov 2022 11:16:25 +0800 Subject: [PATCH 016/128] bascially finished weight trace --- neural_compressor/strategy/hawq.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 4d3b9489b8f..39fd93fd3ff 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -96,19 +96,20 @@ def hutchinson_one_step(self, params, num_batches): cnt += batch_size gradients = self.get_gradients(self.model, batch, self.criterion, create_graph=True) H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) - H_v = [pre + cur * float(batch_size) + 0.0 for cur, pre in zip(H_v_one, H_v)] + H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] if step == num_batches - 1: break if cnt > 0: H_v = [item / cnt for item in H_v] - v_t_H_v = [torch.sum(h_v * v_t) / h_v.size().numel() for (h_v, v_t) in zip(H_v, v)] + v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)])##maybe sum is better return v_t_H_v - def get_avg_trace(self, num_batches=2): + + def get_avg_traces(self, num_batches=2): + """ + Estimates average hessian trace for each parameter """ - Estimates average hessian trace for each parameter - """ assert num_batches > 0 ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] ##num_all_data = num_data_iter * self.dataloader.batch_size @@ -119,8 +120,21 @@ def get_avg_trace(self, num_batches=2): params = [p for p in self.model.parameters() if p.requires_grad] + layer_traces_per_iter = [] + prev_avg_model_trace = 0 for i in range(self.max_iter): - trace_estimated = self.hutchinson_one_step(params, num_batches) + layer_traces = self.hutchinson_one_step(params, num_batches) + layer_traces_per_iter.append(layer_traces) + layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) + model_trace = torch.sum(layer_traces_estimate) + diff_ratio = abs(model_trace-prev_avg_model_trace)/(prev_avg_model_trace+self.eps) + if diff_ratio < self.tolerance and i > 10:##TODO magic number + break + prev_avg_model_trace = model_trace + + layer_traces = layer_traces_estimate + return layer_traces + tmp = 1 @@ -339,7 +353,7 @@ def next_tune_cfg(self): orig_eval = False self._fp32_model.train() ht = HessianTrace(self._fp32_model, self.cfg, self.adaptor, weight_to_op.keys(), self.calib_dataloader) - ht.get_avg_trace() + ht.get_avg_traces() if orig_eval: self._fp32_model.eval() From 2528605a655dad9bd9cde5f07fcd53d5e3e5ea50 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Fri, 18 Nov 2022 15:45:52 +0800 Subject: [PATCH 017/128] enable activation gradient hook, activation trace is not finished --- neural_compressor/strategy/hawq.py | 266 +++++++++++------------------ 1 file changed, 101 insertions(+), 165 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 39fd93fd3ff..34d94901167 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -28,42 +28,73 @@ from .st_utils.tuning_structs import OpTuningConfig from .st_utils.tuning_space import TUNING_ITEMS_LST from torch.quantization.quantize_fx import fuse_fx -import torchvision +import torchvision + class HessianTrace: - def __init__(self, model, conf, adaptor, weight_list, dataloader): - self.model = model - self.conf = conf ##config - self.weight_list = weight_list ##op to get + """ + please refer to + Yao, Zhewei, et al. "Pyhessian: Neural networks through the lens of the hessian." 2020 IEEE international conference on big data (Big data). IEEE, 2020. + Dong, Zhen, et al. "Hawq-v2: Hessian aware trace-weighted quantization of neural networks." Advances in neural information processing systems 33 (2020): 18518-18529. + https://github.com/openvinotoolkit/nncf/blob/develop/nncf/torch/quantization/hessian_trace.py + """ + + def __init__(self, model, dataloader, criterion=None): + self.model = model ##TODO need to check fused or not self.dataloader = dataloader - self.adaptor = adaptor self.max_iter = 500 self.tolerance = 1e-5 self.eps = 1e-6 self.index = 0 self.device = self.get_device(self.model) - self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config - - # def apply_init(self): - # trace_per_op = self._cal_trace() - # if not trace_per_op: - # raise RuntimeError('Failed to calculate hessian traces!') - # - # perturbations = self._calc_quantization_noise() - # configuration_metric = self._calc_hawq_metric_per_configuration( - # perturbations, trace_per_op) - # config_index = self.choose_configuration(configuration_metric) - # chosen_config = self.op_cfgs_list[config_index] - # return chosen_config, trace_per_op + self.criterion = criterion + if self.criterion == None: + self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config + self.criterion = self.criterion.to(self.device) + self.weight_to_op, self.op_list = self.get_fused_mapping() + + def is_fused_module(self, module): + """This is a helper function for `_propagate_qconfig_helper` to detecte + if this module is fused. + Args: + module (object): input module + Returns: + (bool): is fused or not + """ + op_type = str(type(module)) + if 'fused' in op_type: + return True + else: + return False + + def get_fused_mapping(self): + model = self.model + weights_info = dict(model.named_parameters()) + weight_to_op = {} + for op_name, child in model.named_modules(): + if self.is_fused_module(child): + for name, _ in child.named_children(): + if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right + weight_to_op[op_name + "." 
+ name + ".weight"] = op_name + break + else: + if op_name + ".weight" in weights_info: + weight_to_op[op_name + ".weight"] = op_name + op_list = [] + for key in weight_to_op.keys(): + op_list.append(weight_to_op[key]) + return weight_to_op, op_list def get_device(self, model: torch.nn.Module): for n, p in model.named_parameters(): return p.data.device - def get_gradients(self, model, data, criterion, create_graph=False): + def get_gradients(self, model, data, criterion, create_graph=False, enable_act=False): model.zero_grad() input = data[0].to(self.device) target = data[1].to(self.device) + if enable_act: + input.requires_grad = True output = model(input) loss = criterion(output, target) loss.backward(create_graph=create_graph) @@ -71,7 +102,7 @@ def get_gradients(self, model, data, criterion, create_graph=False): for n, p in model.named_parameters(): if p.grad != None: gradient = p.grad - gradients.append(gradient+0.0) ## add 0 to create a copy + gradients.append(gradient + 0.0) ## add 0 to create a copy model.zero_grad() return gradients @@ -87,143 +118,88 @@ def sample_rademacher(self, params): samples.append(r) return samples - def hutchinson_one_step(self, params, num_batches): + def hutchinson_one_step(self, params, enable_act, num_batches): v = self.sample_rademacher(params) H_v = [0] * len(v) cnt = 0 - for step, batch in enumerate(self.dataloader): - batch_size = batch[0].shape[0] + for step, data in enumerate(self.dataloader): + batch_size = data[0].shape[0] cnt += batch_size - gradients = self.get_gradients(self.model, batch, self.criterion, create_graph=True) + gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True,enable_act=enable_act) H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) - H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] + H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] if step == num_batches - 1: break if cnt > 0: H_v = [item / cnt for item in H_v] - v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)])##maybe sum is better + v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better return v_t_H_v + def backward_hook(self, name): + def grad_hook(model, grad_input, grad_output): + self.layer_acts_grads[name] = [grad_input, grad_output] + return grad_hook + + def forward_hook(self, name): + def enable_input_grad_hook(model, inputs, outputs): + try: + input = inputs[0]##TODO check whether this is right + except: + input = inputs - def get_avg_traces(self, num_batches=2): + if input.is_leaf == False: + if input.requires_grad is False: + input.requires_grad = True + self.layer_acts[name] = input + + return enable_input_grad_hook + + def register_hook(self): + for name, module in self.model.named_modules(): + if name in self.op_list: + forward_handle = module.register_forward_hook(self.forward_hook(name)) + backward_handle = module.register_backward_hook(self.backward_hook(name)) + self.hook_handlers.append(forward_handle) + self.hook_handlers.append(backward_handle) + + def unregister_hook(self): + for handel in self.hook_handlers: + handel.remove() + + def get_avg_traces(self, enable_act=True, num_batches=2): """ Estimates average hessian trace for each parameter """ assert num_batches > 0 + if enable_act: + self.hook_handlers = [] + self.layer_acts = {} + self.layer_acts_grads = {} + self.register_hook() ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] ##num_all_data = 
num_data_iter * self.dataloader.batch_size - op_list = self.weight_list - + ##op_list = self.op_list ##TODO setting this in config - - params = [p for p in self.model.parameters() if p.requires_grad] layer_traces_per_iter = [] prev_avg_model_trace = 0 for i in range(self.max_iter): - layer_traces = self.hutchinson_one_step(params, num_batches) + layer_traces = self.hutchinson_one_step(params, enable_act, num_batches ) layer_traces_per_iter.append(layer_traces) layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) model_trace = torch.sum(layer_traces_estimate) - diff_ratio = abs(model_trace-prev_avg_model_trace)/(prev_avg_model_trace+self.eps) - if diff_ratio < self.tolerance and i > 10:##TODO magic number + diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) + if diff_ratio < self.tolerance and i > 10: ##TODO magic number break prev_avg_model_trace = model_trace layer_traces = layer_traces_estimate + self.unregister_hook() return layer_traces - - tmp = 1 - # - # weight_vhp = [] - # w_avg_total_trace = 0. - # w_avg_traces_per_iter = [] - # mean_avg_traces_per_param = None - # act_vhp = [] - # a_avg_total_trace = 0. - # a_avg_traces_per_iter = [] - # mean_avg_traces_per_act = None - # - # for i in range(self.max_iter): - # weight_vhp_list, w_v, \ - # act_vhp_list, a_v, op_act_grad = self.adaptor.get_2order_grad(self.model, - # criterion, - # self.dataloader, - # num_data_iter, - # qop_list) - # if not weight_vhp: - # weight_vhp = [np.random.randn(*p.shape) for p in w_v] - # for vhp_curr in weight_vhp_list: - # weight_vhp = [a + b * float(self.dataloader.batch_size) + 0. \ - # for a, b in zip(weight_vhp, vhp_curr)] - # weight_vhp = [a / float(num_all_data) for a in weight_vhp] - # avg_traces_per_param = [np.sum(a * b) / a.size for (a, b) in zip(weight_vhp, w_v)] - # w_avg_traces_per_iter.append(avg_traces_per_param) - # mean_avg_traces_per_param = np.mean(w_avg_traces_per_iter, axis=0) - # w_mean_avg_total_trace = np.sum(mean_avg_traces_per_param) - # - # w_diff_avg = abs(w_mean_avg_total_trace - w_avg_total_trace) / \ - # (w_avg_total_trace + diff_eps) - # w_avg_total_trace = w_mean_avg_total_trace - # logger.info( - # '{}# weights difference_avg={} avg_trace={}'.format( - # i, w_diff_avg, w_avg_total_trace)) - # - # if not act_vhp: - # act_vhp = [np.random.randn(*p.shape) for p in a_v] - # for vhp_curr in act_vhp_list: - # act_vhp = [a + b * float(self.dataloader.batch_size) + 0. 
\ - # for a, b in zip(act_vhp, vhp_curr)] - # act_vhp = [a / float(num_all_data) for a in act_vhp] - # avg_traces_per_act = [np.sum(a * b) / a.size for (a, b) in zip(act_vhp, a_v)] - # a_avg_traces_per_iter.append(avg_traces_per_act) - # mean_avg_traces_per_act = np.mean(a_avg_traces_per_iter, axis=0) - # a_mean_avg_total_trace = np.sum(mean_avg_traces_per_act) - # - # a_diff_avg = abs(a_mean_avg_total_trace - a_avg_total_trace) / \ - # (a_avg_total_trace + diff_eps) - # a_avg_total_trace = a_mean_avg_total_trace - # logger.info( - # '{}# activation difference_avg={} avg_trace={}'.format( - # i, a_diff_avg, a_avg_total_trace)) - # - # if w_diff_avg < tolerance and a_diff_avg < tolerance: - # return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad - # - # return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad - - def _cal_trace(self): - """ - Calculate the trace for both weight and activation per layer - """ - pass - # trace_estimator = HessianTraceEstimator(self.model, - # self.conf, - # self.adaptor, - # self.op_cfgs_list, - # self.dataloader) - # w_avg_trace, a_avg_trace, op_act_grad = trace_estimator.get_avg_trace() - # - # # mapping trace to op per op_weight_mapping - # weights_name = self.adaptor.get_all_weight_names(self.model) - # op_weight_mapping = self.get_op_weight_mapping() - # trace_per_op = OrderedDict() - # w_op_trace_info = np.zeros(len(op_weight_mapping)) - # for i, (op_name, w_name) in enumerate(op_weight_mapping.items()): - # index = weights_name.index(w_name) - # w_op_trace_info[i] = w_avg_trace[index] - # act_trace = 0.0 - # if op_name in op_act_grad: - # a_index = op_act_grad.index(op_name) - # act_trace = a_avg_trace[a_index] - # trace_per_op[op_name] = (w_avg_trace[index], act_trace) - # return trace_per_op - - @strategy_registry class HawqTuneStrategy(TuneStrategy): """The basic tuning strategy which tunes the low precision model with below order. @@ -289,46 +265,6 @@ def __init__(self, model, conf, q_dataloader, q_func=None, dicts, q_hooks) - def is_fused_module(self, module): - """This is a helper function for `_propagate_qconfig_helper` to detecte - if this module is fused. - Args: - module (object): input module - Returns: - (bool): is fused or not - """ - op_type = str(type(module)) - if 'fused' in op_type: - return True - else: - return False - - def get_fused_mapping(self): - # tmp = self.model - # if isinstance(self._fp32_model, torch.nn.Module): - # fx_model = self._fp32_model - # - # model = copy.deepcopy(self._fp32_model) ##orig model - # model.eval() - # fx_model = fuse_fx(model) - model = self._fp32_model - weights_info = dict(model.named_parameters()) - weight_to_op = {} - - module_dict = dict(model.named_modules()) - for op_name, child in model.named_modules(): - if self.is_fused_module(child): - for name, _ in child.named_children(): - if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right - weight_to_op[op_name + "." + name + ".weight"] = op_name - # module_prefix = op_name + '.' 
+ name - # if module_prefix in module_dict: - # module_dict.pop(module_prefix) # remove sub-modules of fused modules - else: - if op_name + ".weight" in weights_info: - weight_to_op[op_name + ".weight"] = op_name - return weight_to_op - def next_tune_cfg(self): from copy import deepcopy tuning_space = self.tuning_space @@ -347,12 +283,12 @@ def next_tune_cfg(self): # print(n) # for n, p in self._fp32_model.named_parameters(): # print(n) - weight_to_op = self.get_fused_mapping() + orig_eval = True if self._fp32_model.training: orig_eval = False self._fp32_model.train() - ht = HessianTrace(self._fp32_model, self.cfg, self.adaptor, weight_to_op.keys(), self.calib_dataloader) + ht = HessianTrace(self._fp32_model, self.calib_dataloader) ht.get_avg_traces() if orig_eval: self._fp32_model.eval() From abbc4ae53e66c32a8f2ff20ee0316b562c2dda92 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Fri, 18 Nov 2022 15:47:25 +0800 Subject: [PATCH 018/128] reformat code --- neural_compressor/strategy/hawq.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 34d94901167..8ec728337b9 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -102,7 +102,7 @@ def get_gradients(self, model, data, criterion, create_graph=False, enable_act=F for n, p in model.named_parameters(): if p.grad != None: gradient = p.grad - gradients.append(gradient + 0.0) ## add 0 to create a copy + gradients.append(gradient + 0.0) ## add 0 to create a copy model.zero_grad() return gradients @@ -125,7 +125,7 @@ def hutchinson_one_step(self, params, enable_act, num_batches): for step, data in enumerate(self.dataloader): batch_size = data[0].shape[0] cnt += batch_size - gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True,enable_act=enable_act) + gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True, enable_act=enable_act) H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] if step == num_batches - 1: @@ -135,16 +135,16 @@ def hutchinson_one_step(self, params, enable_act, num_batches): v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better return v_t_H_v - def backward_hook(self, name): def grad_hook(model, grad_input, grad_output): self.layer_acts_grads[name] = [grad_input, grad_output] + return grad_hook def forward_hook(self, name): def enable_input_grad_hook(model, inputs, outputs): try: - input = inputs[0]##TODO check whether this is right + input = inputs[0] ##TODO check whether this is right except: input = inputs @@ -167,7 +167,7 @@ def unregister_hook(self): for handel in self.hook_handlers: handel.remove() - def get_avg_traces(self, enable_act=True, num_batches=2): + def get_avg_traces(self, enable_act=False, num_batches=2): """ Estimates average hessian trace for each parameter """ @@ -186,7 +186,7 @@ def get_avg_traces(self, enable_act=True, num_batches=2): layer_traces_per_iter = [] prev_avg_model_trace = 0 for i in range(self.max_iter): - layer_traces = self.hutchinson_one_step(params, enable_act, num_batches ) + layer_traces = self.hutchinson_one_step(params, enable_act, num_batches) layer_traces_per_iter.append(layer_traces) layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) model_trace = torch.sum(layer_traces_estimate) From 58128ec3db00e65f3a8ef0cb05b364038ecd2623 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Fri, 18 Nov 2022 15:54:48 +0800 Subject: [PATCH 019/128] fix a bug --- neural_compressor/strategy/hawq.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 8ec728337b9..7d2331af345 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -196,7 +196,8 @@ def get_avg_traces(self, enable_act=False, num_batches=2): prev_avg_model_trace = model_trace layer_traces = layer_traces_estimate - self.unregister_hook() + if enable_act: + self.unregister_hook() return layer_traces From 26538ee995c7b22275342955fcc3ea9cb4c88f18 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Mon, 21 Nov 2022 11:06:43 +0800 Subject: [PATCH 020/128] when reset the required grad, something goes wrong --- neural_compressor/strategy/hawq.py | 67 ++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 7d2331af345..228bb249e2a 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -92,6 +92,7 @@ def get_device(self, model: torch.nn.Module): def get_gradients(self, model, data, criterion, create_graph=False, enable_act=False): model.zero_grad() input = data[0].to(self.device) + ##self._input_shape = input.shape ## for resetting input activation target = data[1].to(self.device) if enable_act: input.requires_grad = True @@ -102,7 +103,7 @@ def get_gradients(self, model, data, criterion, create_graph=False, enable_act=F for n, p in model.named_parameters(): if p.grad != None: gradient = p.grad - gradients.append(gradient + 0.0) ## add 0 to create a copy + gradients.append(gradient + 0.0) ## add 0 to create a copy model.zero_grad() return gradients @@ -118,7 +119,7 @@ def sample_rademacher(self, params): samples.append(r) return samples - def hutchinson_one_step(self, params, enable_act, num_batches): + def get_hv_one_sample(self, params, enable_act, num_batches): v = self.sample_rademacher(params) H_v = [0] * len(v) cnt = 0 @@ -135,19 +136,17 @@ def hutchinson_one_step(self, params, enable_act, num_batches): v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better return v_t_H_v - def backward_hook(self, name): - def grad_hook(model, grad_input, grad_output): + def _get_input_grad_hook(self, name): + def input_grad_hook(model, grad_input, grad_output): self.layer_acts_grads[name] = [grad_input, grad_output] + return input_grad_hook - return grad_hook - - def forward_hook(self, name): + def _get_enable_input_grad_hook(self, name): def enable_input_grad_hook(model, inputs, outputs): try: input = inputs[0] ##TODO check whether this is right except: input = inputs - if input.is_leaf == False: if input.requires_grad is False: input.requires_grad = True @@ -155,28 +154,54 @@ def enable_input_grad_hook(model, inputs, outputs): return enable_input_grad_hook - def register_hook(self): + # def _get_disable_input_grad_hook(self, name): + # def disable_input_grad_hook(model, inputs, outputs): + # try: + # input = inputs[0] ##TODO check whether this is right + # except: + # input = inputs + # if input.is_leaf == False:## you can only change requires_grad flags of leaf variables + # if input.requires_grad is True: + # input.requires_grad = False + # + # + # return disable_input_grad_hook + + + def _unregister_hook(self): + for handel in self.hook_handles: + handel.remove() + + def register_input_grad_hooks(self): for name, module in self.model.named_modules(): if name in self.op_list: - forward_handle = module.register_forward_hook(self.forward_hook(name)) - backward_handle = module.register_backward_hook(self.backward_hook(name)) - self.hook_handlers.append(forward_handle) - self.hook_handlers.append(backward_handle) + hook_handle = module.register_forward_hook(self._get_enable_input_grad_hook(name)) + self.hook_handles.append(hook_handle) + hook_handle = module.register_forward_hook(self._get_input_grad_hook(name)) + self.hook_handles.append(hook_handle) + + + def reset_input_gradient_and_hooks(self): + # tmp_input = torch.zeros(self._input_shape, device=self.device) + # for name, module in 
self.model.named_modules(): + # if name in self.op_list: + # hook_handle = module.register_forward_hook(self._get_disable_input_grad_hook(name)) + # self.hook_handles.append(hook_handle) + # self.model(tmp_input) + self._unregister_hook() + - def unregister_hook(self): - for handel in self.hook_handlers: - handel.remove() - def get_avg_traces(self, enable_act=False, num_batches=2): + def get_avg_traces(self, enable_act=True, num_batches=2): """ Estimates average hessian trace for each parameter """ assert num_batches > 0 if enable_act: - self.hook_handlers = [] + self.hook_handles = [] self.layer_acts = {} self.layer_acts_grads = {} - self.register_hook() + self.register_input_grad_hooks() ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] ##num_all_data = num_data_iter * self.dataloader.batch_size ##op_list = self.op_list @@ -186,7 +211,7 @@ def get_avg_traces(self, enable_act=False, num_batches=2): layer_traces_per_iter = [] prev_avg_model_trace = 0 for i in range(self.max_iter): - layer_traces = self.hutchinson_one_step(params, enable_act, num_batches) + layer_traces = self.get_hv_one_sample(params, enable_act, num_batches) layer_traces_per_iter.append(layer_traces) layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) model_trace = torch.sum(layer_traces_estimate) @@ -197,7 +222,7 @@ def get_avg_traces(self, enable_act=False, num_batches=2): layer_traces = layer_traces_estimate if enable_act: - self.unregister_hook() + self.reset_input_gradient_and_hooks() return layer_traces From 8710a690fd1dde7ebb769623a35e4ada5911417a Mon Sep 17 00:00:00 2001 From: wenhuach
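The hook refactor above prepares activation-gradient collection: a forward hook forces requires_grad on each tracked op's input and stores the activation, while a second hook is meant to record the gradient flowing through that op during the backward pass, and all handles are removed once statistics are gathered. A condensed standalone sketch of that pattern follows; the toy model, the dictionary names, and the use of register_full_backward_hook (the non-deprecated counterpart of the register_backward_hook call used in the patch) are assumptions for illustration only.

    import torch

    acts, act_grads = {}, {}

    def make_forward_hook(name):
        def forward_hook(module, inputs, output):
            acts[name] = inputs[0]            # record the activation feeding this module
        return forward_hook

    def make_backward_hook(name):
        def backward_hook(module, grad_input, grad_output):
            act_grads[name] = grad_output[0]  # gradient of the loss w.r.t. the module output
        return backward_hook

    model = torch.nn.Sequential(torch.nn.Linear(8, 16), torch.nn.ReLU(), torch.nn.Linear(16, 4))
    handles = []
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            handles.append(module.register_forward_hook(make_forward_hook(name)))
            handles.append(module.register_full_backward_hook(make_backward_hook(name)))

    x, y = torch.randn(4, 8), torch.randint(0, 4, (4,))
    torch.nn.functional.cross_entropy(model(x), y).backward()
    for handle in handles:                    # detach hooks once the statistics are collected
        handle.remove()
    print({name: g.shape for name, g in act_grads.items()})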
Date: Mon, 21 Nov 2022 15:13:01 +0800 Subject: [PATCH 021/128] add trick imagenet dataset fix one issue --- .../experimental/quantization.py | 32 +++++++++++++++++++ neural_compressor/strategy/hawq.py | 3 +- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index 3a4f822c905..4fa143fc5c8 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -143,6 +143,38 @@ def pre_process(self): with open(self.resume_file, 'rb') as f: _resume = pickle.load(f).__dict__ + + import torchvision.datasets as datasets + import torchvision.transforms as transforms + data_path = "/mnt/data2/dataset/dataset/imagenet/img_raw" + traindir = os.path.join(data_path, 'train') + valdir = os.path.join(data_path, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + val_dataset = datasets.ImageFolder( + valdir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + from torch.utils.data import DataLoader + + self._calib_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True) + self._eval_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False) + self.strategy = STRATEGIES[strategy]( self._model, self.conf, diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 228bb249e2a..2beef8668b4 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -98,6 +98,7 @@ def get_gradients(self, model, data, criterion, create_graph=False, enable_act=F input.requires_grad = True output = model(input) loss = criterion(output, target) + # torch.autograd.backward(loss, create_graph=create_graph) loss.backward(create_graph=create_graph) gradients = [] for n, p in model.named_parameters(): @@ -177,7 +178,7 @@ def register_input_grad_hooks(self): if name in self.op_list: hook_handle = module.register_forward_hook(self._get_enable_input_grad_hook(name)) self.hook_handles.append(hook_handle) - hook_handle = module.register_forward_hook(self._get_input_grad_hook(name)) + hook_handle = module.register_backward_hook(self._get_input_grad_hook(name)) self.hook_handles.append(hook_handle) From cda302943bc532bdb3b80e6a3f9aeabeeab69acc Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Mon, 21 Nov 2022 15:13:22 +0800 Subject: [PATCH 022/128] fix fuse issue --- neural_compressor/adaptor/pytorch.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 097b9359f93..8e89de9fb8b 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -3104,8 +3104,15 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): Returns: None """ - + module_dict = dict(model.named_modules()) for op_name, child in model.named_modules(): + if self.is_fused_module(child): + for name, _ in child.named_children(): + module_prefix = op_name + '.' + name + if module_prefix in module_dict: + module_dict.pop(module_prefix) # remove sub-modules of fused modules + + for op_name, child in module_dict.items(): if type(child) in self.white_list \ and type(child) != torch.nn.Sequential \ and type(child) != torch.quantization.stubs.DeQuantStub: From df3c6e059f713ded00dda7ed1d0c2195c49820dd Mon Sep 17 00:00:00 2001 From: wenhuach
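The fix above prunes the children of fused modules from the quantizable-op scan so that a fused Conv+BN+ReLU is counted once rather than three times. A rough standalone illustration is below; the tiny Sequential model and the string-based is_fused check (similar in spirit to the is_fused_module helpers used elsewhere in this series) are assumptions for demonstration only.

    import torch
    from torch.quantization.quantize_fx import fuse_fx

    def is_fused(module):
        # fused wrappers live under torch.nn.intrinsic.modules.fused
        return 'fused' in str(type(module)).lower()

    model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3),
                                torch.nn.BatchNorm2d(8),
                                torch.nn.ReLU()).eval()
    fused = fuse_fx(model)

    module_dict = dict(fused.named_modules())
    for op_name, child in list(module_dict.items()):
        if is_fused(child):
            for name, _ in child.named_children():
                # drop sub-modules of the fused wrapper so only the wrapper
                # itself is treated as a quantizable op
                module_dict.pop(op_name + '.' + name, None)
    print(list(module_dict.keys()))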
Date: Mon, 21 Nov 2022 18:51:51 +0800 Subject: [PATCH 023/128] change to eval model, remove bias --- .../quantization/ptq/cpu/fx/conf.yaml | 12 +++-- .../experimental/quantization.py | 2 +- .../strategy/auto_mixed_precision.py | 1 + neural_compressor/strategy/hawq.py | 46 ++++++++++++++----- .../strategy/st_utils/hawq_wenhuach.py | 2 +- neural_compressor/strategy/strategy.py | 3 +- test/strategy/test_hawq_wenhuach.py | 2 +- 7 files changed, 47 insertions(+), 21 deletions(-) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml index d1dab0d2f43..064656e872b 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml @@ -21,10 +21,10 @@ quantization: # optional. tuning constrai calibration: sampling_size: 300 # optional. default value is 100. used to set how many samples should be used in calibration. dataloader: - batch_size: 30 + batch_size: 1 dataset: ImageFolder: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed + root: /home/bfang1/Projects/HAWQ_INC/datasets/raw # NOTE: modify to calibration dataset location if needed transform: Resize: size: 256 @@ -40,10 +40,10 @@ evaluation: # optional. required if use metric: topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. dataloader: - batch_size: 30 + batch_size: 1 dataset: ImageFolder: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed + root: /home/bfang1/Projects/HAWQ_INC/datasets/raw # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 @@ -61,7 +61,7 @@ evaluation: # optional. required if use batch_size: 1 dataset: ImageFolder: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed + root: /home/bfang1/Projects/HAWQ_INC/datasets/raw # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 @@ -73,6 +73,8 @@ evaluation: # optional. required if use std: [0.229, 0.224, 0.225] tuning: + strategy: + name: hawq accuracy_criterion: relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. 
exit_policy: diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index 4fa143fc5c8..7e8e8cfbbac 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -146,7 +146,7 @@ def pre_process(self): import torchvision.datasets as datasets import torchvision.transforms as transforms - data_path = "/mnt/data2/dataset/dataset/imagenet/img_raw" + data_path = "/home/bfang1/Projects/HAWQ_INC/datasets/raw" traindir = os.path.join(data_path, 'train') valdir = os.path.join(data_path, 'val') normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index 4b59cf2cced..7fbd759a87e 100644 --- a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -145,6 +145,7 @@ def traverse(self): if self.baseline is None and (self.eval_dataloader or self.eval_func): logger.info("Get FP32 model baseline.") self.baseline = self._evaluate(self.model) + self.baseline=[0.698,[700]] # record the FP32 baseline self._add_tuning_history() diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 2beef8668b4..09f0b1ef175 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -102,15 +102,15 @@ def get_gradients(self, model, data, criterion, create_graph=False, enable_act=F loss.backward(create_graph=create_graph) gradients = [] for n, p in model.named_parameters(): - if p.grad != None: + if p.grad != None and n in self.weight_names: gradient = p.grad gradients.append(gradient + 0.0) ## add 0 to create a copy model.zero_grad() return gradients - def get_params(self, model): - parameters = [p for p in model.parameters() if p.requires_grad] - return parameters + # def get_params(self, model): + # parameters = [p for p in model.parameters() if p.requires_grad] + # return parameters def sample_rademacher(self, params): samples = [] @@ -191,9 +191,13 @@ def reset_input_gradient_and_hooks(self): # self.model(tmp_input) self._unregister_hook() + def get_params(self): + weight_names = [n for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n] ##remove bias + params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n] ##remove bias + self.weight_names = weight_names + self.params = params - - def get_avg_traces(self, enable_act=True, num_batches=2): + def get_avg_traces(self, enable_act=False, num_batches=2): """ Estimates average hessian trace for each parameter """ @@ -207,18 +211,22 @@ def get_avg_traces(self, enable_act=True, num_batches=2): ##num_all_data = num_data_iter * self.dataloader.batch_size ##op_list = self.op_list ##TODO setting this in config - params = [p for p in self.model.parameters() if p.requires_grad] + self.get_params() + # names = [n for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias + # params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias layer_traces_per_iter = [] prev_avg_model_trace = 0 for i in range(self.max_iter): - layer_traces = self.get_hv_one_sample(params, enable_act, num_batches) + layer_traces = self.get_hv_one_sample(self.params, enable_act, num_batches) layer_traces_per_iter.append(layer_traces) layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) model_trace = 
torch.sum(layer_traces_estimate) diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) if diff_ratio < self.tolerance and i > 10: ##TODO magic number break + if i==50:##TODO for debug + break prev_avg_model_trace = model_trace layer_traces = layer_traces_estimate @@ -314,11 +322,25 @@ def next_tune_cfg(self): orig_eval = True if self._fp32_model.training: orig_eval = False - self._fp32_model.train() + self._fp32_model.eval() ht = HessianTrace(self._fp32_model, self.calib_dataloader) - ht.get_avg_traces() - if orig_eval: - self._fp32_model.eval() + traces = ht.get_avg_traces() + if orig_eval==False: + self._fp32_model.train() + + ordered_ops = sorted(op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better) + op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) + logger.info(f"Start to accumulate fallback to {target_dtype}.") + initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) + fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=True) + for op_tuning_cfg in fallback_sampler: + op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + yield op_tuning_cfg + # tmp = 1 # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up diff --git a/neural_compressor/strategy/st_utils/hawq_wenhuach.py b/neural_compressor/strategy/st_utils/hawq_wenhuach.py index 6c74401c5fc..c0ced2af3f4 100644 --- a/neural_compressor/strategy/st_utils/hawq_wenhuach.py +++ b/neural_compressor/strategy/st_utils/hawq_wenhuach.py @@ -10,7 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. """ -from ..utils import logger +from ...utils import logger import torch import numpy as np from torch.autograd import Variable diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 8c959023bf0..c5db10a4d1b 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -219,7 +219,8 @@ def traverse(self): if self.baseline is None: logger.info("Get FP32 model baseline.") self._fp32_model = self.model - self.baseline = self._evaluate(self.model) + ##self.baseline = self._evaluate(self.model) + self.baseline = [0.698,[700]] # record the FP32 baseline self._add_tuning_history() self.show_baseline_info() diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index a6ee28b9d4a..236d8219e71 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -11,7 +11,7 @@ from neural_compressor.adaptor.pytorch import TemplateAdaptor from neural_compressor.adaptor import FRAMEWORKS import shutil -from neural_compressor.strategy.st_utils.hawq_wenhuach import Hawq_top, fix_seed +from neural_compressor.strategy.st_utils.hawq_wenhuach import fix_seed from torch.quantization.quantize_fx import fuse_fx fix_seed(1) From 084b4def57518b000d5b31f794c8d9eb40e0ef9e Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Mon, 21 Nov 2022 19:16:10 +0800 Subject: [PATCH 024/128] fixed weight to op bug --- neural_compressor/strategy/hawq.py | 41 ++++++++++++++++++----------- test/strategy/test_hawq_wenhuach.py | 2 +- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 09f0b1ef175..015d9e678c1 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -75,11 +75,12 @@ def get_fused_mapping(self): if self.is_fused_module(child): for name, _ in child.named_children(): if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right - weight_to_op[op_name + "." + name + ".weight"] = op_name + weight_to_op[op_name + "." + name + ".weight"] = op_name[7:] break else: - if op_name + ".weight" in weights_info: - weight_to_op[op_name + ".weight"] = op_name + name = op_name + ".weight" + if name in weights_info and name not in weight_to_op.keys(): + weight_to_op[op_name + ".weight"] = op_name[7:] op_list = [] for key in weight_to_op.keys(): op_list.append(weight_to_op[key]) @@ -232,7 +233,15 @@ def get_avg_traces(self, enable_act=False, num_batches=2): layer_traces = layer_traces_estimate if enable_act: self.reset_input_gradient_and_hooks() - return layer_traces + weight_name_to_traces={} + + for weigth_name,trace in zip(self.weight_names, layer_traces): + weight_name_to_traces[weigth_name] = trace + op_name_to_trace={} + for weigth_name in self.weight_names: + op_name = self.weight_to_op[weigth_name] + op_name_to_trace[op_name] = weight_name_to_traces[weigth_name] + return op_name_to_trace @strategy_registry @@ -328,18 +337,18 @@ def next_tune_cfg(self): if orig_eval==False: self._fp32_model.train() - ordered_ops = sorted(op_fallback_acc_impact.keys(), - key=lambda key: op_fallback_acc_impact[key], - reverse=self.higher_is_better) - op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) - logger.info(f"Start to accumulate fallback to {target_dtype}.") - initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) - for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - yield op_tuning_cfg + # ordered_ops = sorted(op_fallback_acc_impact.keys(), + # key=lambda key: op_fallback_acc_impact[key], + # reverse=self.higher_is_better) + # op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) + # logger.info(f"Start to accumulate fallback to {target_dtype}.") + # initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) + # fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + # initial_op_tuning_cfg=initial_op_tuning_cfg, + # op_dtypes=op_dtypes, accumulate=True) + # for op_tuning_cfg in fallback_sampler: + # op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + # yield op_tuning_cfg # tmp = 1 diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index 236d8219e71..a09c83c3452 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -13,7 +13,7 @@ import shutil from neural_compressor.strategy.st_utils.hawq_wenhuach import fix_seed from torch.quantization.quantize_fx import fuse_fx -fix_seed(1) +# fix_seed(1) def build_ptq_yaml(): fake_yaml = ''' From 4f0961d11eecabb44d624bdf14ffc062d246eb6b Mon Sep 17 
00:00:00 2001 From: wenhuach
Date: Mon, 21 Nov 2022 19:44:42 +0800 Subject: [PATCH 025/128] still have issues --- neural_compressor/strategy/hawq.py | 16 +++++++++++++++- .../strategy/st_utils/tuning_sampler.py | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 015d9e678c1..dbbaa98e931 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -333,10 +333,24 @@ def next_tune_cfg(self): orig_eval = False self._fp32_model.eval() ht = HessianTrace(self._fp32_model, self.calib_dataloader) - traces = ht.get_avg_traces() + op_to_traces = ht.get_avg_traces() if orig_eval==False: self._fp32_model.train() + ordered_ops = sorted(op_to_traces.keys(), + key=lambda key: op_to_traces[key], + reverse=self.higher_is_better) + op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(ordered_ops))) + logger.info(f"Start to accumulate fallback to {target_dtype}.") + + fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=None, + op_dtypes=op_dtypes, accumulate=True) + for op_tuning_cfg in fallback_sampler: + op_tuning_cfg['calib_sampling_size'] = calib_size + yield op_tuning_cfg + + # ordered_ops = sorted(op_fallback_acc_impact.keys(), # key=lambda key: op_fallback_acc_impact[key], # reverse=self.higher_is_better) diff --git a/neural_compressor/strategy/st_utils/tuning_sampler.py b/neural_compressor/strategy/st_utils/tuning_sampler.py index fea140a9e4d..c583f1c2764 100644 --- a/neural_compressor/strategy/st_utils/tuning_sampler.py +++ b/neural_compressor/strategy/st_utils/tuning_sampler.py @@ -263,7 +263,7 @@ def __init__(self, def __iter__(self): new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) - skip_first = True + skip_first = False for op_name_type, target_dtype in self.op_dtypes.items(): if not self.accumulate: new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) From 16bd68ecc3b73afdf6613c351ae2c149d2a51bcd Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 22 Nov 2022 11:01:22 +0800 Subject: [PATCH 026/128] WA for align the op name --- neural_compressor/strategy/hawq.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index dbbaa98e931..bc042f06b2c 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -316,7 +316,22 @@ def next_tune_cfg(self): # Initialize the tuning config for each op according to the quantization approach op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - + # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) + early_stop_tuning = True + stage1_cnt = 0 + quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] + quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] + stage1_max = 2 # TODO set a more appropriate value + op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], + op_item_dtype_dict, initial_op_tuning_cfg) + for op_tuning_cfg in op_wise_tuning_sampler: + stage1_cnt += 1 + if early_stop_tuning and stage1_cnt > stage1_max: + logger.info("Early stopping the stage 1.") + break + op_tuning_cfg['calib_sampling_size'] = calib_size + yield op_tuning_cfg + # Fallback the ops supported both static and dynamic from static to dynamic quant_ops = quant_mode_wise_items['static'] if 'static' in 
quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] @@ -340,11 +355,16 @@ def next_tune_cfg(self): ordered_ops = sorted(op_to_traces.keys(), key=lambda key: op_to_traces[key], reverse=self.higher_is_better) - op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(ordered_ops))) + # WA for add op type + op_info_map = {} + for op_info in list(initial_op_tuning_cfg.keys()): + op_info_map[op_info[0]] = op_info # op_name: (op_name, op_type) + tmp_ordered_ops = [op_info_map[op_name] for op_name in ordered_ops] + op_dtypes = OrderedDict(zip(tmp_ordered_ops, [target_dtype] * len(ordered_ops))) logger.info(f"Start to accumulate fallback to {target_dtype}.") fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=None, + initial_op_tuning_cfg=op_tuning_cfg, op_dtypes=op_dtypes, accumulate=True) for op_tuning_cfg in fallback_sampler: op_tuning_cfg['calib_sampling_size'] = calib_size From e0ae1cee6da110be432e6ed26359a980a4c6f531 Mon Sep 17 00:00:00 2001 From: wenhuach
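The workaround above bridges the plain op names returned by the trace estimator and the (op_name, op_type) keys used by the tuning config: ops are sorted by their estimated Hessian trace, each name is mapped back to its (name, type) tuple, and the ordered result is handed to an accumulating FallbackTuningSampler. The snippet below recaps that ordering logic with made-up trace values and op names.

    from collections import OrderedDict

    op_to_traces = {'conv1': 0.31, 'layer1.0.conv1': 2.70, 'fc': 0.05}   # made-up traces
    op_info_map = {'conv1': ('conv1', 'Conv2d'),                         # op_name -> (op_name, op_type)
                   'layer1.0.conv1': ('layer1.0.conv1', 'Conv2d'),
                   'fc': ('fc', 'Linear')}

    ordered_ops = sorted(op_to_traces, key=op_to_traces.get, reverse=True)  # most sensitive first
    op_dtypes = OrderedDict((op_info_map[name], 'fp32') for name in ordered_ops)
    print(op_dtypes)  # fed to FallbackTuningSampler(..., op_dtypes=op_dtypes, accumulate=True)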
Date: Tue, 22 Nov 2022 15:36:21 +0800 Subject: [PATCH 027/128] change entry point to main function fx model before entering into quantization --- .../quantization/ptq/cpu/fx/conf.yaml | 6 +- .../quantization/ptq/cpu/fx/main.py | 2 + .../experimental/quantization.py | 62 +++++++++---------- neural_compressor/strategy/hawq.py | 2 +- .../strategy/st_utils/tuning_sampler.py | 2 +- test/strategy/test_hawq_wenhuach.py | 8 +-- 6 files changed, 42 insertions(+), 40 deletions(-) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml index 064656e872b..4b50b559e6a 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml @@ -24,7 +24,7 @@ quantization: # optional. tuning constrai batch_size: 1 dataset: ImageFolder: - root: /home/bfang1/Projects/HAWQ_INC/datasets/raw # NOTE: modify to calibration dataset location if needed + root: /home/bfang1/Projects/HAWQ_INC/datasets/raw/val # NOTE: modify to calibration dataset location if needed transform: Resize: size: 256 @@ -43,7 +43,7 @@ evaluation: # optional. required if use batch_size: 1 dataset: ImageFolder: - root: /home/bfang1/Projects/HAWQ_INC/datasets/raw # NOTE: modify to evaluation dataset location if needed + root: /home/bfang1/Projects/HAWQ_INC/datasets/raw/val # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 @@ -61,7 +61,7 @@ evaluation: # optional. required if use batch_size: 1 dataset: ImageFolder: - root: /home/bfang1/Projects/HAWQ_INC/datasets/raw # NOTE: modify to evaluation dataset location if needed + root: /home/bfang1/Projects/HAWQ_INC/datasets/raw/val # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py index 8646048ccf4..30008bfa3db 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py @@ -169,6 +169,8 @@ def main(): if args.tune: from neural_compressor.experimental import Quantization, common model.eval() + from torch.quantization.quantize_fx import fuse_fx + model = fuse_fx(model) quantizer = Quantization("./conf.yaml") quantizer.model = common.Model(model) q_model = quantizer.fit() diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index 7e8e8cfbbac..c6e4a8c3646 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -143,37 +143,37 @@ def pre_process(self): with open(self.resume_file, 'rb') as f: _resume = pickle.load(f).__dict__ - - import torchvision.datasets as datasets - import torchvision.transforms as transforms - data_path = "/home/bfang1/Projects/HAWQ_INC/datasets/raw" - traindir = os.path.join(data_path, 'train') - valdir = os.path.join(data_path, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - val_dataset = 
datasets.ImageFolder( - valdir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - from torch.utils.data import DataLoader - - self._calib_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True) - self._eval_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False) + # + # import torchvision.datasets as datasets + # import torchvision.transforms as transforms + # data_path = "/home/bfang1/Projects/HAWQ_INC/datasets/raw" + # traindir = os.path.join(data_path, 'train') + # valdir = os.path.join(data_path, 'val') + # normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + # std=[0.229, 0.224, 0.225]) + # + # train_dataset = datasets.ImageFolder( + # traindir, + # transforms.Compose([ + # transforms.RandomResizedCrop(224), + # transforms.RandomHorizontalFlip(), + # transforms.ToTensor(), + # normalize, + # ])) + # + # val_dataset = datasets.ImageFolder( + # valdir, + # transforms.Compose([ + # transforms.RandomResizedCrop(224), + # transforms.RandomHorizontalFlip(), + # transforms.ToTensor(), + # normalize, + # ])) + # + # from torch.utils.data import DataLoader + # + # self._calib_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True) + # self._eval_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False) self.strategy = STRATEGIES[strategy]( self._model, diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index bc042f06b2c..6db4757aa0c 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -321,7 +321,7 @@ def next_tune_cfg(self): stage1_cnt = 0 quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] - stage1_max = 2 # TODO set a more appropriate value + stage1_max = -1 # TODO set a more appropriate value op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) for op_tuning_cfg in op_wise_tuning_sampler: diff --git a/neural_compressor/strategy/st_utils/tuning_sampler.py b/neural_compressor/strategy/st_utils/tuning_sampler.py index c583f1c2764..f311d7c16a4 100644 --- a/neural_compressor/strategy/st_utils/tuning_sampler.py +++ b/neural_compressor/strategy/st_utils/tuning_sampler.py @@ -272,7 +272,7 @@ def __iter__(self): if self.accumulate and skip_first: # skip the first one skip_first = False continue - logger.debug(f"fallback {op_name_type} to {target_dtype}") + logger.info(f"fallback {op_name_type} to {target_dtype}") yield new_tune_cfg # need to skip the first one diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index a09c83c3452..2adcd5a5812 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -74,15 +74,15 @@ def tearDownClass(self): def test_run_hawq_one_trial(self): - def eval_func(model): - self.i -= 1 - return self.i + # def eval_func(model): + # self.i -= 1 + # return self.i from neural_compressor.experimental import Quantization, common model = copy.deepcopy(self.model) model.eval() model = fuse_fx(model) quantizer = Quantization('ptq_yaml.yaml') - quantizer.eval_func = eval_func + ##quantizer.eval_func = eval_func dataset = quantizer.dataset('dummy', (32, 3, 224, 224), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) From 
3440ac5ee964fdf2fb1d876a096cf964ecefb4bf Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Wed, 23 Nov 2022 10:54:34 +0800 Subject: [PATCH 028/128] get activations and the corresponding gradients --- neural_compressor/strategy/hawq.py | 72 ++++++++++++++++-------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 6db4757aa0c..604008ac2d4 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -67,6 +67,13 @@ def is_fused_module(self, module): else: return False + def mapping_module_to_op(self, name): + length = len("_model.") + if len(name) < length: + return name + else: + return name[length:] + def get_fused_mapping(self): model = self.model weights_info = dict(model.named_parameters()) @@ -75,7 +82,8 @@ def get_fused_mapping(self): if self.is_fused_module(child): for name, _ in child.named_children(): if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right - weight_to_op[op_name + "." + name + ".weight"] = op_name[7:] + + weight_to_op[op_name + "." + name + ".weight"] = self.mapping_module_to_op(op_name) break else: name = op_name + ".weight" @@ -95,8 +103,8 @@ def get_gradients(self, model, data, criterion, create_graph=False, enable_act=F input = data[0].to(self.device) ##self._input_shape = input.shape ## for resetting input activation target = data[1].to(self.device) - if enable_act: - input.requires_grad = True + # if enable_act: + # input.requires_grad = True output = model(input) loss = criterion(output, target) # torch.autograd.backward(loss, create_graph=create_graph) @@ -138,23 +146,24 @@ def get_hv_one_sample(self, params, enable_act, num_batches): v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better return v_t_H_v - def _get_input_grad_hook(self, name): - def input_grad_hook(model, grad_input, grad_output): + def _get_act_grad_hook(self, name): + def act_grad_hook(model, grad_input, grad_output): self.layer_acts_grads[name] = [grad_input, grad_output] - return input_grad_hook - def _get_enable_input_grad_hook(self, name): - def enable_input_grad_hook(model, inputs, outputs): + return act_grad_hook + + def _get_enable_act_grad_hook(self, name): + def enable_act_grad_hook(model, inputs, outputs): try: input = inputs[0] ##TODO check whether this is right except: input = inputs - if input.is_leaf == False: - if input.requires_grad is False: - input.requires_grad = True - self.layer_acts[name] = input - return enable_input_grad_hook + if input.requires_grad is False: + input.requires_grad = True + self.layer_acts[name] = input + + return enable_act_grad_hook # def _get_disable_input_grad_hook(self, name): # def disable_input_grad_hook(model, inputs, outputs): @@ -169,21 +178,19 @@ def enable_input_grad_hook(model, inputs, outputs): # # return disable_input_grad_hook - def _unregister_hook(self): for handel in self.hook_handles: handel.remove() - def register_input_grad_hooks(self): + def register_act_grad_hooks(self): for name, module in self.model.named_modules(): - if name in self.op_list: - hook_handle = module.register_forward_hook(self._get_enable_input_grad_hook(name)) + if self.mapping_module_to_op(name) in self.op_list: + hook_handle = module.register_forward_hook(self._get_enable_act_grad_hook(name)) self.hook_handles.append(hook_handle) - hook_handle = module.register_backward_hook(self._get_input_grad_hook(name)) + hook_handle = module.register_backward_hook(self._get_act_grad_hook(name)) self.hook_handles.append(hook_handle) - - def 
reset_input_gradient_and_hooks(self): + def reset_act_gradient_and_hooks(self): # tmp_input = torch.zeros(self._input_shape, device=self.device) # for name, module in self.model.named_modules(): # if name in self.op_list: @@ -193,12 +200,13 @@ def reset_input_gradient_and_hooks(self): self._unregister_hook() def get_params(self): - weight_names = [n for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n] ##remove bias + weight_names = [n for n, p in self.model.named_parameters() if + p.requires_grad and "bias" not in n] ##remove bias params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n] ##remove bias self.weight_names = weight_names self.params = params - def get_avg_traces(self, enable_act=False, num_batches=2): + def get_avg_traces(self, enable_act=True, num_batches=2): """ Estimates average hessian trace for each parameter """ @@ -207,7 +215,7 @@ def get_avg_traces(self, enable_act=False, num_batches=2): self.hook_handles = [] self.layer_acts = {} self.layer_acts_grads = {} - self.register_input_grad_hooks() + self.register_act_grad_hooks() ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] ##num_all_data = num_data_iter * self.dataloader.batch_size ##op_list = self.op_list @@ -226,18 +234,18 @@ def get_avg_traces(self, enable_act=False, num_batches=2): diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) if diff_ratio < self.tolerance and i > 10: ##TODO magic number break - if i==50:##TODO for debug + if i == 50: ##TODO for debug break prev_avg_model_trace = model_trace layer_traces = layer_traces_estimate if enable_act: self.reset_input_gradient_and_hooks() - weight_name_to_traces={} + weight_name_to_traces = {} - for weigth_name,trace in zip(self.weight_names, layer_traces): + for weigth_name, trace in zip(self.weight_names, layer_traces): weight_name_to_traces[weigth_name] = trace - op_name_to_trace={} + op_name_to_trace = {} for weigth_name in self.weight_names: op_name = self.weight_to_op[weigth_name] op_name_to_trace[op_name] = weight_name_to_traces[weigth_name] @@ -322,8 +330,8 @@ def next_tune_cfg(self): quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] stage1_max = -1 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], + op_item_dtype_dict, initial_op_tuning_cfg) for op_tuning_cfg in op_wise_tuning_sampler: stage1_cnt += 1 if early_stop_tuning and stage1_cnt > stage1_max: @@ -349,7 +357,7 @@ def next_tune_cfg(self): self._fp32_model.eval() ht = HessianTrace(self._fp32_model, self.calib_dataloader) op_to_traces = ht.get_avg_traces() - if orig_eval==False: + if orig_eval == False: self._fp32_model.train() ordered_ops = sorted(op_to_traces.keys(), @@ -358,7 +366,7 @@ def next_tune_cfg(self): # WA for add op type op_info_map = {} for op_info in list(initial_op_tuning_cfg.keys()): - op_info_map[op_info[0]] = op_info # op_name: (op_name, op_type) + op_info_map[op_info[0]] = op_info # op_name: (op_name, op_type) tmp_ordered_ops = [op_info_map[op_name] for op_name in ordered_ops] op_dtypes = OrderedDict(zip(tmp_ordered_ops, [target_dtype] * len(ordered_ops))) logger.info(f"Start to accumulate fallback to {target_dtype}.") @@ -370,7 +378,6 @@ def next_tune_cfg(self): 
op_tuning_cfg['calib_sampling_size'] = calib_size yield op_tuning_cfg - # ordered_ops = sorted(op_fallback_acc_impact.keys(), # key=lambda key: op_fallback_acc_impact[key], # reverse=self.higher_is_better) @@ -384,7 +391,6 @@ def next_tune_cfg(self): # op_tuning_cfg['calib_sampling_size'] = calib_sampling_size # yield op_tuning_cfg - # tmp = 1 # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up # ops_sensitivity = self.adaptor.get_hessian_trace(self._fp32_model, From f895fb4b6a0219bf0cb31ddbfce802a0be2d828d Mon Sep 17 00:00:00 2001 From: wenhuach
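The change above reworks the hooks so that each tracked op's input activation is made differentiable and both the activation and its gradient are recorded, which is the groundwork for estimating Hessian traces with respect to activations as well as weights. A compact sketch of a single Hessian-vector product taken with respect to an intermediate activation is shown below; the two-layer model, the single sample, and the Rademacher probe are illustrative assumptions.

    import torch

    lin1, lin2 = torch.nn.Linear(8, 16), torch.nn.Linear(16, 4)
    x, y = torch.randn(1, 8), torch.randint(0, 4, (1,))

    act = lin1(x)                                        # intermediate activation, batch size 1
    loss = torch.nn.functional.cross_entropy(lin2(act), y)
    (act_grad,) = torch.autograd.grad(loss, act, create_graph=True)

    v = torch.randint_like(act, high=2) * 2.0 - 1.0      # Rademacher probe
    (h_v,) = torch.autograd.grad(act_grad, act, grad_outputs=v)
    print(torch.sum(h_v * v))                            # one-sample v^T H v for this activation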
Date: Wed, 23 Nov 2022 11:54:27 +0800 Subject: [PATCH 029/128] change fusefx position --- .../quantization/ptq/cpu/fx/main.py | 2 -- neural_compressor/strategy/hawq.py | 18 ++++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py index 30008bfa3db..8646048ccf4 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py @@ -169,8 +169,6 @@ def main(): if args.tune: from neural_compressor.experimental import Quantization, common model.eval() - from torch.quantization.quantize_fx import fuse_fx - model = fuse_fx(model) quantizer = Quantization("./conf.yaml") quantizer.model = common.Model(model) q_model = quantizer.fit() diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 604008ac2d4..2cfac2b5815 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -40,7 +40,9 @@ class HessianTrace: """ def __init__(self, model, dataloader, criterion=None): - self.model = model ##TODO need to check fused or not + from torch.quantization.quantize_fx import fuse_fx + self.model = fuse_fx(model.model) + self.dataloader = dataloader self.max_iter = 500 self.tolerance = 1e-5 @@ -68,11 +70,11 @@ def is_fused_module(self, module): return False def mapping_module_to_op(self, name): - length = len("_model.") - if len(name) < length: - return name - else: - return name[length:] + # length = len("_model.") + # if len(name) < length: + # return name + # else: + return name def get_fused_mapping(self): model = self.model @@ -88,7 +90,7 @@ def get_fused_mapping(self): else: name = op_name + ".weight" if name in weights_info and name not in weight_to_op.keys(): - weight_to_op[op_name + ".weight"] = op_name[7:] + weight_to_op[op_name + ".weight"] = op_name op_list = [] for key in weight_to_op.keys(): op_list.append(weight_to_op[key]) @@ -240,7 +242,7 @@ def get_avg_traces(self, enable_act=True, num_batches=2): layer_traces = layer_traces_estimate if enable_act: - self.reset_input_gradient_and_hooks() + self.reset_act_gradient_and_hooks() weight_name_to_traces = {} for weigth_name, trace in zip(self.weight_names, layer_traces): From 4f7dd785e4b52882953183a7a44d1de9daa2b8d2 Mon Sep 17 00:00:00 2001 From: wenhuach

Date: Wed, 23 Nov 2022 17:20:16 +0800 Subject: [PATCH 030/128] add weight quant loss, the current key is from quant model --- neural_compressor/strategy/hawq.py | 342 +++++++++++++++++------------ 1 file changed, 201 insertions(+), 141 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 2cfac2b5815..2f6a2e7e074 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -29,6 +29,7 @@ from .st_utils.tuning_space import TUNING_ITEMS_LST from torch.quantization.quantize_fx import fuse_fx import torchvision +from typing import Dict, List, Optional, Any, Union, Callable, Set class HessianTrace: @@ -55,6 +56,22 @@ def __init__(self, model, dataloader, criterion=None): self.criterion = self.criterion.to(self.device) self.weight_to_op, self.op_list = self.get_fused_mapping() + def get_qnt_weight_loss(self, weights_name): + + fp32_model = self.fp32model + + qnt_model = self.q_model + + # print(self.model.state_dict()) + for n, p in self.model.named_parameters(): + print(n) + + print("*" * 20) + + for n, p in self.q_model._model.named_parameters(): + print(n) + pass + def is_fused_module(self, module): """This is a helper function for `_propagate_qconfig_helper` to detecte if this module is fused. @@ -100,7 +117,7 @@ def get_device(self, model: torch.nn.Module): for n, p in model.named_parameters(): return p.data.device - def get_gradients(self, model, data, criterion, create_graph=False, enable_act=False): + def get_gradients(self, model, data, criterion, create_graph=False): model.zero_grad() input = data[0].to(self.device) ##self._input_shape = input.shape ## for resetting input activation @@ -131,14 +148,15 @@ def sample_rademacher(self, params): samples.append(r) return samples - def get_hv_one_sample(self, params, enable_act, num_batches): + def get_vtHv_weight(self, params, num_samples): + num_batches = (num_samples + self.dataloader.batchsize - 1) // self.dataloader v = self.sample_rademacher(params) H_v = [0] * len(v) cnt = 0 for step, data in enumerate(self.dataloader): batch_size = data[0].shape[0] cnt += batch_size - gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True, enable_act=enable_act) + gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True) H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] if step == num_batches - 1: @@ -148,6 +166,25 @@ def get_hv_one_sample(self, params, enable_act, num_batches): v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better return v_t_H_v + def get_vtHv_act(self, params, num_samples): + v = self.sample_rademacher(params) + H_v = [0] * len(v) + cnt = 0 + for step, data in enumerate(self.dataloader): + if cnt >= num_samples: + break + for i in range(self.dataloader.batchsize): ##force to batchsize to be 1 + input = data[0][i:i + 1] + target = data[1][i:i + 1] + + self.get_gradients(self.model, (input, target), self.criterion, create_graph=True) + layer_acts = [self.layer_acts[key] for key in self.layer_acts.keys()] + layer_act_gradients = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] + hv_one = torch.autograd.grad(layer_act_gradients, layer_acts, v, only_inputs=True, retain_graph=False) + cnt += 1 + if cnt >= num_samples: + break + def _get_act_grad_hook(self, name): def act_grad_hook(model, grad_input, grad_output): 
self.layer_acts_grads[name] = [grad_input, grad_output] @@ -208,28 +245,12 @@ def get_params(self): self.weight_names = weight_names self.params = params - def get_avg_traces(self, enable_act=True, num_batches=2): - """ - Estimates average hessian trace for each parameter - """ - assert num_batches > 0 - if enable_act: - self.hook_handles = [] - self.layer_acts = {} - self.layer_acts_grads = {} - self.register_act_grad_hooks() - ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] - ##num_all_data = num_data_iter * self.dataloader.batch_size - ##op_list = self.op_list - ##TODO setting this in config - self.get_params() - # names = [n for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias - # params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias + def get_weight_traces(self, num_samples): layer_traces_per_iter = [] prev_avg_model_trace = 0 for i in range(self.max_iter): - layer_traces = self.get_hv_one_sample(self.params, enable_act, num_batches) + layer_traces = self.get_vtHv_weight(self.params, num_samples) layer_traces_per_iter.append(layer_traces) layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) model_trace = torch.sum(layer_traces_estimate) @@ -239,19 +260,152 @@ def get_avg_traces(self, enable_act=True, num_batches=2): if i == 50: ##TODO for debug break prev_avg_model_trace = model_trace - - layer_traces = layer_traces_estimate - if enable_act: - self.reset_act_gradient_and_hooks() weight_name_to_traces = {} - for weigth_name, trace in zip(self.weight_names, layer_traces): - weight_name_to_traces[weigth_name] = trace + for weight_name, trace in zip(self.weight_names, layer_traces): + weight_name_to_traces[weight_name] = trace op_name_to_trace = {} - for weigth_name in self.weight_names: - op_name = self.weight_to_op[weigth_name] - op_name_to_trace[op_name] = weight_name_to_traces[weigth_name] + for weight_name in self.weight_names: + op_name = self.weight_to_op[weight_name] + op_name_to_trace[op_name] = weight_name_to_traces[weight_name] return op_name_to_trace + return layer_traces_estimate + + def get_act_traces(self, num_samples): + self.hook_handles = [] + self.layer_acts = {} + self.layer_acts_grads = {} + self.register_act_grad_hooks() + for i in range(self.max_iter): + pass + + def get_avg_traces(self, enable_act=True, num_samples=100): + """ + Estimates average hessian trace for each parameter + """ + + assert num_samples > 0 + + ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] + ##num_all_data = num_data_iter * self.dataloader.batch_size + ##op_list = self.op_list + ##TODO setting this in config + self.get_params() + # names = [n for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias + # params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias + + ## handle activation + if enable_act: + self.get_act_traces(num_samples) + ##change batchsize to 1 + + # + # layer_traces = layer_traces_estimate + # if enable_act: + # self.reset_act_gradient_and_hooks() + + +##copy from torch.quantization._numeric_suite +def _find_match( + str_list: Union[Dict[str, Any], List[str]], key_str: str, + postfix: str, +) -> Optional[str]: + split_str = key_str.split(".") + if split_str[-1] == postfix: + match_string = "".join(key_str.split(".")[0:-1]) + for s2 in str_list: + pattern1 = "".join(s2.split(".")[0:-1]) + pattern2 = "".join(s2.split(".")[0:-2]) + if match_string == 
pattern1: + return s2 + if match_string == pattern2: + return s2 + + # For matching "fc.weight" and "fc._packed_params._packed_params" + if postfix == "_packed_params": + match_string = "".join(key_str.split(".")[0:-2]) + if len(match_string) == 0: + return None + for s2 in str_list: + pattern1 = "".join(s2.split(".")[0:-1]) + pattern2 = "".join(s2.split(".")[0:-2]) + if match_string == pattern1: + return s2 + if match_string == pattern2: + return s2 + return None + else: + return None + + +##copy form torch.quantization._numeric_suite +def compare_weights( + float_dict: Dict[str, Any], quantized_dict: Dict[str, Any] +) -> Dict[str, Dict[str, torch.Tensor]]: + r"""Compare the weights of the float module with its corresponding quantized + module. Return a dict with key corresponding to module names and each entry being + a dictionary with two keys 'float' and 'quantized', containing the float and + quantized weights. This dict can be used to compare and compute the quantization + error of the weights of float and quantized models. + + Example usage:: + + wt_compare_dict = compare_weights( + float_model.state_dict(), qmodel.state_dict()) + for key in wt_compare_dict: + print( + key, + compute_error( + wt_compare_dict[key]['float'], + wt_compare_dict[key]['quantized'].dequantize() + ) + ) + + Args: + float_dict: state dict of the float model + quantized_dict: state dict of the quantized model + + Return: + weight_dict: dict with key corresponding to module names and each entry being + a dictionary with two keys 'float' and 'quantized', containing the float and + quantized weights + """ + + weight_dict: Dict[str, Dict] = {} + for key in quantized_dict: + match_key = _find_match(float_dict, key, "weight") + if match_key is not None: + weight_dict[key] = {} + weight_dict[key]["float"] = float_dict[match_key] + weight_dict[key]["quantized"] = quantized_dict[key] + continue + + # For matching "fc.weight" and "fc._packed_params._packed_params" + match_key = _find_match(float_dict, key, "_packed_params") + if match_key is not None: + weight_dict[key] = {} + weight_dict[key]["float"] = float_dict[match_key] + weight_dict[key]["quantized"] = quantized_dict[key][0] + + # For LSTM + split_str = key.split(".") + if split_str[-1] == "param" and split_str[-3] == "_all_weight_values": + layer = split_str[-2] + module_name = ".".join(split_str[:-3]) + float_weight_ih_key = module_name + ".weight_ih_l" + layer + float_weight_hh_key = module_name + ".weight_hh_l" + layer + if float_weight_ih_key in float_dict and float_weight_hh_key in float_dict: + weight_dict[key] = {} + weight_dict[key]["float"] = float_dict[float_weight_ih_key] + weight_dict[key]["quantized"] = ( + quantized_dict[key].__getstate__()[0][4][0].__getstate__()[0][0] + ) + weight_dict[key]["float"] = float_dict[float_weight_hh_key] + weight_dict[key]["quantized"] = ( + quantized_dict[key].__getstate__()[0][4][1].__getstate__()[0][0] + ) + + return weight_dict @strategy_registry @@ -331,7 +485,7 @@ def next_tune_cfg(self): stage1_cnt = 0 quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] - stage1_max = -1 # TODO set a more appropriate value + stage1_max = 1 # TODO set a more appropriate value op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) for op_tuning_cfg in op_wise_tuning_sampler: @@ -341,6 +495,12 @@ def next_tune_cfg(self): break 
op_tuning_cfg['calib_sampling_size'] = calib_size yield op_tuning_cfg + + # import torch.quantization._numeric_suite as ns + # self.model.eval() + # fused_model = fuse_fx(self.model.model) + # res = compare_weights(fused_model.state_dict(), self.q_model.state_dict()) + # Fallback the ops supported both static and dynamic from static to dynamic quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] @@ -358,6 +518,16 @@ def next_tune_cfg(self): orig_eval = False self._fp32_model.eval() ht = HessianTrace(self._fp32_model, self.calib_dataloader) + + q_model_state_dict = { + } + for key in self.q_model.state_dict().keys(): + length = len("_model.") + new_key = key[length:] + q_model_state_dict[new_key] = self.q_model.state_dict()[key] + + weight_quant_loss = compare_weights(ht.model.state_dict(), q_model_state_dict) + op_to_traces = ht.get_avg_traces() if orig_eval == False: self._fp32_model.train() @@ -380,116 +550,6 @@ def next_tune_cfg(self): op_tuning_cfg['calib_sampling_size'] = calib_size yield op_tuning_cfg - # ordered_ops = sorted(op_fallback_acc_impact.keys(), - # key=lambda key: op_fallback_acc_impact[key], - # reverse=self.higher_is_better) - # op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) - # logger.info(f"Start to accumulate fallback to {target_dtype}.") - # initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - # fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - # initial_op_tuning_cfg=initial_op_tuning_cfg, - # op_dtypes=op_dtypes, accumulate=True) - # for op_tuning_cfg in fallback_sampler: - # op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - # yield op_tuning_cfg - - # tmp = 1 - # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up - # ops_sensitivity = self.adaptor.get_hessian_trace(self._fp32_model, - # self.calib_dataloader, - # self. - # method_args={'name': 'hessian_trace'}) - # tmp = 1 - - def next_tune_cfg_bk(self): - """The generator of yielding next tuning config to traverse by concrete strategies - according to last tuning result. - - Yields: - tune_config (dict): It's a dict containing the tuning configuration to run. 
- """ - from copy import deepcopy - tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options - - calib_sampling_size = calib_sampling_size_lst[0] - # Initialize the tuning config for each op according to the quantization approach - op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) - early_stop_tuning = False - stage1_cnt = 0 - quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] - quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] - stage1_max = 1e9 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) - # for op_tuning_cfg in op_wise_tuning_sampler: - # stage1_cnt += 1 - # if early_stop_tuning and stage1_cnt > stage1_max: - # logger.info("Early stopping the stage 1.") - # break - # op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - # yield op_tuning_cfg - # Fallback the ops supported both static and dynamic from static to dynamic - # Tuning items: None - # if self.cfg.quantization.approach == 'post_training_auto_quant': - # static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if - # item in tuning_space.query_items_by_quant_mode('dynamic')] - # if static_dynamic_items: - # logger.info("Fallback all ops that support both dynamic and static to dynamic.") - # else: - # logger.info("Non ops that support both dynamic") - # - # new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) - # for item in static_dynamic_items: - # new_op_tuning_cfg[item.name] = self.initial_dynamic_cfg_based_on_static_cfg( - # new_op_tuning_cfg[item.name]) - # new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - # yield new_op_tuning_cfg - best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg) - - # Fallback - for target_dtype in ['bf16', 'fp32']: - target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) - fallback_items_lst = [item for item in quant_ops if item in target_type_lst] - if fallback_items_lst: - logger.info(f"Start to fallback op to {target_dtype} one by one.") - self._fallback_started() - # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up - ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, - self.calib_dataloader, - method_args={'name': 'hessian_trace'}) - - fallback_items_name_lst = sorted(ops_sensitivity, key=lambda items: items[1], reverse=True) - - op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) - initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) - - op_fallback_acc_impact = OrderedDict() - for op_index, op_tuning_cfg in enumerate(fallback_sampler): - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - yield op_tuning_cfg - acc, _ = self.last_tune_result - op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc - - # do accumulated fallback according to the order in the previous stage - if len(op_fallback_acc_impact) > 0: - ordered_ops = sorted(op_fallback_acc_impact.keys(), - key=lambda key: op_fallback_acc_impact[key], - 
reverse=self.higher_is_better) - op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) - logger.info(f"Start to accumulate fallback to {target_dtype}.") - initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) - for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - yield op_tuning_cfg - def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg: OpTuningConfig): op_state = op_static_cfg.get_state() op_name = op_static_cfg.op_name From a7b58c776765e3289fd6c8cf75a5e78f5cde8bbb Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Wed, 23 Nov 2022 19:06:28 +0800 Subject: [PATCH 031/128] add weights_quant loss eval still bugs for get avg traces --- neural_compressor/strategy/hawq.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 2f6a2e7e074..897dfcffea2 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -527,8 +527,16 @@ def next_tune_cfg(self): q_model_state_dict[new_key] = self.q_model.state_dict()[key] weight_quant_loss = compare_weights(ht.model.state_dict(), q_model_state_dict) - + pertur_lst={} + for key in weight_quant_loss: + op_float_tensor=weight_quant_loss[key]['float'] + op_qnt_tensor=weight_quant_loss[key]['quantized'].dequantize() + diff_l2=(torch.norm(op_float_tensor-op_qnt_tensor,p=2)**2) #Formula: L2=||Q(w)-w||p^2 + pertur_lst[key]=diff_l2 + # for i in pertur_lst: + # print(pertur_lst[i]) op_to_traces = ht.get_avg_traces() + print(op_to_traces) if orig_eval == False: self._fp32_model.train() From 356dc2bbfd758f3f3caeec558dd4d455c8709a50 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Thu, 24 Nov 2022 14:12:48 +0800 Subject: [PATCH 032/128] fixed weight trace issue --- neural_compressor/strategy/hawq.py | 50 ++++++++---------------------- 1 file changed, 13 insertions(+), 37 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 897dfcffea2..65c7ab72d82 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -55,22 +55,7 @@ def __init__(self, model, dataloader, criterion=None): self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config self.criterion = self.criterion.to(self.device) self.weight_to_op, self.op_list = self.get_fused_mapping() - - def get_qnt_weight_loss(self, weights_name): - - fp32_model = self.fp32model - - qnt_model = self.q_model - - # print(self.model.state_dict()) - for n, p in self.model.named_parameters(): - print(n) - - print("*" * 20) - - for n, p in self.q_model._model.named_parameters(): - print(n) - pass + self.get_params() def is_fused_module(self, module): """This is a helper function for `_propagate_qconfig_helper` to detecte @@ -149,7 +134,6 @@ def sample_rademacher(self, params): return samples def get_vtHv_weight(self, params, num_samples): - num_batches = (num_samples + self.dataloader.batchsize - 1) // self.dataloader v = self.sample_rademacher(params) H_v = [0] * len(v) cnt = 0 @@ -159,7 +143,7 @@ def get_vtHv_weight(self, params, num_samples): gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True) H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] - if step == num_batches - 1: + if cnt >=num_samples: break if cnt > 0: H_v = [item / cnt for item in H_v] @@ -246,7 +230,6 @@ def get_params(self): self.params = params def get_weight_traces(self, num_samples): - layer_traces_per_iter = [] prev_avg_model_trace = 0 for i in range(self.max_iter): @@ -261,7 +244,7 @@ def get_weight_traces(self, num_samples): break prev_avg_model_trace = model_trace weight_name_to_traces = {} - + layer_traces = layer_traces_estimate for weight_name, trace in zip(self.weight_names, layer_traces): weight_name_to_traces[weight_name] = trace op_name_to_trace = {} @@ -269,7 +252,6 @@ def get_weight_traces(self, num_samples): op_name = self.weight_to_op[weight_name] op_name_to_trace[op_name] = weight_name_to_traces[weight_name] return op_name_to_trace - return layer_traces_estimate def get_act_traces(self, num_samples): self.hook_handles = [] @@ -279,24 +261,18 @@ def get_act_traces(self, num_samples): for i in range(self.max_iter): pass - def get_avg_traces(self, enable_act=True, num_samples=100): + def get_avg_traces(self, enable_act=True, num_samples=32): """ Estimates average hessian trace for each parameter """ assert num_samples > 0 - - ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] - ##num_all_data = num_data_iter * self.dataloader.batch_size - ##op_list = self.op_list - ##TODO setting this in config - self.get_params() - # names = [n for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias - # params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias + weight_traces = self.get_weight_traces(num_samples) + return weight_traces ## handle activation - if enable_act: - self.get_act_traces(num_samples) + # if enable_act: + # self.get_act_traces(num_samples) ##change batchsize to 1 # @@ -527,12 +503,12 @@ def 
next_tune_cfg(self): q_model_state_dict[new_key] = self.q_model.state_dict()[key] weight_quant_loss = compare_weights(ht.model.state_dict(), q_model_state_dict) - pertur_lst={} + pertur_lst = {} for key in weight_quant_loss: - op_float_tensor=weight_quant_loss[key]['float'] - op_qnt_tensor=weight_quant_loss[key]['quantized'].dequantize() - diff_l2=(torch.norm(op_float_tensor-op_qnt_tensor,p=2)**2) #Formula: L2=||Q(w)-w||p^2 - pertur_lst[key]=diff_l2 + op_float_tensor = weight_quant_loss[key]['float'] + op_qnt_tensor = weight_quant_loss[key]['quantized'].dequantize() + diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) # Formula: L2=||Q(w)-w||p^2 + pertur_lst[key] = diff_l2 # for i in pertur_lst: # print(pertur_lst[i]) op_to_traces = ht.get_avg_traces() From 5f78a9c479c661880a6fb166979b6fb2b8d20c3d Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Thu, 24 Nov 2022 14:15:11 +0800 Subject: [PATCH 033/128] fixed weight trace issue --- .../experimental/quantization.py | 62 +++++++++---------- test/strategy/test_hawq_wenhuach.py | 10 +-- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index c6e4a8c3646..7e8e8cfbbac 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -143,37 +143,37 @@ def pre_process(self): with open(self.resume_file, 'rb') as f: _resume = pickle.load(f).__dict__ - # - # import torchvision.datasets as datasets - # import torchvision.transforms as transforms - # data_path = "/home/bfang1/Projects/HAWQ_INC/datasets/raw" - # traindir = os.path.join(data_path, 'train') - # valdir = os.path.join(data_path, 'val') - # normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - # std=[0.229, 0.224, 0.225]) - # - # train_dataset = datasets.ImageFolder( - # traindir, - # transforms.Compose([ - # transforms.RandomResizedCrop(224), - # transforms.RandomHorizontalFlip(), - # transforms.ToTensor(), - # normalize, - # ])) - # - # val_dataset = datasets.ImageFolder( - # valdir, - # transforms.Compose([ - # transforms.RandomResizedCrop(224), - # transforms.RandomHorizontalFlip(), - # transforms.ToTensor(), - # normalize, - # ])) - # - # from torch.utils.data import DataLoader - # - # self._calib_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True) - # self._eval_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False) + + import torchvision.datasets as datasets + import torchvision.transforms as transforms + data_path = "/home/bfang1/Projects/HAWQ_INC/datasets/raw" + traindir = os.path.join(data_path, 'train') + valdir = os.path.join(data_path, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + val_dataset = datasets.ImageFolder( + valdir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + from torch.utils.data import DataLoader + + self._calib_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True) + self._eval_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False) self.strategy = STRATEGIES[strategy]( self._model, diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index 2adcd5a5812..df70e32cd9e 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -74,15 +74,15 @@ def tearDownClass(self): def test_run_hawq_one_trial(self): - # def eval_func(model): - # self.i -= 1 - # return self.i + def eval_func(model): + self.i -= 1 + return self.i from neural_compressor.experimental import Quantization, common model = copy.deepcopy(self.model) model.eval() - model = fuse_fx(model) + # model = fuse_fx(model) quantizer = Quantization('ptq_yaml.yaml') - ##quantizer.eval_func = eval_func + quantizer.eval_func = eval_func dataset = quantizer.dataset('dummy', (32, 3, 224, 224), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) From df25db90edb3c7f5d8603e438f4886ea0db269f3 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Thu, 24 Nov 2022 17:12:30 +0800 Subject: [PATCH 034/128] act traces have some issues --- neural_compressor/strategy/hawq.py | 216 +++++++++++++++++------------ 1 file changed, 124 insertions(+), 92 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 65c7ab72d82..c9f8c4488da 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -28,7 +28,7 @@ from .st_utils.tuning_structs import OpTuningConfig from .st_utils.tuning_space import TUNING_ITEMS_LST from torch.quantization.quantize_fx import fuse_fx -import torchvision + from typing import Dict, List, Optional, Any, Union, Callable, Set @@ -41,8 +41,7 @@ class HessianTrace: """ def __init__(self, model, dataloader, criterion=None): - from torch.quantization.quantize_fx import fuse_fx - self.model = fuse_fx(model.model) + self.model = fuse_fx(model.model) ##TODO need to check whether model has been already fused self.dataloader = dataloader self.max_iter = 500 @@ -102,89 +101,19 @@ def get_device(self, model: torch.nn.Module): for n, p in model.named_parameters(): return p.data.device - def get_gradients(self, model, data, criterion, create_graph=False): - model.zero_grad() - input = data[0].to(self.device) - ##self._input_shape = input.shape ## for resetting input activation - target = data[1].to(self.device) - # if enable_act: - # input.requires_grad = True - output = model(input) - loss = criterion(output, target) - # torch.autograd.backward(loss, create_graph=create_graph) - loss.backward(create_graph=create_graph) - gradients = [] - for n, p in model.named_parameters(): - if p.grad != None and n in self.weight_names: - gradient = p.grad - gradients.append(gradient + 0.0) ## add 0 to create a copy - model.zero_grad() - return gradients - - # def get_params(self, model): - # parameters = [p for p in model.parameters() if p.requires_grad] - # return parameters - - def sample_rademacher(self, params): - samples = [] - for param in params: - r = torch.randint_like(param, high=2, device=self.device) - r.masked_fill_(r == 0, -1) - samples.append(r) - return samples - - def get_vtHv_weight(self, params, num_samples): - v = self.sample_rademacher(params) - H_v = [0] * len(v) - cnt = 0 - for step, data in enumerate(self.dataloader): - batch_size = data[0].shape[0] - cnt += batch_size - gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True) - H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) - H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] - if cnt >=num_samples: - break - if cnt > 0: - H_v = [item / cnt for item in H_v] - v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better - return v_t_H_v - - def get_vtHv_act(self, params, num_samples): - v = self.sample_rademacher(params) - H_v = [0] * len(v) - cnt = 0 - for step, data in enumerate(self.dataloader): - if cnt >= num_samples: - break - for i in range(self.dataloader.batchsize): ##force to batchsize to be 1 - input = data[0][i:i + 1] - target = data[1][i:i + 1] - - self.get_gradients(self.model, (input, target), self.criterion, create_graph=True) - layer_acts = [self.layer_acts[key] for key in self.layer_acts.keys()] - layer_act_gradients = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] - hv_one = torch.autograd.grad(layer_act_gradients, layer_acts, v, only_inputs=True, retain_graph=False) - cnt += 1 - if cnt >= num_samples: - break - def 
_get_act_grad_hook(self, name): def act_grad_hook(model, grad_input, grad_output): + ##print(name, grad_input[0].shape, grad_output[0].shape) self.layer_acts_grads[name] = [grad_input, grad_output] return act_grad_hook def _get_enable_act_grad_hook(self, name): def enable_act_grad_hook(model, inputs, outputs): - try: - input = inputs[0] ##TODO check whether this is right - except: - input = inputs - - if input.requires_grad is False: - input.requires_grad = True - self.layer_acts[name] = input + for input in inputs: + if input.requires_grad is False: + input.requires_grad = True + self.layer_acts[name] = inputs return enable_act_grad_hook @@ -229,18 +158,87 @@ def get_params(self): self.weight_names = weight_names self.params = params + def forward_backward(self, data, create_graph=False, return_w_grad=True): + self.model.zero_grad() + input = data[0].to(self.device) + ##self._input_shape = input.shape ## for resetting input activation + target = data[1].to(self.device) + ##input.requires_grad = True + output = self.model(input) + loss = self.criterion(output, target) + torch.autograd.backward(loss, create_graph=create_graph) + ##loss.backward(create_graph=create_graph) + if return_w_grad: + gradients = [] + for n, p in self.model.named_parameters(): + if p.grad != None and n in self.weight_names: + gradient = p.grad + gradients.append(gradient + 0.0) ## add 0 to create a copy + self.model.zero_grad() + return gradients + else: + self.model.zero_grad() + + # def get_params(self, model): + # parameters = [p for p in model.parameters() if p.requires_grad] + # return parameters + + def sample_rademacher(self, params): + samples = [] + for param in params: + r = torch.randint_like(param, high=2, device=self.device) + r.masked_fill_(r == 0, -1) + samples.append(r) + return samples + + def get_vtHv_weight(self, params, num_samples): + v = self.sample_rademacher(params) + H_v = [0] * len(v) + cnt = 0 + for step, data in enumerate(self.dataloader): + batch_size = data[0].shape[0] + cnt += batch_size + gradients = self.forward_backward(data, create_graph=True) + H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) + H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] + if cnt >= num_samples: + break + if cnt > 0: + H_v = [item / cnt for item in H_v] + v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better + return v_t_H_v + + def get_vtHv_act(self, params, num_samples): + v = self.sample_rademacher(params) + H_v = [0] * len(v) + cnt = 0 + for step, data in enumerate(self.dataloader): + if cnt >= num_samples: + break + for i in range(self.dataloader.batchsize): ##force to batchsize to be 1 + input = data[0][i:i + 1] + target = data[1][i:i + 1] + + self.get_gradients(self.model, (input, target), self.criterion, create_graph=True) + layer_acts = [self.layer_acts[key] for key in self.layer_acts.keys()] + layer_act_gradients = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] + hv_one = torch.autograd.grad(layer_act_gradients, layer_acts, v, only_inputs=True, retain_graph=False) + cnt += 1 + if cnt >= num_samples: + break + def get_weight_traces(self, num_samples): layer_traces_per_iter = [] prev_avg_model_trace = 0 - for i in range(self.max_iter): + for iter in range(self.max_iter): layer_traces = self.get_vtHv_weight(self.params, num_samples) layer_traces_per_iter.append(layer_traces) layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) model_trace = 
torch.sum(layer_traces_estimate) diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) - if diff_ratio < self.tolerance and i > 10: ##TODO magic number + if diff_ratio < self.tolerance and iter > 10: ##TODO magic number break - if i == 50: ##TODO for debug + if iter == 50: ##TODO for debug break prev_avg_model_trace = model_trace weight_name_to_traces = {} @@ -258,28 +256,62 @@ def get_act_traces(self, num_samples): self.layer_acts = {} self.layer_acts_grads = {} self.register_act_grad_hooks() - for i in range(self.max_iter): + cnt = 0 + for step, data in enumerate(self.dataloader): + if cnt >= num_samples: + break + bs = data[0].shape[0] + act_traces_sum = 0 + act_traces_per_iter = [] + prev_avg_model_trace = 0 + act_traces_sums = None + for i in range(bs): ##force the bs to be one + input = data[0][i:i + 1] + target = data[1][i:i + 1] + self.forward_backward((input, target), create_graph=True, return_w_grad=False) + acts = [self.layer_acts[key] for key in self.layer_acts.keys()] + if act_traces_sums == None: + act_traces_sums = [0] * len(acts) + acts_grad = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] ##same order with acts + # vt_H_v_sum_per_act = [0] * len(acts) + # + # prev_model_act_trace = 0 + # for iter in range(self.max_iter): + # v = self.sample_rademacher(acts) + # H_v = torch.autograd.grad(acts_grad, acts, v, only_inputs=True, retain_graph=False) + # vt_H_v = [torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)] + # + # vt_H_v_sum_per_act = [vt_H_v_sum_per_act[index] + vt_H_v[index] for index, item in + # enumerate(vt_H_v_sum_per_act)] + # vt_H_v_mean_per_act = [item / (iter + 1) for item in vt_H_v_sum_per_act] + # current_model_act_trace = torch.mean(torch.stack(vt_H_v_mean_per_act)) + # + # diff_ratio = abs(current_model_act_trace - prev_model_act_trace) / ( + # prev_model_act_trace + self.eps) + # if diff_ratio < self.tolerance and iter > 10: ##TODO magic number + # break + # if iter == 50: ##TODO for debug + # break + # + # prev_model_act_trace = current_vt_H_v_mean_per_model + # + # cnt += 1 + # if cnt >= num_samples: + # break pass + self.reset_act_gradient_and_hooks() ##TODO have issues to reset the input grad to False + def get_avg_traces(self, enable_act=True, num_samples=32): """ Estimates average hessian trace for each parameter """ assert num_samples > 0 + ##self.get_act_traces(num_samples) weight_traces = self.get_weight_traces(num_samples) return weight_traces - ## handle activation - # if enable_act: - # self.get_act_traces(num_samples) - ##change batchsize to 1 - - # - # layer_traces = layer_traces_estimate - # if enable_act: - # self.reset_act_gradient_and_hooks() - ##copy from torch.quantization._numeric_suite def _find_match( From 5a266fff26f49ab13764e4298ae1dc725f52743a Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Thu, 24 Nov 2022 18:24:20 +0800 Subject: [PATCH 035/128] correct the qnt_weigths does't machted issue --- neural_compressor/strategy/hawq.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index c9f8c4488da..a1616c23dd9 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -391,9 +391,10 @@ def compare_weights( # For matching "fc.weight" and "fc._packed_params._packed_params" match_key = _find_match(float_dict, key, "_packed_params") if match_key is not None: - weight_dict[key] = {} - weight_dict[key]["float"] 
= float_dict[match_key] - weight_dict[key]["quantized"] = quantized_dict[key][0] + weight_dict[match_key] = {} + weight_dict[match_key]["float"] = float_dict[match_key] + weight_dict[match_key]["quantized"] = quantized_dict[key][0] + ##TODO:should consider more models in further work # For LSTM split_str = key.split(".") @@ -586,4 +587,4 @@ def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg: OpTuningConfig) quant_mode_item = tuning_space.query_quant_mode_item((op_name, op_type), op_quant_mode) tuning_item = quant_mode_item.get_option_by_name(att_item) dynamic_state[att_item] = tuning_item.options[0] if tuning_item else None - return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) + return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) \ No newline at end of file From 523303f2d160c0cbbcc5e3ae15f5e1992d85532f Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Thu, 24 Nov 2022 20:10:25 +0800 Subject: [PATCH 036/128] support activation traces --- neural_compressor/strategy/hawq.py | 110 +++++++++++++++++------------ 1 file changed, 66 insertions(+), 44 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index c9f8c4488da..94745270ac1 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -41,6 +41,8 @@ class HessianTrace: """ def __init__(self, model, dataloader, criterion=None): + self.unfused_model = model.model + self.model = fuse_fx(model.model) ##TODO need to check whether model has been already fused self.dataloader = dataloader @@ -104,16 +106,19 @@ def get_device(self, model: torch.nn.Module): def _get_act_grad_hook(self, name): def act_grad_hook(model, grad_input, grad_output): ##print(name, grad_input[0].shape, grad_output[0].shape) - self.layer_acts_grads[name] = [grad_input, grad_output] + if type(model) == torch.nn.Linear: ##TODO very tricky + self.layer_acts_grads[name] = grad_input[1] + else: + self.layer_acts_grads[name] = grad_input[0] return act_grad_hook def _get_enable_act_grad_hook(self, name): def enable_act_grad_hook(model, inputs, outputs): - for input in inputs: - if input.requires_grad is False: - input.requires_grad = True - self.layer_acts[name] = inputs + input = inputs[0] + if input.requires_grad is False: + input.requires_grad = True + self.layer_acts[name] = input return enable_act_grad_hook @@ -134,8 +139,8 @@ def _unregister_hook(self): for handel in self.hook_handles: handel.remove() - def register_act_grad_hooks(self): - for name, module in self.model.named_modules(): + def register_act_grad_hooks(self, model): + for name, module in model.named_modules(): if self.mapping_module_to_op(name) in self.op_list: hook_handle = module.register_forward_hook(self._get_enable_act_grad_hook(name)) self.hook_handles.append(hook_handle) @@ -158,13 +163,13 @@ def get_params(self): self.weight_names = weight_names self.params = params - def forward_backward(self, data, create_graph=False, return_w_grad=True): - self.model.zero_grad() + def forward_backward(self, model, data, create_graph=False, return_w_grad=True): + model.zero_grad() input = data[0].to(self.device) ##self._input_shape = input.shape ## for resetting input activation target = data[1].to(self.device) - ##input.requires_grad = True - output = self.model(input) + input.requires_grad = True + output = model(input) loss = self.criterion(output, target) torch.autograd.backward(loss, create_graph=create_graph) ##loss.backward(create_graph=create_graph) @@ -174,10 +179,10 @@ def forward_backward(self, data, create_graph=False, return_w_grad=True): if p.grad != None and n in self.weight_names: gradient = p.grad gradients.append(gradient + 0.0) ## add 0 to create a copy - self.model.zero_grad() + model.zero_grad() return gradients else: - self.model.zero_grad() + model.zero_grad() # def get_params(self, model): # parameters = [p for p in model.parameters() if p.requires_grad] @@ -198,7 +203,7 @@ def get_vtHv_weight(self, params, num_samples): for step, data in enumerate(self.dataloader): batch_size = data[0].shape[0] cnt += batch_size - gradients = self.forward_backward(data, create_graph=True) + gradients = self.forward_backward(self.model, data, create_graph=True) H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] if cnt >= num_samples: @@ -252,11 +257,14 @@ def 
get_weight_traces(self, num_samples): return op_name_to_trace def get_act_traces(self, num_samples): + unfused_training = self.unfused_model.training + self.unfused_model.eval() self.hook_handles = [] self.layer_acts = {} self.layer_acts_grads = {} - self.register_act_grad_hooks() + self.register_act_grad_hooks(self.unfused_model) cnt = 0 + act_traces_per_sample = [] for step, data in enumerate(self.dataloader): if cnt >= num_samples: break @@ -268,39 +276,49 @@ def get_act_traces(self, num_samples): for i in range(bs): ##force the bs to be one input = data[0][i:i + 1] target = data[1][i:i + 1] - self.forward_backward((input, target), create_graph=True, return_w_grad=False) + self.forward_backward(self.unfused_model, (input, target), create_graph=True, return_w_grad=False) acts = [self.layer_acts[key] for key in self.layer_acts.keys()] if act_traces_sums == None: act_traces_sums = [0] * len(acts) acts_grad = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] ##same order with acts - # vt_H_v_sum_per_act = [0] * len(acts) - # - # prev_model_act_trace = 0 - # for iter in range(self.max_iter): - # v = self.sample_rademacher(acts) - # H_v = torch.autograd.grad(acts_grad, acts, v, only_inputs=True, retain_graph=False) - # vt_H_v = [torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)] - # - # vt_H_v_sum_per_act = [vt_H_v_sum_per_act[index] + vt_H_v[index] for index, item in - # enumerate(vt_H_v_sum_per_act)] - # vt_H_v_mean_per_act = [item / (iter + 1) for item in vt_H_v_sum_per_act] - # current_model_act_trace = torch.mean(torch.stack(vt_H_v_mean_per_act)) - # - # diff_ratio = abs(current_model_act_trace - prev_model_act_trace) / ( - # prev_model_act_trace + self.eps) - # if diff_ratio < self.tolerance and iter > 10: ##TODO magic number - # break - # if iter == 50: ##TODO for debug - # break - # - # prev_model_act_trace = current_vt_H_v_mean_per_model - # - # cnt += 1 - # if cnt >= num_samples: - # break - pass + vt_H_v_sum_per_act = [0] * len(acts) + + prev_model_act_trace = 0 + for iter in range(self.max_iter): + v = self.sample_rademacher(acts) + H_v = torch.autograd.grad(acts_grad, acts, v, only_inputs=True, retain_graph=True) + vt_H_v = [torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)] + + vt_H_v_sum_per_act = [vt_H_v_sum_per_act[index] + vt_H_v[index] for index, item in + enumerate(vt_H_v_sum_per_act)] + vt_H_v_mean_per_act = [item / (iter + 1) for item in vt_H_v_sum_per_act] + current_model_act_trace = torch.mean(torch.stack(vt_H_v_mean_per_act)) + + diff_ratio = abs(current_model_act_trace - prev_model_act_trace) / ( + prev_model_act_trace + self.eps) + if diff_ratio < self.tolerance and iter > 10: ##TODO magic number + break + if iter == 50: ##TODO for debug + break + prev_model_act_trace = current_model_act_trace + act_traces_per_sample.append(vt_H_v_mean_per_act) + cnt += 1 + if cnt >= num_samples: + break + + if unfused_training: + self.unfused_model.train() self.reset_act_gradient_and_hooks() ##TODO have issues to reset the input grad to False + act_traces_stack = torch.stack([torch.stack(item) for item in act_traces_per_sample]) + act_traces = torch.mean(act_traces_stack, dim=0) + res_dict={} + for index, key in enumerate(self.layer_acts.keys()): + res_dict[key]=act_traces[index] + + self.layer_acts=[] + self.layer_acts_grads=[] + return act_traces def get_avg_traces(self, enable_act=True, num_samples=32): """ @@ -308,9 +326,13 @@ def get_avg_traces(self, enable_act=True, num_samples=32): """ assert num_samples > 0 - ##self.get_act_traces(num_samples) + traces = {} 
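# A self-contained sketch (toy model and data, independent of the commits above) of the
# activation-trace idea in get_act_traces: capture one layer input with a forward hook,
# differentiate the loss w.r.t. that activation with create_graph=True, then run a few
# Hutchinson iterations v^T H v with Rademacher probes v.
import torch

model = torch.nn.Sequential(torch.nn.Linear(16, 8), torch.nn.ReLU(), torch.nn.Linear(8, 4)).eval()
criterion = torch.nn.CrossEntropyLoss()
x, y = torch.randn(4, 16), torch.randint(0, 4, (4,))

acts = {}
def capture(name):
    def hook(module, inputs, outputs):
        inp = inputs[0]
        if not inp.requires_grad:          # usually already True for intermediate tensors
            inp.requires_grad_(True)
        acts[name] = inp
    return hook

handle = model[2].register_forward_hook(capture("fc2_input"))
loss = criterion(model(x), y)
act = acts["fc2_input"]
grad_act = torch.autograd.grad(loss, act, create_graph=True)[0]

n_iter, trace = 20, 0.0
for _ in range(n_iter):
    v = torch.randint_like(act, high=2) * 2.0 - 1.0          # Rademacher +/-1 probe
    hv = torch.autograd.grad(grad_act, act, grad_outputs=v, retain_graph=True)[0]
    trace += torch.sum(hv * v).item()
print("estimated activation Hessian trace:", trace / n_iter)
handle.remove()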
weight_traces = self.get_weight_traces(num_samples) - return weight_traces + traces['weight'] = weight_traces + if enable_act: + act_traces = self.get_act_traces(num_samples) + traces['activation']= act_traces + return traces ##copy from torch.quantization._numeric_suite From f56ab18ab869e79ca9792abf04582913ab1aa96d Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Thu, 24 Nov 2022 20:43:03 +0800 Subject: [PATCH 037/128] only enable weight traces currently --- neural_compressor/strategy/hawq.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 6575c21fccb..2e590c3f34b 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -312,12 +312,12 @@ def get_act_traces(self, num_samples): self.reset_act_gradient_and_hooks() ##TODO have issues to reset the input grad to False act_traces_stack = torch.stack([torch.stack(item) for item in act_traces_per_sample]) act_traces = torch.mean(act_traces_stack, dim=0) - res_dict={} + res_dict = {} for index, key in enumerate(self.layer_acts.keys()): - res_dict[key]=act_traces[index] + res_dict[key] = act_traces[index] - self.layer_acts=[] - self.layer_acts_grads=[] + self.layer_acts = [] + self.layer_acts_grads = [] return act_traces def get_avg_traces(self, enable_act=True, num_samples=32): @@ -331,7 +331,7 @@ def get_avg_traces(self, enable_act=True, num_samples=32): traces['weight'] = weight_traces if enable_act: act_traces = self.get_act_traces(num_samples) - traces['activation']= act_traces + traces['activation'] = act_traces return traces @@ -566,7 +566,8 @@ def next_tune_cfg(self): pertur_lst[key] = diff_l2 # for i in pertur_lst: # print(pertur_lst[i]) - op_to_traces = ht.get_avg_traces() + traces = ht.get_avg_traces(enable_act=False) + op_to_traces = traces['weight'] print(op_to_traces) if orig_eval == False: self._fp32_model.train() @@ -609,4 +610,4 @@ def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg: OpTuningConfig) quant_mode_item = tuning_space.query_quant_mode_item((op_name, op_type), op_quant_mode) tuning_item = quant_mode_item.get_option_by_name(att_item) dynamic_state[att_item] = tuning_item.options[0] if tuning_item else None - return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) \ No newline at end of file + return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) From 007b33606ef960af08a32b356b519cbc21835f66 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Fri, 25 Nov 2022 15:30:44 +0800 Subject: [PATCH 038/128] merge weights quantization loss and trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Formula:pertubation=trace*weights_qnt_loss --- neural_compressor/strategy/hawq.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 2e590c3f34b..c000def9440 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -564,18 +564,17 @@ def next_tune_cfg(self): op_qnt_tensor = weight_quant_loss[key]['quantized'].dequantize() diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) # Formula: L2=||Q(w)-w||p^2 pertur_lst[key] = diff_l2 - # for i in pertur_lst: - # print(pertur_lst[i]) traces = ht.get_avg_traces(enable_act=False) op_to_traces = traces['weight'] - print(op_to_traces) + for trace_i, pertur_i in zip(op_to_traces.keys(),pertur_lst.keys()): + op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i] #Formula:Omig=Trace*L2 if orig_eval == False: self._fp32_model.train() - ordered_ops = sorted(op_to_traces.keys(), key=lambda key: op_to_traces[key], reverse=self.higher_is_better) # WA for add op type + 
print("ordered_ops:",ordered_ops) op_info_map = {} for op_info in list(initial_op_tuning_cfg.keys()): op_info_map[op_info[0]] = op_info # op_name: (op_name, op_type) From 420fc95111bf12d33a0d539a1e18b35d77b3af19 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Mon, 28 Nov 2022 22:44:38 +0800 Subject: [PATCH 039/128] Update conf.yaml change root path to default config --- .../torchvision_models/quantization/ptq/cpu/fx/conf.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml index 4b50b559e6a..ef61c6c3e0b 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml @@ -24,7 +24,7 @@ quantization: # optional. tuning constrai batch_size: 1 dataset: ImageFolder: - root: /home/bfang1/Projects/HAWQ_INC/datasets/raw/val # NOTE: modify to calibration dataset location if needed + root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed transform: Resize: size: 256 @@ -43,7 +43,7 @@ evaluation: # optional. required if use batch_size: 1 dataset: ImageFolder: - root: /home/bfang1/Projects/HAWQ_INC/datasets/raw/val # NOTE: modify to evaluation dataset location if needed + root: /path/to/calibration/dataset # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 @@ -61,7 +61,7 @@ evaluation: # optional. required if use batch_size: 1 dataset: ImageFolder: - root: /home/bfang1/Projects/HAWQ_INC/datasets/raw/val # NOTE: modify to evaluation dataset location if needed + root: /path/to/calibration/dataset # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 From 2707825b6706cac0c79860dc5940f77e4066e901 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Mon, 28 Nov 2022 22:28:42 +0800 Subject: [PATCH 040/128] WA add loss for strategy --- neural_compressor/conf/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index aae60416104..dba150cb7d3 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -837,7 +837,7 @@ def percent_to_float(data): Optional('model_conversion'): model_conversion_schema, Optional('tuning', default={ - 'strategy': {'name': 'basic'}, + 'strategy': {'name': 'basic', 'loss': 'CrossEntropyLoss'}, # TODO move loss to appropriate position 'accuracy_criterion': {'relative': 0.01, 'higher_is_better': True}, 'objective': 'performance', 'exit_policy': {'timeout': 0, 'max_trials': 100, 'performance_only': False}, From 36731bcbfa8d67b1efc7eba94eeb27d2815890c8 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 8 Nov 2022 22:31:53 +0800 Subject: [PATCH 041/128] Feat(ST): add a interface for hawq(stage1) --- neural_compressor/adaptor/pytorch.py | 13 +++++++++++++ neural_compressor/strategy/basic.py | 7 ++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 0a44fe2f5a3..4d7f4561ac8 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -1093,7 +1093,20 @@ def is_fused_module(self, module): return True else: return False + + def calculate_op_sensitivity(self, model, dataloader, 
method_args): + """Compute the op sensitivity by the specific method. + Args: + model(INC model): The fp32 model. + dataloader: The calibration dataloader. + method_args(Dict): The parameters for specifying the method. + + Returns: + ops_sensitivity(Dict[tuple, float]): The key is (op_name, op_type), + the value is the sensitivity under the specified method + """ + pass unify_op_type_mapping = { "ConvReLU2d": "Conv2d", diff --git a/neural_compressor/strategy/basic.py b/neural_compressor/strategy/basic.py index c35398dd4bb..3cc4e38bde2 100644 --- a/neural_compressor/strategy/basic.py +++ b/neural_compressor/strategy/basic.py @@ -143,7 +143,12 @@ def next_tune_cfg(self): if fallback_items_lst: logger.info(f"Start to fallback op to {target_dtype} one by one.") self._fallback_started() - fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + #fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, + self.calib_dataloader, + method_args = {'name': 'hessian_trace'}) + fallback_items_name_lst = sorted(ops_sensitivity, key = lambda items: items[1], reverse=True) + op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], From c02b5c1cbf2da762cc4db7aa7cf941411677c89b Mon Sep 17 00:00:00 2001 From: root Date: Thu, 10 Nov 2022 14:22:12 +0800 Subject: [PATCH 042/128] hawq_metric.py --- neural_compressor/strategy/hawq_metric.py | 291 ++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 neural_compressor/strategy/hawq_metric.py diff --git a/neural_compressor/strategy/hawq_metric.py b/neural_compressor/strategy/hawq_metric.py new file mode 100644 index 00000000000..acbcd98d740 --- /dev/null +++ b/neural_compressor/strategy/hawq_metric.py @@ -0,0 +1,291 @@ +""" + Copyright (c) 2022 Intel Corporation + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" +import logging +import torch +import numpy as np +from torch.autograd import Variable +import yaml +import torchvision.transforms as transforms +import torchvision +import random +import copy +from torch.quantization import get_default_qat_qconfig, quantize_jit,get_default_qconfig +from torch.quantization.quantize_fx import prepare_fx, convert_fx,fuse_fx +from torch.quantization import default_dynamic_qconfig, float_qparams_weight_only_qconfig +import torch.quantization._numeric_suite as ns + + +def fixed_seed(seed): + """Fixed rand seed to make sure results are same in different times on different devices.Eg CPU/GPU + Args: + seed: an integer number + return: None + """ + np.random.seed(seed) #random + random.seed(seed) + torch.manual_seed(seed) #cpu + torch.cuda.manual_seed_all(seed) #parallel cpu + torch.backends.cudnn.deterministic = True #make sure results are same on cpu/gpu + torch.backends.cudnn.benchmark = True #accelerator +def calculate_params_gradients(model): + """ + get the gradients and parameters from given model + Args: + model: FP32 model specificed + return: + params: paratmeters of model + grads: gradients of model + """ + params=[] + grads=[] + for indx,(name, parm) in zip(enumerate(model.parameters()), model.named_parameters()): + logging.info('->tensor_index:', indx[0],'-->name:', name, '-->grad_requirs:',parm.requires_grad, '-->current tensor len:',parm.shape) + if not parm.requires_grad: + continue + params.append(parm) + grads.append(0. if parm.grad is None else parm.grad+0.) + return params, grads +def calculate_inner_product(list_x,list_y): + """Compute the inner product of two lists of variables list_x,list_y + Args: + list_x: input list variables + list_y: input list variables + return: + sum of inner product + """ + return sum([torch.sum(x * y) for (x, y) in zip(list_x, list_y)]) + +def calculate_vector_product(gradsH, params, v): + """compute the hessian vector product by torch.autograd.grad. 
+ Agrs: + gradsH: gradient at current point + params: corresponding variables + v: vector + return: + hv: hessian vector product + """ + hv=torch.autograd.grad( + gradsH, + params, + grad_outputs=v, + only_inputs=True, + retain_graph=True) + return hv +def ptq_calibrate(model, data_loader,num_cal): + """Calibrate model in post train quantization model + Args: + model: a pre_quantization model to calibrate + data_laoder: datasets + num_cal: maximization number of calibrated samples, such as images + return: + model: a calibrated model + """ + #Generate some samples to calibrate from data_loader + calibrate_samples=[] + i=0 + for inputs, targets in data_loader: + calibrate_samples.append(inputs) + i=i+1 + if i>=num_cal: + break + # model.cpu() + model.eval() + #calibration + with torch.no_grad(): + for sample in calibrate_samples: + model(sample) + return model +def calculate_perturbation(model_qnt,model_fp32)->dict: + """calculate weights quantized perturbation using L2 normal + Args: + model_qnt: quantized model + model_fp32: float model + return: + pertur_lst: dict,which contains layer_name and value + + """ + + wq_cmp_dict=ns.compare_weights(model_fp32.state_dict(), model_qnt.state_dict()) + pertur_lst=[] + for key in wq_cmp_dict: + pertur_pair={"layer_name":'',"value":0} + op_float_tensor=wq_cmp_dict[key]['float'] + op_qnt_tensor=wq_cmp_dict[key]['quantized'].dequantize() + diff_l2=(torch.norm(op_float_tensor-op_qnt_tensor,p=2)**2) #Formula: L2=||Q(w)-w||p^2 + pertur_pair['layer_name']=key + pertur_pair['value']=diff_l2 + pertur_lst.append(pertur_pair) + return pertur_lst +class Hessian(): + """This class used to compute each layer hessian trace from given FP32 model + """ + def __init__(self,model,criterion, data=None, dataloader=None,device='cpu') -> None: + """Initial parameters + Args: + model: FP32 model specificed + criterion: loss function + data: a single batch of data, including inputs and its corresponding labels + dataloader: the data loader including bunch of batches of data + device: currently only supports cpu device + """ + #make sure we either pass a single batch or a dataloader + assert (data!=None and dataloader==None ) or (data==None and dataloader!=None) + #make mode is evaluation model + self.model=model.eval() + self.criterion=criterion + self.device=device + + if data!=None: + self.data=data + self.full_dataset=False + if not self.full_dataset: + self.inputs, self.targets=self.data + outputs=self.model(self.inputs) + loss=self.criterion(outputs,self.targets) + loss.backward(create_graph=True) + params, gradSH=calculate_params_gradients(self.model) + + self.params=params + self.gradSH=gradSH + def calculate_trace(self,max_Iter=100, tolerance=1e-3): + """Compute the hessian trace based on Hutchinson algorithm + Args: + max_Inter: number of maximization iteration + tolerance: minimum relative tolerance for stopping the algorithm. + return: + avg_traces_lst: return hessian trace per layer for given model + """ + avg_traces_lst=[] + for (i_grad, i_param,(module_name, _)) in zip(self.gradSH, self.params, self.model.named_parameters()): + v=[torch.randint_like(i_param,high=2, device=self.device)] + for v_i in v: + v_i[v_i==0]=-1 + i_v=v + trace_vhv=[] + trace=0. 
+ trace_pair={"layer_name":" ", "trace":0} + self.model.zero_grad() + for i in range(max_Iter): + hv=calculate_vector_product(i_grad,i_param,i_v) # hessian vector + trace_vhv_cur=calculate_inner_product(hv,v).cpu().item()#current point + trace_vhv.append(trace_vhv_cur) + difference=(np.mean(trace_vhv)-trace)/(abs(trace)+1e-6) + if abs(difference) None: + self.dataloader=dataloader + if yaml_trace and yaml_cpu is not None: + with open(yaml_trace) as file: + params_config=yaml.load(file) + if params_config['loss']=='CrossEntropyLoss': + self.criterion=torch.nn.CrossEntropyLoss() + self.random_seed=params_config['random_seed'] + self.max_Iteration=params_config['max_Iteration'] + self.enable_op_fuse=params_config['enable_op_fuse'] + self.tolerance=float(params_config['tolerance']) + self.max_cal_sample=float(params_config['max_cal_smaple']) + self.quantize_mode=params_config['quantize_mode'] + with open(yaml_cpu,'r') as file: + yaml_config=yaml.load(file) + str_dtype=(yaml_config[0]['precisions']['names']) + self.list_dtype = str_dtype.split(",") + else: + self.criterion=torch.nn.CrossEntropyLoss() + self.random_seed=100 + self.max_Iteration=100 + self.enable_op_fuse=True + self.tolerance=1e-6 + self.max_cal_sample=100 + self.quantize_mode='ptq' + self.list_dtype=['int8','fp32'] + logging.info("Current parameters config for Hutchinson’s algorithm as below:") + logging.info("criterion:",self.criterion,"| random_seed:",self.random_seed,"| max_Iteration:", self.max_Iteration, \ + "| tolerance:", self.tolerance,"| en_op_fuse", self.enable_op_fuse,"| max_cal_sample:", self.max_cal_sample) + fixed_seed(self.random_seed) + self.model=model + self.model.eval() + model_tmp=copy.deepcopy(model) + model_tmp.eval() + self.model_fused= fuse_fx(model_tmp) + self.model_fused.eval() + + def get_init_config(self)->dict: + """ + """ + #Load a sample from dataloader to compute graident + for inputs, targets in self.dataloader: + break + #Hessian average trace computation + with torch.enable_grad(): + if self.enable_op_fuse: + hawq_cmp=Hessian(self.model_fused,criterion=self.criterion,data=(inputs,targets)) + else: + hawq_cmp=Hessian(self.model,criterion=self.criterion,data=(inputs,targets)) + avg_traces_lst=hawq_cmp.calculate_trace(max_Iter=self.max_Iteration,tolerance=self.tolerance) + + #fiter none weight layer and save weight layer to match perturbation computation + avg_traces_lst_weight=[] + for avg_trace_i in avg_traces_lst: + if 'weight' in avg_trace_i['layer_name']: + avg_traces_lst_weight.append(avg_trace_i) + # avg_traces_lst_sorted=sorted(avg_traces_lst,key=lambda x:x["trace"], reverse=True) + if self.quantize_mode=='ptq': + #PTQ quantization + qconfig = get_default_qconfig("fbgemm") + qconfig_dict={"":qconfig} #enable all layers/tensor to quantize + #calibrate + model_prepared=prepare_fx(self.model, qconfig_dict) + model_prepared=ptq_calibrate(model_prepared,data_loader=self.dataloader,num_cal=self.max_cal_sample) + model_prepared.cpu() + model_all_qnt=convert_fx(model_prepared) + #calculate perturbation + pertu_list=calculate_perturbation(model_fp32=self.model,model_qnt=model_all_qnt) + #calculate omiga + for omiga_i in pertu_list: + for avg_trace_i in avg_traces_lst: + if avg_trace_i['layer_name']==omiga_i['layer_name']: + avg_trace_i['trace']=avg_trace_i['trace']*omiga_i['value'] + # for avg_trace_i, omiga_i in zip(avg_traces_lst_weight,pertu_list): + # omig_pair={"layer_name":" ", "value":0} + # omig_val=avg_trace_i['trace']*omiga_i['value'] + # omig_pair['layer_name']=avg_trace_i['layer_name'] 
+ # omig_pair['value']=omig_val + # omig_list.append(omig_pair) + # omig_list_sorted=sorted(omig_list,key=lambda x:x['value'],reverse=True) + omig_list_sorted=sorted(avg_traces_lst,key=lambda x:x['trace'],reverse=True) + tune_init_config_pairs=[] + # + for i in omig_list_sorted: + tune_init_config_pair={"op_name":'',"op_type":'','trace':0} + if i['layer_name']==omig_list_sorted[0]['layer_name']: + tune_init_config_pair['op_name']=i['layer_name'] + tune_init_config_pair['op_type']=self.list_dtype[-1] #setup as float op + tune_init_config_pair['trace']=float(i['trace']) + else: + tune_init_config_pair['op_name']=i['layer_name'] + tune_init_config_pair['op_type']=self.list_dtype[0] + tune_init_config_pair['trace']=float(i['trace']) + tune_init_config_pairs.append(tune_init_config_pair) + return tune_init_config_pairs From 399c732d12ed66ec11e4b359442fefa379866b5a Mon Sep 17 00:00:00 2001 From: root Date: Thu, 10 Nov 2022 14:24:13 +0800 Subject: [PATCH 043/128] pytorch.py --- neural_compressor/adaptor/pytorch.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 4d7f4561ac8..e89f687f81a 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -29,6 +29,7 @@ from ..utils import logger from .query import QueryBackendCapability from ..experimental.data.dataloaders.base_dataloader import BaseDataLoader +from neural_compressor.strategy.hawq_metric import Hawq_top torch = LazyImport("torch") @@ -1106,6 +1107,10 @@ def calculate_op_sensitivity(self, model, dataloader, method_args): ops_sensitivity(Dict[tuple, float]): The key is (op_name, op_type), the value is the sensitivity under the specified method """ + if method_args['name']=='hessian_trace': + Hawq_top(model=model,yaml_cpu=None,yaml_trace=None,dataloader=dataloader) + hessian_cmp=Hawq_top.get_init_config() + return hessian_cmp pass unify_op_type_mapping = { From 3b5abbf116b4bc72a2a4a549c3863a0964bbb140 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 30 Nov 2022 14:29:49 +0800 Subject: [PATCH 044/128] resolve conflicts Signed-off-by: yiliu30 --- test/strategy/test_hessian_trace_inc.py | 63 +++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 test/strategy/test_hessian_trace_inc.py diff --git a/test/strategy/test_hessian_trace_inc.py b/test/strategy/test_hessian_trace_inc.py new file mode 100644 index 00000000000..f05b47ca3aa --- /dev/null +++ b/test/strategy/test_hessian_trace_inc.py @@ -0,0 +1,63 @@ +import torch +import unittest +import os +import sys +import copy +import torchvision +import torchvision.transforms as transforms +from torch.utils.data import DataLoader +from neural_compressor.data import DATASETS +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.adaptor.pytorch import TemplateAdaptor +from neural_compressor.strategy.hawq_metric import Hawq_top + +def build_hessian_trace(): + hessian_trace_config_yaml=''' + loss: + CrossEntropyLoss + random_seed: + 1 + max_Iteration: + 100 + tolerance: + 1e-3 + enable_op_fuse: + True + max_cal_smaple: + 100 + quantize_mode: + ptq + ''' + with open('./hessian_trace_config_yaml','w+',encoding="utf-8") as f: + f.write(hessian_trace_config_yaml) +class Test_hessian_trace(unittest.TestCase): + #boot up test + @classmethod + def setUpClass(cls) -> None: + build_hessian_trace() + cls.model=torchvision.models.resnet18() + #shotdown test + @classmethod + def tearDownClass(cls) -> 
None: + os.remove('./hessian_trace_config_yaml') + #one test case + def test_run_hessian_trace(cls): + """ + hessian_trace_top + Inputs: + model: FP32 model + dataloader: imagenet + """ + model=cls.model + datasets = DATASETS('pytorch') + dummy_dataset = datasets['dummy'](shape=(200, 3, 224, 224), low=0., high=1., label=True) + dummy_dataloader = PyTorchDataLoader(dummy_dataset) + # yaml_cpu='/home/bfang1/Projects/HAWQ_INC/frameworks.ai.lpot.intel-lpot/neural_compressor/adaptor/pytorch_cpu.yaml' + # hessian_cmp=hawq_metric.Hawq_top(model,'./hessian_trace_config_yaml',yaml_cpu,dummy_dataloader) + hessian_cmp=Hawq_top(model,yaml_cpu=None,yaml_trace=None,dataloader=dummy_dataloader) + tuning_init_config=hessian_cmp.get_init_config() + #print tuning init_config + for i in tuning_init_config: + print(i) +if __name__ == "__main__": + unittest.main() \ No newline at end of file From c7c16988c8ccd4a9fc4b6c06ffb5b18f2647e9e0 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Tue, 15 Nov 2022 11:44:48 +0800 Subject: [PATCH 045/128] add wenhuach test env --- neural_compressor/strategy/hawq.py | 311 ++++++++++++++++++++++++++++ test/strategy/test_hawq_wenhuach.py | 74 +++++++ 2 files changed, 385 insertions(+) create mode 100644 neural_compressor/strategy/hawq.py create mode 100644 test/strategy/test_hawq_wenhuach.py diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py new file mode 100644 index 00000000000..2dd0287fa2e --- /dev/null +++ b/neural_compressor/strategy/hawq.py @@ -0,0 +1,311 @@ +""" + Copyright (c) 2022 Intel Corporation + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +from ..utils import logger +import torch +import numpy as np +from torch.autograd import Variable +import yaml +import torchvision.transforms as transforms +import torchvision +import random +import copy +from torch.quantization import get_default_qat_qconfig, quantize_jit, get_default_qconfig +from torch.quantization.quantize_fx import prepare_fx, convert_fx, fuse_fx +from torch.quantization import default_dynamic_qconfig, float_qparams_weight_only_qconfig +import torch.quantization._numeric_suite as ns + + +def fix_seed(seed): + """Fixed rand seed to make sure results are same in different times on different devices.Eg CPU/GPU + Args: + seed: an integer number + return: None + """ + np.random.seed(seed) # random + random.seed(seed) + torch.manual_seed(seed) # cpu + torch.cuda.manual_seed_all(seed) # parallel cpu + torch.backends.cudnn.deterministic = True # make sure results are same on cpu/gpu + torch.backends.cudnn.benchmark = True # accelerator + + +def calculate_params_gradients(model): + """ + get the gradients and parameters from given model + Args: + model: FP32 model specificed + return: + params: paratmeters of model + grads: gradients of model + """ + params = [] + grads = [] + for indx, (name, parm) in zip(enumerate(model.parameters()), model.named_parameters()): + logger.info( + f'index:{indx[0]}-->name:{name}:{parm.shape}') + + if not parm.requires_grad: + continue + params.append(parm) + grads.append(0. if parm.grad is None else parm.grad + 0.) + return params, grads + + +def calculate_inner_product(list_x, list_y): + """Compute the inner product of two lists of variables list_x,list_y + Args: + list_x: input list variables + list_y: input list variables + return: + sum of inner product + """ + return sum([torch.sum(x * y) for (x, y) in zip(list_x, list_y)]) + + +def calculate_vector_product(gradsH, params, v): + """compute the hessian vector product by torch.autograd.grad. 
+ Agrs: + gradsH: gradient at current point + params: corresponding variables + v: vector + return: + hv: hessian vector product + """ + hv = torch.autograd.grad( + gradsH, + params, + grad_outputs=v, + only_inputs=True, + retain_graph=True) + return hv + + +def ptq_calibrate(model, data_loader, num_cal): + """Calibrate model in post train quantization model + Args: + model: a pre_quantization model to calibrate + data_laoder: datasets + num_cal: maximization number of calibrated samples, such as images + return: + model: a calibrated model + """ + # Generate some samples to calibrate from data_loader + calibrate_samples = [] + i = 0 + for inputs, targets in data_loader: + calibrate_samples.append(inputs) + i = i + 1 + if i >= num_cal: + break + # model.cpu() + model.eval() + # calibration + with torch.no_grad(): + for sample in calibrate_samples: + model(sample) + return model + + +def calculate_perturbation(model_qnt, model_fp32) -> dict: + """calculate weights quantized perturbation using L2 normal + Args: + model_qnt: quantized model + model_fp32: float model + return: + pertur_lst: dict,which contains layer_name and value + + """ + + wq_cmp_dict = ns.compare_weights(model_fp32.state_dict(), model_qnt.state_dict()) + pertur_lst = [] + for key in wq_cmp_dict: + pertur_pair = {"layer_name": '', "value": 0} + op_float_tensor = wq_cmp_dict[key]['float'] + op_qnt_tensor = wq_cmp_dict[key]['quantized'].dequantize() + diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) # Formula: L2=||Q(w)-w||p^2 + pertur_pair['layer_name'] = key + pertur_pair['value'] = diff_l2 + pertur_lst.append(pertur_pair) + return pertur_lst + + +class Hessian(): + """This class used to compute each layer hessian trace from given FP32 model + """ + + def __init__(self, model, criterion, data=None, dataloader=None, device='cpu') -> None: + """Initial parameters + Args: + model: FP32 model specificed + criterion: loss function + data: a single batch of data, including inputs and its corresponding labels + dataloader: the data loader including bunch of batches of data + device: currently only supports cpu device + """ + # make sure we either pass a single batch or a dataloader + assert (data != None and dataloader == None) or (data == None and dataloader != None) + # make mode is evaluation model + self.model = model.eval() + self.criterion = criterion + self.device = device + + if data != None: + self.data = data + self.full_dataset = False + if not self.full_dataset: + self.inputs, self.targets = self.data + outputs = self.model(self.inputs) + loss = self.criterion(outputs, self.targets) + loss.backward(create_graph=True) + params, gradSH = calculate_params_gradients(self.model) + + self.params = params + self.gradSH = gradSH + + def calculate_trace(self, max_Iter=100, tolerance=1e-3): + """Compute the hessian trace based on Hutchinson algorithm + Args: + max_Inter: number of maximization iteration + tolerance: minimum relative tolerance for stopping the algorithm. + return: + avg_traces_lst: return hessian trace per layer for given model + """ + avg_traces_lst = [] + for (i_grad, i_param, (module_name, _)) in zip(self.gradSH, self.params, self.model.named_parameters()): + v = [torch.randint_like(i_param, high=2, device=self.device)] + for v_i in v: + v_i[v_i == 0] = -1 + i_v = v + trace_vhv = [] + trace = 0. 
+ trace_pair = {"layer_name": " ", "trace": 0} + self.model.zero_grad() + for i in range(max_Iter): + hv = calculate_vector_product(i_grad, i_param, i_v) # hessian vector + trace_vhv_cur = calculate_inner_product(hv, v).cpu().item() # current point + trace_vhv.append(trace_vhv_cur) + difference = (np.mean(trace_vhv) - trace) / (abs(trace) + 1e-6) + if abs(difference) < tolerance: + avg_trace_vhv = np.mean(trace_vhv) + trace_pair["layer_name"] = module_name + trace_pair["trace"] = avg_trace_vhv + avg_traces_lst.append(trace_pair) + break + else: + trace = np.mean(trace_vhv) + return avg_traces_lst + + +class Hawq_top(): + """This class is a interface of hessian + """ + + def __init__(self, model, yaml_trace=None, yaml_cpu=None, dataloader=None) -> None: + self.dataloader = dataloader + if yaml_trace and yaml_cpu is not None: + with open(yaml_trace) as file: + params_config = yaml.load(file) + if params_config['loss'] == 'CrossEntropyLoss': + self.criterion = torch.nn.CrossEntropyLoss() + self.random_seed = params_config['random_seed'] + self.max_Iteration = params_config['max_Iteration'] + self.enable_op_fuse = params_config['enable_op_fuse'] + self.tolerance = float(params_config['tolerance']) + self.max_cal_sample = float(params_config['max_cal_smaple']) + self.quantize_mode = params_config['quantize_mode'] + with open(yaml_cpu, 'r') as file: + yaml_config = yaml.load(file) + str_dtype = (yaml_config[0]['precisions']['names']) + self.list_dtype = str_dtype.split(",") + else: + self.criterion = torch.nn.CrossEntropyLoss() + self.random_seed = 100 + self.max_Iteration = 100 + self.enable_op_fuse = True + self.tolerance = 1e-6 + self.max_cal_sample = 100 + self.quantize_mode = 'ptq' + self.list_dtype = ['int8', 'fp32'] + # logger.info("Current parameters config for Hutchinson’s algorithm as below:") + logger.info( + f"criterion:{self.criterion}| random_seed:{self.random_seed}| max_Iteration:self.max_Iteration| tolerance:{self.tolerance}") + # logger.info("criterion:", self.criterion, "| random_seed:", self.random_seed, "| max_Iteration:", + # self.max_Iteration, \ + # "| tolerance:", self.tolerance, "| en_op_fuse", self.enable_op_fuse, "| max_cal_sample:", + # self.max_cal_sample) + fix_seed(self.random_seed) + self.model = model + self.model.eval() + model_tmp = copy.deepcopy(model) + model_tmp.eval() + self.model_fused = fuse_fx(model_tmp) + self.model_fused.eval() + + def get_init_config(self) -> dict: + """ + """ + # Load a sample from dataloader to compute graident + for inputs, targets in self.dataloader: + break + # Hessian average trace computation + with torch.enable_grad(): + if self.enable_op_fuse: + hawq_cmp = Hessian(self.model_fused, criterion=self.criterion, data=(inputs, targets)) + else: + hawq_cmp = Hessian(self.model, criterion=self.criterion, data=(inputs, targets)) + avg_traces_lst = hawq_cmp.calculate_trace(max_Iter=self.max_Iteration, tolerance=self.tolerance) + + # fiter none weight layer and save weight layer to match perturbation computation + avg_traces_lst_weight = [] + for avg_trace_i in avg_traces_lst: + if 'weight' in avg_trace_i['layer_name']: + avg_traces_lst_weight.append(avg_trace_i) + # avg_traces_lst_sorted=sorted(avg_traces_lst,key=lambda x:x["trace"], reverse=True) + if self.quantize_mode == 'ptq': + # PTQ quantization + qconfig = get_default_qconfig("fbgemm") + qconfig_dict = {"": qconfig} # enable all layers/tensor to quantize + # calibrate + model_prepared = prepare_fx(self.model, qconfig_dict) + model_prepared = ptq_calibrate(model_prepared, 
data_loader=self.dataloader, num_cal=self.max_cal_sample) + model_prepared.cpu() + model_all_qnt = convert_fx(model_prepared) + # calculate perturbation + pertu_list = calculate_perturbation(model_fp32=self.model, model_qnt=model_all_qnt) + # calculate omiga + for omiga_i in pertu_list: + for avg_trace_i in avg_traces_lst: + if avg_trace_i['layer_name'] == omiga_i['layer_name']: + avg_trace_i['trace'] = avg_trace_i['trace'] * omiga_i['value'] + # for avg_trace_i, omiga_i in zip(avg_traces_lst_weight,pertu_list): + # omig_pair={"layer_name":" ", "value":0} + # omig_val=avg_trace_i['trace']*omiga_i['value'] + # omig_pair['layer_name']=avg_trace_i['layer_name'] + # omig_pair['value']=omig_val + # omig_list.append(omig_pair) + # omig_list_sorted=sorted(omig_list,key=lambda x:x['value'],reverse=True) + omig_list_sorted = sorted(avg_traces_lst, key=lambda x: x['trace'], reverse=True) + tune_init_config_pairs = [] + # + for i in omig_list_sorted: + tune_init_config_pair = {"op_name": '', "op_type": '', 'trace': 0} + if i['layer_name'] == omig_list_sorted[0]['layer_name']: + tune_init_config_pair['op_name'] = i['layer_name'] + tune_init_config_pair['op_type'] = self.list_dtype[-1] # setup as float op + tune_init_config_pair['trace'] = float(i['trace']) + else: + tune_init_config_pair['op_name'] = i['layer_name'] + tune_init_config_pair['op_type'] = self.list_dtype[0] + tune_init_config_pair['trace'] = float(i['trace']) + tune_init_config_pairs.append(tune_init_config_pair) + return tune_init_config_pairs diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py new file mode 100644 index 00000000000..5affedc70ca --- /dev/null +++ b/test/strategy/test_hawq_wenhuach.py @@ -0,0 +1,74 @@ +import torch +import unittest +import os +import sys +import copy +import torchvision +import torchvision.transforms as transforms +from torch.utils.data import DataLoader +from neural_compressor.data import DATASETS +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.adaptor.pytorch import TemplateAdaptor + +from neural_compressor.strategy.hawq import Hawq_top, fix_seed + +fix_seed(1) + + +def build_hessian_trace(): + hessian_trace_config_yaml = ''' + loss: + CrossEntropyLoss + random_seed: + 1 + max_Iteration: + 100 + tolerance: + 1e-3 + enable_op_fuse: + True + max_cal_smaple: + 100 + quantize_mode: + ptq + ''' + with open('./hessian_trace_config_yaml', 'w+', encoding="utf-8") as f: + f.write(hessian_trace_config_yaml) + + +class Test_hessian_trace(unittest.TestCase): + # boot up test + @classmethod + def setUpClass(cls) -> None: + build_hessian_trace() + cls.model = torchvision.models.resnet18() + + # shotdown test + @classmethod + def tearDownClass(cls) -> None: + os.remove('./hessian_trace_config_yaml') + + # one test case + def test_run_hessian_trace(cls): + """ + hessian_trace_top + Inputs: + model: FP32 model + dataloader: imagenet + """ + + model = cls.model + datasets = DATASETS('pytorch') + dummy_dataset = datasets['dummy'](shape=(200, 3, 224, 224), low=0., high=1., label=True) + dummy_dataloader = PyTorchDataLoader(dummy_dataset) + # yaml_cpu='/home/bfang1/Projects/HAWQ_INC/frameworks.ai.lpot.intel-lpot/neural_compressor/adaptor/pytorch_cpu.yaml' + # hessian_cmp=hawq_metric.Hawq_top(model,'./hessian_trace_config_yaml',yaml_cpu,dummy_dataloader) + hessian_cmp = Hawq_top(model, yaml_cpu=None, yaml_trace=None, dataloader=dummy_dataloader) + tuning_init_config = hessian_cmp.get_init_config() + # print tuning 
init_config
+        for i in tuning_init_config:
+            print(i)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 581b21e56cf8baf0766d4a3c980a86cef3339ef7 Mon Sep 17 00:00:00 2001
From: wenhuach
Date: Tue, 15 Nov 2022 14:00:06 +0800 Subject: [PATCH 046/128] try to test mes strategy, have bug now --- test/strategy/test_hawq_wenhuach.py | 162 +++++++++++++++++++--------- 1 file changed, 113 insertions(+), 49 deletions(-) diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index 5affedc70ca..4443cd8d486 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -9,66 +9,130 @@ from neural_compressor.data import DATASETS from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from neural_compressor.adaptor.pytorch import TemplateAdaptor - +from neural_compressor.adaptor import FRAMEWORKS +import shutil from neural_compressor.strategy.hawq import Hawq_top, fix_seed fix_seed(1) +def build_ptq_yaml(): + fake_yaml = ''' + model: + name: imagenet + framework: pytorch + quantization: + calibration: + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: mse + accuracy_criterion: + relative: -0.1 + random_seed: 9527 + exit_policy: + max_trials: 1 + workspace: + path: saved + ''' + with open('ptq_yaml.yaml', 'w', encoding="utf-8") as f: + f.write(fake_yaml) -def build_hessian_trace(): - hessian_trace_config_yaml = ''' - loss: - CrossEntropyLoss - random_seed: - 1 - max_Iteration: - 100 - tolerance: - 1e-3 - enable_op_fuse: - True - max_cal_smaple: - 100 - quantize_mode: - ptq - ''' - with open('./hessian_trace_config_yaml', 'w+', encoding="utf-8") as f: - f.write(hessian_trace_config_yaml) - +class TestPytorchAdaptor(unittest.TestCase): + framework_specific_info = {"device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "workspace_path": None} + framework = "pytorch" + adaptor = FRAMEWORKS[framework](framework_specific_info) + model = torchvision.models.resnet18() -class Test_hessian_trace(unittest.TestCase): - # boot up test - @classmethod - def setUpClass(cls) -> None: - build_hessian_trace() - cls.model = torchvision.models.resnet18() + # model = torch.quantization.QuantWrapper(model) - # shotdown test @classmethod - def tearDownClass(cls) -> None: - os.remove('./hessian_trace_config_yaml') + def setUpClass(self): + build_ptq_yaml() - # one test case - def test_run_hessian_trace(cls): - """ - hessian_trace_top - Inputs: - model: FP32 model - dataloader: imagenet - """ - model = cls.model - datasets = DATASETS('pytorch') - dummy_dataset = datasets['dummy'](shape=(200, 3, 224, 224), low=0., high=1., label=True) - dummy_dataloader = PyTorchDataLoader(dummy_dataset) - # yaml_cpu='/home/bfang1/Projects/HAWQ_INC/frameworks.ai.lpot.intel-lpot/neural_compressor/adaptor/pytorch_cpu.yaml' - # hessian_cmp=hawq_metric.Hawq_top(model,'./hessian_trace_config_yaml',yaml_cpu,dummy_dataloader) - hessian_cmp = Hawq_top(model, yaml_cpu=None, yaml_trace=None, dataloader=dummy_dataloader) - tuning_init_config = hessian_cmp.get_init_config() - # print tuning init_config - for i in tuning_init_config: - print(i) + @classmethod + def tearDownClass(self): + os.remove('ptq_yaml.yaml') + shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree('runs', ignore_errors=True) + def test_run_hawq_one_trial(self): + from neural_compressor.experimental import Quantization, common + model = copy.deepcopy(self.model) + for fake_yaml in ['ptq_yaml.yaml']: + if fake_yaml == 'ptq_yaml.yaml': + model.eval() + quantizer = Quantization(fake_yaml) + dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + quantizer.calib_dataloader = 
common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = model + quantizer() if __name__ == "__main__": unittest.main() + +# def build_hessian_trace(): +# hessian_trace_config_yaml = ''' +# loss: +# CrossEntropyLoss +# random_seed: +# 1 +# max_Iteration: +# 100 +# tolerance: +# 1e-3 +# enable_op_fuse: +# True +# max_cal_smaple: +# 100 +# quantize_mode: +# ptq +# ''' +# with open('./hessian_trace_config_yaml', 'w+', encoding="utf-8") as f: +# f.write(hessian_trace_config_yaml) +# +# +# class Test_hessian_trace(unittest.TestCase): +# # boot up test +# @classmethod +# def setUpClass(cls) -> None: +# build_hessian_trace() +# cls.model = torchvision.models.resnet18() +# +# # shotdown test +# @classmethod +# def tearDownClass(cls) -> None: +# os.remove('./hessian_trace_config_yaml') +# +# # one test case +# def test_run_hessian_trace(cls): +# """ +# hessian_trace_top +# Inputs: +# model: FP32 model +# dataloader: imagenet +# """ +# +# model = cls.model +# datasets = DATASETS('pytorch') +# dummy_dataset = datasets['dummy'](shape=(200, 3, 224, 224), low=0., high=1., label=True) +# dummy_dataloader = PyTorchDataLoader(dummy_dataset) +# # yaml_cpu='/home/bfang1/Projects/HAWQ_INC/frameworks.ai.lpot.intel-lpot/neural_compressor/adaptor/pytorch_cpu.yaml' +# # hessian_cmp=hawq_metric.Hawq_top(model,'./hessian_trace_config_yaml',yaml_cpu,dummy_dataloader) +# hessian_cmp = Hawq_top(model, yaml_cpu=None, yaml_trace=None, dataloader=dummy_dataloader) +# tuning_init_config = hessian_cmp.get_init_config() +# # print tuning init_config +# for i in tuning_init_config: +# print(i) + + +# if __name__ == "__main__": +# unittest.main() From 7bb75cc69b5702fc59b52842205bd65ea1130172 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Tue, 15 Nov 2022 14:06:46 +0800 Subject: [PATCH 047/128] change name --- .../strategy/{hawq.py => hawq_wenhuach.py} | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) rename neural_compressor/strategy/{hawq.py => hawq_wenhuach.py} (95%) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq_wenhuach.py similarity index 95% rename from neural_compressor/strategy/hawq.py rename to neural_compressor/strategy/hawq_wenhuach.py index 2dd0287fa2e..6c74401c5fc 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq_wenhuach.py @@ -246,23 +246,25 @@ def __init__(self, model, yaml_trace=None, yaml_cpu=None, dataloader=None) -> No fix_seed(self.random_seed) self.model = model self.model.eval() - model_tmp = copy.deepcopy(model) - model_tmp.eval() - self.model_fused = fuse_fx(model_tmp) - self.model_fused.eval() + if self.enable_op_fuse: + self.model = fuse_fx(self.model) + + # model_tmp = copy.deepcopy(model) + # model_tmp.eval() + # self.model_fused = fuse_fx(model_tmp) + # self.model_fused.eval() def get_init_config(self) -> dict: """ """ # Load a sample from dataloader to compute graident - for inputs, targets in self.dataloader: - break - # Hessian average trace computation + inputs, targets = next(iter(self.dataloader)) + with torch.enable_grad(): - if self.enable_op_fuse: - hawq_cmp = Hessian(self.model_fused, criterion=self.criterion, data=(inputs, targets)) - else: - hawq_cmp = Hessian(self.model, criterion=self.criterion, data=(inputs, targets)) + # if self.enable_op_fuse: + # hawq_cmp = Hessian(self.model_fused, criterion=self.criterion, data=(inputs, targets)) + # else: + hawq_cmp = Hessian(self.model, criterion=self.criterion, data=(inputs, targets)) avg_traces_lst = hawq_cmp.calculate_trace(max_Iter=self.max_Iteration, tolerance=self.tolerance) # fiter none weight layer and save weight layer to match perturbation computation From 312b8aaa85ccf2f5ecbc7bdfbeea5d8e78dbc829 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Tue, 15 Nov 2022 14:10:03 +0800 Subject: [PATCH 048/128] comment test --- test/strategy/test_hawq_wenhuach.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index 4443cd8d486..a470f679cf8 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -11,7 +11,7 @@ from neural_compressor.adaptor.pytorch import TemplateAdaptor from neural_compressor.adaptor import FRAMEWORKS import shutil -from neural_compressor.strategy.hawq import Hawq_top, fix_seed +from neural_compressor.strategy.hawq_wenhuach import Hawq_top, fix_seed fix_seed(1) @@ -77,7 +77,8 @@ def test_run_hawq_one_trial(self): quantizer() if __name__ == "__main__": - unittest.main() + pass + # unittest.main() # def build_hessian_trace(): # hessian_trace_config_yaml = ''' From 90ef088d6364e88577bb100c23f1f3d94fc8c0d6 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Tue, 15 Nov 2022 14:19:56 +0800 Subject: [PATCH 049/128] add activation quantized loss eval --- neural_compressor/strategy/hawq_metric.py | 146 ++++++++++++++-------- 1 file changed, 97 insertions(+), 49 deletions(-) diff --git a/neural_compressor/strategy/hawq_metric.py b/neural_compressor/strategy/hawq_metric.py index acbcd98d740..63db277ab14 100644 --- a/neural_compressor/strategy/hawq_metric.py +++ b/neural_compressor/strategy/hawq_metric.py @@ -37,7 +37,7 @@ def fixed_seed(seed): torch.cuda.manual_seed_all(seed) #parallel cpu torch.backends.cudnn.deterministic = True #make sure results are same on cpu/gpu torch.backends.cudnn.benchmark = True #accelerator -def calculate_params_gradients(model): +def cal_params_grad(model): """ get the gradients and parameters from given model Args: @@ -55,17 +55,7 @@ def calculate_params_gradients(model): params.append(parm) grads.append(0. if parm.grad is None else parm.grad+0.) return params, grads -def calculate_inner_product(list_x,list_y): - """Compute the inner product of two lists of variables list_x,list_y - Args: - list_x: input list variables - list_y: input list variables - return: - sum of inner product - """ - return sum([torch.sum(x * y) for (x, y) in zip(list_x, list_y)]) - -def calculate_vector_product(gradsH, params, v): +def cal_vector_product(gradsH, params, v): """compute the hessian vector product by torch.autograd.grad. 
Agrs: gradsH: gradient at current point @@ -105,7 +95,7 @@ def ptq_calibrate(model, data_loader,num_cal): for sample in calibrate_samples: model(sample) return model -def calculate_perturbation(model_qnt,model_fp32)->dict: +def cal_weights_pertubation(model_qnt,model_fp32)->dict: """calculate weights quantized perturbation using L2 normal Args: model_qnt: quantized model @@ -126,6 +116,44 @@ def calculate_perturbation(model_qnt,model_fp32)->dict: pertur_pair['value']=diff_l2 pertur_lst.append(pertur_pair) return pertur_lst +def cal_act_pertubation(model_fp32,model_qnt,data_loader,num_cal=100)->dict: + """calculate weights quantized perturbation using L2 normal + Args: + model_qunt: quantized model + model_fp32: float model + data_loader: path to datasets + return: + pretur_lst: dict + + """ + ns.prepare_model_outputs(model_fp32, model_qnt) + model_fp32.cpu() + model_fp32.eval() + model_qnt.cpu() + model_qnt.eval() + obv_samples=[] + i=0 + for inputs, targets in data_loader: + obv_samples.append(inputs) + i=i+1 + if i>=num_cal: + break + with torch.no_grad(): + for image in obv_samples: + model_fp32(image) + model_qnt(image) + act_qnt_pairs=[] + act_compare_dict = ns.get_matching_activations(model_fp32, q_module=model_qnt) + for key in act_compare_dict: + op_float_tensor=(act_compare_dict[key]['float'][0]) + op_qnt_tensor=act_compare_dict[key]['quantized'][0].dequantize() + diff_l2=(torch.norm(op_float_tensor-op_qnt_tensor,p=2)**2) + pertur_pair={"layer_name":'',"value":0} + pertur_pair['layer_name']=key + pertur_pair['value']=diff_l2 + act_qnt_pairs.append(pertur_pair) + return act_qnt_pairs + class Hessian(): """This class used to compute each layer hessian trace from given FP32 model """ @@ -153,7 +181,7 @@ def __init__(self,model,criterion, data=None, dataloader=None,device='cpu') -> N outputs=self.model(self.inputs) loss=self.criterion(outputs,self.targets) loss.backward(create_graph=True) - params, gradSH=calculate_params_gradients(self.model) + params, gradSH=cal_params_grad(self.model) self.params=params self.gradSH=gradSH @@ -176,8 +204,8 @@ def calculate_trace(self,max_Iter=100, tolerance=1e-3): trace_pair={"layer_name":" ", "trace":0} self.model.zero_grad() for i in range(max_Iter): - hv=calculate_vector_product(i_grad,i_param,i_v) # hessian vector - trace_vhv_cur=calculate_inner_product(hv,v).cpu().item()#current point + hv=cal_vector_product(i_grad,i_param,i_v) # hessian vector + trace_vhv_cur=sum([torch.sum(x * y) for (x, y) in zip(hv, v)]) trace_vhv.append(trace_vhv_cur) difference=(np.mean(trace_vhv)-trace)/(abs(trace)+1e-6) if abs(difference) None: self.max_Iteration=100 self.enable_op_fuse=True self.tolerance=1e-6 - self.max_cal_sample=100 + self.max_cal_sample=1 self.quantize_mode='ptq' self.list_dtype=['int8','fp32'] logging.info("Current parameters config for Hutchinson’s algorithm as below:") @@ -230,6 +258,7 @@ def __init__(self,model,yaml_trace=None,yaml_cpu=None,dataloader=None) -> None: model_tmp.eval() self.model_fused= fuse_fx(model_tmp) self.model_fused.eval() + self.hawq_level='L3' #L1:top engievalue L2:avg_trace L3:avg_trace+pertubation def get_init_config(self)->dict: """ @@ -238,6 +267,7 @@ def get_init_config(self)->dict: for inputs, targets in self.dataloader: break #Hessian average trace computation + fixed_seed(self.random_seed) with torch.enable_grad(): if self.enable_op_fuse: hawq_cmp=Hessian(self.model_fused,criterion=self.criterion,data=(inputs,targets)) @@ -246,40 +276,58 @@ def get_init_config(self)->dict: 
avg_traces_lst=hawq_cmp.calculate_trace(max_Iter=self.max_Iteration,tolerance=self.tolerance) #fiter none weight layer and save weight layer to match perturbation computation - avg_traces_lst_weight=[] - for avg_trace_i in avg_traces_lst: - if 'weight' in avg_trace_i['layer_name']: - avg_traces_lst_weight.append(avg_trace_i) - # avg_traces_lst_sorted=sorted(avg_traces_lst,key=lambda x:x["trace"], reverse=True) - if self.quantize_mode=='ptq': - #PTQ quantization - qconfig = get_default_qconfig("fbgemm") - qconfig_dict={"":qconfig} #enable all layers/tensor to quantize - #calibrate - model_prepared=prepare_fx(self.model, qconfig_dict) - model_prepared=ptq_calibrate(model_prepared,data_loader=self.dataloader,num_cal=self.max_cal_sample) - model_prepared.cpu() - model_all_qnt=convert_fx(model_prepared) - #calculate perturbation - pertu_list=calculate_perturbation(model_fp32=self.model,model_qnt=model_all_qnt) - #calculate omiga - for omiga_i in pertu_list: - for avg_trace_i in avg_traces_lst: - if avg_trace_i['layer_name']==omiga_i['layer_name']: - avg_trace_i['trace']=avg_trace_i['trace']*omiga_i['value'] - # for avg_trace_i, omiga_i in zip(avg_traces_lst_weight,pertu_list): - # omig_pair={"layer_name":" ", "value":0} - # omig_val=avg_trace_i['trace']*omiga_i['value'] - # omig_pair['layer_name']=avg_trace_i['layer_name'] - # omig_pair['value']=omig_val - # omig_list.append(omig_pair) - # omig_list_sorted=sorted(omig_list,key=lambda x:x['value'],reverse=True) - omig_list_sorted=sorted(avg_traces_lst,key=lambda x:x['trace'],reverse=True) + if self.hawq_level=='L2': + avg_traces_lst_sorted=sorted(avg_traces_lst,key=lambda x:x["trace"], reverse=True) + logging.info("avg_traces desending sorted is:") + for i in avg_traces_lst_sorted: + logging.info(i) + list_sorted=avg_traces_lst_sorted + if self.hawq_level=='L3': + if self.quantize_mode=='ptq': + #PTQ quantization + qconfig = get_default_qconfig("fbgemm") + qconfig_dict={"":qconfig} #enable all layers/tensor to quantize + #calibrate + model_prepared=prepare_fx(self.model, qconfig_dict) + model_prepared=ptq_calibrate(model_prepared,data_loader=self.dataloader,num_cal=self.max_cal_sample) + model_prepared.cpu() + model_all_qnt=convert_fx(model_prepared) + #calculate weights quantized perturbation + weights_pertu_lst=cal_weights_pertubation(model_fp32=self.model,model_qnt=model_all_qnt) + #merge weights quantized perturbation + #generally, fused ops=quantized weights+quantized activation + avg_trace_i=0 + omigs=[] + for wct_i in weights_pertu_lst: + omig_pair={"layer_name":" ", "trace":0} + tmp_value=avg_traces_lst[avg_trace_i]['trace']*wct_i['value'] + omig_pair['layer_name']=avg_traces_lst[avg_trace_i]['layer_name'] + omig_pair['trace']=tmp_value + avg_trace_i=avg_trace_i+2 + omigs.append(omig_pair) + act_pertu_lst=cal_act_pertubation(model_fp32=self.model, model_qnt=model_all_qnt,data_loader=self.dataloader,num_cal=self.max_cal_sample) + avg_trace_i=1 + for act_i in act_pertu_lst: + omig_pair={"layer_name":" ", "trace":0} + tmp_value=avg_traces_lst[avg_trace_i]['trace']+act_i['value'] + omig_pair['layer_name']=avg_traces_lst[avg_trace_i]['layer_name'] + omig_pair['trace']=tmp_value + avg_trace_i=avg_trace_i+2 + omigs.append(omig_pair) + + # for avg_trace_i, omiga_i in zip(avg_traces_lst_weight,pertu_list): + # omig_pair={"layer_name":" ", "value":0} + # omig_val=avg_trace_i['trace']*omiga_i['value'] + # omig_pair['layer_name']=avg_trace_i['layer_name'] + # omig_pair['value']=omig_val + # omig_list.append(omig_pair) + # 
omig_list_sorted=sorted(omig_list,key=lambda x:x['value'],reverse=True) + omig_list_sorted=sorted(omigs,key=lambda x:x['trace'],reverse=True) + list_sorted=omig_list_sorted tune_init_config_pairs=[] - # - for i in omig_list_sorted: + for i in list_sorted: tune_init_config_pair={"op_name":'',"op_type":'','trace':0} - if i['layer_name']==omig_list_sorted[0]['layer_name']: + if i['layer_name']==list_sorted[0]['layer_name']: tune_init_config_pair['op_name']=i['layer_name'] tune_init_config_pair['op_type']=self.list_dtype[-1] #setup as float op tune_init_config_pair['trace']=float(i['trace']) From 84fe8829ab4f018eb9de84aee182be96a345e47a Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Tue, 15 Nov 2022 14:21:17 +0800 Subject: [PATCH 050/128] fixed seed for dummy datasets --- test/strategy/test_hessian_trace_inc.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/test/strategy/test_hessian_trace_inc.py b/test/strategy/test_hessian_trace_inc.py index f05b47ca3aa..5285bc619c7 100644 --- a/test/strategy/test_hessian_trace_inc.py +++ b/test/strategy/test_hessian_trace_inc.py @@ -10,7 +10,21 @@ from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader from neural_compressor.adaptor.pytorch import TemplateAdaptor from neural_compressor.strategy.hawq_metric import Hawq_top - +import random +import numpy as np +def fixed_seed(seed): + """Fixed rand seed to make sure results are same in different times on different devices.Eg CPU/GPU + Args: + seed: an integer number + return: None + """ + np.random.seed(seed) #random + random.seed(seed) + torch.manual_seed(seed) #cpu + torch.cuda.manual_seed_all(seed) #parallel cpu + torch.backends.cudnn.deterministic = True #make sure results are same on cpu/gpu + torch.backends.cudnn.benchmark = True #accelerator +fixed_seed(100) def build_hessian_trace(): hessian_trace_config_yaml=''' loss: From f221068657823d6615c5a782dadc0698c4e4275f Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Tue, 15 Nov 2022 14:24:18 +0800 Subject: [PATCH 051/128] for independence hawq tuning strategic --- neural_compressor/strategy/hawq.py | 202 +++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 neural_compressor/strategy/hawq.py diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py new file mode 100644 index 00000000000..17231ceec9d --- /dev/null +++ b/neural_compressor/strategy/hawq.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
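+#
+# A minimal sketch of the op-ordering metric this strategy builds on (an
+# illustration inferred from the hawq_metric utilities, not a definitive
+# upstream API): each quantizable op i is scored by
+#     Omega_i ~= Tr(H_i) * ||Q(W_i) - W_i||_2^2,
+# i.e. the average Hessian trace of the op's parameters scaled by its weight
+# quantization perturbation, and ops are fallen back to fp32 starting from the
+# largest Omega_i. The helper name rank_ops_by_hawq below is hypothetical:
+#
+#     def rank_ops_by_hawq(avg_traces, perturbations):
+#         """Both args map op_name -> float; return op names, most sensitive first."""
+#         omega = {op: avg_traces[op] * perturbations.get(op, 1.0) for op in avg_traces}
+#         return sorted(omega, key=omega.get, reverse=True)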
+ +import copy +import numpy as np +from collections import OrderedDict +from .strategy import strategy_registry, TuneStrategy +from ..utils import logger + +from .st_utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler +from .st_utils.tuning_structs import OpTuningConfig +from .st_utils.tuning_space import TUNING_ITEMS_LST + +@strategy_registry +class HawqTuneStrategy(TuneStrategy): + """The basic tuning strategy which tunes the low precision model with below order. + + 1. modelwise tuning for all quantizable ops. + 2. fallback tuning from bottom to top to decide the priority of which op has biggest impact + on accuracy. + 3. incremental fallback tuning by fallbacking multiple ops with the order got from #2. + + Args: + model (object): The FP32 model specified for low precision tuning. + conf (Class): The Conf class instance initialized from user yaml + config file. + q_dataloader (generator): Data loader for calibration, mandatory for + post-training quantization. + It is iterable and should yield a tuple (input, + label) for calibration dataset containing label, + or yield (input, _) for label-free calibration + dataset. The input could be a object, list, tuple or + dict, depending on user implementation, as well as + it can be taken as model input. + q_func (function, optional): Reserved for future use. + eval_dataloader (generator, optional): Data loader for evaluation. It is iterable + and should yield a tuple of (input, label). + The input could be a object, list, tuple or dict, + depending on user implementation, as well as it can + be taken as model input. The label should be able + to take as input of supported metrics. If this + parameter is not None, user needs to specify + pre-defined evaluation metrics through configuration + file and should set "eval_func" parameter as None. + Tuner will combine model, eval_dataloader and + pre-defined metrics to run evaluation process. + eval_func (function, optional): The evaluation function provided by user. + This function takes model as parameter, and + evaluation dataset and metrics should be + encapsulated in this function implementation and + outputs a higher-is-better accuracy scalar value. + + The pseudo code should be something like: + + def eval_func(model): + input, label = dataloader() + output = model(input) + accuracy = metric(output, label) + return accuracy + dicts (dict, optional): The dict containing resume information. + Defaults to None. + + """ + + def __init__(self, model, conf, q_dataloader, q_func=None, + eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None): + super( + HawqTuneStrategy, + self).__init__( + model, + conf, + q_dataloader, + q_func, + eval_dataloader, + eval_func, + dicts, + q_hooks) + + def next_tune_cfg(self): + """The generator of yielding next tuning config to traverse by concrete strategies + according to last tuning result. + + Yields: + tune_config (dict): It's a dict containing the tuning configuration to run. 
+ """ + from copy import deepcopy + tuning_space = self.tuning_space + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + for calib_sampling_size in calib_sampling_size_lst: + # Initialize the tuning config for each op according to the quantization approach + op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() + # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) + early_stop_tuning = False + stage1_cnt = 0 + quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] + quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] + stage1_max = 1e9 # TODO set a more appropriate value + op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], + op_item_dtype_dict, initial_op_tuning_cfg) + for op_tuning_cfg in op_wise_tuning_sampler: + stage1_cnt += 1 + if early_stop_tuning and stage1_cnt > stage1_max: + logger.info("Early stopping the stage 1.") + break + op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + yield op_tuning_cfg + # Fallback the ops supported both static and dynamic from static to dynamic + # Tuning items: None + if self.cfg.quantization.approach == 'post_training_auto_quant': + static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if + item in tuning_space.query_items_by_quant_mode('dynamic')] + if static_dynamic_items: + logger.info("Fallback all ops that support both dynamic and static to dynamic.") + else: + logger.info("Non ops that support both dynamic") + + new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) + for item in static_dynamic_items: + new_op_tuning_cfg[item.name] = self.initial_dynamic_cfg_based_on_static_cfg( + new_op_tuning_cfg[item.name]) + new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + yield new_op_tuning_cfg + best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg) + + # Fallback + for target_dtype in ['bf16', 'fp32']: + target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) + fallback_items_lst = [item for item in quant_ops if item in target_type_lst] + if fallback_items_lst: + logger.info(f"Start to fallback op to {target_dtype} one by one.") + self._fallback_started() + #fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, + self.calib_dataloader, + method_args = {'name': 'hessian_trace'}) + fallback_items_name_lst = sorted(ops_sensitivity, key = lambda items: items[1], reverse=True) + + op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) + initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) + fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=False) + op_fallback_acc_impact = OrderedDict() + for op_index, op_tuning_cfg in enumerate(fallback_sampler): + op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + yield op_tuning_cfg + acc, _ = self.last_tune_result + op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc + + + # do accumulated fallback according to the order in the previous stage + if len(op_fallback_acc_impact) > 0: + ordered_ops = sorted(op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better) + 
op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) + logger.info(f"Start to accumulate fallback to {target_dtype}.") + initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) + fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=True) + for op_tuning_cfg in fallback_sampler: + op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + yield op_tuning_cfg + + def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig): + op_state = op_static_cfg.get_state() + op_name = op_static_cfg.op_name + op_type = op_static_cfg.op_type + op_quant_mode = 'dynamic' + tuning_space = self.tuning_space + dynamic_state = {} + for att in ['weight', 'activation']: + if att not in op_state: + continue + for item_name, item_val in op_state[att].items(): + att_item = (att, item_name) + if att_item not in TUNING_ITEMS_LST: + continue + if tuning_space.query_item_option((op_name, op_type), op_quant_mode, att_item, item_val): + dynamic_state[att_item] = item_val + else: + quant_mode_item = tuning_space.query_quant_mode_item((op_name, op_type), op_quant_mode) + tuning_item = quant_mode_item.get_option_by_name(att_item) + dynamic_state[att_item] = tuning_item.options[0] if tuning_item else None + return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) + + \ No newline at end of file From c6ebf79959d6405e5e92f53064c4657a77a23b21 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 15 Nov 2022 14:38:19 +0800 Subject: [PATCH 052/128] add a fallback ut --- neural_compressor/strategy/basic.py | 10 +-- .../strategy/{ => st_utils}/hawq_metric.py | 0 .../strategy/{ => st_utils}/hawq_wenhuach.py | 0 test/strategy/test_basic_fallback.py | 73 +++++++++++++++++++ 4 files changed, 78 insertions(+), 5 deletions(-) rename neural_compressor/strategy/{ => st_utils}/hawq_metric.py (100%) rename neural_compressor/strategy/{ => st_utils}/hawq_wenhuach.py (100%) create mode 100644 test/strategy/test_basic_fallback.py diff --git a/neural_compressor/strategy/basic.py b/neural_compressor/strategy/basic.py index 3cc4e38bde2..184a15996f7 100644 --- a/neural_compressor/strategy/basic.py +++ b/neural_compressor/strategy/basic.py @@ -143,11 +143,11 @@ def next_tune_cfg(self): if fallback_items_lst: logger.info(f"Start to fallback op to {target_dtype} one by one.") self._fallback_started() - #fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up - ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, - self.calib_dataloader, - method_args = {'name': 'hessian_trace'}) - fallback_items_name_lst = sorted(ops_sensitivity, key = lambda items: items[1], reverse=True) + fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + # ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, + # self.calib_dataloader, + # method_args = {'name': 'hessian_trace'}) + #fallback_items_name_lst = sorted(ops_sensitivity, key = lambda items: items[1], reverse=True) op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) diff --git a/neural_compressor/strategy/hawq_metric.py b/neural_compressor/strategy/st_utils/hawq_metric.py similarity index 100% rename from neural_compressor/strategy/hawq_metric.py rename to 
neural_compressor/strategy/st_utils/hawq_metric.py diff --git a/neural_compressor/strategy/hawq_wenhuach.py b/neural_compressor/strategy/st_utils/hawq_wenhuach.py similarity index 100% rename from neural_compressor/strategy/hawq_wenhuach.py rename to neural_compressor/strategy/st_utils/hawq_wenhuach.py diff --git a/test/strategy/test_basic_fallback.py b/test/strategy/test_basic_fallback.py new file mode 100644 index 00000000000..352c81850c4 --- /dev/null +++ b/test/strategy/test_basic_fallback.py @@ -0,0 +1,73 @@ +import torch +import unittest +import os +import sys +import copy +import torchvision +import torchvision.transforms as transforms +from torch.utils.data import DataLoader +from neural_compressor.data import DATASETS +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.adaptor.pytorch import TemplateAdaptor +from neural_compressor.adaptor import FRAMEWORKS +import shutil + + +def build_ptq_yaml(): + fake_yaml = ''' + model: + name: resnet18 + framework: pytorch_fx + tuning: + strategy: + name: basic + accuracy_criterion: + absolute: -1 + exit_policy: + timeout: 0 + ''' + with open('ptq_yaml.yaml', 'w', encoding="utf-8") as f: + f.write(fake_yaml) + +class TestPytorchAdaptor(unittest.TestCase): + framework_specific_info = {"device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "workspace_path": None} + framework = "pytorch" + adaptor = FRAMEWORKS[framework](framework_specific_info) + model = torchvision.models.resnet18() + + # model = torch.quantization.QuantWrapper(model) + + @classmethod + def setUpClass(self): + self.i = 0 + build_ptq_yaml() + + + @classmethod + def tearDownClass(self): + os.remove('ptq_yaml.yaml') + shutil.rmtree('./saved', ignore_errors=True) + shutil.rmtree('runs', ignore_errors=True) + + def test_basic_fallback(self): + def eval_func(model): + self.i -= 1 + return self.i + + from neural_compressor.experimental import Quantization, common + model = copy.deepcopy(self.model) + quantizer = Quantization('ptq_yaml.yaml') + quantizer.eval_func = eval_func + dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = model + q_model = quantizer() + self.assertTrue(q_model is None) + +if __name__ == "__main__": + unittest.main() From 69f6c2a3c1944e16bb709464c32b630568c3ed2e Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Wed, 16 Nov 2022 14:03:42 +0800 Subject: [PATCH 053/128] update test file --- test/strategy/test_hawq_wenhuach.py | 41 ++++++++++++++++++----------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index a470f679cf8..ad7939d5d84 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -11,7 +11,7 @@ from neural_compressor.adaptor.pytorch import TemplateAdaptor from neural_compressor.adaptor import FRAMEWORKS import shutil -from neural_compressor.strategy.hawq_wenhuach import Hawq_top, fix_seed +from neural_compressor.strategy.st_utils.hawq_wenhuach import Hawq_top, fix_seed fix_seed(1) @@ -19,7 +19,7 @@ def build_ptq_yaml(): fake_yaml = ''' model: name: imagenet - framework: pytorch + framework: pytorch_fx quantization: calibration: evaluation: @@ -28,12 +28,12 @@ def build_ptq_yaml(): topk: 1 tuning: strategy: - name: mse + name: hawq accuracy_criterion: relative: -0.1 random_seed: 9527 exit_policy: - max_trials: 1 + max_trials: 3 workspace: path: saved ''' @@ -50,10 +50,17 @@ class TestPytorchAdaptor(unittest.TestCase): adaptor = FRAMEWORKS[framework](framework_specific_info) model = torchvision.models.resnet18() + # from collections import OrderedDict + # model = torch.nn.Sequential(OrderedDict([ + # ('conv1', torch.nn.Conv2d(3, 2, 1, 1)), + # ('conv2', torch.nn.Conv2d(2, 1, 1, 1)), + # ('flat', torch.nn.Flatten()), + # ])) # model = torch.quantization.QuantWrapper(model) @classmethod def setUpClass(self): + self.i = 0 build_ptq_yaml() @@ -63,22 +70,26 @@ def tearDownClass(self): shutil.rmtree('./saved', ignore_errors=True) shutil.rmtree('runs', ignore_errors=True) + + def test_run_hawq_one_trial(self): + def eval_func(model): + self.i -= 1 + return self.i from neural_compressor.experimental import Quantization, common model = copy.deepcopy(self.model) - for fake_yaml in ['ptq_yaml.yaml']: - if fake_yaml == 'ptq_yaml.yaml': - model.eval() - quantizer = Quantization(fake_yaml) - dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = model - quantizer() + + quantizer = Quantization('ptq_yaml.yaml') + quantizer.eval_func = eval_func + dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = model + quantizer() if __name__ == "__main__": - pass - # unittest.main() + + unittest.main() # def build_hessian_trace(): # hessian_trace_config_yaml = ''' From 85f1d203461f5919841f5135e9795a30ae8e804a Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Thu, 17 Nov 2022 15:04:32 +0800 Subject: [PATCH 054/128] tiny update --- neural_compressor/strategy/hawq.py | 329 +++++++++++++++++++++------- neural_compressor/strategy/mse.py | 3 +- test/strategy/test_hawq_wenhuach.py | 10 +- 3 files changed, 263 insertions(+), 79 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 17231ceec9d..3db5cf0aed5 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -18,6 +18,9 @@ import copy import numpy as np from collections import OrderedDict + +import torch.nn + from .strategy import strategy_registry, TuneStrategy from ..utils import logger @@ -25,6 +28,154 @@ from .st_utils.tuning_structs import OpTuningConfig from .st_utils.tuning_space import TUNING_ITEMS_LST + +class HessianTrace: + def __init__(self, model, conf, adaptor, op_cfgs_list, dataloader): + self.model = model + self.conf = conf ##config + self.op_cfgs_list = op_cfgs_list ##op to get + self.dataloader = dataloader + self.adaptor = adaptor + self.max_iter = 500 + self.tolerance = 1e-5 + self.eps = 1e-6 + self.index = 0 + + # def apply_init(self): + # trace_per_op = self._cal_trace() + # if not trace_per_op: + # raise RuntimeError('Failed to calculate hessian traces!') + # + # perturbations = self._calc_quantization_noise() + # configuration_metric = self._calc_hawq_metric_per_configuration( + # perturbations, trace_per_op) + # config_index = self.choose_configuration(configuration_metric) + # chosen_config = self.op_cfgs_list[config_index] + # return chosen_config, trace_per_op + + def get_device(self, model: torch.nn.Module): + for n, p in model.named_parameters(): + return p.data.device + + def get_gradient(self, model, data, criterion, op_list, device="cpu", retrain_graph=False): + model.zero_grad() + input = data[0] + target = data[1] + output = model(input) + loss = criterion(output, target) + loss.backward(retain_graph=retrain_graph) + gradients = {} + for n, p in model.named_parameters(): + if n in op_list: + continue + gradients[n] = 0 + if p.grad != None: + gradients[n] = p.grad + return gradients + + def get_avg_trace(self, num_batches=2): + """ + Estimates average hessian trace for each parameter + """ + assert num_batches > 0 + ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] + ##num_all_data = num_data_iter * self.dataloader.batch_size + op_list = [item.name for item in self.op_cfgs_list] + criterion = torch.nn.CrossEntropyLoss() ##TODO setting this in config + device = self.get_device(self.model) + + for step, batch in enumerate(self.dataloader): + gradient_dict = self.get_gradient(self.model, batch,criterion, op_list, device=device, retrain_graph=True) + tmp = 1 + if step == num_batches - 1: + break + + + weight_vhp = [] + w_avg_total_trace = 0. + w_avg_traces_per_iter = [] + mean_avg_traces_per_param = None + act_vhp = [] + a_avg_total_trace = 0. + a_avg_traces_per_iter = [] + mean_avg_traces_per_act = None + + for i in range(max_iter): + weight_vhp_list, w_v, \ + act_vhp_list, a_v, op_act_grad = self.adaptor.get_2order_grad(self.model, + criterion, + self.dataloader, + num_data_iter, + qop_list) + if not weight_vhp: + weight_vhp = [np.random.randn(*p.shape) for p in w_v] + for vhp_curr in weight_vhp_list: + weight_vhp = [a + b * float(self.dataloader.batch_size) + 0. 
\ + for a, b in zip(weight_vhp, vhp_curr)] + weight_vhp = [a / float(num_all_data) for a in weight_vhp] + avg_traces_per_param = [np.sum(a * b) / a.size for (a, b) in zip(weight_vhp, w_v)] + w_avg_traces_per_iter.append(avg_traces_per_param) + mean_avg_traces_per_param = np.mean(w_avg_traces_per_iter, axis=0) + w_mean_avg_total_trace = np.sum(mean_avg_traces_per_param) + + w_diff_avg = abs(w_mean_avg_total_trace - w_avg_total_trace) / \ + (w_avg_total_trace + diff_eps) + w_avg_total_trace = w_mean_avg_total_trace + logger.info( + '{}# weights difference_avg={} avg_trace={}'.format( + i, w_diff_avg, w_avg_total_trace)) + + if not act_vhp: + act_vhp = [np.random.randn(*p.shape) for p in a_v] + for vhp_curr in act_vhp_list: + act_vhp = [a + b * float(self.dataloader.batch_size) + 0. \ + for a, b in zip(act_vhp, vhp_curr)] + act_vhp = [a / float(num_all_data) for a in act_vhp] + avg_traces_per_act = [np.sum(a * b) / a.size for (a, b) in zip(act_vhp, a_v)] + a_avg_traces_per_iter.append(avg_traces_per_act) + mean_avg_traces_per_act = np.mean(a_avg_traces_per_iter, axis=0) + a_mean_avg_total_trace = np.sum(mean_avg_traces_per_act) + + a_diff_avg = abs(a_mean_avg_total_trace - a_avg_total_trace) / \ + (a_avg_total_trace + diff_eps) + a_avg_total_trace = a_mean_avg_total_trace + logger.info( + '{}# activation difference_avg={} avg_trace={}'.format( + i, a_diff_avg, a_avg_total_trace)) + + if w_diff_avg < tolerance and a_diff_avg < tolerance: + return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad + + return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad + + def _cal_trace(self): + """ + Calculate the trace for both weight and activation per layer + """ + pass + # trace_estimator = HessianTraceEstimator(self.model, + # self.conf, + # self.adaptor, + # self.op_cfgs_list, + # self.dataloader) + # w_avg_trace, a_avg_trace, op_act_grad = trace_estimator.get_avg_trace() + # + # # mapping trace to op per op_weight_mapping + # weights_name = self.adaptor.get_all_weight_names(self.model) + # op_weight_mapping = self.get_op_weight_mapping() + # trace_per_op = OrderedDict() + # w_op_trace_info = np.zeros(len(op_weight_mapping)) + # for i, (op_name, w_name) in enumerate(op_weight_mapping.items()): + # index = weights_name.index(w_name) + # w_op_trace_info[i] = w_avg_trace[index] + # act_trace = 0.0 + # if op_name in op_act_grad: + # a_index = op_act_grad.index(op_name) + # act_trace = a_avg_trace[a_index] + # trace_per_op[op_name] = (w_avg_trace[index], act_trace) + # return trace_per_op + + @strategy_registry class HawqTuneStrategy(TuneStrategy): """The basic tuning strategy which tunes the low precision model with below order. 
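The get_avg_trace scaffolding above is still unfinished in this work-in-progress patch; as a point of reference, here is a minimal self-contained sketch of the Hutchinson-style estimator it appears to be heading toward (the function name hutchinson_avg_trace and the n_samples default are illustrative assumptions, not part of the patch):

import torch

def hutchinson_avg_trace(model, loss, n_samples=20):
    """Estimate the average Hessian trace per parameter tensor via Hutchinson's method."""
    params = [p for p in model.parameters() if p.requires_grad]
    grads = torch.autograd.grad(loss, params, create_graph=True)
    traces = [0.0] * len(params)
    for _ in range(n_samples):
        # Rademacher probe vectors v with entries in {-1, +1}
        vs = [torch.randint_like(p, high=2) * 2.0 - 1.0 for p in params]
        # Hessian-vector products H_i v_i for every parameter tensor
        hvs = torch.autograd.grad(grads, params, grad_outputs=vs, retain_graph=True)
        for i, (hv, v) in enumerate(zip(hvs, vs)):
            # v^T (H v) / numel estimates Tr(H_i) / numel, the per-element average trace
            traces[i] += (hv * v).sum().item() / v.numel()
    return [t / n_samples for t in traces]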
@@ -91,6 +242,37 @@ def __init__(self, model, conf, q_dataloader, q_func=None, q_hooks) def next_tune_cfg(self): + from copy import deepcopy + tuning_space = self.tuning_space + calib_size = tuning_space.root_item.get_option_by_name('calib_sampling_size').options[0] ##TODO suppoprt list + + # Initialize the tuning config for each op according to the quantization approach + op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() + + quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] + quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] + + target_dtype = "fp32" ##TODO support bf16 + target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) + fp_op_list = [item for item in quant_ops if item in target_type_lst] + orig_eval = True + if self._fp32_model.training: + orig_eval = False + self._fp32_model.train() + ht = HessianTrace(self._fp32_model, self.cfg, self.adaptor, fp_op_list, self.calib_dataloader) + ht.get_avg_trace() + # if orig_eval: + # self._fp32_model.eval() + # ht.get_avg_trace() + # tmp = 1 + # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + # ops_sensitivity = self.adaptor.get_hessian_trace(self._fp32_model, + # self.calib_dataloader, + # self. + # method_args={'name': 'hessian_trace'}) + # tmp = 1 + + def next_tune_cfg_bk(self): """The generator of yielding next tuning config to traverse by concrete strategies according to last tuning result. @@ -100,84 +282,85 @@ def next_tune_cfg(self): from copy import deepcopy tuning_space = self.tuning_space calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options - for calib_sampling_size in calib_sampling_size_lst: - # Initialize the tuning config for each op according to the quantization approach - op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) - early_stop_tuning = False - stage1_cnt = 0 - quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] - quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] - stage1_max = 1e9 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) - for op_tuning_cfg in op_wise_tuning_sampler: - stage1_cnt += 1 - if early_stop_tuning and stage1_cnt > stage1_max: - logger.info("Early stopping the stage 1.") - break + + calib_sampling_size = calib_sampling_size_lst[0] + # Initialize the tuning config for each op according to the quantization approach + op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() + # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) + early_stop_tuning = False + stage1_cnt = 0 + quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] + quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] + stage1_max = 1e9 # TODO set a more appropriate value + op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], + op_item_dtype_dict, initial_op_tuning_cfg) + # for op_tuning_cfg in op_wise_tuning_sampler: + # stage1_cnt += 1 + # if early_stop_tuning and stage1_cnt > stage1_max: + # logger.info("Early stopping the stage 
1.") + # break + # op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + # yield op_tuning_cfg + # Fallback the ops supported both static and dynamic from static to dynamic + # Tuning items: None + # if self.cfg.quantization.approach == 'post_training_auto_quant': + # static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if + # item in tuning_space.query_items_by_quant_mode('dynamic')] + # if static_dynamic_items: + # logger.info("Fallback all ops that support both dynamic and static to dynamic.") + # else: + # logger.info("Non ops that support both dynamic") + # + # new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) + # for item in static_dynamic_items: + # new_op_tuning_cfg[item.name] = self.initial_dynamic_cfg_based_on_static_cfg( + # new_op_tuning_cfg[item.name]) + # new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + # yield new_op_tuning_cfg + best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg) + + # Fallback + for target_dtype in ['bf16', 'fp32']: + target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) + fallback_items_lst = [item for item in quant_ops if item in target_type_lst] + if fallback_items_lst: + logger.info(f"Start to fallback op to {target_dtype} one by one.") + self._fallback_started() + # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up + ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, + self.calib_dataloader, + method_args={'name': 'hessian_trace'}) + + fallback_items_name_lst = sorted(ops_sensitivity, key=lambda items: items[1], reverse=True) + + op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) + initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) + fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=False) + op_fallback_acc_impact = OrderedDict() + for op_index, op_tuning_cfg in enumerate(fallback_sampler): op_tuning_cfg['calib_sampling_size'] = calib_sampling_size yield op_tuning_cfg - # Fallback the ops supported both static and dynamic from static to dynamic - # Tuning items: None - if self.cfg.quantization.approach == 'post_training_auto_quant': - static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if - item in tuning_space.query_items_by_quant_mode('dynamic')] - if static_dynamic_items: - logger.info("Fallback all ops that support both dynamic and static to dynamic.") - else: - logger.info("Non ops that support both dynamic") - - new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) - for item in static_dynamic_items: - new_op_tuning_cfg[item.name] = self.initial_dynamic_cfg_based_on_static_cfg( - new_op_tuning_cfg[item.name]) - new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - yield new_op_tuning_cfg - best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg) - - # Fallback - for target_dtype in ['bf16', 'fp32']: - target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) - fallback_items_lst = [item for item in quant_ops if item in target_type_lst] - if fallback_items_lst: - logger.info(f"Start to fallback op to {target_dtype} one by one.") - self._fallback_started() - #fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up - ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, - 
self.calib_dataloader, - method_args = {'name': 'hessian_trace'}) - fallback_items_name_lst = sorted(ops_sensitivity, key = lambda items: items[1], reverse=True) - - op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) + acc, _ = self.last_tune_result + op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc + + # do accumulated fallback according to the order in the previous stage + if len(op_fallback_acc_impact) > 0: + ordered_ops = sorted(op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better) + op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) + logger.info(f"Start to accumulate fallback to {target_dtype}.") initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) - op_fallback_acc_impact = OrderedDict() - for op_index, op_tuning_cfg in enumerate(fallback_sampler): + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=True) + for op_tuning_cfg in fallback_sampler: op_tuning_cfg['calib_sampling_size'] = calib_sampling_size yield op_tuning_cfg - acc, _ = self.last_tune_result - op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc - - - # do accumulated fallback according to the order in the previous stage - if len(op_fallback_acc_impact) > 0: - ordered_ops = sorted(op_fallback_acc_impact.keys(), - key=lambda key: op_fallback_acc_impact[key], - reverse=self.higher_is_better) - op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) - logger.info(f"Start to accumulate fallback to {target_dtype}.") - initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) - for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - yield op_tuning_cfg - - def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig): + + def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg: OpTuningConfig): op_state = op_static_cfg.get_state() op_name = op_static_cfg.op_name op_type = op_static_cfg.op_type @@ -198,5 +381,3 @@ def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig): tuning_item = quant_mode_item.get_option_by_name(att_item) dynamic_state[att_item] = tuning_item.options[0] if tuning_item else None return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) - - \ No newline at end of file diff --git a/neural_compressor/strategy/mse.py b/neural_compressor/strategy/mse.py index 614984359ba..8dafa35759d 100644 --- a/neural_compressor/strategy/mse.py +++ b/neural_compressor/strategy/mse.py @@ -194,10 +194,11 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) # step3. 
optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) - early_stop_tuning = False + early_stop_tuning = True stage1_cnt = 0 int8_ops = quant_mode_wise_items['dynamic'] + quant_mode_wise_items['static'] stage1_max = min(5, len(int8_ops)) # TODO set a more appropriate value + stage1_max=-1 op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) for op_tuning_cfg in op_wise_tuning_sampler: diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index ad7939d5d84..a6ee28b9d4a 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -12,7 +12,7 @@ from neural_compressor.adaptor import FRAMEWORKS import shutil from neural_compressor.strategy.st_utils.hawq_wenhuach import Hawq_top, fix_seed - +from torch.quantization.quantize_fx import fuse_fx fix_seed(1) def build_ptq_yaml(): @@ -41,7 +41,7 @@ def build_ptq_yaml(): f.write(fake_yaml) class TestPytorchAdaptor(unittest.TestCase): - framework_specific_info = {"device": "cpu", + framework_specific_info = {"device": "gpu", "approach": "post_training_static_quant", "random_seed": 1234, "q_dataloader": None, @@ -50,6 +50,7 @@ class TestPytorchAdaptor(unittest.TestCase): adaptor = FRAMEWORKS[framework](framework_specific_info) model = torchvision.models.resnet18() + # from collections import OrderedDict # model = torch.nn.Sequential(OrderedDict([ # ('conv1', torch.nn.Conv2d(3, 2, 1, 1)), @@ -78,10 +79,11 @@ def eval_func(model): return self.i from neural_compressor.experimental import Quantization, common model = copy.deepcopy(self.model) - + model.eval() + model = fuse_fx(model) quantizer = Quantization('ptq_yaml.yaml') quantizer.eval_func = eval_func - dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) + dataset = quantizer.dataset('dummy', (32, 3, 224, 224), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = model From a490187d23bd50e0b5025be21009bfe5ea36124a Mon Sep 17 00:00:00 2001 From: wenhuach
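For reference, a minimal standalone sketch of how the per-op sensitivity scores returned by an interface like calculate_op_sensitivity can be turned into a fallback order. The (op_name, op_type) keys and the score values below are invented for illustration; note that sorting the dict object itself iterates its keys, so keying the sort on the dict's values (as here) is what yields a most-sensitive-first ordering.

from collections import OrderedDict

# Hypothetical sensitivities keyed by (op_name, op_type), mirroring the
# calculate_op_sensitivity docstring; the entries are made up.
ops_sensitivity = {
    ("conv1", "Conv2d"): 0.42,
    ("layer1.0.conv1", "Conv2d"): 1.37,
    ("fc", "Linear"): 0.05,
}

# Sort by the sensitivity value, largest first, so the most sensitive op is
# the first candidate to fall back.
fallback_items_name_lst = sorted(ops_sensitivity,
                                 key=lambda op: ops_sensitivity[op],
                                 reverse=True)

target_dtype = "fp32"
op_dtypes = OrderedDict(zip(fallback_items_name_lst,
                            [target_dtype] * len(fallback_items_name_lst)))
print(op_dtypes)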
Date: Thu, 17 Nov 2022 20:01:48 +0800 Subject: [PATCH 055/128] weight hessian trace, not finished --- neural_compressor/strategy/hawq.py | 242 +++++++++++++++++++---------- 1 file changed, 161 insertions(+), 81 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 3db5cf0aed5..4d3b9489b8f 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -27,19 +27,22 @@ from .st_utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler from .st_utils.tuning_structs import OpTuningConfig from .st_utils.tuning_space import TUNING_ITEMS_LST - +from torch.quantization.quantize_fx import fuse_fx +import torchvision class HessianTrace: - def __init__(self, model, conf, adaptor, op_cfgs_list, dataloader): + def __init__(self, model, conf, adaptor, weight_list, dataloader): self.model = model self.conf = conf ##config - self.op_cfgs_list = op_cfgs_list ##op to get + self.weight_list = weight_list ##op to get self.dataloader = dataloader self.adaptor = adaptor self.max_iter = 500 self.tolerance = 1e-5 self.eps = 1e-6 self.index = 0 + self.device = self.get_device(self.model) + self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config # def apply_init(self): # trace_per_op = self._cal_trace() @@ -57,22 +60,51 @@ def get_device(self, model: torch.nn.Module): for n, p in model.named_parameters(): return p.data.device - def get_gradient(self, model, data, criterion, op_list, device="cpu", retrain_graph=False): + def get_gradients(self, model, data, criterion, create_graph=False): model.zero_grad() - input = data[0] - target = data[1] + input = data[0].to(self.device) + target = data[1].to(self.device) output = model(input) loss = criterion(output, target) - loss.backward(retain_graph=retrain_graph) - gradients = {} + loss.backward(create_graph=create_graph) + gradients = [] for n, p in model.named_parameters(): - if n in op_list: - continue - gradients[n] = 0 if p.grad != None: - gradients[n] = p.grad + gradient = p.grad + gradients.append(gradient+0.0) ## add 0 to create a copy + model.zero_grad() return gradients + def get_params(self, model): + parameters = [p for p in model.parameters() if p.requires_grad] + return parameters + + def sample_rademacher(self, params): + samples = [] + for param in params: + r = torch.randint_like(param, high=2, device=self.device) + r.masked_fill_(r == 0, -1) + samples.append(r) + return samples + + def hutchinson_one_step(self, params, num_batches): + v = self.sample_rademacher(params) + H_v = [0] * len(v) + cnt = 0 + for step, batch in enumerate(self.dataloader): + batch_size = batch[0].shape[0] + cnt += batch_size + gradients = self.get_gradients(self.model, batch, self.criterion, create_graph=True) + H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) + H_v = [pre + cur * float(batch_size) + 0.0 for cur, pre in zip(H_v_one, H_v)] + if step == num_batches - 1: + break + if cnt > 0: + H_v = [item / cnt for item in H_v] + v_t_H_v = [torch.sum(h_v * v_t) / h_v.size().numel() for (h_v, v_t) in zip(H_v, v)] + return v_t_H_v + + def get_avg_trace(self, num_batches=2): """ Estimates average hessian trace for each parameter @@ -80,73 +112,75 @@ def get_avg_trace(self, num_batches=2): assert num_batches > 0 ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] ##num_all_data = num_data_iter * self.dataloader.batch_size - op_list = [item.name for item in self.op_cfgs_list] - criterion = 
torch.nn.CrossEntropyLoss() ##TODO setting this in config - device = self.get_device(self.model) + op_list = self.weight_list - for step, batch in enumerate(self.dataloader): - gradient_dict = self.get_gradient(self.model, batch,criterion, op_list, device=device, retrain_graph=True) - tmp = 1 - if step == num_batches - 1: - break + ##TODO setting this in config + + + params = [p for p in self.model.parameters() if p.requires_grad] + for i in range(self.max_iter): + trace_estimated = self.hutchinson_one_step(params, num_batches) - weight_vhp = [] - w_avg_total_trace = 0. - w_avg_traces_per_iter = [] - mean_avg_traces_per_param = None - act_vhp = [] - a_avg_total_trace = 0. - a_avg_traces_per_iter = [] - mean_avg_traces_per_act = None - - for i in range(max_iter): - weight_vhp_list, w_v, \ - act_vhp_list, a_v, op_act_grad = self.adaptor.get_2order_grad(self.model, - criterion, - self.dataloader, - num_data_iter, - qop_list) - if not weight_vhp: - weight_vhp = [np.random.randn(*p.shape) for p in w_v] - for vhp_curr in weight_vhp_list: - weight_vhp = [a + b * float(self.dataloader.batch_size) + 0. \ - for a, b in zip(weight_vhp, vhp_curr)] - weight_vhp = [a / float(num_all_data) for a in weight_vhp] - avg_traces_per_param = [np.sum(a * b) / a.size for (a, b) in zip(weight_vhp, w_v)] - w_avg_traces_per_iter.append(avg_traces_per_param) - mean_avg_traces_per_param = np.mean(w_avg_traces_per_iter, axis=0) - w_mean_avg_total_trace = np.sum(mean_avg_traces_per_param) - - w_diff_avg = abs(w_mean_avg_total_trace - w_avg_total_trace) / \ - (w_avg_total_trace + diff_eps) - w_avg_total_trace = w_mean_avg_total_trace - logger.info( - '{}# weights difference_avg={} avg_trace={}'.format( - i, w_diff_avg, w_avg_total_trace)) - - if not act_vhp: - act_vhp = [np.random.randn(*p.shape) for p in a_v] - for vhp_curr in act_vhp_list: - act_vhp = [a + b * float(self.dataloader.batch_size) + 0. \ - for a, b in zip(act_vhp, vhp_curr)] - act_vhp = [a / float(num_all_data) for a in act_vhp] - avg_traces_per_act = [np.sum(a * b) / a.size for (a, b) in zip(act_vhp, a_v)] - a_avg_traces_per_iter.append(avg_traces_per_act) - mean_avg_traces_per_act = np.mean(a_avg_traces_per_iter, axis=0) - a_mean_avg_total_trace = np.sum(mean_avg_traces_per_act) - - a_diff_avg = abs(a_mean_avg_total_trace - a_avg_total_trace) / \ - (a_avg_total_trace + diff_eps) - a_avg_total_trace = a_mean_avg_total_trace - logger.info( - '{}# activation difference_avg={} avg_trace={}'.format( - i, a_diff_avg, a_avg_total_trace)) - - if w_diff_avg < tolerance and a_diff_avg < tolerance: - return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad - - return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad + + tmp = 1 + # + # weight_vhp = [] + # w_avg_total_trace = 0. + # w_avg_traces_per_iter = [] + # mean_avg_traces_per_param = None + # act_vhp = [] + # a_avg_total_trace = 0. + # a_avg_traces_per_iter = [] + # mean_avg_traces_per_act = None + # + # for i in range(self.max_iter): + # weight_vhp_list, w_v, \ + # act_vhp_list, a_v, op_act_grad = self.adaptor.get_2order_grad(self.model, + # criterion, + # self.dataloader, + # num_data_iter, + # qop_list) + # if not weight_vhp: + # weight_vhp = [np.random.randn(*p.shape) for p in w_v] + # for vhp_curr in weight_vhp_list: + # weight_vhp = [a + b * float(self.dataloader.batch_size) + 0. 
\ + # for a, b in zip(weight_vhp, vhp_curr)] + # weight_vhp = [a / float(num_all_data) for a in weight_vhp] + # avg_traces_per_param = [np.sum(a * b) / a.size for (a, b) in zip(weight_vhp, w_v)] + # w_avg_traces_per_iter.append(avg_traces_per_param) + # mean_avg_traces_per_param = np.mean(w_avg_traces_per_iter, axis=0) + # w_mean_avg_total_trace = np.sum(mean_avg_traces_per_param) + # + # w_diff_avg = abs(w_mean_avg_total_trace - w_avg_total_trace) / \ + # (w_avg_total_trace + diff_eps) + # w_avg_total_trace = w_mean_avg_total_trace + # logger.info( + # '{}# weights difference_avg={} avg_trace={}'.format( + # i, w_diff_avg, w_avg_total_trace)) + # + # if not act_vhp: + # act_vhp = [np.random.randn(*p.shape) for p in a_v] + # for vhp_curr in act_vhp_list: + # act_vhp = [a + b * float(self.dataloader.batch_size) + 0. \ + # for a, b in zip(act_vhp, vhp_curr)] + # act_vhp = [a / float(num_all_data) for a in act_vhp] + # avg_traces_per_act = [np.sum(a * b) / a.size for (a, b) in zip(act_vhp, a_v)] + # a_avg_traces_per_iter.append(avg_traces_per_act) + # mean_avg_traces_per_act = np.mean(a_avg_traces_per_iter, axis=0) + # a_mean_avg_total_trace = np.sum(mean_avg_traces_per_act) + # + # a_diff_avg = abs(a_mean_avg_total_trace - a_avg_total_trace) / \ + # (a_avg_total_trace + diff_eps) + # a_avg_total_trace = a_mean_avg_total_trace + # logger.info( + # '{}# activation difference_avg={} avg_trace={}'.format( + # i, a_diff_avg, a_avg_total_trace)) + # + # if w_diff_avg < tolerance and a_diff_avg < tolerance: + # return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad + # + # return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad def _cal_trace(self): """ @@ -241,6 +275,46 @@ def __init__(self, model, conf, q_dataloader, q_func=None, dicts, q_hooks) + def is_fused_module(self, module): + """This is a helper function for `_propagate_qconfig_helper` to detecte + if this module is fused. + Args: + module (object): input module + Returns: + (bool): is fused or not + """ + op_type = str(type(module)) + if 'fused' in op_type: + return True + else: + return False + + def get_fused_mapping(self): + # tmp = self.model + # if isinstance(self._fp32_model, torch.nn.Module): + # fx_model = self._fp32_model + # + # model = copy.deepcopy(self._fp32_model) ##orig model + # model.eval() + # fx_model = fuse_fx(model) + model = self._fp32_model + weights_info = dict(model.named_parameters()) + weight_to_op = {} + + module_dict = dict(model.named_modules()) + for op_name, child in model.named_modules(): + if self.is_fused_module(child): + for name, _ in child.named_children(): + if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right + weight_to_op[op_name + "." + name + ".weight"] = op_name + # module_prefix = op_name + '.' 
+ name + # if module_prefix in module_dict: + # module_dict.pop(module_prefix) # remove sub-modules of fused modules + else: + if op_name + ".weight" in weights_info: + weight_to_op[op_name + ".weight"] = op_name + return weight_to_op + def next_tune_cfg(self): from copy import deepcopy tuning_space = self.tuning_space @@ -254,16 +328,21 @@ def next_tune_cfg(self): target_dtype = "fp32" ##TODO support bf16 target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) - fp_op_list = [item for item in quant_ops if item in target_type_lst] + fp_op_list = [item.name for item in quant_ops if item in target_type_lst] + # for n, p in self._fp32_model.named_modules(): + # print(n) + # for n, p in self._fp32_model.named_parameters(): + # print(n) + weight_to_op = self.get_fused_mapping() orig_eval = True if self._fp32_model.training: orig_eval = False self._fp32_model.train() - ht = HessianTrace(self._fp32_model, self.cfg, self.adaptor, fp_op_list, self.calib_dataloader) + ht = HessianTrace(self._fp32_model, self.cfg, self.adaptor, weight_to_op.keys(), self.calib_dataloader) ht.get_avg_trace() - # if orig_eval: - # self._fp32_model.eval() - # ht.get_avg_trace() + if orig_eval: + self._fp32_model.eval() + # tmp = 1 # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up # ops_sensitivity = self.adaptor.get_hessian_trace(self._fp32_model, @@ -338,6 +417,7 @@ def next_tune_cfg_bk(self): fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], initial_op_tuning_cfg=initial_op_tuning_cfg, op_dtypes=op_dtypes, accumulate=False) + op_fallback_acc_impact = OrderedDict() for op_index, op_tuning_cfg in enumerate(fallback_sampler): op_tuning_cfg['calib_sampling_size'] = calib_sampling_size From 6c683f43ced5a22845f9da3d03505f562cbb0c8b Mon Sep 17 00:00:00 2001 From: wenhuach
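The estimator behind the class above is Hutchinson's identity, tr(H) = E[v^T H v] with Rademacher-distributed v, where the Hessian-vector product H v is obtained by differentiating the gradient a second time instead of forming H explicitly. A self-contained sketch of one probe on a toy model (the model, data, and variable names are illustrative only, not the strategy's API):

import torch

model = torch.nn.Linear(4, 2)
criterion = torch.nn.CrossEntropyLoss()
x, y = torch.randn(8, 4), torch.randint(0, 2, (8,))
params = [p for p in model.parameters() if p.requires_grad]

# First-order gradients kept differentiable via create_graph=True.
loss = criterion(model(x), y)
grads = torch.autograd.grad(loss, params, create_graph=True)

# Rademacher probe vector: entries are +1/-1 with equal probability.
v = [torch.randint_like(p, high=2) * 2 - 1 for p in params]

# Hessian-vector product: differentiate (grads . v) w.r.t. the parameters.
Hv = torch.autograd.grad(grads, params, grad_outputs=v, retain_graph=False)

# One-sample per-parameter trace estimate; the patch averages the elementwise
# product (mean), a sum would give the unnormalized trace instead.
layer_traces = torch.stack([(h * vi).mean() for h, vi in zip(Hv, v)])
print(layer_traces)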
Date: Fri, 18 Nov 2022 11:16:25 +0800 Subject: [PATCH 056/128] bascially finished weight trace --- neural_compressor/strategy/hawq.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 4d3b9489b8f..39fd93fd3ff 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -96,19 +96,20 @@ def hutchinson_one_step(self, params, num_batches): cnt += batch_size gradients = self.get_gradients(self.model, batch, self.criterion, create_graph=True) H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) - H_v = [pre + cur * float(batch_size) + 0.0 for cur, pre in zip(H_v_one, H_v)] + H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] if step == num_batches - 1: break if cnt > 0: H_v = [item / cnt for item in H_v] - v_t_H_v = [torch.sum(h_v * v_t) / h_v.size().numel() for (h_v, v_t) in zip(H_v, v)] + v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)])##maybe sum is better return v_t_H_v - def get_avg_trace(self, num_batches=2): + + def get_avg_traces(self, num_batches=2): + """ + Estimates average hessian trace for each parameter """ - Estimates average hessian trace for each parameter - """ assert num_batches > 0 ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] ##num_all_data = num_data_iter * self.dataloader.batch_size @@ -119,8 +120,21 @@ def get_avg_trace(self, num_batches=2): params = [p for p in self.model.parameters() if p.requires_grad] + layer_traces_per_iter = [] + prev_avg_model_trace = 0 for i in range(self.max_iter): - trace_estimated = self.hutchinson_one_step(params, num_batches) + layer_traces = self.hutchinson_one_step(params, num_batches) + layer_traces_per_iter.append(layer_traces) + layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) + model_trace = torch.sum(layer_traces_estimate) + diff_ratio = abs(model_trace-prev_avg_model_trace)/(prev_avg_model_trace+self.eps) + if diff_ratio < self.tolerance and i > 10:##TODO magic number + break + prev_avg_model_trace = model_trace + + layer_traces = layer_traces_estimate + return layer_traces + tmp = 1 @@ -339,7 +353,7 @@ def next_tune_cfg(self): orig_eval = False self._fp32_model.train() ht = HessianTrace(self._fp32_model, self.cfg, self.adaptor, weight_to_op.keys(), self.calib_dataloader) - ht.get_avg_trace() + ht.get_avg_traces() if orig_eval: self._fp32_model.eval() From 03993e600f428efe430cd4a66c592c3d03a964ec Mon Sep 17 00:00:00 2001 From: wenhuach
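In plain terms, the loop above draws one v^T H v sample per iteration, keeps a running mean per layer, and stops once the relative change of the total trace, |T_i - T_{i-1}| / (T_{i-1} + eps), drops below the tolerance after a short warm-up. A compact sketch of just that stopping logic, with a random stand-in for the per-sample estimate:

import torch

def one_sample(n_layers=3):
    # stand-in for a single Hutchinson draw of v^T H v per layer
    return torch.randn(n_layers).abs()

tolerance, eps, max_iter = 1e-5, 1e-6, 500
per_iter, prev_total = [], 0.0
for i in range(max_iter):
    per_iter.append(one_sample())
    estimate = torch.mean(torch.stack(per_iter), dim=0)  # running mean per layer
    total = torch.sum(estimate)
    diff_ratio = abs(total - prev_total) / (prev_total + eps)
    if diff_ratio < tolerance and i > 10:  # same warm-up guard as the patch
        break
    prev_total = total
print(estimate)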
Date: Fri, 18 Nov 2022 15:45:52 +0800 Subject: [PATCH 057/128] enable activation gradient hook, activation trace is not finished --- neural_compressor/strategy/hawq.py | 266 +++++++++++------------------ 1 file changed, 101 insertions(+), 165 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 39fd93fd3ff..34d94901167 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -28,42 +28,73 @@ from .st_utils.tuning_structs import OpTuningConfig from .st_utils.tuning_space import TUNING_ITEMS_LST from torch.quantization.quantize_fx import fuse_fx -import torchvision +import torchvision + class HessianTrace: - def __init__(self, model, conf, adaptor, weight_list, dataloader): - self.model = model - self.conf = conf ##config - self.weight_list = weight_list ##op to get + """ + please refer to + Yao, Zhewei, et al. "Pyhessian: Neural networks through the lens of the hessian." 2020 IEEE international conference on big data (Big data). IEEE, 2020. + Dong, Zhen, et al. "Hawq-v2: Hessian aware trace-weighted quantization of neural networks." Advances in neural information processing systems 33 (2020): 18518-18529. + https://github.com/openvinotoolkit/nncf/blob/develop/nncf/torch/quantization/hessian_trace.py + """ + + def __init__(self, model, dataloader, criterion=None): + self.model = model ##TODO need to check fused or not self.dataloader = dataloader - self.adaptor = adaptor self.max_iter = 500 self.tolerance = 1e-5 self.eps = 1e-6 self.index = 0 self.device = self.get_device(self.model) - self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config - - # def apply_init(self): - # trace_per_op = self._cal_trace() - # if not trace_per_op: - # raise RuntimeError('Failed to calculate hessian traces!') - # - # perturbations = self._calc_quantization_noise() - # configuration_metric = self._calc_hawq_metric_per_configuration( - # perturbations, trace_per_op) - # config_index = self.choose_configuration(configuration_metric) - # chosen_config = self.op_cfgs_list[config_index] - # return chosen_config, trace_per_op + self.criterion = criterion + if self.criterion == None: + self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config + self.criterion = self.criterion.to(self.device) + self.weight_to_op, self.op_list = self.get_fused_mapping() + + def is_fused_module(self, module): + """This is a helper function for `_propagate_qconfig_helper` to detecte + if this module is fused. + Args: + module (object): input module + Returns: + (bool): is fused or not + """ + op_type = str(type(module)) + if 'fused' in op_type: + return True + else: + return False + + def get_fused_mapping(self): + model = self.model + weights_info = dict(model.named_parameters()) + weight_to_op = {} + for op_name, child in model.named_modules(): + if self.is_fused_module(child): + for name, _ in child.named_children(): + if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right + weight_to_op[op_name + "." 
+ name + ".weight"] = op_name + break + else: + if op_name + ".weight" in weights_info: + weight_to_op[op_name + ".weight"] = op_name + op_list = [] + for key in weight_to_op.keys(): + op_list.append(weight_to_op[key]) + return weight_to_op, op_list def get_device(self, model: torch.nn.Module): for n, p in model.named_parameters(): return p.data.device - def get_gradients(self, model, data, criterion, create_graph=False): + def get_gradients(self, model, data, criterion, create_graph=False, enable_act=False): model.zero_grad() input = data[0].to(self.device) target = data[1].to(self.device) + if enable_act: + input.requires_grad = True output = model(input) loss = criterion(output, target) loss.backward(create_graph=create_graph) @@ -71,7 +102,7 @@ def get_gradients(self, model, data, criterion, create_graph=False): for n, p in model.named_parameters(): if p.grad != None: gradient = p.grad - gradients.append(gradient+0.0) ## add 0 to create a copy + gradients.append(gradient + 0.0) ## add 0 to create a copy model.zero_grad() return gradients @@ -87,143 +118,88 @@ def sample_rademacher(self, params): samples.append(r) return samples - def hutchinson_one_step(self, params, num_batches): + def hutchinson_one_step(self, params, enable_act, num_batches): v = self.sample_rademacher(params) H_v = [0] * len(v) cnt = 0 - for step, batch in enumerate(self.dataloader): - batch_size = batch[0].shape[0] + for step, data in enumerate(self.dataloader): + batch_size = data[0].shape[0] cnt += batch_size - gradients = self.get_gradients(self.model, batch, self.criterion, create_graph=True) + gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True,enable_act=enable_act) H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) - H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] + H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] if step == num_batches - 1: break if cnt > 0: H_v = [item / cnt for item in H_v] - v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)])##maybe sum is better + v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better return v_t_H_v + def backward_hook(self, name): + def grad_hook(model, grad_input, grad_output): + self.layer_acts_grads[name] = [grad_input, grad_output] + return grad_hook + + def forward_hook(self, name): + def enable_input_grad_hook(model, inputs, outputs): + try: + input = inputs[0]##TODO check whether this is right + except: + input = inputs - def get_avg_traces(self, num_batches=2): + if input.is_leaf == False: + if input.requires_grad is False: + input.requires_grad = True + self.layer_acts[name] = input + + return enable_input_grad_hook + + def register_hook(self): + for name, module in self.model.named_modules(): + if name in self.op_list: + forward_handle = module.register_forward_hook(self.forward_hook(name)) + backward_handle = module.register_backward_hook(self.backward_hook(name)) + self.hook_handlers.append(forward_handle) + self.hook_handlers.append(backward_handle) + + def unregister_hook(self): + for handel in self.hook_handlers: + handel.remove() + + def get_avg_traces(self, enable_act=True, num_batches=2): """ Estimates average hessian trace for each parameter """ assert num_batches > 0 + if enable_act: + self.hook_handlers = [] + self.layer_acts = {} + self.layer_acts_grads = {} + self.register_hook() ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] ##num_all_data = 
num_data_iter * self.dataloader.batch_size - op_list = self.weight_list - + ##op_list = self.op_list ##TODO setting this in config - - params = [p for p in self.model.parameters() if p.requires_grad] layer_traces_per_iter = [] prev_avg_model_trace = 0 for i in range(self.max_iter): - layer_traces = self.hutchinson_one_step(params, num_batches) + layer_traces = self.hutchinson_one_step(params, enable_act, num_batches ) layer_traces_per_iter.append(layer_traces) layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) model_trace = torch.sum(layer_traces_estimate) - diff_ratio = abs(model_trace-prev_avg_model_trace)/(prev_avg_model_trace+self.eps) - if diff_ratio < self.tolerance and i > 10:##TODO magic number + diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) + if diff_ratio < self.tolerance and i > 10: ##TODO magic number break prev_avg_model_trace = model_trace layer_traces = layer_traces_estimate + self.unregister_hook() return layer_traces - - tmp = 1 - # - # weight_vhp = [] - # w_avg_total_trace = 0. - # w_avg_traces_per_iter = [] - # mean_avg_traces_per_param = None - # act_vhp = [] - # a_avg_total_trace = 0. - # a_avg_traces_per_iter = [] - # mean_avg_traces_per_act = None - # - # for i in range(self.max_iter): - # weight_vhp_list, w_v, \ - # act_vhp_list, a_v, op_act_grad = self.adaptor.get_2order_grad(self.model, - # criterion, - # self.dataloader, - # num_data_iter, - # qop_list) - # if not weight_vhp: - # weight_vhp = [np.random.randn(*p.shape) for p in w_v] - # for vhp_curr in weight_vhp_list: - # weight_vhp = [a + b * float(self.dataloader.batch_size) + 0. \ - # for a, b in zip(weight_vhp, vhp_curr)] - # weight_vhp = [a / float(num_all_data) for a in weight_vhp] - # avg_traces_per_param = [np.sum(a * b) / a.size for (a, b) in zip(weight_vhp, w_v)] - # w_avg_traces_per_iter.append(avg_traces_per_param) - # mean_avg_traces_per_param = np.mean(w_avg_traces_per_iter, axis=0) - # w_mean_avg_total_trace = np.sum(mean_avg_traces_per_param) - # - # w_diff_avg = abs(w_mean_avg_total_trace - w_avg_total_trace) / \ - # (w_avg_total_trace + diff_eps) - # w_avg_total_trace = w_mean_avg_total_trace - # logger.info( - # '{}# weights difference_avg={} avg_trace={}'.format( - # i, w_diff_avg, w_avg_total_trace)) - # - # if not act_vhp: - # act_vhp = [np.random.randn(*p.shape) for p in a_v] - # for vhp_curr in act_vhp_list: - # act_vhp = [a + b * float(self.dataloader.batch_size) + 0. 
\ - # for a, b in zip(act_vhp, vhp_curr)] - # act_vhp = [a / float(num_all_data) for a in act_vhp] - # avg_traces_per_act = [np.sum(a * b) / a.size for (a, b) in zip(act_vhp, a_v)] - # a_avg_traces_per_iter.append(avg_traces_per_act) - # mean_avg_traces_per_act = np.mean(a_avg_traces_per_iter, axis=0) - # a_mean_avg_total_trace = np.sum(mean_avg_traces_per_act) - # - # a_diff_avg = abs(a_mean_avg_total_trace - a_avg_total_trace) / \ - # (a_avg_total_trace + diff_eps) - # a_avg_total_trace = a_mean_avg_total_trace - # logger.info( - # '{}# activation difference_avg={} avg_trace={}'.format( - # i, a_diff_avg, a_avg_total_trace)) - # - # if w_diff_avg < tolerance and a_diff_avg < tolerance: - # return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad - # - # return mean_avg_traces_per_param, mean_avg_traces_per_act, op_act_grad - - def _cal_trace(self): - """ - Calculate the trace for both weight and activation per layer - """ - pass - # trace_estimator = HessianTraceEstimator(self.model, - # self.conf, - # self.adaptor, - # self.op_cfgs_list, - # self.dataloader) - # w_avg_trace, a_avg_trace, op_act_grad = trace_estimator.get_avg_trace() - # - # # mapping trace to op per op_weight_mapping - # weights_name = self.adaptor.get_all_weight_names(self.model) - # op_weight_mapping = self.get_op_weight_mapping() - # trace_per_op = OrderedDict() - # w_op_trace_info = np.zeros(len(op_weight_mapping)) - # for i, (op_name, w_name) in enumerate(op_weight_mapping.items()): - # index = weights_name.index(w_name) - # w_op_trace_info[i] = w_avg_trace[index] - # act_trace = 0.0 - # if op_name in op_act_grad: - # a_index = op_act_grad.index(op_name) - # act_trace = a_avg_trace[a_index] - # trace_per_op[op_name] = (w_avg_trace[index], act_trace) - # return trace_per_op - - @strategy_registry class HawqTuneStrategy(TuneStrategy): """The basic tuning strategy which tunes the low precision model with below order. @@ -289,46 +265,6 @@ def __init__(self, model, conf, q_dataloader, q_func=None, dicts, q_hooks) - def is_fused_module(self, module): - """This is a helper function for `_propagate_qconfig_helper` to detecte - if this module is fused. - Args: - module (object): input module - Returns: - (bool): is fused or not - """ - op_type = str(type(module)) - if 'fused' in op_type: - return True - else: - return False - - def get_fused_mapping(self): - # tmp = self.model - # if isinstance(self._fp32_model, torch.nn.Module): - # fx_model = self._fp32_model - # - # model = copy.deepcopy(self._fp32_model) ##orig model - # model.eval() - # fx_model = fuse_fx(model) - model = self._fp32_model - weights_info = dict(model.named_parameters()) - weight_to_op = {} - - module_dict = dict(model.named_modules()) - for op_name, child in model.named_modules(): - if self.is_fused_module(child): - for name, _ in child.named_children(): - if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right - weight_to_op[op_name + "." + name + ".weight"] = op_name - # module_prefix = op_name + '.' 
+ name - # if module_prefix in module_dict: - # module_dict.pop(module_prefix) # remove sub-modules of fused modules - else: - if op_name + ".weight" in weights_info: - weight_to_op[op_name + ".weight"] = op_name - return weight_to_op - def next_tune_cfg(self): from copy import deepcopy tuning_space = self.tuning_space @@ -347,12 +283,12 @@ def next_tune_cfg(self): # print(n) # for n, p in self._fp32_model.named_parameters(): # print(n) - weight_to_op = self.get_fused_mapping() + orig_eval = True if self._fp32_model.training: orig_eval = False self._fp32_model.train() - ht = HessianTrace(self._fp32_model, self.cfg, self.adaptor, weight_to_op.keys(), self.calib_dataloader) + ht = HessianTrace(self._fp32_model, self.calib_dataloader) ht.get_avg_traces() if orig_eval: self._fp32_model.eval() From 20bed968470a144ceeda15142fadf0c96779389a Mon Sep 17 00:00:00 2001 From: wenhuach
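To extend the estimate from weights to activations, the patch enables gradients on each tracked op's input inside a forward hook and records the gradients flowing through the op in a backward hook. Below is a small self-contained illustration of that hook pattern; it uses register_full_backward_hook and retain_grad() as safe stand-ins for the deprecated register_backward_hook and the in-place requires_grad flip used above, and the toy model and dict names are illustrative only.

import torch

model = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.ReLU(), torch.nn.Linear(4, 2))
acts, act_grads, handles = {}, {}, []

def forward_hook(name):
    def hook(module, inputs, outputs):
        inp = inputs[0]
        if inp.requires_grad and not inp.is_leaf:
            inp.retain_grad()          # keep .grad on this non-leaf activation
        acts[name] = inp
    return hook

def backward_hook(name):
    def hook(module, grad_input, grad_output):
        act_grads[name] = (grad_input, grad_output)
    return hook

for name, module in model.named_modules():
    if isinstance(module, torch.nn.Linear):
        handles.append(module.register_forward_hook(forward_hook(name)))
        handles.append(module.register_full_backward_hook(backward_hook(name)))

model(torch.randn(2, 4)).sum().backward()
for h in handles:                      # always remove hooks when done
    h.remove()
print({n: tuple(t.shape) for n, t in acts.items()}, list(act_grads.keys()))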
Date: Fri, 18 Nov 2022 15:47:25 +0800 Subject: [PATCH 058/128] reformat code --- neural_compressor/strategy/hawq.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 34d94901167..8ec728337b9 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -102,7 +102,7 @@ def get_gradients(self, model, data, criterion, create_graph=False, enable_act=F for n, p in model.named_parameters(): if p.grad != None: gradient = p.grad - gradients.append(gradient + 0.0) ## add 0 to create a copy + gradients.append(gradient + 0.0) ## add 0 to create a copy model.zero_grad() return gradients @@ -125,7 +125,7 @@ def hutchinson_one_step(self, params, enable_act, num_batches): for step, data in enumerate(self.dataloader): batch_size = data[0].shape[0] cnt += batch_size - gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True,enable_act=enable_act) + gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True, enable_act=enable_act) H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] if step == num_batches - 1: @@ -135,16 +135,16 @@ def hutchinson_one_step(self, params, enable_act, num_batches): v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better return v_t_H_v - def backward_hook(self, name): def grad_hook(model, grad_input, grad_output): self.layer_acts_grads[name] = [grad_input, grad_output] + return grad_hook def forward_hook(self, name): def enable_input_grad_hook(model, inputs, outputs): try: - input = inputs[0]##TODO check whether this is right + input = inputs[0] ##TODO check whether this is right except: input = inputs @@ -167,7 +167,7 @@ def unregister_hook(self): for handel in self.hook_handlers: handel.remove() - def get_avg_traces(self, enable_act=True, num_batches=2): + def get_avg_traces(self, enable_act=False, num_batches=2): """ Estimates average hessian trace for each parameter """ @@ -186,7 +186,7 @@ def get_avg_traces(self, enable_act=True, num_batches=2): layer_traces_per_iter = [] prev_avg_model_trace = 0 for i in range(self.max_iter): - layer_traces = self.hutchinson_one_step(params, enable_act, num_batches ) + layer_traces = self.hutchinson_one_step(params, enable_act, num_batches) layer_traces_per_iter.append(layer_traces) layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) model_trace = torch.sum(layer_traces_estimate) From 806290a679ca0882be29fdd65eb945b49f736fae Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Fri, 18 Nov 2022 15:54:48 +0800 Subject: [PATCH 059/128] fix a bug --- neural_compressor/strategy/hawq.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 8ec728337b9..7d2331af345 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -196,7 +196,8 @@ def get_avg_traces(self, enable_act=False, num_batches=2): prev_avg_model_trace = model_trace layer_traces = layer_traces_estimate - self.unregister_hook() + if enable_act: + self.unregister_hook() return layer_traces From 4efc18cd5a013f96849c7027a3cac3be9aeb4401 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Mon, 21 Nov 2022 11:06:43 +0800 Subject: [PATCH 060/128] when reset the required grad, something goes wrong --- neural_compressor/strategy/hawq.py | 67 ++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 7d2331af345..228bb249e2a 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -92,6 +92,7 @@ def get_device(self, model: torch.nn.Module): def get_gradients(self, model, data, criterion, create_graph=False, enable_act=False): model.zero_grad() input = data[0].to(self.device) + ##self._input_shape = input.shape ## for resetting input activation target = data[1].to(self.device) if enable_act: input.requires_grad = True @@ -102,7 +103,7 @@ def get_gradients(self, model, data, criterion, create_graph=False, enable_act=F for n, p in model.named_parameters(): if p.grad != None: gradient = p.grad - gradients.append(gradient + 0.0) ## add 0 to create a copy + gradients.append(gradient + 0.0) ## add 0 to create a copy model.zero_grad() return gradients @@ -118,7 +119,7 @@ def sample_rademacher(self, params): samples.append(r) return samples - def hutchinson_one_step(self, params, enable_act, num_batches): + def get_hv_one_sample(self, params, enable_act, num_batches): v = self.sample_rademacher(params) H_v = [0] * len(v) cnt = 0 @@ -135,19 +136,17 @@ def hutchinson_one_step(self, params, enable_act, num_batches): v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better return v_t_H_v - def backward_hook(self, name): - def grad_hook(model, grad_input, grad_output): + def _get_input_grad_hook(self, name): + def input_grad_hook(model, grad_input, grad_output): self.layer_acts_grads[name] = [grad_input, grad_output] + return input_grad_hook - return grad_hook - - def forward_hook(self, name): + def _get_enable_input_grad_hook(self, name): def enable_input_grad_hook(model, inputs, outputs): try: input = inputs[0] ##TODO check whether this is right except: input = inputs - if input.is_leaf == False: if input.requires_grad is False: input.requires_grad = True @@ -155,28 +154,54 @@ def enable_input_grad_hook(model, inputs, outputs): return enable_input_grad_hook - def register_hook(self): + # def _get_disable_input_grad_hook(self, name): + # def disable_input_grad_hook(model, inputs, outputs): + # try: + # input = inputs[0] ##TODO check whether this is right + # except: + # input = inputs + # if input.is_leaf == False:## you can only change requires_grad flags of leaf variables + # if input.requires_grad is True: + # input.requires_grad = False + # + # + # return disable_input_grad_hook + + + def _unregister_hook(self): + for handel in self.hook_handles: + handel.remove() + + def register_input_grad_hooks(self): for name, module in self.model.named_modules(): if name in self.op_list: - forward_handle = module.register_forward_hook(self.forward_hook(name)) - backward_handle = module.register_backward_hook(self.backward_hook(name)) - self.hook_handlers.append(forward_handle) - self.hook_handlers.append(backward_handle) + hook_handle = module.register_forward_hook(self._get_enable_input_grad_hook(name)) + self.hook_handles.append(hook_handle) + hook_handle = module.register_forward_hook(self._get_input_grad_hook(name)) + self.hook_handles.append(hook_handle) + + + def reset_input_gradient_and_hooks(self): + # tmp_input = torch.zeros(self._input_shape, device=self.device) + # for name, module in 
self.model.named_modules(): + # if name in self.op_list: + # hook_handle = module.register_forward_hook(self._get_disable_input_grad_hook(name)) + # self.hook_handles.append(hook_handle) + # self.model(tmp_input) + self._unregister_hook() + - def unregister_hook(self): - for handel in self.hook_handlers: - handel.remove() - def get_avg_traces(self, enable_act=False, num_batches=2): + def get_avg_traces(self, enable_act=True, num_batches=2): """ Estimates average hessian trace for each parameter """ assert num_batches > 0 if enable_act: - self.hook_handlers = [] + self.hook_handles = [] self.layer_acts = {} self.layer_acts_grads = {} - self.register_hook() + self.register_input_grad_hooks() ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] ##num_all_data = num_data_iter * self.dataloader.batch_size ##op_list = self.op_list @@ -186,7 +211,7 @@ def get_avg_traces(self, enable_act=False, num_batches=2): layer_traces_per_iter = [] prev_avg_model_trace = 0 for i in range(self.max_iter): - layer_traces = self.hutchinson_one_step(params, enable_act, num_batches) + layer_traces = self.get_hv_one_sample(params, enable_act, num_batches) layer_traces_per_iter.append(layer_traces) layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) model_trace = torch.sum(layer_traces_estimate) @@ -197,7 +222,7 @@ def get_avg_traces(self, enable_act=False, num_batches=2): layer_traces = layer_traces_estimate if enable_act: - self.unregister_hook() + self.reset_input_gradient_and_hooks() return layer_traces From 62dddf766c0917fe06848e1c7ee74d05893a1258 Mon Sep 17 00:00:00 2001 From: wenhuach
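One further detail of the estimator above: each calibration batch's Hessian-vector product is weighted by its batch size, only num_batches batches are consumed, and the accumulated product is normalized by the total sample count before the elementwise v^T H v reduction. A sketch of just that accumulation, with hv_for_batch() standing in for the real gradient-of-gradient call:

import torch

def hv_for_batch(batch, v):
    # stand-in for the real per-batch Hessian-vector product
    return [torch.randn_like(t) for t in v]

loader = [(torch.randn(8, 4), torch.randint(0, 2, (8,))) for _ in range(4)]
v = [torch.randn(4, 4), torch.randn(4)]        # one probe tensor per parameter
num_batches = 2

H_v, cnt = [torch.zeros_like(t) for t in v], 0
for step, batch in enumerate(loader):
    bs = batch[0].shape[0]
    cnt += bs
    H_v = [acc + cur * float(bs) for acc, cur in zip(H_v, hv_for_batch(batch, v))]
    if step == num_batches - 1:
        break
H_v = [t / cnt for t in H_v]
trace_per_param = torch.stack([(h * vi).mean() for h, vi in zip(H_v, v)])
print(trace_per_param)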
Date: Mon, 21 Nov 2022 15:13:01 +0800 Subject: [PATCH 061/128] add trick imagenet dataset fix one issue --- .../experimental/quantization.py | 32 +++++++++++++++++++ neural_compressor/strategy/hawq.py | 3 +- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index 3d7b7811ea2..77dfc51d465 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -146,6 +146,38 @@ def pre_process(self): with open(self.resume_file, 'rb') as f: _resume = pickle.load(f).__dict__ + + import torchvision.datasets as datasets + import torchvision.transforms as transforms + data_path = "/mnt/data2/dataset/dataset/imagenet/img_raw" + traindir = os.path.join(data_path, 'train') + valdir = os.path.join(data_path, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + val_dataset = datasets.ImageFolder( + valdir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + from torch.utils.data import DataLoader + + self._calib_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True) + self._eval_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False) + self.strategy = STRATEGIES[strategy]( self._model, self.conf, diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 228bb249e2a..2beef8668b4 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -98,6 +98,7 @@ def get_gradients(self, model, data, criterion, create_graph=False, enable_act=F input.requires_grad = True output = model(input) loss = criterion(output, target) + # torch.autograd.backward(loss, create_graph=create_graph) loss.backward(create_graph=create_graph) gradients = [] for n, p in model.named_parameters(): @@ -177,7 +178,7 @@ def register_input_grad_hooks(self): if name in self.op_list: hook_handle = module.register_forward_hook(self._get_enable_input_grad_hook(name)) self.hook_handles.append(hook_handle) - hook_handle = module.register_forward_hook(self._get_input_grad_hook(name)) + hook_handle = module.register_backward_hook(self._get_input_grad_hook(name)) self.hook_handles.append(hook_handle) From 755c38cc34218f5e6ceff8b4b9bb95c3e6fb7e14 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 30 Nov 2022 14:30:37 +0800 Subject: [PATCH 062/128] resolve conflicts Signed-off-by: yiliu30 --- neural_compressor/adaptor/pytorch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index e89f687f81a..3421828a8ab 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -3155,7 +3155,6 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops): Returns: None """ - module_dict = dict(model.named_modules()) for op_name, child in model.named_modules(): if self.is_fused_module(child): From 87793cf19718f7b26e00e191aaa60c70b07cabfc Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Mon, 21 Nov 2022 18:51:51 +0800 Subject: [PATCH 063/128] change to eval model, remove bias --- .../quantization/ptq/cpu/fx/conf.yaml | 12 +++-- .../experimental/quantization.py | 2 +- .../strategy/auto_mixed_precision.py | 1 + neural_compressor/strategy/hawq.py | 46 ++++++++++++++----- .../strategy/st_utils/hawq_wenhuach.py | 2 +- neural_compressor/strategy/strategy.py | 3 +- test/strategy/test_hawq_wenhuach.py | 2 +- 7 files changed, 47 insertions(+), 21 deletions(-) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml index d1dab0d2f43..064656e872b 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml @@ -21,10 +21,10 @@ quantization: # optional. tuning constrai calibration: sampling_size: 300 # optional. default value is 100. used to set how many samples should be used in calibration. dataloader: - batch_size: 30 + batch_size: 1 dataset: ImageFolder: - root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed + root: /home/bfang1/Projects/HAWQ_INC/datasets/raw # NOTE: modify to calibration dataset location if needed transform: Resize: size: 256 @@ -40,10 +40,10 @@ evaluation: # optional. required if use metric: topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. dataloader: - batch_size: 30 + batch_size: 1 dataset: ImageFolder: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed + root: /home/bfang1/Projects/HAWQ_INC/datasets/raw # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 @@ -61,7 +61,7 @@ evaluation: # optional. required if use batch_size: 1 dataset: ImageFolder: - root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed + root: /home/bfang1/Projects/HAWQ_INC/datasets/raw # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 @@ -73,6 +73,8 @@ evaluation: # optional. required if use std: [0.229, 0.224, 0.225] tuning: + strategy: + name: hawq accuracy_criterion: relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. 
exit_policy: diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index 77dfc51d465..dae0f8611c5 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -149,7 +149,7 @@ def pre_process(self): import torchvision.datasets as datasets import torchvision.transforms as transforms - data_path = "/mnt/data2/dataset/dataset/imagenet/img_raw" + data_path = "/home/bfang1/Projects/HAWQ_INC/datasets/raw" traindir = os.path.join(data_path, 'train') valdir = os.path.join(data_path, 'val') normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index 4b59cf2cced..7fbd759a87e 100644 --- a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -145,6 +145,7 @@ def traverse(self): if self.baseline is None and (self.eval_dataloader or self.eval_func): logger.info("Get FP32 model baseline.") self.baseline = self._evaluate(self.model) + self.baseline=[0.698,[700]] # record the FP32 baseline self._add_tuning_history() diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 2beef8668b4..09f0b1ef175 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -102,15 +102,15 @@ def get_gradients(self, model, data, criterion, create_graph=False, enable_act=F loss.backward(create_graph=create_graph) gradients = [] for n, p in model.named_parameters(): - if p.grad != None: + if p.grad != None and n in self.weight_names: gradient = p.grad gradients.append(gradient + 0.0) ## add 0 to create a copy model.zero_grad() return gradients - def get_params(self, model): - parameters = [p for p in model.parameters() if p.requires_grad] - return parameters + # def get_params(self, model): + # parameters = [p for p in model.parameters() if p.requires_grad] + # return parameters def sample_rademacher(self, params): samples = [] @@ -191,9 +191,13 @@ def reset_input_gradient_and_hooks(self): # self.model(tmp_input) self._unregister_hook() + def get_params(self): + weight_names = [n for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n] ##remove bias + params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n] ##remove bias + self.weight_names = weight_names + self.params = params - - def get_avg_traces(self, enable_act=True, num_batches=2): + def get_avg_traces(self, enable_act=False, num_batches=2): """ Estimates average hessian trace for each parameter """ @@ -207,18 +211,22 @@ def get_avg_traces(self, enable_act=True, num_batches=2): ##num_all_data = num_data_iter * self.dataloader.batch_size ##op_list = self.op_list ##TODO setting this in config - params = [p for p in self.model.parameters() if p.requires_grad] + self.get_params() + # names = [n for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias + # params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias layer_traces_per_iter = [] prev_avg_model_trace = 0 for i in range(self.max_iter): - layer_traces = self.get_hv_one_sample(params, enable_act, num_batches) + layer_traces = self.get_hv_one_sample(self.params, enable_act, num_batches) layer_traces_per_iter.append(layer_traces) layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) model_trace = 
torch.sum(layer_traces_estimate) diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) if diff_ratio < self.tolerance and i > 10: ##TODO magic number break + if i==50:##TODO for debug + break prev_avg_model_trace = model_trace layer_traces = layer_traces_estimate @@ -314,11 +322,25 @@ def next_tune_cfg(self): orig_eval = True if self._fp32_model.training: orig_eval = False - self._fp32_model.train() + self._fp32_model.eval() ht = HessianTrace(self._fp32_model, self.calib_dataloader) - ht.get_avg_traces() - if orig_eval: - self._fp32_model.eval() + traces = ht.get_avg_traces() + if orig_eval==False: + self._fp32_model.train() + + ordered_ops = sorted(op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better) + op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) + logger.info(f"Start to accumulate fallback to {target_dtype}.") + initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) + fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=True) + for op_tuning_cfg in fallback_sampler: + op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + yield op_tuning_cfg + # tmp = 1 # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up diff --git a/neural_compressor/strategy/st_utils/hawq_wenhuach.py b/neural_compressor/strategy/st_utils/hawq_wenhuach.py index 6c74401c5fc..c0ced2af3f4 100644 --- a/neural_compressor/strategy/st_utils/hawq_wenhuach.py +++ b/neural_compressor/strategy/st_utils/hawq_wenhuach.py @@ -10,7 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. """ -from ..utils import logger +from ...utils import logger import torch import numpy as np from torch.autograd import Variable diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 63710b43264..58faa5d919a 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -219,7 +219,8 @@ def traverse(self): if self.baseline is None: logger.info("Get FP32 model baseline.") self._fp32_model = self.model - self.baseline = self._evaluate(self.model) + ##self.baseline = self._evaluate(self.model) + self.baseline = [0.698,[700]] # record the FP32 baseline self._add_tuning_history() self.show_baseline_info() diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index a6ee28b9d4a..236d8219e71 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -11,7 +11,7 @@ from neural_compressor.adaptor.pytorch import TemplateAdaptor from neural_compressor.adaptor import FRAMEWORKS import shutil -from neural_compressor.strategy.st_utils.hawq_wenhuach import Hawq_top, fix_seed +from neural_compressor.strategy.st_utils.hawq_wenhuach import fix_seed from torch.quantization.quantize_fx import fuse_fx fix_seed(1) From 7a7520bedcaaf7d1af9787885fd7d93d7e7f164d Mon Sep 17 00:00:00 2001 From: wenhuach
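The patch above also puts the model into eval mode for the trace estimation (restoring the original mode afterwards) and drops bias tensors from the parameter list, so only weight tensors contribute to the per-op traces. A minimal sketch of that selection step, with torchvision's resnet18 used purely as a convenient example model:

import torchvision

model = torchvision.models.resnet18()
was_training = model.training
model.eval()

# Only trainable weight tensors; biases are skipped as in the patch.
weight_names = [n for n, p in model.named_parameters()
                if p.requires_grad and "bias" not in n]
params = [p for n, p in model.named_parameters()
          if p.requires_grad and "bias" not in n]

# ... the Hutchinson loop would differentiate against `params` here ...

if was_training:
    model.train()
print(len(weight_names), weight_names[:2])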
Date: Mon, 21 Nov 2022 19:16:10 +0800 Subject: [PATCH 064/128] fixed weight to op bug --- neural_compressor/strategy/hawq.py | 41 ++++++++++++++++++----------- test/strategy/test_hawq_wenhuach.py | 2 +- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 09f0b1ef175..015d9e678c1 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -75,11 +75,12 @@ def get_fused_mapping(self): if self.is_fused_module(child): for name, _ in child.named_children(): if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right - weight_to_op[op_name + "." + name + ".weight"] = op_name + weight_to_op[op_name + "." + name + ".weight"] = op_name[7:] break else: - if op_name + ".weight" in weights_info: - weight_to_op[op_name + ".weight"] = op_name + name = op_name + ".weight" + if name in weights_info and name not in weight_to_op.keys(): + weight_to_op[op_name + ".weight"] = op_name[7:] op_list = [] for key in weight_to_op.keys(): op_list.append(weight_to_op[key]) @@ -232,7 +233,15 @@ def get_avg_traces(self, enable_act=False, num_batches=2): layer_traces = layer_traces_estimate if enable_act: self.reset_input_gradient_and_hooks() - return layer_traces + weight_name_to_traces={} + + for weigth_name,trace in zip(self.weight_names, layer_traces): + weight_name_to_traces[weigth_name] = trace + op_name_to_trace={} + for weigth_name in self.weight_names: + op_name = self.weight_to_op[weigth_name] + op_name_to_trace[op_name] = weight_name_to_traces[weigth_name] + return op_name_to_trace @strategy_registry @@ -328,18 +337,18 @@ def next_tune_cfg(self): if orig_eval==False: self._fp32_model.train() - ordered_ops = sorted(op_fallback_acc_impact.keys(), - key=lambda key: op_fallback_acc_impact[key], - reverse=self.higher_is_better) - op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) - logger.info(f"Start to accumulate fallback to {target_dtype}.") - initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) - for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - yield op_tuning_cfg + # ordered_ops = sorted(op_fallback_acc_impact.keys(), + # key=lambda key: op_fallback_acc_impact[key], + # reverse=self.higher_is_better) + # op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) + # logger.info(f"Start to accumulate fallback to {target_dtype}.") + # initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) + # fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + # initial_op_tuning_cfg=initial_op_tuning_cfg, + # op_dtypes=op_dtypes, accumulate=True) + # for op_tuning_cfg in fallback_sampler: + # op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + # yield op_tuning_cfg # tmp = 1 diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index 236d8219e71..a09c83c3452 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -13,7 +13,7 @@ import shutil from neural_compressor.strategy.st_utils.hawq_wenhuach import fix_seed from torch.quantization.quantize_fx import fuse_fx -fix_seed(1) +# fix_seed(1) def build_ptq_yaml(): fake_yaml = ''' From 6cc95b047b3c754f9500d0b8967ea79a91459de1 Mon Sep 17 
00:00:00 2001
From: wenhuach
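Patch 064 above re-keys the per-weight trace estimates by the op that owns each weight, so the strategy can sort ops rather than parameter tensors. A reduced sketch of that bookkeeping (helper names are illustrative, and the fused-module special case is omitted):

import torch

def build_weight_to_op(model):
    # Map '<module>.weight' parameter names to the owning module (op) name.
    weights_info = dict(model.named_parameters())
    return {op_name + ".weight": op_name
            for op_name, _ in model.named_modules()
            if op_name + ".weight" in weights_info}

def traces_by_op(weight_names, layer_traces, weight_to_op):
    # Re-key per-weight trace estimates by op name.
    return {weight_to_op[w]: t for w, t in zip(weight_names, layer_traces)
            if w in weight_to_op}

m = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU(), torch.nn.Conv2d(8, 8, 3))
w2op = build_weight_to_op(m)
print(traces_by_op(list(w2op), [0.12, 0.34], w2op))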
Date: Mon, 21 Nov 2022 19:44:42 +0800 Subject: [PATCH 065/128] still have issues --- neural_compressor/strategy/hawq.py | 16 +++++++++++++++- .../strategy/st_utils/tuning_sampler.py | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 015d9e678c1..dbbaa98e931 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -333,10 +333,24 @@ def next_tune_cfg(self): orig_eval = False self._fp32_model.eval() ht = HessianTrace(self._fp32_model, self.calib_dataloader) - traces = ht.get_avg_traces() + op_to_traces = ht.get_avg_traces() if orig_eval==False: self._fp32_model.train() + ordered_ops = sorted(op_to_traces.keys(), + key=lambda key: op_to_traces[key], + reverse=self.higher_is_better) + op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(ordered_ops))) + logger.info(f"Start to accumulate fallback to {target_dtype}.") + + fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=None, + op_dtypes=op_dtypes, accumulate=True) + for op_tuning_cfg in fallback_sampler: + op_tuning_cfg['calib_sampling_size'] = calib_size + yield op_tuning_cfg + + # ordered_ops = sorted(op_fallback_acc_impact.keys(), # key=lambda key: op_fallback_acc_impact[key], # reverse=self.higher_is_better) diff --git a/neural_compressor/strategy/st_utils/tuning_sampler.py b/neural_compressor/strategy/st_utils/tuning_sampler.py index fea140a9e4d..c583f1c2764 100644 --- a/neural_compressor/strategy/st_utils/tuning_sampler.py +++ b/neural_compressor/strategy/st_utils/tuning_sampler.py @@ -263,7 +263,7 @@ def __init__(self, def __iter__(self): new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) - skip_first = True + skip_first = False for op_name_type, target_dtype in self.op_dtypes.items(): if not self.accumulate: new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) From 72a238575b23e6b515030a79c18a754a34c91d93 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 22 Nov 2022 11:01:22 +0800 Subject: [PATCH 066/128] WA for align the op name --- neural_compressor/strategy/hawq.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index dbbaa98e931..bc042f06b2c 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -316,7 +316,22 @@ def next_tune_cfg(self): # Initialize the tuning config for each op according to the quantization approach op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - + # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) + early_stop_tuning = True + stage1_cnt = 0 + quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] + quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] + stage1_max = 2 # TODO set a more appropriate value + op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], + op_item_dtype_dict, initial_op_tuning_cfg) + for op_tuning_cfg in op_wise_tuning_sampler: + stage1_cnt += 1 + if early_stop_tuning and stage1_cnt > stage1_max: + logger.info("Early stopping the stage 1.") + break + op_tuning_cfg['calib_sampling_size'] = calib_size + yield op_tuning_cfg + # Fallback the ops supported both static and dynamic from static to dynamic quant_ops = quant_mode_wise_items['static'] if 'static' in 
quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] @@ -340,11 +355,16 @@ def next_tune_cfg(self): ordered_ops = sorted(op_to_traces.keys(), key=lambda key: op_to_traces[key], reverse=self.higher_is_better) - op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(ordered_ops))) + # WA for add op type + op_info_map = {} + for op_info in list(initial_op_tuning_cfg.keys()): + op_info_map[op_info[0]] = op_info # op_name: (op_name, op_type) + tmp_ordered_ops = [op_info_map[op_name] for op_name in ordered_ops] + op_dtypes = OrderedDict(zip(tmp_ordered_ops, [target_dtype] * len(ordered_ops))) logger.info(f"Start to accumulate fallback to {target_dtype}.") fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=None, + initial_op_tuning_cfg=op_tuning_cfg, op_dtypes=op_dtypes, accumulate=True) for op_tuning_cfg in fallback_sampler: op_tuning_cfg['calib_sampling_size'] = calib_size From 71a4832e3be2edc0cdb8c95f8fddd2fb99390b08 Mon Sep 17 00:00:00 2001 From: wenhuach
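The workaround in patch 066 exists because the tracer returns bare op names while the tuning config is keyed by (op_name, op_type) tuples, so the trace ordering has to be translated before it can feed the fallback sampler. A hedged sketch of that translation with made-up inputs:

from collections import OrderedDict

def fallback_order(op_to_traces, tuning_cfg_op_keys, target_dtype="fp32", reverse=True):
    # Sort ops by trace, then re-key them onto the (op_name, op_type) tuples
    # used by the tuning space; ops the config does not know about are skipped.
    info_by_name = {name: (name, op_type) for name, op_type in tuning_cfg_op_keys}
    ordered = sorted(op_to_traces, key=op_to_traces.get, reverse=reverse)
    return OrderedDict((info_by_name[name], target_dtype)
                       for name in ordered if name in info_by_name)

op_to_traces = {"conv1": 0.42, "fc": 1.70, "layer1.0.conv1": 0.05}       # illustrative values
cfg_keys = [("conv1", "Conv2d"), ("layer1.0.conv1", "Conv2d"), ("fc", "Linear")]
print(fallback_order(op_to_traces, cfg_keys))

One detail worth double-checking in the patch: the sort uses reverse=self.higher_is_better, but for trace-based sensitivity the natural order is most sensitive first regardless of which direction the accuracy metric improves.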
Date: Tue, 22 Nov 2022 15:36:21 +0800 Subject: [PATCH 067/128] change entry point to main function fx model before entering into quantization --- .../quantization/ptq/cpu/fx/conf.yaml | 6 +- .../quantization/ptq/cpu/fx/main.py | 2 + .../experimental/quantization.py | 62 +++++++++---------- neural_compressor/strategy/hawq.py | 2 +- .../strategy/st_utils/tuning_sampler.py | 2 +- test/strategy/test_hawq_wenhuach.py | 8 +-- 6 files changed, 42 insertions(+), 40 deletions(-) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml index 064656e872b..4b50b559e6a 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml @@ -24,7 +24,7 @@ quantization: # optional. tuning constrai batch_size: 1 dataset: ImageFolder: - root: /home/bfang1/Projects/HAWQ_INC/datasets/raw # NOTE: modify to calibration dataset location if needed + root: /home/bfang1/Projects/HAWQ_INC/datasets/raw/val # NOTE: modify to calibration dataset location if needed transform: Resize: size: 256 @@ -43,7 +43,7 @@ evaluation: # optional. required if use batch_size: 1 dataset: ImageFolder: - root: /home/bfang1/Projects/HAWQ_INC/datasets/raw # NOTE: modify to evaluation dataset location if needed + root: /home/bfang1/Projects/HAWQ_INC/datasets/raw/val # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 @@ -61,7 +61,7 @@ evaluation: # optional. required if use batch_size: 1 dataset: ImageFolder: - root: /home/bfang1/Projects/HAWQ_INC/datasets/raw # NOTE: modify to evaluation dataset location if needed + root: /home/bfang1/Projects/HAWQ_INC/datasets/raw/val # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py index 8646048ccf4..30008bfa3db 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py @@ -169,6 +169,8 @@ def main(): if args.tune: from neural_compressor.experimental import Quantization, common model.eval() + from torch.quantization.quantize_fx import fuse_fx + model = fuse_fx(model) quantizer = Quantization("./conf.yaml") quantizer.model = common.Model(model) q_model = quantizer.fit() diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index dae0f8611c5..bdcba064e6e 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -146,37 +146,37 @@ def pre_process(self): with open(self.resume_file, 'rb') as f: _resume = pickle.load(f).__dict__ - - import torchvision.datasets as datasets - import torchvision.transforms as transforms - data_path = "/home/bfang1/Projects/HAWQ_INC/datasets/raw" - traindir = os.path.join(data_path, 'train') - valdir = os.path.join(data_path, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - val_dataset = 
datasets.ImageFolder( - valdir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - from torch.utils.data import DataLoader - - self._calib_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True) - self._eval_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False) + # + # import torchvision.datasets as datasets + # import torchvision.transforms as transforms + # data_path = "/home/bfang1/Projects/HAWQ_INC/datasets/raw" + # traindir = os.path.join(data_path, 'train') + # valdir = os.path.join(data_path, 'val') + # normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + # std=[0.229, 0.224, 0.225]) + # + # train_dataset = datasets.ImageFolder( + # traindir, + # transforms.Compose([ + # transforms.RandomResizedCrop(224), + # transforms.RandomHorizontalFlip(), + # transforms.ToTensor(), + # normalize, + # ])) + # + # val_dataset = datasets.ImageFolder( + # valdir, + # transforms.Compose([ + # transforms.RandomResizedCrop(224), + # transforms.RandomHorizontalFlip(), + # transforms.ToTensor(), + # normalize, + # ])) + # + # from torch.utils.data import DataLoader + # + # self._calib_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True) + # self._eval_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False) self.strategy = STRATEGIES[strategy]( self._model, diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index bc042f06b2c..6db4757aa0c 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -321,7 +321,7 @@ def next_tune_cfg(self): stage1_cnt = 0 quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] - stage1_max = 2 # TODO set a more appropriate value + stage1_max = -1 # TODO set a more appropriate value op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) for op_tuning_cfg in op_wise_tuning_sampler: diff --git a/neural_compressor/strategy/st_utils/tuning_sampler.py b/neural_compressor/strategy/st_utils/tuning_sampler.py index c583f1c2764..f311d7c16a4 100644 --- a/neural_compressor/strategy/st_utils/tuning_sampler.py +++ b/neural_compressor/strategy/st_utils/tuning_sampler.py @@ -272,7 +272,7 @@ def __iter__(self): if self.accumulate and skip_first: # skip the first one skip_first = False continue - logger.debug(f"fallback {op_name_type} to {target_dtype}") + logger.info(f"fallback {op_name_type} to {target_dtype}") yield new_tune_cfg # need to skip the first one diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index a09c83c3452..2adcd5a5812 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -74,15 +74,15 @@ def tearDownClass(self): def test_run_hawq_one_trial(self): - def eval_func(model): - self.i -= 1 - return self.i + # def eval_func(model): + # self.i -= 1 + # return self.i from neural_compressor.experimental import Quantization, common model = copy.deepcopy(self.model) model.eval() model = fuse_fx(model) quantizer = Quantization('ptq_yaml.yaml') - quantizer.eval_func = eval_func + ##quantizer.eval_func = eval_func dataset = quantizer.dataset('dummy', (32, 3, 224, 224), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) From 
d9378c1af0a981a4c75063623e543c534847235a Mon Sep 17 00:00:00 2001
From: wenhuach
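Patch 067 above fuses the FX graph in the example entry point before the model reaches the quantizer. A minimal, hedged illustration of that call on a torchvision model (any conv-bn network works; fuse_fx expects eval mode):

import torch
import torchvision.models as models
from torch.quantization.quantize_fx import fuse_fx

model = models.resnet18()
model.eval()                               # fuse_fx requires eval mode
fused = fuse_fx(model)                     # folds conv+bn(+relu) patterns into single modules
with torch.no_grad():
    print(fused(torch.randn(1, 3, 224, 224)).shape)

Where the fusion happens matters for the tracer: fused and unfused graphs expose different parameter names, which is exactly the mismatch the weight_to_op bookkeeping has to absorb.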
Date: Wed, 23 Nov 2022 10:54:34 +0800 Subject: [PATCH 068/128] get activations and the corresponding gradients --- neural_compressor/strategy/hawq.py | 72 ++++++++++++++++-------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 6db4757aa0c..604008ac2d4 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -67,6 +67,13 @@ def is_fused_module(self, module): else: return False + def mapping_module_to_op(self, name): + length = len("_model.") + if len(name) < length: + return name + else: + return name[length:] + def get_fused_mapping(self): model = self.model weights_info = dict(model.named_parameters()) @@ -75,7 +82,8 @@ def get_fused_mapping(self): if self.is_fused_module(child): for name, _ in child.named_children(): if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right - weight_to_op[op_name + "." + name + ".weight"] = op_name[7:] + + weight_to_op[op_name + "." + name + ".weight"] = self.mapping_module_to_op(op_name) break else: name = op_name + ".weight" @@ -95,8 +103,8 @@ def get_gradients(self, model, data, criterion, create_graph=False, enable_act=F input = data[0].to(self.device) ##self._input_shape = input.shape ## for resetting input activation target = data[1].to(self.device) - if enable_act: - input.requires_grad = True + # if enable_act: + # input.requires_grad = True output = model(input) loss = criterion(output, target) # torch.autograd.backward(loss, create_graph=create_graph) @@ -138,23 +146,24 @@ def get_hv_one_sample(self, params, enable_act, num_batches): v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better return v_t_H_v - def _get_input_grad_hook(self, name): - def input_grad_hook(model, grad_input, grad_output): + def _get_act_grad_hook(self, name): + def act_grad_hook(model, grad_input, grad_output): self.layer_acts_grads[name] = [grad_input, grad_output] - return input_grad_hook - def _get_enable_input_grad_hook(self, name): - def enable_input_grad_hook(model, inputs, outputs): + return act_grad_hook + + def _get_enable_act_grad_hook(self, name): + def enable_act_grad_hook(model, inputs, outputs): try: input = inputs[0] ##TODO check whether this is right except: input = inputs - if input.is_leaf == False: - if input.requires_grad is False: - input.requires_grad = True - self.layer_acts[name] = input - return enable_input_grad_hook + if input.requires_grad is False: + input.requires_grad = True + self.layer_acts[name] = input + + return enable_act_grad_hook # def _get_disable_input_grad_hook(self, name): # def disable_input_grad_hook(model, inputs, outputs): @@ -169,21 +178,19 @@ def enable_input_grad_hook(model, inputs, outputs): # # return disable_input_grad_hook - def _unregister_hook(self): for handel in self.hook_handles: handel.remove() - def register_input_grad_hooks(self): + def register_act_grad_hooks(self): for name, module in self.model.named_modules(): - if name in self.op_list: - hook_handle = module.register_forward_hook(self._get_enable_input_grad_hook(name)) + if self.mapping_module_to_op(name) in self.op_list: + hook_handle = module.register_forward_hook(self._get_enable_act_grad_hook(name)) self.hook_handles.append(hook_handle) - hook_handle = module.register_backward_hook(self._get_input_grad_hook(name)) + hook_handle = module.register_backward_hook(self._get_act_grad_hook(name)) self.hook_handles.append(hook_handle) - - def 
reset_input_gradient_and_hooks(self): + def reset_act_gradient_and_hooks(self): # tmp_input = torch.zeros(self._input_shape, device=self.device) # for name, module in self.model.named_modules(): # if name in self.op_list: @@ -193,12 +200,13 @@ def reset_input_gradient_and_hooks(self): self._unregister_hook() def get_params(self): - weight_names = [n for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n] ##remove bias + weight_names = [n for n, p in self.model.named_parameters() if + p.requires_grad and "bias" not in n] ##remove bias params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n] ##remove bias self.weight_names = weight_names self.params = params - def get_avg_traces(self, enable_act=False, num_batches=2): + def get_avg_traces(self, enable_act=True, num_batches=2): """ Estimates average hessian trace for each parameter """ @@ -207,7 +215,7 @@ def get_avg_traces(self, enable_act=False, num_batches=2): self.hook_handles = [] self.layer_acts = {} self.layer_acts_grads = {} - self.register_input_grad_hooks() + self.register_act_grad_hooks() ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] ##num_all_data = num_data_iter * self.dataloader.batch_size ##op_list = self.op_list @@ -226,18 +234,18 @@ def get_avg_traces(self, enable_act=False, num_batches=2): diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) if diff_ratio < self.tolerance and i > 10: ##TODO magic number break - if i==50:##TODO for debug + if i == 50: ##TODO for debug break prev_avg_model_trace = model_trace layer_traces = layer_traces_estimate if enable_act: self.reset_input_gradient_and_hooks() - weight_name_to_traces={} + weight_name_to_traces = {} - for weigth_name,trace in zip(self.weight_names, layer_traces): + for weigth_name, trace in zip(self.weight_names, layer_traces): weight_name_to_traces[weigth_name] = trace - op_name_to_trace={} + op_name_to_trace = {} for weigth_name in self.weight_names: op_name = self.weight_to_op[weigth_name] op_name_to_trace[op_name] = weight_name_to_traces[weigth_name] @@ -322,8 +330,8 @@ def next_tune_cfg(self): quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] stage1_max = -1 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) + op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], + op_item_dtype_dict, initial_op_tuning_cfg) for op_tuning_cfg in op_wise_tuning_sampler: stage1_cnt += 1 if early_stop_tuning and stage1_cnt > stage1_max: @@ -349,7 +357,7 @@ def next_tune_cfg(self): self._fp32_model.eval() ht = HessianTrace(self._fp32_model, self.calib_dataloader) op_to_traces = ht.get_avg_traces() - if orig_eval==False: + if orig_eval == False: self._fp32_model.train() ordered_ops = sorted(op_to_traces.keys(), @@ -358,7 +366,7 @@ def next_tune_cfg(self): # WA for add op type op_info_map = {} for op_info in list(initial_op_tuning_cfg.keys()): - op_info_map[op_info[0]] = op_info # op_name: (op_name, op_type) + op_info_map[op_info[0]] = op_info # op_name: (op_name, op_type) tmp_ordered_ops = [op_info_map[op_name] for op_name in ordered_ops] op_dtypes = OrderedDict(zip(tmp_ordered_ops, [target_dtype] * len(ordered_ops))) logger.info(f"Start to accumulate fallback to {target_dtype}.") @@ -370,7 +378,6 @@ def next_tune_cfg(self): 
op_tuning_cfg['calib_sampling_size'] = calib_size yield op_tuning_cfg - # ordered_ops = sorted(op_fallback_acc_impact.keys(), # key=lambda key: op_fallback_acc_impact[key], # reverse=self.higher_is_better) @@ -384,7 +391,6 @@ def next_tune_cfg(self): # op_tuning_cfg['calib_sampling_size'] = calib_sampling_size # yield op_tuning_cfg - # tmp = 1 # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up # ops_sensitivity = self.adaptor.get_hessian_trace(self._fp32_model, From 17d381f7611b1e004778ac68c9fd53a3f791ea0f Mon Sep 17 00:00:00 2001 From: wenhuach
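The hooks added in patch 068 record each traced op's input activation on the forward pass and that activation's gradient on the backward pass. The sketch below shows the same pattern on a toy model; the names are illustrative, and it uses register_full_backward_hook, the non-deprecated variant of the register_backward_hook call in the patch.

import torch
import torch.nn.functional as F

acts, act_grads = {}, {}

def record_act(name):
    def hook(module, inputs, output):
        acts[name] = inputs[0]                        # the op's input activation
    return hook

def record_act_grad(name):
    def hook(module, grad_input, grad_output):
        act_grads[name] = grad_input[0]               # gradient w.r.t. that activation
    return hook

model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU(),
                            torch.nn.Flatten(), torch.nn.Linear(8 * 30 * 30, 10))
handles = []
for name, m in model.named_modules():
    if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
        handles.append(m.register_forward_hook(record_act(name)))
        handles.append(m.register_full_backward_hook(record_act_grad(name)))

x = torch.randn(2, 3, 32, 32, requires_grad=True)     # keeps activations in the autograd graph
y = torch.randint(0, 10, (2,))
F.cross_entropy(model(x), y).backward()
for h in handles:                                      # same cleanup as _unregister_hook above
    h.remove()
print({k: tuple(v.shape) for k, v in act_grads.items()})

Note that flipping requires_grad inside a forward hook only happens after that module's forward has already run, which is presumably why a later revision also enables gradients on the model input before the forward pass.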
Date: Wed, 23 Nov 2022 11:54:27 +0800 Subject: [PATCH 069/128] change fusefx position --- .../quantization/ptq/cpu/fx/main.py | 2 -- neural_compressor/strategy/hawq.py | 18 ++++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py index 30008bfa3db..8646048ccf4 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py @@ -169,8 +169,6 @@ def main(): if args.tune: from neural_compressor.experimental import Quantization, common model.eval() - from torch.quantization.quantize_fx import fuse_fx - model = fuse_fx(model) quantizer = Quantization("./conf.yaml") quantizer.model = common.Model(model) q_model = quantizer.fit() diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 604008ac2d4..2cfac2b5815 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -40,7 +40,9 @@ class HessianTrace: """ def __init__(self, model, dataloader, criterion=None): - self.model = model ##TODO need to check fused or not + from torch.quantization.quantize_fx import fuse_fx + self.model = fuse_fx(model.model) + self.dataloader = dataloader self.max_iter = 500 self.tolerance = 1e-5 @@ -68,11 +70,11 @@ def is_fused_module(self, module): return False def mapping_module_to_op(self, name): - length = len("_model.") - if len(name) < length: - return name - else: - return name[length:] + # length = len("_model.") + # if len(name) < length: + # return name + # else: + return name def get_fused_mapping(self): model = self.model @@ -88,7 +90,7 @@ def get_fused_mapping(self): else: name = op_name + ".weight" if name in weights_info and name not in weight_to_op.keys(): - weight_to_op[op_name + ".weight"] = op_name[7:] + weight_to_op[op_name + ".weight"] = op_name op_list = [] for key in weight_to_op.keys(): op_list.append(weight_to_op[key]) @@ -240,7 +242,7 @@ def get_avg_traces(self, enable_act=True, num_batches=2): layer_traces = layer_traces_estimate if enable_act: - self.reset_input_gradient_and_hooks() + self.reset_act_gradient_and_hooks() weight_name_to_traces = {} for weigth_name, trace in zip(self.weight_names, layer_traces): From d0a3fc7c199a2d165695e1b34190b991921bca15 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Wed, 23 Nov 2022 17:20:16 +0800 Subject: [PATCH 070/128] add weight quant loss, the current key is from quant model --- neural_compressor/strategy/hawq.py | 342 +++++++++++++++++------------ 1 file changed, 201 insertions(+), 141 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 2cfac2b5815..2f6a2e7e074 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -29,6 +29,7 @@ from .st_utils.tuning_space import TUNING_ITEMS_LST from torch.quantization.quantize_fx import fuse_fx import torchvision +from typing import Dict, List, Optional, Any, Union, Callable, Set class HessianTrace: @@ -55,6 +56,22 @@ def __init__(self, model, dataloader, criterion=None): self.criterion = self.criterion.to(self.device) self.weight_to_op, self.op_list = self.get_fused_mapping() + def get_qnt_weight_loss(self, weights_name): + + fp32_model = self.fp32model + + qnt_model = self.q_model + + # print(self.model.state_dict()) + for n, p in self.model.named_parameters(): + print(n) + + print("*" * 20) + + for n, p in self.q_model._model.named_parameters(): + print(n) + pass + def is_fused_module(self, module): """This is a helper function for `_propagate_qconfig_helper` to detecte if this module is fused. @@ -100,7 +117,7 @@ def get_device(self, model: torch.nn.Module): for n, p in model.named_parameters(): return p.data.device - def get_gradients(self, model, data, criterion, create_graph=False, enable_act=False): + def get_gradients(self, model, data, criterion, create_graph=False): model.zero_grad() input = data[0].to(self.device) ##self._input_shape = input.shape ## for resetting input activation @@ -131,14 +148,15 @@ def sample_rademacher(self, params): samples.append(r) return samples - def get_hv_one_sample(self, params, enable_act, num_batches): + def get_vtHv_weight(self, params, num_samples): + num_batches = (num_samples + self.dataloader.batchsize - 1) // self.dataloader v = self.sample_rademacher(params) H_v = [0] * len(v) cnt = 0 for step, data in enumerate(self.dataloader): batch_size = data[0].shape[0] cnt += batch_size - gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True, enable_act=enable_act) + gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True) H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] if step == num_batches - 1: @@ -148,6 +166,25 @@ def get_hv_one_sample(self, params, enable_act, num_batches): v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better return v_t_H_v + def get_vtHv_act(self, params, num_samples): + v = self.sample_rademacher(params) + H_v = [0] * len(v) + cnt = 0 + for step, data in enumerate(self.dataloader): + if cnt >= num_samples: + break + for i in range(self.dataloader.batchsize): ##force to batchsize to be 1 + input = data[0][i:i + 1] + target = data[1][i:i + 1] + + self.get_gradients(self.model, (input, target), self.criterion, create_graph=True) + layer_acts = [self.layer_acts[key] for key in self.layer_acts.keys()] + layer_act_gradients = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] + hv_one = torch.autograd.grad(layer_act_gradients, layer_acts, v, only_inputs=True, retain_graph=False) + cnt += 1 + if cnt >= num_samples: + break + def _get_act_grad_hook(self, name): def act_grad_hook(model, grad_input, grad_output): 
self.layer_acts_grads[name] = [grad_input, grad_output] @@ -208,28 +245,12 @@ def get_params(self): self.weight_names = weight_names self.params = params - def get_avg_traces(self, enable_act=True, num_batches=2): - """ - Estimates average hessian trace for each parameter - """ - assert num_batches > 0 - if enable_act: - self.hook_handles = [] - self.layer_acts = {} - self.layer_acts_grads = {} - self.register_act_grad_hooks() - ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] - ##num_all_data = num_data_iter * self.dataloader.batch_size - ##op_list = self.op_list - ##TODO setting this in config - self.get_params() - # names = [n for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias - # params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias + def get_weight_traces(self, num_samples): layer_traces_per_iter = [] prev_avg_model_trace = 0 for i in range(self.max_iter): - layer_traces = self.get_hv_one_sample(self.params, enable_act, num_batches) + layer_traces = self.get_vtHv_weight(self.params, num_samples) layer_traces_per_iter.append(layer_traces) layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) model_trace = torch.sum(layer_traces_estimate) @@ -239,19 +260,152 @@ def get_avg_traces(self, enable_act=True, num_batches=2): if i == 50: ##TODO for debug break prev_avg_model_trace = model_trace - - layer_traces = layer_traces_estimate - if enable_act: - self.reset_act_gradient_and_hooks() weight_name_to_traces = {} - for weigth_name, trace in zip(self.weight_names, layer_traces): - weight_name_to_traces[weigth_name] = trace + for weight_name, trace in zip(self.weight_names, layer_traces): + weight_name_to_traces[weight_name] = trace op_name_to_trace = {} - for weigth_name in self.weight_names: - op_name = self.weight_to_op[weigth_name] - op_name_to_trace[op_name] = weight_name_to_traces[weigth_name] + for weight_name in self.weight_names: + op_name = self.weight_to_op[weight_name] + op_name_to_trace[op_name] = weight_name_to_traces[weight_name] return op_name_to_trace + return layer_traces_estimate + + def get_act_traces(self, num_samples): + self.hook_handles = [] + self.layer_acts = {} + self.layer_acts_grads = {} + self.register_act_grad_hooks() + for i in range(self.max_iter): + pass + + def get_avg_traces(self, enable_act=True, num_samples=100): + """ + Estimates average hessian trace for each parameter + """ + + assert num_samples > 0 + + ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] + ##num_all_data = num_data_iter * self.dataloader.batch_size + ##op_list = self.op_list + ##TODO setting this in config + self.get_params() + # names = [n for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias + # params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias + + ## handle activation + if enable_act: + self.get_act_traces(num_samples) + ##change batchsize to 1 + + # + # layer_traces = layer_traces_estimate + # if enable_act: + # self.reset_act_gradient_and_hooks() + + +##copy from torch.quantization._numeric_suite +def _find_match( + str_list: Union[Dict[str, Any], List[str]], key_str: str, + postfix: str, +) -> Optional[str]: + split_str = key_str.split(".") + if split_str[-1] == postfix: + match_string = "".join(key_str.split(".")[0:-1]) + for s2 in str_list: + pattern1 = "".join(s2.split(".")[0:-1]) + pattern2 = "".join(s2.split(".")[0:-2]) + if match_string == 
pattern1: + return s2 + if match_string == pattern2: + return s2 + + # For matching "fc.weight" and "fc._packed_params._packed_params" + if postfix == "_packed_params": + match_string = "".join(key_str.split(".")[0:-2]) + if len(match_string) == 0: + return None + for s2 in str_list: + pattern1 = "".join(s2.split(".")[0:-1]) + pattern2 = "".join(s2.split(".")[0:-2]) + if match_string == pattern1: + return s2 + if match_string == pattern2: + return s2 + return None + else: + return None + + +##copy form torch.quantization._numeric_suite +def compare_weights( + float_dict: Dict[str, Any], quantized_dict: Dict[str, Any] +) -> Dict[str, Dict[str, torch.Tensor]]: + r"""Compare the weights of the float module with its corresponding quantized + module. Return a dict with key corresponding to module names and each entry being + a dictionary with two keys 'float' and 'quantized', containing the float and + quantized weights. This dict can be used to compare and compute the quantization + error of the weights of float and quantized models. + + Example usage:: + + wt_compare_dict = compare_weights( + float_model.state_dict(), qmodel.state_dict()) + for key in wt_compare_dict: + print( + key, + compute_error( + wt_compare_dict[key]['float'], + wt_compare_dict[key]['quantized'].dequantize() + ) + ) + + Args: + float_dict: state dict of the float model + quantized_dict: state dict of the quantized model + + Return: + weight_dict: dict with key corresponding to module names and each entry being + a dictionary with two keys 'float' and 'quantized', containing the float and + quantized weights + """ + + weight_dict: Dict[str, Dict] = {} + for key in quantized_dict: + match_key = _find_match(float_dict, key, "weight") + if match_key is not None: + weight_dict[key] = {} + weight_dict[key]["float"] = float_dict[match_key] + weight_dict[key]["quantized"] = quantized_dict[key] + continue + + # For matching "fc.weight" and "fc._packed_params._packed_params" + match_key = _find_match(float_dict, key, "_packed_params") + if match_key is not None: + weight_dict[key] = {} + weight_dict[key]["float"] = float_dict[match_key] + weight_dict[key]["quantized"] = quantized_dict[key][0] + + # For LSTM + split_str = key.split(".") + if split_str[-1] == "param" and split_str[-3] == "_all_weight_values": + layer = split_str[-2] + module_name = ".".join(split_str[:-3]) + float_weight_ih_key = module_name + ".weight_ih_l" + layer + float_weight_hh_key = module_name + ".weight_hh_l" + layer + if float_weight_ih_key in float_dict and float_weight_hh_key in float_dict: + weight_dict[key] = {} + weight_dict[key]["float"] = float_dict[float_weight_ih_key] + weight_dict[key]["quantized"] = ( + quantized_dict[key].__getstate__()[0][4][0].__getstate__()[0][0] + ) + weight_dict[key]["float"] = float_dict[float_weight_hh_key] + weight_dict[key]["quantized"] = ( + quantized_dict[key].__getstate__()[0][4][1].__getstate__()[0][0] + ) + + return weight_dict @strategy_registry @@ -331,7 +485,7 @@ def next_tune_cfg(self): stage1_cnt = 0 quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] - stage1_max = -1 # TODO set a more appropriate value + stage1_max = 1 # TODO set a more appropriate value op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) for op_tuning_cfg in op_wise_tuning_sampler: @@ -341,6 +495,12 @@ def next_tune_cfg(self): break 
op_tuning_cfg['calib_sampling_size'] = calib_size yield op_tuning_cfg + + # import torch.quantization._numeric_suite as ns + # self.model.eval() + # fused_model = fuse_fx(self.model.model) + # res = compare_weights(fused_model.state_dict(), self.q_model.state_dict()) + # Fallback the ops supported both static and dynamic from static to dynamic quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] @@ -358,6 +518,16 @@ def next_tune_cfg(self): orig_eval = False self._fp32_model.eval() ht = HessianTrace(self._fp32_model, self.calib_dataloader) + + q_model_state_dict = { + } + for key in self.q_model.state_dict().keys(): + length = len("_model.") + new_key = key[length:] + q_model_state_dict[new_key] = self.q_model.state_dict()[key] + + weight_quant_loss = compare_weights(ht.model.state_dict(), q_model_state_dict) + op_to_traces = ht.get_avg_traces() if orig_eval == False: self._fp32_model.train() @@ -380,116 +550,6 @@ def next_tune_cfg(self): op_tuning_cfg['calib_sampling_size'] = calib_size yield op_tuning_cfg - # ordered_ops = sorted(op_fallback_acc_impact.keys(), - # key=lambda key: op_fallback_acc_impact[key], - # reverse=self.higher_is_better) - # op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) - # logger.info(f"Start to accumulate fallback to {target_dtype}.") - # initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - # fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - # initial_op_tuning_cfg=initial_op_tuning_cfg, - # op_dtypes=op_dtypes, accumulate=True) - # for op_tuning_cfg in fallback_sampler: - # op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - # yield op_tuning_cfg - - # tmp = 1 - # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up - # ops_sensitivity = self.adaptor.get_hessian_trace(self._fp32_model, - # self.calib_dataloader, - # self. - # method_args={'name': 'hessian_trace'}) - # tmp = 1 - - def next_tune_cfg_bk(self): - """The generator of yielding next tuning config to traverse by concrete strategies - according to last tuning result. - - Yields: - tune_config (dict): It's a dict containing the tuning configuration to run. 
- """ - from copy import deepcopy - tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options - - calib_sampling_size = calib_sampling_size_lst[0] - # Initialize the tuning config for each op according to the quantization approach - op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) - early_stop_tuning = False - stage1_cnt = 0 - quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] - quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] - stage1_max = 1e9 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) - # for op_tuning_cfg in op_wise_tuning_sampler: - # stage1_cnt += 1 - # if early_stop_tuning and stage1_cnt > stage1_max: - # logger.info("Early stopping the stage 1.") - # break - # op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - # yield op_tuning_cfg - # Fallback the ops supported both static and dynamic from static to dynamic - # Tuning items: None - # if self.cfg.quantization.approach == 'post_training_auto_quant': - # static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if - # item in tuning_space.query_items_by_quant_mode('dynamic')] - # if static_dynamic_items: - # logger.info("Fallback all ops that support both dynamic and static to dynamic.") - # else: - # logger.info("Non ops that support both dynamic") - # - # new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) - # for item in static_dynamic_items: - # new_op_tuning_cfg[item.name] = self.initial_dynamic_cfg_based_on_static_cfg( - # new_op_tuning_cfg[item.name]) - # new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - # yield new_op_tuning_cfg - best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg) - - # Fallback - for target_dtype in ['bf16', 'fp32']: - target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) - fallback_items_lst = [item for item in quant_ops if item in target_type_lst] - if fallback_items_lst: - logger.info(f"Start to fallback op to {target_dtype} one by one.") - self._fallback_started() - # fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up - ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, - self.calib_dataloader, - method_args={'name': 'hessian_trace'}) - - fallback_items_name_lst = sorted(ops_sensitivity, key=lambda items: items[1], reverse=True) - - op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) - initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) - - op_fallback_acc_impact = OrderedDict() - for op_index, op_tuning_cfg in enumerate(fallback_sampler): - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - yield op_tuning_cfg - acc, _ = self.last_tune_result - op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc - - # do accumulated fallback according to the order in the previous stage - if len(op_fallback_acc_impact) > 0: - ordered_ops = sorted(op_fallback_acc_impact.keys(), - key=lambda key: op_fallback_acc_impact[key], - 
reverse=self.higher_is_better) - op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) - logger.info(f"Start to accumulate fallback to {target_dtype}.") - initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) - for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - yield op_tuning_cfg - def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg: OpTuningConfig): op_state = op_static_cfg.get_state() op_name = op_static_cfg.op_name From c46653971bceb06040635d1deef3982d897ee480 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Wed, 23 Nov 2022 19:06:28 +0800 Subject: [PATCH 071/128] add weights_quant loss eval still bugs for get avg traces --- neural_compressor/strategy/hawq.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 2f6a2e7e074..897dfcffea2 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -527,8 +527,16 @@ def next_tune_cfg(self): q_model_state_dict[new_key] = self.q_model.state_dict()[key] weight_quant_loss = compare_weights(ht.model.state_dict(), q_model_state_dict) - + pertur_lst={} + for key in weight_quant_loss: + op_float_tensor=weight_quant_loss[key]['float'] + op_qnt_tensor=weight_quant_loss[key]['quantized'].dequantize() + diff_l2=(torch.norm(op_float_tensor-op_qnt_tensor,p=2)**2) #Formula: L2=||Q(w)-w||p^2 + pertur_lst[key]=diff_l2 + # for i in pertur_lst: + # print(pertur_lst[i]) op_to_traces = ht.get_avg_traces() + print(op_to_traces) if orig_eval == False: self._fp32_model.train() From c4c00cad00f9dd364d88ff5e9e95dc0b44a612e9 Mon Sep 17 00:00:00 2001 From: wenhuach
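The perturbation added in patch 071 is the squared L2 distance between each FP32 weight and its dequantized counterpart, taken from the float/quantized pairs that compare_weights returns. A self-contained sketch using the upstream numeric-suite helper on a dynamically quantized toy model (illustrative only; the strategy compares the fused FP32 model against the tuned q_model instead):

import torch
import torch.quantization._numeric_suite as ns

float_model = torch.nn.Sequential(torch.nn.Linear(16, 8), torch.nn.ReLU(),
                                  torch.nn.Linear(8, 4)).eval()
q_model = torch.quantization.quantize_dynamic(float_model, {torch.nn.Linear},
                                              dtype=torch.qint8)

wt_cmp = ns.compare_weights(float_model.state_dict(), q_model.state_dict())
perturbation = {}
for key, pair in wt_cmp.items():
    w_fp32 = pair["float"]
    w_int8 = pair["quantized"].dequantize()
    perturbation[key] = torch.norm(w_fp32 - w_int8, p=2) ** 2     # ||Q(w) - w||_2^2
print(perturbation)

In the strategy itself the quantized state dict keys carry a leading "_model." prefix, which is why they are stripped before the comparison.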
Date: Thu, 24 Nov 2022 14:12:48 +0800 Subject: [PATCH 072/128] fixed weight trace issue --- neural_compressor/strategy/hawq.py | 50 ++++++++---------------------- 1 file changed, 13 insertions(+), 37 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 897dfcffea2..65c7ab72d82 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -55,22 +55,7 @@ def __init__(self, model, dataloader, criterion=None): self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config self.criterion = self.criterion.to(self.device) self.weight_to_op, self.op_list = self.get_fused_mapping() - - def get_qnt_weight_loss(self, weights_name): - - fp32_model = self.fp32model - - qnt_model = self.q_model - - # print(self.model.state_dict()) - for n, p in self.model.named_parameters(): - print(n) - - print("*" * 20) - - for n, p in self.q_model._model.named_parameters(): - print(n) - pass + self.get_params() def is_fused_module(self, module): """This is a helper function for `_propagate_qconfig_helper` to detecte @@ -149,7 +134,6 @@ def sample_rademacher(self, params): return samples def get_vtHv_weight(self, params, num_samples): - num_batches = (num_samples + self.dataloader.batchsize - 1) // self.dataloader v = self.sample_rademacher(params) H_v = [0] * len(v) cnt = 0 @@ -159,7 +143,7 @@ def get_vtHv_weight(self, params, num_samples): gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True) H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] - if step == num_batches - 1: + if cnt >=num_samples: break if cnt > 0: H_v = [item / cnt for item in H_v] @@ -246,7 +230,6 @@ def get_params(self): self.params = params def get_weight_traces(self, num_samples): - layer_traces_per_iter = [] prev_avg_model_trace = 0 for i in range(self.max_iter): @@ -261,7 +244,7 @@ def get_weight_traces(self, num_samples): break prev_avg_model_trace = model_trace weight_name_to_traces = {} - + layer_traces = layer_traces_estimate for weight_name, trace in zip(self.weight_names, layer_traces): weight_name_to_traces[weight_name] = trace op_name_to_trace = {} @@ -269,7 +252,6 @@ def get_weight_traces(self, num_samples): op_name = self.weight_to_op[weight_name] op_name_to_trace[op_name] = weight_name_to_traces[weight_name] return op_name_to_trace - return layer_traces_estimate def get_act_traces(self, num_samples): self.hook_handles = [] @@ -279,24 +261,18 @@ def get_act_traces(self, num_samples): for i in range(self.max_iter): pass - def get_avg_traces(self, enable_act=True, num_samples=100): + def get_avg_traces(self, enable_act=True, num_samples=32): """ Estimates average hessian trace for each parameter """ assert num_samples > 0 - - ##num_data_iter = self.op_cfgs_list[0]['calib_iteration'] - ##num_all_data = num_data_iter * self.dataloader.batch_size - ##op_list = self.op_list - ##TODO setting this in config - self.get_params() - # names = [n for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias - # params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n]##remove bias + weight_traces = self.get_weight_traces(num_samples) + return weight_traces ## handle activation - if enable_act: - self.get_act_traces(num_samples) + # if enable_act: + # self.get_act_traces(num_samples) ##change batchsize to 1 # @@ -527,12 +503,12 @@ def 
next_tune_cfg(self): q_model_state_dict[new_key] = self.q_model.state_dict()[key] weight_quant_loss = compare_weights(ht.model.state_dict(), q_model_state_dict) - pertur_lst={} + pertur_lst = {} for key in weight_quant_loss: - op_float_tensor=weight_quant_loss[key]['float'] - op_qnt_tensor=weight_quant_loss[key]['quantized'].dequantize() - diff_l2=(torch.norm(op_float_tensor-op_qnt_tensor,p=2)**2) #Formula: L2=||Q(w)-w||p^2 - pertur_lst[key]=diff_l2 + op_float_tensor = weight_quant_loss[key]['float'] + op_qnt_tensor = weight_quant_loss[key]['quantized'].dequantize() + diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) # Formula: L2=||Q(w)-w||p^2 + pertur_lst[key] = diff_l2 # for i in pertur_lst: # print(pertur_lst[i]) op_to_traces = ht.get_avg_traces() From 85fac870c000efff48ec9b801a9a8b6a3d3fc736 Mon Sep 17 00:00:00 2001 From: wenhuach
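With a per-op average trace and a per-op weight perturbation both available, a HAWQ-V2-style sensitivity score is simply their product. The patches above compute the two ingredients but do not yet combine them, so the following is only a sketch of one plausible combination (op names and values are illustrative):

from collections import OrderedDict

def rank_ops_by_sensitivity(op_to_trace, op_to_perturbation, fallback_dtype="fp32"):
    # Omega_op = avg_trace(H_op) * ||Q(W_op) - W_op||_2^2 ;
    # a larger Omega means quantizing this op hurts more, so fall it back first.
    omega = {op: float(op_to_trace[op]) * float(op_to_perturbation[op])
             for op in op_to_trace if op in op_to_perturbation}
    ordered = sorted(omega, key=omega.get, reverse=True)
    return OrderedDict((op, fallback_dtype) for op in ordered)

print(rank_ops_by_sensitivity({"conv1": 2.1, "fc": 0.3},            # illustrative traces
                              {"conv1": 4.0e-3, "fc": 9.0e-2}))     # illustrative ||dW||^2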
Date: Thu, 24 Nov 2022 14:15:11 +0800 Subject: [PATCH 073/128] fixed weight trace issue --- .../experimental/quantization.py | 62 +++++++++---------- test/strategy/test_hawq_wenhuach.py | 10 +-- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index bdcba064e6e..dae0f8611c5 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -146,37 +146,37 @@ def pre_process(self): with open(self.resume_file, 'rb') as f: _resume = pickle.load(f).__dict__ - # - # import torchvision.datasets as datasets - # import torchvision.transforms as transforms - # data_path = "/home/bfang1/Projects/HAWQ_INC/datasets/raw" - # traindir = os.path.join(data_path, 'train') - # valdir = os.path.join(data_path, 'val') - # normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - # std=[0.229, 0.224, 0.225]) - # - # train_dataset = datasets.ImageFolder( - # traindir, - # transforms.Compose([ - # transforms.RandomResizedCrop(224), - # transforms.RandomHorizontalFlip(), - # transforms.ToTensor(), - # normalize, - # ])) - # - # val_dataset = datasets.ImageFolder( - # valdir, - # transforms.Compose([ - # transforms.RandomResizedCrop(224), - # transforms.RandomHorizontalFlip(), - # transforms.ToTensor(), - # normalize, - # ])) - # - # from torch.utils.data import DataLoader - # - # self._calib_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True) - # self._eval_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False) + + import torchvision.datasets as datasets + import torchvision.transforms as transforms + data_path = "/home/bfang1/Projects/HAWQ_INC/datasets/raw" + traindir = os.path.join(data_path, 'train') + valdir = os.path.join(data_path, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + val_dataset = datasets.ImageFolder( + valdir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + from torch.utils.data import DataLoader + + self._calib_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True) + self._eval_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False) self.strategy = STRATEGIES[strategy]( self._model, diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py index 2adcd5a5812..df70e32cd9e 100644 --- a/test/strategy/test_hawq_wenhuach.py +++ b/test/strategy/test_hawq_wenhuach.py @@ -74,15 +74,15 @@ def tearDownClass(self): def test_run_hawq_one_trial(self): - # def eval_func(model): - # self.i -= 1 - # return self.i + def eval_func(model): + self.i -= 1 + return self.i from neural_compressor.experimental import Quantization, common model = copy.deepcopy(self.model) model.eval() - model = fuse_fx(model) + # model = fuse_fx(model) quantizer = Quantization('ptq_yaml.yaml') - ##quantizer.eval_func = eval_func + quantizer.eval_func = eval_func dataset = quantizer.dataset('dummy', (32, 3, 224, 224), label=True) quantizer.calib_dataloader = common.DataLoader(dataset) quantizer.eval_dataloader = common.DataLoader(dataset) From dc28247c6f21814657f6bee0bec82852b2f1979d Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Thu, 24 Nov 2022 17:12:30 +0800 Subject: [PATCH 074/128] act traces have some issues --- neural_compressor/strategy/hawq.py | 216 +++++++++++++++++------------ 1 file changed, 124 insertions(+), 92 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 65c7ab72d82..c9f8c4488da 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -28,7 +28,7 @@ from .st_utils.tuning_structs import OpTuningConfig from .st_utils.tuning_space import TUNING_ITEMS_LST from torch.quantization.quantize_fx import fuse_fx -import torchvision + from typing import Dict, List, Optional, Any, Union, Callable, Set @@ -41,8 +41,7 @@ class HessianTrace: """ def __init__(self, model, dataloader, criterion=None): - from torch.quantization.quantize_fx import fuse_fx - self.model = fuse_fx(model.model) + self.model = fuse_fx(model.model) ##TODO need to check whether model has been already fused self.dataloader = dataloader self.max_iter = 500 @@ -102,89 +101,19 @@ def get_device(self, model: torch.nn.Module): for n, p in model.named_parameters(): return p.data.device - def get_gradients(self, model, data, criterion, create_graph=False): - model.zero_grad() - input = data[0].to(self.device) - ##self._input_shape = input.shape ## for resetting input activation - target = data[1].to(self.device) - # if enable_act: - # input.requires_grad = True - output = model(input) - loss = criterion(output, target) - # torch.autograd.backward(loss, create_graph=create_graph) - loss.backward(create_graph=create_graph) - gradients = [] - for n, p in model.named_parameters(): - if p.grad != None and n in self.weight_names: - gradient = p.grad - gradients.append(gradient + 0.0) ## add 0 to create a copy - model.zero_grad() - return gradients - - # def get_params(self, model): - # parameters = [p for p in model.parameters() if p.requires_grad] - # return parameters - - def sample_rademacher(self, params): - samples = [] - for param in params: - r = torch.randint_like(param, high=2, device=self.device) - r.masked_fill_(r == 0, -1) - samples.append(r) - return samples - - def get_vtHv_weight(self, params, num_samples): - v = self.sample_rademacher(params) - H_v = [0] * len(v) - cnt = 0 - for step, data in enumerate(self.dataloader): - batch_size = data[0].shape[0] - cnt += batch_size - gradients = self.get_gradients(self.model, data, self.criterion, create_graph=True) - H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) - H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] - if cnt >=num_samples: - break - if cnt > 0: - H_v = [item / cnt for item in H_v] - v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better - return v_t_H_v - - def get_vtHv_act(self, params, num_samples): - v = self.sample_rademacher(params) - H_v = [0] * len(v) - cnt = 0 - for step, data in enumerate(self.dataloader): - if cnt >= num_samples: - break - for i in range(self.dataloader.batchsize): ##force to batchsize to be 1 - input = data[0][i:i + 1] - target = data[1][i:i + 1] - - self.get_gradients(self.model, (input, target), self.criterion, create_graph=True) - layer_acts = [self.layer_acts[key] for key in self.layer_acts.keys()] - layer_act_gradients = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] - hv_one = torch.autograd.grad(layer_act_gradients, layer_acts, v, only_inputs=True, retain_graph=False) - cnt += 1 - if cnt >= num_samples: - break - def 
_get_act_grad_hook(self, name): def act_grad_hook(model, grad_input, grad_output): + ##print(name, grad_input[0].shape, grad_output[0].shape) self.layer_acts_grads[name] = [grad_input, grad_output] return act_grad_hook def _get_enable_act_grad_hook(self, name): def enable_act_grad_hook(model, inputs, outputs): - try: - input = inputs[0] ##TODO check whether this is right - except: - input = inputs - - if input.requires_grad is False: - input.requires_grad = True - self.layer_acts[name] = input + for input in inputs: + if input.requires_grad is False: + input.requires_grad = True + self.layer_acts[name] = inputs return enable_act_grad_hook @@ -229,18 +158,87 @@ def get_params(self): self.weight_names = weight_names self.params = params + def forward_backward(self, data, create_graph=False, return_w_grad=True): + self.model.zero_grad() + input = data[0].to(self.device) + ##self._input_shape = input.shape ## for resetting input activation + target = data[1].to(self.device) + ##input.requires_grad = True + output = self.model(input) + loss = self.criterion(output, target) + torch.autograd.backward(loss, create_graph=create_graph) + ##loss.backward(create_graph=create_graph) + if return_w_grad: + gradients = [] + for n, p in self.model.named_parameters(): + if p.grad != None and n in self.weight_names: + gradient = p.grad + gradients.append(gradient + 0.0) ## add 0 to create a copy + self.model.zero_grad() + return gradients + else: + self.model.zero_grad() + + # def get_params(self, model): + # parameters = [p for p in model.parameters() if p.requires_grad] + # return parameters + + def sample_rademacher(self, params): + samples = [] + for param in params: + r = torch.randint_like(param, high=2, device=self.device) + r.masked_fill_(r == 0, -1) + samples.append(r) + return samples + + def get_vtHv_weight(self, params, num_samples): + v = self.sample_rademacher(params) + H_v = [0] * len(v) + cnt = 0 + for step, data in enumerate(self.dataloader): + batch_size = data[0].shape[0] + cnt += batch_size + gradients = self.forward_backward(data, create_graph=True) + H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) + H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] + if cnt >= num_samples: + break + if cnt > 0: + H_v = [item / cnt for item in H_v] + v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better + return v_t_H_v + + def get_vtHv_act(self, params, num_samples): + v = self.sample_rademacher(params) + H_v = [0] * len(v) + cnt = 0 + for step, data in enumerate(self.dataloader): + if cnt >= num_samples: + break + for i in range(self.dataloader.batchsize): ##force to batchsize to be 1 + input = data[0][i:i + 1] + target = data[1][i:i + 1] + + self.get_gradients(self.model, (input, target), self.criterion, create_graph=True) + layer_acts = [self.layer_acts[key] for key in self.layer_acts.keys()] + layer_act_gradients = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] + hv_one = torch.autograd.grad(layer_act_gradients, layer_acts, v, only_inputs=True, retain_graph=False) + cnt += 1 + if cnt >= num_samples: + break + def get_weight_traces(self, num_samples): layer_traces_per_iter = [] prev_avg_model_trace = 0 - for i in range(self.max_iter): + for iter in range(self.max_iter): layer_traces = self.get_vtHv_weight(self.params, num_samples) layer_traces_per_iter.append(layer_traces) layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) model_trace = 
torch.sum(layer_traces_estimate) diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) - if diff_ratio < self.tolerance and i > 10: ##TODO magic number + if diff_ratio < self.tolerance and iter > 10: ##TODO magic number break - if i == 50: ##TODO for debug + if iter == 50: ##TODO for debug break prev_avg_model_trace = model_trace weight_name_to_traces = {} @@ -258,28 +256,62 @@ def get_act_traces(self, num_samples): self.layer_acts = {} self.layer_acts_grads = {} self.register_act_grad_hooks() - for i in range(self.max_iter): + cnt = 0 + for step, data in enumerate(self.dataloader): + if cnt >= num_samples: + break + bs = data[0].shape[0] + act_traces_sum = 0 + act_traces_per_iter = [] + prev_avg_model_trace = 0 + act_traces_sums = None + for i in range(bs): ##force the bs to be one + input = data[0][i:i + 1] + target = data[1][i:i + 1] + self.forward_backward((input, target), create_graph=True, return_w_grad=False) + acts = [self.layer_acts[key] for key in self.layer_acts.keys()] + if act_traces_sums == None: + act_traces_sums = [0] * len(acts) + acts_grad = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] ##same order with acts + # vt_H_v_sum_per_act = [0] * len(acts) + # + # prev_model_act_trace = 0 + # for iter in range(self.max_iter): + # v = self.sample_rademacher(acts) + # H_v = torch.autograd.grad(acts_grad, acts, v, only_inputs=True, retain_graph=False) + # vt_H_v = [torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)] + # + # vt_H_v_sum_per_act = [vt_H_v_sum_per_act[index] + vt_H_v[index] for index, item in + # enumerate(vt_H_v_sum_per_act)] + # vt_H_v_mean_per_act = [item / (iter + 1) for item in vt_H_v_sum_per_act] + # current_model_act_trace = torch.mean(torch.stack(vt_H_v_mean_per_act)) + # + # diff_ratio = abs(current_model_act_trace - prev_model_act_trace) / ( + # prev_model_act_trace + self.eps) + # if diff_ratio < self.tolerance and iter > 10: ##TODO magic number + # break + # if iter == 50: ##TODO for debug + # break + # + # prev_model_act_trace = current_vt_H_v_mean_per_model + # + # cnt += 1 + # if cnt >= num_samples: + # break pass + self.reset_act_gradient_and_hooks() ##TODO have issues to reset the input grad to False + def get_avg_traces(self, enable_act=True, num_samples=32): """ Estimates average hessian trace for each parameter """ assert num_samples > 0 + ##self.get_act_traces(num_samples) weight_traces = self.get_weight_traces(num_samples) return weight_traces - ## handle activation - # if enable_act: - # self.get_act_traces(num_samples) - ##change batchsize to 1 - - # - # layer_traces = layer_traces_estimate - # if enable_act: - # self.reset_act_gradient_and_hooks() - ##copy from torch.quantization._numeric_suite def _find_match( From deb413e9396563206d9d187a1a46976ba215e48d Mon Sep 17 00:00:00 2001 From: wenhuach
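The activation branch needs the same v^T H v contraction as the weight branch, but with torch.autograd.grad taken against the captured activations instead of the parameters, one sample at a time. A minimal hedged sketch on a toy model, showing a single Hutchinson draw per activation (the patch averages many draws until they converge):

import torch
import torch.nn.functional as F

model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.ReLU(), torch.nn.Linear(16, 4))
acts = {}

def keep_input(name):
    def hook(module, inputs, output):
        acts[name] = inputs[0]
    return hook

handles = [m.register_forward_hook(keep_input(n))
           for n, m in model.named_modules() if isinstance(m, torch.nn.Linear)]

x = torch.randn(1, 16, requires_grad=True)             # batch of one, as in get_act_traces
y = torch.randint(0, 4, (1,))
loss = F.cross_entropy(model(x), y)

act_list = list(acts.values())
act_grads = torch.autograd.grad(loss, act_list, create_graph=True)
v = [torch.randint_like(a, high=2) * 2.0 - 1.0 for a in act_list]   # Rademacher draw
hv = torch.autograd.grad(act_grads, act_list, grad_outputs=v)
vthv = [(h * r).mean() for h, r in zip(hv, v)]          # one v^T H v estimate per activation
for h in handles:
    h.remove()
print(vthv)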
Date: Thu, 24 Nov 2022 20:10:25 +0800 Subject: [PATCH 075/128] support activation traces --- neural_compressor/strategy/hawq.py | 110 +++++++++++++++++------------ 1 file changed, 66 insertions(+), 44 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index c9f8c4488da..94745270ac1 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -41,6 +41,8 @@ class HessianTrace: """ def __init__(self, model, dataloader, criterion=None): + self.unfused_model = model.model + self.model = fuse_fx(model.model) ##TODO need to check whether model has been already fused self.dataloader = dataloader @@ -104,16 +106,19 @@ def get_device(self, model: torch.nn.Module): def _get_act_grad_hook(self, name): def act_grad_hook(model, grad_input, grad_output): ##print(name, grad_input[0].shape, grad_output[0].shape) - self.layer_acts_grads[name] = [grad_input, grad_output] + if type(model) == torch.nn.Linear: ##TODO very tricky + self.layer_acts_grads[name] = grad_input[1] + else: + self.layer_acts_grads[name] = grad_input[0] return act_grad_hook def _get_enable_act_grad_hook(self, name): def enable_act_grad_hook(model, inputs, outputs): - for input in inputs: - if input.requires_grad is False: - input.requires_grad = True - self.layer_acts[name] = inputs + input = inputs[0] + if input.requires_grad is False: + input.requires_grad = True + self.layer_acts[name] = input return enable_act_grad_hook @@ -134,8 +139,8 @@ def _unregister_hook(self): for handel in self.hook_handles: handel.remove() - def register_act_grad_hooks(self): - for name, module in self.model.named_modules(): + def register_act_grad_hooks(self, model): + for name, module in model.named_modules(): if self.mapping_module_to_op(name) in self.op_list: hook_handle = module.register_forward_hook(self._get_enable_act_grad_hook(name)) self.hook_handles.append(hook_handle) @@ -158,13 +163,13 @@ def get_params(self): self.weight_names = weight_names self.params = params - def forward_backward(self, data, create_graph=False, return_w_grad=True): - self.model.zero_grad() + def forward_backward(self, model, data, create_graph=False, return_w_grad=True): + model.zero_grad() input = data[0].to(self.device) ##self._input_shape = input.shape ## for resetting input activation target = data[1].to(self.device) - ##input.requires_grad = True - output = self.model(input) + input.requires_grad = True + output = model(input) loss = self.criterion(output, target) torch.autograd.backward(loss, create_graph=create_graph) ##loss.backward(create_graph=create_graph) @@ -174,10 +179,10 @@ def forward_backward(self, data, create_graph=False, return_w_grad=True): if p.grad != None and n in self.weight_names: gradient = p.grad gradients.append(gradient + 0.0) ## add 0 to create a copy - self.model.zero_grad() + model.zero_grad() return gradients else: - self.model.zero_grad() + model.zero_grad() # def get_params(self, model): # parameters = [p for p in model.parameters() if p.requires_grad] @@ -198,7 +203,7 @@ def get_vtHv_weight(self, params, num_samples): for step, data in enumerate(self.dataloader): batch_size = data[0].shape[0] cnt += batch_size - gradients = self.forward_backward(data, create_graph=True) + gradients = self.forward_backward(self.model, data, create_graph=True) H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] if cnt >= num_samples: @@ -252,11 +257,14 @@ def 
get_weight_traces(self, num_samples): return op_name_to_trace def get_act_traces(self, num_samples): + unfused_training = self.unfused_model.training + self.unfused_model.eval() self.hook_handles = [] self.layer_acts = {} self.layer_acts_grads = {} - self.register_act_grad_hooks() + self.register_act_grad_hooks(self.unfused_model) cnt = 0 + act_traces_per_sample = [] for step, data in enumerate(self.dataloader): if cnt >= num_samples: break @@ -268,39 +276,49 @@ def get_act_traces(self, num_samples): for i in range(bs): ##force the bs to be one input = data[0][i:i + 1] target = data[1][i:i + 1] - self.forward_backward((input, target), create_graph=True, return_w_grad=False) + self.forward_backward(self.unfused_model, (input, target), create_graph=True, return_w_grad=False) acts = [self.layer_acts[key] for key in self.layer_acts.keys()] if act_traces_sums == None: act_traces_sums = [0] * len(acts) acts_grad = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] ##same order with acts - # vt_H_v_sum_per_act = [0] * len(acts) - # - # prev_model_act_trace = 0 - # for iter in range(self.max_iter): - # v = self.sample_rademacher(acts) - # H_v = torch.autograd.grad(acts_grad, acts, v, only_inputs=True, retain_graph=False) - # vt_H_v = [torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)] - # - # vt_H_v_sum_per_act = [vt_H_v_sum_per_act[index] + vt_H_v[index] for index, item in - # enumerate(vt_H_v_sum_per_act)] - # vt_H_v_mean_per_act = [item / (iter + 1) for item in vt_H_v_sum_per_act] - # current_model_act_trace = torch.mean(torch.stack(vt_H_v_mean_per_act)) - # - # diff_ratio = abs(current_model_act_trace - prev_model_act_trace) / ( - # prev_model_act_trace + self.eps) - # if diff_ratio < self.tolerance and iter > 10: ##TODO magic number - # break - # if iter == 50: ##TODO for debug - # break - # - # prev_model_act_trace = current_vt_H_v_mean_per_model - # - # cnt += 1 - # if cnt >= num_samples: - # break - pass + vt_H_v_sum_per_act = [0] * len(acts) + + prev_model_act_trace = 0 + for iter in range(self.max_iter): + v = self.sample_rademacher(acts) + H_v = torch.autograd.grad(acts_grad, acts, v, only_inputs=True, retain_graph=True) + vt_H_v = [torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)] + + vt_H_v_sum_per_act = [vt_H_v_sum_per_act[index] + vt_H_v[index] for index, item in + enumerate(vt_H_v_sum_per_act)] + vt_H_v_mean_per_act = [item / (iter + 1) for item in vt_H_v_sum_per_act] + current_model_act_trace = torch.mean(torch.stack(vt_H_v_mean_per_act)) + + diff_ratio = abs(current_model_act_trace - prev_model_act_trace) / ( + prev_model_act_trace + self.eps) + if diff_ratio < self.tolerance and iter > 10: ##TODO magic number + break + if iter == 50: ##TODO for debug + break + prev_model_act_trace = current_model_act_trace + act_traces_per_sample.append(vt_H_v_mean_per_act) + cnt += 1 + if cnt >= num_samples: + break + + if unfused_training: + self.unfused_model.train() self.reset_act_gradient_and_hooks() ##TODO have issues to reset the input grad to False + act_traces_stack = torch.stack([torch.stack(item) for item in act_traces_per_sample]) + act_traces = torch.mean(act_traces_stack, dim=0) + res_dict={} + for index, key in enumerate(self.layer_acts.keys()): + res_dict[key]=act_traces[index] + + self.layer_acts=[] + self.layer_acts_grads=[] + return act_traces def get_avg_traces(self, enable_act=True, num_samples=32): """ @@ -308,9 +326,13 @@ def get_avg_traces(self, enable_act=True, num_samples=32): """ assert num_samples > 0 - ##self.get_act_traces(num_samples) + traces = {} 
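        # --- Editor's note (illustrative, not part of the original patch) ---
        # Both get_weight_traces and get_act_traces estimate tr(H) per layer with
        # Hutchinson's method: tr(H) ~= E_v[v^T H v], where v is a Rademacher
        # (+/-1) vector and H v is obtained by differentiating the gradients a
        # second time, roughly:
        #     grads = torch.autograd.grad(loss, params, create_graph=True)
        #     Hv = torch.autograd.grad(grads, params, grad_outputs=v, retain_graph=True)
        # For activations, the forward hooks registered earlier set
        # requires_grad=True on each captured layer input so the same
        # gradient-of-gradient trick applies to activation tensors.
        # ---------------------------------------------------------------------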
weight_traces = self.get_weight_traces(num_samples) - return weight_traces + traces['weight'] = weight_traces + if enable_act: + act_traces = self.get_act_traces(num_samples) + traces['activation']= act_traces + return traces ##copy from torch.quantization._numeric_suite From 7c508d51c2ebe364c5142e5bbfcdd45ce722e1f2 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Thu, 24 Nov 2022 18:24:20 +0800 Subject: [PATCH 076/128] correct the qnt_weigths does't machted issue --- neural_compressor/strategy/hawq.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 94745270ac1..6575c21fccb 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -413,9 +413,10 @@ def compare_weights( # For matching "fc.weight" and "fc._packed_params._packed_params" match_key = _find_match(float_dict, key, "_packed_params") if match_key is not None: - weight_dict[key] = {} - weight_dict[key]["float"] = float_dict[match_key] - weight_dict[key]["quantized"] = quantized_dict[key][0] + weight_dict[match_key] = {} + weight_dict[match_key]["float"] = float_dict[match_key] + weight_dict[match_key]["quantized"] = quantized_dict[key][0] + ##TODO:should consider more models in further work # For LSTM split_str = key.split(".") @@ -608,4 +609,4 @@ def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg: OpTuningConfig) quant_mode_item = tuning_space.query_quant_mode_item((op_name, op_type), op_quant_mode) tuning_item = quant_mode_item.get_option_by_name(att_item) dynamic_state[att_item] = tuning_item.options[0] if tuning_item else None - return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) + return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) \ No newline at end of file From 2520925c8f3ad071b0df6820c80993f722c0fd54 Mon Sep 17 00:00:00 2001 From: wenhuach
Date: Thu, 24 Nov 2022 20:43:03 +0800 Subject: [PATCH 077/128] only enable weight traces currently --- neural_compressor/strategy/hawq.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 6575c21fccb..2e590c3f34b 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -312,12 +312,12 @@ def get_act_traces(self, num_samples): self.reset_act_gradient_and_hooks() ##TODO have issues to reset the input grad to False act_traces_stack = torch.stack([torch.stack(item) for item in act_traces_per_sample]) act_traces = torch.mean(act_traces_stack, dim=0) - res_dict={} + res_dict = {} for index, key in enumerate(self.layer_acts.keys()): - res_dict[key]=act_traces[index] + res_dict[key] = act_traces[index] - self.layer_acts=[] - self.layer_acts_grads=[] + self.layer_acts = [] + self.layer_acts_grads = [] return act_traces def get_avg_traces(self, enable_act=True, num_samples=32): @@ -331,7 +331,7 @@ def get_avg_traces(self, enable_act=True, num_samples=32): traces['weight'] = weight_traces if enable_act: act_traces = self.get_act_traces(num_samples) - traces['activation']= act_traces + traces['activation'] = act_traces return traces @@ -566,7 +566,8 @@ def next_tune_cfg(self): pertur_lst[key] = diff_l2 # for i in pertur_lst: # print(pertur_lst[i]) - op_to_traces = ht.get_avg_traces() + traces = ht.get_avg_traces(enable_act=False) + op_to_traces = traces['weight'] print(op_to_traces) if orig_eval == False: self._fp32_model.train() @@ -609,4 +610,4 @@ def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg: OpTuningConfig) quant_mode_item = tuning_space.query_quant_mode_item((op_name, op_type), op_quant_mode) tuning_item = quant_mode_item.get_option_by_name(att_item) dynamic_state[att_item] = tuning_item.options[0] if tuning_item else None - return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) \ No newline at end of file + return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) From 1530c94b33e3fc5fed7e95b92335bdd65dc1148e Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Fri, 25 Nov 2022 15:30:44 +0800 Subject: [PATCH 078/128] merge weights quantization loss and trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Formula:pertubation=trace*weights_qnt_loss --- neural_compressor/strategy/hawq.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 2e590c3f34b..c000def9440 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -564,18 +564,17 @@ def next_tune_cfg(self): op_qnt_tensor = weight_quant_loss[key]['quantized'].dequantize() diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) # Formula: L2=||Q(w)-w||p^2 pertur_lst[key] = diff_l2 - # for i in pertur_lst: - # print(pertur_lst[i]) traces = ht.get_avg_traces(enable_act=False) op_to_traces = traces['weight'] - print(op_to_traces) + for trace_i, pertur_i in zip(op_to_traces.keys(),pertur_lst.keys()): + op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i] #Formula:Omig=Trace*L2 if orig_eval == False: self._fp32_model.train() - ordered_ops = sorted(op_to_traces.keys(), key=lambda key: op_to_traces[key], reverse=self.higher_is_better) # WA for add op type + 
print("ordered_ops:",ordered_ops) op_info_map = {} for op_info in list(initial_op_tuning_cfg.keys()): op_info_map[op_info[0]] = op_info # op_name: (op_name, op_type) From 6edf3854f45fa4590655e4397462f2eba89c8169 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Mon, 28 Nov 2022 22:44:38 +0800 Subject: [PATCH 079/128] Update conf.yaml change root path to default config --- .../torchvision_models/quantization/ptq/cpu/fx/conf.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml index 4b50b559e6a..ef61c6c3e0b 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml @@ -24,7 +24,7 @@ quantization: # optional. tuning constrai batch_size: 1 dataset: ImageFolder: - root: /home/bfang1/Projects/HAWQ_INC/datasets/raw/val # NOTE: modify to calibration dataset location if needed + root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed transform: Resize: size: 256 @@ -43,7 +43,7 @@ evaluation: # optional. required if use batch_size: 1 dataset: ImageFolder: - root: /home/bfang1/Projects/HAWQ_INC/datasets/raw/val # NOTE: modify to evaluation dataset location if needed + root: /path/to/calibration/dataset # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 @@ -61,7 +61,7 @@ evaluation: # optional. required if use batch_size: 1 dataset: ImageFolder: - root: /home/bfang1/Projects/HAWQ_INC/datasets/raw/val # NOTE: modify to evaluation dataset location if needed + root: /path/to/calibration/dataset # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 From 80299f52573b70dbd2a2fbd1ed33803c181c46d9 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Mon, 28 Nov 2022 22:28:42 +0800 Subject: [PATCH 080/128] WA add loss for strategy Signed-off-by: yiliu30 --- neural_compressor/conf/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index aef8f695291..f83dbf7ceaf 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -847,7 +847,7 @@ def percent_to_float(data): Optional('model_conversion'): model_conversion_schema, Optional('tuning', default={ - 'strategy': {'name': 'basic'}, + 'strategy': {'name': 'basic', 'loss': 'CrossEntropyLoss'}, # TODO move loss to appropriate position 'accuracy_criterion': {'relative': 0.01, 'higher_is_better': True}, 'objective': 'performance', 'exit_policy': {'timeout': 0, 'max_trials': 100, 'performance_only': False}, From 4b96aa5093bc172d5d36584ae906f434be6f80ef Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 30 Nov 2022 14:49:50 +0800 Subject: [PATCH 081/128] WA for hawq strategy loss Signed-off-by: yiliu30 --- neural_compressor/conf/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index f83dbf7ceaf..dae5524c9ef 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -860,7 +860,8 @@ def percent_to_float(data): Optional('sigopt_project_id'): str, Optional('sigopt_experiment_name', default='nc-tune'): str, Optional('accuracy_weight', default=1.0): float, - 
Optional('latency_weight', default=1.0): float + Optional('latency_weight', default=1.0): float, + Optional('loss', default='CrossEntropyLoss'): str # TODO only for test, remove it before merge } , Hook('accuracy_criterion', handler=_valid_accuracy_field): object, Optional('accuracy_criterion', default={'relative': 0.01}): { From 26061f21fdfbd3c9aed34074fed1dd112ae43dff Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Wed, 30 Nov 2022 15:02:12 +0800 Subject: [PATCH 082/128] change to default path --- neural_compressor/experimental/quantization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index 7e8e8cfbbac..4fa143fc5c8 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -146,7 +146,7 @@ def pre_process(self): import torchvision.datasets as datasets import torchvision.transforms as transforms - data_path = "/home/bfang1/Projects/HAWQ_INC/datasets/raw" + data_path = "/mnt/data2/dataset/dataset/imagenet/img_raw" traindir = os.path.join(data_path, 'train') valdir = os.path.join(data_path, 'val') normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], From 31b11ff1c03df87194559dbe3fbcc99688763ce6 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 30 Nov 2022 15:59:50 +0800 Subject: [PATCH 083/128] remove useless code Signed-off-by: yiliu30 --- neural_compressor/adaptor/pytorch.py | 18 - .../experimental/quantization.py | 32 -- .../strategy/st_utils/hawq_metric.py | 339 ------------------ .../strategy/st_utils/hawq_wenhuach.py | 313 ---------------- test/strategy/test_hawq_wenhuach.py | 152 -------- test/strategy/test_hessian_trace_inc.py | 77 ---- 6 files changed, 931 deletions(-) delete mode 100644 neural_compressor/strategy/st_utils/hawq_metric.py delete mode 100644 neural_compressor/strategy/st_utils/hawq_wenhuach.py delete mode 100644 test/strategy/test_hawq_wenhuach.py delete mode 100644 test/strategy/test_hessian_trace_inc.py diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 3421828a8ab..06245b4fb0d 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -29,7 +29,6 @@ from ..utils import logger from .query import QueryBackendCapability from ..experimental.data.dataloaders.base_dataloader import BaseDataLoader -from neural_compressor.strategy.hawq_metric import Hawq_top torch = LazyImport("torch") @@ -1095,23 +1094,6 @@ def is_fused_module(self, module): else: return False - def calculate_op_sensitivity(self, model, dataloader, method_args): - """Compute the op sensitivity by the specific method. - - Args: - model(INC model): The fp32 model. - dataloader: The calibration dataloader. - method_args(Dict): The parameters for specifying the method. 
- - Returns: - ops_sensitivity(Dict[tuple, float]): The key is (op_name, op_type), - the value is the sensitivity under the specified method - """ - if method_args['name']=='hessian_trace': - Hawq_top(model=model,yaml_cpu=None,yaml_trace=None,dataloader=dataloader) - hessian_cmp=Hawq_top.get_init_config() - return hessian_cmp - pass unify_op_type_mapping = { "ConvReLU2d": "Conv2d", diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index 77dfc51d465..3d7b7811ea2 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -146,38 +146,6 @@ def pre_process(self): with open(self.resume_file, 'rb') as f: _resume = pickle.load(f).__dict__ - - import torchvision.datasets as datasets - import torchvision.transforms as transforms - data_path = "/mnt/data2/dataset/dataset/imagenet/img_raw" - traindir = os.path.join(data_path, 'train') - valdir = os.path.join(data_path, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - val_dataset = datasets.ImageFolder( - valdir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - from torch.utils.data import DataLoader - - self._calib_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True) - self._eval_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False) - self.strategy = STRATEGIES[strategy]( self._model, self.conf, diff --git a/neural_compressor/strategy/st_utils/hawq_metric.py b/neural_compressor/strategy/st_utils/hawq_metric.py deleted file mode 100644 index 63db277ab14..00000000000 --- a/neural_compressor/strategy/st_utils/hawq_metric.py +++ /dev/null @@ -1,339 +0,0 @@ -""" - Copyright (c) 2022 Intel Corporation - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-""" -import logging -import torch -import numpy as np -from torch.autograd import Variable -import yaml -import torchvision.transforms as transforms -import torchvision -import random -import copy -from torch.quantization import get_default_qat_qconfig, quantize_jit,get_default_qconfig -from torch.quantization.quantize_fx import prepare_fx, convert_fx,fuse_fx -from torch.quantization import default_dynamic_qconfig, float_qparams_weight_only_qconfig -import torch.quantization._numeric_suite as ns - - -def fixed_seed(seed): - """Fixed rand seed to make sure results are same in different times on different devices.Eg CPU/GPU - Args: - seed: an integer number - return: None - """ - np.random.seed(seed) #random - random.seed(seed) - torch.manual_seed(seed) #cpu - torch.cuda.manual_seed_all(seed) #parallel cpu - torch.backends.cudnn.deterministic = True #make sure results are same on cpu/gpu - torch.backends.cudnn.benchmark = True #accelerator -def cal_params_grad(model): - """ - get the gradients and parameters from given model - Args: - model: FP32 model specificed - return: - params: paratmeters of model - grads: gradients of model - """ - params=[] - grads=[] - for indx,(name, parm) in zip(enumerate(model.parameters()), model.named_parameters()): - logging.info('->tensor_index:', indx[0],'-->name:', name, '-->grad_requirs:',parm.requires_grad, '-->current tensor len:',parm.shape) - if not parm.requires_grad: - continue - params.append(parm) - grads.append(0. if parm.grad is None else parm.grad+0.) - return params, grads -def cal_vector_product(gradsH, params, v): - """compute the hessian vector product by torch.autograd.grad. - Agrs: - gradsH: gradient at current point - params: corresponding variables - v: vector - return: - hv: hessian vector product - """ - hv=torch.autograd.grad( - gradsH, - params, - grad_outputs=v, - only_inputs=True, - retain_graph=True) - return hv -def ptq_calibrate(model, data_loader,num_cal): - """Calibrate model in post train quantization model - Args: - model: a pre_quantization model to calibrate - data_laoder: datasets - num_cal: maximization number of calibrated samples, such as images - return: - model: a calibrated model - """ - #Generate some samples to calibrate from data_loader - calibrate_samples=[] - i=0 - for inputs, targets in data_loader: - calibrate_samples.append(inputs) - i=i+1 - if i>=num_cal: - break - # model.cpu() - model.eval() - #calibration - with torch.no_grad(): - for sample in calibrate_samples: - model(sample) - return model -def cal_weights_pertubation(model_qnt,model_fp32)->dict: - """calculate weights quantized perturbation using L2 normal - Args: - model_qnt: quantized model - model_fp32: float model - return: - pertur_lst: dict,which contains layer_name and value - - """ - - wq_cmp_dict=ns.compare_weights(model_fp32.state_dict(), model_qnt.state_dict()) - pertur_lst=[] - for key in wq_cmp_dict: - pertur_pair={"layer_name":'',"value":0} - op_float_tensor=wq_cmp_dict[key]['float'] - op_qnt_tensor=wq_cmp_dict[key]['quantized'].dequantize() - diff_l2=(torch.norm(op_float_tensor-op_qnt_tensor,p=2)**2) #Formula: L2=||Q(w)-w||p^2 - pertur_pair['layer_name']=key - pertur_pair['value']=diff_l2 - pertur_lst.append(pertur_pair) - return pertur_lst -def cal_act_pertubation(model_fp32,model_qnt,data_loader,num_cal=100)->dict: - """calculate weights quantized perturbation using L2 normal - Args: - model_qunt: quantized model - model_fp32: float model - data_loader: path to datasets - return: - pretur_lst: dict - - """ - 
ns.prepare_model_outputs(model_fp32, model_qnt) - model_fp32.cpu() - model_fp32.eval() - model_qnt.cpu() - model_qnt.eval() - obv_samples=[] - i=0 - for inputs, targets in data_loader: - obv_samples.append(inputs) - i=i+1 - if i>=num_cal: - break - with torch.no_grad(): - for image in obv_samples: - model_fp32(image) - model_qnt(image) - act_qnt_pairs=[] - act_compare_dict = ns.get_matching_activations(model_fp32, q_module=model_qnt) - for key in act_compare_dict: - op_float_tensor=(act_compare_dict[key]['float'][0]) - op_qnt_tensor=act_compare_dict[key]['quantized'][0].dequantize() - diff_l2=(torch.norm(op_float_tensor-op_qnt_tensor,p=2)**2) - pertur_pair={"layer_name":'',"value":0} - pertur_pair['layer_name']=key - pertur_pair['value']=diff_l2 - act_qnt_pairs.append(pertur_pair) - return act_qnt_pairs - -class Hessian(): - """This class used to compute each layer hessian trace from given FP32 model - """ - def __init__(self,model,criterion, data=None, dataloader=None,device='cpu') -> None: - """Initial parameters - Args: - model: FP32 model specificed - criterion: loss function - data: a single batch of data, including inputs and its corresponding labels - dataloader: the data loader including bunch of batches of data - device: currently only supports cpu device - """ - #make sure we either pass a single batch or a dataloader - assert (data!=None and dataloader==None ) or (data==None and dataloader!=None) - #make mode is evaluation model - self.model=model.eval() - self.criterion=criterion - self.device=device - - if data!=None: - self.data=data - self.full_dataset=False - if not self.full_dataset: - self.inputs, self.targets=self.data - outputs=self.model(self.inputs) - loss=self.criterion(outputs,self.targets) - loss.backward(create_graph=True) - params, gradSH=cal_params_grad(self.model) - - self.params=params - self.gradSH=gradSH - def calculate_trace(self,max_Iter=100, tolerance=1e-3): - """Compute the hessian trace based on Hutchinson algorithm - Args: - max_Inter: number of maximization iteration - tolerance: minimum relative tolerance for stopping the algorithm. - return: - avg_traces_lst: return hessian trace per layer for given model - """ - avg_traces_lst=[] - for (i_grad, i_param,(module_name, _)) in zip(self.gradSH, self.params, self.model.named_parameters()): - v=[torch.randint_like(i_param,high=2, device=self.device)] - for v_i in v: - v_i[v_i==0]=-1 - i_v=v - trace_vhv=[] - trace=0. 
- trace_pair={"layer_name":" ", "trace":0} - self.model.zero_grad() - for i in range(max_Iter): - hv=cal_vector_product(i_grad,i_param,i_v) # hessian vector - trace_vhv_cur=sum([torch.sum(x * y) for (x, y) in zip(hv, v)]) - trace_vhv.append(trace_vhv_cur) - difference=(np.mean(trace_vhv)-trace)/(abs(trace)+1e-6) - if abs(difference) None: - self.dataloader=dataloader - if yaml_trace and yaml_cpu is not None: - with open(yaml_trace) as file: - params_config=yaml.load(file) - if params_config['loss']=='CrossEntropyLoss': - self.criterion=torch.nn.CrossEntropyLoss() - self.random_seed=params_config['random_seed'] - self.max_Iteration=params_config['max_Iteration'] - self.enable_op_fuse=params_config['enable_op_fuse'] - self.tolerance=float(params_config['tolerance']) - self.max_cal_sample=float(params_config['max_cal_smaple']) - self.quantize_mode=params_config['quantize_mode'] - with open(yaml_cpu,'r') as file: - yaml_config=yaml.load(file) - str_dtype=(yaml_config[0]['precisions']['names']) - self.list_dtype = str_dtype.split(",") - else: - self.criterion=torch.nn.CrossEntropyLoss() - self.random_seed=100 - self.max_Iteration=100 - self.enable_op_fuse=True - self.tolerance=1e-6 - self.max_cal_sample=1 - self.quantize_mode='ptq' - self.list_dtype=['int8','fp32'] - logging.info("Current parameters config for Hutchinson’s algorithm as below:") - logging.info("criterion:",self.criterion,"| random_seed:",self.random_seed,"| max_Iteration:", self.max_Iteration, \ - "| tolerance:", self.tolerance,"| en_op_fuse", self.enable_op_fuse,"| max_cal_sample:", self.max_cal_sample) - fixed_seed(self.random_seed) - self.model=model - self.model.eval() - model_tmp=copy.deepcopy(model) - model_tmp.eval() - self.model_fused= fuse_fx(model_tmp) - self.model_fused.eval() - self.hawq_level='L3' #L1:top engievalue L2:avg_trace L3:avg_trace+pertubation - - def get_init_config(self)->dict: - """ - """ - #Load a sample from dataloader to compute graident - for inputs, targets in self.dataloader: - break - #Hessian average trace computation - fixed_seed(self.random_seed) - with torch.enable_grad(): - if self.enable_op_fuse: - hawq_cmp=Hessian(self.model_fused,criterion=self.criterion,data=(inputs,targets)) - else: - hawq_cmp=Hessian(self.model,criterion=self.criterion,data=(inputs,targets)) - avg_traces_lst=hawq_cmp.calculate_trace(max_Iter=self.max_Iteration,tolerance=self.tolerance) - - #fiter none weight layer and save weight layer to match perturbation computation - if self.hawq_level=='L2': - avg_traces_lst_sorted=sorted(avg_traces_lst,key=lambda x:x["trace"], reverse=True) - logging.info("avg_traces desending sorted is:") - for i in avg_traces_lst_sorted: - logging.info(i) - list_sorted=avg_traces_lst_sorted - if self.hawq_level=='L3': - if self.quantize_mode=='ptq': - #PTQ quantization - qconfig = get_default_qconfig("fbgemm") - qconfig_dict={"":qconfig} #enable all layers/tensor to quantize - #calibrate - model_prepared=prepare_fx(self.model, qconfig_dict) - model_prepared=ptq_calibrate(model_prepared,data_loader=self.dataloader,num_cal=self.max_cal_sample) - model_prepared.cpu() - model_all_qnt=convert_fx(model_prepared) - #calculate weights quantized perturbation - weights_pertu_lst=cal_weights_pertubation(model_fp32=self.model,model_qnt=model_all_qnt) - #merge weights quantized perturbation - #generally, fused ops=quantized weights+quantized activation - avg_trace_i=0 - omigs=[] - for wct_i in weights_pertu_lst: - omig_pair={"layer_name":" ", "trace":0} - 
tmp_value=avg_traces_lst[avg_trace_i]['trace']*wct_i['value'] - omig_pair['layer_name']=avg_traces_lst[avg_trace_i]['layer_name'] - omig_pair['trace']=tmp_value - avg_trace_i=avg_trace_i+2 - omigs.append(omig_pair) - act_pertu_lst=cal_act_pertubation(model_fp32=self.model, model_qnt=model_all_qnt,data_loader=self.dataloader,num_cal=self.max_cal_sample) - avg_trace_i=1 - for act_i in act_pertu_lst: - omig_pair={"layer_name":" ", "trace":0} - tmp_value=avg_traces_lst[avg_trace_i]['trace']+act_i['value'] - omig_pair['layer_name']=avg_traces_lst[avg_trace_i]['layer_name'] - omig_pair['trace']=tmp_value - avg_trace_i=avg_trace_i+2 - omigs.append(omig_pair) - - # for avg_trace_i, omiga_i in zip(avg_traces_lst_weight,pertu_list): - # omig_pair={"layer_name":" ", "value":0} - # omig_val=avg_trace_i['trace']*omiga_i['value'] - # omig_pair['layer_name']=avg_trace_i['layer_name'] - # omig_pair['value']=omig_val - # omig_list.append(omig_pair) - # omig_list_sorted=sorted(omig_list,key=lambda x:x['value'],reverse=True) - omig_list_sorted=sorted(omigs,key=lambda x:x['trace'],reverse=True) - list_sorted=omig_list_sorted - tune_init_config_pairs=[] - for i in list_sorted: - tune_init_config_pair={"op_name":'',"op_type":'','trace':0} - if i['layer_name']==list_sorted[0]['layer_name']: - tune_init_config_pair['op_name']=i['layer_name'] - tune_init_config_pair['op_type']=self.list_dtype[-1] #setup as float op - tune_init_config_pair['trace']=float(i['trace']) - else: - tune_init_config_pair['op_name']=i['layer_name'] - tune_init_config_pair['op_type']=self.list_dtype[0] - tune_init_config_pair['trace']=float(i['trace']) - tune_init_config_pairs.append(tune_init_config_pair) - return tune_init_config_pairs diff --git a/neural_compressor/strategy/st_utils/hawq_wenhuach.py b/neural_compressor/strategy/st_utils/hawq_wenhuach.py deleted file mode 100644 index c0ced2af3f4..00000000000 --- a/neural_compressor/strategy/st_utils/hawq_wenhuach.py +++ /dev/null @@ -1,313 +0,0 @@ -""" - Copyright (c) 2022 Intel Corporation - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-""" -from ...utils import logger -import torch -import numpy as np -from torch.autograd import Variable -import yaml -import torchvision.transforms as transforms -import torchvision -import random -import copy -from torch.quantization import get_default_qat_qconfig, quantize_jit, get_default_qconfig -from torch.quantization.quantize_fx import prepare_fx, convert_fx, fuse_fx -from torch.quantization import default_dynamic_qconfig, float_qparams_weight_only_qconfig -import torch.quantization._numeric_suite as ns - - -def fix_seed(seed): - """Fixed rand seed to make sure results are same in different times on different devices.Eg CPU/GPU - Args: - seed: an integer number - return: None - """ - np.random.seed(seed) # random - random.seed(seed) - torch.manual_seed(seed) # cpu - torch.cuda.manual_seed_all(seed) # parallel cpu - torch.backends.cudnn.deterministic = True # make sure results are same on cpu/gpu - torch.backends.cudnn.benchmark = True # accelerator - - -def calculate_params_gradients(model): - """ - get the gradients and parameters from given model - Args: - model: FP32 model specificed - return: - params: paratmeters of model - grads: gradients of model - """ - params = [] - grads = [] - for indx, (name, parm) in zip(enumerate(model.parameters()), model.named_parameters()): - logger.info( - f'index:{indx[0]}-->name:{name}:{parm.shape}') - - if not parm.requires_grad: - continue - params.append(parm) - grads.append(0. if parm.grad is None else parm.grad + 0.) - return params, grads - - -def calculate_inner_product(list_x, list_y): - """Compute the inner product of two lists of variables list_x,list_y - Args: - list_x: input list variables - list_y: input list variables - return: - sum of inner product - """ - return sum([torch.sum(x * y) for (x, y) in zip(list_x, list_y)]) - - -def calculate_vector_product(gradsH, params, v): - """compute the hessian vector product by torch.autograd.grad. 
- Agrs: - gradsH: gradient at current point - params: corresponding variables - v: vector - return: - hv: hessian vector product - """ - hv = torch.autograd.grad( - gradsH, - params, - grad_outputs=v, - only_inputs=True, - retain_graph=True) - return hv - - -def ptq_calibrate(model, data_loader, num_cal): - """Calibrate model in post train quantization model - Args: - model: a pre_quantization model to calibrate - data_laoder: datasets - num_cal: maximization number of calibrated samples, such as images - return: - model: a calibrated model - """ - # Generate some samples to calibrate from data_loader - calibrate_samples = [] - i = 0 - for inputs, targets in data_loader: - calibrate_samples.append(inputs) - i = i + 1 - if i >= num_cal: - break - # model.cpu() - model.eval() - # calibration - with torch.no_grad(): - for sample in calibrate_samples: - model(sample) - return model - - -def calculate_perturbation(model_qnt, model_fp32) -> dict: - """calculate weights quantized perturbation using L2 normal - Args: - model_qnt: quantized model - model_fp32: float model - return: - pertur_lst: dict,which contains layer_name and value - - """ - - wq_cmp_dict = ns.compare_weights(model_fp32.state_dict(), model_qnt.state_dict()) - pertur_lst = [] - for key in wq_cmp_dict: - pertur_pair = {"layer_name": '', "value": 0} - op_float_tensor = wq_cmp_dict[key]['float'] - op_qnt_tensor = wq_cmp_dict[key]['quantized'].dequantize() - diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) # Formula: L2=||Q(w)-w||p^2 - pertur_pair['layer_name'] = key - pertur_pair['value'] = diff_l2 - pertur_lst.append(pertur_pair) - return pertur_lst - - -class Hessian(): - """This class used to compute each layer hessian trace from given FP32 model - """ - - def __init__(self, model, criterion, data=None, dataloader=None, device='cpu') -> None: - """Initial parameters - Args: - model: FP32 model specificed - criterion: loss function - data: a single batch of data, including inputs and its corresponding labels - dataloader: the data loader including bunch of batches of data - device: currently only supports cpu device - """ - # make sure we either pass a single batch or a dataloader - assert (data != None and dataloader == None) or (data == None and dataloader != None) - # make mode is evaluation model - self.model = model.eval() - self.criterion = criterion - self.device = device - - if data != None: - self.data = data - self.full_dataset = False - if not self.full_dataset: - self.inputs, self.targets = self.data - outputs = self.model(self.inputs) - loss = self.criterion(outputs, self.targets) - loss.backward(create_graph=True) - params, gradSH = calculate_params_gradients(self.model) - - self.params = params - self.gradSH = gradSH - - def calculate_trace(self, max_Iter=100, tolerance=1e-3): - """Compute the hessian trace based on Hutchinson algorithm - Args: - max_Inter: number of maximization iteration - tolerance: minimum relative tolerance for stopping the algorithm. - return: - avg_traces_lst: return hessian trace per layer for given model - """ - avg_traces_lst = [] - for (i_grad, i_param, (module_name, _)) in zip(self.gradSH, self.params, self.model.named_parameters()): - v = [torch.randint_like(i_param, high=2, device=self.device)] - for v_i in v: - v_i[v_i == 0] = -1 - i_v = v - trace_vhv = [] - trace = 0. 
- trace_pair = {"layer_name": " ", "trace": 0} - self.model.zero_grad() - for i in range(max_Iter): - hv = calculate_vector_product(i_grad, i_param, i_v) # hessian vector - trace_vhv_cur = calculate_inner_product(hv, v).cpu().item() # current point - trace_vhv.append(trace_vhv_cur) - difference = (np.mean(trace_vhv) - trace) / (abs(trace) + 1e-6) - if abs(difference) < tolerance: - avg_trace_vhv = np.mean(trace_vhv) - trace_pair["layer_name"] = module_name - trace_pair["trace"] = avg_trace_vhv - avg_traces_lst.append(trace_pair) - break - else: - trace = np.mean(trace_vhv) - return avg_traces_lst - - -class Hawq_top(): - """This class is a interface of hessian - """ - - def __init__(self, model, yaml_trace=None, yaml_cpu=None, dataloader=None) -> None: - self.dataloader = dataloader - if yaml_trace and yaml_cpu is not None: - with open(yaml_trace) as file: - params_config = yaml.load(file) - if params_config['loss'] == 'CrossEntropyLoss': - self.criterion = torch.nn.CrossEntropyLoss() - self.random_seed = params_config['random_seed'] - self.max_Iteration = params_config['max_Iteration'] - self.enable_op_fuse = params_config['enable_op_fuse'] - self.tolerance = float(params_config['tolerance']) - self.max_cal_sample = float(params_config['max_cal_smaple']) - self.quantize_mode = params_config['quantize_mode'] - with open(yaml_cpu, 'r') as file: - yaml_config = yaml.load(file) - str_dtype = (yaml_config[0]['precisions']['names']) - self.list_dtype = str_dtype.split(",") - else: - self.criterion = torch.nn.CrossEntropyLoss() - self.random_seed = 100 - self.max_Iteration = 100 - self.enable_op_fuse = True - self.tolerance = 1e-6 - self.max_cal_sample = 100 - self.quantize_mode = 'ptq' - self.list_dtype = ['int8', 'fp32'] - # logger.info("Current parameters config for Hutchinson’s algorithm as below:") - logger.info( - f"criterion:{self.criterion}| random_seed:{self.random_seed}| max_Iteration:self.max_Iteration| tolerance:{self.tolerance}") - # logger.info("criterion:", self.criterion, "| random_seed:", self.random_seed, "| max_Iteration:", - # self.max_Iteration, \ - # "| tolerance:", self.tolerance, "| en_op_fuse", self.enable_op_fuse, "| max_cal_sample:", - # self.max_cal_sample) - fix_seed(self.random_seed) - self.model = model - self.model.eval() - if self.enable_op_fuse: - self.model = fuse_fx(self.model) - - # model_tmp = copy.deepcopy(model) - # model_tmp.eval() - # self.model_fused = fuse_fx(model_tmp) - # self.model_fused.eval() - - def get_init_config(self) -> dict: - """ - """ - # Load a sample from dataloader to compute graident - inputs, targets = next(iter(self.dataloader)) - - with torch.enable_grad(): - # if self.enable_op_fuse: - # hawq_cmp = Hessian(self.model_fused, criterion=self.criterion, data=(inputs, targets)) - # else: - hawq_cmp = Hessian(self.model, criterion=self.criterion, data=(inputs, targets)) - avg_traces_lst = hawq_cmp.calculate_trace(max_Iter=self.max_Iteration, tolerance=self.tolerance) - - # fiter none weight layer and save weight layer to match perturbation computation - avg_traces_lst_weight = [] - for avg_trace_i in avg_traces_lst: - if 'weight' in avg_trace_i['layer_name']: - avg_traces_lst_weight.append(avg_trace_i) - # avg_traces_lst_sorted=sorted(avg_traces_lst,key=lambda x:x["trace"], reverse=True) - if self.quantize_mode == 'ptq': - # PTQ quantization - qconfig = get_default_qconfig("fbgemm") - qconfig_dict = {"": qconfig} # enable all layers/tensor to quantize - # calibrate - model_prepared = prepare_fx(self.model, qconfig_dict) - model_prepared 
= ptq_calibrate(model_prepared, data_loader=self.dataloader, num_cal=self.max_cal_sample) - model_prepared.cpu() - model_all_qnt = convert_fx(model_prepared) - # calculate perturbation - pertu_list = calculate_perturbation(model_fp32=self.model, model_qnt=model_all_qnt) - # calculate omiga - for omiga_i in pertu_list: - for avg_trace_i in avg_traces_lst: - if avg_trace_i['layer_name'] == omiga_i['layer_name']: - avg_trace_i['trace'] = avg_trace_i['trace'] * omiga_i['value'] - # for avg_trace_i, omiga_i in zip(avg_traces_lst_weight,pertu_list): - # omig_pair={"layer_name":" ", "value":0} - # omig_val=avg_trace_i['trace']*omiga_i['value'] - # omig_pair['layer_name']=avg_trace_i['layer_name'] - # omig_pair['value']=omig_val - # omig_list.append(omig_pair) - # omig_list_sorted=sorted(omig_list,key=lambda x:x['value'],reverse=True) - omig_list_sorted = sorted(avg_traces_lst, key=lambda x: x['trace'], reverse=True) - tune_init_config_pairs = [] - # - for i in omig_list_sorted: - tune_init_config_pair = {"op_name": '', "op_type": '', 'trace': 0} - if i['layer_name'] == omig_list_sorted[0]['layer_name']: - tune_init_config_pair['op_name'] = i['layer_name'] - tune_init_config_pair['op_type'] = self.list_dtype[-1] # setup as float op - tune_init_config_pair['trace'] = float(i['trace']) - else: - tune_init_config_pair['op_name'] = i['layer_name'] - tune_init_config_pair['op_type'] = self.list_dtype[0] - tune_init_config_pair['trace'] = float(i['trace']) - tune_init_config_pairs.append(tune_init_config_pair) - return tune_init_config_pairs diff --git a/test/strategy/test_hawq_wenhuach.py b/test/strategy/test_hawq_wenhuach.py deleted file mode 100644 index df70e32cd9e..00000000000 --- a/test/strategy/test_hawq_wenhuach.py +++ /dev/null @@ -1,152 +0,0 @@ -import torch -import unittest -import os -import sys -import copy -import torchvision -import torchvision.transforms as transforms -from torch.utils.data import DataLoader -from neural_compressor.data import DATASETS -from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.adaptor.pytorch import TemplateAdaptor -from neural_compressor.adaptor import FRAMEWORKS -import shutil -from neural_compressor.strategy.st_utils.hawq_wenhuach import fix_seed -from torch.quantization.quantize_fx import fuse_fx -# fix_seed(1) - -def build_ptq_yaml(): - fake_yaml = ''' - model: - name: imagenet - framework: pytorch_fx - quantization: - calibration: - evaluation: - accuracy: - metric: - topk: 1 - tuning: - strategy: - name: hawq - accuracy_criterion: - relative: -0.1 - random_seed: 9527 - exit_policy: - max_trials: 3 - workspace: - path: saved - ''' - with open('ptq_yaml.yaml', 'w', encoding="utf-8") as f: - f.write(fake_yaml) - -class TestPytorchAdaptor(unittest.TestCase): - framework_specific_info = {"device": "gpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "workspace_path": None} - framework = "pytorch" - adaptor = FRAMEWORKS[framework](framework_specific_info) - model = torchvision.models.resnet18() - - - # from collections import OrderedDict - # model = torch.nn.Sequential(OrderedDict([ - # ('conv1', torch.nn.Conv2d(3, 2, 1, 1)), - # ('conv2', torch.nn.Conv2d(2, 1, 1, 1)), - # ('flat', torch.nn.Flatten()), - # ])) - # model = torch.quantization.QuantWrapper(model) - - @classmethod - def setUpClass(self): - self.i = 0 - build_ptq_yaml() - - - @classmethod - def tearDownClass(self): - os.remove('ptq_yaml.yaml') - shutil.rmtree('./saved', 
ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) - - - - def test_run_hawq_one_trial(self): - def eval_func(model): - self.i -= 1 - return self.i - from neural_compressor.experimental import Quantization, common - model = copy.deepcopy(self.model) - model.eval() - # model = fuse_fx(model) - quantizer = Quantization('ptq_yaml.yaml') - quantizer.eval_func = eval_func - dataset = quantizer.dataset('dummy', (32, 3, 224, 224), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = model - quantizer() - -if __name__ == "__main__": - - unittest.main() - -# def build_hessian_trace(): -# hessian_trace_config_yaml = ''' -# loss: -# CrossEntropyLoss -# random_seed: -# 1 -# max_Iteration: -# 100 -# tolerance: -# 1e-3 -# enable_op_fuse: -# True -# max_cal_smaple: -# 100 -# quantize_mode: -# ptq -# ''' -# with open('./hessian_trace_config_yaml', 'w+', encoding="utf-8") as f: -# f.write(hessian_trace_config_yaml) -# -# -# class Test_hessian_trace(unittest.TestCase): -# # boot up test -# @classmethod -# def setUpClass(cls) -> None: -# build_hessian_trace() -# cls.model = torchvision.models.resnet18() -# -# # shotdown test -# @classmethod -# def tearDownClass(cls) -> None: -# os.remove('./hessian_trace_config_yaml') -# -# # one test case -# def test_run_hessian_trace(cls): -# """ -# hessian_trace_top -# Inputs: -# model: FP32 model -# dataloader: imagenet -# """ -# -# model = cls.model -# datasets = DATASETS('pytorch') -# dummy_dataset = datasets['dummy'](shape=(200, 3, 224, 224), low=0., high=1., label=True) -# dummy_dataloader = PyTorchDataLoader(dummy_dataset) -# # yaml_cpu='/home/bfang1/Projects/HAWQ_INC/frameworks.ai.lpot.intel-lpot/neural_compressor/adaptor/pytorch_cpu.yaml' -# # hessian_cmp=hawq_metric.Hawq_top(model,'./hessian_trace_config_yaml',yaml_cpu,dummy_dataloader) -# hessian_cmp = Hawq_top(model, yaml_cpu=None, yaml_trace=None, dataloader=dummy_dataloader) -# tuning_init_config = hessian_cmp.get_init_config() -# # print tuning init_config -# for i in tuning_init_config: -# print(i) - - -# if __name__ == "__main__": -# unittest.main() diff --git a/test/strategy/test_hessian_trace_inc.py b/test/strategy/test_hessian_trace_inc.py deleted file mode 100644 index 5285bc619c7..00000000000 --- a/test/strategy/test_hessian_trace_inc.py +++ /dev/null @@ -1,77 +0,0 @@ -import torch -import unittest -import os -import sys -import copy -import torchvision -import torchvision.transforms as transforms -from torch.utils.data import DataLoader -from neural_compressor.data import DATASETS -from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.adaptor.pytorch import TemplateAdaptor -from neural_compressor.strategy.hawq_metric import Hawq_top -import random -import numpy as np -def fixed_seed(seed): - """Fixed rand seed to make sure results are same in different times on different devices.Eg CPU/GPU - Args: - seed: an integer number - return: None - """ - np.random.seed(seed) #random - random.seed(seed) - torch.manual_seed(seed) #cpu - torch.cuda.manual_seed_all(seed) #parallel cpu - torch.backends.cudnn.deterministic = True #make sure results are same on cpu/gpu - torch.backends.cudnn.benchmark = True #accelerator -fixed_seed(100) -def build_hessian_trace(): - hessian_trace_config_yaml=''' - loss: - CrossEntropyLoss - random_seed: - 1 - max_Iteration: - 100 - tolerance: - 1e-3 - enable_op_fuse: - True - max_cal_smaple: - 100 - 
quantize_mode: - ptq - ''' - with open('./hessian_trace_config_yaml','w+',encoding="utf-8") as f: - f.write(hessian_trace_config_yaml) -class Test_hessian_trace(unittest.TestCase): - #boot up test - @classmethod - def setUpClass(cls) -> None: - build_hessian_trace() - cls.model=torchvision.models.resnet18() - #shotdown test - @classmethod - def tearDownClass(cls) -> None: - os.remove('./hessian_trace_config_yaml') - #one test case - def test_run_hessian_trace(cls): - """ - hessian_trace_top - Inputs: - model: FP32 model - dataloader: imagenet - """ - model=cls.model - datasets = DATASETS('pytorch') - dummy_dataset = datasets['dummy'](shape=(200, 3, 224, 224), low=0., high=1., label=True) - dummy_dataloader = PyTorchDataLoader(dummy_dataset) - # yaml_cpu='/home/bfang1/Projects/HAWQ_INC/frameworks.ai.lpot.intel-lpot/neural_compressor/adaptor/pytorch_cpu.yaml' - # hessian_cmp=hawq_metric.Hawq_top(model,'./hessian_trace_config_yaml',yaml_cpu,dummy_dataloader) - hessian_cmp=Hawq_top(model,yaml_cpu=None,yaml_trace=None,dataloader=dummy_dataloader) - tuning_init_config=hessian_cmp.get_init_config() - #print tuning init_config - for i in tuning_init_config: - print(i) -if __name__ == "__main__": - unittest.main() \ No newline at end of file From 5b813ea52e4f5439a5d42ee62c77f63c5c8af185 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 30 Nov 2022 16:03:06 +0800 Subject: [PATCH 084/128] update ut Signed-off-by: yiliu30 --- test/strategy/test_basic_fallback.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/strategy/test_basic_fallback.py b/test/strategy/test_basic_fallback.py index 352c81850c4..fef994a4f1b 100644 --- a/test/strategy/test_basic_fallback.py +++ b/test/strategy/test_basic_fallback.py @@ -20,7 +20,7 @@ def build_ptq_yaml(): framework: pytorch_fx tuning: strategy: - name: basic + name: hawq accuracy_criterion: absolute: -1 exit_policy: From 152774f8ceb87674248decbcf2bc13b5919a3428 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 30 Nov 2022 17:46:06 +0800 Subject: [PATCH 085/128] remove WA for hawq loss Signed-off-by: yiliu30 --- neural_compressor/conf/config.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index dae5524c9ef..627d91e0d96 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -847,7 +847,7 @@ def percent_to_float(data): Optional('model_conversion'): model_conversion_schema, Optional('tuning', default={ - 'strategy': {'name': 'basic', 'loss': 'CrossEntropyLoss'}, # TODO move loss to appropriate position + 'strategy': {'name': 'basic'}, 'accuracy_criterion': {'relative': 0.01, 'higher_is_better': True}, 'objective': 'performance', 'exit_policy': {'timeout': 0, 'max_trials': 100, 'performance_only': False}, @@ -860,8 +860,7 @@ def percent_to_float(data): Optional('sigopt_project_id'): str, Optional('sigopt_experiment_name', default='nc-tune'): str, Optional('accuracy_weight', default=1.0): float, - Optional('latency_weight', default=1.0): float, - Optional('loss', default='CrossEntropyLoss'): str # TODO only for test, remove it before merge + Optional('latency_weight', default=1.0): float } , Hook('accuracy_criterion', handler=_valid_accuracy_field): object, Optional('accuracy_criterion', default={'relative': 0.01}): { From 5174c8027bbd79b98fdf8ac9889fe6e7eb6e921e Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 1 Dec 2022 09:04:25 +0800 Subject: [PATCH 086/128] remove hard code for baseline Signed-off-by: yiliu30 --- 
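[Editor's note: illustrative sketch, not part of the patch series.] The preceding patches (075-078) rank ops for fallback by an op-wise sensitivity score Omega_i = Tr(H_i) * ||Q(w_i) - w_i||_2^2, i.e. the Hutchinson estimate of a layer's Hessian trace scaled by its weight quantization error. A minimal standalone sketch of that scoring is shown below; the helper names (hutchinson_layer_traces, hawq_scores) and the single-batch usage are assumptions made for illustration, not Neural Compressor APIs.

import torch

def hutchinson_layer_traces(model, loss_fn, batch, n_iter=30):
    """Estimate tr(H) per parameter tensor: tr(H) ~= E_v[v^T H v], v ~ Rademacher."""
    x, y = batch
    params = [p for p in model.parameters() if p.requires_grad]
    loss = loss_fn(model(x), y)
    grads = torch.autograd.grad(loss, params, create_graph=True)
    traces = [0.0] * len(params)
    for _ in range(n_iter):
        vs = [torch.randint_like(p, high=2) * 2 - 1 for p in params]  # +/-1 entries
        Hvs = torch.autograd.grad(grads, params, grad_outputs=vs, retain_graph=True)
        traces = [t + (hv * v).sum().item() for t, hv, v in zip(traces, Hvs, vs)]
    return [t / n_iter for t in traces]

def hawq_scores(traces, fp32_weights, dequantized_weights):
    """Omega_i = trace_i * ||Q(w_i) - w_i||_2^2; fall back the largest scores first."""
    return [t * torch.norm(w - qw).pow(2).item()
            for t, w, qw in zip(traces, fp32_weights, dequantized_weights)]

Multiplying the trace by the quantization perturbation follows the HAWQ-V2 intuition: a layer hurts accuracy most when it is both curvature-sensitive and poorly approximated by its quantized weights, so the ops with the largest Omega are the first candidates to keep in FP32.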
neural_compressor/strategy/auto_mixed_precision.py | 1 - neural_compressor/strategy/strategy.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index 7fbd759a87e..4b59cf2cced 100644 --- a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -145,7 +145,6 @@ def traverse(self): if self.baseline is None and (self.eval_dataloader or self.eval_func): logger.info("Get FP32 model baseline.") self.baseline = self._evaluate(self.model) - self.baseline=[0.698,[700]] # record the FP32 baseline self._add_tuning_history() diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 58faa5d919a..63710b43264 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -219,8 +219,7 @@ def traverse(self): if self.baseline is None: logger.info("Get FP32 model baseline.") self._fp32_model = self.model - ##self.baseline = self._evaluate(self.model) - self.baseline = [0.698,[700]] + self.baseline = self._evaluate(self.model) # record the FP32 baseline self._add_tuning_history() self.show_baseline_info() From c9a16ae9247ddf99faa2ba5a7bd7e1e743aeae24 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Thu, 1 Dec 2022 15:38:37 +0800 Subject: [PATCH 087/128] add efficientnet_b0_fx model --- examples/.config/model_params_pytorch.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/examples/.config/model_params_pytorch.json b/examples/.config/model_params_pytorch.json index 848c1e9f0c6..d6e5e4f92ab 100644 --- a/examples/.config/model_params_pytorch.json +++ b/examples/.config/model_params_pytorch.json @@ -8,6 +8,15 @@ "strategy": "basic", "batch_size": 100, "new_benchmark": false + }, + "efficientnet_b0_fx": { + "model_src_dir": "image_recognition/torchvision_models/quantization/ptq/cpu/fx/", + "dataset_location": "/tf_dataset/pytorch/ImageNet/raw", + "input_model": "", + "yaml": "conf.yaml", + "strategy": "basic", + "batch_size": 100, + "new_benchmark": false }, "resnet18_fx": { "model_src_dir": "image_recognition/torchvision_models/quantization/ptq/cpu/fx/", From a64c5707349e19f47634b1bbc126a1c8dbe3c4d5 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Thu, 1 Dec 2022 20:05:12 +0800 Subject: [PATCH 088/128] add act_qnt loss analysis --- neural_compressor/strategy/hawq.py | 164 +++++++++++++++++++++++++---- 1 file changed, 142 insertions(+), 22 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index c000def9440..241716bf293 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -28,10 +28,22 @@ from .st_utils.tuning_structs import OpTuningConfig from .st_utils.tuning_space import TUNING_ITEMS_LST from torch.quantization.quantize_fx import fuse_fx - +import torch.nn.intrinsic.quantized as nniq +from torch.fx import symbolic_trace, graph_module +import torch.nn as nn +import logging +logger = logging.getLogger(__name__) from typing import Dict, List, Optional, Any, Union, Callable, Set - - +# Define Collector based on hook, which is used to record the intermediate result +class Node_collector: + def __init__(self, m): + self.handle = m.register_forward_hook(self.hook_fn_act) + def hook_fn_act(self, m, inp, outp): + self.out_features = outp.clone() + self.in_features = inp + 
self.m = m + def remove(self): + self.handle.remove() class HessianTrace: """ please refer to @@ -40,11 +52,10 @@ class HessianTrace: https://github.com/openvinotoolkit/nncf/blob/develop/nncf/torch/quantization/hessian_trace.py """ - def __init__(self, model, dataloader, criterion=None): + def __init__(self, model, dataloader,q_model,criterion=None): self.unfused_model = model.model - + self.q_model=q_model self.model = fuse_fx(model.model) ##TODO need to check whether model has been already fused - self.dataloader = dataloader self.max_iter = 500 self.tolerance = 1e-5 @@ -78,7 +89,22 @@ def mapping_module_to_op(self, name): # return name # else: return name - + def mse_metric_gap(self,fp32_tensor, dequantize_tensor): + """Calculate the euclidean distance between fp32 tensor and int8 dequantize tensor + Args: + fp32_tensor (tensor): The FP32 tensor. + dequantize_tensor (tensor): The INT8 dequantize tensor. + """ + fp32_max = np.max(fp32_tensor) + fp32_min = np.min(fp32_tensor) + dequantize_max = np.max(dequantize_tensor) + dequantize_min = np.min(dequantize_tensor) + fp32_tensor = (fp32_tensor - fp32_min) / (fp32_max - fp32_min) + dequantize_tensor = (dequantize_tensor - dequantize_min) / \ + (dequantize_max - dequantize_min) + diff_tensor = fp32_tensor - dequantize_tensor + euclidean_dist = np.sum(diff_tensor ** 2) + return euclidean_dist / fp32_tensor.size def get_fused_mapping(self): model = self.model weights_info = dict(model.named_parameters()) @@ -255,7 +281,6 @@ def get_weight_traces(self, num_samples): op_name = self.weight_to_op[weight_name] op_name_to_trace[op_name] = weight_name_to_traces[weight_name] return op_name_to_trace - def get_act_traces(self, num_samples): unfused_training = self.unfused_model.training self.unfused_model.eval() @@ -318,19 +343,100 @@ def get_act_traces(self, num_samples): self.layer_acts = [] self.layer_acts_grads = [] - return act_traces - + return res_dict + def insert_hook(self, model, target_module_list): + intern_outputs = [] + for layer,module in model.named_modules(): + for target_module in target_module_list: + # print("layer:",layer) + # print("target_model:",target_module) + if layer == target_module: + logging.debug("Collect: %s" % (module)) + # print("Collect: %s" % (module)) + intern_outputs.append(Node_collector(module)) + + logging.info("Total %d hook inserted" % (len(intern_outputs))) + # print("Total %d hook inserted" % (len(intern_outputs))) + return model, intern_outputs + def insert_hook_quantize(self,model, target_module_list): + intern_outputs = [] + for layer,module in model.named_modules(): + for target_module in target_module_list: + # print("layer:",layer) + length = len("_model.") + new_key = layer[length:] + # print("target_model:",target_module) + if new_key == target_module: + logging.debug("Collect: %s" % (module)) + # print("Collect: %s" % (module)) + intern_outputs.append(Node_collector(module)) + logging.info("Total %d hook inserted" % (len(intern_outputs))) + # print("Total %d hook inserted" % (len(intern_outputs))) + return model, intern_outputs + def get_act_gap(self,fp32_model,q_model): + """ + Estimates each activation gap between quantized model and float model + """ + self.handle_acts=[] + fp32_model.eval() + # temp_model = fuse_fx(fp32_model.model) + temp_model=fp32_model + # target_module_list = [nn.ReLU] # Insert hook for FP32 model + target_module_list = self.op_list + temp_model, intern_outputs =self.insert_hook(temp_model, target_module_list) + # intern_outputs={} + for input, target in self.dataloader: + 
temp_model(input) + break + + fp32_act_out={} + for i, intern_output in enumerate(intern_outputs): + stat_features = intern_output.out_features.view(-1) + # print ("No.", i, " ", intern_output.out_features.shape) + # print ("Numpy No.", i, " ", intern_output.out_features.cpu().data.numpy().shape) + # print ("No.", i, " ", stat_features.cpu().data.numpy().shape) + # print ("Numpy No.", i, " ", stat_features.cpu().data.numpy()) + fp32_act_out[target_module_list[i]]=stat_features.cpu().data.numpy() + # break + for i in intern_outputs: + # print(i) + i.remove() + target_module_list = self.op_list + q_model, intern_outputs=self.insert_hook_quantize(q_model, target_module_list) + for input, target in self.dataloader: #only one sample + q_model(input) + break + qnt_act_out={} + intern_outputs={} + for i, intern_output in enumerate(intern_outputs): + stat_features = intern_output.out_features.view(-1) + qnt_act_out[target_module_list[i]]=stat_features.dequantize().cpu().data.numpy() + # break + for i in intern_outputs: + # print(i) + i.remove() + act_gap={} + mse_gap={} + for fp_i,int_i in zip(fp32_act_out,qnt_act_out): + activation_qnt_error=fp32_act_out[fp_i]-qnt_act_out[int_i] + mse_gap[fp_i]=self.mse_metric_gap(fp32_act_out[fp_i],qnt_act_out[int_i]) + act_gap[fp_i]=np.sum(activation_qnt_error)/activation_qnt_error.size + return act_gap,mse_gap def get_avg_traces(self, enable_act=True, num_samples=32): """ Estimates average hessian trace for each parameter """ - assert num_samples > 0 traces = {} weight_traces = self.get_weight_traces(num_samples) traces['weight'] = weight_traces + act_trace={} if enable_act: + act_gap,mse_gap=self.get_act_gap(self.model,self.q_model) act_traces = self.get_act_traces(num_samples) + for i,j in zip(act_traces,mse_gap): + #currently use mse to analysis + act_trace[i]=act_traces[i]+mse_gap[j] traces['activation'] = act_traces return traces @@ -536,22 +642,21 @@ def next_tune_cfg(self): quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] - target_dtype = "fp32" ##TODO support bf16 + target_dtype = "int8" ##TODO support bf16 target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) fp_op_list = [item.name for item in quant_ops if item in target_type_lst] # for n, p in self._fp32_model.named_modules(): # print(n) # for n, p in self._fp32_model.named_parameters(): # print(n) - orig_eval = True if self._fp32_model.training: orig_eval = False self._fp32_model.eval() - ht = HessianTrace(self._fp32_model, self.calib_dataloader) - - q_model_state_dict = { - } + import copy + temp_q_model=copy.deepcopy(self.q_model) + ht = HessianTrace(self._fp32_model, self.calib_dataloader,temp_q_model) + q_model_state_dict = {} for key in self.q_model.state_dict().keys(): length = len("_model.") new_key = key[length:] @@ -564,24 +669,39 @@ def next_tune_cfg(self): op_qnt_tensor = weight_quant_loss[key]['quantized'].dequantize() diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) # Formula: L2=||Q(w)-w||p^2 pertur_lst[key] = diff_l2 - traces = ht.get_avg_traces(enable_act=False) + traces = ht.get_avg_traces(enable_act=True) op_to_traces = traces['weight'] - for trace_i, pertur_i in zip(op_to_traces.keys(),pertur_lst.keys()): - op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i] #Formula:Omig=Trace*L2 + act_to_traces=traces['activation'] + # print("act_to_traces:",act_to_traces) + #TODO() optimize relationship of 
weights quantized loss and activation quantized loss, to find best conbine + #TODO() do double check why layer1's output is not 0 for activation quantized + for trace_i, pertur_i,act_i in zip(op_to_traces.keys(),pertur_lst.keys(),act_to_traces.keys()): + op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i]+act_to_traces[act_i] #Formula:Omig=Trace*L2+act_trace + # for trace_i, pertur_i in zip(op_to_traces.keys(),pertur_lst.keys()): + # op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i] #Formula:Omig=Trace*L2 if orig_eval == False: self._fp32_model.train() ordered_ops = sorted(op_to_traces.keys(), key=lambda key: op_to_traces[key], reverse=self.higher_is_better) # WA for add op type - print("ordered_ops:",ordered_ops) + # print("ordered_ops:",ordered_ops) op_info_map = {} for op_info in list(initial_op_tuning_cfg.keys()): op_info_map[op_info[0]] = op_info # op_name: (op_name, op_type) tmp_ordered_ops = [op_info_map[op_name] for op_name in ordered_ops] op_dtypes = OrderedDict(zip(tmp_ordered_ops, [target_dtype] * len(ordered_ops))) + indx=0 + #defautly fallback 5 ops + for i in op_dtypes.keys(): + op_dtypes[i]="fp32" + indx=indx+1 + if indx>4: + break + print(op_dtypes) + logger.info("hawq op_config:"+str(op_dtypes)) logger.info(f"Start to accumulate fallback to {target_dtype}.") - + initial_op_tuning_cfg = deepcopy(op_tuning_cfg) fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], initial_op_tuning_cfg=op_tuning_cfg, op_dtypes=op_dtypes, accumulate=True) From 81e04d5cf001dc1c7e00923f429a555fe55d4a5c Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 2 Dec 2022 09:19:36 +0800 Subject: [PATCH 089/128] comment some hard code for acc --- .../efficientnet/quantization/ptq/eager/run_tuning.sh | 3 ++- .../quantization/ptq/cpu/eager/run_tuning.sh | 7 ++++--- .../quantization/ptq/cpu/eager/run_tuning_dump_tensor.sh | 7 ++++--- .../quantization/ptq/cpu/fx/run_tuning.sh | 7 ++++--- .../quantization/ptq/gpu/eager/run_tuning.sh | 7 ++++--- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/examples/pytorch/image_recognition/efficientnet/quantization/ptq/eager/run_tuning.sh b/examples/pytorch/image_recognition/efficientnet/quantization/ptq/eager/run_tuning.sh index c5c764b7155..588ec872406 100644 --- a/examples/pytorch/image_recognition/efficientnet/quantization/ptq/eager/run_tuning.sh +++ b/examples/pytorch/image_recognition/efficientnet/quantization/ptq/eager/run_tuning.sh @@ -41,7 +41,8 @@ function run_tuning { conf_yaml=conf_efficientnet_b0.yaml elif [ "${topology}" = "mobilenetv3_rw" ]; then conf_yaml=conf_mobilenetv3_rw.yaml - sed -i "/relative:/s|relative:.*|relative: 0.02|g" $conf_yaml + # TODO only for test, uncomment it before merge + # sed -i "/relative:/s|relative:.*|relative: 0.02|g" $conf_yaml fi sed -i "/\/path\/to\/calibration\/dataset/s|root:.*|root: $dataset_location/train|g" $conf_yaml sed -i "/\/path\/to\/evaluation\/dataset/s|root:.*|root: $dataset_location/val|g" $conf_yaml diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning.sh b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning.sh index 2f930ad1470..7752585ddb5 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning.sh +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning.sh @@ -37,9 +37,10 @@ function init_params { # run_tuning function run_tuning { - if [ "mobilenet_v2" = "$topology" 
];then - sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf.yaml - fi + # TODO only for test, uncomment it before merge + # if [ "mobilenet_v2" = "$topology" ];then + # sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf.yaml + # fi extra_cmd="" if [ -n "$output_model" ];then extra_cmd = $extra_cmd"--tuned_checkpoint ${output_model}" diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning_dump_tensor.sh b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning_dump_tensor.sh index 02f968d7d23..3c45fe25a32 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning_dump_tensor.sh +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning_dump_tensor.sh @@ -39,9 +39,10 @@ function init_params { function run_tuning { sed -i "/\/path\/to\/calibration\/dataset/s|root:.*|root: $dataset_location/train|g" conf_dump_tensors.yaml sed -i "/\/path\/to\/evaluation\/dataset/s|root:.*|root: $dataset_location/val|g" conf_dump_tensors.yaml - if [ "mobilenet_v2" = "$topology" ];then - sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf_dump_tensors.yaml - fi + # TODO only for test, uncomment it before merge + # if [ "mobilenet_v2" = "$topology" ];then + # sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf_dump_tensors.yaml + # fi extra_cmd="" if [ -n "$output_model" ];then diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_tuning.sh b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_tuning.sh index 054d4389d9c..eaa81d6e85c 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_tuning.sh +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_tuning.sh @@ -37,9 +37,10 @@ function init_params { # run_tuning function run_tuning { - if [ "mobilenet_v2" = "$topology" ];then - sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf.yaml - fi + # TODO only for test, uncomment it before merge + # if [ "mobilenet_v2" = "$topology" ];then + # sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf.yaml + # fi extra_cmd="" if [ -n "$output_model" ];then extra_cmd = $extra_cmd"--tuned_checkpoint ${output_model}" diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/gpu/eager/run_tuning.sh b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/gpu/eager/run_tuning.sh index 3a272f7e8eb..a4460264ee2 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/gpu/eager/run_tuning.sh +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/gpu/eager/run_tuning.sh @@ -39,9 +39,10 @@ function init_params { function run_tuning { sed -i "/\/path\/to\/calibration\/dataset/s|root:.*|root: $dataset_location/train|g" conf.yaml sed -i "/\/path\/to\/evaluation\/dataset/s|root:.*|root: $dataset_location/val|g" conf.yaml - if [ "mobilenet_v2" = "$topology" ];then - sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf.yaml - fi + # TODO only for test, uncomment it before merge + # if [ "mobilenet_v2" = "$topology" ];then + # sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf.yaml + # fi extra_cmd="${dataset_location}" From d7f051178590abc9a3f358a66e8899ccbaa86643 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA <108742533+BiaoFangAIA@users.noreply.github.com> Date: Fri, 2 Dec 2022 14:30:46 +0800 Subject: [PATCH 
090/128] setting as disable act qnt loss analysis add check fused model feature --- neural_compressor/strategy/hawq.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 241716bf293..3397cacdfcf 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -55,7 +55,13 @@ class HessianTrace: def __init__(self, model, dataloader,q_model,criterion=None): self.unfused_model = model.model self.q_model=q_model - self.model = fuse_fx(model.model) ##TODO need to check whether model has been already fused + tmp_model=model.model + if 'graph' in (str(dir(tmp_model))): #check the attribute and it's length + logger.info("This is aready fused model") + self.model=model.model + else: + logger.info("fusing model") + self.model = fuse_fx(model.model) ##TODO need to check whether model has been already fused self.dataloader = dataloader self.max_iter = 500 self.tolerance = 1e-5 @@ -654,14 +660,12 @@ def next_tune_cfg(self): orig_eval = False self._fp32_model.eval() import copy - temp_q_model=copy.deepcopy(self.q_model) - ht = HessianTrace(self._fp32_model, self.calib_dataloader,temp_q_model) + ht = HessianTrace(self._fp32_model, self.calib_dataloader,self.q_model) q_model_state_dict = {} for key in self.q_model.state_dict().keys(): length = len("_model.") new_key = key[length:] q_model_state_dict[new_key] = self.q_model.state_dict()[key] - weight_quant_loss = compare_weights(ht.model.state_dict(), q_model_state_dict) pertur_lst = {} for key in weight_quant_loss: @@ -669,16 +673,18 @@ def next_tune_cfg(self): op_qnt_tensor = weight_quant_loss[key]['quantized'].dequantize() diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) # Formula: L2=||Q(w)-w||p^2 pertur_lst[key] = diff_l2 - traces = ht.get_avg_traces(enable_act=True) + self.enable_act=False #enable activation trace and quantization loss analysis feature + traces = ht.get_avg_traces(self.enable_act) op_to_traces = traces['weight'] - act_to_traces=traces['activation'] - # print("act_to_traces:",act_to_traces) + if self.enable_act: + act_to_traces=traces['activation'] #TODO() optimize relationship of weights quantized loss and activation quantized loss, to find best conbine #TODO() do double check why layer1's output is not 0 for activation quantized - for trace_i, pertur_i,act_i in zip(op_to_traces.keys(),pertur_lst.keys(),act_to_traces.keys()): - op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i]+act_to_traces[act_i] #Formula:Omig=Trace*L2+act_trace - # for trace_i, pertur_i in zip(op_to_traces.keys(),pertur_lst.keys()): - # op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i] #Formula:Omig=Trace*L2 + for trace_i, pertur_i,act_i in zip(op_to_traces.keys(),pertur_lst.keys(),act_to_traces.keys()): + op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i]+act_to_traces[act_i] #Formula:Omig=Trace*L2+act_trace + else: + for trace_i, pertur_i in zip(op_to_traces.keys(),pertur_lst.keys()): + op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i] #Formula:Omig=Trace*L2 if orig_eval == False: self._fp32_model.train() ordered_ops = sorted(op_to_traces.keys(), From 8a48f849f9b8dda0eb44a342f3887948ef43ef01 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 6 Dec 2022 09:12:36 +0800 Subject: [PATCH 091/128] aligned the interface between adaptor and strategy Signed-off-by: yiliu30 --- neural_compressor/adaptor/pytorch.py | 19 +++++++++++++++++++ 
neural_compressor/strategy/hawq.py | 10 ++++++++++ 2 files changed, 29 insertions(+) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 06245b4fb0d..3589b65aca1 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -1094,6 +1094,25 @@ def is_fused_module(self, module): else: return False + def calculate_hessian_trace(fp32_model, + dataloader, + q_model, + criterion = torch.nn.CrossEntropyLoss(), + enable_act = False): + """Calculate hessian trace. + + Args: + fp32_model: The original fp32 model. + criterion: The loss function for calculate the hessian trace. # loss = criterion(output, target) + dataloader: The dataloader for calculate the gradient. + q_model: The INT8 AMAP model. + enable_act: Enabling quantization error or not. + + Return: + hessian_trace(Dict[Tuple, float]), key: (op_name, op_type); value: hessian trace. + """ + pass + unify_op_type_mapping = { "ConvReLU2d": "Conv2d", diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 3397cacdfcf..013e45ece32 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -638,6 +638,8 @@ def next_tune_cfg(self): break op_tuning_cfg['calib_sampling_size'] = calib_size yield op_tuning_cfg + + # Start compute the hessian trace # import torch.quantization._numeric_suite as ns # self.model.eval() @@ -687,6 +689,14 @@ def next_tune_cfg(self): op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i] #Formula:Omig=Trace*L2 if orig_eval == False: self._fp32_model.train() + + # End compute the hessian trace + # # TODO uncomment it when algo ready. + # op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, + # dataloader = self.calib_dataloader, + # q_model = self.q_model, + # criterion = torch.nn.CrossEntropyLoss(), # TODO replace it with user specify loss + # enable_act = False) ordered_ops = sorted(op_to_traces.keys(), key=lambda key: op_to_traces[key], reverse=self.higher_is_better) From 895cc207ef0741667e4695ba326c96d2c40a71da Mon Sep 17 00:00:00 2001 From: BiaoFangAIA Date: Tue, 6 Dec 2022 15:22:04 +0800 Subject: [PATCH 092/128] add hawq metric logical --- .../adaptor/torch_utils/hawq_metric.py | 579 ++++++++++++++++++ 1 file changed, 579 insertions(+) create mode 100644 neural_compressor/adaptor/torch_utils/hawq_metric.py diff --git a/neural_compressor/adaptor/torch_utils/hawq_metric.py b/neural_compressor/adaptor/torch_utils/hawq_metric.py new file mode 100644 index 00000000000..465b7f9ca88 --- /dev/null +++ b/neural_compressor/adaptor/torch_utils/hawq_metric.py @@ -0,0 +1,579 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
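# Before the body of hawq_metric.py below, a minimal sketch (not part of this patch) of
# how a strategy is expected to consume the `calculate_hessian_trace` interface declared
# in the pytorch.py hunk of PATCH 091 just above: the adaptor returns a per-op
# sensitivity dict, which the strategy sorts to decide which ops to fall back to FP32
# first.  The argument names follow that docstring and the commented-out strategy call
# in the same patch; the helper itself and its defaults are illustrative assumptions,
# not code from this series.
import torch

def order_ops_by_sensitivity(adaptor, fp32_model, calib_dataloader, q_model):
    """Illustrative helper: rank ops from most to least quantization-sensitive."""
    op_to_traces = adaptor.calculate_hessian_trace(
        fp32_model=fp32_model,
        dataloader=calib_dataloader,
        q_model=q_model,
        criterion=torch.nn.CrossEntropyLoss(),  # assumed default, mirroring the docstring
        enable_act=False,                        # activation-trace analysis is optional
    )
    # The most sensitive ops (largest trace-weighted perturbation) are the first
    # candidates to keep at, or fall back to, FP32.
    return sorted(op_to_traces, key=op_to_traces.get, reverse=True)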
+ +import copy +import numpy as np +from collections import OrderedDict +import torch.nn +from torch.quantization.quantize_fx import fuse_fx +import torch.nn.intrinsic.quantized as nniq +from torch.fx import symbolic_trace, graph_module +import torch.nn as nn +import logging +logger = logging.getLogger(__name__) +from typing import Dict, List, Optional, Any, Union, Callable, Set +# Define Collector based on hook, which is used to record the intermediate result +class Node_collector: + def __init__(self, m): + self.handle = m.register_forward_hook(self.hook_fn_act) + def hook_fn_act(self, m, inp, outp): + self.out_features = outp.clone() + self.in_features = inp + self.m = m + def remove(self): + self.handle.remove() +class HessianTrace: + """ + please refer to + Yao, Zhewei, et al. "Pyhessian: Neural networks through the lens of the hessian." 2020 IEEE international conference on big data (Big data). IEEE, 2020. + Dong, Zhen, et al. "Hawq-v2: Hessian aware trace-weighted quantization of neural networks." Advances in neural information processing systems 33 (2020): 18518-18529. + https://github.com/openvinotoolkit/nncf/blob/develop/nncf/torch/quantization/hessian_trace.py + """ + + def __init__(self, model, dataloader,q_model,criterion=None): + self.unfused_model = model.model + self.q_model=q_model + tmp_model=model.model + if 'graph' in (str(dir(tmp_model))): #check the attribute and it's length + logger.info("This is aready fused model") + self.model=model.model + else: + logger.info("fusing model") + self.model = fuse_fx(model.model) ##TODO need to check whether model has been already fused + self.dataloader = dataloader + self.max_iter = 500 + self.tolerance = 1e-5 + self.eps = 1e-6 + self.index = 0 + self.device = self.get_device(self.model) + self.criterion = criterion + if self.criterion == None: + self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config + self.criterion = self.criterion.to(self.device) + self.weight_to_op, self.op_list = self.get_fused_mapping() + self.get_params() + + def is_fused_module(self, module): + """This is a helper function for `_propagate_qconfig_helper` to detecte + if this module is fused. + Args: + module (object): input module + Returns: + (bool): is fused or not + """ + op_type = str(type(module)) + if 'fused' in op_type: + return True + else: + return False + + def mapping_module_to_op(self, name): + # length = len("_model.") + # if len(name) < length: + # return name + # else: + return name + def mse_metric_gap(self,fp32_tensor, dequantize_tensor): + """Calculate the euclidean distance between fp32 tensor and int8 dequantize tensor + Args: + fp32_tensor (tensor): The FP32 tensor. + dequantize_tensor (tensor): The INT8 dequantize tensor. + """ + fp32_max = np.max(fp32_tensor) + fp32_min = np.min(fp32_tensor) + dequantize_max = np.max(dequantize_tensor) + dequantize_min = np.min(dequantize_tensor) + fp32_tensor = (fp32_tensor - fp32_min) / (fp32_max - fp32_min) + dequantize_tensor = (dequantize_tensor - dequantize_min) / \ + (dequantize_max - dequantize_min) + diff_tensor = fp32_tensor - dequantize_tensor + euclidean_dist = np.sum(diff_tensor ** 2) + return euclidean_dist / fp32_tensor.size + def get_fused_mapping(self): + model = self.model + weights_info = dict(model.named_parameters()) + weight_to_op = {} + for op_name, child in model.named_modules(): + if self.is_fused_module(child): + for name, _ in child.named_children(): + if op_name + "." 
+ name + ".weight" in weights_info: ##TODO check if this is right + + weight_to_op[op_name + "." + name + ".weight"] = self.mapping_module_to_op(op_name) + break + else: + name = op_name + ".weight" + if name in weights_info and name not in weight_to_op.keys(): + weight_to_op[op_name + ".weight"] = op_name + op_list = [] + for key in weight_to_op.keys(): + op_list.append(weight_to_op[key]) + return weight_to_op, op_list + + def get_device(self, model: torch.nn.Module): + for n, p in model.named_parameters(): + return p.data.device + + def _get_act_grad_hook(self, name): + def act_grad_hook(model, grad_input, grad_output): + ##print(name, grad_input[0].shape, grad_output[0].shape) + if type(model) == torch.nn.Linear: ##TODO very tricky + self.layer_acts_grads[name] = grad_input[1] + else: + self.layer_acts_grads[name] = grad_input[0] + + return act_grad_hook + + def _get_enable_act_grad_hook(self, name): + def enable_act_grad_hook(model, inputs, outputs): + input = inputs[0] + if input.requires_grad is False: + input.requires_grad = True + self.layer_acts[name] = input + + return enable_act_grad_hook + + # def _get_disable_input_grad_hook(self, name): + # def disable_input_grad_hook(model, inputs, outputs): + # try: + # input = inputs[0] ##TODO check whether this is right + # except: + # input = inputs + # if input.is_leaf == False:## you can only change requires_grad flags of leaf variables + # if input.requires_grad is True: + # input.requires_grad = False + # + # + # return disable_input_grad_hook + + def _unregister_hook(self): + for handel in self.hook_handles: + handel.remove() + + def register_act_grad_hooks(self, model): + for name, module in model.named_modules(): + if self.mapping_module_to_op(name) in self.op_list: + hook_handle = module.register_forward_hook(self._get_enable_act_grad_hook(name)) + self.hook_handles.append(hook_handle) + hook_handle = module.register_backward_hook(self._get_act_grad_hook(name)) + self.hook_handles.append(hook_handle) + + def reset_act_gradient_and_hooks(self): + # tmp_input = torch.zeros(self._input_shape, device=self.device) + # for name, module in self.model.named_modules(): + # if name in self.op_list: + # hook_handle = module.register_forward_hook(self._get_disable_input_grad_hook(name)) + # self.hook_handles.append(hook_handle) + # self.model(tmp_input) + self._unregister_hook() + + def get_params(self): + weight_names = [n for n, p in self.model.named_parameters() if + p.requires_grad and "bias" not in n] ##remove bias + params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n] ##remove bias + self.weight_names = weight_names + self.params = params + + def forward_backward(self, model, data, create_graph=False, return_w_grad=True): + model.zero_grad() + input = data[0].to(self.device) + ##self._input_shape = input.shape ## for resetting input activation + target = data[1].to(self.device) + input.requires_grad = True + output = model(input) + loss = self.criterion(output, target) + torch.autograd.backward(loss, create_graph=create_graph) + ##loss.backward(create_graph=create_graph) + if return_w_grad: + gradients = [] + for n, p in self.model.named_parameters(): + if p.grad != None and n in self.weight_names: + gradient = p.grad + gradients.append(gradient + 0.0) ## add 0 to create a copy + model.zero_grad() + return gradients + else: + model.zero_grad() + + # def get_params(self, model): + # parameters = [p for p in model.parameters() if p.requires_grad] + # return parameters + + def 
sample_rademacher(self, params): + samples = [] + for param in params: + r = torch.randint_like(param, high=2, device=self.device) + r.masked_fill_(r == 0, -1) + samples.append(r) + return samples + + def get_vtHv_weight(self, params, num_samples): + v = self.sample_rademacher(params) + H_v = [0] * len(v) + cnt = 0 + for step, data in enumerate(self.dataloader): + batch_size = data[0].shape[0] + cnt += batch_size + gradients = self.forward_backward(self.model, data, create_graph=True) + H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) + H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] + if cnt >= num_samples: + break + if cnt > 0: + H_v = [item / cnt for item in H_v] + v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better + return v_t_H_v + + def get_vtHv_act(self, params, num_samples): + v = self.sample_rademacher(params) + H_v = [0] * len(v) + cnt = 0 + for step, data in enumerate(self.dataloader): + if cnt >= num_samples: + break + for i in range(self.dataloader.batchsize): ##force to batchsize to be 1 + input = data[0][i:i + 1] + target = data[1][i:i + 1] + + self.get_gradients(self.model, (input, target), self.criterion, create_graph=True) + layer_acts = [self.layer_acts[key] for key in self.layer_acts.keys()] + layer_act_gradients = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] + hv_one = torch.autograd.grad(layer_act_gradients, layer_acts, v, only_inputs=True, retain_graph=False) + cnt += 1 + if cnt >= num_samples: + break + + def get_weight_traces(self, num_samples): + layer_traces_per_iter = [] + prev_avg_model_trace = 0 + for iter in range(self.max_iter): + layer_traces = self.get_vtHv_weight(self.params, num_samples) + layer_traces_per_iter.append(layer_traces) + layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) + model_trace = torch.sum(layer_traces_estimate) + diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) + if diff_ratio < self.tolerance and iter > 10: ##TODO magic number + break + if iter == 50: ##TODO for debug + break + prev_avg_model_trace = model_trace + weight_name_to_traces = {} + layer_traces = layer_traces_estimate + for weight_name, trace in zip(self.weight_names, layer_traces): + weight_name_to_traces[weight_name] = trace + op_name_to_trace = {} + for weight_name in self.weight_names: + op_name = self.weight_to_op[weight_name] + op_name_to_trace[op_name] = weight_name_to_traces[weight_name] + return op_name_to_trace + def get_act_traces(self, num_samples): + unfused_training = self.unfused_model.training + self.unfused_model.eval() + self.hook_handles = [] + self.layer_acts = {} + self.layer_acts_grads = {} + self.register_act_grad_hooks(self.unfused_model) + cnt = 0 + act_traces_per_sample = [] + for step, data in enumerate(self.dataloader): + if cnt >= num_samples: + break + bs = data[0].shape[0] + act_traces_sum = 0 + act_traces_per_iter = [] + prev_avg_model_trace = 0 + act_traces_sums = None + for i in range(bs): ##force the bs to be one + input = data[0][i:i + 1] + target = data[1][i:i + 1] + self.forward_backward(self.unfused_model, (input, target), create_graph=True, return_w_grad=False) + acts = [self.layer_acts[key] for key in self.layer_acts.keys()] + if act_traces_sums == None: + act_traces_sums = [0] * len(acts) + acts_grad = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] ##same order with acts + vt_H_v_sum_per_act = [0] * len(acts) + + 
prev_model_act_trace = 0 + for iter in range(self.max_iter): + v = self.sample_rademacher(acts) + H_v = torch.autograd.grad(acts_grad, acts, v, only_inputs=True, retain_graph=True) + vt_H_v = [torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)] + + vt_H_v_sum_per_act = [vt_H_v_sum_per_act[index] + vt_H_v[index] for index, item in + enumerate(vt_H_v_sum_per_act)] + vt_H_v_mean_per_act = [item / (iter + 1) for item in vt_H_v_sum_per_act] + current_model_act_trace = torch.mean(torch.stack(vt_H_v_mean_per_act)) + + diff_ratio = abs(current_model_act_trace - prev_model_act_trace) / ( + prev_model_act_trace + self.eps) + if diff_ratio < self.tolerance and iter > 10: ##TODO magic number + break + if iter == 50: ##TODO for debug + break + + prev_model_act_trace = current_model_act_trace + act_traces_per_sample.append(vt_H_v_mean_per_act) + cnt += 1 + if cnt >= num_samples: + break + + if unfused_training: + self.unfused_model.train() + self.reset_act_gradient_and_hooks() ##TODO have issues to reset the input grad to False + act_traces_stack = torch.stack([torch.stack(item) for item in act_traces_per_sample]) + act_traces = torch.mean(act_traces_stack, dim=0) + res_dict = {} + for index, key in enumerate(self.layer_acts.keys()): + res_dict[key] = act_traces[index] + + self.layer_acts = [] + self.layer_acts_grads = [] + return res_dict + def insert_hook(self, model, target_module_list): + intern_outputs = [] + for layer,module in model.named_modules(): + for target_module in target_module_list: + # print("layer:",layer) + # print("target_model:",target_module) + if layer == target_module: + logging.debug("Collect: %s" % (module)) + # print("Collect: %s" % (module)) + intern_outputs.append(Node_collector(module)) + + logging.info("Total %d hook inserted" % (len(intern_outputs))) + # print("Total %d hook inserted" % (len(intern_outputs))) + return model, intern_outputs + def insert_hook_quantize(self,model, target_module_list): + intern_outputs = [] + for layer,module in model.named_modules(): + for target_module in target_module_list: + # print("layer:",layer) + length = len("_model.") + new_key = layer[length:] + # print("target_model:",target_module) + if new_key == target_module: + logging.debug("Collect: %s" % (module)) + # print("Collect: %s" % (module)) + intern_outputs.append(Node_collector(module)) + logging.info("Total %d hook inserted" % (len(intern_outputs))) + # print("Total %d hook inserted" % (len(intern_outputs))) + return model, intern_outputs + def get_act_gap(self,fp32_model,q_model): + """ + Estimates each activation gap between quantized model and float model + """ + self.handle_acts=[] + fp32_model.eval() + # temp_model = fuse_fx(fp32_model.model) + temp_model=fp32_model + # target_module_list = [nn.ReLU] # Insert hook for FP32 model + target_module_list = self.op_list + temp_model, intern_outputs =self.insert_hook(temp_model, target_module_list) + # intern_outputs={} + for input, target in self.dataloader: + temp_model(input) + break + + fp32_act_out={} + for i, intern_output in enumerate(intern_outputs): + stat_features = intern_output.out_features.view(-1) + # print ("No.", i, " ", intern_output.out_features.shape) + # print ("Numpy No.", i, " ", intern_output.out_features.cpu().data.numpy().shape) + # print ("No.", i, " ", stat_features.cpu().data.numpy().shape) + # print ("Numpy No.", i, " ", stat_features.cpu().data.numpy()) + fp32_act_out[target_module_list[i]]=stat_features.cpu().data.numpy() + # break + for i in intern_outputs: + # print(i) + i.remove() + 
target_module_list = self.op_list + q_model, intern_outputs=self.insert_hook_quantize(q_model, target_module_list) + for input, target in self.dataloader: #only one sample + q_model(input) + break + qnt_act_out={} + intern_outputs={} + for i, intern_output in enumerate(intern_outputs): + stat_features = intern_output.out_features.view(-1) + qnt_act_out[target_module_list[i]]=stat_features.dequantize().cpu().data.numpy() + # break + for i in intern_outputs: + # print(i) + i.remove() + act_gap={} + mse_gap={} + for fp_i,int_i in zip(fp32_act_out,qnt_act_out): + activation_qnt_error=fp32_act_out[fp_i]-qnt_act_out[int_i] + mse_gap[fp_i]=self.mse_metric_gap(fp32_act_out[fp_i],qnt_act_out[int_i]) + act_gap[fp_i]=np.sum(activation_qnt_error)/activation_qnt_error.size + return act_gap,mse_gap + def get_avg_traces(self, enable_act=True, num_samples=32): + """ + Estimates average hessian trace for each parameter + """ + assert num_samples > 0 + traces = {} + weight_traces = self.get_weight_traces(num_samples) + traces['weight'] = weight_traces + act_trace={} + if enable_act: + act_gap,mse_gap=self.get_act_gap(self.model,self.q_model) + act_traces = self.get_act_traces(num_samples) + for i,j in zip(act_traces,mse_gap): + #currently use mse to analysis + act_trace[i]=act_traces[i]+mse_gap[j] + traces['activation'] = act_traces + return traces + + +##copy from torch.quantization._numeric_suite +def _find_match( + str_list: Union[Dict[str, Any], List[str]], key_str: str, + postfix: str, +) -> Optional[str]: + split_str = key_str.split(".") + if split_str[-1] == postfix: + match_string = "".join(key_str.split(".")[0:-1]) + for s2 in str_list: + pattern1 = "".join(s2.split(".")[0:-1]) + pattern2 = "".join(s2.split(".")[0:-2]) + if match_string == pattern1: + return s2 + if match_string == pattern2: + return s2 + + # For matching "fc.weight" and "fc._packed_params._packed_params" + if postfix == "_packed_params": + match_string = "".join(key_str.split(".")[0:-2]) + if len(match_string) == 0: + return None + for s2 in str_list: + pattern1 = "".join(s2.split(".")[0:-1]) + pattern2 = "".join(s2.split(".")[0:-2]) + if match_string == pattern1: + return s2 + if match_string == pattern2: + return s2 + return None + else: + return None + + +##copy form torch.quantization._numeric_suite +def compare_weights( + float_dict: Dict[str, Any], quantized_dict: Dict[str, Any] +) -> Dict[str, Dict[str, torch.Tensor]]: + r"""Compare the weights of the float module with its corresponding quantized + module. Return a dict with key corresponding to module names and each entry being + a dictionary with two keys 'float' and 'quantized', containing the float and + quantized weights. This dict can be used to compare and compute the quantization + error of the weights of float and quantized models. 
+ + Example usage:: + + wt_compare_dict = compare_weights( + float_model.state_dict(), qmodel.state_dict()) + for key in wt_compare_dict: + print( + key, + compute_error( + wt_compare_dict[key]['float'], + wt_compare_dict[key]['quantized'].dequantize() + ) + ) + + Args: + float_dict: state dict of the float model + quantized_dict: state dict of the quantized model + + Return: + weight_dict: dict with key corresponding to module names and each entry being + a dictionary with two keys 'float' and 'quantized', containing the float and + quantized weights + """ + + weight_dict: Dict[str, Dict] = {} + for key in quantized_dict: + match_key = _find_match(float_dict, key, "weight") + if match_key is not None: + weight_dict[key] = {} + weight_dict[key]["float"] = float_dict[match_key] + weight_dict[key]["quantized"] = quantized_dict[key] + continue + + # For matching "fc.weight" and "fc._packed_params._packed_params" + match_key = _find_match(float_dict, key, "_packed_params") + if match_key is not None: + weight_dict[match_key] = {} + weight_dict[match_key]["float"] = float_dict[match_key] + weight_dict[match_key]["quantized"] = quantized_dict[key][0] + ##TODO:should consider more models in further work + + # For LSTM + split_str = key.split(".") + if split_str[-1] == "param" and split_str[-3] == "_all_weight_values": + layer = split_str[-2] + module_name = ".".join(split_str[:-3]) + float_weight_ih_key = module_name + ".weight_ih_l" + layer + float_weight_hh_key = module_name + ".weight_hh_l" + layer + if float_weight_ih_key in float_dict and float_weight_hh_key in float_dict: + weight_dict[key] = {} + weight_dict[key]["float"] = float_dict[float_weight_ih_key] + weight_dict[key]["quantized"] = ( + quantized_dict[key].__getstate__()[0][4][0].__getstate__()[0][0] + ) + weight_dict[key]["float"] = float_dict[float_weight_hh_key] + weight_dict[key]["quantized"] = ( + quantized_dict[key].__getstate__()[0][4][1].__getstate__()[0][0] + ) + + return weight_dict +# op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, + # dataloader = self.calib_dataloader, + # q_model = self.q_model, + # criterion = torch.nn.CrossEntropyLoss(), # TODO replace it with user specify loss + # enable_act = False) +def hawq_top(fp32_model,q_model,dataloader,criterion,enable_act): + orig_eval=True + if fp32_model.training: + orig_eval=False + ht=HessianTrace(fp32_model,dataloader=dataloader,q_model=q_model) + q_model_state_dict={} + for key in q_model.state_dict().keys(): + length=len("_model.") + new_key=key[length:] + q_model_state_dict[new_key]=q_model.state_dict()[key] + weight_quant_loss=compare_weights(ht.model.state_dict(),q_model_state_dict) + pertur_lst={} + for key in weight_quant_loss: + op_float_tensor=weight_quant_loss[key]['float'] + op_qnt_tensor=weight_quant_loss[key]['quantized'].dequantize() + diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) + pertur_lst[key]=diff_l2 + traces=ht.get_act_traces(enable_act) + op_to_traces=traces['weight'] + if enable_act: + act_to_traces=traces['activation'] + for trace_i, pertur_i,act_i in zip(op_to_traces.keys(),pertur_lst.keys(),act_to_traces.keys()): + op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i]+act_to_traces[act_i] #Formula:Omig=Trace*L2+act_trace + else: + for trace_i, pertur_i in zip(op_to_traces.keys(),pertur_lst.keys()): + op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i] #Formula:Omig=Trace*L2 + if orig_eval==False: + fp32_model.train() + return op_to_traces + + \ No newline at end of 
file From cb8fd30160588faaadbfb19566f1715573123c84 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA Date: Tue, 6 Dec 2022 15:25:24 +0800 Subject: [PATCH 093/128] add call hawq function --- neural_compressor/adaptor/pytorch.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 3589b65aca1..44392313d80 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -29,7 +29,7 @@ from ..utils import logger from .query import QueryBackendCapability from ..experimental.data.dataloaders.base_dataloader import BaseDataLoader - +from .torch_utils.hawq_metric import hawq_top torch = LazyImport("torch") json = LazyImport("json") @@ -1094,11 +1094,13 @@ def is_fused_module(self, module): else: return False - def calculate_hessian_trace(fp32_model, + def calculate_hessian_trace( + fp32_model, dataloader, q_model, - criterion = torch.nn.CrossEntropyLoss(), - enable_act = False): + criterion, + enable_act = False + ): """Calculate hessian trace. Args: @@ -1111,6 +1113,8 @@ def calculate_hessian_trace(fp32_model, Return: hessian_trace(Dict[Tuple, float]), key: (op_name, op_type); value: hessian trace. """ + op_to_traces=hawq_top(fp32_model=fp32_model,dataloader=dataloader,q_model=q_model,criterion=criterion,enable_act=enable_act) + return op_to_traces pass From a5503985e5d83e681e1caa466438335b445e1ec3 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA Date: Tue, 6 Dec 2022 15:27:37 +0800 Subject: [PATCH 094/128] enable hawq interface --- neural_compressor/strategy/hawq.py | 1103 ++++++++++++++-------------- 1 file changed, 552 insertions(+), 551 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 013e45ece32..c1ce91e0ca0 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -34,521 +34,521 @@ import logging logger = logging.getLogger(__name__) from typing import Dict, List, Optional, Any, Union, Callable, Set -# Define Collector based on hook, which is used to record the intermediate result -class Node_collector: - def __init__(self, m): - self.handle = m.register_forward_hook(self.hook_fn_act) - def hook_fn_act(self, m, inp, outp): - self.out_features = outp.clone() - self.in_features = inp - self.m = m - def remove(self): - self.handle.remove() -class HessianTrace: - """ - please refer to - Yao, Zhewei, et al. "Pyhessian: Neural networks through the lens of the hessian." 2020 IEEE international conference on big data (Big data). IEEE, 2020. - Dong, Zhen, et al. "Hawq-v2: Hessian aware trace-weighted quantization of neural networks." Advances in neural information processing systems 33 (2020): 18518-18529. 
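# The HessianTrace class cited above (a copy of which now lives in
# neural_compressor/adaptor/torch_utils/hawq_metric.py, added in PATCH 092) estimates
# each layer's Hessian trace with Hutchinson's method: tr(H) = E[v^T H v] for
# Rademacher probe vectors v, where H v is obtained by differentiating the gradients a
# second time (double backprop).  Below is a self-contained, simplified sketch of that
# estimator for a single batch; the function and variable names are placeholders, and
# the implementation in this series additionally averages over calibration batches and
# iterates until the running estimate converges.
import torch

def hutchinson_weight_traces(model, criterion, data, target, n_probes=8):
    """Estimate {weight name: Hessian trace} from one (data, target) batch."""
    named = [(n, p) for n, p in model.named_parameters()
             if p.requires_grad and "bias" not in n]
    names = [n for n, _ in named]
    params = [p for _, p in named]
    loss = criterion(model(data), target)
    # create_graph=True keeps the graph so the gradient can be differentiated again.
    grads = torch.autograd.grad(loss, params, create_graph=True)
    traces = [0.0] * len(params)
    for _ in range(n_probes):
        # Rademacher probes: entries are +1 or -1 with equal probability.
        vs = [torch.randint_like(p, high=2) * 2 - 1 for p in params]
        # Hessian-vector products via double backprop: d(sum_i g_i * v_i) / d(params).
        Hvs = torch.autograd.grad(grads, params, grad_outputs=vs, retain_graph=True)
        for i, (hv, v) in enumerate(zip(Hvs, vs)):
            traces[i] += (hv * v).sum().item() / n_probes  # running average of v^T H v
    return dict(zip(names, traces))

# In the surrounding patches these per-op traces are then weighted by the measured
# weight quantization perturbation, roughly Omega_op = trace_op * ||Q(W_op) - W_op||_2^2
# (optionally plus an activation-gap term when enable_act is set), and the ops with the
# largest Omega are ordered first for FP32 fallback.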
- https://github.com/openvinotoolkit/nncf/blob/develop/nncf/torch/quantization/hessian_trace.py - """ - - def __init__(self, model, dataloader,q_model,criterion=None): - self.unfused_model = model.model - self.q_model=q_model - tmp_model=model.model - if 'graph' in (str(dir(tmp_model))): #check the attribute and it's length - logger.info("This is aready fused model") - self.model=model.model - else: - logger.info("fusing model") - self.model = fuse_fx(model.model) ##TODO need to check whether model has been already fused - self.dataloader = dataloader - self.max_iter = 500 - self.tolerance = 1e-5 - self.eps = 1e-6 - self.index = 0 - self.device = self.get_device(self.model) - self.criterion = criterion - if self.criterion == None: - self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config - self.criterion = self.criterion.to(self.device) - self.weight_to_op, self.op_list = self.get_fused_mapping() - self.get_params() - - def is_fused_module(self, module): - """This is a helper function for `_propagate_qconfig_helper` to detecte - if this module is fused. - Args: - module (object): input module - Returns: - (bool): is fused or not - """ - op_type = str(type(module)) - if 'fused' in op_type: - return True - else: - return False - - def mapping_module_to_op(self, name): - # length = len("_model.") - # if len(name) < length: - # return name - # else: - return name - def mse_metric_gap(self,fp32_tensor, dequantize_tensor): - """Calculate the euclidean distance between fp32 tensor and int8 dequantize tensor - Args: - fp32_tensor (tensor): The FP32 tensor. - dequantize_tensor (tensor): The INT8 dequantize tensor. - """ - fp32_max = np.max(fp32_tensor) - fp32_min = np.min(fp32_tensor) - dequantize_max = np.max(dequantize_tensor) - dequantize_min = np.min(dequantize_tensor) - fp32_tensor = (fp32_tensor - fp32_min) / (fp32_max - fp32_min) - dequantize_tensor = (dequantize_tensor - dequantize_min) / \ - (dequantize_max - dequantize_min) - diff_tensor = fp32_tensor - dequantize_tensor - euclidean_dist = np.sum(diff_tensor ** 2) - return euclidean_dist / fp32_tensor.size - def get_fused_mapping(self): - model = self.model - weights_info = dict(model.named_parameters()) - weight_to_op = {} - for op_name, child in model.named_modules(): - if self.is_fused_module(child): - for name, _ in child.named_children(): - if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right - - weight_to_op[op_name + "." 
+ name + ".weight"] = self.mapping_module_to_op(op_name) - break - else: - name = op_name + ".weight" - if name in weights_info and name not in weight_to_op.keys(): - weight_to_op[op_name + ".weight"] = op_name - op_list = [] - for key in weight_to_op.keys(): - op_list.append(weight_to_op[key]) - return weight_to_op, op_list - - def get_device(self, model: torch.nn.Module): - for n, p in model.named_parameters(): - return p.data.device - - def _get_act_grad_hook(self, name): - def act_grad_hook(model, grad_input, grad_output): - ##print(name, grad_input[0].shape, grad_output[0].shape) - if type(model) == torch.nn.Linear: ##TODO very tricky - self.layer_acts_grads[name] = grad_input[1] - else: - self.layer_acts_grads[name] = grad_input[0] - - return act_grad_hook - - def _get_enable_act_grad_hook(self, name): - def enable_act_grad_hook(model, inputs, outputs): - input = inputs[0] - if input.requires_grad is False: - input.requires_grad = True - self.layer_acts[name] = input - - return enable_act_grad_hook - - # def _get_disable_input_grad_hook(self, name): - # def disable_input_grad_hook(model, inputs, outputs): - # try: - # input = inputs[0] ##TODO check whether this is right - # except: - # input = inputs - # if input.is_leaf == False:## you can only change requires_grad flags of leaf variables - # if input.requires_grad is True: - # input.requires_grad = False - # - # - # return disable_input_grad_hook - - def _unregister_hook(self): - for handel in self.hook_handles: - handel.remove() - - def register_act_grad_hooks(self, model): - for name, module in model.named_modules(): - if self.mapping_module_to_op(name) in self.op_list: - hook_handle = module.register_forward_hook(self._get_enable_act_grad_hook(name)) - self.hook_handles.append(hook_handle) - hook_handle = module.register_backward_hook(self._get_act_grad_hook(name)) - self.hook_handles.append(hook_handle) - - def reset_act_gradient_and_hooks(self): - # tmp_input = torch.zeros(self._input_shape, device=self.device) - # for name, module in self.model.named_modules(): - # if name in self.op_list: - # hook_handle = module.register_forward_hook(self._get_disable_input_grad_hook(name)) - # self.hook_handles.append(hook_handle) - # self.model(tmp_input) - self._unregister_hook() - - def get_params(self): - weight_names = [n for n, p in self.model.named_parameters() if - p.requires_grad and "bias" not in n] ##remove bias - params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n] ##remove bias - self.weight_names = weight_names - self.params = params - - def forward_backward(self, model, data, create_graph=False, return_w_grad=True): - model.zero_grad() - input = data[0].to(self.device) - ##self._input_shape = input.shape ## for resetting input activation - target = data[1].to(self.device) - input.requires_grad = True - output = model(input) - loss = self.criterion(output, target) - torch.autograd.backward(loss, create_graph=create_graph) - ##loss.backward(create_graph=create_graph) - if return_w_grad: - gradients = [] - for n, p in self.model.named_parameters(): - if p.grad != None and n in self.weight_names: - gradient = p.grad - gradients.append(gradient + 0.0) ## add 0 to create a copy - model.zero_grad() - return gradients - else: - model.zero_grad() - - # def get_params(self, model): - # parameters = [p for p in model.parameters() if p.requires_grad] - # return parameters - - def sample_rademacher(self, params): - samples = [] - for param in params: - r = torch.randint_like(param, high=2, 
device=self.device) - r.masked_fill_(r == 0, -1) - samples.append(r) - return samples - - def get_vtHv_weight(self, params, num_samples): - v = self.sample_rademacher(params) - H_v = [0] * len(v) - cnt = 0 - for step, data in enumerate(self.dataloader): - batch_size = data[0].shape[0] - cnt += batch_size - gradients = self.forward_backward(self.model, data, create_graph=True) - H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) - H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] - if cnt >= num_samples: - break - if cnt > 0: - H_v = [item / cnt for item in H_v] - v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better - return v_t_H_v - - def get_vtHv_act(self, params, num_samples): - v = self.sample_rademacher(params) - H_v = [0] * len(v) - cnt = 0 - for step, data in enumerate(self.dataloader): - if cnt >= num_samples: - break - for i in range(self.dataloader.batchsize): ##force to batchsize to be 1 - input = data[0][i:i + 1] - target = data[1][i:i + 1] - - self.get_gradients(self.model, (input, target), self.criterion, create_graph=True) - layer_acts = [self.layer_acts[key] for key in self.layer_acts.keys()] - layer_act_gradients = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] - hv_one = torch.autograd.grad(layer_act_gradients, layer_acts, v, only_inputs=True, retain_graph=False) - cnt += 1 - if cnt >= num_samples: - break - - def get_weight_traces(self, num_samples): - layer_traces_per_iter = [] - prev_avg_model_trace = 0 - for iter in range(self.max_iter): - layer_traces = self.get_vtHv_weight(self.params, num_samples) - layer_traces_per_iter.append(layer_traces) - layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) - model_trace = torch.sum(layer_traces_estimate) - diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) - if diff_ratio < self.tolerance and iter > 10: ##TODO magic number - break - if iter == 50: ##TODO for debug - break - prev_avg_model_trace = model_trace - weight_name_to_traces = {} - layer_traces = layer_traces_estimate - for weight_name, trace in zip(self.weight_names, layer_traces): - weight_name_to_traces[weight_name] = trace - op_name_to_trace = {} - for weight_name in self.weight_names: - op_name = self.weight_to_op[weight_name] - op_name_to_trace[op_name] = weight_name_to_traces[weight_name] - return op_name_to_trace - def get_act_traces(self, num_samples): - unfused_training = self.unfused_model.training - self.unfused_model.eval() - self.hook_handles = [] - self.layer_acts = {} - self.layer_acts_grads = {} - self.register_act_grad_hooks(self.unfused_model) - cnt = 0 - act_traces_per_sample = [] - for step, data in enumerate(self.dataloader): - if cnt >= num_samples: - break - bs = data[0].shape[0] - act_traces_sum = 0 - act_traces_per_iter = [] - prev_avg_model_trace = 0 - act_traces_sums = None - for i in range(bs): ##force the bs to be one - input = data[0][i:i + 1] - target = data[1][i:i + 1] - self.forward_backward(self.unfused_model, (input, target), create_graph=True, return_w_grad=False) - acts = [self.layer_acts[key] for key in self.layer_acts.keys()] - if act_traces_sums == None: - act_traces_sums = [0] * len(acts) - acts_grad = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] ##same order with acts - vt_H_v_sum_per_act = [0] * len(acts) - - prev_model_act_trace = 0 - for iter in range(self.max_iter): - v = self.sample_rademacher(acts) - H_v = 
torch.autograd.grad(acts_grad, acts, v, only_inputs=True, retain_graph=True) - vt_H_v = [torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)] - - vt_H_v_sum_per_act = [vt_H_v_sum_per_act[index] + vt_H_v[index] for index, item in - enumerate(vt_H_v_sum_per_act)] - vt_H_v_mean_per_act = [item / (iter + 1) for item in vt_H_v_sum_per_act] - current_model_act_trace = torch.mean(torch.stack(vt_H_v_mean_per_act)) - - diff_ratio = abs(current_model_act_trace - prev_model_act_trace) / ( - prev_model_act_trace + self.eps) - if diff_ratio < self.tolerance and iter > 10: ##TODO magic number - break - if iter == 50: ##TODO for debug - break - - prev_model_act_trace = current_model_act_trace - act_traces_per_sample.append(vt_H_v_mean_per_act) - cnt += 1 - if cnt >= num_samples: - break - - if unfused_training: - self.unfused_model.train() - self.reset_act_gradient_and_hooks() ##TODO have issues to reset the input grad to False - act_traces_stack = torch.stack([torch.stack(item) for item in act_traces_per_sample]) - act_traces = torch.mean(act_traces_stack, dim=0) - res_dict = {} - for index, key in enumerate(self.layer_acts.keys()): - res_dict[key] = act_traces[index] - - self.layer_acts = [] - self.layer_acts_grads = [] - return res_dict - def insert_hook(self, model, target_module_list): - intern_outputs = [] - for layer,module in model.named_modules(): - for target_module in target_module_list: - # print("layer:",layer) - # print("target_model:",target_module) - if layer == target_module: - logging.debug("Collect: %s" % (module)) - # print("Collect: %s" % (module)) - intern_outputs.append(Node_collector(module)) +# # Define Collector based on hook, which is used to record the intermediate result +# class Node_collector: +# def __init__(self, m): +# self.handle = m.register_forward_hook(self.hook_fn_act) +# def hook_fn_act(self, m, inp, outp): +# self.out_features = outp.clone() +# self.in_features = inp +# self.m = m +# def remove(self): +# self.handle.remove() +# class HessianTrace: +# """ +# please refer to +# Yao, Zhewei, et al. "Pyhessian: Neural networks through the lens of the hessian." 2020 IEEE international conference on big data (Big data). IEEE, 2020. +# Dong, Zhen, et al. "Hawq-v2: Hessian aware trace-weighted quantization of neural networks." Advances in neural information processing systems 33 (2020): 18518-18529. +# https://github.com/openvinotoolkit/nncf/blob/develop/nncf/torch/quantization/hessian_trace.py +# """ + +# def __init__(self, model, dataloader,q_model,criterion=None): +# self.unfused_model = model.model +# self.q_model=q_model +# tmp_model=model.model +# if 'graph' in (str(dir(tmp_model))): #check the attribute and it's length +# logger.info("This is aready fused model") +# self.model=model.model +# else: +# logger.info("fusing model") +# self.model = fuse_fx(model.model) ##TODO need to check whether model has been already fused +# self.dataloader = dataloader +# self.max_iter = 500 +# self.tolerance = 1e-5 +# self.eps = 1e-6 +# self.index = 0 +# self.device = self.get_device(self.model) +# self.criterion = criterion +# if self.criterion == None: +# self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config +# self.criterion = self.criterion.to(self.device) +# self.weight_to_op, self.op_list = self.get_fused_mapping() +# self.get_params() + +# def is_fused_module(self, module): +# """This is a helper function for `_propagate_qconfig_helper` to detecte +# if this module is fused. 
+# Args: +# module (object): input module +# Returns: +# (bool): is fused or not +# """ +# op_type = str(type(module)) +# if 'fused' in op_type: +# return True +# else: +# return False + +# def mapping_module_to_op(self, name): +# # length = len("_model.") +# # if len(name) < length: +# # return name +# # else: +# return name +# def mse_metric_gap(self,fp32_tensor, dequantize_tensor): +# """Calculate the euclidean distance between fp32 tensor and int8 dequantize tensor +# Args: +# fp32_tensor (tensor): The FP32 tensor. +# dequantize_tensor (tensor): The INT8 dequantize tensor. +# """ +# fp32_max = np.max(fp32_tensor) +# fp32_min = np.min(fp32_tensor) +# dequantize_max = np.max(dequantize_tensor) +# dequantize_min = np.min(dequantize_tensor) +# fp32_tensor = (fp32_tensor - fp32_min) / (fp32_max - fp32_min) +# dequantize_tensor = (dequantize_tensor - dequantize_min) / \ +# (dequantize_max - dequantize_min) +# diff_tensor = fp32_tensor - dequantize_tensor +# euclidean_dist = np.sum(diff_tensor ** 2) +# return euclidean_dist / fp32_tensor.size +# def get_fused_mapping(self): +# model = self.model +# weights_info = dict(model.named_parameters()) +# weight_to_op = {} +# for op_name, child in model.named_modules(): +# if self.is_fused_module(child): +# for name, _ in child.named_children(): +# if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right + +# weight_to_op[op_name + "." + name + ".weight"] = self.mapping_module_to_op(op_name) +# break +# else: +# name = op_name + ".weight" +# if name in weights_info and name not in weight_to_op.keys(): +# weight_to_op[op_name + ".weight"] = op_name +# op_list = [] +# for key in weight_to_op.keys(): +# op_list.append(weight_to_op[key]) +# return weight_to_op, op_list + +# def get_device(self, model: torch.nn.Module): +# for n, p in model.named_parameters(): +# return p.data.device + +# def _get_act_grad_hook(self, name): +# def act_grad_hook(model, grad_input, grad_output): +# ##print(name, grad_input[0].shape, grad_output[0].shape) +# if type(model) == torch.nn.Linear: ##TODO very tricky +# self.layer_acts_grads[name] = grad_input[1] +# else: +# self.layer_acts_grads[name] = grad_input[0] + +# return act_grad_hook + +# def _get_enable_act_grad_hook(self, name): +# def enable_act_grad_hook(model, inputs, outputs): +# input = inputs[0] +# if input.requires_grad is False: +# input.requires_grad = True +# self.layer_acts[name] = input + +# return enable_act_grad_hook + +# # def _get_disable_input_grad_hook(self, name): +# # def disable_input_grad_hook(model, inputs, outputs): +# # try: +# # input = inputs[0] ##TODO check whether this is right +# # except: +# # input = inputs +# # if input.is_leaf == False:## you can only change requires_grad flags of leaf variables +# # if input.requires_grad is True: +# # input.requires_grad = False +# # +# # +# # return disable_input_grad_hook + +# def _unregister_hook(self): +# for handel in self.hook_handles: +# handel.remove() + +# def register_act_grad_hooks(self, model): +# for name, module in model.named_modules(): +# if self.mapping_module_to_op(name) in self.op_list: +# hook_handle = module.register_forward_hook(self._get_enable_act_grad_hook(name)) +# self.hook_handles.append(hook_handle) +# hook_handle = module.register_backward_hook(self._get_act_grad_hook(name)) +# self.hook_handles.append(hook_handle) + +# def reset_act_gradient_and_hooks(self): +# # tmp_input = torch.zeros(self._input_shape, device=self.device) +# # for name, module in self.model.named_modules(): +# # if name in 
self.op_list: +# # hook_handle = module.register_forward_hook(self._get_disable_input_grad_hook(name)) +# # self.hook_handles.append(hook_handle) +# # self.model(tmp_input) +# self._unregister_hook() + +# def get_params(self): +# weight_names = [n for n, p in self.model.named_parameters() if +# p.requires_grad and "bias" not in n] ##remove bias +# params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n] ##remove bias +# self.weight_names = weight_names +# self.params = params + +# def forward_backward(self, model, data, create_graph=False, return_w_grad=True): +# model.zero_grad() +# input = data[0].to(self.device) +# ##self._input_shape = input.shape ## for resetting input activation +# target = data[1].to(self.device) +# input.requires_grad = True +# output = model(input) +# loss = self.criterion(output, target) +# torch.autograd.backward(loss, create_graph=create_graph) +# ##loss.backward(create_graph=create_graph) +# if return_w_grad: +# gradients = [] +# for n, p in self.model.named_parameters(): +# if p.grad != None and n in self.weight_names: +# gradient = p.grad +# gradients.append(gradient + 0.0) ## add 0 to create a copy +# model.zero_grad() +# return gradients +# else: +# model.zero_grad() + +# # def get_params(self, model): +# # parameters = [p for p in model.parameters() if p.requires_grad] +# # return parameters + +# def sample_rademacher(self, params): +# samples = [] +# for param in params: +# r = torch.randint_like(param, high=2, device=self.device) +# r.masked_fill_(r == 0, -1) +# samples.append(r) +# return samples + +# def get_vtHv_weight(self, params, num_samples): +# v = self.sample_rademacher(params) +# H_v = [0] * len(v) +# cnt = 0 +# for step, data in enumerate(self.dataloader): +# batch_size = data[0].shape[0] +# cnt += batch_size +# gradients = self.forward_backward(self.model, data, create_graph=True) +# H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) +# H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] +# if cnt >= num_samples: +# break +# if cnt > 0: +# H_v = [item / cnt for item in H_v] +# v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better +# return v_t_H_v + +# def get_vtHv_act(self, params, num_samples): +# v = self.sample_rademacher(params) +# H_v = [0] * len(v) +# cnt = 0 +# for step, data in enumerate(self.dataloader): +# if cnt >= num_samples: +# break +# for i in range(self.dataloader.batchsize): ##force to batchsize to be 1 +# input = data[0][i:i + 1] +# target = data[1][i:i + 1] + +# self.get_gradients(self.model, (input, target), self.criterion, create_graph=True) +# layer_acts = [self.layer_acts[key] for key in self.layer_acts.keys()] +# layer_act_gradients = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] +# hv_one = torch.autograd.grad(layer_act_gradients, layer_acts, v, only_inputs=True, retain_graph=False) +# cnt += 1 +# if cnt >= num_samples: +# break + +# def get_weight_traces(self, num_samples): +# layer_traces_per_iter = [] +# prev_avg_model_trace = 0 +# for iter in range(self.max_iter): +# layer_traces = self.get_vtHv_weight(self.params, num_samples) +# layer_traces_per_iter.append(layer_traces) +# layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) +# model_trace = torch.sum(layer_traces_estimate) +# diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) +# if diff_ratio < self.tolerance and iter > 10: ##TODO magic 
number +# break +# if iter == 50: ##TODO for debug +# break +# prev_avg_model_trace = model_trace +# weight_name_to_traces = {} +# layer_traces = layer_traces_estimate +# for weight_name, trace in zip(self.weight_names, layer_traces): +# weight_name_to_traces[weight_name] = trace +# op_name_to_trace = {} +# for weight_name in self.weight_names: +# op_name = self.weight_to_op[weight_name] +# op_name_to_trace[op_name] = weight_name_to_traces[weight_name] +# return op_name_to_trace +# def get_act_traces(self, num_samples): +# unfused_training = self.unfused_model.training +# self.unfused_model.eval() +# self.hook_handles = [] +# self.layer_acts = {} +# self.layer_acts_grads = {} +# self.register_act_grad_hooks(self.unfused_model) +# cnt = 0 +# act_traces_per_sample = [] +# for step, data in enumerate(self.dataloader): +# if cnt >= num_samples: +# break +# bs = data[0].shape[0] +# act_traces_sum = 0 +# act_traces_per_iter = [] +# prev_avg_model_trace = 0 +# act_traces_sums = None +# for i in range(bs): ##force the bs to be one +# input = data[0][i:i + 1] +# target = data[1][i:i + 1] +# self.forward_backward(self.unfused_model, (input, target), create_graph=True, return_w_grad=False) +# acts = [self.layer_acts[key] for key in self.layer_acts.keys()] +# if act_traces_sums == None: +# act_traces_sums = [0] * len(acts) +# acts_grad = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] ##same order with acts +# vt_H_v_sum_per_act = [0] * len(acts) + +# prev_model_act_trace = 0 +# for iter in range(self.max_iter): +# v = self.sample_rademacher(acts) +# H_v = torch.autograd.grad(acts_grad, acts, v, only_inputs=True, retain_graph=True) +# vt_H_v = [torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)] + +# vt_H_v_sum_per_act = [vt_H_v_sum_per_act[index] + vt_H_v[index] for index, item in +# enumerate(vt_H_v_sum_per_act)] +# vt_H_v_mean_per_act = [item / (iter + 1) for item in vt_H_v_sum_per_act] +# current_model_act_trace = torch.mean(torch.stack(vt_H_v_mean_per_act)) + +# diff_ratio = abs(current_model_act_trace - prev_model_act_trace) / ( +# prev_model_act_trace + self.eps) +# if diff_ratio < self.tolerance and iter > 10: ##TODO magic number +# break +# if iter == 50: ##TODO for debug +# break + +# prev_model_act_trace = current_model_act_trace +# act_traces_per_sample.append(vt_H_v_mean_per_act) +# cnt += 1 +# if cnt >= num_samples: +# break + +# if unfused_training: +# self.unfused_model.train() +# self.reset_act_gradient_and_hooks() ##TODO have issues to reset the input grad to False +# act_traces_stack = torch.stack([torch.stack(item) for item in act_traces_per_sample]) +# act_traces = torch.mean(act_traces_stack, dim=0) +# res_dict = {} +# for index, key in enumerate(self.layer_acts.keys()): +# res_dict[key] = act_traces[index] + +# self.layer_acts = [] +# self.layer_acts_grads = [] +# return res_dict +# def insert_hook(self, model, target_module_list): +# intern_outputs = [] +# for layer,module in model.named_modules(): +# for target_module in target_module_list: +# # print("layer:",layer) +# # print("target_model:",target_module) +# if layer == target_module: +# logging.debug("Collect: %s" % (module)) +# # print("Collect: %s" % (module)) +# intern_outputs.append(Node_collector(module)) - logging.info("Total %d hook inserted" % (len(intern_outputs))) - # print("Total %d hook inserted" % (len(intern_outputs))) - return model, intern_outputs - def insert_hook_quantize(self,model, target_module_list): - intern_outputs = [] - for layer,module in model.named_modules(): - for target_module 
in target_module_list: - # print("layer:",layer) - length = len("_model.") - new_key = layer[length:] - # print("target_model:",target_module) - if new_key == target_module: - logging.debug("Collect: %s" % (module)) - # print("Collect: %s" % (module)) - intern_outputs.append(Node_collector(module)) - logging.info("Total %d hook inserted" % (len(intern_outputs))) - # print("Total %d hook inserted" % (len(intern_outputs))) - return model, intern_outputs - def get_act_gap(self,fp32_model,q_model): - """ - Estimates each activation gap between quantized model and float model - """ - self.handle_acts=[] - fp32_model.eval() - # temp_model = fuse_fx(fp32_model.model) - temp_model=fp32_model - # target_module_list = [nn.ReLU] # Insert hook for FP32 model - target_module_list = self.op_list - temp_model, intern_outputs =self.insert_hook(temp_model, target_module_list) - # intern_outputs={} - for input, target in self.dataloader: - temp_model(input) - break - - fp32_act_out={} - for i, intern_output in enumerate(intern_outputs): - stat_features = intern_output.out_features.view(-1) - # print ("No.", i, " ", intern_output.out_features.shape) - # print ("Numpy No.", i, " ", intern_output.out_features.cpu().data.numpy().shape) - # print ("No.", i, " ", stat_features.cpu().data.numpy().shape) - # print ("Numpy No.", i, " ", stat_features.cpu().data.numpy()) - fp32_act_out[target_module_list[i]]=stat_features.cpu().data.numpy() - # break - for i in intern_outputs: - # print(i) - i.remove() - target_module_list = self.op_list - q_model, intern_outputs=self.insert_hook_quantize(q_model, target_module_list) - for input, target in self.dataloader: #only one sample - q_model(input) - break - qnt_act_out={} - intern_outputs={} - for i, intern_output in enumerate(intern_outputs): - stat_features = intern_output.out_features.view(-1) - qnt_act_out[target_module_list[i]]=stat_features.dequantize().cpu().data.numpy() - # break - for i in intern_outputs: - # print(i) - i.remove() - act_gap={} - mse_gap={} - for fp_i,int_i in zip(fp32_act_out,qnt_act_out): - activation_qnt_error=fp32_act_out[fp_i]-qnt_act_out[int_i] - mse_gap[fp_i]=self.mse_metric_gap(fp32_act_out[fp_i],qnt_act_out[int_i]) - act_gap[fp_i]=np.sum(activation_qnt_error)/activation_qnt_error.size - return act_gap,mse_gap - def get_avg_traces(self, enable_act=True, num_samples=32): - """ - Estimates average hessian trace for each parameter - """ - assert num_samples > 0 - traces = {} - weight_traces = self.get_weight_traces(num_samples) - traces['weight'] = weight_traces - act_trace={} - if enable_act: - act_gap,mse_gap=self.get_act_gap(self.model,self.q_model) - act_traces = self.get_act_traces(num_samples) - for i,j in zip(act_traces,mse_gap): - #currently use mse to analysis - act_trace[i]=act_traces[i]+mse_gap[j] - traces['activation'] = act_traces - return traces - - -##copy from torch.quantization._numeric_suite -def _find_match( - str_list: Union[Dict[str, Any], List[str]], key_str: str, - postfix: str, -) -> Optional[str]: - split_str = key_str.split(".") - if split_str[-1] == postfix: - match_string = "".join(key_str.split(".")[0:-1]) - for s2 in str_list: - pattern1 = "".join(s2.split(".")[0:-1]) - pattern2 = "".join(s2.split(".")[0:-2]) - if match_string == pattern1: - return s2 - if match_string == pattern2: - return s2 - - # For matching "fc.weight" and "fc._packed_params._packed_params" - if postfix == "_packed_params": - match_string = "".join(key_str.split(".")[0:-2]) - if len(match_string) == 0: - return None - for s2 in str_list: - 
pattern1 = "".join(s2.split(".")[0:-1]) - pattern2 = "".join(s2.split(".")[0:-2]) - if match_string == pattern1: - return s2 - if match_string == pattern2: - return s2 - return None - else: - return None - - -##copy form torch.quantization._numeric_suite -def compare_weights( - float_dict: Dict[str, Any], quantized_dict: Dict[str, Any] -) -> Dict[str, Dict[str, torch.Tensor]]: - r"""Compare the weights of the float module with its corresponding quantized - module. Return a dict with key corresponding to module names and each entry being - a dictionary with two keys 'float' and 'quantized', containing the float and - quantized weights. This dict can be used to compare and compute the quantization - error of the weights of float and quantized models. - - Example usage:: - - wt_compare_dict = compare_weights( - float_model.state_dict(), qmodel.state_dict()) - for key in wt_compare_dict: - print( - key, - compute_error( - wt_compare_dict[key]['float'], - wt_compare_dict[key]['quantized'].dequantize() - ) - ) - - Args: - float_dict: state dict of the float model - quantized_dict: state dict of the quantized model - - Return: - weight_dict: dict with key corresponding to module names and each entry being - a dictionary with two keys 'float' and 'quantized', containing the float and - quantized weights - """ - - weight_dict: Dict[str, Dict] = {} - for key in quantized_dict: - match_key = _find_match(float_dict, key, "weight") - if match_key is not None: - weight_dict[key] = {} - weight_dict[key]["float"] = float_dict[match_key] - weight_dict[key]["quantized"] = quantized_dict[key] - continue - - # For matching "fc.weight" and "fc._packed_params._packed_params" - match_key = _find_match(float_dict, key, "_packed_params") - if match_key is not None: - weight_dict[match_key] = {} - weight_dict[match_key]["float"] = float_dict[match_key] - weight_dict[match_key]["quantized"] = quantized_dict[key][0] - ##TODO:should consider more models in further work - - # For LSTM - split_str = key.split(".") - if split_str[-1] == "param" and split_str[-3] == "_all_weight_values": - layer = split_str[-2] - module_name = ".".join(split_str[:-3]) - float_weight_ih_key = module_name + ".weight_ih_l" + layer - float_weight_hh_key = module_name + ".weight_hh_l" + layer - if float_weight_ih_key in float_dict and float_weight_hh_key in float_dict: - weight_dict[key] = {} - weight_dict[key]["float"] = float_dict[float_weight_ih_key] - weight_dict[key]["quantized"] = ( - quantized_dict[key].__getstate__()[0][4][0].__getstate__()[0][0] - ) - weight_dict[key]["float"] = float_dict[float_weight_hh_key] - weight_dict[key]["quantized"] = ( - quantized_dict[key].__getstate__()[0][4][1].__getstate__()[0][0] - ) - - return weight_dict +# logging.info("Total %d hook inserted" % (len(intern_outputs))) +# # print("Total %d hook inserted" % (len(intern_outputs))) +# return model, intern_outputs +# def insert_hook_quantize(self,model, target_module_list): +# intern_outputs = [] +# for layer,module in model.named_modules(): +# for target_module in target_module_list: +# # print("layer:",layer) +# length = len("_model.") +# new_key = layer[length:] +# # print("target_model:",target_module) +# if new_key == target_module: +# logging.debug("Collect: %s" % (module)) +# # print("Collect: %s" % (module)) +# intern_outputs.append(Node_collector(module)) +# logging.info("Total %d hook inserted" % (len(intern_outputs))) +# # print("Total %d hook inserted" % (len(intern_outputs))) +# return model, intern_outputs +# def 
get_act_gap(self,fp32_model,q_model): +# """ +# Estimates each activation gap between quantized model and float model +# """ +# self.handle_acts=[] +# fp32_model.eval() +# # temp_model = fuse_fx(fp32_model.model) +# temp_model=fp32_model +# # target_module_list = [nn.ReLU] # Insert hook for FP32 model +# target_module_list = self.op_list +# temp_model, intern_outputs =self.insert_hook(temp_model, target_module_list) +# # intern_outputs={} +# for input, target in self.dataloader: +# temp_model(input) +# break + +# fp32_act_out={} +# for i, intern_output in enumerate(intern_outputs): +# stat_features = intern_output.out_features.view(-1) +# # print ("No.", i, " ", intern_output.out_features.shape) +# # print ("Numpy No.", i, " ", intern_output.out_features.cpu().data.numpy().shape) +# # print ("No.", i, " ", stat_features.cpu().data.numpy().shape) +# # print ("Numpy No.", i, " ", stat_features.cpu().data.numpy()) +# fp32_act_out[target_module_list[i]]=stat_features.cpu().data.numpy() +# # break +# for i in intern_outputs: +# # print(i) +# i.remove() +# target_module_list = self.op_list +# q_model, intern_outputs=self.insert_hook_quantize(q_model, target_module_list) +# for input, target in self.dataloader: #only one sample +# q_model(input) +# break +# qnt_act_out={} +# intern_outputs={} +# for i, intern_output in enumerate(intern_outputs): +# stat_features = intern_output.out_features.view(-1) +# qnt_act_out[target_module_list[i]]=stat_features.dequantize().cpu().data.numpy() +# # break +# for i in intern_outputs: +# # print(i) +# i.remove() +# act_gap={} +# mse_gap={} +# for fp_i,int_i in zip(fp32_act_out,qnt_act_out): +# activation_qnt_error=fp32_act_out[fp_i]-qnt_act_out[int_i] +# mse_gap[fp_i]=self.mse_metric_gap(fp32_act_out[fp_i],qnt_act_out[int_i]) +# act_gap[fp_i]=np.sum(activation_qnt_error)/activation_qnt_error.size +# return act_gap,mse_gap +# def get_avg_traces(self, enable_act=True, num_samples=32): +# """ +# Estimates average hessian trace for each parameter +# """ +# assert num_samples > 0 +# traces = {} +# weight_traces = self.get_weight_traces(num_samples) +# traces['weight'] = weight_traces +# act_trace={} +# if enable_act: +# act_gap,mse_gap=self.get_act_gap(self.model,self.q_model) +# act_traces = self.get_act_traces(num_samples) +# for i,j in zip(act_traces,mse_gap): +# #currently use mse to analysis +# act_trace[i]=act_traces[i]+mse_gap[j] +# traces['activation'] = act_traces +# return traces + + +# ##copy from torch.quantization._numeric_suite +# def _find_match( +# str_list: Union[Dict[str, Any], List[str]], key_str: str, +# postfix: str, +# ) -> Optional[str]: +# split_str = key_str.split(".") +# if split_str[-1] == postfix: +# match_string = "".join(key_str.split(".")[0:-1]) +# for s2 in str_list: +# pattern1 = "".join(s2.split(".")[0:-1]) +# pattern2 = "".join(s2.split(".")[0:-2]) +# if match_string == pattern1: +# return s2 +# if match_string == pattern2: +# return s2 + +# # For matching "fc.weight" and "fc._packed_params._packed_params" +# if postfix == "_packed_params": +# match_string = "".join(key_str.split(".")[0:-2]) +# if len(match_string) == 0: +# return None +# for s2 in str_list: +# pattern1 = "".join(s2.split(".")[0:-1]) +# pattern2 = "".join(s2.split(".")[0:-2]) +# if match_string == pattern1: +# return s2 +# if match_string == pattern2: +# return s2 +# return None +# else: +# return None + + +# ##copy form torch.quantization._numeric_suite +# def compare_weights( +# float_dict: Dict[str, Any], quantized_dict: Dict[str, Any] +# ) -> Dict[str, 
Dict[str, torch.Tensor]]: +# r"""Compare the weights of the float module with its corresponding quantized +# module. Return a dict with key corresponding to module names and each entry being +# a dictionary with two keys 'float' and 'quantized', containing the float and +# quantized weights. This dict can be used to compare and compute the quantization +# error of the weights of float and quantized models. + +# Example usage:: + +# wt_compare_dict = compare_weights( +# float_model.state_dict(), qmodel.state_dict()) +# for key in wt_compare_dict: +# print( +# key, +# compute_error( +# wt_compare_dict[key]['float'], +# wt_compare_dict[key]['quantized'].dequantize() +# ) +# ) + +# Args: +# float_dict: state dict of the float model +# quantized_dict: state dict of the quantized model + +# Return: +# weight_dict: dict with key corresponding to module names and each entry being +# a dictionary with two keys 'float' and 'quantized', containing the float and +# quantized weights +# """ + +# weight_dict: Dict[str, Dict] = {} +# for key in quantized_dict: +# match_key = _find_match(float_dict, key, "weight") +# if match_key is not None: +# weight_dict[key] = {} +# weight_dict[key]["float"] = float_dict[match_key] +# weight_dict[key]["quantized"] = quantized_dict[key] +# continue + +# # For matching "fc.weight" and "fc._packed_params._packed_params" +# match_key = _find_match(float_dict, key, "_packed_params") +# if match_key is not None: +# weight_dict[match_key] = {} +# weight_dict[match_key]["float"] = float_dict[match_key] +# weight_dict[match_key]["quantized"] = quantized_dict[key][0] +# ##TODO:should consider more models in further work + +# # For LSTM +# split_str = key.split(".") +# if split_str[-1] == "param" and split_str[-3] == "_all_weight_values": +# layer = split_str[-2] +# module_name = ".".join(split_str[:-3]) +# float_weight_ih_key = module_name + ".weight_ih_l" + layer +# float_weight_hh_key = module_name + ".weight_hh_l" + layer +# if float_weight_ih_key in float_dict and float_weight_hh_key in float_dict: +# weight_dict[key] = {} +# weight_dict[key]["float"] = float_dict[float_weight_ih_key] +# weight_dict[key]["quantized"] = ( +# quantized_dict[key].__getstate__()[0][4][0].__getstate__()[0][0] +# ) +# weight_dict[key]["float"] = float_dict[float_weight_hh_key] +# weight_dict[key]["quantized"] = ( +# quantized_dict[key].__getstate__()[0][4][1].__getstate__()[0][0] +# ) + +# return weight_dict @strategy_registry @@ -657,46 +657,47 @@ def next_tune_cfg(self): # print(n) # for n, p in self._fp32_model.named_parameters(): # print(n) - orig_eval = True - if self._fp32_model.training: - orig_eval = False - self._fp32_model.eval() - import copy - ht = HessianTrace(self._fp32_model, self.calib_dataloader,self.q_model) - q_model_state_dict = {} - for key in self.q_model.state_dict().keys(): - length = len("_model.") - new_key = key[length:] - q_model_state_dict[new_key] = self.q_model.state_dict()[key] - weight_quant_loss = compare_weights(ht.model.state_dict(), q_model_state_dict) - pertur_lst = {} - for key in weight_quant_loss: - op_float_tensor = weight_quant_loss[key]['float'] - op_qnt_tensor = weight_quant_loss[key]['quantized'].dequantize() - diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) # Formula: L2=||Q(w)-w||p^2 - pertur_lst[key] = diff_l2 - self.enable_act=False #enable activation trace and quantization loss analysis feature - traces = ht.get_avg_traces(self.enable_act) - op_to_traces = traces['weight'] - if self.enable_act: - 
act_to_traces=traces['activation'] - #TODO() optimize relationship of weights quantized loss and activation quantized loss, to find best conbine - #TODO() do double check why layer1's output is not 0 for activation quantized - for trace_i, pertur_i,act_i in zip(op_to_traces.keys(),pertur_lst.keys(),act_to_traces.keys()): - op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i]+act_to_traces[act_i] #Formula:Omig=Trace*L2+act_trace - else: - for trace_i, pertur_i in zip(op_to_traces.keys(),pertur_lst.keys()): - op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i] #Formula:Omig=Trace*L2 - if orig_eval == False: - self._fp32_model.train() + # orig_eval = True + # if self._fp32_model.training: + # orig_eval = False + # self._fp32_model.eval() + # ht = HessianTrace(self._fp32_model, self.calib_dataloader,self.q_model) + # q_model_state_dict = {} + # for key in self.q_model.state_dict().keys(): + # length = len("_model.") + # new_key = key[length:] + # q_model_state_dict[new_key] = self.q_model.state_dict()[key] + # weight_quant_loss = compare_weights(ht.model.state_dict(), q_model_state_dict) + # pertur_lst = {} + # for key in weight_quant_loss: + # op_float_tensor = weight_quant_loss[key]['float'] + # op_qnt_tensor = weight_quant_loss[key]['quantized'].dequantize() + # diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) # Formula: L2=||Q(w)-w||p^2 + # pertur_lst[key] = diff_l2 + # self.enable_act=False #enable activation trace and quantization loss analysis feature + # traces = ht.get_avg_traces(self.enable_act) + # op_to_traces = traces['weight'] + # if self.enable_act: + # act_to_traces=traces['activation'] + # #TODO() optimize relationship of weights quantized loss and activation quantized loss, to find best conbine + # #TODO() do double check why layer1's output is not 0 for activation quantized + # for trace_i, pertur_i,act_i in zip(op_to_traces.keys(),pertur_lst.keys(),act_to_traces.keys()): + # op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i]+act_to_traces[act_i] #Formula:Omig=Trace*L2+act_trace + # else: + # for trace_i, pertur_i in zip(op_to_traces.keys(),pertur_lst.keys()): + # op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i] #Formula:Omig=Trace*L2 + # if orig_eval == False: + # self._fp32_model.train() # End compute the hessian trace # # TODO uncomment it when algo ready. 
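The block commented out above keeps the scoring rule that the adaptor-side code reuses later: per the inline comments, each op is scored as Omega = Trace * L2, where L2 = ||Q(w) - w||^2, with an extra activation term when activation analysis is enabled. The snippet below is a minimal standalone restatement of only that scoring step; the op names, trace values, and weight tensors are invented for illustration and are not values produced by this patch.

import torch

def hawq_score(op_to_traces, weight_l2_gap, act_gap=None):
    # Sketch: Omega_op = trace_op * ||Q(w_op) - w_op||^2 (+ activation gap when enabled).
    scores = {}
    for op, trace in op_to_traces.items():
        score = trace * weight_l2_gap[op]
        if act_gap is not None:
            score = score + act_gap[op]
        scores[op] = score
    return scores

# Invented example: fp32 weights vs. a slightly perturbed stand-in for dequantized weights.
w_fp32 = {"conv1": torch.randn(8), "fc": torch.randn(8)}
w_deq = {k: v + 0.01 * torch.randn(8) for k, v in w_fp32.items()}
l2_gap = {k: torch.norm(w_fp32[k] - w_deq[k], p=2) ** 2 for k in w_fp32}   # ||Q(w) - w||^2
print(hawq_score({"conv1": 0.9, "fc": 0.1}, l2_gap))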
- # op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, - # dataloader = self.calib_dataloader, - # q_model = self.q_model, - # criterion = torch.nn.CrossEntropyLoss(), # TODO replace it with user specify loss - # enable_act = False) + criterion=torch.nn.CrossEntropyLoss()# TODO replace it with user specify loss + op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, + dataloader = self.calib_dataloader, + q_model = self.q_model, + criterion =criterion, + enable_act = False) + # op_to_traces = self.adaptor.calculate_hessian_trace() ordered_ops = sorted(op_to_traces.keys(), key=lambda key: op_to_traces[key], reverse=self.higher_is_better) From eb05a1fb3a9362aa6a43cbed38811fa5d33402be Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 6 Dec 2022 15:44:15 +0800 Subject: [PATCH 095/128] add strategy kwargs for new api Signed-off-by: yiliu30 --- neural_compressor/conf/config.py | 9 +++++++++ neural_compressor/config.py | 25 +++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index 640b2fd36df..59b421f1e37 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -1346,8 +1346,17 @@ def map_pyconfig_to_cfg(self, pythonic_config): 'tuning.exit_policy.max_trials': pythonic_config.quantization.max_trials, 'tuning.exit_policy.performance_only': pythonic_config.quantization.performance_only, 'use_bf16': pythonic_config.quantization.use_bf16, + 'quantization.optimization_level': pythonic_config.quantization.optimization_level, 'reduce_range': pythonic_config.quantization.reduce_range }) + if pythonic_config.quantization.strategy_kwargs: + st_kwargs = pythonic_config.quantization.strategy_kwargs + for st_key in ['sigopt_api_token', 'sigopt_experiment_name', 'accuracy_weight', 'latency_weight']: + if st_key in st_kwargs: + st_val = st_kwargs[st_key] + print(st_key) + mapping.update({'tuning.strategy.' 
+ st_key: st_val}) + if pythonic_config.distillation is not None: mapping.update({ 'distillation.train.criterion': pythonic_config.distillation.criterion, diff --git a/neural_compressor/config.py b/neural_compressor/config.py index f7337b546f0..0f6adf34c23 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -307,6 +307,7 @@ def __init__(self, op_type_list=None, op_name_list=None, strategy="basic", + strategy_kwargs=None, objective="performance", timeout=0, max_trials=100, @@ -321,6 +322,7 @@ def __init__(self, self._op_type_list = op_type_list self._op_name_list = op_name_list self._strategy = strategy + self._strategy_kwargs = strategy_kwargs self._objective = objective self._timeout = timeout self._max_trials = max_trials @@ -402,6 +404,14 @@ def strategy(self, strategy): ['basic', 'mse', 'bayesian', 'random', 'exhaustive']): self._strategy = strategy + @property + def strategy_kwargs(self): + return self._strategy_kwargs + + @strategy_kwargs.setter + def strategy_kwargs(self, strategy_kwargs): + self._strategy_kwargs = strategy_kwargs + @property def op_name_list(self): return self._op_name_list @@ -480,11 +490,12 @@ def inputs(self, inputs): class TuningCriterion: - def __init__(self, strategy="basic", timeout=0, max_trials=100, objective="performance"): + def __init__(self, strategy="basic", strategy_kwargs=None, timeout=0, max_trials=100, objective="performance"): self._strategy = strategy self._timeout = timeout self._max_trials = max_trials self._objective = objective + self._strategy_kwargs = strategy_kwargs @property def max_trials(self): @@ -523,7 +534,14 @@ def strategy(self, strategy): if check_value('strategy', strategy, str, ['basic', 'mse', 'bayesian', 'random', 'exhaustive']): self._strategy = strategy - + + @property + def strategy_kwargs(self): + return self._strategy_kwargs + + @strategy_kwargs.setter + def strategy_kwargs(self, strategy_kwargs): + self._strategy_kwargs = strategy_kwargs tuning_criterion = TuningCriterion() @@ -540,6 +558,7 @@ def __init__(self, op_name_list=None, reduce_range=None, extra_precisions = ["bf16"], + optimization_level=1, tuning_criterion=tuning_criterion, accuracy_criterion=accuracy_criterion, ): @@ -551,6 +570,7 @@ def __init__(self, op_type_list=op_type_list, op_name_list=op_name_list, strategy=tuning_criterion.strategy, + strategy_kwargs=tuning_criterion.strategy_kwargs, objective=tuning_criterion.objective, timeout=tuning_criterion.timeout, max_trials=tuning_criterion.max_trials, @@ -558,6 +578,7 @@ def __init__(self, extra_precisions=extra_precisions, accuracy_criterion=accuracy_criterion) self.approach = approach + self.optimization_level = optimization_level @property def approach(self): From 0afc168ed9a1de1d85bc00c921bb290d6b15477f Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 6 Dec 2022 15:47:26 +0800 Subject: [PATCH 096/128] fixed some bugs Signed-off-by: yiliu30 --- neural_compressor/adaptor/pytorch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 44392313d80..c2535e4fdba 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -1094,7 +1094,7 @@ def is_fused_module(self, module): else: return False - def calculate_hessian_trace( + def calculate_hessian_trace(self, fp32_model, dataloader, q_model, From b154e0cfe091d869f4dfb1cfdc8d04b79baa1b50 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 6 Dec 2022 16:28:27 +0800 Subject: [PATCH 097/128] add uts 
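The new tests exercise the strategy_kwargs plumbing added in PATCH 095: map_pyconfig_to_cfg in neural_compressor/conf/config.py flattens each recognized key into a 'tuning.strategy.<key>' entry of the internal config. A rough standalone sketch of that flattening is shown below; the token and project values are placeholders taken from the tests, and the real code additionally guards on strategy_kwargs being set on the quantization config.

# Sketch of the expected flattening (placeholder values, not a real SigOpt token).
strategy_kwargs = {
    "sigopt_api_token": "sigopt_api_token_test",
    "sigopt_project_id": "sigopt_project_id_test",
    "sigopt_experiment_name": "nc-tune",
}
mapping = {}
for st_key in ("sigopt_api_token", "sigopt_project_id", "sigopt_experiment_name",
               "accuracy_weight", "latency_weight"):
    if st_key in strategy_kwargs:
        mapping["tuning.strategy." + st_key] = strategy_kwargs[st_key]
print(mapping)   # e.g. {'tuning.strategy.sigopt_api_token': 'sigopt_api_token_test', ...}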
Signed-off-by: yiliu30 --- neural_compressor/conf/config.py | 3 ++- neural_compressor/config.py | 4 ++-- test/strategy/test_basic.py | 22 +++++++++++++++++++++- test/strategy/test_sigopt.py | 26 +++++++++++++++++++++++++- 4 files changed, 50 insertions(+), 5 deletions(-) diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index 59b421f1e37..79f50237051 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -1351,7 +1351,8 @@ def map_pyconfig_to_cfg(self, pythonic_config): }) if pythonic_config.quantization.strategy_kwargs: st_kwargs = pythonic_config.quantization.strategy_kwargs - for st_key in ['sigopt_api_token', 'sigopt_experiment_name', 'accuracy_weight', 'latency_weight']: + for st_key in ['sigopt_api_token', 'sigopt_project_id', 'sigopt_experiment_name', \ + 'accuracy_weight', 'latency_weight']: if st_key in st_kwargs: st_val = st_kwargs[st_key] print(st_key) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 0f6adf34c23..4accfce4bd0 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -401,7 +401,7 @@ def strategy(self): @strategy.setter def strategy(self, strategy): if check_value('strategy', strategy, str, - ['basic', 'mse', 'bayesian', 'random', 'exhaustive']): + ['basic', 'mse', 'bayesian', 'random', 'exhaustive', 'sigopt', 'tpe']): self._strategy = strategy @property @@ -532,7 +532,7 @@ def strategy(self): @strategy.setter def strategy(self, strategy): if check_value('strategy', strategy, str, - ['basic', 'mse', 'bayesian', 'random', 'exhaustive']): + ['basic', 'mse', 'bayesian', 'random', 'exhaustive', 'sigopt', 'tpe']): self._strategy = strategy @property diff --git a/test/strategy/test_basic.py b/test/strategy/test_basic.py index 845e9b0ccae..239f26a071a 100644 --- a/test/strategy/test_basic.py +++ b/test/strategy/test_basic.py @@ -155,7 +155,7 @@ def build_fake_model(): tf.import_graph_def(graph_def, name='') return graph -class TestQuantization(unittest.TestCase): +class TestBasicTuningStrategy(unittest.TestCase): @classmethod def setUpClass(self): @@ -217,6 +217,26 @@ def test_run_basic_max_trials_multimetric_weight(self): quantizer.model = self.constant_graph quantizer.fit() + + def test_run_basic_one_trial_new_api(self): + from neural_compressor.quantization import fit + from neural_compressor.config import AccuracyCriterion, AccuracyLoss, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.experimental.common import DataLoader + from neural_compressor.experimental.data.datasets.dummy_dataset import DummyDataset + + # dataset and dataloader + dataset = DummyDataset(shape=(100, 3, 3, 1), label=True) + dataloader = DataLoader(dataset) + + # tuning and accuracy criterion + tolerable_loss = AccuracyLoss(0.01) + accuracy_criterion = AccuracyCriterion(criterion='relative', tolerable_loss=tolerable_loss) + tuning_criterion = TuningCriterion(strategy='basic') + conf = PostTrainingQuantConfig(approach="static", backend="tensorflow", + tuning_criterion=tuning_criterion, + accuracy_criterion=accuracy_criterion) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader, eval_dataloader=dataloader) + self.assertIsNotNone(q_model) if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_sigopt.py b/test/strategy/test_sigopt.py index ce7a7669862..062c96b638c 100644 --- a/test/strategy/test_sigopt.py +++ b/test/strategy/test_sigopt.py @@ -104,7 +104,7 @@ def build_fake_model(): return graph @unittest.skipIf(CONDITION , 
"missing the env variables 'SIGOPT_API_TOKEN' or 'SIGOPT_PROJECT_ID'") -class TestQuantization(unittest.TestCase): +class TestSigoptTuningStrategy(unittest.TestCase): @classmethod def setUpClass(self): @@ -140,6 +140,30 @@ def test_run_basic_max_trials(self): quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph quantizer.fit() + + def test_run_sigopt_one_trial_new_api(self): + from neural_compressor.quantization import fit + from neural_compressor.config import AccuracyCriterion, AccuracyLoss, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.experimental.common import DataLoader + from neural_compressor.experimental.data.datasets.dummy_dataset import DummyDataset + + # dataset and dataloader + dataset = DummyDataset(shape=(100, 3, 3, 1), label=True) + dataloader = DataLoader(dataset) + + # tuning and accuracy criterion + tolerable_loss = AccuracyLoss(0.01) + accuracy_criterion = AccuracyCriterion(criterion='relative', tolerable_loss=tolerable_loss) + strategy_kwargs = {'sigopt_api_token': 'sigopt_api_token_test', + 'sigopt_project_id': 'sigopt_project_id_test', + 'sigopt_experiment_name': 'nc-tune'} + tuning_criterion = TuningCriterion(strategy='sigopt', strategy_kwargs=strategy_kwargs, max_trials=3) + conf = PostTrainingQuantConfig(approach="static", backend="tensorflow", + tuning_criterion=tuning_criterion, + accuracy_criterion=accuracy_criterion) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader, eval_dataloader=dataloader) + self.assertIsNotNone(q_model) + if __name__ == "__main__": unittest.main() From 1f5c859f5e933395a050143c385a95ae1143a7e9 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 6 Dec 2022 16:42:40 +0800 Subject: [PATCH 098/128] remove the line for debug Signed-off-by: yiliu30 --- neural_compressor/conf/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index 79f50237051..8b227697086 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -1355,7 +1355,6 @@ def map_pyconfig_to_cfg(self, pythonic_config): 'accuracy_weight', 'latency_weight']: if st_key in st_kwargs: st_val = st_kwargs[st_key] - print(st_key) mapping.update({'tuning.strategy.' + st_key: st_val}) if pythonic_config.distillation is not None: From fe03b257c38b0b902333e2271056873e415f9c19 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA Date: Tue, 6 Dec 2022 16:57:38 +0800 Subject: [PATCH 099/128] delete some unused code --- neural_compressor/strategy/hawq.py | 561 +---------------------------- 1 file changed, 2 insertions(+), 559 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index c1ce91e0ca0..4ba5ed7db8f 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -34,523 +34,6 @@ import logging logger = logging.getLogger(__name__) from typing import Dict, List, Optional, Any, Union, Callable, Set -# # Define Collector based on hook, which is used to record the intermediate result -# class Node_collector: -# def __init__(self, m): -# self.handle = m.register_forward_hook(self.hook_fn_act) -# def hook_fn_act(self, m, inp, outp): -# self.out_features = outp.clone() -# self.in_features = inp -# self.m = m -# def remove(self): -# self.handle.remove() -# class HessianTrace: -# """ -# please refer to -# Yao, Zhewei, et al. "Pyhessian: Neural networks through the lens of the hessian." 2020 IEEE international conference on big data (Big data). 
IEEE, 2020. -# Dong, Zhen, et al. "Hawq-v2: Hessian aware trace-weighted quantization of neural networks." Advances in neural information processing systems 33 (2020): 18518-18529. -# https://github.com/openvinotoolkit/nncf/blob/develop/nncf/torch/quantization/hessian_trace.py -# """ - -# def __init__(self, model, dataloader,q_model,criterion=None): -# self.unfused_model = model.model -# self.q_model=q_model -# tmp_model=model.model -# if 'graph' in (str(dir(tmp_model))): #check the attribute and it's length -# logger.info("This is aready fused model") -# self.model=model.model -# else: -# logger.info("fusing model") -# self.model = fuse_fx(model.model) ##TODO need to check whether model has been already fused -# self.dataloader = dataloader -# self.max_iter = 500 -# self.tolerance = 1e-5 -# self.eps = 1e-6 -# self.index = 0 -# self.device = self.get_device(self.model) -# self.criterion = criterion -# if self.criterion == None: -# self.criterion = torch.nn.CrossEntropyLoss().to(self.device) ##TODO need to set in config -# self.criterion = self.criterion.to(self.device) -# self.weight_to_op, self.op_list = self.get_fused_mapping() -# self.get_params() - -# def is_fused_module(self, module): -# """This is a helper function for `_propagate_qconfig_helper` to detecte -# if this module is fused. -# Args: -# module (object): input module -# Returns: -# (bool): is fused or not -# """ -# op_type = str(type(module)) -# if 'fused' in op_type: -# return True -# else: -# return False - -# def mapping_module_to_op(self, name): -# # length = len("_model.") -# # if len(name) < length: -# # return name -# # else: -# return name -# def mse_metric_gap(self,fp32_tensor, dequantize_tensor): -# """Calculate the euclidean distance between fp32 tensor and int8 dequantize tensor -# Args: -# fp32_tensor (tensor): The FP32 tensor. -# dequantize_tensor (tensor): The INT8 dequantize tensor. -# """ -# fp32_max = np.max(fp32_tensor) -# fp32_min = np.min(fp32_tensor) -# dequantize_max = np.max(dequantize_tensor) -# dequantize_min = np.min(dequantize_tensor) -# fp32_tensor = (fp32_tensor - fp32_min) / (fp32_max - fp32_min) -# dequantize_tensor = (dequantize_tensor - dequantize_min) / \ -# (dequantize_max - dequantize_min) -# diff_tensor = fp32_tensor - dequantize_tensor -# euclidean_dist = np.sum(diff_tensor ** 2) -# return euclidean_dist / fp32_tensor.size -# def get_fused_mapping(self): -# model = self.model -# weights_info = dict(model.named_parameters()) -# weight_to_op = {} -# for op_name, child in model.named_modules(): -# if self.is_fused_module(child): -# for name, _ in child.named_children(): -# if op_name + "." + name + ".weight" in weights_info: ##TODO check if this is right - -# weight_to_op[op_name + "." 
+ name + ".weight"] = self.mapping_module_to_op(op_name) -# break -# else: -# name = op_name + ".weight" -# if name in weights_info and name not in weight_to_op.keys(): -# weight_to_op[op_name + ".weight"] = op_name -# op_list = [] -# for key in weight_to_op.keys(): -# op_list.append(weight_to_op[key]) -# return weight_to_op, op_list - -# def get_device(self, model: torch.nn.Module): -# for n, p in model.named_parameters(): -# return p.data.device - -# def _get_act_grad_hook(self, name): -# def act_grad_hook(model, grad_input, grad_output): -# ##print(name, grad_input[0].shape, grad_output[0].shape) -# if type(model) == torch.nn.Linear: ##TODO very tricky -# self.layer_acts_grads[name] = grad_input[1] -# else: -# self.layer_acts_grads[name] = grad_input[0] - -# return act_grad_hook - -# def _get_enable_act_grad_hook(self, name): -# def enable_act_grad_hook(model, inputs, outputs): -# input = inputs[0] -# if input.requires_grad is False: -# input.requires_grad = True -# self.layer_acts[name] = input - -# return enable_act_grad_hook - -# # def _get_disable_input_grad_hook(self, name): -# # def disable_input_grad_hook(model, inputs, outputs): -# # try: -# # input = inputs[0] ##TODO check whether this is right -# # except: -# # input = inputs -# # if input.is_leaf == False:## you can only change requires_grad flags of leaf variables -# # if input.requires_grad is True: -# # input.requires_grad = False -# # -# # -# # return disable_input_grad_hook - -# def _unregister_hook(self): -# for handel in self.hook_handles: -# handel.remove() - -# def register_act_grad_hooks(self, model): -# for name, module in model.named_modules(): -# if self.mapping_module_to_op(name) in self.op_list: -# hook_handle = module.register_forward_hook(self._get_enable_act_grad_hook(name)) -# self.hook_handles.append(hook_handle) -# hook_handle = module.register_backward_hook(self._get_act_grad_hook(name)) -# self.hook_handles.append(hook_handle) - -# def reset_act_gradient_and_hooks(self): -# # tmp_input = torch.zeros(self._input_shape, device=self.device) -# # for name, module in self.model.named_modules(): -# # if name in self.op_list: -# # hook_handle = module.register_forward_hook(self._get_disable_input_grad_hook(name)) -# # self.hook_handles.append(hook_handle) -# # self.model(tmp_input) -# self._unregister_hook() - -# def get_params(self): -# weight_names = [n for n, p in self.model.named_parameters() if -# p.requires_grad and "bias" not in n] ##remove bias -# params = [p for n, p in self.model.named_parameters() if p.requires_grad and "bias" not in n] ##remove bias -# self.weight_names = weight_names -# self.params = params - -# def forward_backward(self, model, data, create_graph=False, return_w_grad=True): -# model.zero_grad() -# input = data[0].to(self.device) -# ##self._input_shape = input.shape ## for resetting input activation -# target = data[1].to(self.device) -# input.requires_grad = True -# output = model(input) -# loss = self.criterion(output, target) -# torch.autograd.backward(loss, create_graph=create_graph) -# ##loss.backward(create_graph=create_graph) -# if return_w_grad: -# gradients = [] -# for n, p in self.model.named_parameters(): -# if p.grad != None and n in self.weight_names: -# gradient = p.grad -# gradients.append(gradient + 0.0) ## add 0 to create a copy -# model.zero_grad() -# return gradients -# else: -# model.zero_grad() - -# # def get_params(self, model): -# # parameters = [p for p in model.parameters() if p.requires_grad] -# # return parameters - -# def sample_rademacher(self, 
params): -# samples = [] -# for param in params: -# r = torch.randint_like(param, high=2, device=self.device) -# r.masked_fill_(r == 0, -1) -# samples.append(r) -# return samples - -# def get_vtHv_weight(self, params, num_samples): -# v = self.sample_rademacher(params) -# H_v = [0] * len(v) -# cnt = 0 -# for step, data in enumerate(self.dataloader): -# batch_size = data[0].shape[0] -# cnt += batch_size -# gradients = self.forward_backward(self.model, data, create_graph=True) -# H_v_one = torch.autograd.grad(gradients, params, v, only_inputs=True, retain_graph=False) -# H_v = [pre + cur * float(batch_size) for cur, pre in zip(H_v_one, H_v)] -# if cnt >= num_samples: -# break -# if cnt > 0: -# H_v = [item / cnt for item in H_v] -# v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better -# return v_t_H_v - -# def get_vtHv_act(self, params, num_samples): -# v = self.sample_rademacher(params) -# H_v = [0] * len(v) -# cnt = 0 -# for step, data in enumerate(self.dataloader): -# if cnt >= num_samples: -# break -# for i in range(self.dataloader.batchsize): ##force to batchsize to be 1 -# input = data[0][i:i + 1] -# target = data[1][i:i + 1] - -# self.get_gradients(self.model, (input, target), self.criterion, create_graph=True) -# layer_acts = [self.layer_acts[key] for key in self.layer_acts.keys()] -# layer_act_gradients = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] -# hv_one = torch.autograd.grad(layer_act_gradients, layer_acts, v, only_inputs=True, retain_graph=False) -# cnt += 1 -# if cnt >= num_samples: -# break - -# def get_weight_traces(self, num_samples): -# layer_traces_per_iter = [] -# prev_avg_model_trace = 0 -# for iter in range(self.max_iter): -# layer_traces = self.get_vtHv_weight(self.params, num_samples) -# layer_traces_per_iter.append(layer_traces) -# layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) -# model_trace = torch.sum(layer_traces_estimate) -# diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) -# if diff_ratio < self.tolerance and iter > 10: ##TODO magic number -# break -# if iter == 50: ##TODO for debug -# break -# prev_avg_model_trace = model_trace -# weight_name_to_traces = {} -# layer_traces = layer_traces_estimate -# for weight_name, trace in zip(self.weight_names, layer_traces): -# weight_name_to_traces[weight_name] = trace -# op_name_to_trace = {} -# for weight_name in self.weight_names: -# op_name = self.weight_to_op[weight_name] -# op_name_to_trace[op_name] = weight_name_to_traces[weight_name] -# return op_name_to_trace -# def get_act_traces(self, num_samples): -# unfused_training = self.unfused_model.training -# self.unfused_model.eval() -# self.hook_handles = [] -# self.layer_acts = {} -# self.layer_acts_grads = {} -# self.register_act_grad_hooks(self.unfused_model) -# cnt = 0 -# act_traces_per_sample = [] -# for step, data in enumerate(self.dataloader): -# if cnt >= num_samples: -# break -# bs = data[0].shape[0] -# act_traces_sum = 0 -# act_traces_per_iter = [] -# prev_avg_model_trace = 0 -# act_traces_sums = None -# for i in range(bs): ##force the bs to be one -# input = data[0][i:i + 1] -# target = data[1][i:i + 1] -# self.forward_backward(self.unfused_model, (input, target), create_graph=True, return_w_grad=False) -# acts = [self.layer_acts[key] for key in self.layer_acts.keys()] -# if act_traces_sums == None: -# act_traces_sums = [0] * len(acts) -# acts_grad = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] ##same 
order with acts -# vt_H_v_sum_per_act = [0] * len(acts) - -# prev_model_act_trace = 0 -# for iter in range(self.max_iter): -# v = self.sample_rademacher(acts) -# H_v = torch.autograd.grad(acts_grad, acts, v, only_inputs=True, retain_graph=True) -# vt_H_v = [torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)] - -# vt_H_v_sum_per_act = [vt_H_v_sum_per_act[index] + vt_H_v[index] for index, item in -# enumerate(vt_H_v_sum_per_act)] -# vt_H_v_mean_per_act = [item / (iter + 1) for item in vt_H_v_sum_per_act] -# current_model_act_trace = torch.mean(torch.stack(vt_H_v_mean_per_act)) - -# diff_ratio = abs(current_model_act_trace - prev_model_act_trace) / ( -# prev_model_act_trace + self.eps) -# if diff_ratio < self.tolerance and iter > 10: ##TODO magic number -# break -# if iter == 50: ##TODO for debug -# break - -# prev_model_act_trace = current_model_act_trace -# act_traces_per_sample.append(vt_H_v_mean_per_act) -# cnt += 1 -# if cnt >= num_samples: -# break - -# if unfused_training: -# self.unfused_model.train() -# self.reset_act_gradient_and_hooks() ##TODO have issues to reset the input grad to False -# act_traces_stack = torch.stack([torch.stack(item) for item in act_traces_per_sample]) -# act_traces = torch.mean(act_traces_stack, dim=0) -# res_dict = {} -# for index, key in enumerate(self.layer_acts.keys()): -# res_dict[key] = act_traces[index] - -# self.layer_acts = [] -# self.layer_acts_grads = [] -# return res_dict -# def insert_hook(self, model, target_module_list): -# intern_outputs = [] -# for layer,module in model.named_modules(): -# for target_module in target_module_list: -# # print("layer:",layer) -# # print("target_model:",target_module) -# if layer == target_module: -# logging.debug("Collect: %s" % (module)) -# # print("Collect: %s" % (module)) -# intern_outputs.append(Node_collector(module)) - -# logging.info("Total %d hook inserted" % (len(intern_outputs))) -# # print("Total %d hook inserted" % (len(intern_outputs))) -# return model, intern_outputs -# def insert_hook_quantize(self,model, target_module_list): -# intern_outputs = [] -# for layer,module in model.named_modules(): -# for target_module in target_module_list: -# # print("layer:",layer) -# length = len("_model.") -# new_key = layer[length:] -# # print("target_model:",target_module) -# if new_key == target_module: -# logging.debug("Collect: %s" % (module)) -# # print("Collect: %s" % (module)) -# intern_outputs.append(Node_collector(module)) -# logging.info("Total %d hook inserted" % (len(intern_outputs))) -# # print("Total %d hook inserted" % (len(intern_outputs))) -# return model, intern_outputs -# def get_act_gap(self,fp32_model,q_model): -# """ -# Estimates each activation gap between quantized model and float model -# """ -# self.handle_acts=[] -# fp32_model.eval() -# # temp_model = fuse_fx(fp32_model.model) -# temp_model=fp32_model -# # target_module_list = [nn.ReLU] # Insert hook for FP32 model -# target_module_list = self.op_list -# temp_model, intern_outputs =self.insert_hook(temp_model, target_module_list) -# # intern_outputs={} -# for input, target in self.dataloader: -# temp_model(input) -# break - -# fp32_act_out={} -# for i, intern_output in enumerate(intern_outputs): -# stat_features = intern_output.out_features.view(-1) -# # print ("No.", i, " ", intern_output.out_features.shape) -# # print ("Numpy No.", i, " ", intern_output.out_features.cpu().data.numpy().shape) -# # print ("No.", i, " ", stat_features.cpu().data.numpy().shape) -# # print ("Numpy No.", i, " ", stat_features.cpu().data.numpy()) -# 
fp32_act_out[target_module_list[i]]=stat_features.cpu().data.numpy() -# # break -# for i in intern_outputs: -# # print(i) -# i.remove() -# target_module_list = self.op_list -# q_model, intern_outputs=self.insert_hook_quantize(q_model, target_module_list) -# for input, target in self.dataloader: #only one sample -# q_model(input) -# break -# qnt_act_out={} -# intern_outputs={} -# for i, intern_output in enumerate(intern_outputs): -# stat_features = intern_output.out_features.view(-1) -# qnt_act_out[target_module_list[i]]=stat_features.dequantize().cpu().data.numpy() -# # break -# for i in intern_outputs: -# # print(i) -# i.remove() -# act_gap={} -# mse_gap={} -# for fp_i,int_i in zip(fp32_act_out,qnt_act_out): -# activation_qnt_error=fp32_act_out[fp_i]-qnt_act_out[int_i] -# mse_gap[fp_i]=self.mse_metric_gap(fp32_act_out[fp_i],qnt_act_out[int_i]) -# act_gap[fp_i]=np.sum(activation_qnt_error)/activation_qnt_error.size -# return act_gap,mse_gap -# def get_avg_traces(self, enable_act=True, num_samples=32): -# """ -# Estimates average hessian trace for each parameter -# """ -# assert num_samples > 0 -# traces = {} -# weight_traces = self.get_weight_traces(num_samples) -# traces['weight'] = weight_traces -# act_trace={} -# if enable_act: -# act_gap,mse_gap=self.get_act_gap(self.model,self.q_model) -# act_traces = self.get_act_traces(num_samples) -# for i,j in zip(act_traces,mse_gap): -# #currently use mse to analysis -# act_trace[i]=act_traces[i]+mse_gap[j] -# traces['activation'] = act_traces -# return traces - - -# ##copy from torch.quantization._numeric_suite -# def _find_match( -# str_list: Union[Dict[str, Any], List[str]], key_str: str, -# postfix: str, -# ) -> Optional[str]: -# split_str = key_str.split(".") -# if split_str[-1] == postfix: -# match_string = "".join(key_str.split(".")[0:-1]) -# for s2 in str_list: -# pattern1 = "".join(s2.split(".")[0:-1]) -# pattern2 = "".join(s2.split(".")[0:-2]) -# if match_string == pattern1: -# return s2 -# if match_string == pattern2: -# return s2 - -# # For matching "fc.weight" and "fc._packed_params._packed_params" -# if postfix == "_packed_params": -# match_string = "".join(key_str.split(".")[0:-2]) -# if len(match_string) == 0: -# return None -# for s2 in str_list: -# pattern1 = "".join(s2.split(".")[0:-1]) -# pattern2 = "".join(s2.split(".")[0:-2]) -# if match_string == pattern1: -# return s2 -# if match_string == pattern2: -# return s2 -# return None -# else: -# return None - - -# ##copy form torch.quantization._numeric_suite -# def compare_weights( -# float_dict: Dict[str, Any], quantized_dict: Dict[str, Any] -# ) -> Dict[str, Dict[str, torch.Tensor]]: -# r"""Compare the weights of the float module with its corresponding quantized -# module. Return a dict with key corresponding to module names and each entry being -# a dictionary with two keys 'float' and 'quantized', containing the float and -# quantized weights. This dict can be used to compare and compute the quantization -# error of the weights of float and quantized models. 
- -# Example usage:: - -# wt_compare_dict = compare_weights( -# float_model.state_dict(), qmodel.state_dict()) -# for key in wt_compare_dict: -# print( -# key, -# compute_error( -# wt_compare_dict[key]['float'], -# wt_compare_dict[key]['quantized'].dequantize() -# ) -# ) - -# Args: -# float_dict: state dict of the float model -# quantized_dict: state dict of the quantized model - -# Return: -# weight_dict: dict with key corresponding to module names and each entry being -# a dictionary with two keys 'float' and 'quantized', containing the float and -# quantized weights -# """ - -# weight_dict: Dict[str, Dict] = {} -# for key in quantized_dict: -# match_key = _find_match(float_dict, key, "weight") -# if match_key is not None: -# weight_dict[key] = {} -# weight_dict[key]["float"] = float_dict[match_key] -# weight_dict[key]["quantized"] = quantized_dict[key] -# continue - -# # For matching "fc.weight" and "fc._packed_params._packed_params" -# match_key = _find_match(float_dict, key, "_packed_params") -# if match_key is not None: -# weight_dict[match_key] = {} -# weight_dict[match_key]["float"] = float_dict[match_key] -# weight_dict[match_key]["quantized"] = quantized_dict[key][0] -# ##TODO:should consider more models in further work - -# # For LSTM -# split_str = key.split(".") -# if split_str[-1] == "param" and split_str[-3] == "_all_weight_values": -# layer = split_str[-2] -# module_name = ".".join(split_str[:-3]) -# float_weight_ih_key = module_name + ".weight_ih_l" + layer -# float_weight_hh_key = module_name + ".weight_hh_l" + layer -# if float_weight_ih_key in float_dict and float_weight_hh_key in float_dict: -# weight_dict[key] = {} -# weight_dict[key]["float"] = float_dict[float_weight_ih_key] -# weight_dict[key]["quantized"] = ( -# quantized_dict[key].__getstate__()[0][4][0].__getstate__()[0][0] -# ) -# weight_dict[key]["float"] = float_dict[float_weight_hh_key] -# weight_dict[key]["quantized"] = ( -# quantized_dict[key].__getstate__()[0][4][1].__getstate__()[0][0] -# ) - -# return weight_dict - - @strategy_registry class HawqTuneStrategy(TuneStrategy): """The basic tuning strategy which tunes the low precision model with below order. 
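The class deleted here estimates per-layer Hessian traces with Hutchinson's method, as the version kept under neural_compressor/adaptor/torch_utils/hawq_metric.py still does: draw Rademacher vectors v, form Hessian-vector products by differentiating the gradients a second time, and average v^T H v. The sketch below illustrates only that estimator on a toy quadratic loss with a single parameter tensor; it is not the project's API, and the deleted code additionally accumulates over calibration batches and uses a per-element mean rather than a sum.

import torch

def hutchinson_trace(loss, param, n_samples=16):
    # Estimate tr(H) of `loss` w.r.t. `param` as E_v[v^T H v] with Rademacher v.
    grad = torch.autograd.grad(loss, param, create_graph=True)[0]
    estimate = 0.0
    for _ in range(n_samples):
        v = torch.randint_like(param, high=2)
        v.masked_fill_(v == 0, -1)                  # Rademacher entries in {-1, +1}
        hv = torch.autograd.grad(grad, param, grad_outputs=v, retain_graph=True)[0]
        estimate += torch.sum(hv * v).item()        # v^T H v
    return estimate / n_samples

# Toy check: loss = w^T A w has Hessian 2A, so the estimate should be 2 * tr(A) = 12.0.
A = torch.diag(torch.tensor([1.0, 2.0, 3.0]))
w = torch.randn(3, requires_grad=True)
loss = w @ A @ w
print(hutchinson_trace(loss, w))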
@@ -640,12 +123,6 @@ def next_tune_cfg(self): yield op_tuning_cfg # Start compute the hessian trace - - # import torch.quantization._numeric_suite as ns - # self.model.eval() - # fused_model = fuse_fx(self.model.model) - # res = compare_weights(fused_model.state_dict(), self.q_model.state_dict()) - # Fallback the ops supported both static and dynamic from static to dynamic quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] @@ -657,47 +134,13 @@ def next_tune_cfg(self): # print(n) # for n, p in self._fp32_model.named_parameters(): # print(n) - # orig_eval = True - # if self._fp32_model.training: - # orig_eval = False - # self._fp32_model.eval() - # ht = HessianTrace(self._fp32_model, self.calib_dataloader,self.q_model) - # q_model_state_dict = {} - # for key in self.q_model.state_dict().keys(): - # length = len("_model.") - # new_key = key[length:] - # q_model_state_dict[new_key] = self.q_model.state_dict()[key] - # weight_quant_loss = compare_weights(ht.model.state_dict(), q_model_state_dict) - # pertur_lst = {} - # for key in weight_quant_loss: - # op_float_tensor = weight_quant_loss[key]['float'] - # op_qnt_tensor = weight_quant_loss[key]['quantized'].dequantize() - # diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) # Formula: L2=||Q(w)-w||p^2 - # pertur_lst[key] = diff_l2 - # self.enable_act=False #enable activation trace and quantization loss analysis feature - # traces = ht.get_avg_traces(self.enable_act) - # op_to_traces = traces['weight'] - # if self.enable_act: - # act_to_traces=traces['activation'] - # #TODO() optimize relationship of weights quantized loss and activation quantized loss, to find best conbine - # #TODO() do double check why layer1's output is not 0 for activation quantized - # for trace_i, pertur_i,act_i in zip(op_to_traces.keys(),pertur_lst.keys(),act_to_traces.keys()): - # op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i]+act_to_traces[act_i] #Formula:Omig=Trace*L2+act_trace - # else: - # for trace_i, pertur_i in zip(op_to_traces.keys(),pertur_lst.keys()): - # op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i] #Formula:Omig=Trace*L2 - # if orig_eval == False: - # self._fp32_model.train() - - # End compute the hessian trace # # TODO uncomment it when algo ready. 
- criterion=torch.nn.CrossEntropyLoss()# TODO replace it with user specify loss + criterion=torch.nn.CrossEntropyLoss() op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, dataloader = self.calib_dataloader, q_model = self.q_model, - criterion =criterion, + criterion =criterion, # TODO replace it with user specify loss enable_act = False) - # op_to_traces = self.adaptor.calculate_hessian_trace() ordered_ops = sorted(op_to_traces.keys(), key=lambda key: op_to_traces[key], reverse=self.higher_is_better) From be4f5a284fb540788dd0ecac4695d2b838bed547 Mon Sep 17 00:00:00 2001 From: BiaoFangAIA Date: Tue, 6 Dec 2022 16:59:29 +0800 Subject: [PATCH 100/128] enable model.eval() first --- neural_compressor/adaptor/torch_utils/hawq_metric.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/neural_compressor/adaptor/torch_utils/hawq_metric.py b/neural_compressor/adaptor/torch_utils/hawq_metric.py index 465b7f9ca88..2b6a7790eb0 100644 --- a/neural_compressor/adaptor/torch_utils/hawq_metric.py +++ b/neural_compressor/adaptor/torch_utils/hawq_metric.py @@ -541,15 +541,11 @@ def compare_weights( ) return weight_dict -# op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, - # dataloader = self.calib_dataloader, - # q_model = self.q_model, - # criterion = torch.nn.CrossEntropyLoss(), # TODO replace it with user specify loss - # enable_act = False) def hawq_top(fp32_model,q_model,dataloader,criterion,enable_act): orig_eval=True if fp32_model.training: orig_eval=False + fp32_model.eval() ht=HessianTrace(fp32_model,dataloader=dataloader,q_model=q_model) q_model_state_dict={} for key in q_model.state_dict().keys(): @@ -563,7 +559,7 @@ def hawq_top(fp32_model,q_model,dataloader,criterion,enable_act): op_qnt_tensor=weight_quant_loss[key]['quantized'].dequantize() diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) pertur_lst[key]=diff_l2 - traces=ht.get_act_traces(enable_act) + traces=ht.get_avg_traces(enable_act) op_to_traces=traces['weight'] if enable_act: act_to_traces=traces['activation'] From b0b697c2edab56ed4a978796385b5ccc43fd7da6 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 6 Dec 2022 17:08:23 +0800 Subject: [PATCH 101/128] remove some useless lines Signed-off-by: yiliu30 --- neural_compressor/strategy/hawq.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq.py index 4ba5ed7db8f..c6fc912fd3b 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq.py @@ -123,29 +123,17 @@ def next_tune_cfg(self): yield op_tuning_cfg # Start compute the hessian trace - # Fallback the ops supported both static and dynamic from static to dynamic - quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] - quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] - - target_dtype = "int8" ##TODO support bf16 - target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) - fp_op_list = [item.name for item in quant_ops if item in target_type_lst] - # for n, p in self._fp32_model.named_modules(): - # print(n) - # for n, p in self._fp32_model.named_parameters(): - # print(n) - # # TODO uncomment it when algo ready. 
+ target_dtype = "int8" # TODO support bf16 criterion=torch.nn.CrossEntropyLoss() op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, dataloader = self.calib_dataloader, q_model = self.q_model, - criterion =criterion, # TODO replace it with user specify loss + criterion =criterion, # TODO using user specify loss enable_act = False) ordered_ops = sorted(op_to_traces.keys(), key=lambda key: op_to_traces[key], reverse=self.higher_is_better) # WA for add op type - # print("ordered_ops:",ordered_ops) op_info_map = {} for op_info in list(initial_op_tuning_cfg.keys()): op_info_map[op_info[0]] = op_info # op_name: (op_name, op_type) @@ -158,7 +146,7 @@ def next_tune_cfg(self): indx=indx+1 if indx>4: break - print(op_dtypes) + logger.info("hawq op_config:"+str(op_dtypes)) logger.info(f"Start to accumulate fallback to {target_dtype}.") initial_op_tuning_cfg = deepcopy(op_tuning_cfg) From 9633ebd08604573e4de80a59ded14bb8c006e7d2 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 6 Dec 2022 21:07:01 +0800 Subject: [PATCH 102/128] fixed some uts Signed-off-by: yiliu30 --- test/strategy/test_basic.py | 9 ++++----- test/strategy/test_sigopt.py | 7 +++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/test/strategy/test_basic.py b/test/strategy/test_basic.py index 239f26a071a..0a2812b5f79 100644 --- a/test/strategy/test_basic.py +++ b/test/strategy/test_basic.py @@ -221,12 +221,11 @@ def test_run_basic_max_trials_multimetric_weight(self): def test_run_basic_one_trial_new_api(self): from neural_compressor.quantization import fit from neural_compressor.config import AccuracyCriterion, AccuracyLoss, PostTrainingQuantConfig, TuningCriterion - from neural_compressor.experimental.common import DataLoader - from neural_compressor.experimental.data.datasets.dummy_dataset import DummyDataset - + from neural_compressor.data import DATASETS, DATALOADERS + # dataset and dataloader - dataset = DummyDataset(shape=(100, 3, 3, 1), label=True) - dataloader = DataLoader(dataset) + dataset = DATASETS("tensorflow")["dummy"](((100, 3, 3, 1))) + dataloader = DATALOADERS["tensorflow"](dataset) # tuning and accuracy criterion tolerable_loss = AccuracyLoss(0.01) diff --git a/test/strategy/test_sigopt.py b/test/strategy/test_sigopt.py index 062c96b638c..5d443e3dba2 100644 --- a/test/strategy/test_sigopt.py +++ b/test/strategy/test_sigopt.py @@ -144,12 +144,11 @@ def test_run_basic_max_trials(self): def test_run_sigopt_one_trial_new_api(self): from neural_compressor.quantization import fit from neural_compressor.config import AccuracyCriterion, AccuracyLoss, PostTrainingQuantConfig, TuningCriterion - from neural_compressor.experimental.common import DataLoader - from neural_compressor.experimental.data.datasets.dummy_dataset import DummyDataset + from neural_compressor.data import DATASETS, DATALOADERS # dataset and dataloader - dataset = DummyDataset(shape=(100, 3, 3, 1), label=True) - dataloader = DataLoader(dataset) + dataset = DATASETS("tensorflow")["dummy"](((100, 3, 3, 1))) + dataloader = DATALOADERS["tensorflow"](dataset) # tuning and accuracy criterion tolerable_loss = AccuracyLoss(0.01) From 09931956937c8e7b4b4b2951b21711f27ee0e1fc Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 6 Dec 2022 21:29:33 +0800 Subject: [PATCH 103/128] add optimization_level in BaseQuantizationConfig Signed-off-by: yiliu30 --- neural_compressor/config.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/neural_compressor/config.py b/neural_compressor/config.py 
index 4accfce4bd0..b3a9fd4352e 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -314,6 +314,7 @@ def __init__(self, performance_only=False, reduce_range=None, extra_precisions=["bf16"], + optimization_level=1, accuracy_criterion=accuracy_criterion): self._inputs = inputs self._outputs = outputs @@ -330,6 +331,7 @@ def __init__(self, self._reduce_range = reduce_range self._extra_precisions = extra_precisions \ if isinstance(extra_precisions, List) else [extra_precisions] + self._optimization_level = optimization_level self.use_bf16 = "bf16" in self._extra_precisions self._accuracy_criterion = accuracy_criterion self._calibration_sampling_size = calibration_sampling_size @@ -348,6 +350,14 @@ def extra_precisions(self, extra_precisions): self._extra_precisions = extra_precisions self._use_bf16 = "bf16" in extra_precisions + @property + def optimization_level(self): + return self._optimization_level + + @optimization_level.setter + def optimization_level(self, optimization_level): + self._optimization_level = optimization_level + @property def reduce_range(self): return self._reduce_range @@ -576,9 +586,9 @@ def __init__(self, max_trials=tuning_criterion.max_trials, reduce_range=reduce_range, extra_precisions=extra_precisions, + optimization_level=optimization_level, accuracy_criterion=accuracy_criterion) self.approach = approach - self.optimization_level = optimization_level @property def approach(self): @@ -599,10 +609,12 @@ def __init__(self, op_type_list=None, op_name_list=None, reduce_range=None, - extra_precisions=["bf16"]): + extra_precisions=["bf16"], + optimization_level=1): super().__init__(inputs=inputs, outputs=outputs, device=device, backend=backend, op_type_list=op_type_list, op_name_list=op_name_list, - reduce_range=reduce_range, extra_precisions=extra_precisions) + reduce_range=reduce_range, extra_precisions=extra_precisions, + optimization_level=optimization_level) self._approach = 'quant_aware_training' @property From 087bdc624c16293e7366909c68d131971ac24eac Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 6 Dec 2022 23:08:17 +0800 Subject: [PATCH 104/128] add optimization_level to conf and pythonic_conf Signed-off-by: yiliu30 --- neural_compressor/conf/config.py | 2 ++ neural_compressor/conf/pythonic_config.py | 22 +++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index 8b227697086..f9039be5a5b 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -841,6 +841,7 @@ def percent_to_float(data): }, }, Optional('use_bf16', default=True): bool, + Optional('optimization_level', default=1): And(int, lambda level: level in [0, 1]), Optional('graph_optimization'): graph_optimization_schema, Optional('mixed_precision'): mixed_precision_schema, @@ -1111,6 +1112,7 @@ def percent_to_float(data): 'activation': {}}, }): dict, Optional('use_bf16', default=False): bool, + Optional('optimization_level', default=1): int, Optional('tuning', default={ 'strategy': {'name': 'basic'}, 'accuracy_criterion': {'relative': 0.01, 'higher_is_better': True}, diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py index dbf1d3dc6aa..c9975a9ebc6 100644 --- a/neural_compressor/conf/pythonic_config.py +++ b/neural_compressor/conf/pythonic_config.py @@ -34,17 +34,33 @@ def __init__(self, op_type_list=None, op_name_list=None, strategy='basic', + strategy_kwargs=None, objective='performance', timeout=0, 
max_trials=100, performance_only=False, reduce_range=None, use_bf16=True, + optimization_level=1, accuracy_criterion=accuracy_criterion): extra_precisions = ["bf16"] if use_bf16 else [] - super().__init__(inputs, outputs, backend, device, calibration_sampling_size, op_type_list, - op_name_list, strategy, objective, timeout, max_trials, performance_only, - reduce_range, extra_precisions, accuracy_criterion) + super().__init__(inputs=inputs, + outputs=outputs, + backend=backend, + device=device, + calibration_sampling_size=calibration_sampling_size, + op_type_list=op_type_list, + op_name_list=op_name_list, + strategy=strategy, + strategy_kwargs=strategy_kwargs, + objective=objective, + timeout=timeout, + max_trials=max_trials, + performance_only=performance_only, + reduce_range=reduce_range, + extra_precisions=extra_precisions, + optimization_level=optimization_level, + accuracy_criterion=accuracy_criterion) self._approach = approach @property From 75bd44c59418d2e8ba6d7bc2778302362dcb33a1 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 7 Dec 2022 13:00:36 +0800 Subject: [PATCH 105/128] rename test filename Signed-off-by: yiliu30 --- test/strategy/{test_basic_fallback.py => test_hawq_v2.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/strategy/{test_basic_fallback.py => test_hawq_v2.py} (100%) diff --git a/test/strategy/test_basic_fallback.py b/test/strategy/test_hawq_v2.py similarity index 100% rename from test/strategy/test_basic_fallback.py rename to test/strategy/test_hawq_v2.py From 1cc224e109d9f6eb17615e974ae0ef73f432daa3 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 7 Dec 2022 13:41:47 +0800 Subject: [PATCH 106/128] remove some incorrect comments Signed-off-by: yiliu30 --- neural_compressor/config.py | 2 +- .../strategy/{hawq.py => hawq_v2.py} | 11 +--- test/strategy/test_hawq_v2_2.x.py | 56 +++++++++++++++++++ 3 files changed, 60 insertions(+), 9 deletions(-) rename neural_compressor/strategy/{hawq.py => hawq_v2.py} (95%) create mode 100644 test/strategy/test_hawq_v2_2.x.py diff --git a/neural_compressor/config.py b/neural_compressor/config.py index ff3f3aa1772..1e4ce97504d 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -542,7 +542,7 @@ def strategy(self): @strategy.setter def strategy(self, strategy): if check_value('strategy', strategy, str, - ['basic', 'mse', 'bayesian', 'random', 'exhaustive', 'sigopt', 'tpe']): + ['basic', 'mse', 'bayesian', 'random', 'exhaustive', 'sigopt', 'tpe', 'hawq_v2']): self._strategy = strategy @property diff --git a/neural_compressor/strategy/hawq.py b/neural_compressor/strategy/hawq_v2.py similarity index 95% rename from neural_compressor/strategy/hawq.py rename to neural_compressor/strategy/hawq_v2.py index c6fc912fd3b..31c4cfa1b30 100644 --- a/neural_compressor/strategy/hawq.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -35,13 +35,8 @@ logger = logging.getLogger(__name__) from typing import Dict, List, Optional, Any, Union, Callable, Set @strategy_registry -class HawqTuneStrategy(TuneStrategy): - """The basic tuning strategy which tunes the low precision model with below order. - - 1. modelwise tuning for all quantizable ops. - 2. fallback tuning from bottom to top to decide the priority of which op has biggest impact - on accuracy. - 3. incremental fallback tuning by fallbacking multiple ops with the order got from #2. +class HAWQ_V2TuneStrategy(TuneStrategy): + """The hawq v2 tuning strategy. Args: model (object): The FP32 model specified for low precision tuning. 
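Concretely, hawq_top above scores every op as Omega = Trace * L2, where L2 = ||Q(w) - w||_2^2 is the weight quantization perturbation (the activation trace is added when enable_act is set), and the strategy falls back the highest-scoring ops first. A small sketch of that scoring and ordering step, with made-up trace values and random tensors standing in for the real fp32 and dequantized weights:

    import torch

    # Hypothetical per-op traces and weights; the real values come from HessianTrace
    # and from compare_weights on the fp32/quantized state dicts.
    op_to_traces = {"conv1": 2.3, "fc": 0.7}
    float_w = {"conv1": torch.randn(8, 8), "fc": torch.randn(4, 4)}
    deq_w = {"conv1": torch.randn(8, 8), "fc": torch.randn(4, 4)}  # stands in for Q(w).dequantize()

    sensitivity = {}
    for op, trace in op_to_traces.items():
        l2 = (torch.norm(float_w[op] - deq_w[op], p=2) ** 2).item()  # L2 = ||Q(w) - w||_2^2
        sensitivity[op] = trace * l2                                 # Omega = Trace * L2

    # The most sensitive ops become the first fallback candidates.
    ordered_ops = sorted(sensitivity, key=sensitivity.get, reverse=True)
    print(ordered_ops)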
@@ -88,7 +83,7 @@ def eval_func(model): def __init__(self, model, conf, q_dataloader, q_func=None, eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None): super( - HawqTuneStrategy, + HAWQ_V2TuneStrategy, self).__init__( model, conf, diff --git a/test/strategy/test_hawq_v2_2.x.py b/test/strategy/test_hawq_v2_2.x.py new file mode 100644 index 00000000000..0ec055c26c3 --- /dev/null +++ b/test/strategy/test_hawq_v2_2.x.py @@ -0,0 +1,56 @@ +"""Tests for HAWQ v2 strategy""" + +import copy +import shutil +import unittest + +import numpy as np + +from neural_compressor.utils import logger + +class TestHAWQV2TuningStrategy(unittest.TestCase): + + @classmethod + def setUpClass(self): + import torchvision + self.model = torchvision.models.resnet18() + + @classmethod + def tearDownClass(self): + shutil.rmtree('saved', ignore_errors=True) + shutil.rmtree('nc_workspace', ignore_errors=True) + + + def test_hawq_v2_pipeline(self): + logger.info("*** Test: HAWQ v2 with pytorch model.") + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import DATASETS, DATALOADERS + + # model + model = copy.deepcopy(self.model) + + # fake evaluation function + self.test_hawq_v2_pipeline_fake_acc = 0 + def _fake_eval(model): + self.test_hawq_v2_pipeline_fake_acc -= 1 + return self.test_hawq_v2_pipeline_fake_acc + + # dataset and dataloader + dataset = DATASETS("pytorch")["dummy"](((1, 3, 224, 224))) + dataloader = DATALOADERS["pytorch"](dataset) + + # tuning and accuracy criterion + tuning_criterion = TuningCriterion(strategy='hawq_v2', max_trials=5) + conf = PostTrainingQuantConfig(approach="static", backend="pytorch_fx", tuning_criterion=tuning_criterion) + + # fit + q_model = fit(model=model, + conf=conf, + calib_dataloader=dataloader, + eval_dataloader=dataloader, + eval_func=_fake_eval) + self.assertIsNone(q_model) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From 8390d3afd0e2b564d24562e1bd5bc4c1b85d2637 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 7 Dec 2022 13:43:21 +0800 Subject: [PATCH 107/128] remove UTs based on old API(YAML) Signed-off-by: yiliu30 --- test/strategy/test_hawq_v2.py | 73 ----------------------------------- 1 file changed, 73 deletions(-) delete mode 100644 test/strategy/test_hawq_v2.py diff --git a/test/strategy/test_hawq_v2.py b/test/strategy/test_hawq_v2.py deleted file mode 100644 index fef994a4f1b..00000000000 --- a/test/strategy/test_hawq_v2.py +++ /dev/null @@ -1,73 +0,0 @@ -import torch -import unittest -import os -import sys -import copy -import torchvision -import torchvision.transforms as transforms -from torch.utils.data import DataLoader -from neural_compressor.data import DATASETS -from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.adaptor.pytorch import TemplateAdaptor -from neural_compressor.adaptor import FRAMEWORKS -import shutil - - -def build_ptq_yaml(): - fake_yaml = ''' - model: - name: resnet18 - framework: pytorch_fx - tuning: - strategy: - name: hawq - accuracy_criterion: - absolute: -1 - exit_policy: - timeout: 0 - ''' - with open('ptq_yaml.yaml', 'w', encoding="utf-8") as f: - f.write(fake_yaml) - -class TestPytorchAdaptor(unittest.TestCase): - framework_specific_info = {"device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "workspace_path": None} - framework = "pytorch" - adaptor = 
FRAMEWORKS[framework](framework_specific_info) - model = torchvision.models.resnet18() - - # model = torch.quantization.QuantWrapper(model) - - @classmethod - def setUpClass(self): - self.i = 0 - build_ptq_yaml() - - - @classmethod - def tearDownClass(self): - os.remove('ptq_yaml.yaml') - shutil.rmtree('./saved', ignore_errors=True) - shutil.rmtree('runs', ignore_errors=True) - - def test_basic_fallback(self): - def eval_func(model): - self.i -= 1 - return self.i - - from neural_compressor.experimental import Quantization, common - model = copy.deepcopy(self.model) - quantizer = Quantization('ptq_yaml.yaml') - quantizer.eval_func = eval_func - dataset = quantizer.dataset('dummy', (1, 3, 224, 224), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = model - q_model = quantizer() - self.assertTrue(q_model is None) - -if __name__ == "__main__": - unittest.main() From 73c634f74bbc24c9e82c881e5a39837204be5589 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 7 Dec 2022 13:48:51 +0800 Subject: [PATCH 108/128] remove some unused code Signed-off-by: yiliu30 --- neural_compressor/strategy/hawq_v2.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/neural_compressor/strategy/hawq_v2.py b/neural_compressor/strategy/hawq_v2.py index 31c4cfa1b30..500f1727e69 100644 --- a/neural_compressor/strategy/hawq_v2.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -15,28 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import copy -import numpy as np from collections import OrderedDict - -import torch.nn +from copy import deepcopy from .strategy import strategy_registry, TuneStrategy -from ..utils import logger from .st_utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler from .st_utils.tuning_structs import OpTuningConfig from .st_utils.tuning_space import TUNING_ITEMS_LST -from torch.quantization.quantize_fx import fuse_fx -import torch.nn.intrinsic.quantized as nniq -from torch.fx import symbolic_trace, graph_module -import torch.nn as nn -import logging -logger = logging.getLogger(__name__) -from typing import Dict, List, Optional, Any, Union, Callable, Set +from ..utils import logger + @strategy_registry class HAWQ_V2TuneStrategy(TuneStrategy): - """The hawq v2 tuning strategy. + """The HAWQ v2 tuning strategy. Args: model (object): The FP32 model specified for low precision tuning. 
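One note on the op-mapping workaround introduced a few patches below ('WA for mapping op'): the trace dictionary can be keyed by parameter or tensor names, while the tuning config is keyed by (op_name, op_type) pairs, so the strategy matches the two by name prefix. A toy sketch, with hypothetical names, of what that matching does:

    # Hypothetical trace keys (parameter names) versus tuning-config op infos.
    op_to_traces = {"layer1.0.conv1.weight": 1.8, "fc.weight": 0.4}
    tuning_cfg_ops = [("layer1.0.conv1", "Conv2d"), ("fc", "Linear")]

    op_name_to_trace = {}
    for op_name, _op_type in tuning_cfg_ops:
        for trace_key, trace in op_to_traces.items():
            if trace_key.startswith(op_name):
                # Later matches overwrite earlier ones, mirroring the prefix-based workaround.
                op_name_to_trace[op_name] = trace

    print(op_name_to_trace)  # {'layer1.0.conv1': 1.8, 'fc': 0.4}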
@@ -95,7 +86,8 @@ def __init__(self, model, conf, q_dataloader, q_func=None, q_hooks) def next_tune_cfg(self): - from copy import deepcopy + # TODO remove it before merge + import torch tuning_space = self.tuning_space calib_size = tuning_space.root_item.get_option_by_name('calib_sampling_size').options[0] ##TODO suppoprt list From 2aabc2c60efe546852d563850e6302ea3f53ad8d Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 7 Dec 2022 13:50:59 +0800 Subject: [PATCH 109/128] add some comments Signed-off-by: yiliu30 --- neural_compressor/strategy/hawq_v2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/neural_compressor/strategy/hawq_v2.py b/neural_compressor/strategy/hawq_v2.py index 500f1727e69..5ac62641d44 100644 --- a/neural_compressor/strategy/hawq_v2.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -110,7 +110,8 @@ def next_tune_cfg(self): yield op_tuning_cfg # Start compute the hessian trace - target_dtype = "int8" # TODO support bf16 + target_dtype = "int8" + # TODO remove it before merge criterion=torch.nn.CrossEntropyLoss() op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, dataloader = self.calib_dataloader, From 4e7a4a809eb763de9abae3b4da1569cb0ad3d243 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 7 Dec 2022 19:07:24 +0800 Subject: [PATCH 110/128] WA for mapping op Signed-off-by: yiliu30 --- neural_compressor/strategy/hawq_v2.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/neural_compressor/strategy/hawq_v2.py b/neural_compressor/strategy/hawq_v2.py index 5ac62641d44..43f300ef488 100644 --- a/neural_compressor/strategy/hawq_v2.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -108,8 +108,8 @@ def next_tune_cfg(self): break op_tuning_cfg['calib_sampling_size'] = calib_size yield op_tuning_cfg - # Start compute the hessian trace + logger.info(f"************** Start compute the hessian trace *****************") target_dtype = "int8" # TODO remove it before merge criterion=torch.nn.CrossEntropyLoss() @@ -118,15 +118,31 @@ def next_tune_cfg(self): q_model = self.q_model, criterion =criterion, # TODO using user specify loss enable_act = False) - ordered_ops = sorted(op_to_traces.keys(), - key=lambda key: op_to_traces[key], + sorted_op_to_traces = dict(sorted(op_to_traces.items(), key=lambda item: item[1], reverse=True)) + logger.info(f"************** Hessian Trace *****************") + for op_name, trace in sorted_op_to_traces.items(): + logger.info(f"*** op: {op_name}, hessian trace : {trace}") + logger.info(f"************************************************") + # WA for op mapping + ordered_ops_tmp = {} + for op_info in list(initial_op_tuning_cfg.keys()): + op_name, op_type = op_info + for op_trace_name in op_to_traces.keys(): + if isinstance(op_trace_name, str) and op_trace_name.startswith(op_name): + if op_name in ordered_ops_tmp: + logger.info((f"*** Already assigned the hessian trace to {op_name}", + f"update it with the value of {op_trace_name}")) + ordered_ops_tmp[op_name] = op_to_traces[op_trace_name] + + ordered_ops_tmp = sorted(ordered_ops_tmp.keys(), + key=lambda key: ordered_ops_tmp[key], reverse=self.higher_is_better) # WA for add op type op_info_map = {} for op_info in list(initial_op_tuning_cfg.keys()): op_info_map[op_info[0]] = op_info # op_name: (op_name, op_type) - tmp_ordered_ops = [op_info_map[op_name] for op_name in ordered_ops] - op_dtypes = OrderedDict(zip(tmp_ordered_ops, [target_dtype] * len(ordered_ops))) + tmp_ordered_ops = [op_info_map[op_name] for 
op_name in ordered_ops_tmp] + op_dtypes = OrderedDict(zip(tmp_ordered_ops, [target_dtype] * len(ordered_ops_tmp))) indx=0 #defautly fallback 5 ops for i in op_dtypes.keys(): From a3255bde465be9a2fd147b3bca22d3ef9ac7e848 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 7 Dec 2022 19:22:08 +0800 Subject: [PATCH 111/128] add efficientnet_b3_fx for test Signed-off-by: yiliu30 --- examples/.config/model_params_pytorch.json | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/examples/.config/model_params_pytorch.json b/examples/.config/model_params_pytorch.json index 16e03ea3bbb..184fdcefd50 100644 --- a/examples/.config/model_params_pytorch.json +++ b/examples/.config/model_params_pytorch.json @@ -14,10 +14,19 @@ "dataset_location": "/tf_dataset/pytorch/ImageNet/raw", "input_model": "", "yaml": "conf.yaml", - "strategy": "basic", + "strategy": "hawq_v2", "batch_size": 100, "new_benchmark": false }, + "efficientnet_b3_fx": { + "model_src_dir": "image_recognition/torchvision_models/quantization/ptq/cpu/fx/", + "dataset_location": "/tf_dataset/pytorch/ImageNet/raw", + "input_model": "", + "yaml": "conf.yaml", + "strategy": "hawq_v2", + "batch_size": 100, + "new_benchmark": false + }, "resnet18_fx": { "model_src_dir": "image_recognition/torchvision_models/quantization/ptq/cpu/fx/", "dataset_location": "/tf_dataset/pytorch/ImageNet/raw", From 5a36c596cef158e57126cd7a4512d745d68aab3d Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 8 Dec 2022 10:07:43 +0800 Subject: [PATCH 112/128] support for adding hawq_v2 loss by new API Signed-off-by: yiliu30 --- neural_compressor/conf/config.py | 5 +++-- neural_compressor/strategy/hawq_v2.py | 17 ++++++++--------- test/strategy/test_hawq_v2_2.x.py | 14 +++++++++----- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index a72a50e782a..1ce7b0c8c11 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -861,7 +861,8 @@ def percent_to_float(data): Optional('sigopt_project_id'): str, Optional('sigopt_experiment_name', default='nc-tune'): str, Optional('accuracy_weight', default=1.0): float, - Optional('latency_weight', default=1.0): float + Optional('latency_weight', default=1.0): float, + Optional('hawq_v2_loss', default=None): object, } , Hook('accuracy_criterion', handler=_valid_accuracy_field): object, Optional('accuracy_criterion', default={'relative': 0.01}): { @@ -1354,7 +1355,7 @@ def map_pyconfig_to_cfg(self, pythonic_config): if pythonic_config.quantization.strategy_kwargs: st_kwargs = pythonic_config.quantization.strategy_kwargs for st_key in ['sigopt_api_token', 'sigopt_project_id', 'sigopt_experiment_name', \ - 'accuracy_weight', 'latency_weight']: + 'accuracy_weight', 'latency_weight', 'hawq_v2_loss']: if st_key in st_kwargs: st_val = st_kwargs[st_key] mapping.update({'tuning.strategy.' 
+ st_key: st_val}) diff --git a/neural_compressor/strategy/hawq_v2.py b/neural_compressor/strategy/hawq_v2.py index 5ac62641d44..2d467787501 100644 --- a/neural_compressor/strategy/hawq_v2.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -86,10 +86,8 @@ def __init__(self, model, conf, q_dataloader, q_func=None, q_hooks) def next_tune_cfg(self): - # TODO remove it before merge - import torch tuning_space = self.tuning_space - calib_size = tuning_space.root_item.get_option_by_name('calib_sampling_size').options[0] ##TODO suppoprt list + calib_size = tuning_space.root_item.get_option_by_name('calib_sampling_size').options[0] # Initialize the tuning config for each op according to the quantization approach op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() @@ -111,12 +109,13 @@ def next_tune_cfg(self): # Start compute the hessian trace target_dtype = "int8" - # TODO remove it before merge - criterion=torch.nn.CrossEntropyLoss() - op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, - dataloader = self.calib_dataloader, - q_model = self.q_model, - criterion =criterion, # TODO using user specify loss + hawq_v2_criterion =self.cfg.tuning.strategy.hawq_v2_loss + assert hawq_v2_criterion is not None, "HAWQ-V2 strategy needs model loss function to compute the gradient, \ + Please assign it by strategy_kwargs({'hawq_v2_loss': hawq_v2_loss})." + op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, + dataloader = self.calib_dataloader, + q_model = self.q_model, + criterion =hawq_v2_criterion, enable_act = False) ordered_ops = sorted(op_to_traces.keys(), key=lambda key: op_to_traces[key], diff --git a/test/strategy/test_hawq_v2_2.x.py b/test/strategy/test_hawq_v2_2.x.py index 0ec055c26c3..8442154acb5 100644 --- a/test/strategy/test_hawq_v2_2.x.py +++ b/test/strategy/test_hawq_v2_2.x.py @@ -4,10 +4,13 @@ import shutil import unittest -import numpy as np - from neural_compressor.utils import logger +# loss function for hawq-v2 +def hawq_v2_loss(output, target): + import torch + return torch.nn.CrossEntropyLoss()(output, target) + class TestHAWQV2TuningStrategy(unittest.TestCase): @classmethod @@ -39,9 +42,10 @@ def _fake_eval(model): # dataset and dataloader dataset = DATASETS("pytorch")["dummy"](((1, 3, 224, 224))) dataloader = DATALOADERS["pytorch"](dataset) - - # tuning and accuracy criterion - tuning_criterion = TuningCriterion(strategy='hawq_v2', max_trials=5) + + #tuning and accuracy criterion + strategy_kwargs = {'hawq_v2_loss': hawq_v2_loss} + tuning_criterion = TuningCriterion(strategy='hawq_v2', strategy_kwargs=strategy_kwargs, max_trials=5) conf = PostTrainingQuantConfig(approach="static", backend="pytorch_fx", tuning_criterion=tuning_criterion) # fit From 8c7aa58dafa8e722ff9f8f6bbb36de08021cc69d Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 8 Dec 2022 10:08:12 +0800 Subject: [PATCH 113/128] remove some WA Signed-off-by: yiliu30 --- neural_compressor/strategy/hawq_v2.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/neural_compressor/strategy/hawq_v2.py b/neural_compressor/strategy/hawq_v2.py index 2d467787501..4f17ed131c2 100644 --- a/neural_compressor/strategy/hawq_v2.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -126,15 +126,7 @@ def next_tune_cfg(self): op_info_map[op_info[0]] = op_info # op_name: (op_name, op_type) tmp_ordered_ops = [op_info_map[op_name] for op_name in ordered_ops] op_dtypes = OrderedDict(zip(tmp_ordered_ops, [target_dtype] * len(ordered_ops))) - indx=0 
- #defautly fallback 5 ops - for i in op_dtypes.keys(): - op_dtypes[i]="fp32" - indx=indx+1 - if indx>4: - break - logger.info("hawq op_config:"+str(op_dtypes)) logger.info(f"Start to accumulate fallback to {target_dtype}.") initial_op_tuning_cfg = deepcopy(op_tuning_cfg) fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], From 971c723cca1adddc73849cfa426c517370fa518b Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Wed, 7 Dec 2022 13:39:00 +0800 Subject: [PATCH 114/128] Support 'Square', 'Sum', 'SparseSegmentSqrtN' BF16 ops in TensorFlow backend (#223) Signed-off-by: Lv, Liang1 --- neural_compressor/adaptor/tensorflow.yaml | 2 +- .../adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/neural_compressor/adaptor/tensorflow.yaml b/neural_compressor/adaptor/tensorflow.yaml index 62524f544db..256eb4a17bb 100644 --- a/neural_compressor/adaptor/tensorflow.yaml +++ b/neural_compressor/adaptor/tensorflow.yaml @@ -35,7 +35,7 @@ "Erf", "FusedBatchNormV2", "FusedBatchNormGradV2", "FusedBatchNormV3", "FusedBatchNormGradV3", "LeakyRelu", "LeakyReluGrad", "Mean", "Mul", "Sub", "Elu", "EluGrad", "FloorDiv", "_FusedBatchNormEx", "Log", "Log1p", "LogSoftmax", "Prod", "RealDiv", "Reciprocal", "Rsqrt", "Selu", "SeluGrad", "Sigmoid", "SigmoidGrad", "Softmax", "Softplus", "SoftplusGrad", "Softsign", - "SoftsignGrad", "Sqrt", "SquaredDifference", "Tanh", "TanhGrad", #infer_list + "SoftsignGrad", "Sqrt", "Square", "SquaredDifference", "Sum", "Tanh", "TanhGrad", "SparseSegmentSqrtN", # infer_list "Abs", "ArgMax","ArgMin","BatchToSpace","BatchToSpaceND","BroadcastTo","Ceil","CheckNumerics","ClipByValue","Concat","ConcatV2", "DepthToSpace","DynamicPartition","DynamicStitch","EnsureShape","Enter","Equal","Exit","ExpandDims","Fill","Floor","Gather", "GatherNd","GatherV2","Greater","GreaterEqual","Identity","IsFinite","IsInf","IsNan","Less","LessEqual","Max","Maximum","MaxPool", diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py index 1b95f743fc5..40183e427d2 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py @@ -963,12 +963,6 @@ def _is_match_matmul(self, patterns, qdq_inserted=False): self.exclude_matmul_nodes.append(cur_node.name) continue - for i in self.node_name_mapping: - if weight_node.input and not weight_node.input[0].startswith('^') \ - and weight_node.name in self.node_name_mapping[i].output: - self.exclude_matmul_nodes.append(cur_node.name) - continue - for sub_rule in patterns: if sub_rule[0] != "Dequantize": self.exclude_matmul_nodes.append(cur_node.name) From 4e7e7e2082d35a27e7c2c3e2c41a806a121dd382 Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Wed, 7 Dec 2022 13:41:11 +0800 Subject: [PATCH 115/128] Support Conv2D + BiasAdd + Relu + Sum fusion (#221) Signed-off-by: Lv, Liang1 --- neural_compressor/adaptor/tensorflow.yaml | 4 ++++ .../tf_utils/quantize_graph/qdq/fuse_qdq_conv.py | 14 ++++++++++---- test/tfnewapi/test_tensorflow_graph_conv_fusion.py | 2 +- .../test_tensorflow_graph_qdq_conv_fusion.py | 2 +- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/neural_compressor/adaptor/tensorflow.yaml b/neural_compressor/adaptor/tensorflow.yaml index 256eb4a17bb..188b5ce00e6 100644 --- a/neural_compressor/adaptor/tensorflow.yaml +++ 
b/neural_compressor/adaptor/tensorflow.yaml @@ -273,6 +273,10 @@ 'Dequantize + Conv2D + BiasAdd + LeakyRelu + Add + QuantizeV2', 'Dequantize + Conv2D + LeakyRelu + AddV2 + QuantizeV2', 'Dequantize + Conv2D + LeakyRelu + Add + QuantizeV2', + 'Dequantize + Conv2D + BiasAdd + Relu + AddV2 + QuantizeV2', + 'Dequantize + Conv2D + BiasAdd + Relu + Add + QuantizeV2', + 'Dequantize + Conv2D + Relu + AddV2 + QuantizeV2', + 'Dequantize + Conv2D + Relu + Add + QuantizeV2', 'Dequantize + Conv2D + Add + QuantizeV2', 'Dequantize + Conv2D + AddV2 + QuantizeV2', 'Dequantize + Conv2D + AddV2 + Add + QuantizeV2', diff --git a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py index 3db96745ed6..0b1b712a627 100644 --- a/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py +++ b/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py @@ -58,12 +58,16 @@ def __init__(self, **kwargs): 'DequantizeConv2DSigmoidQuantizeV2': self.apply_newly_conv_biasadd_relu_fusion, 'DequantizeConv2DBiasAddLeakyReluAddV2QuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DBiasAddLeakyReluAddQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, + 'DequantizeConv2DBiasAddReluAddV2QuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, + 'DequantizeConv2DBiasAddReluAddQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DBiasAddAddLeakyReluQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DBiasAddAddV2LeakyReluQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DAddLeakyReluQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DAddV2LeakyReluQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DLeakyReluAddV2QuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DLeakyReluAddQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, + 'DequantizeConv2DReluAddV2QuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, + 'DequantizeConv2DReluAddQuantizeV2': self.apply_newly_conv_biasadd_addn_relu_fusion, 'DequantizeConv2DAddRelu6QuantizeV2': self.apply_newly_conv_biasadd_relu_fusion, 'DequantizeConv2DAddReluQuantizeV2': self.apply_newly_conv_biasadd_relu_fusion, 'DequantizeConv2DBiasAddAddRelu6MulMulQuantizeV2': self.apply_conv_biasadd_hardswish_fusion, @@ -1194,7 +1198,9 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name): # Dequantize + Conv2D + BiasAdd + AddV2 + Relu6 + QuantizeV2 # Dequantize + Conv2D + BiasAdd + Add + Relu + QuantizeV2 # Dequantize + Conv2D + BiasAdd + LeakyRelu + AddV2 + QuantizeV2 + # Dequantize + Conv2D + BiasAdd + Relu + AddV2(Add) + QuantizeV2 # Dequantize + Conv2D + LeakyRelu + AddV2 + QuantizeV2 + # Dequantize + Conv2D + Relu + AddV2(Add) + QuantizeV2 # Dequantize + Conv2D + Add + Add + Relu + QuantizeV2 # Dequantize + Conv2D + BiasAdd + Add + Relu + QuantizeV2 skip_node_name = match_node_name[2:] @@ -1236,8 +1242,8 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name): return self.apply_newly_conv_biasadd_fusion(match_node_name[:3] + [match_node_name[-1]]) forth_node = self.node_name_mapping[match_node_name[4]].node - if forth_node.op != 'LeakyRelu': - if third_node.op != 'LeakyRelu' and not self._find_relu_node(matched_node.node): + if forth_node.op not in ('LeakyRelu', 'Relu'): + if third_node.op not in ('LeakyRelu', 'Relu') and not 
self._find_relu_node(matched_node.node): return self.apply_newly_conv_biasadd_fusion(match_node_name[:3] + [match_node_name[-1]]) is_leakyrelu_add_fusion = third_node.op == 'LeakyRelu' and forth_node.op.find('Add') != -1 @@ -1251,7 +1257,7 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name): sum_node_name = self.node_name_mapping[match_node_name[3 + relu_offset]].node.input[sum_index] deq_node = self.node_name_mapping[sum_node_name].node - if (deq_node.op != 'LeakyRelu' and deq_node.op != 'Dequantize') or \ + if (deq_node.op != 'LeakyRelu' and deq_node.op != 'Dequantize' and deq_node.op != 'BiasAdd') or \ deq_node.op.find("Quantize") != -1: return self.apply_newly_conv_biasadd_fusion(match_node_name[:3]+[match_node_name[-1]]) @@ -1350,7 +1356,7 @@ def apply_newly_conv_biasadd_addn_relu_fusion(self, match_node_name): self.add_output_graph_node(quantized_conv_node) - if is_leakyrelu_add_fusion or is_leakyrelu: + if is_leakyrelu_add_fusion or is_leakyrelu or is_relu_add_fusion: quantize_down_name = self._add_quantize_down_nodes( node, quantized_node_name, dtypes.qint8, False) self._intel_cpu_add_dequantize_result_node( diff --git a/test/tfnewapi/test_tensorflow_graph_conv_fusion.py b/test/tfnewapi/test_tensorflow_graph_conv_fusion.py index 09a595be4a9..e5402c910fa 100644 --- a/test/tfnewapi/test_tensorflow_graph_conv_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_conv_fusion.py @@ -348,7 +348,7 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_1(self): for i in output_graph.graph_def.node: if i.op == '_FusedQuantizedConv2D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Dequantize']: + i.attr['fused_ops'].list.s == [b'BiasAdd', b'Sum', b'Relu', b'Requantize']: found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) diff --git a/test/tfnewapi/test_tensorflow_graph_qdq_conv_fusion.py b/test/tfnewapi/test_tensorflow_graph_qdq_conv_fusion.py index 981bdbee29a..cb25dffd52b 100644 --- a/test/tfnewapi/test_tensorflow_graph_qdq_conv_fusion.py +++ b/test/tfnewapi/test_tensorflow_graph_qdq_conv_fusion.py @@ -317,7 +317,7 @@ def test_conv_biasadd_addv2_relu_fallback_fusion_1(self): for i in output_graph.graph_def.node: if i.op == '_FusedQuantizedConv2D' and \ - i.attr['fused_ops'].list.s == [b'BiasAdd', b'Dequantize']: + i.attr['fused_ops'].list.s == [b'BiasAdd', b'Sum', b'Relu', b'Requantize']: found_conv_fusion = True break self.assertEqual(found_conv_fusion, True) From 620c5f1aa25c663a580d648acf10c094679122a0 Mon Sep 17 00:00:00 2001 From: "chen, suyue" Date: Wed, 7 Dec 2022 15:17:44 +0800 Subject: [PATCH 116/128] update azure pipeline (#229) Signed-off-by: chensuyue --- .azure-pipelines/model-test.yml | 2 +- .azure-pipelines/scripts/ut/run_basic_adaptor.sh | 11 ++++++----- .../scripts/ut/run_basic_adaptor_tfnewapi.sh | 11 ++++++----- .azure-pipelines/scripts/ut/run_basic_ipex.sh | 11 ++++++----- .azure-pipelines/scripts/ut/run_basic_itex.sh | 11 ++++++----- .azure-pipelines/scripts/ut/run_basic_others.sh | 11 ++++++----- .azure-pipelines/scripts/ut/run_ncoder.sh | 8 +++++--- .azure-pipelines/scripts/ut/run_ux.sh | 8 +++++--- 8 files changed, 41 insertions(+), 32 deletions(-) diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml index 512fbe158cb..270a245bb65 100644 --- a/.azure-pipelines/model-test.yml +++ b/.azure-pipelines/model-test.yml @@ -45,7 +45,7 @@ parameters: - ssd_mobilenet_v1_ckpt # - ssd_resnet50_v1_ckpt - inception_v1 - - resnet50_fashion + # - resnet50_fashion - darknet19 - densenet-121 - resnet-101 diff --git 
a/.azure-pipelines/scripts/ut/run_basic_adaptor.sh b/.azure-pipelines/scripts/ut/run_basic_adaptor.sh index d9a9fd2d990..d5510bbd177 100644 --- a/.azure-pipelines/scripts/ut/run_basic_adaptor.sh +++ b/.azure-pipelines/scripts/ut/run_basic_adaptor.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run basic adaptor" @@ -23,11 +22,13 @@ ut_log_name=${LOG_DIR}/ut_tf_${tensorflow_version}_pt_${pytorch_version}.log echo "cat run.sh..." cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} cp .coverage ${LOG_DIR}/.coverage.adaptor -echo "list all in ${LOG_DIR}" -ls -a ${LOG_DIR} +echo "------UT end -------" + if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/scripts/ut/run_basic_adaptor_tfnewapi.sh b/.azure-pipelines/scripts/ut/run_basic_adaptor_tfnewapi.sh index 2b687e633d3..ebd861efeb2 100644 --- a/.azure-pipelines/scripts/ut/run_basic_adaptor_tfnewapi.sh +++ b/.azure-pipelines/scripts/ut/run_basic_adaptor_tfnewapi.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run basic adaptor tfnewapi" @@ -19,11 +18,13 @@ ut_log_name=${LOG_DIR}/ut_tf_newapi.log echo "cat run.sh..." cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} cp .coverage ${LOG_DIR}/.coverage.tfnewapi -echo "list all in ${LOG_DIR}" -ls -a ${LOG_DIR} +echo "------UT end -------" + if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/scripts/ut/run_basic_ipex.sh b/.azure-pipelines/scripts/ut/run_basic_ipex.sh index 9e22bc01be3..edc2b5d3aeb 100644 --- a/.azure-pipelines/scripts/ut/run_basic_ipex.sh +++ b/.azure-pipelines/scripts/ut/run_basic_ipex.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run basic ipex" @@ -20,11 +19,13 @@ ut_log_name=${LOG_DIR}/ut_ipex.log echo "cat run.sh..." cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} cp .coverage ${LOG_DIR}/.coverage.ipex -echo "list all in ${LOG_DIR}" -ls -a ${LOG_DIR} +echo "------UT end -------" + if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! " diff --git a/.azure-pipelines/scripts/ut/run_basic_itex.sh b/.azure-pipelines/scripts/ut/run_basic_itex.sh index da9b9923ce9..45278216f8d 100644 --- a/.azure-pipelines/scripts/ut/run_basic_itex.sh +++ b/.azure-pipelines/scripts/ut/run_basic_itex.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run basic itex" @@ -19,11 +18,13 @@ ut_log_name=${LOG_DIR}/ut_itex.log echo "cat run.sh..." 
cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} cp .coverage ${LOG_DIR}/.coverage.itex -echo "list all in ${LOG_DIR}" -ls -a ${LOG_DIR} +echo "------UT end -------" + if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/scripts/ut/run_basic_others.sh b/.azure-pipelines/scripts/ut/run_basic_others.sh index 9789802a75a..4781eb89468 100644 --- a/.azure-pipelines/scripts/ut/run_basic_others.sh +++ b/.azure-pipelines/scripts/ut/run_basic_others.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run basic others" @@ -29,11 +28,13 @@ ut_log_name=${LOG_DIR}/ut_tf_${tensorflow_version}_pt_${pytorch_version}.log echo "cat run.sh..." cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} cp .coverage ${LOG_DIR}/.coverage.others -echo "list all in ${LOG_DIR}" -ls -a ${LOG_DIR} +echo "------UT end -------" + if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/scripts/ut/run_ncoder.sh b/.azure-pipelines/scripts/ut/run_ncoder.sh index aef05d13e3f..bb3e3212494 100644 --- a/.azure-pipelines/scripts/ut/run_ncoder.sh +++ b/.azure-pipelines/scripts/ut/run_ncoder.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run coder" @@ -15,9 +14,12 @@ ut_log_name=${LOG_DIR}/ut_neural_coder.log echo "cat run.sh..." cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} +echo "------UT end -------" if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/scripts/ut/run_ux.sh b/.azure-pipelines/scripts/ut/run_ux.sh index ceb1c7fcefd..e7041cbacce 100644 --- a/.azure-pipelines/scripts/ut/run_ux.sh +++ b/.azure-pipelines/scripts/ut/run_ux.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x python -c "import neural_compressor as nc;print(nc.version.__version__)" echo "run ux" @@ -21,9 +20,12 @@ ut_log_name=${LOG_DIR}/ut_tf_${tensorflow_version}_pt_${pytorch_version}.log echo "cat run.sh..." cat run.sh | tee ${ut_log_name} -echo "-------------" +echo "------UT start-------" bash run.sh 2>&1 | tee -a ${ut_log_name} +echo "------UT end -------" if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then + echo "Find errors in UT test, please check the output..." exit 1 -fi \ No newline at end of file +fi +echo "UT finished successfully! 
" \ No newline at end of file From 7ffbbf18c6eb2039342e859c7d1de14560eb1db0 Mon Sep 17 00:00:00 2001 From: xinhe Date: Wed, 7 Dec 2022 17:26:51 +0800 Subject: [PATCH 117/128] Add export examples for new API (#225) Signed-off-by: Xin He --- .../quantization/ptq_dynamic/eager/README.md | 5 +++ .../ptq_dynamic/eager/requirements.txt | 4 +- .../ptq_dynamic/eager/run_glue_tune.py | 35 ++++++++++++--- .../ptq_dynamic/eager/run_tuning.sh | 1 + .../quantization/ptq_static/fx/README.md | 8 ++++ .../ptq_static/fx/requirements.txt | 2 + .../quantization/ptq_static/fx/run_glue.py | 44 ++++++++++++++++++- .../quantization/ptq_static/fx/run_tuning.sh | 1 + .../quantization/qat/fx/README.md | 8 ++++ .../quantization/qat/fx/requirements.txt | 2 + .../quantization/qat/fx/run_glue_tune.py | 40 +++++++++++++++++ .../quantization/qat/fx/run_tuning.sh | 1 + 12 files changed, 143 insertions(+), 8 deletions(-) diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/README.md b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/README.md index ac449cdb781..016d8d99456 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/README.md +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/README.md @@ -198,5 +198,10 @@ Shapley values originate from cooperative game theory that come with desirable p > **Note** : run_glue_tune_with_shap.py is the example of "SST2" task. If you want to execute other glue task, you may take some slight change under "ShapleyMSE" class. +# Appendix +## Export to ONNX +Right now, we experimentally support exporting PyTorch model to ONNX model, includes FP32 and INT8 model. + +By enabling `--onnx` argument, Intel Neural Compressor will export fp32 ONNX model, INT8 QDQ ONNX model, and INT8 QLinear ONNX model. diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/requirements.txt b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/requirements.txt index 7ad9dc04d0c..688b5217718 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/requirements.txt +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/requirements.txt @@ -6,4 +6,6 @@ torch >= 1.3 transformers>=4.10.0 shap scipy -sacremoses \ No newline at end of file +sacremoses +onnx +onnxruntime \ No newline at end of file diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py index 13812b30b4e..b41c077ac59 100755 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py @@ -144,18 +144,25 @@ class ModelArguments: tune: bool = field( default=False, metadata={ - "help": "tune quantized model with Intel Neural Compressor)." 
- }, + "help": "tune quantized model with Intel Neural Compressor)."}, ) benchmark: bool = field( default=False, - metadata={"help": "run benchmark."}) + metadata={"help": "run benchmark."}, + ) int8: bool = field( default=False, - metadata={"help":"run benchmark."}) + metadata={"help":"initialize int8 model."}, + ) accuracy_only: bool = field( default=False, - metadata={"help":"Whether to only test accuracy for model tuned by Neural Compressor."}) + metadata={"help":"Whether to only test accuracy for model tuned by Neural Compressor."}, + ) + onnx: bool = field( + default=False, metadata={"help": "convert PyTorch model to ONNX"} + ) + + def main(): # See all possible arguments in src/transformers/training_args.py # or by passing the --help flag to this script. @@ -439,6 +446,24 @@ def eval_func_for_nc(model_tuned): q_model = fit(model, conf=conf, eval_func=eval_func_for_nc) from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream save_for_huggingface_upstream(q_model, tokenizer, training_args.output_dir) + + if model_args.onnx: + eval_dataloader = trainer.get_eval_dataloader() + it = iter(eval_dataloader) + input = next(it) + input.pop('labels') + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + dynamic_axes = {k: symbolic_names for k in input.keys()} + from neural_compressor.config import Torch2ONNXConfig + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + example_inputs=tuple(input.values()), + input_names=list(input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + q_model.export('int8-nlp-model.onnx', int8_onnx_config) exit(0) if model_args.accuracy_only: diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_tuning.sh b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_tuning.sh index e01add178fb..edc07713079 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_tuning.sh +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_tuning.sh @@ -86,6 +86,7 @@ function run_tuning { --no_cuda \ --output_dir ${tuned_checkpoint} \ --tune \ + --onnx \ ${extra_cmd} } diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/README.md b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/README.md index 881332a1314..d9b82bf907b 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/README.md +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/README.md @@ -187,3 +187,11 @@ quantizer.model = common.Model(model) model = quantizer.fit() model.save(training_args.output_dir) ``` + +# Appendix + +## Export to ONNX + +Right now, we experimentally support exporting PyTorch model to ONNX model, includes FP32 and INT8 model. + +By enabling `--onnx` argument, Intel Neural Compressor will export fp32 ONNX model, INT8 QDQ ONNX model, and INT8 QLinear ONNX model. 
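As an optional sanity check outside the example scripts, the exported file can be loaded back with onnxruntime and run once on dummy inputs. A minimal sketch, assuming the int8-nlp-model.onnx name used by the eager example above (adjust for the QDQ/QLinear file names):

    import numpy as np
    import onnxruntime as ort

    # Hypothetical smoke test: run the exported INT8 model once with dummy BERT-style int64 inputs.
    sess = ort.InferenceSession("int8-nlp-model.onnx", providers=["CPUExecutionProvider"])
    feed = {inp.name: np.ones((1, 128), dtype=np.int64) for inp in sess.get_inputs()}
    logits = sess.run(None, feed)[0]
    print(logits.shape)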
diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/requirements.txt b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/requirements.txt index fbbce5e4433..01afab8e2ae 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/requirements.txt +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/requirements.txt @@ -4,6 +4,8 @@ protobuf scipy scikit-learn Keras-Preprocessing +onnx +onnxruntime transformers >= 4.16.0 --find-links https://download.pytorch.org/whl/torch_stable.html torch >= 1.8.0+cpu diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py index 717ae91d886..113bfa69341 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py @@ -195,6 +195,9 @@ class ModelArguments: accuracy_only: bool = field( default=False, metadata={"help": "get accuracy"} ) + onnx: bool = field( + default=False, metadata={"help": "convert PyTorch model to ONNX"} + ) def main(): @@ -502,9 +505,46 @@ def eval_func(model): from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion tuning_criterion = TuningCriterion(max_trials=600) conf = PostTrainingQuantConfig(approach="static", backend="pytorch_fx", tuning_criterion=tuning_criterion) - model = fit(model, conf=conf, calib_dataloader=eval_dataloader, eval_func=eval_func) + q_model = fit(model, conf=conf, calib_dataloader=eval_dataloader, eval_func=eval_func) from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream - save_for_huggingface_upstream(model, tokenizer, training_args.output_dir) + save_for_huggingface_upstream(q_model, tokenizer, training_args.output_dir) + + if model_args.onnx: + it = iter(eval_dataloader) + input = next(it) + input.pop('labels') + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + dynamic_axes = {k: symbolic_names for k in input.keys()} + from neural_compressor.config import Torch2ONNXConfig + fp32_onnx_config = Torch2ONNXConfig( + dtype="fp32", + opset_version=14, + example_inputs=tuple(input.values()), + input_names=list(input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + q_model.export('fp32-model.onnx', fp32_onnx_config) + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=tuple(input.values()), + input_names=list(input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + q_model.export('int8-nlp-qdq-model.onnx', int8_onnx_config) + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QLinear", + example_inputs=tuple(input.values()), + input_names=list(input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + q_model.export('int8-nlp-qlinear-model.onnx', int8_onnx_config) return if model_args.benchmark or model_args.accuracy_only: diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_tuning.sh b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_tuning.sh index a3f5c6934c7..19712872786 100644 --- 
a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_tuning.sh +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_tuning.sh @@ -92,6 +92,7 @@ function run_tuning { --no_cuda \ --output_dir ${tuned_checkpoint} \ --tune \ + --onnx \ --overwrite_output_dir \ ${extra_cmd} } diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/README.md b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/README.md index e1c802c7ff2..fc6d1ccd4e1 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/README.md +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/README.md @@ -117,3 +117,11 @@ model = OptimizedModel.from_pretrained( ``` We also upstreamed several int8 models into HuggingFace [model hub](https://huggingface.co/models?other=Intel%C2%AE%20Neural%20Compressor) for users to ramp up. + +# Appendix + +## Export to ONNX + +Right now, we experimentally support exporting PyTorch model to ONNX model, includes FP32 and INT8 model. + +By enabling `--onnx` argument, Intel Neural Compressor will export fp32 ONNX model, INT8 QDQ ONNX model, and INT8 QLinear ONNX model. diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/requirements.txt b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/requirements.txt index 5386769210e..2bb6fc03b2d 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/requirements.txt +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/requirements.txt @@ -4,5 +4,7 @@ datasets == 1.18.0 sentencepiece != 0.1.92 protobuf scipy +onnx +onnxruntime --find-links https://download.pytorch.org/whl/torch_stable.html torch >= 1.8.0+cpu diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue_tune.py b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue_tune.py index f5bc771e712..f9fe765dbc2 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue_tune.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue_tune.py @@ -194,6 +194,9 @@ class ModelArguments: benchmark: bool = field( default=False, metadata={"help": "get benchmark instead of accuracy"} ) + onnx: bool = field( + default=False, metadata={"help": "convert PyTorch model to ONNX"} + ) def main(): @@ -533,6 +536,43 @@ def benchmark(model): from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream save_for_huggingface_upstream(model, tokenizer, training_args.output_dir) + + if model_args.onnx: + it = iter(eval_dataloader) + input = next(it) + input.pop('labels') + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + dynamic_axes = {k: symbolic_names for k in input.keys()} + from neural_compressor.config import Torch2ONNXConfig + fp32_onnx_config = Torch2ONNXConfig( + dtype="fp32", + opset_version=14, + example_inputs=tuple(input.values()), + input_names=list(input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + model.export('fp32-model.onnx', fp32_onnx_config) + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QDQ", + example_inputs=tuple(input.values()), + input_names=list(input.keys()), + output_names=['labels'], + 
dynamic_axes=dynamic_axes, + ) + model.export('int8-nlp-qdq-model.onnx', int8_onnx_config) + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="QLinear", + example_inputs=tuple(input.values()), + input_names=list(input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + model.export('int8-nlp-qlinear-model.onnx', int8_onnx_config) return if model_args.benchmark: diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_tuning.sh b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_tuning.sh index 888a8968d24..31d6f314e8b 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_tuning.sh +++ b/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_tuning.sh @@ -60,6 +60,7 @@ function run_tuning { --save_strategy steps \ --metric_for_best_model f1 \ --save_total_limit 1 \ + --onnx \ --tune } From f9008e236a816a67da0f6a1683ccf24a98f9bf23 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 8 Dec 2022 10:07:43 +0800 Subject: [PATCH 118/128] support for adding hawq_v2 loss by new API Signed-off-by: yiliu30 --- neural_compressor/conf/config.py | 5 +++-- neural_compressor/strategy/hawq_v2.py | 17 ++++++++--------- test/strategy/test_hawq_v2_2.x.py | 14 +++++++++----- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index a72a50e782a..1ce7b0c8c11 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -861,7 +861,8 @@ def percent_to_float(data): Optional('sigopt_project_id'): str, Optional('sigopt_experiment_name', default='nc-tune'): str, Optional('accuracy_weight', default=1.0): float, - Optional('latency_weight', default=1.0): float + Optional('latency_weight', default=1.0): float, + Optional('hawq_v2_loss', default=None): object, } , Hook('accuracy_criterion', handler=_valid_accuracy_field): object, Optional('accuracy_criterion', default={'relative': 0.01}): { @@ -1354,7 +1355,7 @@ def map_pyconfig_to_cfg(self, pythonic_config): if pythonic_config.quantization.strategy_kwargs: st_kwargs = pythonic_config.quantization.strategy_kwargs for st_key in ['sigopt_api_token', 'sigopt_project_id', 'sigopt_experiment_name', \ - 'accuracy_weight', 'latency_weight']: + 'accuracy_weight', 'latency_weight', 'hawq_v2_loss']: if st_key in st_kwargs: st_val = st_kwargs[st_key] mapping.update({'tuning.strategy.' 
+ st_key: st_val}) diff --git a/neural_compressor/strategy/hawq_v2.py b/neural_compressor/strategy/hawq_v2.py index 43f300ef488..e6b5d7c619d 100644 --- a/neural_compressor/strategy/hawq_v2.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -86,10 +86,8 @@ def __init__(self, model, conf, q_dataloader, q_func=None, q_hooks) def next_tune_cfg(self): - # TODO remove it before merge - import torch tuning_space = self.tuning_space - calib_size = tuning_space.root_item.get_option_by_name('calib_sampling_size').options[0] ##TODO suppoprt list + calib_size = tuning_space.root_item.get_option_by_name('calib_sampling_size').options[0] # Initialize the tuning config for each op according to the quantization approach op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() @@ -111,12 +109,13 @@ def next_tune_cfg(self): # Start compute the hessian trace logger.info(f"************** Start compute the hessian trace *****************") target_dtype = "int8" - # TODO remove it before merge - criterion=torch.nn.CrossEntropyLoss() - op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, - dataloader = self.calib_dataloader, - q_model = self.q_model, - criterion =criterion, # TODO using user specify loss + hawq_v2_criterion =self.cfg.tuning.strategy.hawq_v2_loss + assert hawq_v2_criterion is not None, "HAWQ-V2 strategy needs model loss function to compute the gradient, \ + Please assign it by strategy_kwargs({'hawq_v2_loss': hawq_v2_loss})." + op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, + dataloader = self.calib_dataloader, + q_model = self.q_model, + criterion =hawq_v2_criterion, enable_act = False) sorted_op_to_traces = dict(sorted(op_to_traces.items(), key=lambda item: item[1], reverse=True)) logger.info(f"************** Hessian Trace *****************") diff --git a/test/strategy/test_hawq_v2_2.x.py b/test/strategy/test_hawq_v2_2.x.py index 0ec055c26c3..8442154acb5 100644 --- a/test/strategy/test_hawq_v2_2.x.py +++ b/test/strategy/test_hawq_v2_2.x.py @@ -4,10 +4,13 @@ import shutil import unittest -import numpy as np - from neural_compressor.utils import logger +# loss function for hawq-v2 +def hawq_v2_loss(output, target): + import torch + return torch.nn.CrossEntropyLoss()(output, target) + class TestHAWQV2TuningStrategy(unittest.TestCase): @classmethod @@ -39,9 +42,10 @@ def _fake_eval(model): # dataset and dataloader dataset = DATASETS("pytorch")["dummy"](((1, 3, 224, 224))) dataloader = DATALOADERS["pytorch"](dataset) - - # tuning and accuracy criterion - tuning_criterion = TuningCriterion(strategy='hawq_v2', max_trials=5) + + #tuning and accuracy criterion + strategy_kwargs = {'hawq_v2_loss': hawq_v2_loss} + tuning_criterion = TuningCriterion(strategy='hawq_v2', strategy_kwargs=strategy_kwargs, max_trials=5) conf = PostTrainingQuantConfig(approach="static", backend="pytorch_fx", tuning_criterion=tuning_criterion) # fit From 0d8f0e830b8e462b19a8c64985d73e14ff9a1e18 Mon Sep 17 00:00:00 2001 From: "biao.fang" Date: Thu, 8 Dec 2022 14:31:55 +0800 Subject: [PATCH 119/128] enable trace type Tensor->float --- neural_compressor/adaptor/torch_utils/hawq_metric.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/neural_compressor/adaptor/torch_utils/hawq_metric.py b/neural_compressor/adaptor/torch_utils/hawq_metric.py index 2b6a7790eb0..0e505848a85 100644 --- a/neural_compressor/adaptor/torch_utils/hawq_metric.py +++ b/neural_compressor/adaptor/torch_utils/hawq_metric.py @@ -267,13 
+267,13 @@ def get_weight_traces(self, num_samples): diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) if diff_ratio < self.tolerance and iter > 10: ##TODO magic number break - if iter == 50: ##TODO for debug + if iter == 20: ##TODO for debug break prev_avg_model_trace = model_trace weight_name_to_traces = {} layer_traces = layer_traces_estimate for weight_name, trace in zip(self.weight_names, layer_traces): - weight_name_to_traces[weight_name] = trace + weight_name_to_traces[weight_name] = float(trace)# tensor->float op_name_to_trace = {} for weight_name in self.weight_names: op_name = self.weight_to_op[weight_name] @@ -434,7 +434,7 @@ def get_avg_traces(self, enable_act=True, num_samples=32): act_traces = self.get_act_traces(num_samples) for i,j in zip(act_traces,mse_gap): #currently use mse to analysis - act_trace[i]=act_traces[i]+mse_gap[j] + act_trace[i]=float(act_traces[i])+float(mse_gap[j])# Tensor->float traces['activation'] = act_traces return traces From 8350241179af04cc0cb40fee9dcca73723ecc72f Mon Sep 17 00:00:00 2001 From: "biao.fang" Date: Thu, 8 Dec 2022 15:03:17 +0800 Subject: [PATCH 120/128] cancel Max iter times for debugging --- neural_compressor/adaptor/torch_utils/hawq_metric.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/neural_compressor/adaptor/torch_utils/hawq_metric.py b/neural_compressor/adaptor/torch_utils/hawq_metric.py index 0e505848a85..fd1428acc12 100644 --- a/neural_compressor/adaptor/torch_utils/hawq_metric.py +++ b/neural_compressor/adaptor/torch_utils/hawq_metric.py @@ -267,8 +267,8 @@ def get_weight_traces(self, num_samples): diff_ratio = abs(model_trace - prev_avg_model_trace) / (prev_avg_model_trace + self.eps) if diff_ratio < self.tolerance and iter > 10: ##TODO magic number break - if iter == 20: ##TODO for debug - break + # if iter == 20: ##TODO for debugging + # break prev_avg_model_trace = model_trace weight_name_to_traces = {} layer_traces = layer_traces_estimate @@ -321,8 +321,8 @@ def get_act_traces(self, num_samples): prev_model_act_trace + self.eps) if diff_ratio < self.tolerance and iter > 10: ##TODO magic number break - if iter == 50: ##TODO for debug - break + # if iter == 50: ##TODO for debug + # break prev_model_act_trace = current_model_act_trace act_traces_per_sample.append(vt_H_v_mean_per_act) From 8b7993819ed26edaa4c756f2a4b1a8fc95de4845 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 8 Dec 2022 18:51:25 +0800 Subject: [PATCH 121/128] revert change for test Signed-off-by: yiliu30 --- .../quantization/ptq/cpu/fx/conf.yaml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml index ef61c6c3e0b..f11483acd16 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/conf.yaml @@ -21,7 +21,7 @@ quantization: # optional. tuning constrai calibration: sampling_size: 300 # optional. default value is 100. used to set how many samples should be used in calibration. dataloader: - batch_size: 1 + batch_size: 30 dataset: ImageFolder: root: /path/to/calibration/dataset # NOTE: modify to calibration dataset location if needed @@ -40,10 +40,10 @@ evaluation: # optional. 
required if use metric: topk: 1 # built-in metrics are topk, map, f1, allow user to register new metric. dataloader: - batch_size: 1 + batch_size: 30 dataset: ImageFolder: - root: /path/to/calibration/dataset # NOTE: modify to evaluation dataset location if needed + root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 @@ -61,7 +61,7 @@ evaluation: # optional. required if use batch_size: 1 dataset: ImageFolder: - root: /path/to/calibration/dataset # NOTE: modify to evaluation dataset location if needed + root: /path/to/evaluation/dataset # NOTE: modify to evaluation dataset location if needed transform: Resize: size: 256 @@ -73,10 +73,8 @@ evaluation: # optional. required if use std: [0.229, 0.224, 0.225] tuning: - strategy: - name: hawq accuracy_criterion: relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. exit_policy: timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. + random_seed: 9527 # optional. random seed for deterministic tuning. \ No newline at end of file From 04fc7aed3f696479067614bb1787248e11f6859e Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 8 Dec 2022 18:55:43 +0800 Subject: [PATCH 122/128] fixed some bugs Signed-off-by: yiliu30 --- neural_compressor/adaptor/pytorch.py | 2 +- neural_compressor/adaptor/torch_utils/hawq_metric.py | 2 ++ neural_compressor/strategy/basic.py | 5 ----- neural_compressor/strategy/hawq_v2.py | 4 ++-- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 79df22deeba..2ff206c392c 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -29,7 +29,6 @@ from ..utils import logger from .query import QueryBackendCapability from ..experimental.data.dataloaders.base_dataloader import BaseDataLoader -from .torch_utils.hawq_metric import hawq_top torch = LazyImport("torch") json = LazyImport("json") @@ -1113,6 +1112,7 @@ def calculate_hessian_trace(self, Return: hessian_trace(Dict[Tuple, float]), key: (op_name, op_type); value: hessian trace. """ + from .torch_utils.hawq_metric import hawq_top op_to_traces=hawq_top(fp32_model=fp32_model,dataloader=dataloader,q_model=q_model,criterion=criterion,enable_act=enable_act) return op_to_traces pass diff --git a/neural_compressor/adaptor/torch_utils/hawq_metric.py b/neural_compressor/adaptor/torch_utils/hawq_metric.py index fd1428acc12..c6973826516 100644 --- a/neural_compressor/adaptor/torch_utils/hawq_metric.py +++ b/neural_compressor/adaptor/torch_utils/hawq_metric.py @@ -14,6 +14,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from ...utils.utility import LazyImport +torch = LazyImport("torch") import copy import numpy as np diff --git a/neural_compressor/strategy/basic.py b/neural_compressor/strategy/basic.py index 184a15996f7..c35398dd4bb 100644 --- a/neural_compressor/strategy/basic.py +++ b/neural_compressor/strategy/basic.py @@ -144,11 +144,6 @@ def next_tune_cfg(self): logger.info(f"Start to fallback op to {target_dtype} one by one.") self._fallback_started() fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up - # ops_sensitivity = self.adaptor.calculate_op_sensitivity(self._fp32_model, - # self.calib_dataloader, - # method_args = {'name': 'hessian_trace'}) - #fallback_items_name_lst = sorted(ops_sensitivity, key = lambda items: items[1], reverse=True) - op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], diff --git a/neural_compressor/strategy/hawq_v2.py b/neural_compressor/strategy/hawq_v2.py index 0647b6a15d4..cb4d759bfcc 100644 --- a/neural_compressor/strategy/hawq_v2.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -110,8 +110,8 @@ def next_tune_cfg(self): logger.info(f"************** Start compute the hessian trace *****************") target_dtype = "int8" hawq_v2_criterion =self.cfg.tuning.strategy.hawq_v2_loss - assert hawq_v2_criterion is not None, "HAWQ-V2 strategy needs model loss function to compute the gradient, \ - Please assign it by strategy_kwargs({'hawq_v2_loss': hawq_v2_loss})." + # assert hawq_v2_criterion is not None, "HAWQ-V2 strategy needs model loss function to compute the gradient, \ + # Please assign it by strategy_kwargs({'hawq_v2_loss': hawq_v2_loss})." op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, dataloader = self.calib_dataloader, q_model = self.q_model, From 953d861d1cdb36f17e6ac40a1d0bd6799d433f9b Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 8 Dec 2022 19:00:12 +0800 Subject: [PATCH 123/128] revert change for test Signed-off-by: yiliu30 --- neural_compressor/strategy/mse.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/neural_compressor/strategy/mse.py b/neural_compressor/strategy/mse.py index 8dafa35759d..614984359ba 100644 --- a/neural_compressor/strategy/mse.py +++ b/neural_compressor/strategy/mse.py @@ -194,11 +194,10 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) # step3. 
optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) - early_stop_tuning = True + early_stop_tuning = False stage1_cnt = 0 int8_ops = quant_mode_wise_items['dynamic'] + quant_mode_wise_items['static'] stage1_max = min(5, len(int8_ops)) # TODO set a more appropriate value - stage1_max=-1 op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) for op_tuning_cfg in op_wise_tuning_sampler: From 2e14eb121ba10a5babe1599c1651d19cb4cdf6ea Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 8 Dec 2022 19:09:42 +0800 Subject: [PATCH 124/128] add more log info Signed-off-by: yiliu30 --- neural_compressor/adaptor/torch_utils/hawq_metric.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/neural_compressor/adaptor/torch_utils/hawq_metric.py b/neural_compressor/adaptor/torch_utils/hawq_metric.py index c6973826516..a63b2ef4c85 100644 --- a/neural_compressor/adaptor/torch_utils/hawq_metric.py +++ b/neural_compressor/adaptor/torch_utils/hawq_metric.py @@ -259,9 +259,10 @@ def get_vtHv_act(self, params, num_samples): break def get_weight_traces(self, num_samples): + import tqdm layer_traces_per_iter = [] prev_avg_model_trace = 0 - for iter in range(self.max_iter): + for iter in tqdm.tqdm(range(self.max_iter)): layer_traces = self.get_vtHv_weight(self.params, num_samples) layer_traces_per_iter.append(layer_traces) layer_traces_estimate = torch.mean(torch.stack(layer_traces_per_iter), dim=0) From 52ee89d57218e20b6029a9a519f6d4caa1b78bbe Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 8 Dec 2022 19:18:14 +0800 Subject: [PATCH 125/128] add skip first as arg Signed-off-by: yiliu30 --- neural_compressor/strategy/hawq_v2.py | 3 ++- neural_compressor/strategy/st_utils/tuning_sampler.py | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/neural_compressor/strategy/hawq_v2.py b/neural_compressor/strategy/hawq_v2.py index cb4d759bfcc..6645fd86153 100644 --- a/neural_compressor/strategy/hawq_v2.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -147,7 +147,8 @@ def next_tune_cfg(self): initial_op_tuning_cfg = deepcopy(op_tuning_cfg) fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], initial_op_tuning_cfg=op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) + op_dtypes=op_dtypes, accumulate=True, + skip_first=False) for op_tuning_cfg in fallback_sampler: op_tuning_cfg['calib_sampling_size'] = calib_size yield op_tuning_cfg diff --git a/neural_compressor/strategy/st_utils/tuning_sampler.py b/neural_compressor/strategy/st_utils/tuning_sampler.py index f311d7c16a4..9b5eff7dc1b 100644 --- a/neural_compressor/strategy/st_utils/tuning_sampler.py +++ b/neural_compressor/strategy/st_utils/tuning_sampler.py @@ -254,16 +254,18 @@ def __init__(self, tuning_order_lst: List[TuningOrder], initial_op_tuning_cfg: Dict[tuple, Any], op_dtypes: Dict[str, str], - accumulate: bool + accumulate: bool, + skip_first: bool = True ): super().__init__(tuning_space, tuning_order_lst, initial_op_tuning_cfg) self.op_dtypes = op_dtypes self.accumulate = accumulate + self.skip_first = skip_first pass def __iter__(self): new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) - skip_first = False + skip_first = self.skip_first for op_name_type, target_dtype in self.op_dtypes.items(): if not self.accumulate: new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) @@ -272,7 +274,7 @@ def __iter__(self): if self.accumulate and skip_first: # skip the first one skip_first = False continue - 
logger.info(f"fallback {op_name_type} to {target_dtype}") + logger.debug(f"fallback {op_name_type} to {target_dtype}") yield new_tune_cfg # need to skip the first one From 6aac6c510b2bed1194de602dc1a053114f96ae48 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 9 Dec 2022 08:57:48 +0800 Subject: [PATCH 126/128] fixed some format error Signed-off-by: yiliu30 --- neural_compressor/adaptor/pytorch.py | 6 ++- .../adaptor/torch_utils/hawq_metric.py | 46 ++++++++++--------- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 2ff206c392c..238df231513 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -1113,7 +1113,11 @@ def calculate_hessian_trace(self, hessian_trace(Dict[Tuple, float]), key: (op_name, op_type); value: hessian trace. """ from .torch_utils.hawq_metric import hawq_top - op_to_traces=hawq_top(fp32_model=fp32_model,dataloader=dataloader,q_model=q_model,criterion=criterion,enable_act=enable_act) + op_to_traces=hawq_top(fp32_model=fp32_model, + dataloader=dataloader, + q_model=q_model, + criterion=criterion, + enable_act=enable_act) return op_to_traces pass diff --git a/neural_compressor/adaptor/torch_utils/hawq_metric.py b/neural_compressor/adaptor/torch_utils/hawq_metric.py index a63b2ef4c85..f68a1234164 100644 --- a/neural_compressor/adaptor/torch_utils/hawq_metric.py +++ b/neural_compressor/adaptor/torch_utils/hawq_metric.py @@ -41,8 +41,10 @@ def remove(self): class HessianTrace: """ please refer to - Yao, Zhewei, et al. "Pyhessian: Neural networks through the lens of the hessian." 2020 IEEE international conference on big data (Big data). IEEE, 2020. - Dong, Zhen, et al. "Hawq-v2: Hessian aware trace-weighted quantization of neural networks." Advances in neural information processing systems 33 (2020): 18518-18529. + Yao, Zhewei, et al. "Pyhessian: Neural networks through the lens of the hessian." + 2020 IEEE international conference on big data (Big data). IEEE, 2020. + Dong, Zhen, et al. "Hawq-v2: Hessian aware trace-weighted quantization of neural networks." + Advances in neural information processing systems 33 (2020): 18518-18529. 
https://github.com/openvinotoolkit/nncf/blob/develop/nncf/torch/quantization/hessian_trace.py """ @@ -239,24 +241,25 @@ def get_vtHv_weight(self, params, num_samples): v_t_H_v = torch.stack([torch.mean(h_v * v_t) for (h_v, v_t) in zip(H_v, v)]) ##maybe sum is better return v_t_H_v - def get_vtHv_act(self, params, num_samples): - v = self.sample_rademacher(params) - H_v = [0] * len(v) - cnt = 0 - for step, data in enumerate(self.dataloader): - if cnt >= num_samples: - break - for i in range(self.dataloader.batchsize): ##force to batchsize to be 1 - input = data[0][i:i + 1] - target = data[1][i:i + 1] - - self.get_gradients(self.model, (input, target), self.criterion, create_graph=True) - layer_acts = [self.layer_acts[key] for key in self.layer_acts.keys()] - layer_act_gradients = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] - hv_one = torch.autograd.grad(layer_act_gradients, layer_acts, v, only_inputs=True, retain_graph=False) - cnt += 1 - if cnt >= num_samples: - break + # def get_vtHv_act(self, params, num_samples): + # v = self.sample_rademacher(params) + # H_v = [0] * len(v) + # cnt = 0 + # for step, data in enumerate(self.dataloader): + # if cnt >= num_samples: + # break + # for i in range(self.dataloader.batchsize): ##force to batchsize to be 1 + # input = data[0][i:i + 1] + # target = data[1][i:i + 1] + + # self.get_gradients(self.model, (input, target), self.criterion, create_graph=True) + # layer_acts = [self.layer_acts[key] for key in self.layer_acts.keys()] + # layer_act_gradients = [self.layer_acts_grads[key] for key in self.layer_acts.keys()] + # hv_one = torch.autograd.grad(layer_act_gradients, layer_acts, v, + # only_inputs=True, retain_graph=False) + # cnt += 1 + # if cnt >= num_samples: + # break def get_weight_traces(self, num_samples): import tqdm @@ -567,7 +570,8 @@ def hawq_top(fp32_model,q_model,dataloader,criterion,enable_act): if enable_act: act_to_traces=traces['activation'] for trace_i, pertur_i,act_i in zip(op_to_traces.keys(),pertur_lst.keys(),act_to_traces.keys()): - op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i]+act_to_traces[act_i] #Formula:Omig=Trace*L2+act_trace + #Formula:Omig=Trace*L2+act_trace + op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i]+act_to_traces[act_i] else: for trace_i, pertur_i in zip(op_to_traces.keys(),pertur_lst.keys()): op_to_traces[trace_i]=pertur_lst[pertur_i]*op_to_traces[trace_i] #Formula:Omig=Trace*L2 From e63195cf2f52d00e43539df380d1b354715dcdea Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 9 Dec 2022 20:59:26 +0800 Subject: [PATCH 127/128] resolved the conflicts Signed-off-by: yiliu30 --- neural_compressor/strategy/hawq_v2.py | 6 +++--- test/strategy/test_hawq_v2_2.x.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/neural_compressor/strategy/hawq_v2.py b/neural_compressor/strategy/hawq_v2.py index 6645fd86153..2f33bf39ba4 100644 --- a/neural_compressor/strategy/hawq_v2.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -20,9 +20,9 @@ from .strategy import strategy_registry, TuneStrategy -from .st_utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler -from .st_utils.tuning_structs import OpTuningConfig -from .st_utils.tuning_space import TUNING_ITEMS_LST +from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler +from .utils.tuning_structs import OpTuningConfig +from .utils.tuning_space import TUNING_ITEMS_LST from ..utils import logger @strategy_registry diff 
--git a/test/strategy/test_hawq_v2_2.x.py b/test/strategy/test_hawq_v2_2.x.py index 8442154acb5..19b52e07826 100644 --- a/test/strategy/test_hawq_v2_2.x.py +++ b/test/strategy/test_hawq_v2_2.x.py @@ -46,7 +46,7 @@ def _fake_eval(model): #tuning and accuracy criterion strategy_kwargs = {'hawq_v2_loss': hawq_v2_loss} tuning_criterion = TuningCriterion(strategy='hawq_v2', strategy_kwargs=strategy_kwargs, max_trials=5) - conf = PostTrainingQuantConfig(approach="static", backend="pytorch_fx", tuning_criterion=tuning_criterion) + conf = PostTrainingQuantConfig(approach="static", tuning_criterion=tuning_criterion) # fit q_model = fit(model=model, From 36137c266e876eadbd42366ad9dacd66a67a6220 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 9 Dec 2022 21:27:17 +0800 Subject: [PATCH 128/128] revert some change for test Signed-off-by: yiliu30 --- .../efficientnet/quantization/ptq/eager/run_tuning.sh | 3 +-- .../quantization/ptq/cpu/eager/run_tuning.sh | 7 +++---- .../quantization/ptq/cpu/eager/run_tuning_dump_tensor.sh | 7 +++---- .../quantization/ptq/cpu/fx/run_tuning.sh | 7 +++---- .../quantization/ptq/gpu/eager/run_tuning.sh | 7 +++---- 5 files changed, 13 insertions(+), 18 deletions(-) diff --git a/examples/pytorch/image_recognition/efficientnet/quantization/ptq/eager/run_tuning.sh b/examples/pytorch/image_recognition/efficientnet/quantization/ptq/eager/run_tuning.sh index 588ec872406..c5c764b7155 100644 --- a/examples/pytorch/image_recognition/efficientnet/quantization/ptq/eager/run_tuning.sh +++ b/examples/pytorch/image_recognition/efficientnet/quantization/ptq/eager/run_tuning.sh @@ -41,8 +41,7 @@ function run_tuning { conf_yaml=conf_efficientnet_b0.yaml elif [ "${topology}" = "mobilenetv3_rw" ]; then conf_yaml=conf_mobilenetv3_rw.yaml - # TODO only for test, uncomment it before merge - # sed -i "/relative:/s|relative:.*|relative: 0.02|g" $conf_yaml + sed -i "/relative:/s|relative:.*|relative: 0.02|g" $conf_yaml fi sed -i "/\/path\/to\/calibration\/dataset/s|root:.*|root: $dataset_location/train|g" $conf_yaml sed -i "/\/path\/to\/evaluation\/dataset/s|root:.*|root: $dataset_location/val|g" $conf_yaml diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning.sh b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning.sh index 7752585ddb5..2f930ad1470 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning.sh +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning.sh @@ -37,10 +37,9 @@ function init_params { # run_tuning function run_tuning { - # TODO only for test, uncomment it before merge - # if [ "mobilenet_v2" = "$topology" ];then - # sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf.yaml - # fi + if [ "mobilenet_v2" = "$topology" ];then + sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf.yaml + fi extra_cmd="" if [ -n "$output_model" ];then extra_cmd = $extra_cmd"--tuned_checkpoint ${output_model}" diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning_dump_tensor.sh b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning_dump_tensor.sh index 3c45fe25a32..02f968d7d23 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning_dump_tensor.sh +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/eager/run_tuning_dump_tensor.sh @@ -39,10 +39,9 @@ 
function init_params { function run_tuning { sed -i "/\/path\/to\/calibration\/dataset/s|root:.*|root: $dataset_location/train|g" conf_dump_tensors.yaml sed -i "/\/path\/to\/evaluation\/dataset/s|root:.*|root: $dataset_location/val|g" conf_dump_tensors.yaml - # TODO only for test, uncomment it before merge - # if [ "mobilenet_v2" = "$topology" ];then - # sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf_dump_tensors.yaml - # fi + if [ "mobilenet_v2" = "$topology" ];then + sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf_dump_tensors.yaml + fi extra_cmd="" if [ -n "$output_model" ];then diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_tuning.sh b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_tuning.sh index eaa81d6e85c..054d4389d9c 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_tuning.sh +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_tuning.sh @@ -37,10 +37,9 @@ function init_params { # run_tuning function run_tuning { - # TODO only for test, uncomment it before merge - # if [ "mobilenet_v2" = "$topology" ];then - # sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf.yaml - # fi + if [ "mobilenet_v2" = "$topology" ];then + sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf.yaml + fi extra_cmd="" if [ -n "$output_model" ];then extra_cmd = $extra_cmd"--tuned_checkpoint ${output_model}" diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/gpu/eager/run_tuning.sh b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/gpu/eager/run_tuning.sh index a4460264ee2..3a272f7e8eb 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/gpu/eager/run_tuning.sh +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/gpu/eager/run_tuning.sh @@ -39,10 +39,9 @@ function init_params { function run_tuning { sed -i "/\/path\/to\/calibration\/dataset/s|root:.*|root: $dataset_location/train|g" conf.yaml sed -i "/\/path\/to\/evaluation\/dataset/s|root:.*|root: $dataset_location/val|g" conf.yaml - # TODO only for test, uncomment it before merge - # if [ "mobilenet_v2" = "$topology" ];then - # sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf.yaml - # fi + if [ "mobilenet_v2" = "$topology" ];then + sed -i "/relative:/s|relative:.*|relative: 0.02|g" conf.yaml + fi extra_cmd="${dataset_location}"
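The script-level reverts above close out the series; the substantive piece remains the Hessian-trace estimate that `HessianTrace` in `hawq_metric.py` computes and that the HAWQ-V2 strategy sorts ops by. The sketch below is a minimal, self-contained illustration of that Hutchinson-style estimator, Tr(H) ~ E[v^T H v] with Rademacher probes; the function name, probe count, and the absence of per-layer bookkeeping are simplifications, not the library's implementation.

```python
# Minimal sketch of the Hutchinson trace estimator that hawq_metric.py builds on
# (illustrative only; the real HessianTrace class adds per-layer traces, hooks,
# activation traces, and a tolerance-based stopping rule).
import torch

def hutchinson_trace(model, criterion, data, target, n_probes=16):
    params = [p for p in model.parameters() if p.requires_grad]
    loss = criterion(model(data), target)
    # First backward pass keeps the graph so it can be differentiated again.
    grads = torch.autograd.grad(loss, params, create_graph=True)
    estimates = []
    for _ in range(n_probes):
        # Rademacher probe: entries are +1 or -1 with equal probability.
        v = [(torch.randint(0, 2, p.shape) * 2 - 1).to(p) for p in params]
        # Hessian-vector product via double backward: H v = d(g . v)/dp.
        Hv = torch.autograd.grad(grads, params, grad_outputs=v, retain_graph=True)
        estimates.append(sum((h * vi).sum() for h, vi in zip(Hv, v)))
    return torch.stack(estimates).mean()  # ~ Tr(H), averaged over the probes
```

In the strategy itself, the per-op traces come back from `adaptor.calculate_hessian_trace`, are sorted in descending order, and drive the fallback order in `next_tune_cfg`, with the loss supplied by the user through `strategy_kwargs={'hawq_v2_loss': ...}` as exercised in `test_hawq_v2_2.x.py`.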