|
15 | 15 | # See the License for the specific language governing permissions and |
16 | 16 | # limitations under the License. |
17 | 17 |
|
18 | | -import os |
19 | | -from .utils import logger |
20 | | -from .data import DATALOADERS, DATASETS |
21 | 18 | from .experimental import Quantization as ExpQuantization |
22 | | -from deprecated import deprecated |
23 | 19 | from neural_compressor.conf.pythonic_config import Config |
24 | 20 | from neural_compressor.config import PostTrainingQuantConfig |
25 | 21 |
|
26 | | -class Quantization(object): |
27 | | - """Quantization class automatically searches for optimal quantization recipes for low |
28 | | - precision model inference, achieving best tuning objectives like inference performance |
29 | | - within accuracy loss constraints. |
30 | | -
|
31 | | - Tuner abstracts out the differences of quantization APIs across various DL frameworks |
32 | | - and brings a unified API for automatic quantization that works on frameworks including |
33 | | - tensorflow, pytorch and mxnet. |
34 | | -
|
35 | | - Since DL use cases vary in the accuracy metrics (Top-1, MAP, ROC etc.), loss criteria |
36 | | -    (<1% or <0.1% etc.) and tuning objectives (performance, memory footprint etc.),
37 | | -    the Tuner class provides a flexible configuration interface via YAML for users to specify
38 | | - these parameters. |
39 | | -
|
40 | | - Args: |
41 | | - conf_fname_or_obj (string or obj): The path to the YAML configuration file or |
42 | | - Quantization_Conf class containing accuracy goal, tuning objective and preferred |
43 | | - calibration & quantization tuning space etc. |
44 | | -
|
45 | | - """ |
46 | | - |
47 | | - def __init__(self, conf_fname_or_obj): |
48 | | - self.exp_quantizer = ExpQuantization(conf_fname_or_obj) |
49 | | - |
50 | | - @deprecated(version='2.0', reason="please use neural_compressor.quantization.fit instead") |
51 | | - def __call__(self, model, q_dataloader=None, q_func=None, eval_dataloader=None, |
52 | | - eval_func=None): |
53 | | - """The main entry point of automatic quantization tuning. |
54 | | -
|
55 | | - This interface works on all the DL frameworks that neural_compressor supports |
56 | | - and provides three usages: |
57 | | - a) Fully yaml configuration: User specifies all the info through yaml, |
58 | | - including dataloaders used in calibration and evaluation phases |
59 | | - and quantization tuning settings. |
60 | | -
|
61 | | - For this usage, only model parameter is mandatory. |
62 | | -
|
63 | | - b) Partial yaml configuration: User specifies dataloaders used in calibration |
64 | | - and evaluation phase by code. |
65 | | -        The tool provides built-in dataloaders and evaluators; users just need to provide
66 | | -        a dataset implementing __iter__ or __getitem__ methods and invoke dataloader()
67 | | - with dataset as input parameter to create neural_compressor dataloader before calling this |
68 | | - function. |
69 | | -
|
70 | | - After that, User specifies fp32 "model", calibration dataset "q_dataloader" |
71 | | - and evaluation dataset "eval_dataloader". |
72 | | - The calibrated and quantized model is evaluated with "eval_dataloader" |
73 | | - with evaluation metrics specified in the configuration file. The evaluation tells |
74 | | - the tuner whether the quantized model meets the accuracy criteria. If not, |
75 | | - the tuner starts a new calibration and tuning flow. |
76 | | -
|
77 | | - For this usage, model, q_dataloader and eval_dataloader parameters are mandatory. |
78 | | -
|
79 | | - c) Partial yaml configuration: User specifies dataloaders used in calibration phase |
80 | | - by code. |
81 | | -        This usage is quite similar to b), except the user specifies a custom "eval_func"
82 | | - which encapsulates the evaluation dataset by itself. |
83 | | - The calibrated and quantized model is evaluated with "eval_func". |
84 | | - The "eval_func" tells the tuner whether the quantized model meets |
85 | | - the accuracy criteria. If not, the Tuner starts a new calibration and tuning flow. |
86 | | -
|
87 | | - For this usage, model, q_dataloader and eval_func parameters are mandatory. |
88 | | -
|
89 | | - Args: |
90 | | - model (object): For Tensorflow model, it could be a path |
91 | | - to frozen pb,loaded graph_def object or |
92 | | - a path to ckpt/savedmodel folder. |
93 | | -                                   For PyTorch model, it's torch.nn.Module
94 | | - instance. |
95 | | - For MXNet model, it's mxnet.symbol.Symbol |
96 | | -                                   or gluon.HybridBlock instance.
97 | | - q_dataloader (generator): Data loader for calibration, mandatory for |
98 | | - post-training quantization. It is iterable |
99 | | - and should yield a tuple (input, label) for |
100 | | - calibration dataset containing label, |
101 | | - or yield (input, _) for label-free calibration |
102 | | -                                   dataset. The input could be an object, list,
103 | | - tuple or dict, depending on user implementation, |
104 | | - as well as it can be taken as model input. |
105 | | - q_func (function, optional): Training function for Quantization-Aware |
106 | | - Training. It is optional and only takes effect |
107 | | - when user choose "quant_aware_training" |
108 | | - approach in yaml. |
109 | | - This function takes "model" as input parameter |
110 | | - and executes entire training process with self |
111 | | - contained training hyper-parameters. If this |
112 | | - parameter specified, eval_dataloader parameter |
113 | | - plus metric defined in yaml, or eval_func |
114 | | - parameter should also be specified at same time. |
115 | | - eval_dataloader (generator, optional): Data loader for evaluation. It is iterable |
116 | | - and should yield a tuple of (input, label). |
117 | | -                                 The input could be an object, list, tuple or
118 | | - dict, depending on user implementation, |
119 | | - as well as it can be taken as model input. |
120 | | -                                   The label should be usable as input to the
121 | | -                                   supported metrics. If this parameter is
122 | | - not None, user needs to specify pre-defined |
123 | | - evaluation metrics through configuration file |
124 | | -                                   and should set "eval_func" parameter as None.
125 | | - Tuner will combine model, eval_dataloader |
126 | | - and pre-defined metrics to run evaluation |
127 | | - process. |
128 | | - eval_func (function, optional): The evaluation function provided by user. |
129 | | - This function takes model as parameter, |
130 | | - and evaluation dataset and metrics should be |
131 | | - encapsulated in this function implementation |
132 | | - and outputs a higher-is-better accuracy scalar |
133 | | - value. |
134 | | -
|
135 | | - The pseudo code should be something like: |
136 | | -
|
137 | | - def eval_func(model): |
138 | | - input, label = dataloader() |
139 | | - output = model(input) |
140 | | - accuracy = metric(output, label) |
141 | | - return accuracy |
142 | | -
|
143 | | - Returns: |
144 | | -        quantized model: best quantized model found, otherwise return None
145 | | -
|
146 | | - """ |
147 | | - |
148 | | - logger.warning("This API is going to be deprecated. Please import " |
149 | | - "neural_compressor.experimental.Quantization, initialize an instance of `Quantization`," |
150 | | - "set its dataloader and metric attributes, then invoke its __call__ method.") |
151 | | - |
152 | | - self.exp_quantizer.model = model |
153 | | - if q_dataloader is not None: |
154 | | - self.exp_quantizer.calib_dataloader = q_dataloader |
155 | | - elif q_func is not None: |
156 | | - self.exp_quantizer.q_func = q_func |
157 | | - |
158 | | - if eval_func is not None: |
159 | | - self.exp_quantizer.eval_func = eval_func |
160 | | - elif eval_dataloader is not None: |
161 | | - self.exp_quantizer.eval_dataloader = eval_dataloader |
162 | | - |
163 | | - nc_model = self.exp_quantizer.fit() |
164 | | - if self.exp_quantizer.framework == 'tensorflow': |
165 | | - return nc_model.graph if nc_model else None |
166 | | - if self.exp_quantizer.framework == 'pytorch': |
167 | | - saved_path = os.path.abspath(os.path.join(os.path.expanduser( |
168 | | - self.exp_quantizer.conf.usr_cfg.tuning.workspace.path), 'checkpoint')) |
169 | | - nc_model.save(saved_path) |
170 | | - return nc_model.model |
171 | | - |
172 | | - fit = __call__ |
173 | | - |
174 | | - @deprecated(version='2.0', reason="this function has been deprecated") |
175 | | - def dataset(self, dataset_type, *args, **kwargs): |
176 | | - return DATASETS(self.exp_quantizer.framework)[dataset_type](*args, **kwargs) |
177 | | - |
178 | | - @deprecated(version='2.0', reason="this function has been deprecated") |
179 | | - def dataloader(self, dataset, batch_size=1, collate_fn=None, last_batch='rollover', |
180 | | - sampler=None, batch_sampler=None, num_workers=0, pin_memory=False): |
181 | | - return DATALOADERS[self.exp_quantizer.framework]( |
182 | | - dataset=dataset, |
183 | | - batch_size=batch_size, collate_fn=collate_fn, last_batch=last_batch, |
184 | | - sampler=sampler, batch_sampler=batch_sampler, num_workers=num_workers, |
185 | | - pin_memory=pin_memory |
186 | | - ) |
187 | | - |
188 | | - @deprecated(version='2.0', reason="this function has been deprecated") |
189 | | - def metric(self, name, metric_cls, **kwargs): |
190 | | - from .experimental.common import Metric as NCMetric |
191 | | - nc_metric = NCMetric(metric_cls, name, **kwargs) |
192 | | - self.exp_quantizer.metric = nc_metric |
193 | | - |
194 | | - @deprecated(version='2.0', reason="this function has been deprecated") |
195 | | - def postprocess(self, name, postprocess_cls, **kwargs): |
196 | | - from .experimental.common import Postprocess as NCPostprocess |
197 | | - nc_postprocess = NCPostprocess(postprocess_cls, name, **kwargs) |
198 | | - self.exp_quantizer.postprocess = nc_postprocess |
199 | | - |
200 | 22 |
|
201 | 23 | def fit(model, |
202 | 24 | conf, |
|
0 commit comments