From d81933fc971397c30a6ca935db9ef6653e8f4a30 Mon Sep 17 00:00:00 2001 From: Jessica Lin Date: Tue, 14 Jul 2020 18:27:54 -0700 Subject: [PATCH] jlin27_numeric_suite_tutorial Made some syntax edits because original headings were not rendering properly and breaking the build: - Removed the lines of pound sign (#) delimiters under text because when placed under text, it renders them all as headers - Add lines of pound delimiters above certain blocks of text to force them to show up as plain text between the code rather than comments with the code - Added code syntax (e.g.``compare_weights``) Suggestions: - Link to code or documentation (for example in the beginning when referencing new code or new concepts) - Add a conclusion section with links to references or learn more at the end - Examples: https://pytorch.org/tutorials/intermediate/dynamic_quantization_bert_tutorial.html#conclusion Fixes: - Currently the tutorial references images in `/_static/img/` but they are placed in `/_static/`. Make sure these match up. --- prototype_source/numeric_suite_tutorial.py | 175 ++++++++++----------- 1 file changed, 79 insertions(+), 96 deletions(-) diff --git a/prototype_source/numeric_suite_tutorial.py b/prototype_source/numeric_suite_tutorial.py index c1da4a14929..6bdcfb431de 100644 --- a/prototype_source/numeric_suite_tutorial.py +++ b/prototype_source/numeric_suite_tutorial.py @@ -10,15 +10,18 @@ One important step of debugging is to measure the statistics of the float model and its corresponding quantized model to know where are they differ most. We built a suite of numeric tools called PyTorch Numeric Suite in PyTorch quantization to enable the measurement of the statistics between quantized module and float module to support quantization debugging efforts. Even for the quantized model with good accuracy, PyTorch Numeric Suite can still be used as the profiling tool to better understand the quantization error within the model and provide the guidance for further optimization. -PyTorch Numeric Suite crrently supports models quantized through both static quantization and dynamic quantization with unified APIs. +PyTorch Numeric Suite currently supports models quantized through both static quantization and dynamic quantization with unified APIs. In this tutorial we will first use ResNet18 as an example to show how to use PyTorch Numeric Suite to measure the statistics between static quantized model and float model in eager mode. Then we will use LSTM based sequence model as an example to show the usage of PyTorch Numeric Suite for dynamic quantized model. Numeric Suite for Static Quantization ------------------------------------- +Setup +^^^^^^ We’ll start by doing the necessary imports: """ + ############################################################################## from __future__ import print_function, division, absolute_import @@ -39,7 +42,7 @@ ############################################################################## # Then we load the pretrained float ResNet18 model, and quantize it into qmodel. We cannot compare two arbitrary models, only a float model and the quantized model derived from it can be compared. -############################################################################## + float_model = torchvision.models.quantization.resnet18(pretrained=True, quantize=False) float_model.to('cpu') @@ -53,12 +56,11 @@ ############################################################################## # 1. Compare the weights of float and quantized models # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -# The first thing we usually want to compare are the weights of quantized model and float model. We can call compare_weights() from PyTorch Numeric Suite to get a dictionary wt_compare_dict with key corresponding to module names and each entry is a dictionary with two keys 'float' and 'quantized', containing the float and quantized weights. -############################################################################## - -# Takes in floating point and quantized state dict and returns a dict, with keys corresponding to the +# The first thing we usually want to compare are the weights of quantized model and float model. +# We can call ``compare_weights()`` from PyTorch Numeric Suite to get a ``dictionary wt_compare_dict`` with key corresponding to module names and each entry is a dictionary with two keys 'float' and 'quantized', containing the float and quantized weights. +# ``compare_weights()`` takes in floating point and quantized state dict and returns a dict, with keys corresponding to the # floating point weights and values being a dictionary of floating point and quantized weights + wt_compare_dict = ns.compare_weights(float_model.state_dict(), qmodel.state_dict()) print('keys of wt_compare_dict:') @@ -69,13 +71,13 @@ print(wt_compare_dict['conv1.weight']['float'].shape) print(wt_compare_dict['conv1.weight']['quantized'].shape) -############################################################################## -# Once get wt_compare_dict, users can process this dictionary in whatever way they want. Here as an example we compute the quantization error of the weights of float and quantized models as following. -############################################################################## -# Compute the Signal-to-Quantization-Noise Ratio (SQNR) of the quantized tensor y. The SQNR reflects the +############################################################################## +# Once get ``wt_compare_dict``, users can process this dictionary in whatever way they want. Here as an example we compute the quantization error of the weights of float and quantized models as following. +# Compute the Signal-to-Quantization-Noise Ratio (SQNR) of the quantized tensor ``y```. The SQNR reflects the # relationship between the maximum nominal signal strength and the quantization error introduced in the # quantization. Higher SQNR corresponds to lower quantization error. + def compute_error(x, y): Ps = torch.norm(x) Pn = torch.norm(x-y) @@ -85,8 +87,7 @@ def compute_error(x, y): print(key, compute_error(wt_compare_dict[key]['float'], wt_compare_dict[key]['quantized'].dequantize())) ############################################################################## -# As another example wt_compare_dict can also be used to plot the histogram of the weights of floating point and quantized models. -############################################################################## +# As another example ``wt_compare_dict`` can also be used to plot the histogram of the weights of floating point and quantized models. import matplotlib.pyplot as plt @@ -100,22 +101,22 @@ def compute_error(x, y): plt.title("Quantized model weights of conv1") plt.show() -############################################################################## -""" -2. Compare float point and quantized models at corresponding locations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The second tool allows for comparison of weights and activations between float and quantized models at corresponding locations for the same input as shown in the figure below. Red arrows indicate the locations of the comparison. -.. figure:: /_static/img/compare_output.png - -We call compare_model_outputs() from PyTorch Numeric Suite to get the activations in float model and quantized model at corresponding locations for the given input data. This API returns a dict with module names being keys. Each entry is itself a dict with two keys 'float' and 'quantized' containing the activations. -""" ############################################################################## - +# +# 2. Compare float point and quantized models at corresponding locations +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# The second tool allows for comparison of weights and activations between float and quantized models at corresponding locations for the same input as shown in the figure below. Red arrows indicate the locations of the comparison. +# +# .. figure:: /_static/img/compare_output.png +# +# We call ``compare_model_outputs()`` from PyTorch Numeric Suite to get the activations in float model and quantized model at corresponding locations for the given input data. This API returns a dict with module names being keys. Each entry is itself a dict with two keys 'float' and 'quantized' containing the activations. data = img_data[0][0] -# Takes in floating point and quantized model as well as input data, and returns a dict, with keys +############################################################################## +# ``img_data`` in floating point and quantized model as well as input data, and returns a dict, with keys # corresponding to the quantized module names and each entry being a dictionary with two keys 'float' and # 'quantized', containing the activations of floating point and quantized model at matching locations. act_compare_dict = ns.compare_model_outputs(float_model, qmodel, data) @@ -130,15 +131,11 @@ def compute_error(x, y): ############################################################################## # This dict can be used to compare and compute the quantization error of the activations of float and quantized models as following. -############################################################################## - for key in act_compare_dict: print(key, compute_error(act_compare_dict[key]['float'], act_compare_dict[key]['quantized'].dequantize())) ############################################################################## -If we want to do the comparison for more than one input data, we can do the following. -############################################################################## - +# If we want to do the comparison for more than one input data, we can do the following. # Prepare the model by attaching the logger to both floating point module and quantized # module if they are in the white_list. Default logger is OutputLogger, and default white_list # is DEFAULT_NUMERIC_SUITE_COMPARE_MODEL_OUTPUT_WHITE_LIST @@ -148,14 +145,15 @@ def compute_error(x, y): float_model(data[0]) qmodel(data[0]) +############################################################################## # Find the matching activation between floating point and quantized modules, and return a dict with key # corresponding to quantized module names and each entry being a dictionary with two keys 'float' # and 'quantized', containing the matching floating point and quantized activations logged by the logger act_compare_dict = ns.get_matching_activations(float_model, qmodel) + ############################################################################## # The default logger used in above APIs is OutputLogger, which is used to log the outputs of the modules. We can inherit from base Logger class and create our own logger to perform different functionalities. For example we can make a new MyOutputLogger class as below. -############################################################################## class MyOutputLogger(ns.Logger): r"""Customized logger class @@ -171,14 +169,12 @@ def forward(self, x): ############################################################################## # And then we can pass this logger into above APIs such as: -############################################################################## data = img_data[0][0] act_compare_dict = ns.compare_model_outputs(float_model, qmodel, data, Logger=MyOutputLogger) ############################################################################## # or: -############################################################################## ns.prepare_model_outputs(float_model, qmodel, MyOutputLogger) for data in img_data: @@ -186,26 +182,26 @@ def forward(self, x): qmodel(data[0]) act_compare_dict = ns.get_matching_activations(float_model, qmodel) -############################################################################## -""" -3. Compare a module in a quantized model with its float point equivalent, with the same input data -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The third tool allows for comparing a quantized module in a model with its float point counterpart, feeding both of them the same input and comparing their outputs as shown below. - -.. figure:: /_static/img/compare_stub.png -In practice we call prepare_model_with_stubs() to swap the quantized module that we want to compare with the Shadow module, which is illustrated as below: -.. figure:: /_static/img/shadow.png - -The Shadow module takes quantized module, float module and logger as input, and creates a forward path inside to make the float module to shadow quantized module sharing the same input tensor. - -The logger can be customizable, default logger is ShadowLogger and it will save the outputs of the quantized module and float module that can be used to compute the module level quantization error. - -Notice before each call of compare_model_outputs() and compare_model_stub() we need to have clean float and quantized model. This is because compare_model_outputs() and compare_model_stub() modify float and quantized model inplace, and it will cause unexpected results if call one right after another. -""" ############################################################################## +# +# 3. Compare a module in a quantized model with its float point equivalent, with the same input data +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# The third tool allows for comparing a quantized module in a model with its float point counterpart, feeding both of them the same input and comparing their outputs as shown below. +# +# .. figure:: /_static/img/compare_stub.png +# +# In practice we call prepare_model_with_stubs() to swap the quantized module that we want to compare with the Shadow module, which is illustrated as below: +# +# .. figure:: /_static/img/shadow.png +# +# The Shadow module takes quantized module, float module and logger as input, and creates a forward path inside to make the float module to shadow quantized module sharing the same input tensor. +# +# The logger can be customizable, default logger is ShadowLogger and it will save the outputs of the quantized module and float module that can be used to compute the module level quantization error. +# +# Notice before each call of compare_model_outputs() and compare_model_stub() we need to have clean float and quantized model. This is because compare_model_outputs() and compare_model_stub() modify float and quantized model inplace, and it will cause unexpected results if call one right after another. float_model = torchvision.models.quantization.resnet18(pretrained=True, quantize=False) float_model.to('cpu') @@ -218,11 +214,11 @@ def forward(self, x): ############################################################################## # In the following example we call compare_model_stub() from PyTorch Numeric Suite to compare QuantizableBasicBlock module with its float point equivalent. This API returns a dict with key corresponding to module names and each entry being a dictionary with two keys 'float' and 'quantized', containing the output tensors of quantized and its matching float shadow module. -############################################################################## data = img_data[0][0] module_swap_list = [torchvision.models.quantization.resnet.QuantizableBasicBlock] +############################################################################## # Takes in floating point and quantized model as well as input data, and returns a dict with key # corresponding to module names and each entry being a dictionary with two keys 'float' and # 'quantized', containing the output tensors of quantized module and its matching floating point shadow module. @@ -238,14 +234,12 @@ def forward(self, x): ############################################################################## # This dict can be then used to compare and compute the module level quantization error. -############################################################################## for key in ob_dict: print(key, compute_error(ob_dict[key]['float'], ob_dict[key]['quantized'].dequantize())) ############################################################################## # If we want to do the comparison for more than one input data, we can do the following. -############################################################################## ns.prepare_model_with_stubs(float_model, qmodel, module_swap_list, ns.ShadowLogger) for data in img_data: @@ -254,7 +248,6 @@ def forward(self, x): ############################################################################## # The default logger used in above APIs is ShadowLogger, which is used to log the outputs of the quantized module and its matching float shadow module. We can inherit from base Logger class and create our own logger to perform different functionalities. For example we can make a new MyShadowLogger class as below. -############################################################################## class MyShadowLogger(ns.Logger): r"""Customized logger class @@ -270,30 +263,29 @@ def forward(self, x, y): ############################################################################## # And then we can pass this logger into above APIs such as: -############################################################################## data = img_data[0][0] ob_dict = ns.compare_model_stub(float_model, qmodel, module_swap_list, data, Logger=MyShadowLogger) ############################################################################## # or: -############################################################################## ns.prepare_model_with_stubs(float_model, qmodel, module_swap_list, MyShadowLogger) for data in img_data: qmodel(data[0]) ob_dict = ns.get_logger_dict(qmodel) -############################################################################## -"""" -Numeric Suite for Dynamic Quantization -====================================== - -Numeric Suite APIs are designed in such as way that they work for both dynamic quantized model and static quantized model. We will use a model with both LSTM and Linear modules to demonstrate the usage of Numeric Suite on dynamic quantized model. +############################################################################### +# Numeric Suite for Dynamic Quantization +# ------------------------------------- +# +# Numeric Suite APIs are designed in such as way that they work for both dynamic quantized model and static quantized model. We will use a model with both LSTM and Linear modules to demonstrate the usage of Numeric Suite on dynamic quantized model. +# -First we define the model as below. Notice that within this model only nn.LSTM and nn.Linear modules will be quantized dynamically and nn.Embedding will remain as floating point module after quantization. -"""" -############################################################################## +################################# +# Setup +# ^^^^^^ +# First we define the model as below. Notice that within this model only nn. LSTM and nn.Linear modules will be quantized dynamically and nn. Embedding will remain as floating point module after quantization. class LSTMModel(nn.Module): """Container module with an encoder, a recurrent module, and a decoder.""" @@ -327,8 +319,7 @@ def init_hidden(self, bsz): weight.new_zeros(self.nlayers, bsz, self.nhid)) ############################################################################## -# Then we create the float_model and quantize it into qmodel. -############################################################################## +# Then we create the ``float_model``` and quantize it into qmodel. ntokens = 10 @@ -346,19 +337,16 @@ def init_hidden(self, bsz): ) ############################################################################## -"""" -1. Compare the weights of float and quantized models -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -We first call compare_weights() from PyTorch Numeric Suite to get a dictionary wt_compare_dict with key corresponding to module names and each entry is a dictionary with two keys 'float' and 'quantized', containing the float and quantized weights. -"""" -############################################################################## +# +# 1. Compare the weights of float and quantized models +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# We first call ``compare_weights()`` from PyTorch Numeric Suite to get a dictionary ``wt_compare_dict`` with key corresponding to module names and each entry is a dictionary with two keys 'float' and 'quantized', containing the float and quantized weights. wt_compare_dict = ns.compare_weights(float_model.state_dict(), qmodel.state_dict()) ############################################################################## -# Once we get wt_compare_dict, it can be used to compare and compute the quantization error of the weights of float and quantized models as following. -############################################################################## +# Once we get ``wt_compare_dict``, it can be used to compare and compute the quantization error of the weights of float and quantized models as following. for key in wt_compare_dict: if wt_compare_dict[key]['quantized'].is_quantized: @@ -367,15 +355,14 @@ def init_hidden(self, bsz): print(key, compute_error(wt_compare_dict[key]['float'], wt_compare_dict[key]['quantized'])) ############################################################################## -"""" -The Inf value in encoder.weight entry above is because encoder module is not quantized and the weights are the same in both floating point and quantized models. +# +# The Inf value in ``encoder.weight`` entry above is because encoder module is not quantized and the weights are the same in both floating point and quantized models. +# +# 2. Compare float point and quantized models at corresponding locations +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Then we call ``compare_model_outputs()`` from PyTorch Numeric Suite to get the activations in float model and quantized model at corresponding locations for the given input data. This API returns a dict with module names being keys. Each entry is itself a dict with two keys 'float' and 'quantized' containing the activations. Notice that this sequence model has two inputs, and we can pass both inputs into compare_model_outputs() and compare_model_stub(). -2. Compare float point and quantized models at corresponding locations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Then we call compare_model_outputs() from PyTorch Numeric Suite to get the activations in float model and quantized model at corresponding locations for the given input data. This API returns a dict with module names being keys. Each entry is itself a dict with two keys 'float' and 'quantized' containing the activations. Notice that this sequence model has two inputs, and we can pass both inputs into compare_model_outputs() and compare_model_stub(). -"""" -############################################################################## input_ = torch.randint(ntokens, (1, 1), dtype=torch.long) hidden = float_model.init_hidden(1) @@ -385,21 +372,20 @@ def init_hidden(self, bsz): ############################################################################## # This dict can be used to compare and compute the quantization error of the activations of float and quantized models as following. The LSTM module in this model has two outputs, in this example we compute the error of the first output as below. -############################################################################## + for key in act_compare_dict: print(key, compute_error(act_compare_dict[key]['float'][0], act_compare_dict[key]['quantized'][0])) ############################################################################## -"""" -3. Compare a module in a quantized model with its float point equivalent, with the same input data -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Next we call compare_model_stub() from PyTorch Numeric Suite to compare LSTM and Linear module with its float point equivalent. This API returns a dict with key corresponding to module names and each entry being a dictionary with two keys 'float' and 'quantized', containing the output tensors of quantized and its matching float shadow module. +# +# 3. Compare a module in a quantized model with its float point equivalent, with the same input data +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Next we call compare_model_stub() from PyTorch Numeric Suite to compare LSTM and Linear module with its float point equivalent. This API returns a dict with key corresponding to module names and each entry being a dictionary with two keys 'float' and 'quantized', containing the output tensors of quantized and its matching float shadow module. +# +# We reset the model first. -We reset the model first. -"""" -############################################################################## float_model = LSTMModel( ntoken = ntokens, @@ -414,8 +400,7 @@ def init_hidden(self, bsz): ) ############################################################################## -# Next we call compare_model_stub() from PyTorch Numeric Suite to compare LSTM and Linear module with its float point equivalent. This API returns a dict with key corresponding to module names and each entry being a dictionary with two keys 'float' and 'quantized', containing the output tensors of quantized and its matching float shadow module. -############################################################################## +# Next we call ``compare_model_stub()`` from PyTorch Numeric Suite to compare LSTM and Linear module with its float point equivalent. This API returns a dict with key corresponding to module names and each entry being a dictionary with two keys 'float' and 'quantized', containing the output tensors of quantized and its matching float shadow module. module_swap_list = [nn.Linear, nn.LSTM] ob_dict = ns.compare_model_stub(float_model, qmodel, module_swap_list, input_, hidden) @@ -423,11 +408,9 @@ def init_hidden(self, bsz): ############################################################################## # This dict can be then used to compare and compute the module level quantization error. -############################################################################## for key in ob_dict: print(key, compute_error(ob_dict[key]['float'], ob_dict[key]['quantized'])) ############################################################################## # SQNR of 40 dB is high and this is a situation where we have very good numerical alignment between the floating point and quantized model. -##############################################################################