From 9d3bd127d48a6dcd176ec834b518db2cbc88714d Mon Sep 17 00:00:00 2001 From: xin3he Date: Mon, 1 Jul 2024 14:56:12 +0800 Subject: [PATCH 1/5] fix bf16 symbolic_trace bug Signed-off-by: xin3he --- .../adaptor/torch_utils/bf16_convert.py | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/neural_compressor/adaptor/torch_utils/bf16_convert.py b/neural_compressor/adaptor/torch_utils/bf16_convert.py index 917976c2810..ce9a26e1b2a 100644 --- a/neural_compressor/adaptor/torch_utils/bf16_convert.py +++ b/neural_compressor/adaptor/torch_utils/bf16_convert.py @@ -17,7 +17,6 @@ """Bf16 Convert for Torch Utils.""" import torch import torch.nn as nn -from torch.fx import symbolic_trace from ...utils import logger @@ -58,8 +57,6 @@ def Convert(model, tune_cfg): if len(bf16_ops_list) > 0: logger.info("Convert operators to bfloat16") mixed_precision_model = _bf16_wrapper_model(model, bf16_ops_list) - if fx_sub_module_list is not None and len(fx_sub_module_list) > 0: - mixed_precision_model = bf16_symbolic_trace(mixed_precision_model, fx_sub_module_list) return mixed_precision_model @@ -73,25 +70,3 @@ def _bf16_wrapper_model(model, bf16_ops_list, prefix=""): _bf16_wrapper_model(child, bf16_ops_list, op_name) setattr(model, name, child) return model - - -def bf16_symbolic_trace(model, fx_sub_module_list, prefix=""): - """Symbolic trace for bf16 models. - - Args: - model (object): the input model. - fx_sub_module_list (list): _description_ - prefix (str): prefix of op name. - - Returns: - model (object) - """ - for name, child in model.named_children(): - op_name = prefix + "." + name if prefix != "" else name - for fx_sub_module_name in fx_sub_module_list: - if op_name == fx_sub_module_name: - child = symbolic_trace(child) - else: - bf16_symbolic_trace(child, fx_sub_module_list, op_name) - setattr(model, name, child) - return model From c97b6d83cfe2750ca39b923657566182ea759485 Mon Sep 17 00:00:00 2001 From: xin3he Date: Mon, 1 Jul 2024 15:14:43 +0800 Subject: [PATCH 2/5] patch 1 Signed-off-by: xin3he --- neural_compressor/adaptor/pytorch.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 2ac0af30efe..5909de644ac 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -3579,6 +3579,16 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): return q_model self.tune_cfg["fx_sub_module_list"] = self.sub_module_list + + # BF16 fallback + if ( + len(self.tune_cfg["bf16_ops_list"]) > 0 + and self.version.release >= Version("1.11.0").release + and self.use_bf16 + and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1") + ): # pragma: no cover + q_model._model = torch_utils.bf16_convert.Convert(q_model._model, self.tune_cfg) + if self.approach == "quant_aware_training": q_model._model.train() if self.sub_module_list is None: @@ -3665,14 +3675,6 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): self.sub_module_list, q_model._model, prefix="", custom_config=self.prepare_custom_config_dict ) - if ( - len(self.tune_cfg["bf16_ops_list"]) > 0 - and self.version.release >= Version("1.11.0").release - and self.use_bf16 - and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1") - ): # pragma: no cover - q_model._model = torch_utils.bf16_convert.Convert(q_model._model, self.tune_cfg) - self.fused_dict = self.get_fused_list(q_model.model) q_model.is_quantized = True q_model.q_config = copy.deepcopy(self.tune_cfg) From 
f2673f4101d0a60c7510364a937cb5cb6e6a7da3 Mon Sep 17 00:00:00 2001 From: xin3he Date: Wed, 3 Jul 2024 12:59:49 +0800 Subject: [PATCH 3/5] update mix_precision usage Signed-off-by: xin3he --- neural_compressor/adaptor/pytorch_cpu.yaml | 2 +- .../adaptor/torch_utils/bf16_convert.py | 10 +- .../test_adaptor_pytorch_1x.py | 1209 ----------------- .../test_adaptor_pytorch_2x.py | 12 +- 4 files changed, 8 insertions(+), 1225 deletions(-) delete mode 100644 test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1x.py diff --git a/neural_compressor/adaptor/pytorch_cpu.yaml b/neural_compressor/adaptor/pytorch_cpu.yaml index f815c5c7f18..fafad6f860b 100644 --- a/neural_compressor/adaptor/pytorch_cpu.yaml +++ b/neural_compressor/adaptor/pytorch_cpu.yaml @@ -19,7 +19,7 @@ name: '1.11' bf16: ['Linear', 'bmm', 'mm', 'baddbmm', 'addmm', 'addbmm', - '_convolution', 'LSTM', 'LSTMCell', 'GRU', 'GRUCell'] + 'Conv1d', 'Conv2d', 'Conv3d', 'LSTM', 'LSTMCell', 'GRU', 'GRUCell'] fp32: ['*'] # `*` means all op types. int8: &1_11_capabilities { 'static': &cap_s8_1_11 { diff --git a/neural_compressor/adaptor/torch_utils/bf16_convert.py b/neural_compressor/adaptor/torch_utils/bf16_convert.py index ce9a26e1b2a..b6d5e6d01bd 100644 --- a/neural_compressor/adaptor/torch_utils/bf16_convert.py +++ b/neural_compressor/adaptor/torch_utils/bf16_convert.py @@ -27,6 +27,7 @@ class BF16ModuleWrapper(nn.Module): def __init__(self, module): """Init a BF16ModuleWrapper object.""" super(BF16ModuleWrapper, self).__init__() + module = module.bfloat16() self.add_module("module", module) self.train(module.training) # WA for TransformerEncoder to access its Linear's weights and bias @@ -37,7 +38,6 @@ def __init__(self, module): def forward(self, X): """Convert dtype.""" X = X.to(torch.bfloat16) - self.module.bfloat16() X = self.module(X) return X.float() @@ -53,7 +53,6 @@ def Convert(model, tune_cfg): mixed_precision_model (object): model with mixed precision. """ bf16_ops_list = tune_cfg["bf16_ops_list"] - fx_sub_module_list = tune_cfg["fx_sub_module_list"] if "fx_sub_module_list" in tune_cfg.keys() else [] if len(bf16_ops_list) > 0: logger.info("Convert operators to bfloat16") mixed_precision_model = _bf16_wrapper_model(model, bf16_ops_list) @@ -64,9 +63,8 @@ def _bf16_wrapper_model(model, bf16_ops_list, prefix=""): for name, child in model.named_children(): op_name = prefix + "." 
+ name if prefix != "" else name for bf16_op_name in bf16_ops_list: - if op_name == bf16_op_name[0]: + if op_name == bf16_op_name[0] or op_name == bf16_op_name[0].split(".module")[0]: child = BF16ModuleWrapper(child) - else: - _bf16_wrapper_model(child, bf16_ops_list, op_name) - setattr(model, name, child) + setattr(model, name, child) + _bf16_wrapper_model(child, bf16_ops_list, op_name) return model diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1x.py deleted file mode 100644 index b13c6ff5a76..00000000000 --- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1x.py +++ /dev/null @@ -1,1209 +0,0 @@ -import copy -import os -import pickle -import shutil -import unittest - -import numpy as np -import torch -import torch.nn as nn -import torch.nn.quantized as nnq -from packaging.version import Version -from torch.quantization import DeQuantStub, QuantStub - -import neural_compressor.adaptor.pytorch as nc_torch -from neural_compressor.adaptor import FRAMEWORKS -from neural_compressor.conf.config import QuantConf -from neural_compressor.experimental import Quantization, common -from neural_compressor.model import MODELS -from neural_compressor.utils.pytorch import load -from neural_compressor.utils.utility import LazyImport, recover - -try: - import intel_extension_for_pytorch as ipex - - IPEX = True -except: - IPEX = False - -# improve lazy import UT coverage -resnet18 = LazyImport("torchvision.models.resnet18") -q_resnet18 = LazyImport("torchvision.models.quantization.resnet18") - -PT_VERSION = nc_torch.get_torch_version().release -if PT_VERSION >= Version("1.8.0").release: - FX_MODE = True -else: - FX_MODE = False - - -fake_dyn_yaml = """ - model: - name: imagenet - framework: pytorch - - quantization: - approach: post_training_dynamic_quant - op_wise: { - 'decoder': { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']} - } - } - evaluation: - accuracy: - metric: - topk: 1 - performance: - warmup: 5 - iteration: 10 - - tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - random_seed: 9527 - workspace: - path: saved - """ - - -fake_ptq_yaml = """ - model: - name: imagenet - framework: pytorch - - quantization: - op_wise: { - - 'layer1.0.conv1': { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']} - }, - 'layer1.0.conv2': { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']} - }, - 'layer2.0.conv1': { - 'activation': {'dtype': ['uint8'], 'algorithm': ['minmax'], 'granularity': ['per_tensor'], 'scheme':['sym']}, - 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} - }, - 'layer3.0.conv1': { - 'activation': {'dtype': ['uint8'], 'algorithm': ['kl'], 'granularity': ['per_tensor'], 'scheme':['sym']}, - 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} - }, - 'layer1.0.add_relu': { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']} - }, - } - evaluation: - accuracy: - metric: - topk: 1 - performance: - warmup: 1 - iteration: 10 - - tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - random_seed: 9527 - workspace: - path: saved - """ - -fake_auto_yaml = """ - model: - name: imagenet - framework: pytorch_fx - - quantization: - approach: post_training_auto_quant - evaluation: - accuracy: - metric: - topk: 1 - performance: - warmup: 1 - iteration: 10 - - tuning: - accuracy_criterion: - relative: 0.01 - 
exit_policy: - timeout: 1000 - max_trials: 3 - random_seed: 9527 - workspace: - path: saved - """ - - -fake_ptq_yaml_for_fx = """ - model: - name: imagenet - framework: pytorch_fx - - quantization: - approach: post_training_auto_quant - op_wise: { - 'layer1.0.conv1': { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']} - }, - 'layer1.0.conv2': { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']} - }, - 'layer2.0.conv1': { - 'activation': {'dtype': ['uint8'], 'algorithm': ['minmax'], 'granularity': ['per_tensor'], 'scheme':['sym']}, - 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} - }, - 'layer3.0.conv1': { - 'activation': {'dtype': ['uint8'], 'algorithm': ['kl'], 'granularity': ['per_tensor'], 'scheme':['sym']}, - 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} - }, - 'layer1.0.add_relu': { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']} - }, - 'conv.module': { - 'weight': {'dtype': ['fp32']}, - 'activation': {'dtype': ['fp32']} - }, - 'default_qconfig': { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']} - } - } - evaluation: - accuracy: - metric: - topk: 1 - performance: - warmup: 5 - iteration: 10 - - tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - random_seed: 9527 - workspace: - path: saved - """ - - -fake_qat_yaml = """ - model: - name: imagenet - framework: pytorch - - quantization: - approach: quant_aware_training - train: - end_epoch: 1 - iteration: 1 - optimizer: - SGD: - learning_rate: 0.0001 - criterion: - CrossEntropyLoss: - reduction: mean - op_wise: { - 'layer1.0.conv1': { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']} - }, - 'layer1.0.conv2': { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']} - }, - 'layer2.0.conv1': { - 'activation': {'dtype': ['uint8'], 'algorithm': ['minmax'], 'granularity': ['per_tensor'], 'scheme':['sym']}, - 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} - }, - 'layer3.0.conv1': { - 'activation': {'dtype': ['uint8'], 'algorithm': ['kl'], 'granularity': ['per_tensor'], 'scheme':['sym']}, - 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} - }, - 'layer1.0.add_relu': { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']} - } - } - evaluation: - accuracy: - metric: - topk: 1 - - tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - random_seed: 9527 - workspace: - path: saved - """ - - -def build_pytorch_yaml(): - with open("ptq_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_ptq_yaml) - - with open("dynamic_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_dyn_yaml) - - with open("qat_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_qat_yaml) - - with open("auto_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_auto_yaml) - - -def build_pytorch_fx_yaml(): - if PT_VERSION >= Version("1.9.0").release: - fake_fx_ptq_yaml = fake_ptq_yaml_for_fx - else: - fake_fx_ptq_yaml = fake_ptq_yaml.replace("pytorch", "pytorch_fx") - with open("fx_ptq_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_fx_ptq_yaml) - - fake_fx_dyn_yaml = fake_dyn_yaml.replace("pytorch", "pytorch_fx") - with open("fx_dynamic_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_fx_dyn_yaml) - - fake_fx_qat_yaml = fake_qat_yaml.replace("pytorch", 
"pytorch_fx") - with open("fx_qat_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_fx_qat_yaml) - - -def build_dump_tensors_yaml(): - fake_yaml = """ - model: - name: imagenet - framework: pytorch - - evaluation: - accuracy: - metric: - topk: 1 - - tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - random_seed: 9527 - workspace: - path: saved - tensorboard: true - """ - with open("dump_yaml.yaml", "w", encoding="utf-8") as f: - f.write(fake_yaml) - - -class M(torch.nn.Module): - def __init__(self): - super().__init__() - self.quant = QuantStub() - self.conv = nn.Conv2d(3, 1, 1) - self.linear = nn.Linear(224 * 224, 5) - self.dequant = DeQuantStub() - - def forward(self, x): - x = self.quant(x) - x = self.conv(x) - x = x.view(1, -1) - x = self.linear(x) - x = self.dequant(x) - return x - - -class FP32Model(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x): - times = x.size(1) - if times == 1: - return torch.ones(x.shape) - return torch.ones(x.shape) + 1 - - -class DynamicModel(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = nn.Conv2d(1, 1, 1) - - def forward(self, x): - if x is not None: - x = self.conv(x) - return x - - -class SubModel(torch.nn.Module): - def __init__(self, bypass=True): - super().__init__() - self.quant = QuantStub() - self.conv = nn.Conv2d(1, 1, 1) - self.conv1 = nn.Conv2d(1, 1, 1) - self.bn = nn.BatchNorm2d(1) - self.relu = nn.ReLU() - self.fp32 = FP32Model() - self.norm = nn.LayerNorm([1, 224, 224]) - self.dequant = DeQuantStub() - self.bypass = bypass - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - x = self.quant(x) - x = self.relu(x) - x = self.conv1(x) - x = self.dequant(x) - if not self.bypass: - x = self.fp32(x) - x = self.norm(x) - return x - - -class PartialQuantModel(torch.nn.Module): - def __init__(self): - super().__init__() - self.quant = QuantStub() - self.conv = nn.Conv2d(3, 1, 1) - self.bn = nn.BatchNorm2d(1) - self.conv1 = nn.Conv2d(1, 1, 1) - self.bn1 = nn.BatchNorm2d(1) - self.conv2 = nn.Conv2d(1, 1, 1) - self.linear = nn.Linear(224 * 224, 1) - self.dequant = DeQuantStub() - self.sub = SubModel(bypass=False) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - x = self.conv1(x) - x = self.bn1(x) - x = self.sub(x) - x = self.quant(x) - x = self.conv2(x) - x = x.view(1, -1) - x = self.linear(x) - x = self.dequant(x) - return x - - -class DynamicControlModel(torch.nn.Module): - def __init__(self): - super().__init__() - self.conv = nn.Conv2d(3, 1, 1) - self.bn = nn.BatchNorm2d(1) - self.linear = nn.Linear(224 * 224, 1) - self.sub = SubModel() - self.fp32 = FP32Model() - self.dyn = DynamicModel() - - def forward(self, x): - x = self.conv(x) - x = self.dyn(x) - x = self.bn(x) - x = self.sub(x) - x = self.fp32(x) - x = x.view(1, -1) - x = self.linear(x) - return x - - -class LSTMModel(nn.Module): - """Container module with an encoder, a recurrent module, and a decoder.""" - - def __init__(self, ntoken=10, ninp=512, nhid=256, nlayers=5, dropout=0.5): - super(LSTMModel, self).__init__() - self.drop = nn.Dropout(dropout) - self.encoder = nn.Embedding(ntoken, ninp) - self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout) - self.decoder = nn.Linear(nhid, ntoken) - self.init_weights() - self.nhid = nhid - self.nlayers = nlayers - - def init_weights(self): - initrange = 0.1 - self.encoder.weight.data.uniform_(-initrange, initrange) - self.decoder.bias.data.zero_() - self.decoder.weight.data.uniform_(-initrange, initrange) - - def forward(self, 
input): - input = torch.ones((3, 10), dtype=torch.int32) - h0 = torch.randn(2, 10, 256) - c0 = torch.randn(2, 10, 256) - hidden = (h0, c0) - emb = self.encoder(input) - output, hidden = self.rnn(emb, hidden) - output = self.drop(output) - decoded = self.decoder(output) - return decoded, hidden - - -def eval_func(model): - # switch to evaluate mode - model.eval() - with torch.no_grad(): - input = torch.randn(1, 3, 224, 224) - # compute output - output = model(input) - return 0.0 - - -def q_func(model): - optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) - # switch to evaluate mode - model.train() - input = torch.randn(1, 3, 224, 224) - # compute output - output = model(input) - loss = output.mean() - optimizer.zero_grad() - loss.backward() - optimizer.step() - return model - - -class TestPytorchAdaptor(unittest.TestCase): - framework_specific_info = { - "device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "workspace_path": "./", - } - framework = "pytorch" - adaptor = FRAMEWORKS[framework](framework_specific_info) - model = q_resnet18() - nc_model = MODELS["pytorch"](model) - - @classmethod - def setUpClass(self): - build_pytorch_yaml() - build_dump_tensors_yaml() - - @classmethod - def tearDownClass(self): - os.remove("ptq_yaml.yaml") - os.remove("dynamic_yaml.yaml") - os.remove("qat_yaml.yaml") - os.remove("dump_yaml.yaml") - os.remove("auto_yaml.yaml") - shutil.rmtree("./saved", ignore_errors=True) - shutil.rmtree("runs", ignore_errors=True) - - def test_get_all_weight_name(self): - assert len(list(self.nc_model.get_all_weight_names())) == 62 - - def test_get_weight(self): - for name, param in self.model.named_parameters(): - if name == "layer4.1.conv2.weight": - param.data.fill_(0.0) - if name == "fc.bias": - param.data.fill_(0.1) - assert int(torch.sum(self.nc_model.get_weight("layer4.1.conv2.weight"))) == 0 - assert torch.allclose(torch.sum(self.nc_model.get_weight("fc.bias")), torch.tensor(100.0)) - - def test_get_input(self): - model = MODELS["pytorch"](q_resnet18()) - model.model.eval().fuse_model() - model.register_forward_pre_hook() - rand_input = torch.rand(100, 3, 224, 224).float() - model.model(rand_input) - assert torch.equal(model.get_inputs("x"), rand_input) - model.remove_hooks() - - def test_update_weights(self): - self.nc_model.update_weights("fc.bias", torch.zeros([1000])) - assert int(torch.sum(self.nc_model.get_weight("fc.bias"))) == 0 - - def test_get_gradient(self): - with self.assertRaises(AssertionError): - self.nc_model.get_gradient("fc.bias") - - for name, tensor in self.nc_model._model.named_parameters(): - if name == "fc.bias": - tensor.grad = torch.zeros_like(tensor) - break - assert torch.equal(torch.Tensor(self.nc_model.get_gradient("fc.bias")), torch.zeros_like(tensor)) - - rand_input = torch.rand(100, 3, 224, 224).float() - rand_input.grad = torch.ones_like(rand_input) - assert torch.equal(torch.Tensor(self.nc_model.get_gradient(rand_input)), torch.ones_like(rand_input)) - - def test_report_sparsity(self): - df, total_sparsity = self.nc_model.report_sparsity() - self.assertTrue(total_sparsity > 0) - self.assertTrue(len(df) == 22) - - def test_quantization_saved(self): - for fake_yaml in ["dynamic_yaml.yaml", "qat_yaml.yaml", "ptq_yaml.yaml"]: - model = M() - quantizer = Quantization(fake_yaml) - quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True - dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) - quantizer.model = model - quantizer.calib_dataloader = 
common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - q_model = quantizer.fit() - eval_func(q_model) - q_model.save("./saved") - # Load configure and weights by neural_compressor.utils - saved_model = load("./saved", model) - eval_func(saved_model) - # recover int8 model from history - history_file = "./saved/history.snapshot" - model_recover = recover(model, history_file, 0) - eval_func(model_recover) - self.assertEqual(type(saved_model.conv), type(model_recover.conv)) - shutil.rmtree("./saved", ignore_errors=True) - from neural_compressor.experimental import Benchmark - - evaluator = Benchmark("ptq_yaml.yaml") - # Load configure and weights by neural_compressor.model - evaluator.model = model - evaluator.b_dataloader = common.DataLoader(dataset) - evaluator.fit("accuracy") - - for fake_yaml in ["qat_yaml.yaml", "ptq_yaml.yaml"]: - model = copy.deepcopy(self.model) - if fake_yaml == "ptq_yaml.yaml": - model.eval().fuse_model() - conf = QuantConf(fake_yaml) - quantizer = Quantization(conf) - dataset = quantizer.dataset("dummy", (100, 3, 224, 224)) - quantizer.model = model - if fake_yaml == "qat_yaml.yaml": - quantizer.q_func = q_func - else: - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_func = eval_func - q_model = quantizer.fit() - q_model.save("./saved") - # Load configure and weights by neural_compressor.utils - saved_model = load("./saved", model) - eval_func(saved_model) - shutil.rmtree("./saved", ignore_errors=True) - - def test_quantization_new_saved(self): - for fake_yaml in ["dynamic_yaml.yaml", "qat_yaml.yaml", "ptq_yaml.yaml"]: - model = M() - quantizer = Quantization(fake_yaml) - quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True - dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) - quantizer.model = model - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - q_model = quantizer.fit() - eval_func(q_model) - torch.save(q_model.quantized_state_dict(), "./saved/model.pt") - # Load configure and weights by neural_compressor.utils - from neural_compressor.experimental.common import Model - - common_model = Model(model) - common_model.load_quantized_state_dict(torch.load("./saved/model.pt")) - eval_func(common_model) - self.assertEqual(type(q_model._model.linear), type(common_model._model.linear)) - shutil.rmtree("./saved", ignore_errors=True) - - @unittest.skipIf(IPEX, "this function is affected by IPEX, Fixing now.") - def test_non_quant_module(self): - for fake_yaml in ["qat_yaml.yaml", "ptq_yaml.yaml"]: - model = PartialQuantModel() - conf = QuantConf(fake_yaml) - quantizer = Quantization(conf) - dataset = quantizer.dataset("dummy", (1, 3, 224, 224)) - non_quant_dict = { - "non_quant_module_name": ["conv", "conv1", "sub.conv"], - "non_quant_module_class": ["BatchNorm2d", "FP32Model"], - } - quantizer.model = common.Model(model, **non_quant_dict) - if fake_yaml == "qat_yaml.yaml": - quantizer.q_func = q_func - else: - quantizer.calib_func = eval_func - quantizer.eval_func = eval_func - q_model = quantizer.fit() - self.assertTrue(isinstance(q_model.model.conv, torch.nn.Conv2d)) - self.assertTrue("quantize" in str(q_model.model.conv2.__class__)) - q_model.save("./saved") - saved_model = load("./saved", model, **non_quant_dict) - eval_func(saved_model) - shutil.rmtree("./saved", ignore_errors=True) - - def test_auto_quant(self): - def eval_func(model): - return 1 - - model_origin = LSTMModel( - ntoken=10, - ninp=512, - 
nhid=256, - nlayers=2, - ) - # run fx_quant in neural_compressor and save the quantized GraphModule - quantizer = Quantization("auto_yaml.yaml") - dataset = quantizer.dataset("dummy", (3, 10), label=True) - quantizer.eval_func = eval_func - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.model = common.Model(model_origin) - q_model = quantizer.fit() - self.assertNotEqual(q_model, None) - - def test_workspace_path(self): - model = M() - quantizer = Quantization("ptq_yaml.yaml") - quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True - dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) - quantizer.model = model - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - q_model = quantizer.fit() - eval_func(q_model) - torch.save(q_model.quantized_state_dict(), "./saved/best_model.pt") - # Load configure and weights by workspace_path - from neural_compressor.experimental.common import Model - - common_model = Model(model) - common_model.workspace_path = "./saved" - eval_func(common_model) - self.assertEqual(type(q_model._model.linear), type(common_model._model.linear)) - shutil.rmtree("./saved", ignore_errors=True) - - def test_get_graph_info(self): - from neural_compressor.model.torch_model import PyTorchModel - - model = PyTorchModel(self.model) - op_map = model.graph_info - self.assertTrue(op_map["conv1"] == "Conv2d") - - def test_tensorboard(self): - model = copy.deepcopy(self.nc_model) - model.model.eval().fuse_model() - quantizer = Quantization("dump_yaml.yaml") - dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) - quantizer.model = model.model - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_func = eval_func - quantizer.fit() - self.assertTrue(True if os.path.exists("runs/eval/baseline_acc0.0") else False) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.eval_func = None - quantizer.fit() - self.assertTrue(True if os.path.exists("runs/eval/baseline_acc0.0") else False) - - def test_tensor_dump_and_set(self): - model = copy.deepcopy(self.nc_model) - model.model.eval().fuse_model() - quantizer = Quantization("ptq_yaml.yaml") - dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) - dataloader = common.DataLoader(dataset) - dataloader = common._generate_common_dataloader(dataloader, "pytorch") - quantizer.eval_dataloader = dataloader - quantizer.calib_dataloader = dataloader - quantizer.model = model.model - q_model = quantizer.fit() - quantizer.strategy.adaptor.inspect_tensor( - model, - dataloader, - op_list=["conv1.0", "layer1.0.conv1.0"], - iteration_list=[1, 2], - inspect_type="all", - save_to_disk=True, - ) - with open("saved/inspect_result.pkl", "rb") as fp: - tensor_dict = pickle.load(fp) - a = tensor_dict["activation"][0] - w = tensor_dict["weight"] - if PT_VERSION >= Version("1.8.0").release: - self.assertTrue(w["conv1.0"]["conv1.0.weight"].shape[0] == a["conv1.0"]["conv1.0.output0"].shape[1]) - else: - self.assertTrue(w["conv1.0"]["conv1.0.weight"].shape[0] == a["conv1.0"]["conv1.1.output0"].shape[1]) - data = np.random.random(w["conv1.0"]["conv1.0.weight"].shape).astype(np.float32) - quantizer.strategy.adaptor.set_tensor(q_model, {"conv1.0.weight": data}) - changed_tensor = q_model.get_weight("conv1.weight") - scales = changed_tensor.q_per_channel_scales() - changed_tensor_fp32 = torch.dequantize(changed_tensor) - self.assertTrue(np.allclose(data, changed_tensor_fp32.numpy(), atol=2 / 
np.min(scales.numpy()))) - quantizer.strategy.adaptor.inspect_tensor( - q_model, - dataloader, - op_list=["conv1.0", "layer1.0.conv1.0"], - iteration_list=[1, 2], - inspect_type="all", - save_to_disk=False, - ) - - def test_forward_wrapper(self): - vision_model = resnet18() - - class dummymodel(torch.nn.Module): - def __init__(self, model): - super(dummymodel, self).__init__() - self._model = model - - def forward(self, input=None): - return self._model(input) - - data = [ - [{"input": torch.rand(3, 224, 224)}, torch.ones(1, 1)], - ] - # dataloader.batch_size=100 - dataloader = common.DataLoader(data, batch_size=1) - quantizer = Quantization("dynamic_yaml.yaml") - model = dummymodel(vision_model) - quantizer.model = model - quantizer.calib_dataloader = dataloader - quantizer.eval_dataloader = dataloader - model = quantizer.fit() - self.assertTrue(isinstance(model, torch.nn.Module)) - - def test_floatfunctions_fallback(self): - class ModelWithFunctionals(torch.nn.Module): - def __init__(self): - super(ModelWithFunctionals, self).__init__() - self.mycat = nnq.FloatFunctional() - self.myadd = nnq.FloatFunctional() - self.myadd_relu = nnq.FloatFunctional() - # Tracing doesn't work yet for c10 ops with scalar inputs - # https://github.com/pytorch/pytorch/issues/27097 - self.my_scalar_add = nnq.FloatFunctional() - self.mymul = nnq.FloatFunctional() - self.my_scalar_mul = nnq.FloatFunctional() - self.quant = QuantStub() - self.dequant = DeQuantStub() - - def forward(self, x): - x = self.quant(x) - y = self.mycat.cat([x, x, x]) - z = self.myadd.add(y, y) - w = self.myadd_relu.add_relu(z, z) - # Tracing doesn't work yet for c10 ops with scalar inputs - # https://github.com/pytorch/pytorch/issues/27097 - w = self.my_scalar_add.add_scalar(w, -0.5) - w = self.mymul.mul(w, w) - w = self.my_scalar_mul.mul_scalar(w, 0.5) - w = self.dequant(w) - return w - - model = ModelWithFunctionals() - model = MODELS["pytorch"](model) - x = torch.rand(10, 1, dtype=torch.float) - y = model.model(x) - fallback_ops = [] - q_capability = self.adaptor.query_fw_capability(model) - for k, v in q_capability["opwise"].items(): - if k[0] != "quant" and k[0] != "dequant": - fallback_ops.append(k[0]) - model.model.qconfig = torch.quantization.default_qconfig - model.model.quant.qconfig = torch.quantization.default_qconfig - if PT_VERSION >= Version("1.8.0").release: - model.model.dequant.qconfig = torch.quantization.default_qconfig - nc_torch._fallback_quantizable_ops_recursively(model.model, "", fallback_ops, op_qcfgs={}) - if PT_VERSION >= Version("2.0.0").release: - from torch.quantization.quantize import _add_observer_ as add_observer_ - else: - from torch.quantization.quantize import add_observer_ - add_observer_(model.model) - model.model(x) - torch.quantization.convert(model.model, self.adaptor.q_mapping, inplace=True) - qy = model.model(x) - tol = {"atol": 1e-01, "rtol": 1e-03} - self.assertTrue(np.allclose(y, qy, **tol)) - - -@unittest.skipIf(not FX_MODE, "Unsupported Fx Mode with PyTorch Version Below 1.8") -class TestPytorchFXAdaptor(unittest.TestCase): - framework_specific_info = { - "device": "cpu", - "approach": "post_training_static_quant", - "random_seed": 1234, - "q_dataloader": None, - "workspace_path": "./", - } - framework = "pytorch_fx" - adaptor = FRAMEWORKS[framework](framework_specific_info) - - @classmethod - def setUpClass(self): - build_pytorch_fx_yaml() - - @classmethod - def tearDownClass(self): - os.remove("fx_ptq_yaml.yaml") - os.remove("fx_dynamic_yaml.yaml") - shutil.rmtree("./saved", 
ignore_errors=True)
-        shutil.rmtree("runs", ignore_errors=True)
-
-    def test_fx_quant(self):
-        for fake_yaml in ["fx_qat_yaml.yaml", "fx_ptq_yaml.yaml"]:
-            model_origin = resnet18()
-            # run fx_quant in neural_compressor and save the quantized GraphModule
-            quantizer = Quantization(fake_yaml)
-            dataset = quantizer.dataset("dummy", (10, 3, 224, 224), label=True)
-            quantizer.eval_func = eval_func
-            if fake_yaml == "fx_qat_yaml.yaml":
-                quantizer.q_func = q_func
-            else:
-                quantizer.calib_func = eval_func
-            dataloader = common.DataLoader(dataset)
-            quantizer.calib_dataloader = dataloader
-            quantizer.model = common.Model(
-                model_origin,
-                **{
-                    "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]},
-                    "convert_custom_config_dict": {"preserved_attributes": []},
-                }
-            )
-            q_model = quantizer.fit()
-            q_model.save("./saved")
-            # Load configure and weights with neural_compressor.utils
-            model_fx = load(
-                "./saved",
-                model_origin,
-                **{
-                    "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]},
-                    "convert_custom_config_dict": {"preserved_attributes": []},
-                }
-            )
-            self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule))
-
-            # recover int8 model with only tune_cfg
-            history_file = "./saved/history.snapshot"
-            model_fx_recover = recover(
-                model_origin,
-                history_file,
-                0,
-                **{
-                    "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]},
-                    "convert_custom_config_dict": {"preserved_attributes": []},
-                }
-            )
-            self.assertEqual(model_fx.code, model_fx_recover.code)
-            shutil.rmtree("./saved", ignore_errors=True)
-
-        for fake_yaml in ["fx_qat_yaml.yaml", "fx_ptq_yaml.yaml"]:
-            model_origin = M()
-            # run fx_quant in neural_compressor and save the quantized GraphModule
-            quantizer = Quantization(fake_yaml)
-            quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True
-            dataset = quantizer.dataset("dummy", (10, 3, 224, 224), label=True)
-            quantizer.calib_dataloader = common.DataLoader(dataset)
-            quantizer.eval_dataloader = common.DataLoader(dataset)
-            quantizer.model = common.Model(
-                model_origin,
-                **{
-                    "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]},
-                    "convert_custom_config_dict": {"preserved_attributes": []},
-                }
-            )
-            q_model = quantizer.fit()
-            q_model.save("./saved")
-            # Load configure and weights with neural_compressor.utils
-            model_fx = load(
-                "./saved",
-                model_origin,
-                **{
-                    "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]},
-                    "convert_custom_config_dict": {"preserved_attributes": []},
-                    "dataloader": quantizer.calib_dataloader,
-                }
-            )
-            self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule))
-            shutil.rmtree("./saved", ignore_errors=True)
-
-    @unittest.skipIf(
-        PT_VERSION < Version("1.9.0").release,
-        "Please use PyTorch 1.9 or higher version for dynamic quantization with pytorch_fx backend",
-    )
-    def test_fx_dynamic_quant(self):
-        model = LSTMModel(
-            ntoken=10,
-            ninp=512,
-            nhid=256,
-            nlayers=5,
-        )
-        # run fx_quant in neural_compressor and save the quantized GraphModule
-        model.eval()
-        quantizer = Quantization("fx_dynamic_yaml.yaml")
-        quantizer.model = common.Model(
-            copy.deepcopy(model),
-            **{
-                "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]},
-                "convert_custom_config_dict": {"preserved_attributes": []},
-            }
-        )
-        q_model = quantizer.fit()
-        q_model.save("./saved")
-
-        # Load configure and weights by neural_compressor.utils
-        model_fx = load(
-            "./saved",
-            copy.deepcopy(model),
-            **{
-                "prepare_custom_config_dict": {"non_traceable_module_name": 
["a"]}, - "convert_custom_config_dict": {"preserved_attributes": []}, - } - ) - self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) - - # Test the functionality of older model saving type - state_dict = torch.load("./saved/best_model.pt") - tune_cfg = state_dict.pop("best_configure") - import yaml - - with open("./saved/best_configure.yaml", "w") as f: - yaml.dump(tune_cfg, f, default_flow_style=False) - torch.save(state_dict, "./saved/best_model_weights.pt") - os.remove("./saved/best_model.pt") - model_fx = load( - "./saved", - copy.deepcopy(model), - **{ - "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": {"preserved_attributes": []}, - } - ) - self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) - - # recover int8 model with only tune_cfg - history_file = "./saved/history.snapshot" - model_fx_recover = recover( - model, - history_file, - 0, - **{ - "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": {"preserved_attributes": []}, - } - ) - self.assertEqual(model_fx.code, model_fx_recover.code) - shutil.rmtree("./saved", ignore_errors=True) - - def test_default_dynamic_quant(self): - def eval_func(model): - return 1 - - def q_func(model): - return model - - # Model Definition - for fake_yaml in ["fx_qat_yaml.yaml", "fx_ptq_yaml.yaml"]: - model_origin = LSTMModel( - ntoken=10, - ninp=512, - nhid=256, - nlayers=2, - ) - # run fx_quant in neural_compressor and save the quantized GraphModule - quantizer = Quantization(fake_yaml) - dataset = quantizer.dataset("dummy", (3, 10), label=True) - quantizer.eval_func = eval_func - if fake_yaml == "fx_qat_yaml.yaml": - quantizer.q_func = q_func - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.model = common.Model(model_origin) - q_model = quantizer.fit() - self.assertTrue("quantize" in str(type(q_model.model.encoder))) - self.assertTrue("quantize" in str(type(q_model.model.rnn))) - - def test_fx_sub_module_quant(self): - for fake_yaml in ["fx_qat_yaml.yaml", "fx_dynamic_yaml.yaml", "fx_ptq_yaml.yaml"]: - model_origin = DynamicControlModel() - # run fx_quant in neural_compressor and save the quantized GraphModule - quantizer = Quantization(fake_yaml) - dataset = quantizer.dataset("dummy", (1, 3, 224, 224), label=True) - quantizer.eval_func = eval_func - if fake_yaml == "fx_qat_yaml.yaml": - quantizer.q_func = q_func - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.model = common.Model( - model_origin, - **{ - "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": {"preserved_attributes": []}, - } - ) - q_model = quantizer.fit() - q_model.save("./saved") - # Load configure and weights with neural_compressor.utils - model_fx = load( - "./saved/best_model.pt", - model_origin, - **{ - "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": {"preserved_attributes": []}, - } - ) - self.assertTrue(isinstance(model_fx.sub, torch.fx.graph_module.GraphModule)) - - # recover int8 model with only tune_cfg - history_file = "./saved/history.snapshot" - model_fx_recover = recover( - model_origin, - history_file, - 0, - **{ - "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, - "convert_custom_config_dict": {"preserved_attributes": []}, - } - ) - self.assertEqual(model_fx.sub.code, model_fx_recover.sub.code) - shutil.rmtree("./saved", ignore_errors=True) - - def 
test_deepcopy_failure(self):
-        def eval_func(model):
-            return 1
-
-        # To build an object t2, which will fail on deepcopy.
-        class T1:
-            def __init__(self, t1) -> None:
-                self.t1 = t1
-                self.j = 1
-
-            # required for usage with set in T1
-            def __hash__(self):
-                return hash(self.j)
-
-        t1 = set()
-        t2 = T1([t1])
-        t1.add(t2)
-
-        for fake_yaml in ["fx_ptq_yaml.yaml"]:
-            model_origin = M()
-            model_origin.tmp = t2
-            # run fx_quant in neural_compressor and save the quantized GraphModule
-            quantizer = Quantization(fake_yaml)
-            dataset = quantizer.dataset("dummy", (1, 3, 224, 224), label=True)
-            quantizer.eval_func = eval_func
-            quantizer.calib_dataloader = common.DataLoader(dataset)
-            quantizer.model = common.Model(model_origin)
-            q_model = quantizer.fit()
-            self.assertTrue(isinstance(q_model.model, torch.fx.graph_module.GraphModule))
-
-    @unittest.skipIf(
-        PT_VERSION < Version("1.11.0").release,
-        "Please use PyTorch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend",
-    )
-    def test_bf16_capability(self):
-        model_origin = DynamicControlModel()
-        os.environ["FORCE_BF16"] = "1"
-        q_capability = self.adaptor._get_quantizable_ops(model_origin)
-        del os.environ["FORCE_BF16"]
-
-        self.assertEqual([elem["weight"]["dtype"] for elem in q_capability["optypewise"]["Conv2d"]], [["int8"], "fp32"])
-        self.assertEqual(
-            [elem["activation"]["dtype"] for elem in q_capability["optypewise"]["Conv2d"]], [["uint8"], "fp32"]
-        )
-        self.assertEqual(
-            [elem["weight"]["dtype"] for elem in q_capability["opwise"][("conv", "Conv2d")]], [["int8"], "fp32"]
-        )
-        self.assertEqual(
-            [elem["activation"]["dtype"] for elem in q_capability["opwise"][("conv", "Conv2d")]], [["uint8"], "fp32"]
-        )
-        self.assertEqual(
-            [elem["weight"]["dtype"] for elem in q_capability["opwise"][("linear", "Linear")]],
-            [["int8"], "fp32", "bf16"],
-        )
-        self.assertEqual(
-            [elem["activation"]["dtype"] for elem in q_capability["opwise"][("linear", "Linear")]],
-            [["uint8"], "fp32", "bf16"],
-        )
-
-    @unittest.skipIf(
-        PT_VERSION < Version("1.11.0").release,
-        "Please use PyTorch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend",
-    )
-    def test_mix_precision(self):
-        fake_yaml = "fx_ptq_yaml.yaml"
-        model_origin = DynamicControlModel()
-        # run fx_quant in neural_compressor and save the quantized GraphModule
-        quantizer = Quantization(fake_yaml)
-        dataset = quantizer.dataset("dummy", (1, 3, 224, 224), label=True)
-        quantizer.eval_func = eval_func
-        quantizer.calib_dataloader = common.DataLoader(dataset)
-        quantizer.model = common.Model(
-            model_origin,
-            **{
-                "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]},
-                "convert_custom_config_dict": {"preserved_attributes": []},
-            }
-        )
-        q_model = quantizer.fit()
-        tune_cfg = q_model.q_config
-        tune_cfg["op"][("conv.module", "Conv2d")].clear()
-        tune_cfg["op"][("conv.module", "Conv2d")] = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}}
-        tune_cfg["bf16_ops_list"].append(("conv.module", "Conv2d"))
-        from neural_compressor.adaptor.torch_utils.bf16_convert import Convert
-
-        q_model._model = Convert(q_model._model, tune_cfg)
-
-        self.assertEqual(q_model._model.conv.module.module.weight.dtype, torch.bfloat16)
-        self.assertEqual(q_model._model.conv.module.module.bias.dtype, torch.bfloat16)
-
-    def test_symbolic_trace(self):
-        from neural_compressor.adaptor.torch_utils.symbolic_trace import symbolic_trace
-
-        model_origin = DynamicControlModel()
-        traced_model = symbolic_trace(model_origin, is_qat=False)
-        if PT_VERSION >= Version("1.11.0").release:
-            self.assertTrue(isinstance(traced_model.sub, torch.nn.Module))
-            self.assertTrue(isinstance(traced_model.conv, torch.fx.graph_module.GraphModule))
-        else:
-            self.assertTrue(isinstance(traced_model.sub, torch.fx.graph_module.GraphModule))
-        traced_model_qat = symbolic_trace(model_origin, is_qat=True)
-        self.assertTrue(isinstance(traced_model_qat.sub, torch.fx.graph_module.GraphModule))
-
-    def test_tensor_dump(self):
-        model = resnet18()
-        model = MODELS["pytorch"](model)
-        quantizer = Quantization("fx_ptq_yaml.yaml")
-        dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True)
-        dataloader = common.DataLoader(dataset)
-        dataloader = common._generate_common_dataloader(dataloader, "pytorch")
-        quantizer.eval_dataloader = dataloader
-        quantizer.calib_dataloader = dataloader
-        quantizer.model = model.model
-        q_model = quantizer.fit()
-        op_list, _ = quantizer.strategy.adaptor.diagnosis_helper(model, q_model, None)
-        quantizer.strategy.adaptor.inspect_tensor(
-            model, dataloader, op_list=op_list, iteration_list=[1], inspect_type="all", save_to_disk=True
-        )
-        with open("saved/inspect_result.pkl", "rb") as fp:
-            tensor_dict = pickle.load(fp)
-        a = tensor_dict["activation"][0]
-        w = tensor_dict["weight"]
-        self.assertTrue(w["conv1"]["conv1.weight"].shape[0] == a["conv1"]["conv1.output0"].shape[1])
-        quantizer.strategy.adaptor.inspect_tensor(
-            q_model,
-            dataloader,
-            op_list=["conv1", "layer2.0.downsample.0"],
-            iteration_list=[1, 2],
-            inspect_type="all",
-            save_to_disk=True,
-        )
-        with open("saved/inspect_result.pkl", "rb") as fp:
-            tensor_dict = pickle.load(fp)
-        a = tensor_dict["activation"][0]
-        w = tensor_dict["weight"]
-        self.assertTrue(
-            w["layer2.0.downsample.0"]["layer2.0.downsample.0.weight"].shape[0]
-            == a["layer2.0.downsample.0"]["layer2.0.downsample.0.output0"].shape[1]
-        )
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py
index ea8a18f424e..1bfa38a0bb7 100644
--- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py
+++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py
@@ -392,21 +392,15 @@ def test_fx_sub_module_quant(self):
         "Please use PyTorch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend",
     )
     def test_mix_precision(self):
+        os.environ["FORCE_BF16"] = "1"
         model_origin = DynamicControlModel()
-        # run fx_quant in neural_compressor and save the quantized GraphModule
         dataset = Datasets("pytorch")["dummy"]((100, 3, 224, 224))
         dataloader = DataLoader("pytorch", dataset)
         set_workspace("./saved")
+        # fx mode usually adds a .module suffix because tracing the entire model fails, so use conv.* to leverage re.match
+        ptq_fx_op_name_list["conv.*"] = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}}
         conf = PostTrainingQuantConfig(op_name_dict=ptq_fx_op_name_list)
         q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader, calib_func=eval_func)
-        tune_cfg = q_model.q_config
-        tune_cfg["op"][("conv.module", "Conv2d")].clear()
-        tune_cfg["op"][("conv.module", "Conv2d")] = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}}
-        tune_cfg["bf16_ops_list"].append(("conv.module", "Conv2d"))
-        from neural_compressor.adaptor.torch_utils.bf16_convert import Convert
-
-        q_model._model = Convert(q_model._model, tune_cfg)
-
         self.assertEqual(q_model._model.conv.module.module.weight.dtype, torch.bfloat16)
self.assertEqual(q_model._model.conv.module.module.bias.dtype, torch.bfloat16) From 82018410ed160d828bec1b26eb180818503e898d Mon Sep 17 00:00:00 2001 From: xin3he Date: Thu, 4 Jul 2024 10:20:54 +0800 Subject: [PATCH 4/5] add 1x back Signed-off-by: xin3he --- .../test_adaptor_pytorch_1x.py | 1209 +++++++++++++++++ 1 file changed, 1209 insertions(+) create mode 100644 test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1x.py diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1x.py new file mode 100644 index 00000000000..b13c6ff5a76 --- /dev/null +++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_1x.py @@ -0,0 +1,1209 @@ +import copy +import os +import pickle +import shutil +import unittest + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.quantized as nnq +from packaging.version import Version +from torch.quantization import DeQuantStub, QuantStub + +import neural_compressor.adaptor.pytorch as nc_torch +from neural_compressor.adaptor import FRAMEWORKS +from neural_compressor.conf.config import QuantConf +from neural_compressor.experimental import Quantization, common +from neural_compressor.model import MODELS +from neural_compressor.utils.pytorch import load +from neural_compressor.utils.utility import LazyImport, recover + +try: + import intel_extension_for_pytorch as ipex + + IPEX = True +except: + IPEX = False + +# improve lazy import UT coverage +resnet18 = LazyImport("torchvision.models.resnet18") +q_resnet18 = LazyImport("torchvision.models.quantization.resnet18") + +PT_VERSION = nc_torch.get_torch_version().release +if PT_VERSION >= Version("1.8.0").release: + FX_MODE = True +else: + FX_MODE = False + + +fake_dyn_yaml = """ + model: + name: imagenet + framework: pytorch + + quantization: + approach: post_training_dynamic_quant + op_wise: { + 'decoder': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + } + } + evaluation: + accuracy: + metric: + topk: 1 + performance: + warmup: 5 + iteration: 10 + + tuning: + accuracy_criterion: + relative: 0.01 + exit_policy: + timeout: 0 + random_seed: 9527 + workspace: + path: saved + """ + + +fake_ptq_yaml = """ + model: + name: imagenet + framework: pytorch + + quantization: + op_wise: { + + 'layer1.0.conv1': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'layer1.0.conv2': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'layer2.0.conv1': { + 'activation': {'dtype': ['uint8'], 'algorithm': ['minmax'], 'granularity': ['per_tensor'], 'scheme':['sym']}, + 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} + }, + 'layer3.0.conv1': { + 'activation': {'dtype': ['uint8'], 'algorithm': ['kl'], 'granularity': ['per_tensor'], 'scheme':['sym']}, + 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} + }, + 'layer1.0.add_relu': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + } + evaluation: + accuracy: + metric: + topk: 1 + performance: + warmup: 1 + iteration: 10 + + tuning: + accuracy_criterion: + relative: 0.01 + exit_policy: + timeout: 0 + random_seed: 9527 + workspace: + path: saved + """ + +fake_auto_yaml = """ + model: + name: imagenet + framework: pytorch_fx + + quantization: + approach: post_training_auto_quant + evaluation: + accuracy: + metric: + topk: 1 + performance: + warmup: 1 + iteration: 10 + + tuning: + accuracy_criterion: + 
relative: 0.01 + exit_policy: + timeout: 1000 + max_trials: 3 + random_seed: 9527 + workspace: + path: saved + """ + + +fake_ptq_yaml_for_fx = """ + model: + name: imagenet + framework: pytorch_fx + + quantization: + approach: post_training_auto_quant + op_wise: { + 'layer1.0.conv1': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'layer1.0.conv2': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'layer2.0.conv1': { + 'activation': {'dtype': ['uint8'], 'algorithm': ['minmax'], 'granularity': ['per_tensor'], 'scheme':['sym']}, + 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} + }, + 'layer3.0.conv1': { + 'activation': {'dtype': ['uint8'], 'algorithm': ['kl'], 'granularity': ['per_tensor'], 'scheme':['sym']}, + 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} + }, + 'layer1.0.add_relu': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'conv.module': { + 'weight': {'dtype': ['fp32']}, + 'activation': {'dtype': ['fp32']} + }, + 'default_qconfig': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + } + } + evaluation: + accuracy: + metric: + topk: 1 + performance: + warmup: 5 + iteration: 10 + + tuning: + accuracy_criterion: + relative: 0.01 + exit_policy: + timeout: 0 + random_seed: 9527 + workspace: + path: saved + """ + + +fake_qat_yaml = """ + model: + name: imagenet + framework: pytorch + + quantization: + approach: quant_aware_training + train: + end_epoch: 1 + iteration: 1 + optimizer: + SGD: + learning_rate: 0.0001 + criterion: + CrossEntropyLoss: + reduction: mean + op_wise: { + 'layer1.0.conv1': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'layer1.0.conv2': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + }, + 'layer2.0.conv1': { + 'activation': {'dtype': ['uint8'], 'algorithm': ['minmax'], 'granularity': ['per_tensor'], 'scheme':['sym']}, + 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} + }, + 'layer3.0.conv1': { + 'activation': {'dtype': ['uint8'], 'algorithm': ['kl'], 'granularity': ['per_tensor'], 'scheme':['sym']}, + 'weight': {'dtype': ['int8'], 'algorithm': ['minmax'], 'granularity': ['per_channel'], 'scheme':['sym']} + }, + 'layer1.0.add_relu': { + 'activation': {'dtype': ['fp32']}, + 'weight': {'dtype': ['fp32']} + } + } + evaluation: + accuracy: + metric: + topk: 1 + + tuning: + accuracy_criterion: + relative: 0.01 + exit_policy: + timeout: 0 + random_seed: 9527 + workspace: + path: saved + """ + + +def build_pytorch_yaml(): + with open("ptq_yaml.yaml", "w", encoding="utf-8") as f: + f.write(fake_ptq_yaml) + + with open("dynamic_yaml.yaml", "w", encoding="utf-8") as f: + f.write(fake_dyn_yaml) + + with open("qat_yaml.yaml", "w", encoding="utf-8") as f: + f.write(fake_qat_yaml) + + with open("auto_yaml.yaml", "w", encoding="utf-8") as f: + f.write(fake_auto_yaml) + + +def build_pytorch_fx_yaml(): + if PT_VERSION >= Version("1.9.0").release: + fake_fx_ptq_yaml = fake_ptq_yaml_for_fx + else: + fake_fx_ptq_yaml = fake_ptq_yaml.replace("pytorch", "pytorch_fx") + with open("fx_ptq_yaml.yaml", "w", encoding="utf-8") as f: + f.write(fake_fx_ptq_yaml) + + fake_fx_dyn_yaml = fake_dyn_yaml.replace("pytorch", "pytorch_fx") + with open("fx_dynamic_yaml.yaml", "w", encoding="utf-8") as f: + f.write(fake_fx_dyn_yaml) + + fake_fx_qat_yaml = 
fake_qat_yaml.replace("pytorch", "pytorch_fx") + with open("fx_qat_yaml.yaml", "w", encoding="utf-8") as f: + f.write(fake_fx_qat_yaml) + + +def build_dump_tensors_yaml(): + fake_yaml = """ + model: + name: imagenet + framework: pytorch + + evaluation: + accuracy: + metric: + topk: 1 + + tuning: + accuracy_criterion: + relative: 0.01 + exit_policy: + timeout: 0 + random_seed: 9527 + workspace: + path: saved + tensorboard: true + """ + with open("dump_yaml.yaml", "w", encoding="utf-8") as f: + f.write(fake_yaml) + + +class M(torch.nn.Module): + def __init__(self): + super().__init__() + self.quant = QuantStub() + self.conv = nn.Conv2d(3, 1, 1) + self.linear = nn.Linear(224 * 224, 5) + self.dequant = DeQuantStub() + + def forward(self, x): + x = self.quant(x) + x = self.conv(x) + x = x.view(1, -1) + x = self.linear(x) + x = self.dequant(x) + return x + + +class FP32Model(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + times = x.size(1) + if times == 1: + return torch.ones(x.shape) + return torch.ones(x.shape) + 1 + + +class DynamicModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv2d(1, 1, 1) + + def forward(self, x): + if x is not None: + x = self.conv(x) + return x + + +class SubModel(torch.nn.Module): + def __init__(self, bypass=True): + super().__init__() + self.quant = QuantStub() + self.conv = nn.Conv2d(1, 1, 1) + self.conv1 = nn.Conv2d(1, 1, 1) + self.bn = nn.BatchNorm2d(1) + self.relu = nn.ReLU() + self.fp32 = FP32Model() + self.norm = nn.LayerNorm([1, 224, 224]) + self.dequant = DeQuantStub() + self.bypass = bypass + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.quant(x) + x = self.relu(x) + x = self.conv1(x) + x = self.dequant(x) + if not self.bypass: + x = self.fp32(x) + x = self.norm(x) + return x + + +class PartialQuantModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.quant = QuantStub() + self.conv = nn.Conv2d(3, 1, 1) + self.bn = nn.BatchNorm2d(1) + self.conv1 = nn.Conv2d(1, 1, 1) + self.bn1 = nn.BatchNorm2d(1) + self.conv2 = nn.Conv2d(1, 1, 1) + self.linear = nn.Linear(224 * 224, 1) + self.dequant = DeQuantStub() + self.sub = SubModel(bypass=False) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.conv1(x) + x = self.bn1(x) + x = self.sub(x) + x = self.quant(x) + x = self.conv2(x) + x = x.view(1, -1) + x = self.linear(x) + x = self.dequant(x) + return x + + +class DynamicControlModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv2d(3, 1, 1) + self.bn = nn.BatchNorm2d(1) + self.linear = nn.Linear(224 * 224, 1) + self.sub = SubModel() + self.fp32 = FP32Model() + self.dyn = DynamicModel() + + def forward(self, x): + x = self.conv(x) + x = self.dyn(x) + x = self.bn(x) + x = self.sub(x) + x = self.fp32(x) + x = x.view(1, -1) + x = self.linear(x) + return x + + +class LSTMModel(nn.Module): + """Container module with an encoder, a recurrent module, and a decoder.""" + + def __init__(self, ntoken=10, ninp=512, nhid=256, nlayers=5, dropout=0.5): + super(LSTMModel, self).__init__() + self.drop = nn.Dropout(dropout) + self.encoder = nn.Embedding(ntoken, ninp) + self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout) + self.decoder = nn.Linear(nhid, ntoken) + self.init_weights() + self.nhid = nhid + self.nlayers = nlayers + + def init_weights(self): + initrange = 0.1 + self.encoder.weight.data.uniform_(-initrange, initrange) + self.decoder.bias.data.zero_() + self.decoder.weight.data.uniform_(-initrange, 
initrange) + + def forward(self, input): + input = torch.ones((3, 10), dtype=torch.int32) + h0 = torch.randn(2, 10, 256) + c0 = torch.randn(2, 10, 256) + hidden = (h0, c0) + emb = self.encoder(input) + output, hidden = self.rnn(emb, hidden) + output = self.drop(output) + decoded = self.decoder(output) + return decoded, hidden + + +def eval_func(model): + # switch to evaluate mode + model.eval() + with torch.no_grad(): + input = torch.randn(1, 3, 224, 224) + # compute output + output = model(input) + return 0.0 + + +def q_func(model): + optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) + # switch to evaluate mode + model.train() + input = torch.randn(1, 3, 224, 224) + # compute output + output = model(input) + loss = output.mean() + optimizer.zero_grad() + loss.backward() + optimizer.step() + return model + + +class TestPytorchAdaptor(unittest.TestCase): + framework_specific_info = { + "device": "cpu", + "approach": "post_training_static_quant", + "random_seed": 1234, + "q_dataloader": None, + "workspace_path": "./", + } + framework = "pytorch" + adaptor = FRAMEWORKS[framework](framework_specific_info) + model = q_resnet18() + nc_model = MODELS["pytorch"](model) + + @classmethod + def setUpClass(self): + build_pytorch_yaml() + build_dump_tensors_yaml() + + @classmethod + def tearDownClass(self): + os.remove("ptq_yaml.yaml") + os.remove("dynamic_yaml.yaml") + os.remove("qat_yaml.yaml") + os.remove("dump_yaml.yaml") + os.remove("auto_yaml.yaml") + shutil.rmtree("./saved", ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) + + def test_get_all_weight_name(self): + assert len(list(self.nc_model.get_all_weight_names())) == 62 + + def test_get_weight(self): + for name, param in self.model.named_parameters(): + if name == "layer4.1.conv2.weight": + param.data.fill_(0.0) + if name == "fc.bias": + param.data.fill_(0.1) + assert int(torch.sum(self.nc_model.get_weight("layer4.1.conv2.weight"))) == 0 + assert torch.allclose(torch.sum(self.nc_model.get_weight("fc.bias")), torch.tensor(100.0)) + + def test_get_input(self): + model = MODELS["pytorch"](q_resnet18()) + model.model.eval().fuse_model() + model.register_forward_pre_hook() + rand_input = torch.rand(100, 3, 224, 224).float() + model.model(rand_input) + assert torch.equal(model.get_inputs("x"), rand_input) + model.remove_hooks() + + def test_update_weights(self): + self.nc_model.update_weights("fc.bias", torch.zeros([1000])) + assert int(torch.sum(self.nc_model.get_weight("fc.bias"))) == 0 + + def test_get_gradient(self): + with self.assertRaises(AssertionError): + self.nc_model.get_gradient("fc.bias") + + for name, tensor in self.nc_model._model.named_parameters(): + if name == "fc.bias": + tensor.grad = torch.zeros_like(tensor) + break + assert torch.equal(torch.Tensor(self.nc_model.get_gradient("fc.bias")), torch.zeros_like(tensor)) + + rand_input = torch.rand(100, 3, 224, 224).float() + rand_input.grad = torch.ones_like(rand_input) + assert torch.equal(torch.Tensor(self.nc_model.get_gradient(rand_input)), torch.ones_like(rand_input)) + + def test_report_sparsity(self): + df, total_sparsity = self.nc_model.report_sparsity() + self.assertTrue(total_sparsity > 0) + self.assertTrue(len(df) == 22) + + def test_quantization_saved(self): + for fake_yaml in ["dynamic_yaml.yaml", "qat_yaml.yaml", "ptq_yaml.yaml"]: + model = M() + quantizer = Quantization(fake_yaml) + quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True + dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True) + quantizer.model = model 
+
+    def test_quantization_saved(self):
+        for fake_yaml in ["dynamic_yaml.yaml", "qat_yaml.yaml", "ptq_yaml.yaml"]:
+            model = M()
+            quantizer = Quantization(fake_yaml)
+            quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True
+            dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True)
+            quantizer.model = model
+            quantizer.calib_dataloader = common.DataLoader(dataset)
+            quantizer.eval_dataloader = common.DataLoader(dataset)
+            q_model = quantizer.fit()
+            eval_func(q_model)
+            q_model.save("./saved")
+            # Load configure and weights by neural_compressor.utils
+            saved_model = load("./saved", model)
+            eval_func(saved_model)
+            # recover int8 model from history
+            history_file = "./saved/history.snapshot"
+            model_recover = recover(model, history_file, 0)
+            eval_func(model_recover)
+            self.assertEqual(type(saved_model.conv), type(model_recover.conv))
+            shutil.rmtree("./saved", ignore_errors=True)
+        from neural_compressor.experimental import Benchmark
+
+        evaluator = Benchmark("ptq_yaml.yaml")
+        # Load configure and weights by neural_compressor.model
+        evaluator.model = model
+        evaluator.b_dataloader = common.DataLoader(dataset)
+        evaluator.fit("accuracy")
+
+        for fake_yaml in ["qat_yaml.yaml", "ptq_yaml.yaml"]:
+            model = copy.deepcopy(self.model)
+            if fake_yaml == "ptq_yaml.yaml":
+                model.eval().fuse_model()
+            conf = QuantConf(fake_yaml)
+            quantizer = Quantization(conf)
+            dataset = quantizer.dataset("dummy", (100, 3, 224, 224))
+            quantizer.model = model
+            if fake_yaml == "qat_yaml.yaml":
+                quantizer.q_func = q_func
+            else:
+                quantizer.calib_dataloader = common.DataLoader(dataset)
+            quantizer.eval_func = eval_func
+            q_model = quantizer.fit()
+            q_model.save("./saved")
+            # Load configure and weights by neural_compressor.utils
+            saved_model = load("./saved", model)
+            eval_func(saved_model)
+            shutil.rmtree("./saved", ignore_errors=True)
+
+    def test_quantization_new_saved(self):
+        for fake_yaml in ["dynamic_yaml.yaml", "qat_yaml.yaml", "ptq_yaml.yaml"]:
+            model = M()
+            quantizer = Quantization(fake_yaml)
+            quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True
+            dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True)
+            quantizer.model = model
+            quantizer.calib_dataloader = common.DataLoader(dataset)
+            quantizer.eval_dataloader = common.DataLoader(dataset)
+            q_model = quantizer.fit()
+            eval_func(q_model)
+            torch.save(q_model.quantized_state_dict(), "./saved/model.pt")
+            # Load configure and weights by neural_compressor.utils
+            from neural_compressor.experimental.common import Model
+
+            common_model = Model(model)
+            common_model.load_quantized_state_dict(torch.load("./saved/model.pt"))
+            eval_func(common_model)
+            self.assertEqual(type(q_model._model.linear), type(common_model._model.linear))
+            shutil.rmtree("./saved", ignore_errors=True)
+
+    @unittest.skipIf(IPEX, "this function is affected by IPEX; fix in progress")
+    def test_non_quant_module(self):
+        for fake_yaml in ["qat_yaml.yaml", "ptq_yaml.yaml"]:
+            model = PartialQuantModel()
+            conf = QuantConf(fake_yaml)
+            quantizer = Quantization(conf)
+            dataset = quantizer.dataset("dummy", (1, 3, 224, 224))
+            non_quant_dict = {
+                "non_quant_module_name": ["conv", "conv1", "sub.conv"],
+                "non_quant_module_class": ["BatchNorm2d", "FP32Model"],
+            }
+            quantizer.model = common.Model(model, **non_quant_dict)
+            if fake_yaml == "qat_yaml.yaml":
+                quantizer.q_func = q_func
+            else:
+                quantizer.calib_func = eval_func
+            quantizer.eval_func = eval_func
+            q_model = quantizer.fit()
+            self.assertTrue(isinstance(q_model.model.conv, torch.nn.Conv2d))
+            self.assertTrue("quantize" in str(q_model.model.conv2.__class__))
+            q_model.save("./saved")
+            saved_model = load("./saved", model, **non_quant_dict)
+            eval_func(saved_model)
+            shutil.rmtree("./saved", ignore_errors=True)
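+
+    # Saving and restoring follows the same pattern throughout this class. A
+    # sketch, using the default paths from these tests; fp32_model stands in
+    # for the original float model:
+    #
+    #     q_model.save("./saved")                            # config + weights
+    #     saved_model = load("./saved", fp32_model)          # rebuild from workspace
+    #     recovered = recover(fp32_model, "./saved/history.snapshot", 0)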
+
+    def test_auto_quant(self):
+        def eval_func(model):
+            return 1
+
+        model_origin = LSTMModel(
+            ntoken=10,
+            ninp=512,
+            nhid=256,
+            nlayers=2,
+        )
+        # run fx_quant in neural_compressor and save the quantized GraphModule
+        quantizer = Quantization("auto_yaml.yaml")
+        dataset = quantizer.dataset("dummy", (3, 10), label=True)
+        quantizer.eval_func = eval_func
+        quantizer.calib_dataloader = common.DataLoader(dataset)
+        quantizer.model = common.Model(model_origin)
+        q_model = quantizer.fit()
+        self.assertNotEqual(q_model, None)
+
+    def test_workspace_path(self):
+        model = M()
+        quantizer = Quantization("ptq_yaml.yaml")
+        quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True
+        dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True)
+        quantizer.model = model
+        quantizer.calib_dataloader = common.DataLoader(dataset)
+        quantizer.eval_dataloader = common.DataLoader(dataset)
+        q_model = quantizer.fit()
+        eval_func(q_model)
+        torch.save(q_model.quantized_state_dict(), "./saved/best_model.pt")
+        # Load configure and weights by workspace_path
+        from neural_compressor.experimental.common import Model
+
+        common_model = Model(model)
+        common_model.workspace_path = "./saved"
+        eval_func(common_model)
+        self.assertEqual(type(q_model._model.linear), type(common_model._model.linear))
+        shutil.rmtree("./saved", ignore_errors=True)
+
+    def test_get_graph_info(self):
+        from neural_compressor.model.torch_model import PyTorchModel
+
+        model = PyTorchModel(self.model)
+        op_map = model.graph_info
+        self.assertTrue(op_map["conv1"] == "Conv2d")
+
+    def test_tensorboard(self):
+        model = copy.deepcopy(self.nc_model)
+        model.model.eval().fuse_model()
+        quantizer = Quantization("dump_yaml.yaml")
+        dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True)
+        quantizer.model = model.model
+        quantizer.calib_dataloader = common.DataLoader(dataset)
+        quantizer.eval_func = eval_func
+        quantizer.fit()
+        self.assertTrue(os.path.exists("runs/eval/baseline_acc0.0"))
+        quantizer.eval_dataloader = common.DataLoader(dataset)
+        quantizer.eval_func = None
+        quantizer.fit()
+        self.assertTrue(os.path.exists("runs/eval/baseline_acc0.0"))
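+
+    # inspect_tensor dumps a pickle whose layout the next test depends on. A
+    # sketch inferred from the assertions below, not a documented schema; the
+    # op and tensor names are examples:
+    #
+    #     tensor_dict = {
+    #         "activation": [  # one dict per inspected iteration
+    #             {"conv1.0": {"conv1.0.output0": ndarray}},
+    #         ],
+    #         "weight": {"conv1.0": {"conv1.0.weight": ndarray}},
+    #     }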
+
+    def test_tensor_dump_and_set(self):
+        model = copy.deepcopy(self.nc_model)
+        model.model.eval().fuse_model()
+        quantizer = Quantization("ptq_yaml.yaml")
+        dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True)
+        dataloader = common.DataLoader(dataset)
+        dataloader = common._generate_common_dataloader(dataloader, "pytorch")
+        quantizer.eval_dataloader = dataloader
+        quantizer.calib_dataloader = dataloader
+        quantizer.model = model.model
+        q_model = quantizer.fit()
+        quantizer.strategy.adaptor.inspect_tensor(
+            model,
+            dataloader,
+            op_list=["conv1.0", "layer1.0.conv1.0"],
+            iteration_list=[1, 2],
+            inspect_type="all",
+            save_to_disk=True,
+        )
+        with open("saved/inspect_result.pkl", "rb") as fp:
+            tensor_dict = pickle.load(fp)
+        a = tensor_dict["activation"][0]
+        w = tensor_dict["weight"]
+        if PT_VERSION >= Version("1.8.0").release:
+            self.assertTrue(w["conv1.0"]["conv1.0.weight"].shape[0] == a["conv1.0"]["conv1.0.output0"].shape[1])
+        else:
+            self.assertTrue(w["conv1.0"]["conv1.0.weight"].shape[0] == a["conv1.0"]["conv1.1.output0"].shape[1])
+        data = np.random.random(w["conv1.0"]["conv1.0.weight"].shape).astype(np.float32)
+        quantizer.strategy.adaptor.set_tensor(q_model, {"conv1.0.weight": data})
+        changed_tensor = q_model.get_weight("conv1.weight")
+        scales = changed_tensor.q_per_channel_scales()
+        changed_tensor_fp32 = torch.dequantize(changed_tensor)
+        self.assertTrue(np.allclose(data, changed_tensor_fp32.numpy(), atol=2 / np.min(scales.numpy())))
+        quantizer.strategy.adaptor.inspect_tensor(
+            q_model,
+            dataloader,
+            op_list=["conv1.0", "layer1.0.conv1.0"],
+            iteration_list=[1, 2],
+            inspect_type="all",
+            save_to_disk=False,
+        )
+
+    def test_forward_wrapper(self):
+        vision_model = resnet18()
+
+        class dummymodel(torch.nn.Module):
+            def __init__(self, model):
+                super(dummymodel, self).__init__()
+                self._model = model
+
+            def forward(self, input=None):
+                return self._model(input)
+
+        data = [
+            [{"input": torch.rand(3, 224, 224)}, torch.ones(1, 1)],
+        ]
+        # dataloader.batch_size=100
+        dataloader = common.DataLoader(data, batch_size=1)
+        quantizer = Quantization("dynamic_yaml.yaml")
+        model = dummymodel(vision_model)
+        quantizer.model = model
+        quantizer.calib_dataloader = dataloader
+        quantizer.eval_dataloader = dataloader
+        model = quantizer.fit()
+        self.assertTrue(isinstance(model, torch.nn.Module))
+
+    def test_floatfunctions_fallback(self):
+        class ModelWithFunctionals(torch.nn.Module):
+            def __init__(self):
+                super(ModelWithFunctionals, self).__init__()
+                self.mycat = nnq.FloatFunctional()
+                self.myadd = nnq.FloatFunctional()
+                self.myadd_relu = nnq.FloatFunctional()
+                # Tracing doesn't work yet for c10 ops with scalar inputs
+                # https://github.com/pytorch/pytorch/issues/27097
+                self.my_scalar_add = nnq.FloatFunctional()
+                self.mymul = nnq.FloatFunctional()
+                self.my_scalar_mul = nnq.FloatFunctional()
+                self.quant = QuantStub()
+                self.dequant = DeQuantStub()
+
+            def forward(self, x):
+                x = self.quant(x)
+                y = self.mycat.cat([x, x, x])
+                z = self.myadd.add(y, y)
+                w = self.myadd_relu.add_relu(z, z)
+                # Tracing doesn't work yet for c10 ops with scalar inputs
+                # https://github.com/pytorch/pytorch/issues/27097
+                w = self.my_scalar_add.add_scalar(w, -0.5)
+                w = self.mymul.mul(w, w)
+                w = self.my_scalar_mul.mul_scalar(w, 0.5)
+                w = self.dequant(w)
+                return w
+
+        model = ModelWithFunctionals()
+        model = MODELS["pytorch"](model)
+        x = torch.rand(10, 1, dtype=torch.float)
+        y = model.model(x)
+        fallback_ops = []
+        q_capability = self.adaptor.query_fw_capability(model)
+        for k, v in q_capability["opwise"].items():
+            if k[0] != "quant" and k[0] != "dequant":
+                fallback_ops.append(k[0])
+        model.model.qconfig = torch.quantization.default_qconfig
+        model.model.quant.qconfig = torch.quantization.default_qconfig
+        if PT_VERSION >= Version("1.8.0").release:
+            model.model.dequant.qconfig = torch.quantization.default_qconfig
+        nc_torch._fallback_quantizable_ops_recursively(model.model, "", fallback_ops, op_qcfgs={})
+        if PT_VERSION >= Version("2.0.0").release:
+            from torch.quantization.quantize import _add_observer_ as add_observer_
+        else:
+            from torch.quantization.quantize import add_observer_
+        add_observer_(model.model)
+        model.model(x)
+        torch.quantization.convert(model.model, self.adaptor.q_mapping, inplace=True)
+        qy = model.model(x)
+        tol = {"atol": 1e-01, "rtol": 1e-03}
+        self.assertTrue(np.allclose(y, qy, **tol))
+
+
+@unittest.skipIf(not FX_MODE, "Unsupported Fx Mode with PyTorch Version Below 1.8")
+class TestPytorchFXAdaptor(unittest.TestCase):
+    framework_specific_info = {
+        "device": "cpu",
+        "approach": "post_training_static_quant",
+        "random_seed": 1234,
+        "q_dataloader": None,
+        "workspace_path": "./",
+    }
+    framework = "pytorch_fx"
+    adaptor = FRAMEWORKS[framework](framework_specific_info)
+
+    @classmethod
+    def setUpClass(self):
+        build_pytorch_fx_yaml()
+
+    @classmethod
+    def tearDownClass(self):
+        os.remove("fx_ptq_yaml.yaml")
+        os.remove("fx_dynamic_yaml.yaml")
+        shutil.rmtree("./saved", ignore_errors=True)
+        shutil.rmtree("runs", ignore_errors=True)
+
+    def test_fx_quant(self):
+        for fake_yaml in ["fx_qat_yaml.yaml", "fx_ptq_yaml.yaml"]:
+            model_origin = resnet18()
+            # run fx_quant in neural_compressor and save the quantized GraphModule
+            quantizer = Quantization(fake_yaml)
+            dataset = quantizer.dataset("dummy", (10, 3, 224, 224), label=True)
+            quantizer.eval_func = eval_func
+            if fake_yaml == "fx_qat_yaml.yaml":
+                quantizer.q_func = q_func
+            else:
+                quantizer.calib_func = eval_func
+            dataloader = common.DataLoader(dataset)
+            quantizer.calib_dataloader = dataloader
+            quantizer.model = common.Model(
+                model_origin,
+                **{
+                    "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]},
+                    "convert_custom_config_dict": {"preserved_attributes": []},
+                }
+            )
+            q_model = quantizer.fit()
+            q_model.save("./saved")
+            # Load configure and weights with neural_compressor.utils
+            model_fx = load(
+                "./saved",
+                model_origin,
+                **{
+                    "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]},
+                    "convert_custom_config_dict": {"preserved_attributes": []},
+                }
+            )
+            self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule))
+
+            # recover int8 model with only tune_cfg
+            history_file = "./saved/history.snapshot"
+            model_fx_recover = recover(
+                model_origin,
+                history_file,
+                0,
+                **{
+                    "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]},
+                    "convert_custom_config_dict": {"preserved_attributes": []},
+                }
+            )
+            self.assertEqual(model_fx.code, model_fx_recover.code)
+            shutil.rmtree("./saved", ignore_errors=True)
+
+        for fake_yaml in ["fx_qat_yaml.yaml", "fx_ptq_yaml.yaml"]:
+            model_origin = M()
+            # run fx_quant in neural_compressor and save the quantized GraphModule
+            quantizer = Quantization(fake_yaml)
+            quantizer.conf.usr_cfg.tuning.exit_policy["performance_only"] = True
+            dataset = quantizer.dataset("dummy", (10, 3, 224, 224), label=True)
+            quantizer.calib_dataloader = common.DataLoader(dataset)
+            quantizer.eval_dataloader = common.DataLoader(dataset)
+            quantizer.model = common.Model(
+                model_origin,
+                **{
+                    "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]},
+                    "convert_custom_config_dict": {"preserved_attributes": []},
+                }
+            )
+            q_model = quantizer.fit()
+            q_model.save("./saved")
+            # Load configure and weights with neural_compressor.utils
+            model_fx = load(
+                "./saved",
+                model_origin,
+                **{
+                    "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]},
+                    "convert_custom_config_dict": {"preserved_attributes": []},
+                    "dataloader": quantizer.calib_dataloader,
+                }
+            )
+            self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule))
+            shutil.rmtree("./saved", ignore_errors=True)
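+
+    # Every FX test passes the same two kwargs through common.Model/load/recover.
+    # A sketch of their meaning; the keys follow the torch.fx prepare/convert
+    # custom configs, and submodule "a" is this suite's non-traceable branch:
+    #
+    #     {
+    #         "prepare_custom_config_dict": {
+    #             "non_traceable_module_name": ["a"],  # leave submodule "a" untraced
+    #         },
+    #         "convert_custom_config_dict": {
+    #             "preserved_attributes": [],  # extra attrs to keep on the GraphModule
+    #         },
+    #     }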
{"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) + self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) + + # Test the functionality of older model saving type + state_dict = torch.load("./saved/best_model.pt") + tune_cfg = state_dict.pop("best_configure") + import yaml + + with open("./saved/best_configure.yaml", "w") as f: + yaml.dump(tune_cfg, f, default_flow_style=False) + torch.save(state_dict, "./saved/best_model_weights.pt") + os.remove("./saved/best_model.pt") + model_fx = load( + "./saved", + copy.deepcopy(model), + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) + self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) + + # recover int8 model with only tune_cfg + history_file = "./saved/history.snapshot" + model_fx_recover = recover( + model, + history_file, + 0, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) + self.assertEqual(model_fx.code, model_fx_recover.code) + shutil.rmtree("./saved", ignore_errors=True) + + def test_default_dynamic_quant(self): + def eval_func(model): + return 1 + + def q_func(model): + return model + + # Model Definition + for fake_yaml in ["fx_qat_yaml.yaml", "fx_ptq_yaml.yaml"]: + model_origin = LSTMModel( + ntoken=10, + ninp=512, + nhid=256, + nlayers=2, + ) + # run fx_quant in neural_compressor and save the quantized GraphModule + quantizer = Quantization(fake_yaml) + dataset = quantizer.dataset("dummy", (3, 10), label=True) + quantizer.eval_func = eval_func + if fake_yaml == "fx_qat_yaml.yaml": + quantizer.q_func = q_func + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = common.Model(model_origin) + q_model = quantizer.fit() + self.assertTrue("quantize" in str(type(q_model.model.encoder))) + self.assertTrue("quantize" in str(type(q_model.model.rnn))) + + def test_fx_sub_module_quant(self): + for fake_yaml in ["fx_qat_yaml.yaml", "fx_dynamic_yaml.yaml", "fx_ptq_yaml.yaml"]: + model_origin = DynamicControlModel() + # run fx_quant in neural_compressor and save the quantized GraphModule + quantizer = Quantization(fake_yaml) + dataset = quantizer.dataset("dummy", (1, 3, 224, 224), label=True) + quantizer.eval_func = eval_func + if fake_yaml == "fx_qat_yaml.yaml": + quantizer.q_func = q_func + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = common.Model( + model_origin, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) + q_model = quantizer.fit() + q_model.save("./saved") + # Load configure and weights with neural_compressor.utils + model_fx = load( + "./saved/best_model.pt", + model_origin, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) + self.assertTrue(isinstance(model_fx.sub, torch.fx.graph_module.GraphModule)) + + # recover int8 model with only tune_cfg + history_file = "./saved/history.snapshot" + model_fx_recover = recover( + model_origin, + history_file, + 0, + **{ + "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]}, + "convert_custom_config_dict": {"preserved_attributes": []}, + } + ) + self.assertEqual(model_fx.sub.code, model_fx_recover.sub.code) + shutil.rmtree("./saved", ignore_errors=True) + 
+
+    def test_deepcopy_failure(self):
+        def eval_func(model):
+            return 1
+
+        # Build an object t2 that fails on deepcopy.
+        class T1:
+            def __init__(self, t1) -> None:
+                self.t1 = t1
+                self.j = 1
+
+            # required so T1 instances can be stored in a set
+            def __hash__(self):
+                return hash(self.j)
+
+        t1 = set()
+        t2 = T1([t1])
+        t1.add(t2)
+
+        for fake_yaml in ["fx_ptq_yaml.yaml"]:
+            model_origin = M()
+            model_origin.tmp = t2
+            # run fx_quant in neural_compressor and save the quantized GraphModule
+            quantizer = Quantization(fake_yaml)
+            dataset = quantizer.dataset("dummy", (1, 3, 224, 224), label=True)
+            quantizer.eval_func = eval_func
+            quantizer.calib_dataloader = common.DataLoader(dataset)
+            quantizer.model = common.Model(model_origin)
+            q_model = quantizer.fit()
+            self.assertTrue(isinstance(q_model.model, torch.fx.graph_module.GraphModule))
+
+    @unittest.skipIf(
+        PT_VERSION < Version("1.11.0").release,
+        "Please use PyTorch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend",
+    )
+    def test_bf16_capability(self):
+        model_origin = DynamicControlModel()
+        os.environ["FORCE_BF16"] = "1"
+        q_capability = self.adaptor._get_quantizable_ops(model_origin)
+        del os.environ["FORCE_BF16"]
+
+        self.assertEqual([elem["weight"]["dtype"] for elem in q_capability["optypewise"]["Conv2d"]], [["int8"], "fp32"])
+        self.assertEqual(
+            [elem["activation"]["dtype"] for elem in q_capability["optypewise"]["Conv2d"]], [["uint8"], "fp32"]
+        )
+        self.assertEqual(
+            [elem["weight"]["dtype"] for elem in q_capability["opwise"][("conv", "Conv2d")]], [["int8"], "fp32"]
+        )
+        self.assertEqual(
+            [elem["activation"]["dtype"] for elem in q_capability["opwise"][("conv", "Conv2d")]], [["uint8"], "fp32"]
+        )
+        self.assertEqual(
+            [elem["weight"]["dtype"] for elem in q_capability["opwise"][("linear", "Linear")]],
+            [["int8"], "fp32", "bf16"],
+        )
+        self.assertEqual(
+            [elem["activation"]["dtype"] for elem in q_capability["opwise"][("linear", "Linear")]],
+            [["uint8"], "fp32", "bf16"],
+        )
+
+    @unittest.skipIf(
+        PT_VERSION < Version("1.11.0").release,
+        "Please use PyTorch 1.11 or higher version for mixed precision with pytorch_fx or pytorch backend",
+    )
+    def test_mix_precision(self):
+        fake_yaml = "fx_ptq_yaml.yaml"
+        model_origin = DynamicControlModel()
+        # run fx_quant in neural_compressor and save the quantized GraphModule
+        quantizer = Quantization(fake_yaml)
+        dataset = quantizer.dataset("dummy", (1, 3, 224, 224), label=True)
+        quantizer.eval_func = eval_func
+        quantizer.calib_dataloader = common.DataLoader(dataset)
+        quantizer.model = common.Model(
+            model_origin,
+            **{
+                "prepare_custom_config_dict": {"non_traceable_module_name": ["a"]},
+                "convert_custom_config_dict": {"preserved_attributes": []},
+            }
+        )
+        q_model = quantizer.fit()
+        tune_cfg = q_model.q_config
+        tune_cfg["op"][("conv.module", "Conv2d")].clear()
+        tune_cfg["op"][("conv.module", "Conv2d")] = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}}
+        tune_cfg["bf16_ops_list"].append(("conv.module", "Conv2d"))
+        from neural_compressor.adaptor.torch_utils.bf16_convert import Convert
+
+        q_model._model = Convert(q_model._model, tune_cfg)
+
+        self.assertEqual(q_model._model.conv.module.module.weight.dtype, torch.bfloat16)
+        self.assertEqual(q_model._model.conv.module.module.bias.dtype, torch.bfloat16)
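+
+    # As exercised above, torch_utils.bf16_convert.Convert keys off
+    # tune_cfg["bf16_ops_list"]. A minimal hand-built config (sketch; the op
+    # tuple mirrors the one appended to q_config above):
+    #
+    #     tune_cfg = {"bf16_ops_list": [("conv.module", "Conv2d")]}
+    #     model = Convert(model, tune_cfg)  # listed ops get wrapped to bfloat16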
+
+    def test_symbolic_trace(self):
+        from neural_compressor.adaptor.torch_utils.symbolic_trace import symbolic_trace
+
+        model_origin = DynamicControlModel()
+        traced_model = symbolic_trace(model_origin, is_qat=False)
+        if PT_VERSION >= Version("1.11.0").release:
+            self.assertTrue(isinstance(traced_model.sub, torch.nn.Module))
+            self.assertTrue(isinstance(traced_model.conv, torch.fx.graph_module.GraphModule))
+        else:
+            self.assertTrue(isinstance(traced_model.sub, torch.fx.graph_module.GraphModule))
+        traced_model_qat = symbolic_trace(model_origin, is_qat=True)
+        self.assertTrue(isinstance(traced_model_qat.sub, torch.fx.graph_module.GraphModule))
+
+    def test_tensor_dump(self):
+        model = resnet18()
+        model = MODELS["pytorch"](model)
+        quantizer = Quantization("fx_ptq_yaml.yaml")
+        dataset = quantizer.dataset("dummy", (100, 3, 224, 224), label=True)
+        dataloader = common.DataLoader(dataset)
+        dataloader = common._generate_common_dataloader(dataloader, "pytorch")
+        quantizer.eval_dataloader = dataloader
+        quantizer.calib_dataloader = dataloader
+        quantizer.model = model.model
+        q_model = quantizer.fit()
+        op_list, _ = quantizer.strategy.adaptor.diagnosis_helper(model, q_model, None)
+        quantizer.strategy.adaptor.inspect_tensor(
+            model, dataloader, op_list=op_list, iteration_list=[1], inspect_type="all", save_to_disk=True
+        )
+        with open("saved/inspect_result.pkl", "rb") as fp:
+            tensor_dict = pickle.load(fp)
+        a = tensor_dict["activation"][0]
+        w = tensor_dict["weight"]
+        self.assertTrue(w["conv1"]["conv1.weight"].shape[0] == a["conv1"]["conv1.output0"].shape[1])
+        quantizer.strategy.adaptor.inspect_tensor(
+            q_model,
+            dataloader,
+            op_list=["conv1", "layer2.0.downsample.0"],
+            iteration_list=[1, 2],
+            inspect_type="all",
+            save_to_disk=True,
+        )
+        with open("saved/inspect_result.pkl", "rb") as fp:
+            tensor_dict = pickle.load(fp)
+        a = tensor_dict["activation"][0]
+        w = tensor_dict["weight"]
+        self.assertTrue(
+            w["layer2.0.downsample.0"]["layer2.0.downsample.0.weight"].shape[0]
+            == a["layer2.0.downsample.0"]["layer2.0.downsample.0.output0"].shape[1]
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()

From 3c8d881175de390cb25dc6aa569332c6fd454682 Mon Sep 17 00:00:00 2001
From: xin3he
Date: Mon, 8 Jul 2024 11:10:14 +0800
Subject: [PATCH 5/5] fix bug

Signed-off-by: xin3he
---
 neural_compressor/adaptor/pytorch.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 5909de644ac..1dfc27c1237 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -1242,6 +1242,8 @@ def _combine_capability(self, bf16_ops, q_capability):
                 q_capability["opwise"][bf16_op] = [bf16_config, fp32_config]
                 if bf16_op[1] not in q_capability["optypewise"]:
                     q_capability["optypewise"][bf16_op[1]] = [bf16_config, fp32_config]
+                if bf16_op[1] in q_capability["optypewise"] and bf16_config not in q_capability["optypewise"][bf16_op[1]]:
+                    q_capability["optypewise"][bf16_op[1]].append(bf16_config)
         return q_capability

     def get_fused_list(self, model):
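
For context on the PATCH 5/5 hunk: the two added lines make the bf16 capability merge idempotent, so an op type already present in optypewise gains the bf16 entry at most once. A minimal standalone sketch of that merge, assuming (op_name, op_type) tuples as in the tests above; combine_capability and the two config dicts are illustrative stand-ins for the method and its surrounding state:

    bf16_config = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}}
    fp32_config = {"weight": {"dtype": "fp32"}, "activation": {"dtype": "fp32"}}

    def combine_capability(bf16_ops, q_capability):
        # Sketch of the patched merge: register each bf16 op, and extend an
        # already-known op type with the bf16 config only if it is missing.
        for bf16_op in bf16_ops:
            q_capability["opwise"].setdefault(bf16_op, [bf16_config, fp32_config])
            optype = bf16_op[1]
            if optype not in q_capability["optypewise"]:
                q_capability["optypewise"][optype] = [bf16_config, fp32_config]
            elif bf16_config not in q_capability["optypewise"][optype]:
                q_capability["optypewise"][optype].append(bf16_config)
        return q_capability

    # Merging twice no longer duplicates the bf16 entry:
    cap = {"opwise": {}, "optypewise": {"Linear": [fp32_config]}}
    combine_capability([("linear", "Linear")], cap)
    combine_capability([("linear", "Linear")], cap)
    assert cap["optypewise"]["Linear"].count(bf16_config) == 1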