From f419a7ff8d124b0c16bc92d7dea692e9d66ffebc Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 4 Nov 2022 15:44:08 -0700 Subject: [PATCH 01/60] Update normalization in predictors --- .../nas/dynast/dynas_predictor.py | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_predictor.py b/neural_compressor/experimental/nas/dynast/dynas_predictor.py index 8f142bfffec..a92c0bef5c2 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_predictor.py +++ b/neural_compressor/experimental/nas/dynast/dynas_predictor.py @@ -30,8 +30,13 @@ class Predictor: DEFAULT_COST_FACTORS = np.arange(1.0, 101.0, 1.0) DEFAULT_MAX_ITERATIONS = 1000000 - def __init__(self, alphas=DEFAULT_ALPHAS, cost_factors=DEFAULT_COST_FACTORS, - max_iterations=DEFAULT_MAX_ITERATIONS, verbose=False): + def __init__( + self, + alphas=DEFAULT_ALPHAS, + cost_factors=DEFAULT_COST_FACTORS, + max_iterations=DEFAULT_MAX_ITERATIONS, + verbose=False, + ): SEARCHER_VERBOSITY = 10 @@ -42,15 +47,24 @@ def __init__(self, alphas=DEFAULT_ALPHAS, cost_factors=DEFAULT_COST_FACTORS, self.best_index = 0 # Create lists of regressors and associated hyper-parameters - regressors = [linear_model.Ridge(max_iter=max_iterations), - svm.SVR(kernel='rbf', gamma='auto', epsilon=0.0, max_iter=max_iterations)] + regressors = [ + linear_model.Ridge(max_iter=max_iterations), + svm.SVR(kernel='rbf', gamma='auto', epsilon=0.0, max_iter=max_iterations), + ] hyper_parameters = [{'alpha': alphas}, {'C': cost_factors}] # Create list of hyper-parameter searchers self.searchers = [] for regressor, parameters in zip(regressors, hyper_parameters): - self.searchers.append(GridSearchCV(estimator=regressor, param_grid=parameters, n_jobs=-1, - scoring='neg_mean_absolute_percentage_error', verbose=SEARCHER_VERBOSITY if (verbose) else 0)) + self.searchers.append( + GridSearchCV( + estimator=regressor, + param_grid=parameters, + n_jobs=-1, + scoring='neg_mean_absolute_percentage_error', + verbose=SEARCHER_VERBOSITY if (verbose) else 0, + ) + ) def train(self, examples, labels): @@ -65,8 +79,14 @@ def train(self, examples, labels): None ''' + # Compute normalization factor + max_label = np.amax(np.abs(labels)) + if max_label > 0.0: + self.normalization_factor = 10 ** (np.floor(np.log10(max_label)) - 1.0) + else: + self.normalization_factor = 1.0 + # Compute normalized labels - self.normalization_factor = 10 ** (np.floor(np.log10(np.amax(labels))) - 1.0) normalized_labels = labels / self.normalization_factor # Train regressors with optimal parameters @@ -108,7 +128,7 @@ def get_parameters(self): ''' # Retrieve optimal parameters - parameters = {} + parameters = {'best_index': self.best_index} for searcher in self.searchers: regressor_name = searcher.best_estimator_.__class__.__name__ for key in searcher.best_params_: From 363bdff46541e48634792e66f682872dcd87cb4e Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 4 Nov 2022 15:48:50 -0700 Subject: [PATCH 02/60] Add notebook's checkpoint to gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d039b651c1f..509d3f1d1a9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ .idea /venv/ */__pycache__ +.ipynb_checkpoints/ *.snapshot *.csv *.pb @@ -17,4 +18,4 @@ build/ _build lpot_workspace/ .torch/ -node_modules \ No newline at end of file +node_modules From 9d7f1f4eb972521e7a693ec1ab843cf841b34745 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 4 Nov 2022 
16:02:23 -0700 Subject: [PATCH 03/60] Add TODO note. --- neural_compressor/experimental/nas/dynas.py | 1 + 1 file changed, 1 insertion(+) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index b3e4d2cc37e..f70dcf1ec1c 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -103,6 +103,7 @@ def search(self): # Randomly sample search space for initial population # if number of results in results_csv_path smaller than population. + # TODO(macsz) Create empty CSV if it does not exists. df = pd.read_csv(self.results_csv_path) latest_population = [self.supernet_manager.random_sample() \ for _ in range(max(self.population - df.shape[0], 0))] From 1dceb8a6b79df7aa7cc8b77bd6d59bc8f483cea6 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 4 Nov 2022 16:20:30 -0700 Subject: [PATCH 04/60] Add Runners dictionary --- neural_compressor/experimental/nas/dynas.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index f70dcf1ec1c..2353d23eacf 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -39,7 +39,6 @@ def __init__(self, conf_fname_or_obj): self.Predictor = Predictor self.ProblemMultiObjective = ProblemMultiObjective self.SearchAlgoManager = SearchAlgoManager - self.OFARunner = OFARunner self.SUPERNET_PARAMETERS = { 'ofa_resnet50': {'d' : {'count' : 5, 'vars' : [0, 1, 2]}, @@ -54,6 +53,12 @@ def __init__(self, conf_fname_or_obj): 'e' : {'count' : 20, 'vars' : [3, 4, 6]}, 'd' : {'count' : 5, 'vars' : [2, 3, 4]} } } + self.RUNNERS = { + 'ofa_resnet50': OFARunner, + 'ofa_mbv3_d234_e346_k357_w1.0': OFARunner, + 'ofa_mbv3_d234_e346_k357_w1.2': OFARunner, + } + self.EVALUATION_INTERFACE = {'ofa_resnet50': EvaluationInterfaceResNet50, 'ofa_mbv3_d234_e346_k357_w1.0': EvaluationInterfaceMobileNetV3, 'ofa_mbv3_d234_e346_k357_w1.2': EvaluationInterfaceMobileNetV3} @@ -78,7 +83,7 @@ def init_for_search(self): ) # Validation High-Fidelity Measurement Runner - self.runner_validate = self.OFARunner( + self.runner_validate = self.RUNNERS[self.supernet]( supernet=self.supernet, acc_predictor=None, macs_predictor=None, @@ -121,7 +126,7 @@ def search(self): self.create_latency_predictor() # Inner-loop Low-Fidelity Predictor Runner, need to re-instantiate every loop - runner_predict = self.OFARunner( + runner_predict = self.RUNNERS[self.supernet]( supernet=self.supernet, acc_predictor=self.acc_predictor, macs_predictor=self.macs_predictor, From 2c17af6dcbddcf842364ff18f961ae4b0a32bd79 Mon Sep 17 00:00:00 2001 From: "Nittur Sridhar, Sharath" Date: Sun, 6 Nov 2022 22:54:56 -0800 Subject: [PATCH 05/60] add transformer example --- neural_compressor/conf/config.py | 1 + neural_compressor/experimental/nas/dynas.py | 54 +- .../experimental/nas/dynast/dynas_manager.py | 211 ++++ .../experimental/nas/dynast/dynas_utils.py | 171 +++ .../modules_supernetwork.py | 590 +++++++++++ .../transformer_interface.py | 498 +++++++++ .../transformer_supernetwork.py | 992 ++++++++++++++++++ 7 files changed, 2506 insertions(+), 11 deletions(-) create mode 100644 neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py create mode 100644 neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py create mode 100644 
neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index f06560cc56e..d165298be29 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -1066,6 +1066,7 @@ def percent_to_float(data): Optional("num_evals", default=100000): int, Optional("results_csv_path", default=None): str, Optional("dataset_path", default=None): str, + Optional("supernet_ckpt_path", default=None): str, Optional("batch_size", default=64): int, }, }, diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 2353d23eacf..a5d70dde6ae 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -21,7 +21,6 @@ from .nas import NASBase from .nas_utils import nas_registry - @nas_registry("DyNAS") class DyNAS(NASBase): """ @@ -31,10 +30,13 @@ class DyNAS(NASBase): """ def __init__(self, conf_fname_or_obj): from .dynast.dynas_manager import ParameterManager + from .dynast.dynas_manager import TransformerLTEncoding from .dynast.dynas_predictor import Predictor from .dynast.dynas_search import ProblemMultiObjective, SearchAlgoManager from .dynast.dynas_utils import (EvaluationInterfaceMobileNetV3, - EvaluationInterfaceResNet50, OFARunner) + EvaluationInterfaceResNet50, EvaluationInterfaceTransformerLT, + OFARunner,TransformerLTRunner) + self.ParameterManager = ParameterManager self.Predictor = Predictor self.ProblemMultiObjective = ProblemMultiObjective @@ -51,20 +53,44 @@ def __init__(self, conf_fname_or_obj): 'ofa_mbv3_d234_e346_k357_w1.2': {'ks' : {'count' : 20, 'vars' : [3, 5, 7]}, 'e' : {'count' : 20, 'vars' : [3, 4, 6]}, - 'd' : {'count' : 5, 'vars' : [2, 3, 4]} } - } + 'd' : {'count' : 5, 'vars' : [2, 3, 4]} }, + + 'transformer_lt_wmt_en_de': + {'encoder_embed_dim': {'count':1,'vars':[640, 512]}, + 'decoder_embed_dim': {'count':1, 'vars': [640, 512]}, + 'encoder_ffn_embed_dim': {'count':6, 'vars':[3072, 2048, 1024]}, + 'decoder_ffn_embed_dim' : {'count':6,'vars': [3072, 2048, 1024]}, + 'decoder_layer_num': {'count':1,'vars':[6, 5, 4, 3, 2, 1]}, + 'encoder_self_attention_heads': {'count':6, 'vars':[8, 4]}, + 'decoder_self_attention_heads': {'count':6, 'vars':[8, 4]}, + 'decoder_ende_attention_heads': {'count':6, 'vars':[8, 4]}, + 'decoder_arbitrary_ende_attn': {'count':6, 'vars':[-1, 1, 2]}} + } self.RUNNERS = { 'ofa_resnet50': OFARunner, 'ofa_mbv3_d234_e346_k357_w1.0': OFARunner, 'ofa_mbv3_d234_e346_k357_w1.2': OFARunner, + 'transformer_lt_wmt_en_de': TransformerLTRunner } self.EVALUATION_INTERFACE = {'ofa_resnet50': EvaluationInterfaceResNet50, 'ofa_mbv3_d234_e346_k357_w1.0': EvaluationInterfaceMobileNetV3, - 'ofa_mbv3_d234_e346_k357_w1.2': EvaluationInterfaceMobileNetV3} + 'ofa_mbv3_d234_e346_k357_w1.2': EvaluationInterfaceMobileNetV3, + 'transformer_lt_wmt_en_de': EvaluationInterfaceTransformerLT} + self.LINAS_INNERLOOP_EVALS = {'ofa_resnet50': 5000, 'ofa_mbv3_d234_e346_k357_w1.0': 20000, - 'ofa_mbv3_d234_e346_k357_w1.2': 20000} + 'ofa_mbv3_d234_e346_k357_w1.2': 20000, + 'transformer_lt_wmt_en_de': 10000} + + self.SUPERNET_ENCODING = { + 'ofa_resnet50': ParameterManager, + 'ofa_mbv3_d234_e346_k357_w1.0': ParameterManager, + 'ofa_mbv3_d234_e346_k357_w1.2': ParameterManager, + 'ofa_proxyless_d234_e346_k357_w1.3': ParameterManager, + 'transformer_lt_wmt_en_de': TransformerLTEncoding, + } + super().__init__() self.acc_predictor = None self.macs_predictor = None @@ -77,10 
+103,13 @@ def estimate(self, individual): self.validation_interface.eval_subnet(individual) def init_for_search(self): - self.supernet_manager = self.ParameterManager( - param_dict=self.SUPERNET_PARAMETERS[self.supernet], - seed=self.seed + self.supernet_manager = self.SUPERNET_ENCODING[self.supernet]( + param_dict=self.SUPERNET_PARAMETERS[self.supernet], seed=self.seed ) + #self.supernet_manager = self.ParameterManager( + # param_dict=self.SUPERNET_PARAMETERS[self.supernet], + # seed=self.seed + #) # Validation High-Fidelity Measurement Runner self.runner_validate = self.RUNNERS[self.supernet]( @@ -88,8 +117,9 @@ def init_for_search(self): acc_predictor=None, macs_predictor=None, latency_predictor=None, - imagenetpath=self.dataset_path, + datasetpath=self.dataset_path, batch_size=self.batch_size, + checkpoint_path=self.supernet_ckpt_path ) # Setup validation interface @@ -131,8 +161,9 @@ def search(self): acc_predictor=self.acc_predictor, macs_predictor=self.macs_predictor, latency_predictor=self.latency_predictor, - imagenetpath=self.dataset_path, + datasetpath=self.dataset_path, batch_size=self.batch_size, + checkpoint_path=self.supernet_ckpt_path ) # Setup validation interface @@ -236,6 +267,7 @@ def init_cfg(self, conf_fname_or_obj): self.num_evals = dynas_config.num_evals self.results_csv_path = dynas_config.results_csv_path self.dataset_path = dynas_config.dataset_path + self.supernet_ckpt_path = dynas_config.supernet_ckpt_path self.batch_size = dynas_config.batch_size if dynas_config.population < 10: # pragma: no cover raise NotImplementedError( diff --git a/neural_compressor/experimental/nas/dynast/dynas_manager.py b/neural_compressor/experimental/nas/dynast/dynas_manager.py index 0394dc2117b..898d85e3ab0 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_manager.py +++ b/neural_compressor/experimental/nas/dynast/dynas_manager.py @@ -276,3 +276,214 @@ def create_training_set( ) ) return features_train, features_test, labels_train, labels_test + + + + + + +class TransformerLTEncoding(ParameterManager): + def __init__(self, param_dict: dict, verbose: bool = False, seed: int = 0): + super().__init__(param_dict, verbose, seed) + + def onehot_custom(self,subnet_cfg,provide_onehot=True): + + features = [] + #import ipdb;ipdb.set_trace() + features.extend(subnet_cfg['encoder_embed_dim']) + + #encoder_layer_num = subnet_cfg['encoder_layer_num'] + encode_layer_num_int = 6#encoder_layer_num[0] + #features.extend(encoder_layer_num) + + #Encoder FFN Embed Dim + encoder_ffn_embed_dim = subnet_cfg['encoder_ffn_embed_dim'] + + if encode_layer_num_int < 6: + encoder_ffn_embed_dim.extend([0]*(6-encode_layer_num_int)) + features.extend(encoder_ffn_embed_dim) + + #Encoder Self-Attn Heads + + encoder_self_attention_heads = subnet_cfg['encoder_self_attention_heads'][:encode_layer_num_int] + + if encode_layer_num_int < 6: + encoder_self_attention_heads.extend([0]*(6-encode_layer_num_int)) + features.extend(encoder_self_attention_heads) + + + features.extend(subnet_cfg['decoder_embed_dim']) + + decoder_layer_num = subnet_cfg['decoder_layer_num'] + decoder_layer_num_int = decoder_layer_num[0] + features.extend(decoder_layer_num) + + #Decoder FFN Embed Dim + decoder_ffn_embed_dim = subnet_cfg['decoder_ffn_embed_dim'][:decoder_layer_num_int] + + if decoder_layer_num_int < 6: + decoder_ffn_embed_dim.extend([0]*(6-decoder_layer_num_int)) + features.extend(decoder_ffn_embed_dim) + + + #Decoder Attn Heads + decoder_self_attention_heads = 
subnet_cfg['decoder_self_attention_heads'][:decoder_layer_num_int] + + if decoder_layer_num_int < 6: + decoder_self_attention_heads.extend([0]*(6-decoder_layer_num_int)) + features.extend(decoder_self_attention_heads) + + #Decoder ENDE HEADS + + decoder_ende_attention_heads = subnet_cfg['decoder_ende_attention_heads'][:decoder_layer_num_int] + + if decoder_layer_num_int < 6: + decoder_ende_attention_heads.extend([0]*(6-decoder_layer_num_int)) + + features.extend(decoder_ende_attention_heads) + + arbitrary_ende_attn_trans = [] + for i in range(decoder_layer_num_int): + if subnet_cfg['decoder_arbitrary_ende_attn'][i] == -1: + arbitrary_ende_attn_trans.append(1) + elif subnet_cfg['decoder_arbitrary_ende_attn'][i] == 1: + arbitrary_ende_attn_trans.append(2) + elif subnet_cfg['decoder_arbitrary_ende_attn'][i] == 2: + arbitrary_ende_attn_trans.append(3) + + if decoder_layer_num_int < 6: + arbitrary_ende_attn_trans.extend([0]*(6-decoder_layer_num_int)) + features.extend(arbitrary_ende_attn_trans) + + if provide_onehot==True: + examples = np.array([features]) + one_hot_count = 0 + unique_values = self.unique_values + + #uncomment + #with open(self.onehot_unique,'rb') as f: + # load_unique_values = pickle.load(f) + # unique_values = load_unique_values.tolist() + for unique in unique_values: + one_hot_count += len(unique.tolist()) + + + one_hot_examples = np.zeros((examples.shape[0], one_hot_count)) + for e, example in enumerate(examples): + offset = 0 + for f in range(len(example)): + index = np.where(unique_values[f] == example[f])[0] + offset + one_hot_examples[e, index] = 1.0 + offset += len(unique_values[f]) + return one_hot_examples + + else: + return features + + #return np.array(ks_onehot + ex_onehot) + + + def import_csv( + self, + filepath: str, + config: str, + objective: str, + column_names: List[str] = None, + drop_duplicates: bool = True, + ) -> pd.DataFrame: + ''' + Import a csv file generated from a supernetwork search for the purpose + of training a predictor. + + filepath - path of the csv to be imported. + config - the subnetwork configuration + objective - target/label for the subnet configuration (e.g. accuracy, latency) + column_names - a list of column names for the dataframe + df - the output dataframe that contains the original config dict, pymoo, and 1-hot + equivalent vector for training. 
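The positional one-hot expansion used by onehot_custom and create_training_set above can be summarized in isolation: every feature position keeps its own list of observed unique values, and a sub-network's value at that position switches on exactly one slot inside that position's block. A minimal sketch with toy values (the dimensions below are illustrative placeholders, not taken from the actual search space):

import numpy as np

# Per-position unique values, e.g. an embed-dim choice and one padded FFN-dim slot (0 = layer absent).
unique_values = [np.array([512, 640]), np.array([0, 1024, 2048, 3072])]

example = [640, 2048]                       # one sub-network's raw feature vector
one_hot = np.zeros(sum(len(u) for u in unique_values))
offset = 0
for value, uniques in zip(example, unique_values):
    one_hot[offset + np.where(uniques == value)[0][0]] = 1.0  # switch on this position's slot
    offset += len(uniques)                                     # move to the next position's block

print(one_hot)                              # [0. 1. 0. 0. 1. 0.]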
+ ''' + + if column_names == None: + df = pd.read_csv(filepath) + else: + df = pd.read_csv(filepath) + df.columns = column_names + df = df[[config, objective]] + # Old corner case coverage + df[config] = df[config].replace({'null': 'None'}, regex=True) + + if drop_duplicates: + df.drop_duplicates(subset=[config], inplace=True) + df.reset_index(drop=True, inplace=True) + + convert_to_dict = list() + convert_to_pymoo = list() + convert_to_onehot = list() + for i in range(len(df)): + # Elastic Param Config format + config_as_dict = ast.literal_eval(df[config].iloc[i]) + convert_to_dict.append(config_as_dict) + # PyMoo 1-D vector format + config_as_pymoo = self.translate2pymoo(config_as_dict) + convert_to_pymoo.append(config_as_pymoo) + # Onehot predictor format + config_as_onehot = self.onehot_custom(config_as_dict,provide_onehot=False) + convert_to_onehot.append(config_as_onehot) + #import ipdb;ipdb.set_trace() + df[config] = convert_to_dict + df['config_pymoo'] = convert_to_pymoo + df['config_onehot'] = convert_to_onehot + + return df + + #@staticmethod + def create_training_set( + self, + dataframe: pd.DataFrame, + train_with_all: bool = True, + split: float = 0.33, + seed: bool = None, + ) -> Tuple[list, list, list, list]: + ''' + Create a sklearn compatible test/train set from an imported results csv + after "import_csv" method is run. + ''' + + collect_rows = list() + for i in range(len(dataframe)): + collect_rows.append(np.asarray(dataframe['config_onehot'].iloc[i])) + features = np.asarray(collect_rows) + labels = dataframe.drop(columns=['config', 'config_pymoo', 'config_onehot']).values + + assert len(features) == len(labels) + one_hot_count = 0 + unique_values = [] + + for c in range(features.shape[1]): + unique_values.append(np.unique(features[:, c])) + one_hot_count += len(unique_values[-1]) + one_hot_examples = np.zeros((features.shape[0], one_hot_count)) + for e, example in enumerate(features): + offset = 0 + for f in range(len(example)): + index = np.where(unique_values[f] == example[f])[0] + offset + one_hot_examples[e, index] = 1.0 + offset += len(unique_values[f]) + + features = one_hot_examples + self.unique_values = unique_values + if train_with_all: + logger.info('[DyNAS-T] Training set size={}'.format(len(labels))) + return features, labels + else: + features_train, features_test, labels_train, labels_test = train_test_split( + features, labels, test_size=split, random_state=seed + ) + logger.info( + '[DyNAS-T] Test ({}) Train ({}) ratio is {}.'.format( + len(labels_train), len(labels_test), split + ) + ) + return features_train, features_test, labels_train, labels_test + + diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 8fe2608c67e..e15ca455005 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -33,6 +33,10 @@ from neural_compressor.experimental.nas.dynast.dynas_predictor import Predictor from neural_compressor.utils.utility import LazyImport, logger +from neural_compressor.experimental.nas.supernetwork.machine_translation.transformer_interface import ( + compute_bleu, + compute_latency +) torch = LazyImport('torch') torchvision = LazyImport('torchvision') @@ -270,6 +274,103 @@ def get_subnet( return self.subnet + + +class TransformerLTRunner(Runner): + """The OFARunner class manages the sub-network selection from the OFA super-network and + the validation measurements of the sub-networks. 
ResNet50, MobileNetV3 w1.0, and MobileNetV3 w1.2 + are currently supported. Imagenet is required for these super-networks `imagenet-ilsvrc2012`. + """ + + def __init__( + self, + supernet: str, + acc_predictor: Predictor, + macs_predictor: Predictor, + latency_predictor: Predictor, + datasetpath: str, + batch_size: int, + checkpoint_path: str + ) -> None: + self.supernet = supernet + self.acc_predictor = acc_predictor + self.macs_predictor = macs_predictor + self.latency_predictor = latency_predictor + self.device = 'cpu' + self.test_size = None + self.batch_size = batch_size + self.dataset_path = datasetpath + self.checkpoint_path = checkpoint_path + + def estimate_accuracy_bleu( + self, + subnet_cfg: dict, + ) -> float: + top1 = self.acc_predictor.predict(subnet_cfg) + return top1 + + def estimate_macs( + self, + subnet_cfg: dict, + ) -> int: + macs = self.macs_predictor.predict(subnet_cfg) + return macs + + def estimate_latency( + self, + subnet_cfg: dict, + ) -> float: + latency = self.latency_predictor.predict(subnet_cfg) + return latency + + def validate_bleu( + self, + subnet_cfg: dict, + ) -> float: # pragma: no cover + + bleu = compute_bleu(subnet_cfg,self.dataset_path,self.checkpoint_path) + return bleu + + def validate_macs( + self, + subnet_cfg: dict, + ) -> float: + """Measure Torch model's FLOPs/MACs as per FVCore calculation + Args: + subnet_cfg: sub-network Torch model + Returns: + `macs` + """ + + #model = self.get_subnet(subnet_cfg) + #input_size = (self.batch_size, 3, 224, 224) + #macs = get_macs(model=model, input_size=input_size, device=self.device) + macs = 0 + #logger.info('Model\'s macs: {}'.format(macs)) + return macs + + @torch.no_grad() + def measure_latency( + self, + subnet_cfg: dict, + warmup_steps: int = None, + measure_steps: int = None, + ) -> Tuple[float, float]: + """Measure OFA model's latency. 
+ Args: + subnet_cfg: sub-network Torch model + Returns: + mean latency; std latency + """ + + latency_mean, latency_std = compute_latency(subnet_cfg,self.dataset_path) + logger.info('Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) + + return latency_mean, latency_std + + + + class EvaluationInterface: """ The interface class update is required to be updated for each unique SuperNetwork @@ -430,6 +531,76 @@ def eval_subnet( return sample, macs, -top1 + +class EvaluationInterfaceTransformerLT(EvaluationInterface): + def __init__( + self, + evaluator: Runner, + manager: ParameterManager, + metrics=['acc', 'macs'], + predictor_mode=False, + csv_path=None, + ) -> None: + super().__init__(evaluator, manager, metrics, predictor_mode, csv_path) + + def eval_subnet( + self, + x: list, + ) -> Tuple[dict, float, float]: + # PyMoo vector to Elastic Parameter Mapping + param_dict = self.manager.translate2param(x) + + sample = { + 'encoder': { + 'encoder_embed_dim': param_dict['encoder_embed_dim'][0], + 'encoder_layer_num': 6,#param_dict['encoder_layer_num'][0], + 'encoder_ffn_embed_dim': param_dict['encoder_ffn_embed_dim'], + 'encoder_self_attention_heads': param_dict['encoder_self_attention_heads'], + }, + 'decoder': { + 'decoder_embed_dim': param_dict['decoder_embed_dim'][0], + 'decoder_layer_num': param_dict['decoder_layer_num'][0], + 'decoder_ffn_embed_dim': param_dict['decoder_ffn_embed_dim'], + 'decoder_self_attention_heads': param_dict['decoder_self_attention_heads'], + 'decoder_ende_attention_heads': param_dict['decoder_ende_attention_heads'], + 'decoder_arbitrary_ende_attn':param_dict['decoder_arbitrary_ende_attn'] + } + } + + subnet_sample = copy.deepcopy(sample) + + # Always evaluate/predict top1 + lat, macs = 0, 0 + if self.predictor_mode == True: + bleu = self.evaluator.estimate_accuracy_bleu(self.manager.onehot_custom(param_dict).reshape(1,-1))[0] + if 'macs' in self.metrics: + macs = self.evaluator.estimate_macs(self.manager.onehot_custom(param_dict).reshape(1,-1))[0] + if 'lat' in self.metrics: + lat = self.evaluator.estimate_latency(self.manager.onehot_custom(param_dict).reshape(1,-1))[0] + else: + bleu = self.evaluator.validate_bleu(subnet_sample) + macs = self.evaluator.validate_macs(subnet_sample) + if 'lat' in self.metrics: + lat, _ = self.evaluator.measure_latency(subnet_sample) + + if self.csv_path: + with open(self.csv_path, 'a') as f: + writer = csv.writer(f) + date = str(datetime.now()) + result = [param_dict, date, lat, macs, bleu,] + writer.writerow(result) + + # PyMoo only minimizes objectives, thus accuracy needs to be negative + # Requires format: subnetwork, objective x, objective y + if 'lat' in self.metrics: + return sample, lat, -bleu + else: + return sample, macs, -bleu + + + + + def get_torchvision_model( model_name: str, ) -> torch.nn.Module: diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py new file mode 100644 index 00000000000..ef4cbddc952 --- /dev/null +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py @@ -0,0 +1,590 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +import numpy as np +from torch.nn.modules.module import _addindent +from torch.nn import Parameter + +from fairseq import utils +from collections import defaultdict + + +INCREMENTAL_STATE_INSTANCE_ID = defaultdict(lambda: 0) + + +def 
_get_full_incremental_state_key(module_instance, key): + module_name = module_instance.__class__.__name__ + + # assign a unique ID to each module instance, so that incremental state is + # not shared across module instances + if not hasattr(module_instance, '_fairseq_instance_id'): + INCREMENTAL_STATE_INSTANCE_ID[module_name] += 1 + module_instance._fairseq_instance_id = INCREMENTAL_STATE_INSTANCE_ID[module_name] + + return '{}.{}.{}'.format(module_name, module_instance._fairseq_instance_id, key) + + +def get_incremental_state(module, incremental_state, key): + """Helper for getting incremental state for an nn.Module.""" + full_key = _get_full_incremental_state_key(module, key) + if incremental_state is None or full_key not in incremental_state: + return None + return incremental_state[full_key] + + +def set_incremental_state(module, incremental_state, key, value): + """Helper for setting incremental state for an nn.Module.""" + if incremental_state is not None: + full_key = _get_full_incremental_state_key(module, key) + incremental_state[full_key] = value + +class EmbeddingSuper(nn.Embedding): + def __init__(self, num_embeddings, super_embed_dim, padding_idx, *args, **kwargs): + super().__init__(num_embeddings, super_embed_dim, padding_idx, *args, **kwargs) + + # the largest embed dim + self.super_embed_dim = {'encoder': super_embed_dim, 'decoder': super_embed_dim} + + # the current sampled embed dim + self.sample_embed_dim = {'encoder': None, 'decoder': None} + + self.samples = {'encoder': {}, 'decoder': {}} + self.profiling = False + self.reset_parameters() + + def profile(self, mode=True): + self.profiling = mode + + def reset_parameters(self): + super().reset_parameters() + nn.init.normal_(self.weight, mean=0, std=self.embedding_dim ** -0.5) + nn.init.constant_(self.weight[self.padding_idx], 0) + + def set_sample_config(self, sample_embed_dim, part): + self.sample_embed_dim[part] = sample_embed_dim + self._sample_parameters(part) + + def _sample_parameters(self, part): + weight = self.weight[..., :self.sample_embed_dim[part]] + self.samples[part]['weight'] = weight + + return self.samples + + def sample_parameters(self, part, resample=False): + return self._sample_parameters(part) if self.profiling or resample else self.samples + + def sampled_weight(self, part): + return self.sample_parameters(part)[part]['weight'] + + def forward(self, input, part='encoder'): + return F.embedding(input, self.sampled_weight(part), self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse) + + +class LinearSuper(nn.Linear): + def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear'): + super().__init__(super_in_dim, super_out_dim, bias=bias) + + # super_in_dim and super_out_dim indicate the largest network! + self.super_in_dim = super_in_dim + self.super_out_dim = super_out_dim + + # input_dim and output_dim indicate the current sampled size + self.sample_in_dim = None + self.sample_out_dim = None + + self.samples = {} + + self._reset_parameters(bias, uniform_, non_linear) + self.profiling = False + + def profile(self, mode=True): + self.profiling = mode + + def sample_parameters(self, resample=False): + if self.profiling or resample: + return self._sample_parameters() + return self.samples + + def _reset_parameters(self, bias, uniform_, non_linear): + nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( + self.weight, non_linear=non_linear) + if bias: + nn.init.constant_(self.bias, 0.) 
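EmbeddingSuper, LinearSuper and LayerNormSuper all follow the same elastic pattern: parameters are allocated once at the largest (super) size, and each sampled sub-network reads only a leading slice of them. A minimal standalone sketch of that idea (class and dimension names are illustrative, not part of fairseq or this patch):

import torch
import torch.nn as nn
import torch.nn.functional as F

class ElasticLinear(nn.Linear):
    # A linear layer whose effective width can be shrunk at sample time.
    def __init__(self, super_in, super_out):
        super().__init__(super_in, super_out)
        self.sample_in, self.sample_out = super_in, super_out  # default: full super width

    def set_sample_config(self, sample_in, sample_out):
        self.sample_in, self.sample_out = sample_in, sample_out

    def forward(self, x):
        weight = self.weight[:self.sample_out, :self.sample_in]  # leading slice of the super weight
        bias = self.bias[:self.sample_out]
        return F.linear(x, weight, bias)

layer = ElasticLinear(640, 3072)
layer.set_sample_config(512, 1024)
out = layer(torch.randn(2, 512))   # shape (2, 1024); only the sliced parameters are used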
+ + def set_sample_config(self, sample_in_dim, sample_out_dim): + self.sample_in_dim = sample_in_dim + self.sample_out_dim = sample_out_dim + + self._sample_parameters() + + def _sample_parameters(self): + self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) + self.samples['bias'] = self.bias + if self.bias is not None: + self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) + return self.samples + + def forward(self, x): + self.sample_parameters() + return F.linear(x, self.samples['weight'], self.samples['bias']) + + def calc_sampled_param_num(self): + assert 'weight' in self.samples.keys() + weight_numel = self.samples['weight'].numel() + + if self.samples['bias'] is not None: + bias_numel = self.samples['bias'].numel() + else: + bias_numel = 0 + + return weight_numel + bias_numel + + +def sample_weight(weight, sample_in_dim, sample_out_dim): + sample_weight = weight[:, :sample_in_dim] + sample_weight = sample_weight[:sample_out_dim, :] + + return sample_weight + + +def sample_bias(bias, sample_out_dim): + sample_bias = bias[:sample_out_dim] + + return sample_bias + +def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False): + if not export and torch.cuda.is_available(): + try: + from apex.normalization import FusedLayerNorm + return FusedLayerNorm(normalized_shape, eps, elementwise_affine) + except ImportError: + pass + return torch.nn.LayerNorm(normalized_shape, eps, elementwise_affine) + + +class LayerNormSuper(torch.nn.LayerNorm): + def __init__(self, super_embed_dim): + super().__init__(super_embed_dim) + + # the largest embed dim + self.super_embed_dim = super_embed_dim + + # the current sampled embed dim + self.sample_embed_dim = None + + self.samples = {} + self.profiling = False + + def profile(self, mode=True): + self.profiling = mode + + def sample_parameters(self, resample=False): + if self.profiling or resample: + return self._sample_parameters() + return self.samples + + def _sample_parameters(self): + self.samples['weight'] = self.weight[:self.sample_embed_dim] + self.samples['bias'] = self.bias[:self.sample_embed_dim] + return self.samples + + def set_sample_config(self, sample_embed_dim): + self.sample_embed_dim = sample_embed_dim + self._sample_parameters() + + def forward(self, x): + self.sample_parameters() + return F.layer_norm(x, (self.sample_embed_dim,), weight=self.samples['weight'], bias=self.samples['bias'], eps=self.eps) + + def calc_sampled_param_num(self): + assert 'weight' in self.samples.keys() + assert 'bias' in self.samples.keys() + return self.samples['weight'].numel() + self.samples['bias'].numel() + + + +class MultiheadAttentionSuper(nn.Module): + """Multi-headed attention. + + See "Attention Is All You Need" for more details. 
+ """ + + def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, super_vdim=None, dropout=0., bias=True, + add_bias_kv=False, add_zero_attn=False, self_attention=False, + encoder_decoder_attention=False, out_dim=None, qkv_dim=None): + super().__init__() + + + + # the configs of super arch + self.super_q_embed_dim = super_embed_dim + self.super_kv_embed_dim = None + + # the configs of current sampled arch + self.sample_q_embed_dim = None + self.sample_kv_embed_dim = None + + if super_kdim is not None: + assert super_kdim == super_vdim + self.super_kv_embed_dim = super_kdim + else: + self.super_kv_embed_dim = self.super_q_embed_dim + + if qkv_dim is None: + self.qkv_dim = self.super_q_embed_dim + else: + self.qkv_dim = qkv_dim + + # this qkv same dim means the input dim for qkv are the same, not the output dim + # self.qkv_same_dim = self.kdim == self.super_embed_dim and self.vdim == self.super_embed_dim + self.qkv_same_dim = self.super_kv_embed_dim == self.super_q_embed_dim + self.encoder = is_encoder + + # Caution! these actually are the sampled num_heads, head_dim and scaling + self.num_heads = num_heads + self.dropout = dropout + self.head_dim = self.qkv_dim // num_heads + assert self.head_dim * num_heads == self.qkv_dim, "qkv must be divisible by num_heads" + self.scaling = self.head_dim ** -0.5 + + self.self_attention = self_attention + self.encoder_decoder_attention = encoder_decoder_attention + + assert not self.self_attention or self.qkv_same_dim, 'Self-attention requires query, key and ' \ + 'value to be of the same size' + + if self.qkv_same_dim: + self.in_proj_weight = Parameter(torch.Tensor(3 * self.qkv_dim, self.super_q_embed_dim)) + else: + self.k_proj_weight = Parameter(torch.Tensor(self.qkv_dim, self.super_kv_embed_dim)) + self.v_proj_weight = Parameter(torch.Tensor(self.qkv_dim, self.super_kv_embed_dim)) + self.q_proj_weight = Parameter(torch.Tensor(self.qkv_dim, self.super_q_embed_dim)) + + if bias: + self.in_proj_bias = Parameter(torch.Tensor(3 * self.qkv_dim)) + else: + self.register_parameter('in_proj_bias', None) + + if out_dim is None: + out_dim = self.super_q_embed_dim + self.out_proj = LinearSuper(super_in_dim=self.qkv_dim, super_out_dim=out_dim, bias=bias) + + if add_bias_kv: + self.bias_k = Parameter(torch.Tensor(1, 1, self.super_q_embed_dim)) + self.bias_v = Parameter(torch.Tensor(1, 1, self.super_q_embed_dim)) + else: + self.bias_k = self.bias_v = None + + self.add_zero_attn = add_zero_attn + + + self.reset_parameters() + + self.onnx_trace = False + + self.enable_torch_version = False + if hasattr(F, "multi_head_attention_forward"): + self.enable_torch_version = True + else: + self.enable_torch_version = False + self.enable_torch_version = False + + def calc_sampled_param_num(self): + assert self.in_proj_weight is not None and self.in_proj_bias is not None + in_proj_q_weight_numel = self.sample_q_embed_dim * self.qkv_dim + in_proj_v_weight_numel = in_proj_k_weight_numel = self.sample_kv_embed_dim * self.qkv_dim + in_proj_bias_numel = self.in_proj_bias.numel() + + # does not count in the output proj because it will be counted in LinearSuper layer + # out_proj_weight_numel = self.qkv_dim * self.sample_q_embed_dim + # out_proj_bias_numel = self. 
+ + return in_proj_q_weight_numel + in_proj_k_weight_numel + in_proj_v_weight_numel + in_proj_bias_numel + + + + + def set_sample_config(self, sample_q_embed_dim, sample_attention_heads, sample_kv_embed_dim=None): + self.sample_q_embed_dim = sample_q_embed_dim + if sample_kv_embed_dim is None: + self.sample_kv_embed_dim = sample_q_embed_dim + else: + self.sample_kv_embed_dim = sample_kv_embed_dim + + self.num_heads = sample_attention_heads + self.head_dim = self.qkv_dim // self.num_heads + assert self.head_dim * self.num_heads == self.qkv_dim, "qkv_dim must be divisible by sampled num_heads" + self.scaling = self.head_dim ** -0.5 + + self.out_proj.set_sample_config(sample_in_dim=self.qkv_dim, sample_out_dim=self.sample_q_embed_dim) + + + def prepare_for_onnx_export_(self): + self.onnx_trace = True + + def reset_parameters(self): + if self.qkv_same_dim: + nn.init.xavier_uniform_(self.in_proj_weight) + else: + nn.init.xavier_uniform_(self.k_proj_weight) + nn.init.xavier_uniform_(self.v_proj_weight) + nn.init.xavier_uniform_(self.q_proj_weight) + + nn.init.xavier_uniform_(self.out_proj.weight) + if self.in_proj_bias is not None: + nn.init.constant_(self.in_proj_bias, 0.) + nn.init.constant_(self.out_proj.bias, 0.) + if self.bias_k is not None: + nn.init.xavier_normal_(self.bias_k) + if self.bias_v is not None: + nn.init.xavier_normal_(self.bias_v) + + + def forward(self, query, key, value, key_padding_mask=None, incremental_state=None, + need_weights=True, static_kv=False, attn_mask=None): + """Input shape: Time x Batch x Channel + + Timesteps can be masked by supplying a T x T mask in the + `attn_mask` argument. Padding elements can be excluded from + the key by passing a binary ByteTensor (`key_padding_mask`) with shape: + batch x src_len, where padding elements are indicated by 1s. 
+ """ + + + tgt_len, bsz, embed_dim = query.size() + + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + if 'prev_key' in saved_state: + # previous time steps are cached - no need to recompute + # key and value if they are static + if static_kv: + assert self.encoder_decoder_attention and not self.self_attention + key = value = None + else: + saved_state = None + + if self.self_attention: + # self-attention + q, k, v = self.in_proj_qkv(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.in_proj_q(query) + if key is None: + assert value is None + k = v = None + else: + k = self.in_proj_k(key) + v = self.in_proj_v(key) + + else: + q = self.in_proj_q(query) + k = self.in_proj_k(key) + v = self.in_proj_v(value) + + q *= self.scaling + + if self.bias_k is not None: + assert self.bias_v is not None + k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [key_padding_mask, key_padding_mask.new_zeros(key_padding_mask.size(0), 1)], dim=1) + + q = q.contiguous().view(tgt_len, bsz * self.num_heads, self.head_dim).transpose(0, 1) + + if k is not None: + k = k.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1) + if v is not None: + v = v.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1) + + if saved_state is not None: + # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) + if 'prev_key' in saved_state: + prev_key = saved_state['prev_key'].view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + k = prev_key + else: + k = torch.cat((prev_key, k), dim=1) + if 'prev_value' in saved_state: + prev_value = saved_state['prev_value'].view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + v = prev_value + else: + v = torch.cat((prev_value, v), dim=1) + saved_state['prev_key'] = k.view(bsz, self.num_heads, -1, self.head_dim) + saved_state['prev_value'] = v.view(bsz, self.num_heads, -1, self.head_dim) + + self._set_input_buffer(incremental_state, saved_state) + + src_len = k.size(1) + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. 
+ if key_padding_mask is not None and key_padding_mask.shape == torch.Size([]): + key_padding_mask = None + + if key_padding_mask is not None: + fil = key_padding_mask.new_ones(key_padding_mask.size(0), src_len-key_padding_mask.size(1)) + key_padding_mask = torch.cat((key_padding_mask, fil), dim=1) + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + + if self.add_zero_attn: + src_len += 1 + k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + if attn_mask is not None: + attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [key_padding_mask, torch.zeros(key_padding_mask.size(0), 1).type_as(key_padding_mask)], dim=1) + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + + assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + if self.onnx_trace: + attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1) + attn_weights += attn_mask + + if key_padding_mask is not None: + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + if self.onnx_trace: + attn_weights = torch.where( + key_padding_mask.unsqueeze(1).unsqueeze(2), + torch.Tensor([float("-Inf")]), + attn_weights.float() + ).type_as(attn_weights) + else: + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2), + float('-inf'), + ) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + attn_weights = utils.softmax( + attn_weights, dim=-1, onnx_trace=self.onnx_trace, + ).type_as(attn_weights) + attn_weights = F.dropout(attn_weights, p=self.dropout, training=self.training) + + attn = torch.bmm(attn_weights, v) + + assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + + if (self.onnx_trace and attn.size(1) == 1): + # when ONNX tracing a single decoder step (sequence length == 1) + # the transpose is a no-op copy before view, thus unnecessary + attn = attn.contiguous().view(tgt_len, bsz, self.qkv_dim) + else: + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, self.qkv_dim) + attn = self.out_proj(attn) + + if need_weights: + # average attention weights over heads + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + + attn_weights = attn_weights.sum(dim=1) / self.num_heads + else: + attn_weights = None + + return attn, attn_weights + + def in_proj_qkv(self, query): + return self._in_proj(query, sample_dim=self.sample_q_embed_dim).chunk(3, dim=-1) + + def in_proj_q(self, query): + if self.qkv_same_dim: + return self._in_proj(query, end=self.qkv_dim, sample_dim=self.sample_q_embed_dim) + else: + bias = self.in_proj_bias + if bias is not None: + bias = bias[:self.qkv_dim] + return F.linear(query, self.q_proj_weight[..., :self.sample_q_embed_dim], bias) + + def in_proj_k(self, key): + if self.qkv_same_dim: + return self._in_proj(key, start=self.qkv_dim, end=2 * self.qkv_dim, sample_dim=self.sample_kv_embed_dim) + else: + weight = self.k_proj_weight + bias = self.in_proj_bias + if bias is not None: + bias = bias[self.qkv_dim:2 * self.qkv_dim] + return F.linear(key, weight[..., :self.sample_kv_embed_dim], bias) + + def in_proj_v(self, value): + if self.qkv_same_dim: + return self._in_proj(value, start=2 * self.qkv_dim, 
sample_dim=self.sample_kv_embed_dim) + else: + weight = self.v_proj_weight + bias = self.in_proj_bias + if bias is not None: + bias = bias[2 * self.qkv_dim:] + return F.linear(value, weight[..., :self.sample_kv_embed_dim], bias) + + def _in_proj(self, input, sample_dim, start=0, end=None): + weight = self.in_proj_weight + bias = self.in_proj_bias + weight = weight[start:end, :sample_dim] + if bias is not None: + bias = bias[start:end] + return F.linear(input, weight, bias) + + def reorder_incremental_state(self, incremental_state, new_order): + """Reorder buffered internal state (for incremental generation).""" + input_buffer = self._get_input_buffer(incremental_state) + if input_buffer is not None: + for k in input_buffer.keys(): + input_buffer[k] = input_buffer[k].index_select(0, new_order) + self._set_input_buffer(incremental_state, input_buffer) + + def _get_input_buffer(self, incremental_state): + return get_incremental_state( #utils. + self, + incremental_state, + 'attn_state', + ) or {} + + def _set_input_buffer(self, incremental_state, buffer): + set_incremental_state( + self, + incremental_state, + 'attn_state', + buffer, + ) + + def apply_sparse_mask(self, attn_weights, tgt_len, src_len, bsz): + return attn_weights + + def __repr__(self): + # We treat the extra repr like the sub-module, one item per line + extra_lines = [] + extra_repr = self.extra_repr() + # empty string will be split into list [''] + if extra_repr: + extra_lines = extra_repr.split('\n') + child_lines = [] + for key, module in self._modules.items(): + mod_str = repr(module) + mod_str = _addindent(mod_str, 2) + child_lines.append('(' + key + '): ' + mod_str) + lines = extra_lines + child_lines + + main_str = self._get_name() + '\tnum_heads:' + str(self.num_heads) + '\t qkv_dim:' + str(self.qkv_dim) + if lines: + # simple one-liner info, which most builtin Modules will use + if len(extra_lines) == 1 and not child_lines: + main_str += extra_lines[0] + else: + main_str += '\n ' + '\n '.join(lines) + '\n' + + main_str += ')' + return main_str diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py new file mode 100644 index 00000000000..75d990f26dd --- /dev/null +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -0,0 +1,498 @@ +""" +Translate pre-processed data with a trained model. +""" +import torch + +from fairseq import checkpoint_utils, options, progress_bar, tasks, utils +from fairseq.meters import StopwatchMeter, TimeMeter +import sys +import pdb +import numpy as np +import subprocess +import os +from fairseq.data import dictionary +import csv +import json +import warnings +from .transformer_supernetwork import TransformerSuperNetwork + +import sys +import logging +import tqdm +import time +import copy +from datetime import datetime +import ctypes +import math +warnings.filterwarnings("ignore") + + +try: + from fairseq import libbleu +except ImportError as e: + import sys + sys.stderr.write('ERROR: missing libbleu.so. 
run `pip install --editable .`\n') + raise e + + +C = ctypes.cdll.LoadLibrary(libbleu.__file__) + + +class BleuStat(ctypes.Structure): + _fields_ = [ + ('reflen', ctypes.c_size_t), + ('predlen', ctypes.c_size_t), + ('match1', ctypes.c_size_t), + ('count1', ctypes.c_size_t), + ('match2', ctypes.c_size_t), + ('count2', ctypes.c_size_t), + ('match3', ctypes.c_size_t), + ('count3', ctypes.c_size_t), + ('match4', ctypes.c_size_t), + ('count4', ctypes.c_size_t), + ] + + +class Scorer(object): + def __init__(self, pad, eos, unk): + self.stat = BleuStat() + self.pad = pad + self.eos = eos + self.unk = unk + self.reset() + + def reset(self, one_init=False): + if one_init: + C.bleu_one_init(ctypes.byref(self.stat)) + else: + C.bleu_zero_init(ctypes.byref(self.stat)) + + def add(self, ref, pred): + if not isinstance(ref, torch.IntTensor): + raise TypeError('ref must be a torch.IntTensor (got {})' + .format(type(ref))) + if not isinstance(pred, torch.IntTensor): + raise TypeError('pred must be a torch.IntTensor(got {})' + .format(type(pred))) + + # don't match unknown words + rref = ref.clone() + assert not rref.lt(0).any() + rref[rref.eq(self.unk)] = -999 + + rref = rref.contiguous().view(-1) + pred = pred.contiguous().view(-1) + + C.bleu_add( + ctypes.byref(self.stat), + ctypes.c_size_t(rref.size(0)), + ctypes.c_void_p(rref.data_ptr()), + ctypes.c_size_t(pred.size(0)), + ctypes.c_void_p(pred.data_ptr()), + ctypes.c_int(self.pad), + ctypes.c_int(self.eos)) + + def score(self, order=4): + psum = sum(math.log(p) if p > 0 else float('-Inf') + for p in self.precision()[:order]) + return self.brevity() * math.exp(psum / order) * 100 + + def precision(self): + def ratio(a, b): + return a / b if b > 0 else 0 + + return [ + ratio(self.stat.match1, self.stat.count1), + ratio(self.stat.match2, self.stat.count2), + ratio(self.stat.match3, self.stat.count3), + ratio(self.stat.match4, self.stat.count4), + ] + + def brevity(self): + r = self.stat.reflen / self.stat.predlen + return min(1, math.exp(1 - r)) + + def result_string(self, order=4): + assert order <= 4, "BLEU scores for order > 4 aren't supported" + fmt = 'BLEU{} = {:2.2f}, {:2.1f}' + for _ in range(1, order): + fmt += '/{:2.1f}' + fmt += ' (BP={:.3f}, ratio={:.3f}, syslen={}, reflen={})' + bleup = [p * 100 for p in self.precision()[:order]] + return fmt.format(order, self.score(order=order), *bleup, + self.brevity(), self.stat.predlen/self.stat.reflen, + self.stat.predlen, self.stat.reflen) + + +def get_bleu_score(args,ref,sys): + dict = dictionary.Dictionary() + order =4 + sacrebleu = False + sentence_bleu = False + ignore_case = False + def readlines(fd): + for line in fd.readlines(): + if ignore_case: + yield line.lower() + else: + yield line + + + if sentence_bleu: + def score(fdsys): + with open(ref) as fdref: + scorer = Scorer(dict.pad(), dict.eos(), dict.unk()) + for i, (sys_tok, ref_tok) in enumerate(zip(readlines(fdsys), readlines(fdref))): + scorer.reset(one_init=True) + sys_tok = dict.encode_line(sys_tok) + ref_tok = dict.encode_line(ref_tok) + scorer.add(ref_tok, sys_tok) + print(i, scorer.result_string(order)) + else: + def score(fdsys): + with open(ref) as fdref: + scorer = Scorer(dict.pad(), dict.eos(), dict.unk()) + for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)): + sys_tok = dict.encode_line(sys_tok) + ref_tok = dict.encode_line(ref_tok) + scorer.add(ref_tok, sys_tok) + print(scorer.result_string(order)) + return(scorer.score(order)) + + + if sys == '-': + score = score(sys.stdin) + else: + with open(sys, 'r') as f: + 
score = score(f) + return score + +def compute_bleu(config,dataset_path,checkpoint_path): + + parser = options.get_generation_parser() + + args = options.parse_args_and_arch(parser,[dataset_path]) + + args.data = dataset_path + args.beam = 5 + args.remove_bpe = '@@ ' + args.gen_subset = 'test' + args.lenpen = 0.6 + args.source_lang = 'en' + args.target_lang = 'de' + args.batch_size = 128 + utils.import_user_module(args) + max_tokens = 12000 + + + use_cuda = torch.cuda.is_available() and not args.cpu + + # when running on CPU, use fp32 as default + if not use_cuda: + args.fp16 = False + + torch.manual_seed(args.seed) + + # Optimize ensemble for generation + # Load dataset splits + task = tasks.setup_task(args) + task.load_dataset(args.gen_subset) + # Set dictionaries + try: + src_dict = getattr(task, 'source_dictionary', None) + except NotImplementedError: + src_dict = None + tgt_dict = task.target_dictionary + + + # Load ensemble + print('| loading model(s) from {}'.format(args.path)) + model = TransformerSuperNetwork(task) + state = torch.load(checkpoint_path,map_location=torch.device('cpu')) + + model.load_state_dict(state['model'], + strict=True) + + if use_cuda: + model.cuda() + print(config) + model.set_sample_config(config) + model.make_generation_fast_( + beamable_mm_beam_size=None if args.no_beamable_mm else args.beam, + need_attn=args.print_alignment, + ) + if args.fp16: + model.half() + if use_cuda: + model.cuda() + + print(args.path, file=sys.stderr) + + # Load alignment dictionary for unknown word replacement + # (None if no unknown word replacement, empty if no path to align dictionary) + align_dict = utils.load_align_dict(args.replace_unk) + + # Load dataset (possibly sharded) + itr = task.get_batch_iterator( + dataset=task.dataset(args.gen_subset), + max_tokens=args.max_tokens, + max_sentences=128, + max_positions=utils.resolve_max_positions( + task.max_positions(), + *[model.max_positions()] + ), + ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=args.required_batch_size_multiple, + num_shards=args.num_shards, + shard_id=args.shard_id, + num_workers=args.num_workers, + ).next_epoch_itr(shuffle=False) + + # Initialize generator + gen_timer = StopwatchMeter() + generator = task.build_generator([model],args) + + num_sentences = 0 + has_target = True + decoder_times_all = [] + input_len_all = [] + with open('translations_out.txt','a') as fname_translations: + with progress_bar.build_progress_bar(args, itr) as t: + wps_meter = TimeMeter() + for sample in t: + + sample = utils.move_to_cuda(sample) if use_cuda else sample + if 'net_input' not in sample: + continue + + prefix_tokens = None + if args.prefix_size > 0: + prefix_tokens = sample['target'][:, :args.prefix_size] + + gen_timer.start() + hypos = task.inference_step(generator, [model], sample, prefix_tokens) + input_len_all.append(np.mean(sample['net_input']['src_lengths'].cpu().numpy())) + num_generated_tokens = sum(len(h[0]['tokens']) for h in hypos) + gen_timer.stop(num_generated_tokens) + + for i, sample_id in enumerate(sample['id'].tolist()): + has_target = sample['target'] is not None + + # Remove padding + src_tokens = utils.strip_pad(sample['net_input']['src_tokens'][i, :], tgt_dict.pad()) + target_tokens = None + if has_target: + target_tokens = utils.strip_pad(sample['target'][i, :], tgt_dict.pad()).int().cpu() + + # Either retrieve the original sentences or regenerate them from tokens. 
+ if align_dict is not None: + src_str = task.dataset(args.gen_subset).src.get_original_text(sample_id) + target_str = task.dataset(args.gen_subset).tgt.get_original_text(sample_id) + else: + if src_dict is not None: + src_str = src_dict.string(src_tokens, args.remove_bpe) + else: + src_str = "" + if has_target: + target_str = tgt_dict.string(target_tokens, args.remove_bpe, escape_unk=True) + + if not args.quiet: + if src_dict is not None: + #print('S-{}\t{}'.format(sample_id, src_str)) + fname_translations.write('S-{}\t{}'.format(sample_id, src_str)) + fname_translations.write('\n') + + if has_target: + #print('T-{}\t{}'.format(sample_id, target_str)) + fname_translations.write('T-{}\t{}'.format(sample_id, target_str)) + fname_translations.write('\n') + + # Process top predictions + for j, hypo in enumerate(hypos[i][:args.nbest]): + hypo_tokens, hypo_str, alignment = utils.post_process_prediction( + hypo_tokens=hypo['tokens'].int().cpu(), + src_str=src_str, + alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None, + align_dict=align_dict, + tgt_dict=tgt_dict, + remove_bpe=args.remove_bpe, + ) + + if not args.quiet: + + fname_translations.write('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str)) + fname_translations.write('\n') + fname_translations.write('P-{}\t{}'.format( + sample_id, + ' '.join(map( + lambda x: '{:.4f}'.format(x), + hypo['positional_scores'].tolist(), + )) + )) + fname_translations.write('\n') + + if args.print_alignment: + fname_translations.write('A-{}\t{}'.format( + sample_id, + ' '.join(map(lambda x: str(utils.item(x)), alignment)) + )) + fname_translations.write('\n') + + wps_meter.update(num_generated_tokens) + t.log({'wps': round(wps_meter.avg)}) + num_sentences += sample['nsentences'] + + + os.system("grep ^H translations_out.txt | cut -f3- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > sys.txt") + os.system("grep ^T translations_out.txt | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > ref.txt") + bleu_score = get_bleu_score(args,"ref.txt","sys.txt") + print(bleu_score) + + os.system("rm ref.txt") + os.system("rm sys.txt") + os.system("rm translations_out.txt") + return bleu_score + +def compute_latency(config,dataset_path,get_model_parameters=False): + parser = options.get_generation_parser() + + args = options.parse_args_and_arch(parser,[dataset_path]) + + args.data = dataset_path + args.beam = 5 + args.remove_bpe = '@@ ' + args.gen_subset = 'test' + args.lenpen = 0.6 + args.source_lang = 'en' + args.target_lang = 'de' + args.batch_size = 128 + utils.import_user_module(args) + max_tokens = 12000 + args.latgpu=False + args.latcpu=True + args.latiter=100 + + # Initialize CUDA and distributed training + if torch.cuda.is_available() and not args.cpu: + torch.cuda.set_device(args.device_id) + torch.manual_seed(args.seed) + + #Optimize ensemble for generation + # Load dataset splits + task = tasks.setup_task(args) + task.load_dataset(args.gen_subset) + # Set dictionaries + try: + src_dict = getattr(task, 'source_dictionary', None) + except NotImplementedError: + src_dict = None + tgt_dict = task.target_dictionary + + + # Load ensemble + print('| loading model(s) from {}'.format(args.path)) + model = TransformerSuperNetwork(task) + + # specify the length of the dummy input for profile + # for iwslt, the average length is 23, for wmt, that is 30 + dummy_sentence_length_dict = {'iwslt': 23, 'wmt': 30} + + dummy_sentence_length = dummy_sentence_length_dict['wmt'] + + + dummy_src_tokens = [2] + [7] * 
(dummy_sentence_length - 1) + dummy_prev = [7] * (dummy_sentence_length - 1) + [2] + + src_tokens_test = torch.tensor([dummy_src_tokens], dtype=torch.long)#.cuda() + src_lengths_test = torch.tensor([dummy_sentence_length])#.cuda() + prev_output_tokens_test_with_beam = torch.tensor([dummy_prev] * args.beam, dtype=torch.long)#.cuda() + bsz = 1 + new_order = torch.arange(bsz).view(-1, 1).repeat(1, args.beam).view(-1).long()#.cuda() + if args.latcpu: + model.cpu() + print('Measuring model latency on CPU for dataset generation...') + elif args.latgpu: + model.cuda() + src_tokens_test = src_tokens_test#.cuda() + src_lengths_test = src_lengths_test#.cuda() + prev_output_tokens_test_with_beam = prev_output_tokens_test_with_beam#.cuda() + print('Measuring model latency on GPU for dataset generation...') + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + + + model.set_sample_config(config) + + model.eval() + + with torch.no_grad(): + + # dry runs + for _ in range(15): + encoder_out_test = model.encoder(src_tokens=src_tokens_test, src_lengths=src_lengths_test) + + encoder_latencies = [] + print('Measuring encoder for dataset generation...') + for _ in range(args.latiter): + if args.latgpu: + #start.record() + start = time.time() + elif args.latcpu: + start = time.time() + + model.encoder(src_tokens=src_tokens_test, src_lengths=src_lengths_test) + + if args.latgpu: + end = time.time() + encoder_latencies.append((end - start) * 1000) + elif args.latcpu: + end = time.time() + encoder_latencies.append((end - start) * 1000) + + encoder_latencies.sort() + encoder_latencies = encoder_latencies[int(args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] + print(f'Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; \t Std: {np.std(encoder_latencies)} ms') + + + encoder_out_test_with_beam = model.encoder.reorder_encoder_out(encoder_out_test, new_order) + + # dry runs + for _ in range(15): + model.decoder(prev_output_tokens=prev_output_tokens_test_with_beam, + encoder_out=encoder_out_test_with_beam) + + # decoder is more complicated because we need to deal with incremental states and auto regressive things + decoder_iterations_dict = {'iwslt': 23, 'wmt': 30} + + decoder_iterations = decoder_iterations_dict['wmt'] + print(decoder_iterations) + decoder_latencies = [] + print('Measuring decoder for dataset generation...') + for _ in range(args.latiter): + if args.latgpu: + start = time.time() + #start.record() + elif args.latcpu: + start = time.time() + incre_states = {} + for k_regressive in range(decoder_iterations): + model.decoder(prev_output_tokens=prev_output_tokens_test_with_beam[:, :k_regressive + 1], + encoder_out=encoder_out_test_with_beam, incremental_state=incre_states) + if args.latgpu: + end = time.time() + decoder_latencies.append((end - start) * 1000) + + elif args.latcpu: + end = time.time() + decoder_latencies.append((end - start) * 1000) + + # only use the 10% to 90% latencies to avoid outliers + decoder_latencies.sort() + decoder_latencies = decoder_latencies[int(args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] + + print(decoder_latencies) + print(f'Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') + + lat_mean = np.mean(encoder_latencies)+np.mean(decoder_latencies) + lat_std = np.std(encoder_latencies)+np.std(decoder_latencies) + return lat_mean, lat_std diff --git 
a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py new file mode 100644 index 00000000000..58f29a94aa3 --- /dev/null +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py @@ -0,0 +1,992 @@ +import torch +from torch import nn +from torch.nn import Parameter +import torch.nn.functional as F + +from fairseq import utils +from fairseq import options, utils +from fairseq.models import ( + FairseqEncoder, + FairseqIncrementalDecoder, + BaseFairseqModel +) + +from fairseq.modules import (PositionalEmbedding,SinusoidalPositionalEmbedding) +from .modules_supernetwork import ( + + MultiheadAttentionSuper, + EmbeddingSuper, + LinearSuper, + LayerNormSuper + +) +import math + +DEFAULT_MAX_SOURCE_POSITIONS = 1024 +DEFAULT_MAX_TARGET_POSITIONS = 1024 +class TransformerSuperNetwork(BaseFairseqModel): + """ + Transformer model from `"Attention Is All You Need" (Vaswani, et al, 2017) + `_. + + Args: + encoder (TransformerEncoder): the encoder + decoder (TransformerDecoder): the decoder + + The Transformer model provides the following named architectures and + command-line arguments: + + .. argparse:: + :ref: fairseq.models.transformer_parser + :prog: + """ + + + def __init__(self,task): + super().__init__() + + src_dict, tgt_dict = task.source_dictionary, task.target_dictionary + encoder_config ={'encoder_embed_dim': 640, + 'encoder_layers': 6, + 'encoder_attention_heads': 8, + 'encoder_ffn_embed_dim':3072, + 'encoder_embed_path': None} + + decoder_config ={'decoder_embed_dim': 640, + 'decoder_layers': 6, + 'decoder_attention_heads': 8, + 'decoder_ffn_embed_dim':3072} + + encoder_embed_tokens = self.build_embedding( + src_dict, encoder_config['encoder_embed_dim'], encoder_config['encoder_embed_path'] + ) + decoder_embed_tokens = encoder_embed_tokens + self.share_decoder_input_output_embed = True + + self.encoder = TransformerEncoder(encoder_config, src_dict, encoder_embed_tokens) + self.decoder = TransformerDecoder(decoder_config, tgt_dict, decoder_embed_tokens) + + def build_embedding(self,dictionary, embed_dim, path=None): + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + emb = Embedding(num_embeddings, embed_dim, padding_idx) + # if provided, load from preloaded dictionaries + if path: + embed_dict = utils.parse_embedding(path) + utils.load_embedding(embed_dict, dictionary, emb) + return emb + + def profile(self, mode=True): + for module in self.modules(): + if hasattr(module, 'profile') and self != module: + module.profile(mode) + + def get_sampled_params_numel(self, config): + self.set_sample_config(config) + numels = [] + for name, module in self.named_modules(): + if hasattr(module, 'calc_sampled_param_num'): + # a hacky way to skip the layers that exceed encoder-layer-num or decoder-layer-num + if name.split('.')[0] == 'encoder' and eval(name.split('.')[2]) >= config['encoder']['encoder_layer_num']: + continue + if name.split('.')[0] == 'decoder' and eval(name.split('.')[2]) >= config['decoder']['decoder_layer_num']: + continue + + numels.append(module.calc_sampled_param_num()) + return sum(numels) + + def set_sample_config(self, config): + self.encoder.set_sample_config(config) + self.decoder.set_sample_config(config) + + +class TransformerEncoder(FairseqEncoder): + """ + Transformer encoder consisting of *args.encoder_layers* layers. Each layer + is a :class:`TransformerEncoderLayer`. 
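+
+    A sub-network of the encoder is activated with :meth:`set_sample_config`,
+    which takes an ``{'encoder': {...}}`` dictionary of elastic dimensions.
+    Illustrative sketch only; ``encoder`` is assumed to be an instance of this
+    class and the values below are placeholders rather than the actual DyNAS
+    search space::
+
+        encoder.set_sample_config({'encoder': {
+            'encoder_embed_dim': 512,
+            'encoder_layer_num': 6,
+            'encoder_ffn_embed_dim': [2048] * 6,
+            'encoder_self_attention_heads': [8] * 6,
+        }})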
+ + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): encoding dictionary + embed_tokens (torch.nn.Embedding): input embedding + """ + + def __init__(self, encoder_config, dictionary, embed_tokens): + super().__init__(dictionary) + # the configs of super arch + self.super_embed_dim = encoder_config['encoder_embed_dim'] + self.super_ffn_embed_dim = [encoder_config['encoder_ffn_embed_dim']] * encoder_config['encoder_layers'] + self.super_layer_num = encoder_config['encoder_layers'] + self.super_self_attention_heads = [encoder_config['encoder_attention_heads']] * encoder_config['encoder_layers'] + + self.super_dropout = 0.3 + self.super_activation_dropout = 0 + + self.super_embed_scale = math.sqrt(self.super_embed_dim) + + # the configs of current sampled arch + self.sample_embed_dim = None + self.sample_ffn_embed_dim = None + self.sample_layer_num = None + self.sample_self_attention_heads = None + + self.sample_dropout = None + self.sample_activation_dropout = None + + self.sample_embed_scale = None + + self.register_buffer('version', torch.Tensor([3])) + + self.padding_idx = embed_tokens.padding_idx + self.max_source_positions = DEFAULT_MAX_SOURCE_POSITIONS + + self.embed_tokens = embed_tokens + + self.embed_positions = PositionalEmbedding( + self.max_source_positions, self.super_embed_dim, self.padding_idx, + learned= False, + ) + + self.layers = nn.ModuleList([]) + self.layers.extend([ + TransformerEncoderLayer(encoder_config, layer_idx=i) + for i in range(self.super_layer_num) + ]) + + if False: + self.layer_norm = LayerNormSuper(self.super_embed_dim) + else: + self.layer_norm = None + + self.vocab_original_scaling = False + + + def set_sample_config(self, config:dict): + + self.sample_embed_dim = config['encoder']['encoder_embed_dim'] + + # Caution: this is a list for all layers + self.sample_ffn_embed_dim = config['encoder']['encoder_ffn_embed_dim'] + + self.sample_layer_num = config['encoder']['encoder_layer_num'] + + # Caution: this is a list for all layers + self.sample_self_attention_heads = config['encoder']['encoder_self_attention_heads'] + + self.sample_dropout = calc_dropout(self.super_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_activation_dropout = calc_dropout(self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) + + self.sample_embed_scale = math.sqrt(self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale + + self.embed_tokens.set_sample_config(sample_embed_dim=self.sample_embed_dim, part='encoder') + + if self.layer_norm is not None: + self.layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + + for i, layer in enumerate(self.layers): + # not exceed sample layer number + if i < self.sample_layer_num: + layer.set_sample_config(is_identity_layer=False, + sample_embed_dim=self.sample_embed_dim, + sample_ffn_embed_dim_this_layer=self.sample_ffn_embed_dim[i], + sample_self_attention_heads_this_layer=self.sample_self_attention_heads[i], + sample_dropout=self.sample_dropout, + sample_activation_dropout=self.sample_activation_dropout) + # exceeds sample layer number + else: + layer.set_sample_config(is_identity_layer=True) + + + def forward(self, src_tokens, src_lengths): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's 
output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + """ + # embed tokens and positions + x = self.sample_embed_scale * self.embed_tokens(src_tokens, part='encoder') + if self.embed_positions is not None: + positions = self.embed_positions(src_tokens) + + # sample the positional embedding and add + x += positions[..., :self.sample_embed_dim] + x = F.dropout(x, p=self.sample_dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + # compute padding mask + encoder_padding_mask = src_tokens.eq(self.padding_idx) + if not encoder_padding_mask.any(): + encoder_padding_mask = None + + all_x = [] + # encoder layers + for layer in self.layers: + # print(x.shape) + x = layer(x, encoder_padding_mask) + all_x.append(x) + + + if self.layer_norm: + x = self.layer_norm(x) + + return { + 'encoder_out': x, + 'encoder_out_all' : all_x, + 'encoder_padding_mask': encoder_padding_mask, + } + + def reorder_encoder_out(self, encoder_out, new_order): + """ + Reorder encoder output according to *new_order*. + + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + if encoder_out['encoder_out'] is not None: + encoder_out['encoder_out'] = \ + encoder_out['encoder_out'].index_select(1, new_order) + if encoder_out['encoder_padding_mask'] is not None: + encoder_out['encoder_padding_mask'] = \ + encoder_out['encoder_padding_mask'].index_select(0, new_order) + # need to reorder each layer of output + if 'encoder_out_all' in encoder_out.keys(): + new_encoder_out_all = [] + for encoder_out_one_layer in encoder_out['encoder_out_all']: + new_encoder_out_all.append(encoder_out_one_layer.index_select(1, new_order)) + encoder_out['encoder_out_all'] = new_encoder_out_all + + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + if self.embed_positions is None: + return self.max_source_positions + return min(self.max_source_positions, self.embed_positions.max_positions()) + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = '{}.embed_positions.weights'.format(name) + if weights_key in state_dict: + del state_dict[weights_key] + state_dict['{}.embed_positions._float_tensor'.format(name)] = torch.FloatTensor(1) + for i in range(len(self.layers)): + # update layer norms + self.layers[i].upgrade_state_dict_named(state_dict, "{}.layers.{}".format(name, i)) + + version_key = '{}.version'.format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) < 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + return state_dict + + +class TransformerDecoder(FairseqIncrementalDecoder): + """ + Transformer decoder consisting of *args.decoder_layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). 
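+
+    Note:
+        ``decoder_arbitrary_ende_attn`` (one value per decoder layer) selects which
+        encoder outputs a decoder layer attends to: ``-1`` uses only the last
+        encoder layer, ``1`` additionally concatenates the second-to-last encoder
+        layer, and ``2`` also concatenates the third-to-last one (see
+        :meth:`extract_features`).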
+ """ + + def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=False): + super().__init__(dictionary) + + # the configs of super arch + self.super_embed_dim = decoder_config['decoder_embed_dim'] + self.super_ffn_embed_dim = decoder_config['decoder_ffn_embed_dim'] * decoder_config['decoder_layers'] + self.super_layer_num = decoder_config['decoder_layers'] + self.super_self_attention_heads = 8*[decoder_config['decoder_attention_heads']] * decoder_config['decoder_layers'] + self.super_ende_attention_heads = [decoder_config['decoder_attention_heads']] * decoder_config['decoder_layers'] + self.super_arbitrary_ende_attn = [-1] * decoder_config['decoder_layers'] + + self.super_dropout = 0.3 + self.super_activation_dropout = 0.0 + + self.super_embed_scale = math.sqrt(self.super_embed_dim) + + # the configs of current sampled arch + self.sample_embed_dim = None + self.sample_ffn_embed_dim = None + self.sample_layer_num = None + self.sample_self_attention_heads = None + self.sample_ende_attention_heads = None + self.sample_arbitrary_ende_attn = None + + self.sample_dropout = None + self.sample_activation_dropout = None + + self.sample_embed_scale = None + + + # the configs of current sampled arch + self.register_buffer('version', torch.Tensor([3])) + + self.share_input_output_embed = True + + self.output_embed_dim = decoder_config['decoder_embed_dim'] + + padding_idx = embed_tokens.padding_idx + self.max_target_positions = DEFAULT_MAX_TARGET_POSITIONS + + self.embed_tokens = embed_tokens + + + self.embed_positions = PositionalEmbedding( + self.max_target_positions, self.super_embed_dim, padding_idx, + learned=False, + ) if not False else None + + self.layers = nn.ModuleList([]) + self.layers.extend([ + TransformerDecoderLayer(decoder_config, layer_idx=i, no_encoder_attn=no_encoder_attn) + for i in range(self.super_layer_num) + ]) + + self.adaptive_softmax = None + + self.project_out_dim = Linear(self.super_embed_dim, self.output_embed_dim, bias=False) \ + if self.super_embed_dim != self.output_embed_dim and not args.tie_adaptive_weights else None + + if False:# args.adaptive_softmax_cutoff is not None: + self.adaptive_softmax = AdaptiveSoftmax( + len(dictionary), + self.output_embed_dim, + options.eval_str_list(args.adaptive_softmax_cutoff, type=int), + dropout=args.adaptive_softmax_dropout, + adaptive_inputs=embed_tokens if args.tie_adaptive_weights else None, + factor=args.adaptive_softmax_factor, + tie_proj=args.tie_adaptive_proj, + ) + elif not self.share_input_output_embed: + self.embed_out = nn.Parameter(torch.Tensor(len(dictionary), self.output_embed_dim)) + nn.init.normal_(self.embed_out, mean=0, std=self.output_embed_dim ** -0.5) + + self.layer_norm = None + self.get_attn = False + + self.vocab_original_scaling = False + + def set_sample_config(self, config:dict): + + self.sample_embed_dim = config['decoder']['decoder_embed_dim'] + self.sample_encoder_embed_dim = config['encoder']['encoder_embed_dim'] + + # Caution: this is a list for all layers + self.sample_ffn_embed_dim = config['decoder']['decoder_ffn_embed_dim'] + + # Caution: this is a list for all layers + self.sample_self_attention_heads = config['decoder']['decoder_self_attention_heads'] + + # Caution: this is a list for all layers + self.sample_ende_attention_heads = config['decoder']['decoder_ende_attention_heads'] + + self.sample_arbitrary_ende_attn = config['decoder']['decoder_arbitrary_ende_attn'] + + self.sample_layer_num = config['decoder']['decoder_layer_num'] + + self.sample_dropout = 
calc_dropout(self.super_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_activation_dropout = calc_dropout(self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) + + self.sample_embed_scale = math.sqrt(self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale + + self.embed_tokens.set_sample_config(sample_embed_dim=self.sample_embed_dim, part='decoder') + + if self.layer_norm is not None: + self.layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + + for i, layer in enumerate(self.layers): + # not exceed sample layer number + if i < self.sample_layer_num: + layer.set_sample_config(is_identity_layer=False, + sample_embed_dim=self.sample_embed_dim, + sample_encoder_embed_dim=self.sample_encoder_embed_dim, + sample_ffn_embed_dim_this_layer=self.sample_ffn_embed_dim[i], + sample_self_attention_heads_this_layer=self.sample_self_attention_heads[i], + sample_ende_attention_heads_this_layer=self.sample_ende_attention_heads[i], + sample_dropout=self.sample_dropout, + sample_activation_dropout=self.sample_activation_dropout) + # exceeds sample layer number + else: + layer.set_sample_config(is_identity_layer=True) + + + + def forward(self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (Tensor, optional): output from the encoder, used for + encoder-side attention + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + x, extra = self.extract_features(prev_output_tokens, encoder_out, incremental_state) + x = self.output_layer(x) + return x, extra + + def extract_features(self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused): + """ + Similar to *forward* but only return features. 
+
+        Returns:
+            tuple:
+                - the decoder's features of shape `(batch, tgt_len, embed_dim)`
+                - a dictionary with any model-specific outputs
+        """
+        # embed positions
+        positions = self.embed_positions(
+            prev_output_tokens,
+            incremental_state=incremental_state,
+        ) if self.embed_positions is not None else None
+
+        if positions is not None:
+            positions = positions[..., :self.sample_embed_dim]
+
+        if incremental_state is not None:
+            # only feed the last token into the decoder
+            prev_output_tokens = prev_output_tokens[:, -1:]
+            if positions is not None:
+                positions = positions[:, -1:]
+
+        # embed tokens and positions
+        x = self.sample_embed_scale * self.embed_tokens(prev_output_tokens, part='decoder')
+
+        if positions is not None:
+            x += positions
+        x = F.dropout(x, p=self.sample_dropout, training=self.training)
+
+        # B x T x C -> T x B x C
+        x = x.transpose(0, 1)
+        attn = None
+        attns = []
+        inner_states = [x]
+
+        # decoder layers
+        for i, layer in enumerate(self.layers):
+            encoder_out_feed = None
+            encoder_padding_mask_feed = None
+
+            if encoder_out is not None:
+                # only use the last encoder layer's output
+                if i >= self.sample_layer_num or self.sample_arbitrary_ende_attn[i] == -1:
+                    encoder_out_feed = encoder_out['encoder_out']
+                # also concatenate the second-to-last encoder layer's output
+                elif self.sample_arbitrary_ende_attn[i] == 1:
+                    encoder_out_feed = torch.cat([encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2]], dim=0)
+                # also concatenate the second-to-last and third-to-last encoder layer outputs
+                elif self.sample_arbitrary_ende_attn[i] == 2:
+                    encoder_out_feed = torch.cat([encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2], encoder_out['encoder_out_all'][-3]], dim=0)
+                else:
+                    raise NotImplementedError("arbitrary_ende_attn should be in [-1, 1, 2]")
+
+                if encoder_out['encoder_padding_mask'] is not None:
+                    if i >= self.sample_layer_num or self.sample_arbitrary_ende_attn[i] == -1:
+                        encoder_padding_mask_feed = encoder_out['encoder_padding_mask']
+                    # repeat the padding mask once to match the two concatenated encoder outputs
+                    elif self.sample_arbitrary_ende_attn[i] == 1:
+                        encoder_padding_mask_feed = torch.cat([encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], dim=1)
+                    # repeat the padding mask twice to match the three concatenated encoder outputs
+                    elif self.sample_arbitrary_ende_attn[i] == 2:
+                        encoder_padding_mask_feed = torch.cat([encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], dim=1)
+                    else:
+                        raise NotImplementedError("arbitrary_ende_attn should be in [-1, 1, 2]")
+
+            x, attn = layer(
+                x,
+                encoder_out_feed,
+                encoder_padding_mask_feed,
+                incremental_state,
+                self_attn_mask=self.buffered_future_mask(x) if incremental_state is None else None,
+            )
+            inner_states.append(x)
+            attns.append(attn)
+
+        if self.layer_norm:
+            x = self.layer_norm(x)
+
+        # T x B x C -> B x T x C
+        x = x.transpose(0, 1)
+
+        if self.project_out_dim is not None:
+            x = self.project_out_dim(x)
+        if not self.get_attn:
+            attns = attns[-1]
+        return x, {'attn': attns, 'inner_states': inner_states}
+
+    def output_layer(self, features, **kwargs):
+        """Project features to the vocabulary size."""
+        if self.adaptive_softmax is None:
+            # project back to size of vocabulary
+            if self.share_input_output_embed:
+                return F.linear(features, self.embed_tokens.sampled_weight('decoder'))
+            else:
+                return F.linear(features, self.embed_out[:, :self.sample_embed_dim])
+        else:
+            return features
+
+    def max_positions(self):
+        """Maximum output length supported by the decoder."""
+        if self.embed_positions is None:
+            return self.max_target_positions
+        return min(self.max_target_positions, self.embed_positions.max_positions())
+
+    
def buffered_future_mask(self, tensor): + dim = tensor.size(0) + if not hasattr(self, '_future_mask') or self._future_mask is None or self._future_mask.device != tensor.device or self._future_mask.size(0) < dim: + self._future_mask = torch.triu(utils.fill_with_neg_inf(tensor.new(dim, dim)), 1) + return self._future_mask[:dim, :dim] + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = '{}.embed_positions.weights'.format(name) + if weights_key in state_dict: + del state_dict[weights_key] + state_dict['{}.embed_positions._float_tensor'.format(name)] = torch.FloatTensor(1) + + for i in range(len(self.layers)): + # update layer norms + layer_norm_map = { + '0': 'self_attn_layer_norm', + '1': 'encoder_attn_layer_norm', + '2': 'final_layer_norm' + } + for old, new in layer_norm_map.items(): + for m in ('weight', 'bias'): + k = '{}.layers.{}.layer_norms.{}.{}'.format(name, i, old, m) + if k in state_dict: + state_dict['{}.layers.{}.{}.{}'.format(name, i, new, m)] = state_dict[k] + del state_dict[k] + + version_key = '{}.version'.format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) <= 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + + return state_dict + + +class TransformerEncoderLayer(nn.Module): + """Encoder layer block. + + In the original paper each operation (multi-head attention or FFN) is + postprocessed with: `dropout -> add residual -> layernorm`. In the + tensor2tensor code they suggest that learning is more robust when + preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *args.encoder_normalize_before* to ``True``. 
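+
+    A layer configured with ``is_identity_layer=True`` via :meth:`set_sample_config`
+    simply returns its input in :meth:`forward`, which is how the supernetwork
+    realizes elastic encoder depth.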
+ + Args: + args (argparse.Namespace): parsed command-line arguments + """ + + def __init__(self, encoder_config, layer_idx): + super().__init__() + + # the configs of super arch + self.super_embed_dim = encoder_config['encoder_embed_dim'] + self.super_ffn_embed_dim_this_layer = encoder_config['encoder_ffn_embed_dim'] + self.super_self_attention_heads_this_layer = encoder_config['encoder_attention_heads'] + + self.super_dropout = 0.3 + self.super_activation_dropout =0 + + # the configs of current sampled arch + self.sample_embed_dim = None + self.sample_ffn_embed_dim_this_layer = None + self.sample_self_attention_heads_this_layer = None + + self.sample_dropout = None + self.sample_activation_dropout = None + + self.is_identity_layer = None + + self.qkv_dim= 512 + + + self.self_attn = MultiheadAttentionSuper( + super_embed_dim=self.super_embed_dim, num_heads=self.super_self_attention_heads_this_layer, is_encoder=True, + dropout=0.1, self_attention=True, qkv_dim=self.qkv_dim + ) + + self.self_attn_layer_norm = LayerNormSuper(self.super_embed_dim) + self.dropout = 0.1 + self.activation_fn = utils.get_activation_fn( + activation='relu' + ) + self.normalize_before = False + + self.fc1 = LinearSuper(super_in_dim=self.super_embed_dim, super_out_dim=self.super_ffn_embed_dim_this_layer, uniform_=None, non_linear='relu') #init.uniform_ + self.fc2 = LinearSuper(super_in_dim=self.super_ffn_embed_dim_this_layer, super_out_dim=self.super_embed_dim, uniform_=None, non_linear='linear') + self.final_layer_norm = LayerNormSuper(self.super_embed_dim) + + + def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_ffn_embed_dim_this_layer=None, sample_self_attention_heads_this_layer=None, sample_dropout=None, sample_activation_dropout=None): + + if is_identity_layer: + self.is_identity_layer = True + return + + self.is_identity_layer = False + + self.sample_embed_dim = sample_embed_dim + self.sample_ffn_embed_dim_this_layer = sample_ffn_embed_dim_this_layer + self.sample_self_attention_heads_this_layer = sample_self_attention_heads_this_layer + + self.sample_dropout = sample_dropout + self.sample_activation_dropout = sample_activation_dropout + + self.self_attn_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + + self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_attention_heads=self.sample_self_attention_heads_this_layer) + + self.fc1.set_sample_config(sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) + self.fc2.set_sample_config(sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) + + self.final_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + + + def upgrade_state_dict_named(self, state_dict, name): + """ + Rename layer norm states from `...layer_norms.0.weight` to + `...self_attn_layer_norm.weight` and `...layer_norms.1.weight` to + `...final_layer_norm.weight` + """ + layer_norm_map = { + '0': 'self_attn_layer_norm', + '1': 'final_layer_norm' + } + for old, new in layer_norm_map.items(): + for m in ('weight', 'bias'): + k = '{}.layer_norms.{}.{}'.format(name, old, m) + if k in state_dict: + state_dict[ + '{}.{}.{}'.format(name, new, m) + ] = state_dict[k] + del state_dict[k] + + def forward(self, x, encoder_padding_mask, attn_mask=None): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor): binary ByteTensor of shape + `(batch, src_len)` where padding elements are 
indicated by ``1``. + attn_mask (ByteTensor): binary tensor of shape (T_tgt, T_src), where + T_tgt is the length of query, while T_src is the length of key, + though here both query and key is x here, + attn_mask[t_tgt, t_src] = 1 means when calculating embedding + for t_tgt, t_src is excluded (or masked out), =0 means it is + included in attention + + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + if self.is_identity_layer: + return x + residual = x + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, before=True) + if attn_mask is not None: + attn_mask = attn_mask.masked_fill(attn_mask.byte(), -1e8) + # anything in original attn_mask = 1, becomes -1e8 + # anything in original attn_mask = 0, becomes 0 + # Note that we cannot use -inf here, because at some edge cases, + # the attention weight (before softmax) for some padded element in query + # will become -inf, which results in NaN in model parameters + # TODO: to formally solve this problem, we need to change fairseq's + # MultiheadAttention. We will do this later on. + x, _ = self.self_attn(query=x, key=x, value=x, key_padding_mask=encoder_padding_mask) + x = F.dropout(x, p=self.dropout, training=self.training) + x[:residual.size(0),:,:] = residual + x[:residual.size(0),:,:] + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, after=True) + + residual = x + x = self.maybe_layer_norm(self.final_layer_norm, x, before=True) + x = self.activation_fn(self.fc1(x)) + x = F.dropout(x, p=self.sample_activation_dropout, training=self.training) + x = self.fc2(x) + x = F.dropout(x, p=self.sample_dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.final_layer_norm, x, after=True) + return x + + def maybe_layer_norm(self, layer_norm, x, before=False, after=False): + assert before ^ after + if after ^ self.normalize_before: + return layer_norm(x) + else: + return x + + +class TransformerDecoderLayer(nn.Module): + """Decoder layer block. + + In the original paper each operation (multi-head attention, encoder + attention or FFN) is postprocessed with: `dropout -> add residual -> + layernorm`. In the tensor2tensor code they suggest that learning is more + robust when preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *args.decoder_normalize_before* to ``True``. + + Args: + args (argparse.Namespace): parsed command-line arguments + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). 
+ """ + + def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False): + super().__init__() + + # the configs of super arch + self.super_embed_dim = decoder_config['decoder_embed_dim'] + self.super_encoder_embed_dim = decoder_config['decoder_embed_dim'] + self.super_ffn_embed_dim_this_layer = decoder_config['decoder_ffn_embed_dim'] + self.super_self_attention_heads_this_layer = decoder_config['decoder_attention_heads'] + self.super_ende_attention_heads_this_layer = decoder_config['decoder_attention_heads'] + + self.super_dropout = 0.3 + self.super_activation_dropout = 0 + + # the configs of current sampled arch + self.sample_embed_dim = None + self.sample_encoder_embed_dim = None + self.sample_ffn_embed_dim_this_layer = None + self.sample_self_attention_heads_this_layer = None + self.sample_ende_attention_heads_this_layer = None + self.sample_dropout = None + self.sample_activation_dropout = None + self.is_identity_layer = None + self.qkv_dim = 512 + self.layer_idx = layer_idx + + + self.self_attn = MultiheadAttentionSuper( + is_encoder=False, + super_embed_dim=self.super_embed_dim, + num_heads=self.super_self_attention_heads_this_layer, + dropout=0.1, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + self_attention=True, + qkv_dim=self.qkv_dim + ) + self.activation_fn = utils.get_activation_fn( + activation='relu' + ) + self.normalize_before = False + + # use layerNorm rather than FusedLayerNorm for exporting. + # char_inputs can be used to determint this. + # TODO remove this once we update apex with the fix + export = False + self.self_attn_layer_norm = LayerNormSuper(self.super_embed_dim) + + if no_encoder_attn: + self.encoder_attn = None + self.encoder_attn_layer_norm = None + else: + self.encoder_attn = MultiheadAttentionSuper( + super_embed_dim=self.super_embed_dim, + num_heads=self.super_ende_attention_heads_this_layer, + is_encoder=False, + super_kdim=self.super_encoder_embed_dim, + super_vdim=self.super_encoder_embed_dim, + dropout=0.1, + encoder_decoder_attention=True, + qkv_dim=self.qkv_dim + ) + self.encoder_attn_layer_norm = LayerNormSuper(self.super_embed_dim) + + self.fc1 = LinearSuper(super_in_dim=self.super_embed_dim, super_out_dim=self.super_ffn_embed_dim_this_layer, + uniform_=None, non_linear='relu') + self.fc2 = LinearSuper(super_in_dim=self.super_ffn_embed_dim_this_layer, super_out_dim=self.super_embed_dim, + uniform_=None, non_linear='linear') + + self.final_layer_norm = LayerNormSuper(self.super_embed_dim) + self.need_attn = True + + self.onnx_trace = False + + + def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_encoder_embed_dim=None, sample_ffn_embed_dim_this_layer=None, sample_self_attention_heads_this_layer=None, sample_ende_attention_heads_this_layer=None, sample_dropout=None, sample_activation_dropout=None): + + if is_identity_layer: + self.is_identity_layer = True + return + + self.is_identity_layer = False + + self.sample_embed_dim = sample_embed_dim + self.sample_encoder_embed_dim = sample_encoder_embed_dim + self.sample_ffn_embed_dim_this_layer = sample_ffn_embed_dim_this_layer + self.sample_self_attention_heads_this_layer = sample_self_attention_heads_this_layer + self.sample_ende_attention_heads_this_layer = sample_ende_attention_heads_this_layer + + self.sample_dropout = sample_dropout + self.sample_activation_dropout = sample_activation_dropout + + + self.self_attn_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + 
self.encoder_attn_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + + self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_attention_heads=self.sample_self_attention_heads_this_layer) + self.encoder_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_kv_embed_dim=self.sample_encoder_embed_dim, sample_attention_heads=self.sample_ende_attention_heads_this_layer) + + self.fc1.set_sample_config(sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) + self.fc2.set_sample_config(sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) + + self.final_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + + + def prepare_for_onnx_export_(self): + self.onnx_trace = True + + def forward( + self, + x, + encoder_out=None, + encoder_padding_mask=None, + incremental_state=None, + prev_self_attn_state=None, + prev_attn_state=None, + self_attn_mask=None, + self_attn_padding_mask=None, + ): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor): binary ByteTensor of shape + `(batch, src_len)` where padding elements are indicated by ``1``. + + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + if self.is_identity_layer: + return x, None + + residual = x + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, before=True) + if prev_self_attn_state is not None: + if incremental_state is None: + incremental_state = {} + prev_key, prev_value = prev_self_attn_state + saved_state = {"prev_key": prev_key, "prev_value": prev_value} + self.self_attn._set_input_buffer(incremental_state, saved_state) + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + incremental_state=incremental_state, + need_weights=False, + attn_mask=self_attn_mask, + ) + x = F.dropout(x, p=self.sample_dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, after=True) + + if self.encoder_attn is not None: + residual = x + x = self.maybe_layer_norm(self.encoder_attn_layer_norm, x, before=True) + if prev_attn_state is not None: + if incremental_state is None: + incremental_state = {} + prev_key, prev_value = prev_attn_state + saved_state = {"prev_key": prev_key, "prev_value": prev_value} + self.encoder_attn._set_input_buffer(incremental_state, saved_state) + x, attn = self.encoder_attn( + query=x, + key=encoder_out, + value=encoder_out, + key_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + static_kv=True, + need_weights=(not self.training and self.need_attn), + ) + x = F.dropout(x, p=self.sample_dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.encoder_attn_layer_norm, x, after=True) + + residual = x + x = self.maybe_layer_norm(self.final_layer_norm, x, before=True) + x = self.activation_fn(self.fc1(x)) + x = F.dropout(x, p=self.sample_activation_dropout, training=self.training) + x = self.fc2(x) + x = F.dropout(x, p=self.sample_dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.final_layer_norm, x, after=True) + if self.onnx_trace and incremental_state is not None: + saved_state = self.self_attn._get_input_buffer(incremental_state) + self_attn_state = saved_state["prev_key"], saved_state["prev_value"] + return x, attn, self_attn_state + return x, attn + + def maybe_layer_norm(self, layer_norm, x, 
before=False, after=False): + assert before ^ after + if after ^ self.normalize_before: + return layer_norm(x) + else: + return x + + def make_generation_fast_(self, need_attn=False, **kwargs): + self.need_attn = need_attn + +def calc_dropout(dropout, sample_embed_dim, super_embed_dim): + return dropout * 1.0 * sample_embed_dim / super_embed_dim + +def Embedding(num_embeddings, embedding_dim, padding_idx): + return EmbeddingSuper(num_embeddings, embedding_dim, padding_idx=padding_idx) + +def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_(m.weight, non_linear=non_linear) + if bias: + nn.init.constant_(m.bias, 0.) + return m + +def calc_dropout(dropout, sample_embed_dim, super_embed_dim): + return dropout * 1.0 * sample_embed_dim / super_embed_dim + +def Embedding(num_embeddings, embedding_dim, padding_idx): + return EmbeddingSuper(num_embeddings, embedding_dim, padding_idx=padding_idx) + +def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_(m.weight, non_linear=non_linear) + if bias: + nn.init.constant_(m.bias, 0.) + return m From 7b205272ddb09060e1ce5a87520285dbbd41bbb0 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 17 Nov 2022 15:50:39 -0800 Subject: [PATCH 06/60] Run `autopep8` --- neural_compressor/experimental/nas/dynas.py | 110 +++---- .../experimental/nas/dynast/__init__.py | 2 +- .../experimental/nas/dynast/dynas_manager.py | 73 +++-- .../nas/dynast/dynas_predictor.py | 7 +- .../experimental/nas/dynast/dynas_search.py | 22 +- .../experimental/nas/dynast/dynas_utils.py | 69 +++-- neural_compressor/experimental/nas/nas.py | 104 ++++--- .../experimental/nas/nas_utils.py | 3 +- .../experimental/nas/search_algorithms.py | 16 +- .../modules_supernetwork.py | 96 +++--- .../transformer_interface.py | 152 +++++----- .../transformer_supernetwork.py | 275 +++++++++++------- 12 files changed, 535 insertions(+), 394 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index a5d70dde6ae..7ba96ef169e 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -21,6 +21,7 @@ from .nas import NASBase from .nas_utils import nas_registry + @nas_registry("DyNAS") class DyNAS(NASBase): """ @@ -28,43 +29,47 @@ class DyNAS(NASBase): conf_fname_or_obj (string or obj): The path to the YAML configuration file or the object of NASConfig. 
""" + def __init__(self, conf_fname_or_obj): from .dynast.dynas_manager import ParameterManager from .dynast.dynas_manager import TransformerLTEncoding from .dynast.dynas_predictor import Predictor from .dynast.dynas_search import ProblemMultiObjective, SearchAlgoManager from .dynast.dynas_utils import (EvaluationInterfaceMobileNetV3, - EvaluationInterfaceResNet50, EvaluationInterfaceTransformerLT, - OFARunner,TransformerLTRunner) + EvaluationInterfaceResNet50, EvaluationInterfaceTransformerLT, + OFARunner, TransformerLTRunner) self.ParameterManager = ParameterManager self.Predictor = Predictor self.ProblemMultiObjective = ProblemMultiObjective self.SearchAlgoManager = SearchAlgoManager self.SUPERNET_PARAMETERS = { - 'ofa_resnet50': - {'d' : {'count' : 5, 'vars' : [0, 1, 2]}, - 'e' : {'count' : 18, 'vars' : [0.2, 0.25, 0.35]}, - 'w' : {'count' : 6, 'vars' : [0, 1, 2]} }, - 'ofa_mbv3_d234_e346_k357_w1.0': - {'ks' : {'count' : 20, 'vars' : [3, 5, 7]}, - 'e' : {'count' : 20, 'vars' : [3, 4, 6]}, - 'd' : {'count' : 5, 'vars' : [2, 3, 4]} }, - 'ofa_mbv3_d234_e346_k357_w1.2': - {'ks' : {'count' : 20, 'vars' : [3, 5, 7]}, - 'e' : {'count' : 20, 'vars' : [3, 4, 6]}, - 'd' : {'count' : 5, 'vars' : [2, 3, 4]} }, - - 'transformer_lt_wmt_en_de': - {'encoder_embed_dim': {'count':1,'vars':[640, 512]}, - 'decoder_embed_dim': {'count':1, 'vars': [640, 512]}, - 'encoder_ffn_embed_dim': {'count':6, 'vars':[3072, 2048, 1024]}, - 'decoder_ffn_embed_dim' : {'count':6,'vars': [3072, 2048, 1024]}, - 'decoder_layer_num': {'count':1,'vars':[6, 5, 4, 3, 2, 1]}, - 'encoder_self_attention_heads': {'count':6, 'vars':[8, 4]}, - 'decoder_self_attention_heads': {'count':6, 'vars':[8, 4]}, - 'decoder_ende_attention_heads': {'count':6, 'vars':[8, 4]}, - 'decoder_arbitrary_ende_attn': {'count':6, 'vars':[-1, 1, 2]}} + 'ofa_resnet50': { + 'd': {'count': 5, 'vars': [0, 1, 2]}, + 'e': {'count': 18, 'vars': [0.2, 0.25, 0.35]}, + 'w': {'count': 6, 'vars': [0, 1, 2]}, + }, + 'ofa_mbv3_d234_e346_k357_w1.0': { + 'ks': {'count': 20, 'vars': [3, 5, 7]}, + 'e': {'count': 20, 'vars': [3, 4, 6]}, + 'd': {'count': 5, 'vars': [2, 3, 4]}, + }, + 'ofa_mbv3_d234_e346_k357_w1.2': { + 'ks': {'count': 20, 'vars': [3, 5, 7]}, + 'e': {'count': 20, 'vars': [3, 4, 6]}, + 'd': {'count': 5, 'vars': [2, 3, 4]}, + }, + 'transformer_lt_wmt_en_de': { + 'encoder_embed_dim': {'count': 1, 'vars': [640, 512]}, + 'decoder_embed_dim': {'count': 1, 'vars': [640, 512]}, + 'encoder_ffn_embed_dim': {'count': 6, 'vars': [3072, 2048, 1024]}, + 'decoder_ffn_embed_dim': {'count': 6, 'vars': [3072, 2048, 1024]}, + 'decoder_layer_num': {'count': 1, 'vars': [6, 5, 4, 3, 2, 1]}, + 'encoder_self_attention_heads': {'count': 6, 'vars': [8, 4]}, + 'decoder_self_attention_heads': {'count': 6, 'vars': [8, 4]}, + 'decoder_ende_attention_heads': {'count': 6, 'vars': [8, 4]}, + 'decoder_arbitrary_ende_attn': {'count': 6, 'vars': [-1, 1, 2]}, + }, } self.RUNNERS = { 'ofa_resnet50': OFARunner, @@ -76,19 +81,19 @@ def __init__(self, conf_fname_or_obj): self.EVALUATION_INTERFACE = {'ofa_resnet50': EvaluationInterfaceResNet50, 'ofa_mbv3_d234_e346_k357_w1.0': EvaluationInterfaceMobileNetV3, 'ofa_mbv3_d234_e346_k357_w1.2': EvaluationInterfaceMobileNetV3, - 'transformer_lt_wmt_en_de': EvaluationInterfaceTransformerLT} + 'transformer_lt_wmt_en_de': EvaluationInterfaceTransformerLT} self.LINAS_INNERLOOP_EVALS = {'ofa_resnet50': 5000, 'ofa_mbv3_d234_e346_k357_w1.0': 20000, 'ofa_mbv3_d234_e346_k357_w1.2': 20000, 'transformer_lt_wmt_en_de': 10000} - + self.SUPERNET_ENCODING = { - 
'ofa_resnet50': ParameterManager, - 'ofa_mbv3_d234_e346_k357_w1.0': ParameterManager, - 'ofa_mbv3_d234_e346_k357_w1.2': ParameterManager, - 'ofa_proxyless_d234_e346_k357_w1.3': ParameterManager, - 'transformer_lt_wmt_en_de': TransformerLTEncoding, + 'ofa_resnet50': ParameterManager, + 'ofa_mbv3_d234_e346_k357_w1.0': ParameterManager, + 'ofa_mbv3_d234_e346_k357_w1.2': ParameterManager, + 'ofa_proxyless_d234_e346_k357_w1.3': ParameterManager, + 'transformer_lt_wmt_en_de': TransformerLTEncoding, } super().__init__() @@ -98,7 +103,6 @@ def __init__(self, conf_fname_or_obj): self.results_csv_path = None self.init_cfg(conf_fname_or_obj) - def estimate(self, individual): self.validation_interface.eval_subnet(individual) @@ -106,10 +110,10 @@ def init_for_search(self): self.supernet_manager = self.SUPERNET_ENCODING[self.supernet]( param_dict=self.SUPERNET_PARAMETERS[self.supernet], seed=self.seed ) - #self.supernet_manager = self.ParameterManager( + # self.supernet_manager = self.ParameterManager( # param_dict=self.SUPERNET_PARAMETERS[self.supernet], # seed=self.seed - #) + # ) # Validation High-Fidelity Measurement Runner self.runner_validate = self.RUNNERS[self.supernet]( @@ -140,13 +144,14 @@ def search(self): # if number of results in results_csv_path smaller than population. # TODO(macsz) Create empty CSV if it does not exists. df = pd.read_csv(self.results_csv_path) - latest_population = [self.supernet_manager.random_sample() \ - for _ in range(max(self.population - df.shape[0], 0))] + latest_population = [self.supernet_manager.random_sample() + for _ in range(max(self.population - df.shape[0], 0))] # Start Lightweight Iterative Neural Architecture Search (LINAS) num_loops = round(self.num_evals/self.population) for loop in range(num_loops): - logger.info('[DyNAS-T] Starting LINAS loop {} of {}.'.format(loop+1, num_loops)) + logger.info( + '[DyNAS-T] Starting LINAS loop {} of {}.'.format(loop+1, num_loops)) for individual in latest_population: self.validation_interface.eval_subnet(individual) @@ -172,7 +177,7 @@ def search(self): manager=self.supernet_manager, metrics=self.metrics, csv_path=None, - predictor_mode = True + predictor_mode=True ) problem = self.ProblemMultiObjective( @@ -182,19 +187,22 @@ def search(self): ) if self.search_algo == 'age': - search_manager = self.SearchAlgoManager(algorithm='age', seed=self.seed) + search_manager = self.SearchAlgoManager( + algorithm='age', seed=self.seed) search_manager.configure_age(population=self.population, - num_evals=self.LINAS_INNERLOOP_EVALS[self.supernet]) + num_evals=self.LINAS_INNERLOOP_EVALS[self.supernet]) else: - search_manager = self.SearchAlgoManager(algorithm='nsga2', seed=self.seed) + search_manager = self.SearchAlgoManager( + algorithm='nsga2', seed=self.seed) search_manager.configure_nsga2(population=self.population, - num_evals=self.LINAS_INNERLOOP_EVALS[self.supernet]) + num_evals=self.LINAS_INNERLOOP_EVALS[self.supernet]) results = search_manager.run_search(problem) latest_population = results.pop.get('X') - logger.info("[DyNAS-T] Validated model architectures in file: {}".format(self.results_csv_path)) + logger.info( + "[DyNAS-T] Validated model architectures in file: {}".format(self.results_csv_path)) output = list() for individual in latest_population: @@ -202,7 +210,7 @@ def search(self): return output - def select_model_arch(self): # pragma: no cover + def select_model_arch(self): # pragma: no cover # model_arch_proposition intrinsically contained in # pymoo.minimize API of search_manager.run_search method, # don't 
have to implement it explicitly. @@ -214,7 +222,7 @@ def create_acc_predictor(self): df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='acc', - column_names=['config','date','lat','macs','acc']) + column_names=['config', 'date', 'lat', 'macs', 'acc']) features, labels = self.supernet_manager.create_training_set(df) self.acc_predictor = self.Predictor() self.acc_predictor.train(features, labels.ravel()) @@ -227,7 +235,7 @@ def create_macs_predictor(self): df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='macs', - column_names=['config','date','lat','macs','acc']) + column_names=['config', 'date', 'lat', 'macs', 'acc']) features, labels = self.supernet_manager.create_training_set(df) self.macs_predictor = self.Predictor() self.macs_predictor.train(features, labels.ravel()) @@ -240,7 +248,7 @@ def create_latency_predictor(self): df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='lat', - column_names=['config','date','lat','macs','acc']) + column_names=['config', 'date', 'lat', 'macs', 'acc']) features, labels = self.supernet_manager.create_training_set(df) self.latency_predictor = self.Predictor() self.latency_predictor.train(features, labels.ravel()) @@ -254,11 +262,11 @@ def init_cfg(self, conf_fname_or_obj): elif isinstance(conf_fname_or_obj, NASConfig): conf_fname_or_obj.validate() self.conf = conf_fname_or_obj.usr_cfg - else: # pragma: no cover + else: # pragma: no cover raise NotImplementedError( "Please provide a str path to the config file or an object of NASConfig." ) - #self.init_search_cfg(self.conf.nas) + # self.init_search_cfg(self.conf.nas) assert 'dynas' in self.conf.nas, "Must specify dynas section." dynas_config = self.conf.nas.dynas self.search_algo = self.conf.nas.search.search_algorithm @@ -269,7 +277,7 @@ def init_cfg(self, conf_fname_or_obj): self.dataset_path = dynas_config.dataset_path self.supernet_ckpt_path = dynas_config.supernet_ckpt_path self.batch_size = dynas_config.batch_size - if dynas_config.population < 10: # pragma: no cover + if dynas_config.population < 10: # pragma: no cover raise NotImplementedError( "Please specify a population size >= 10" ) diff --git a/neural_compressor/experimental/nas/dynast/__init__.py b/neural_compressor/experimental/nas/dynast/__init__.py index 1c73aaf4901..369707c0ef6 100644 --- a/neural_compressor/experimental/nas/dynast/__init__.py +++ b/neural_compressor/experimental/nas/dynast/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. 
diff --git a/neural_compressor/experimental/nas/dynast/dynas_manager.py b/neural_compressor/experimental/nas/dynast/dynas_manager.py index 898d85e3ab0..61956464e97 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_manager.py +++ b/neural_compressor/experimental/nas/dynast/dynas_manager.py @@ -61,21 +61,25 @@ def process_param_dict(self) -> Tuple[list, list, int]: for i in range(options['count']): parameter_upperbound.append(len(options['vars']) - 1) index_simple = [x for x in range(len(options['vars']))] - parameter_mapper.append(dict(zip(index_simple, options['vars']))) + parameter_mapper.append( + dict(zip(index_simple, options['vars']))) - if self.verbose: # pragma: no cover + if self.verbose: # pragma: no cover logger.info( - '[DyNAS-T] Problem definition variables: {}'.format(parameter_count) + '[DyNAS-T] Problem definition variables: {}'.format( + parameter_count) ) logger.info( - '[DyNAS-T] Variable Upper Bound array: {}'.format(parameter_upperbound) + '[DyNAS-T] Variable Upper Bound array: {}'.format( + parameter_upperbound) ) logger.info( '[DyNAS-T] Mapping dictionary created of length: {}'.format( len(parameter_mapper) ) ) - logger.info('[DyNAS-T] Parameter Bound: {}'.format(parameter_bound)) + logger.info( + '[DyNAS-T] Parameter Bound: {}'.format(parameter_bound)) return parameter_mapper, parameter_upperbound, parameter_count @@ -138,7 +142,8 @@ def random_samples(self, size: int = 100, trial_limit: int = 100000) -> List[lis trials += 1 if trials >= trial_limit: - logger.warning('[DyNAS-T] Unable to create unique list of samples.') + logger.warning( + '[DyNAS-T] Unable to create unique list of samples.') return pymoo_vector_list @@ -172,7 +177,8 @@ def translate2pymoo(self, parameters: dict) -> list: param_counter = 0 for i in range(value['count']): output.append( - self.inv_mapper[mapper_counter][parameters[key][param_counter]] + self.inv_mapper[mapper_counter][parameters[key] + [param_counter]] ) mapper_counter += 1 param_counter += 1 @@ -278,32 +284,28 @@ def create_training_set( return features_train, features_test, labels_train, labels_test - - - - class TransformerLTEncoding(ParameterManager): def __init__(self, param_dict: dict, verbose: bool = False, seed: int = 0): super().__init__(param_dict, verbose, seed) - def onehot_custom(self,subnet_cfg,provide_onehot=True): + def onehot_custom(self, subnet_cfg, provide_onehot=True): features = [] #import ipdb;ipdb.set_trace() features.extend(subnet_cfg['encoder_embed_dim']) #encoder_layer_num = subnet_cfg['encoder_layer_num'] - encode_layer_num_int = 6#encoder_layer_num[0] - #features.extend(encoder_layer_num) + encode_layer_num_int = 6 # encoder_layer_num[0] + # features.extend(encoder_layer_num) - #Encoder FFN Embed Dim + # Encoder FFN Embed Dim encoder_ffn_embed_dim = subnet_cfg['encoder_ffn_embed_dim'] if encode_layer_num_int < 6: encoder_ffn_embed_dim.extend([0]*(6-encode_layer_num_int)) features.extend(encoder_ffn_embed_dim) - #Encoder Self-Attn Heads + # Encoder Self-Attn Heads encoder_self_attention_heads = subnet_cfg['encoder_self_attention_heads'][:encode_layer_num_int] @@ -311,34 +313,32 @@ def onehot_custom(self,subnet_cfg,provide_onehot=True): encoder_self_attention_heads.extend([0]*(6-encode_layer_num_int)) features.extend(encoder_self_attention_heads) - features.extend(subnet_cfg['decoder_embed_dim']) decoder_layer_num = subnet_cfg['decoder_layer_num'] decoder_layer_num_int = decoder_layer_num[0] features.extend(decoder_layer_num) - #Decoder FFN Embed Dim + # Decoder FFN Embed Dim decoder_ffn_embed_dim 
= subnet_cfg['decoder_ffn_embed_dim'][:decoder_layer_num_int] if decoder_layer_num_int < 6: decoder_ffn_embed_dim.extend([0]*(6-decoder_layer_num_int)) features.extend(decoder_ffn_embed_dim) - - #Decoder Attn Heads + # Decoder Attn Heads decoder_self_attention_heads = subnet_cfg['decoder_self_attention_heads'][:decoder_layer_num_int] if decoder_layer_num_int < 6: - decoder_self_attention_heads.extend([0]*(6-decoder_layer_num_int)) + decoder_self_attention_heads.extend([0]*(6-decoder_layer_num_int)) features.extend(decoder_self_attention_heads) - #Decoder ENDE HEADS + # Decoder ENDE HEADS decoder_ende_attention_heads = subnet_cfg['decoder_ende_attention_heads'][:decoder_layer_num_int] if decoder_layer_num_int < 6: - decoder_ende_attention_heads.extend([0]*(6-decoder_layer_num_int)) + decoder_ende_attention_heads.extend([0]*(6-decoder_layer_num_int)) features.extend(decoder_ende_attention_heads) @@ -352,27 +352,27 @@ def onehot_custom(self,subnet_cfg,provide_onehot=True): arbitrary_ende_attn_trans.append(3) if decoder_layer_num_int < 6: - arbitrary_ende_attn_trans.extend([0]*(6-decoder_layer_num_int)) + arbitrary_ende_attn_trans.extend([0]*(6-decoder_layer_num_int)) features.extend(arbitrary_ende_attn_trans) - if provide_onehot==True: + if provide_onehot == True: examples = np.array([features]) one_hot_count = 0 unique_values = self.unique_values - #uncomment - #with open(self.onehot_unique,'rb') as f: + # uncomment + # with open(self.onehot_unique,'rb') as f: # load_unique_values = pickle.load(f) # unique_values = load_unique_values.tolist() for unique in unique_values: one_hot_count += len(unique.tolist()) - one_hot_examples = np.zeros((examples.shape[0], one_hot_count)) for e, example in enumerate(examples): offset = 0 for f in range(len(example)): - index = np.where(unique_values[f] == example[f])[0] + offset + index = np.where(unique_values[f] == example[f])[ + 0] + offset one_hot_examples[e, index] = 1.0 offset += len(unique_values[f]) return one_hot_examples @@ -380,8 +380,7 @@ def onehot_custom(self,subnet_cfg,provide_onehot=True): else: return features - #return np.array(ks_onehot + ex_onehot) - + # return np.array(ks_onehot + ex_onehot) def import_csv( self, @@ -390,7 +389,7 @@ def import_csv( objective: str, column_names: List[str] = None, drop_duplicates: bool = True, - ) -> pd.DataFrame: + ) -> pd.DataFrame: ''' Import a csv file generated from a supernetwork search for the purpose of training a predictor. 
@@ -427,7 +426,8 @@ def import_csv( config_as_pymoo = self.translate2pymoo(config_as_dict) convert_to_pymoo.append(config_as_pymoo) # Onehot predictor format - config_as_onehot = self.onehot_custom(config_as_dict,provide_onehot=False) + config_as_onehot = self.onehot_custom( + config_as_dict, provide_onehot=False) convert_to_onehot.append(config_as_onehot) #import ipdb;ipdb.set_trace() df[config] = convert_to_dict @@ -436,7 +436,7 @@ def import_csv( return df - #@staticmethod + # @staticmethod def create_training_set( self, dataframe: pd.DataFrame, @@ -453,7 +453,8 @@ def create_training_set( for i in range(len(dataframe)): collect_rows.append(np.asarray(dataframe['config_onehot'].iloc[i])) features = np.asarray(collect_rows) - labels = dataframe.drop(columns=['config', 'config_pymoo', 'config_onehot']).values + labels = dataframe.drop( + columns=['config', 'config_pymoo', 'config_onehot']).values assert len(features) == len(labels) one_hot_count = 0 @@ -485,5 +486,3 @@ def create_training_set( ) ) return features_train, features_test, labels_train, labels_test - - diff --git a/neural_compressor/experimental/nas/dynast/dynas_predictor.py b/neural_compressor/experimental/nas/dynast/dynas_predictor.py index a92c0bef5c2..1a47837d153 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_predictor.py +++ b/neural_compressor/experimental/nas/dynast/dynas_predictor.py @@ -49,7 +49,8 @@ def __init__( # Create lists of regressors and associated hyper-parameters regressors = [ linear_model.Ridge(max_iter=max_iterations), - svm.SVR(kernel='rbf', gamma='auto', epsilon=0.0, max_iter=max_iterations), + svm.SVR(kernel='rbf', gamma='auto', + epsilon=0.0, max_iter=max_iterations), ] hyper_parameters = [{'alpha': alphas}, {'C': cost_factors}] @@ -67,7 +68,6 @@ def __init__( ) def train(self, examples, labels): - ''' Trains the predictor on the specified examples and labels using the underlying regressor. 
Parameters @@ -82,7 +82,8 @@ def train(self, examples, labels): # Compute normalization factor max_label = np.amax(np.abs(labels)) if max_label > 0.0: - self.normalization_factor = 10 ** (np.floor(np.log10(max_label)) - 1.0) + self.normalization_factor = 10 ** ( + np.floor(np.log10(max_label)) - 1.0) else: self.normalization_factor = 1.0 diff --git a/neural_compressor/experimental/nas/dynast/dynas_search.py b/neural_compressor/experimental/nas/dynast/dynas_search.py index a19ec5075f5..fc3d5123450 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_search.py +++ b/neural_compressor/experimental/nas/dynast/dynas_search.py @@ -62,9 +62,10 @@ def __init__( elif self.algorithm == 'age': self.configure_age() self.engine = 'pymoo' - else: # pragma: no cover + else: # pragma: no cover logger.error( - '[DyNAS-T] algorithm "{}" not implemented.'.format(self.algorithm) + '[DyNAS-T] algorithm "{}" not implemented.'.format( + self.algorithm) ) raise NotImplementedError @@ -89,8 +90,10 @@ def configure_nsga2( self.algorithm_def = NSGA2( pop_size=population, sampling=sample_strategy, - crossover=get_crossover("int_sbx", prob=crossover_prob, eta=crossover_eta), - mutation=get_mutation("int_pm", prob=mutation_prob, eta=mutation_eta), + crossover=get_crossover( + "int_sbx", prob=crossover_prob, eta=crossover_eta), + mutation=get_mutation( + "int_pm", prob=mutation_prob, eta=mutation_eta), eliminate_duplicates=True, ) @@ -116,8 +119,10 @@ def configure_age( self.algorithm_def = AGEMOEA( pop_size=population, sampling=sample_strategy, - crossover=get_crossover("int_sbx", prob=crossover_prob, eta=crossover_eta), - mutation=get_mutation("int_pm", prob=mutation_prob, eta=mutation_eta), + crossover=get_crossover( + "int_sbx", prob=crossover_prob, eta=crossover_eta), + mutation=get_mutation( + "int_pm", prob=mutation_prob, eta=mutation_eta), eliminate_duplicates=True, ) @@ -143,7 +148,7 @@ def run_search( save_history=save_history, verbose=self.verbose, ) - else: # pragma: no cover + else: # pragma: no cover logger.error('[DyNAS-T] Invalid algorithm engine configuration!') raise NotImplementedError @@ -200,7 +205,8 @@ def _evaluate( # Measure new individuals for i in range(len(x)): - _, objective_x, objective_y = self.evaluation_interface.eval_subnet(x[i]) + _, objective_x, objective_y = self.evaluation_interface.eval_subnet( + x[i]) objective_x_arr.append(objective_x) objective_y_arr.append(objective_y) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index e15ca455005..2149860ff8e 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -36,7 +36,7 @@ from neural_compressor.experimental.nas.supernetwork.machine_translation.transformer_interface import ( compute_bleu, compute_latency -) +) torch = LazyImport('torch') torchvision = LazyImport('torchvision') @@ -198,7 +198,7 @@ def estimate_latency( def validate_top1( self, subnet_cfg: dict, - ) -> float: # pragma: no cover + ) -> float: # pragma: no cover subnet = self.get_subnet(subnet_cfg) folder_name = '.torch/tmp-{}'.format(uuid.uuid1().hex) run_manager = RunManager( @@ -207,7 +207,8 @@ def validate_top1( run_manager.reset_running_statistics(net=subnet) # Test sampled subnet - self.run_config.data_provider.assign_active_img_size(subnet_cfg['r'][0]) + self.run_config.data_provider.assign_active_img_size( + subnet_cfg['r'][0]) loss, acc = run_manager.validate(net=subnet, no_logs=False) top1 = 
acc[0] return top1 @@ -252,7 +253,8 @@ def measure_latency( measure_steps=measure_steps, device=self.device, ) - logger.info('Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) + logger.info( + 'Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) return latency_mean, latency_std @@ -274,8 +276,6 @@ def get_subnet( return self.subnet - - class TransformerLTRunner(Runner): """The OFARunner class manages the sub-network selection from the OFA super-network and the validation measurements of the sub-networks. ResNet50, MobileNetV3 w1.0, and MobileNetV3 w1.2 @@ -326,9 +326,10 @@ def estimate_latency( def validate_bleu( self, subnet_cfg: dict, - ) -> float: # pragma: no cover - - bleu = compute_bleu(subnet_cfg,self.dataset_path,self.checkpoint_path) + ) -> float: # pragma: no cover + + bleu = compute_bleu(subnet_cfg, self.dataset_path, + self.checkpoint_path) return bleu def validate_macs( @@ -362,13 +363,13 @@ def measure_latency( Returns: mean latency; std latency """ - - latency_mean, latency_std = compute_latency(subnet_cfg,self.dataset_path) - logger.info('Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) - - return latency_mean, latency_std + latency_mean, latency_std = compute_latency( + subnet_cfg, self.dataset_path) + logger.info( + 'Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) + return latency_mean, latency_std class EvaluationInterface: @@ -409,7 +410,8 @@ def clear_csv(self) -> None: if self.csv_path: f = open(self.csv_path, "w") writer = csv.writer(f) - result = ['Sub-network', 'Date', 'Latency (ms)', ' MACs', 'Top-1 Acc (%)'] + result = ['Sub-network', 'Date', + 'Latency (ms)', ' MACs', 'Top-1 Acc (%)'] writer.writerow(result) f.close() @@ -505,11 +507,14 @@ def eval_subnet( # Always evaluate/predict top1 lat, macs = 0, 0 if self.predictor_mode == True: - top1 = self.evaluator.estimate_accuracy_top1(self.manager.onehot_generic(x).reshape(1,-1))[0] + top1 = self.evaluator.estimate_accuracy_top1( + self.manager.onehot_generic(x).reshape(1, -1))[0] if 'macs' in self.metrics: - macs = self.evaluator.estimate_macs(self.manager.onehot_generic(x).reshape(1,-1))[0] + macs = self.evaluator.estimate_macs( + self.manager.onehot_generic(x).reshape(1, -1))[0] if 'lat' in self.metrics: - lat = self.evaluator.estimate_latency(self.manager.onehot_generic(x).reshape(1,-1))[0] + lat = self.evaluator.estimate_latency( + self.manager.onehot_generic(x).reshape(1, -1))[0] else: top1 = self.evaluator.validate_top1(subnet_sample) macs = self.evaluator.validate_macs(subnet_sample) @@ -531,7 +536,6 @@ def eval_subnet( return sample, macs, -top1 - class EvaluationInterfaceTransformerLT(EvaluationInterface): def __init__( self, @@ -553,7 +557,7 @@ def eval_subnet( sample = { 'encoder': { 'encoder_embed_dim': param_dict['encoder_embed_dim'][0], - 'encoder_layer_num': 6,#param_dict['encoder_layer_num'][0], + 'encoder_layer_num': 6, # param_dict['encoder_layer_num'][0], 'encoder_ffn_embed_dim': param_dict['encoder_ffn_embed_dim'], 'encoder_self_attention_heads': param_dict['encoder_self_attention_heads'], }, @@ -563,20 +567,23 @@ def eval_subnet( 'decoder_ffn_embed_dim': param_dict['decoder_ffn_embed_dim'], 'decoder_self_attention_heads': param_dict['decoder_self_attention_heads'], 'decoder_ende_attention_heads': param_dict['decoder_ende_attention_heads'], - 'decoder_arbitrary_ende_attn':param_dict['decoder_arbitrary_ende_attn'] - } + 'decoder_arbitrary_ende_attn': param_dict['decoder_arbitrary_ende_attn'] } + } subnet_sample = copy.deepcopy(sample) # 
Always evaluate/predict top1 lat, macs = 0, 0 if self.predictor_mode == True: - bleu = self.evaluator.estimate_accuracy_bleu(self.manager.onehot_custom(param_dict).reshape(1,-1))[0] + bleu = self.evaluator.estimate_accuracy_bleu( + self.manager.onehot_custom(param_dict).reshape(1, -1))[0] if 'macs' in self.metrics: - macs = self.evaluator.estimate_macs(self.manager.onehot_custom(param_dict).reshape(1,-1))[0] + macs = self.evaluator.estimate_macs( + self.manager.onehot_custom(param_dict).reshape(1, -1))[0] if 'lat' in self.metrics: - lat = self.evaluator.estimate_latency(self.manager.onehot_custom(param_dict).reshape(1,-1))[0] + lat = self.evaluator.estimate_latency( + self.manager.onehot_custom(param_dict).reshape(1, -1))[0] else: bleu = self.evaluator.validate_bleu(subnet_sample) macs = self.evaluator.validate_macs(subnet_sample) @@ -587,7 +594,7 @@ def eval_subnet( with open(self.csv_path, 'a') as f: writer = csv.writer(f) date = str(datetime.now()) - result = [param_dict, date, lat, macs, bleu,] + result = [param_dict, date, lat, macs, bleu, ] writer.writerow(result) # PyMoo only minimizes objectives, thus accuracy needs to be negative @@ -598,9 +605,6 @@ def eval_subnet( return sample, macs, -bleu - - - def get_torchvision_model( model_name: str, ) -> torch.nn.Module: @@ -608,14 +612,15 @@ def get_torchvision_model( model = getattr(torchvision.models, model_name)(pretrained=True) model.eval() return model - except AttributeError as ae: # pragma: no cover + except AttributeError as ae: # pragma: no cover logger.error( 'Model {model_name} not available. This can be due to either a typo or the model is not ' 'available in torchvision=={torchvision_version}. \nAvailable models: {available_models}'.format( model_name=model_name, torchvision_version=torchvision.__version__, available_models=', '.join( - [m for m in dir(torchvision.models) if not m.startswith('_')] + [m for m in dir(torchvision.models) + if not m.startswith('_')] ), ) ) @@ -652,7 +657,7 @@ def __init__( # separately to avoid modifications to the model being passed between calls. get_torchvision_model(model_name=self.model_name) - def validate_top1(self) -> Tuple[float, float, float]: # pragma: no cover + def validate_top1(self) -> Tuple[float, float, float]: # pragma: no cover ImagenetDataProvider.DEFAULT_PATH = self.dataset_path model = get_torchvision_model(model_name=self.model_name) run_config = ImagenetRunConfig(test_batch_size=64, n_worker=20) diff --git a/neural_compressor/experimental/nas/nas.py b/neural_compressor/experimental/nas/nas.py index cbdf9c17ae3..390ef8ada46 100644 --- a/neural_compressor/experimental/nas/nas.py +++ b/neural_compressor/experimental/nas/nas.py @@ -39,13 +39,13 @@ def __new__(self, conf_fname_or_obj, *args, **kwargs): elif isinstance(conf_fname_or_obj, Config): self.conf = NASConfig() self.conf.map_pyconfig_to_cfg(conf_fname_or_obj) - else: # pragma: no cover + else: # pragma: no cover raise NotImplementedError( "Please provide a str path to the config file." 
) assert self.conf.usr_cfg.nas is not None, "nas section must be set" if isinstance(self.conf.usr_cfg.nas.approach, str) and \ - self.conf.usr_cfg.nas.approach.lower() in NASMethods: + self.conf.usr_cfg.nas.approach.lower() in NASMethods: method = self.conf.usr_cfg.nas.approach.lower() else: logger.warning( @@ -110,29 +110,38 @@ def search(self, res_save_path=None): ) ) model_arch_paras = self.select_model_arch() - logger.info("Model architecture {} proposed.".format(model_arch_paras)) + logger.info( + "Model architecture {} proposed.".format(model_arch_paras)) model = self._model_builder(model_arch_paras) model_paras = self.count_model_parameters(model) logger.info( - "***** Number of model parameters: {:.2f}M *****".format(model_paras / 10**6) + "***** Number of model parameters: {:.2f}M *****".format( + model_paras / 10**6) ) - self.model_paras_num[tuple(model_arch_paras.values())] = model_paras + self.model_paras_num[tuple( + model_arch_paras.values())] = model_paras if tuple(model_arch_paras.values()) in self.search_results: - logger.info("Skip evaluated model architecture {}.".format(model_arch_paras)) + logger.info( + "Skip evaluated model architecture {}.".format(model_arch_paras)) continue if tuple(model_arch_paras.values()) in self.resumed_search_results: logger.info( - "Find previous results of model architecture: {}.".format(model_arch_paras) + "Find previous results of model architecture: {}.".format( + model_arch_paras) ) - metrics = self.resumed_search_results[tuple(model_arch_paras.values())] + metrics = self.resumed_search_results[tuple( + model_arch_paras.values())] else: - logger.info("Assessing model architecture: {}.".format(model_arch_paras)) + logger.info( + "Assessing model architecture: {}.".format(model_arch_paras)) metrics = self.estimate(model) logger.info( - "Metrics of model architecture {} is {}.".format(model_arch_paras, metrics) + "Metrics of model architecture {} is {}.".format( + model_arch_paras, metrics) ) self.search_results[tuple(model_arch_paras.values())] = metrics - self._search_algorithm.get_feedback(sum(self.metrics_conversion(metrics))) + self._search_algorithm.get_feedback( + sum(self.metrics_conversion(metrics))) self.dump_search_results( os.path.join(save_path, 'Trial_{}_results.txt'.format(i+1)) ) @@ -141,9 +150,12 @@ def search(self, res_save_path=None): if model_arch_vec not in self.search_results: self.search_results[model_arch_vec] = \ self.resumed_search_results[model_arch_vec] - model = self._model_builder(self.params_vec2params_dict(model_arch_vec)) - self.model_paras_num[model_arch_vec] = self.count_model_parameters(model) - self.dump_search_results(os.path.join(save_path, 'Final_results.txt'.format(i+1))) + model = self._model_builder( + self.params_vec2params_dict(model_arch_vec)) + self.model_paras_num[model_arch_vec] = self.count_model_parameters( + model) + self.dump_search_results(os.path.join( + save_path, 'Final_results.txt'.format(i+1))) self.find_best_model_archs() logger.info( "{fix} Found {n} best model architectures {fix}".format( @@ -151,10 +163,11 @@ def search(self, res_save_path=None): ) ) for i, model_arch in enumerate(self.best_model_archs): - logger.info("Best model architecture {}: {}".format(i+1, model_arch)) + logger.info( + "Best model architecture {}: {}".format(i+1, model_arch)) return self.best_model_archs - def estimate(self, model): # pragma: no cover + def estimate(self, model): # pragma: no cover """Estimate performance of the model. Depends on specific NAS algorithm. 
Returns: @@ -166,57 +179,63 @@ def count_model_parameters(self, model): if isinstance(model, torch.nn.Module): return sum(p.numel() for p in model.parameters()) else: - raise NotImplementedError("Only support torch model now.") # pragma: no cover + raise NotImplementedError( + "Only support torch model now.") # pragma: no cover def load_search_results(self, path): self.resumed_search_results = {} lastest_results_record = os.path.join(path, 'lastest_results.npy') if not os.path.exists(path) or not os.path.exists(lastest_results_record): return - self.resumed_search_results = np.load(lastest_results_record, allow_pickle=True).item() + self.resumed_search_results = np.load( + lastest_results_record, allow_pickle=True).item() os.makedirs(os.path.join(path, 'previous_results'), exist_ok=True) for f in os.listdir(path): if os.path.isfile(os.path.join(path, f)): - shutil.move(os.path.join(path, f), os.path.join(path, 'previous_results', f)) + shutil.move(os.path.join(path, f), os.path.join( + path, 'previous_results', f)) logger.info("Loaded previous results.") def dump_search_results(self, path): - lastest_results_record = os.path.join(os.path.dirname(path), 'lastest_results.npy') + lastest_results_record = os.path.join( + os.path.dirname(path), 'lastest_results.npy') np.save(lastest_results_record, self.search_results, allow_pickle=True) write_contents = '=' * 30 + ' All Search Results ' + '=' * 30 + '\n\n' for model_arch_vec in self.search_results: - tmp = ','.join(['{}_{}'.format(k, v) \ - for k, v in zip(self.search_space_keys, model_arch_vec)]) + tmp = ','.join(['{}_{}'.format(k, v) + for k, v in zip(self.search_space_keys, model_arch_vec)]) write_contents += '{}: {} Paras: {}M\n'.format( tmp, self.search_results[model_arch_vec], self.model_paras_num[model_arch_vec] / 10**6 ) - write_contents += '\n\n\n' + '=' * 30 + ' Best Search Results ' + '=' * 30 + '\n\n' + write_contents += '\n\n\n' + '=' * 30 + \ + ' Best Search Results ' + '=' * 30 + '\n\n' self.find_best_model_archs() for i, model_arch in enumerate(self.best_model_archs): model_arch_vec = tuple(model_arch.values()) - tmp = ','.join(['{}_{}'.format(k, v) \ - for k, v in zip(self.search_space_keys, model_arch_vec)]) + tmp = ','.join(['{}_{}'.format(k, v) + for k, v in zip(self.search_space_keys, model_arch_vec)]) write_contents += \ '{}. {}: {} Paras: {}M\n'.format( i+1, tmp, self.search_results[model_arch_vec], self.model_paras_num[model_arch_vec] / 10**6 - ) + ) with open(path, mode='w') as f: f.write(write_contents) def params_vec2params_dict(self, paras_vec): assert len(paras_vec) == len(self.search_space_keys), \ "Length of paras_vec and search_space_keys should be the same." - return {k:v for k, v in zip(self.search_space_keys, paras_vec)} + return {k: v for k, v in zip(self.search_space_keys, paras_vec)} def find_best_model_archs(self): assert len(self.search_results) > 0, "Zero result in search_results." 
model_arches = list(self.search_results.keys()) - metrics = [self.metrics_conversion(self.search_results[ma]) for ma in model_arches] + metrics = [self.metrics_conversion( + self.search_results[ma]) for ma in model_arches] pareto_front_indices = find_pareto_front(metrics) - self.best_model_archs = [self.params_vec2params_dict(model_arches[i]) \ - for i in pareto_front_indices] + self.best_model_archs = [self.params_vec2params_dict(model_arches[i]) + for i in pareto_front_indices] def metrics_conversion(self, metrics): if not isinstance(metrics, Iterable): @@ -228,11 +247,11 @@ def metrics_conversion(self, metrics): "Keys of metrics not match with metrics in the configuration." metrics = list(metrics.values()) if self.higher_is_better is None: - self.higher_is_better = [True,] * len(metrics) - logger.warning("higher_is_better not set in the configuration, " + \ - "set it to all True for every metric entry by default.") - converted_metrics = [metric if higher_is_better else -metric \ - for metric, higher_is_better in zip(metrics, self.higher_is_better)] + self.higher_is_better = [True, ] * len(metrics) + logger.warning("higher_is_better not set in the configuration, " + + "set it to all True for every metric entry by default.") + converted_metrics = [metric if higher_is_better else -metric + for metric, higher_is_better in zip(metrics, self.higher_is_better)] return converted_metrics def init_search_cfg(self, config): @@ -260,18 +279,21 @@ def init_search_cfg(self, config): if self.search_cfg.higher_is_better else None self.seed = self.search_cfg.seed self.max_trials = self.search_cfg.max_trials \ - if self.search_cfg.max_trials is not None else 3 # set default 3 for max_trials + if self.search_cfg.max_trials is not None else 3 # set default 3 for max_trials self.search_algorithm_type = self.search_cfg.search_algorithm \ if self.search_cfg.search_algorithm else None if not self.search_algorithm_type: - self._search_algorithm = BayesianOptimizationSearcher(self.search_space, self.seed) + self._search_algorithm = BayesianOptimizationSearcher( + self.search_space, self.seed) elif self.search_algorithm_type.lower() == 'grid': self._search_algorithm = GridSearcher(self.search_space) elif self.search_algorithm_type.lower() == 'random': - self._search_algorithm = RandomSearcher(self.search_space, self.seed) + self._search_algorithm = RandomSearcher( + self.search_space, self.seed) elif self.search_algorithm_type.lower() == 'bo': - self._search_algorithm = BayesianOptimizationSearcher(self.search_space, self.seed) - else: # pragma: no cover + self._search_algorithm = BayesianOptimizationSearcher( + self.search_space, self.seed) + else: # pragma: no cover logger.warning( 'Please be aware that \'{}\' is not a built-in search algorithm.'.format( self.search_algorithm_type @@ -303,4 +325,4 @@ def model_builder(self, model_builder): self._model_builder = model_builder def __repr__(self): - return 'Base Class of NAS' # pragma: no cover \ No newline at end of file + return 'Base Class of NAS' # pragma: no cover diff --git a/neural_compressor/experimental/nas/nas_utils.py b/neural_compressor/experimental/nas/nas_utils.py index 1b19f0cd871..139cf818338 100644 --- a/neural_compressor/experimental/nas/nas_utils.py +++ b/neural_compressor/experimental/nas/nas_utils.py @@ -30,6 +30,7 @@ def nas_registry(nas_method): cls: The class of register. """ assert isinstance(nas_method, str), "Expect nas_method to be a string." 
+ def decorator(cls): NASMethods[nas_method.lower()] = cls return cls @@ -68,4 +69,4 @@ def find_pareto_front(metrics): pareto_front_point_indices = pareto_front_point_indices[nondominated_points] metrics = metrics[nondominated_points] next_point_idx = np.sum(nondominated_points[:next_point_idx+1]) - return pareto_front_point_indices \ No newline at end of file + return pareto_front_point_indices diff --git a/neural_compressor/experimental/nas/search_algorithms.py b/neural_compressor/experimental/nas/search_algorithms.py index c3475c53eee..35cb8a0b3b3 100644 --- a/neural_compressor/experimental/nas/search_algorithms.py +++ b/neural_compressor/experimental/nas/search_algorithms.py @@ -29,10 +29,12 @@ def __init__(self, search_space) -> None: self.search_space_keys = sorted(search_space.keys()) for k in self.search_space_keys: assert isinstance(self.search_space[k], (list, tuple)), \ - "Value of key \'{}\' must be a list or tuple to specify choices".format(k) + "Value of key \'{}\' must be a list or tuple to specify choices".format( + k) def suggest(self): - raise NotImplementedError('Depends on specific search algorithm.') # pragma: no cover + raise NotImplementedError( + 'Depends on specific search algorithm.') # pragma: no cover def get_feedback(self, metric): pass @@ -74,8 +76,10 @@ def suggest(self): class BayesianOptimizationSearcher(Searcher): def __init__(self, search_space, seed=42) -> None: super(BayesianOptimizationSearcher, self).__init__(search_space) - idx_search_space = {k: (0, len(search_space[k])-1) for k in self.search_space_keys} - self.bo_agent = BayesianOptimization(idx_search_space, random_seed=seed) + idx_search_space = { + k: (0, len(search_space[k])-1) for k in self.search_space_keys} + self.bo_agent = BayesianOptimization( + idx_search_space, random_seed=seed) self.last_param_indices = None def suggest(self): @@ -88,7 +92,7 @@ def get_feedback(self, metric): "to get parameters and the input metric is corresponding to this parameters." 
try: self.bo_agent._space.register(self.last_param_indices, metric) - except KeyError: # pragma: no cover + except KeyError: # pragma: no cover logger.debug("Find registered params, skip it.") pass self.last_param_indices = None @@ -99,4 +103,4 @@ def indices2params_vec(self, indices): # keep ind within the index range of self.search_space[key] ind = int(min(max(round(ind), 0), len(self.search_space[key])-1)) res.append(self.search_space[key][ind]) - return res \ No newline at end of file + return res diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py index ef4cbddc952..61aeb409ae5 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py @@ -39,12 +39,14 @@ def set_incremental_state(module, incremental_state, key, value): full_key = _get_full_incremental_state_key(module, key) incremental_state[full_key] = value + class EmbeddingSuper(nn.Embedding): def __init__(self, num_embeddings, super_embed_dim, padding_idx, *args, **kwargs): super().__init__(num_embeddings, super_embed_dim, padding_idx, *args, **kwargs) # the largest embed dim - self.super_embed_dim = {'encoder': super_embed_dim, 'decoder': super_embed_dim} + self.super_embed_dim = { + 'encoder': super_embed_dim, 'decoder': super_embed_dim} # the current sampled embed dim self.sample_embed_dim = {'encoder': None, 'decoder': None} @@ -119,7 +121,8 @@ def set_sample_config(self, sample_in_dim, sample_out_dim): self._sample_parameters() def _sample_parameters(self): - self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) + self.samples['weight'] = sample_weight( + self.weight, self.sample_in_dim, self.sample_out_dim) self.samples['bias'] = self.bias if self.bias is not None: self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) @@ -153,6 +156,7 @@ def sample_bias(bias, sample_out_dim): return sample_bias + def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False): if not export and torch.cuda.is_available(): try: @@ -203,7 +207,6 @@ def calc_sampled_param_num(self): return self.samples['weight'].numel() + self.samples['bias'].numel() - class MultiheadAttentionSuper(nn.Module): """Multi-headed attention. 
@@ -215,8 +218,6 @@ def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, supe encoder_decoder_attention=False, out_dim=None, qkv_dim=None): super().__init__() - - # the configs of super arch self.super_q_embed_dim = super_embed_dim self.super_kv_embed_dim = None @@ -255,11 +256,15 @@ def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, supe 'value to be of the same size' if self.qkv_same_dim: - self.in_proj_weight = Parameter(torch.Tensor(3 * self.qkv_dim, self.super_q_embed_dim)) + self.in_proj_weight = Parameter(torch.Tensor( + 3 * self.qkv_dim, self.super_q_embed_dim)) else: - self.k_proj_weight = Parameter(torch.Tensor(self.qkv_dim, self.super_kv_embed_dim)) - self.v_proj_weight = Parameter(torch.Tensor(self.qkv_dim, self.super_kv_embed_dim)) - self.q_proj_weight = Parameter(torch.Tensor(self.qkv_dim, self.super_q_embed_dim)) + self.k_proj_weight = Parameter(torch.Tensor( + self.qkv_dim, self.super_kv_embed_dim)) + self.v_proj_weight = Parameter(torch.Tensor( + self.qkv_dim, self.super_kv_embed_dim)) + self.q_proj_weight = Parameter(torch.Tensor( + self.qkv_dim, self.super_q_embed_dim)) if bias: self.in_proj_bias = Parameter(torch.Tensor(3 * self.qkv_dim)) @@ -268,7 +273,8 @@ def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, supe if out_dim is None: out_dim = self.super_q_embed_dim - self.out_proj = LinearSuper(super_in_dim=self.qkv_dim, super_out_dim=out_dim, bias=bias) + self.out_proj = LinearSuper( + super_in_dim=self.qkv_dim, super_out_dim=out_dim, bias=bias) if add_bias_kv: self.bias_k = Parameter(torch.Tensor(1, 1, self.super_q_embed_dim)) @@ -278,7 +284,6 @@ def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, supe self.add_zero_attn = add_zero_attn - self.reset_parameters() self.onnx_trace = False @@ -302,9 +307,6 @@ def calc_sampled_param_num(self): return in_proj_q_weight_numel + in_proj_k_weight_numel + in_proj_v_weight_numel + in_proj_bias_numel - - - def set_sample_config(self, sample_q_embed_dim, sample_attention_heads, sample_kv_embed_dim=None): self.sample_q_embed_dim = sample_q_embed_dim if sample_kv_embed_dim is None: @@ -314,11 +316,12 @@ def set_sample_config(self, sample_q_embed_dim, sample_attention_heads, sample_k self.num_heads = sample_attention_heads self.head_dim = self.qkv_dim // self.num_heads - assert self.head_dim * self.num_heads == self.qkv_dim, "qkv_dim must be divisible by sampled num_heads" + assert self.head_dim * \ + self.num_heads == self.qkv_dim, "qkv_dim must be divisible by sampled num_heads" self.scaling = self.head_dim ** -0.5 - self.out_proj.set_sample_config(sample_in_dim=self.qkv_dim, sample_out_dim=self.sample_q_embed_dim) - + self.out_proj.set_sample_config( + sample_in_dim=self.qkv_dim, sample_out_dim=self.sample_q_embed_dim) def prepare_for_onnx_export_(self): self.onnx_trace = True @@ -340,7 +343,6 @@ def reset_parameters(self): if self.bias_v is not None: nn.init.xavier_normal_(self.bias_v) - def forward(self, query, key, value, key_padding_mask=None, incremental_state=None, need_weights=True, static_kv=False, attn_mask=None): """Input shape: Time x Batch x Channel @@ -351,7 +353,6 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No batch x src_len, where padding elements are indicated by 1s. 
""" - tgt_len, bsz, embed_dim = query.size() if incremental_state is not None: @@ -390,7 +391,8 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) if attn_mask is not None: - attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) if key_padding_mask is not None: key_padding_mask = torch.cat( [key_padding_mask, key_padding_mask.new_zeros(key_padding_mask.size(0), 1)], dim=1) @@ -405,19 +407,23 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No if saved_state is not None: # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) if 'prev_key' in saved_state: - prev_key = saved_state['prev_key'].view(bsz * self.num_heads, -1, self.head_dim) + prev_key = saved_state['prev_key'].view( + bsz * self.num_heads, -1, self.head_dim) if static_kv: k = prev_key else: k = torch.cat((prev_key, k), dim=1) if 'prev_value' in saved_state: - prev_value = saved_state['prev_value'].view(bsz * self.num_heads, -1, self.head_dim) + prev_value = saved_state['prev_value'].view( + bsz * self.num_heads, -1, self.head_dim) if static_kv: v = prev_value else: v = torch.cat((prev_value, v), dim=1) - saved_state['prev_key'] = k.view(bsz, self.num_heads, -1, self.head_dim) - saved_state['prev_value'] = v.view(bsz, self.num_heads, -1, self.head_dim) + saved_state['prev_key'] = k.view( + bsz, self.num_heads, -1, self.head_dim) + saved_state['prev_value'] = v.view( + bsz, self.num_heads, -1, self.head_dim) self._set_input_buffer(incremental_state, saved_state) @@ -429,26 +435,31 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No key_padding_mask = None if key_padding_mask is not None: - fil = key_padding_mask.new_ones(key_padding_mask.size(0), src_len-key_padding_mask.size(1)) + fil = key_padding_mask.new_ones( + key_padding_mask.size(0), src_len-key_padding_mask.size(1)) key_padding_mask = torch.cat((key_padding_mask, fil), dim=1) assert key_padding_mask.size(0) == bsz assert key_padding_mask.size(1) == src_len - if self.add_zero_attn: src_len += 1 - k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) - v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + k = torch.cat( + [k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat( + [v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) if attn_mask is not None: - attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) if key_padding_mask is not None: key_padding_mask = torch.cat( [key_padding_mask, torch.zeros(key_padding_mask.size(0), 1).type_as(key_padding_mask)], dim=1) attn_weights = torch.bmm(q, k.transpose(1, 2)) - attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + attn_weights = self.apply_sparse_mask( + attn_weights, tgt_len, src_len, bsz) - assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + assert list(attn_weights.size()) == [ + bsz * self.num_heads, tgt_len, src_len] if attn_mask is not None: attn_mask = attn_mask.unsqueeze(0) @@ -457,7 +468,8 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No attn_weights += attn_mask if key_padding_mask is not None: - attn_weights = 
attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attn_weights = attn_weights.view( + bsz, self.num_heads, tgt_len, src_len) if self.onnx_trace: attn_weights = torch.where( key_padding_mask.unsqueeze(1).unsqueeze(2), @@ -469,28 +481,33 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No key_padding_mask.unsqueeze(1).unsqueeze(2), float('-inf'), ) - attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + attn_weights = attn_weights.view( + bsz * self.num_heads, tgt_len, src_len) attn_weights = utils.softmax( attn_weights, dim=-1, onnx_trace=self.onnx_trace, ).type_as(attn_weights) - attn_weights = F.dropout(attn_weights, p=self.dropout, training=self.training) + attn_weights = F.dropout( + attn_weights, p=self.dropout, training=self.training) attn = torch.bmm(attn_weights, v) - assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + assert list(attn.size()) == [ + bsz * self.num_heads, tgt_len, self.head_dim] if (self.onnx_trace and attn.size(1) == 1): # when ONNX tracing a single decoder step (sequence length == 1) # the transpose is a no-op copy before view, thus unnecessary attn = attn.contiguous().view(tgt_len, bsz, self.qkv_dim) else: - attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, self.qkv_dim) + attn = attn.transpose(0, 1).contiguous().view( + tgt_len, bsz, self.qkv_dim) attn = self.out_proj(attn) if need_weights: # average attention weights over heads - attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attn_weights = attn_weights.view( + bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.sum(dim=1) / self.num_heads else: @@ -547,7 +564,7 @@ def reorder_incremental_state(self, incremental_state, new_order): self._set_input_buffer(incremental_state, input_buffer) def _get_input_buffer(self, incremental_state): - return get_incremental_state( #utils. + return get_incremental_state( # utils. self, incremental_state, 'attn_state', @@ -578,7 +595,8 @@ def __repr__(self): child_lines.append('(' + key + '): ' + mod_str) lines = extra_lines + child_lines - main_str = self._get_name() + '\tnum_heads:' + str(self.num_heads) + '\t qkv_dim:' + str(self.qkv_dim) + main_str = self._get_name() + '\tnum_heads:' + str(self.num_heads) + \ + '\t qkv_dim:' + str(self.qkv_dim) if lines: # simple one-liner info, which most builtin Modules will use if len(extra_lines) == 1 and not child_lines: diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index 75d990f26dd..554b7619a7a 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -31,7 +31,8 @@ from fairseq import libbleu except ImportError as e: import sys - sys.stderr.write('ERROR: missing libbleu.so. run `pip install --editable .`\n') + sys.stderr.write( + 'ERROR: missing libbleu.so. 
run `pip install --editable .`\n') raise e @@ -124,12 +125,13 @@ def result_string(self, order=4): self.stat.predlen, self.stat.reflen) -def get_bleu_score(args,ref,sys): +def get_bleu_score(args, ref, sys): dict = dictionary.Dictionary() - order =4 + order = 4 sacrebleu = False sentence_bleu = False ignore_case = False + def readlines(fd): for line in fd.readlines(): if ignore_case: @@ -137,7 +139,6 @@ def readlines(fd): else: yield line - if sentence_bleu: def score(fdsys): with open(ref) as fdref: @@ -159,7 +160,6 @@ def score(fdsys): print(scorer.result_string(order)) return(scorer.score(order)) - if sys == '-': score = score(sys.stdin) else: @@ -167,11 +167,12 @@ def score(fdsys): score = score(f) return score -def compute_bleu(config,dataset_path,checkpoint_path): + +def compute_bleu(config, dataset_path, checkpoint_path): parser = options.get_generation_parser() - args = options.parse_args_and_arch(parser,[dataset_path]) + args = options.parse_args_and_arch(parser, [dataset_path]) args.data = dataset_path args.beam = 5 @@ -184,7 +185,6 @@ def compute_bleu(config,dataset_path,checkpoint_path): utils.import_user_module(args) max_tokens = 12000 - use_cuda = torch.cuda.is_available() and not args.cpu # when running on CPU, use fp32 as default @@ -204,14 +204,13 @@ def compute_bleu(config,dataset_path,checkpoint_path): src_dict = None tgt_dict = task.target_dictionary - # Load ensemble print('| loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) - state = torch.load(checkpoint_path,map_location=torch.device('cpu')) + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) model.load_state_dict(state['model'], - strict=True) + strict=True) if use_cuda: model.cuda() @@ -250,13 +249,13 @@ def compute_bleu(config,dataset_path,checkpoint_path): # Initialize generator gen_timer = StopwatchMeter() - generator = task.build_generator([model],args) + generator = task.build_generator([model], args) num_sentences = 0 has_target = True decoder_times_all = [] input_len_all = [] - with open('translations_out.txt','a') as fname_translations: + with open('translations_out.txt', 'a') as fname_translations: with progress_bar.build_progress_bar(args, itr) as t: wps_meter = TimeMeter() for sample in t: @@ -270,8 +269,10 @@ def compute_bleu(config,dataset_path,checkpoint_path): prefix_tokens = sample['target'][:, :args.prefix_size] gen_timer.start() - hypos = task.inference_step(generator, [model], sample, prefix_tokens) - input_len_all.append(np.mean(sample['net_input']['src_lengths'].cpu().numpy())) + hypos = task.inference_step( + generator, [model], sample, prefix_tokens) + input_len_all.append( + np.mean(sample['net_input']['src_lengths'].cpu().numpy())) num_generated_tokens = sum(len(h[0]['tokens']) for h in hypos) gen_timer.stop(num_generated_tokens) @@ -279,32 +280,40 @@ def compute_bleu(config,dataset_path,checkpoint_path): has_target = sample['target'] is not None # Remove padding - src_tokens = utils.strip_pad(sample['net_input']['src_tokens'][i, :], tgt_dict.pad()) + src_tokens = utils.strip_pad( + sample['net_input']['src_tokens'][i, :], tgt_dict.pad()) target_tokens = None if has_target: - target_tokens = utils.strip_pad(sample['target'][i, :], tgt_dict.pad()).int().cpu() + target_tokens = utils.strip_pad( + sample['target'][i, :], tgt_dict.pad()).int().cpu() # Either retrieve the original sentences or regenerate them from tokens. 
if align_dict is not None: - src_str = task.dataset(args.gen_subset).src.get_original_text(sample_id) - target_str = task.dataset(args.gen_subset).tgt.get_original_text(sample_id) + src_str = task.dataset( + args.gen_subset).src.get_original_text(sample_id) + target_str = task.dataset( + args.gen_subset).tgt.get_original_text(sample_id) else: if src_dict is not None: - src_str = src_dict.string(src_tokens, args.remove_bpe) + src_str = src_dict.string( + src_tokens, args.remove_bpe) else: src_str = "" if has_target: - target_str = tgt_dict.string(target_tokens, args.remove_bpe, escape_unk=True) + target_str = tgt_dict.string( + target_tokens, args.remove_bpe, escape_unk=True) if not args.quiet: if src_dict is not None: #print('S-{}\t{}'.format(sample_id, src_str)) - fname_translations.write('S-{}\t{}'.format(sample_id, src_str)) + fname_translations.write( + 'S-{}\t{}'.format(sample_id, src_str)) fname_translations.write('\n') if has_target: #print('T-{}\t{}'.format(sample_id, target_str)) - fname_translations.write('T-{}\t{}'.format(sample_id, target_str)) + fname_translations.write( + 'T-{}\t{}'.format(sample_id, target_str)) fname_translations.write('\n') # Process top predictions @@ -312,29 +321,32 @@ def compute_bleu(config,dataset_path,checkpoint_path): hypo_tokens, hypo_str, alignment = utils.post_process_prediction( hypo_tokens=hypo['tokens'].int().cpu(), src_str=src_str, - alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None, + alignment=hypo['alignment'].int().cpu( + ) if hypo['alignment'] is not None else None, align_dict=align_dict, tgt_dict=tgt_dict, remove_bpe=args.remove_bpe, ) if not args.quiet: - - fname_translations.write('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str)) + + fname_translations.write( + 'H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str)) fname_translations.write('\n') fname_translations.write('P-{}\t{}'.format( - sample_id, - ' '.join(map( - lambda x: '{:.4f}'.format(x), - hypo['positional_scores'].tolist(), - )) + sample_id, + ' '.join(map( + lambda x: '{:.4f}'.format(x), + hypo['positional_scores'].tolist(), )) + )) fname_translations.write('\n') if args.print_alignment: fname_translations.write('A-{}\t{}'.format( sample_id, - ' '.join(map(lambda x: str(utils.item(x)), alignment)) + ' '.join( + map(lambda x: str(utils.item(x)), alignment)) )) fname_translations.write('\n') @@ -342,10 +354,11 @@ def compute_bleu(config,dataset_path,checkpoint_path): t.log({'wps': round(wps_meter.avg)}) num_sentences += sample['nsentences'] - - os.system("grep ^H translations_out.txt | cut -f3- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > sys.txt") - os.system("grep ^T translations_out.txt | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > ref.txt") - bleu_score = get_bleu_score(args,"ref.txt","sys.txt") + os.system( + "grep ^H translations_out.txt | cut -f3- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > sys.txt") + os.system( + "grep ^T translations_out.txt | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > ref.txt") + bleu_score = get_bleu_score(args, "ref.txt", "sys.txt") print(bleu_score) os.system("rm ref.txt") @@ -353,10 +366,11 @@ def compute_bleu(config,dataset_path,checkpoint_path): os.system("rm translations_out.txt") return bleu_score -def compute_latency(config,dataset_path,get_model_parameters=False): + +def compute_latency(config, dataset_path, get_model_parameters=False): parser = options.get_generation_parser() - args = options.parse_args_and_arch(parser,[dataset_path]) + args = 
options.parse_args_and_arch(parser, [dataset_path]) args.data = dataset_path args.beam = 5 @@ -368,16 +382,16 @@ def compute_latency(config,dataset_path,get_model_parameters=False): args.batch_size = 128 utils.import_user_module(args) max_tokens = 12000 - args.latgpu=False - args.latcpu=True - args.latiter=100 + args.latgpu = False + args.latcpu = True + args.latiter = 100 # Initialize CUDA and distributed training if torch.cuda.is_available() and not args.cpu: torch.cuda.set_device(args.device_id) torch.manual_seed(args.seed) - #Optimize ensemble for generation + # Optimize ensemble for generation # Load dataset splits task = tasks.setup_task(args) task.load_dataset(args.gen_subset) @@ -388,7 +402,6 @@ def compute_latency(config,dataset_path,get_model_parameters=False): src_dict = None tgt_dict = task.target_dictionary - # Load ensemble print('| loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) @@ -399,48 +412,51 @@ def compute_latency(config,dataset_path,get_model_parameters=False): dummy_sentence_length = dummy_sentence_length_dict['wmt'] - dummy_src_tokens = [2] + [7] * (dummy_sentence_length - 1) dummy_prev = [7] * (dummy_sentence_length - 1) + [2] - src_tokens_test = torch.tensor([dummy_src_tokens], dtype=torch.long)#.cuda() - src_lengths_test = torch.tensor([dummy_sentence_length])#.cuda() - prev_output_tokens_test_with_beam = torch.tensor([dummy_prev] * args.beam, dtype=torch.long)#.cuda() + src_tokens_test = torch.tensor( + [dummy_src_tokens], dtype=torch.long) # .cuda() + src_lengths_test = torch.tensor([dummy_sentence_length]) # .cuda() + prev_output_tokens_test_with_beam = torch.tensor( + [dummy_prev] * args.beam, dtype=torch.long) # .cuda() bsz = 1 - new_order = torch.arange(bsz).view(-1, 1).repeat(1, args.beam).view(-1).long()#.cuda() + new_order = torch.arange(bsz).view(-1, 1).repeat(1, + args.beam).view(-1).long() # .cuda() if args.latcpu: model.cpu() print('Measuring model latency on CPU for dataset generation...') elif args.latgpu: model.cuda() - src_tokens_test = src_tokens_test#.cuda() - src_lengths_test = src_lengths_test#.cuda() - prev_output_tokens_test_with_beam = prev_output_tokens_test_with_beam#.cuda() + src_tokens_test = src_tokens_test # .cuda() + src_lengths_test = src_lengths_test # .cuda() + prev_output_tokens_test_with_beam = prev_output_tokens_test_with_beam # .cuda() print('Measuring model latency on GPU for dataset generation...') start = torch.cuda.Event(enable_timing=True) end = torch.cuda.Event(enable_timing=True) - model.set_sample_config(config) - + model.eval() - + with torch.no_grad(): # dry runs for _ in range(15): - encoder_out_test = model.encoder(src_tokens=src_tokens_test, src_lengths=src_lengths_test) + encoder_out_test = model.encoder( + src_tokens=src_tokens_test, src_lengths=src_lengths_test) encoder_latencies = [] print('Measuring encoder for dataset generation...') for _ in range(args.latiter): if args.latgpu: - #start.record() + # start.record() start = time.time() elif args.latcpu: start = time.time() - model.encoder(src_tokens=src_tokens_test, src_lengths=src_lengths_test) + model.encoder(src_tokens=src_tokens_test, + src_lengths=src_lengths_test) if args.latgpu: end = time.time() @@ -450,16 +466,18 @@ def compute_latency(config,dataset_path,get_model_parameters=False): encoder_latencies.append((end - start) * 1000) encoder_latencies.sort() - encoder_latencies = encoder_latencies[int(args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] - print(f'Encoder latency for dataset generation: Mean: 
{np.mean(encoder_latencies)} ms; \t Std: {np.std(encoder_latencies)} ms') - + encoder_latencies = encoder_latencies[int( + args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] + print( + f'Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; \t Std: {np.std(encoder_latencies)} ms') - encoder_out_test_with_beam = model.encoder.reorder_encoder_out(encoder_out_test, new_order) + encoder_out_test_with_beam = model.encoder.reorder_encoder_out( + encoder_out_test, new_order) # dry runs for _ in range(15): model.decoder(prev_output_tokens=prev_output_tokens_test_with_beam, - encoder_out=encoder_out_test_with_beam) + encoder_out=encoder_out_test_with_beam) # decoder is more complicated because we need to deal with incremental states and auto regressive things decoder_iterations_dict = {'iwslt': 23, 'wmt': 30} @@ -471,13 +489,13 @@ def compute_latency(config,dataset_path,get_model_parameters=False): for _ in range(args.latiter): if args.latgpu: start = time.time() - #start.record() + # start.record() elif args.latcpu: start = time.time() incre_states = {} for k_regressive in range(decoder_iterations): model.decoder(prev_output_tokens=prev_output_tokens_test_with_beam[:, :k_regressive + 1], - encoder_out=encoder_out_test_with_beam, incremental_state=incre_states) + encoder_out=encoder_out_test_with_beam, incremental_state=incre_states) if args.latgpu: end = time.time() decoder_latencies.append((end - start) * 1000) @@ -488,10 +506,12 @@ def compute_latency(config,dataset_path,get_model_parameters=False): # only use the 10% to 90% latencies to avoid outliers decoder_latencies.sort() - decoder_latencies = decoder_latencies[int(args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] + decoder_latencies = decoder_latencies[int( + args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] print(decoder_latencies) - print(f'Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') + print( + f'Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') lat_mean = np.mean(encoder_latencies)+np.mean(decoder_latencies) lat_std = np.std(encoder_latencies)+np.std(decoder_latencies) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py index 58f29a94aa3..a47837bb36f 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py @@ -11,7 +11,8 @@ BaseFairseqModel ) -from fairseq.modules import (PositionalEmbedding,SinusoidalPositionalEmbedding) +from fairseq.modules import ( + PositionalEmbedding, SinusoidalPositionalEmbedding) from .modules_supernetwork import ( MultiheadAttentionSuper, @@ -24,6 +25,8 @@ DEFAULT_MAX_SOURCE_POSITIONS = 1024 DEFAULT_MAX_TARGET_POSITIONS = 1024 + + class TransformerSuperNetwork(BaseFairseqModel): """ Transformer model from `"Attention Is All You Need" (Vaswani, et al, 2017) @@ -41,32 +44,33 @@ class TransformerSuperNetwork(BaseFairseqModel): :prog: """ - - def __init__(self,task): + def __init__(self, task): super().__init__() src_dict, tgt_dict = task.source_dictionary, task.target_dictionary - encoder_config ={'encoder_embed_dim': 640, - 'encoder_layers': 6, - 'encoder_attention_heads': 8, - 'encoder_ffn_embed_dim':3072, - 
'encoder_embed_path': None} + encoder_config = {'encoder_embed_dim': 640, + 'encoder_layers': 6, + 'encoder_attention_heads': 8, + 'encoder_ffn_embed_dim': 3072, + 'encoder_embed_path': None} - decoder_config ={'decoder_embed_dim': 640, - 'decoder_layers': 6, - 'decoder_attention_heads': 8, - 'decoder_ffn_embed_dim':3072} + decoder_config = {'decoder_embed_dim': 640, + 'decoder_layers': 6, + 'decoder_attention_heads': 8, + 'decoder_ffn_embed_dim': 3072} encoder_embed_tokens = self.build_embedding( src_dict, encoder_config['encoder_embed_dim'], encoder_config['encoder_embed_path'] - ) + ) decoder_embed_tokens = encoder_embed_tokens self.share_decoder_input_output_embed = True - self.encoder = TransformerEncoder(encoder_config, src_dict, encoder_embed_tokens) - self.decoder = TransformerDecoder(decoder_config, tgt_dict, decoder_embed_tokens) + self.encoder = TransformerEncoder( + encoder_config, src_dict, encoder_embed_tokens) + self.decoder = TransformerDecoder( + decoder_config, tgt_dict, decoder_embed_tokens) - def build_embedding(self,dictionary, embed_dim, path=None): + def build_embedding(self, dictionary, embed_dim, path=None): num_embeddings = len(dictionary) padding_idx = dictionary.pad() emb = Embedding(num_embeddings, embed_dim, padding_idx) @@ -98,7 +102,7 @@ def get_sampled_params_numel(self, config): def set_sample_config(self, config): self.encoder.set_sample_config(config) self.decoder.set_sample_config(config) - + class TransformerEncoder(FairseqEncoder): """ @@ -115,9 +119,11 @@ def __init__(self, encoder_config, dictionary, embed_tokens): super().__init__(dictionary) # the configs of super arch self.super_embed_dim = encoder_config['encoder_embed_dim'] - self.super_ffn_embed_dim = [encoder_config['encoder_ffn_embed_dim']] * encoder_config['encoder_layers'] + self.super_ffn_embed_dim = [ + encoder_config['encoder_ffn_embed_dim']] * encoder_config['encoder_layers'] self.super_layer_num = encoder_config['encoder_layers'] - self.super_self_attention_heads = [encoder_config['encoder_attention_heads']] * encoder_config['encoder_layers'] + self.super_self_attention_heads = [ + encoder_config['encoder_attention_heads']] * encoder_config['encoder_layers'] self.super_dropout = 0.3 self.super_activation_dropout = 0 @@ -141,11 +147,11 @@ def __init__(self, encoder_config, dictionary, embed_tokens): self.max_source_positions = DEFAULT_MAX_SOURCE_POSITIONS self.embed_tokens = embed_tokens - + self.embed_positions = PositionalEmbedding( self.max_source_positions, self.super_embed_dim, self.padding_idx, - learned= False, - ) + learned=False, + ) self.layers = nn.ModuleList([]) self.layers.extend([ @@ -158,10 +164,9 @@ def __init__(self, encoder_config, dictionary, embed_tokens): else: self.layer_norm = None - self.vocab_original_scaling = False - + self.vocab_original_scaling = False - def set_sample_config(self, config:dict): + def set_sample_config(self, config: dict): self.sample_embed_dim = config['encoder']['encoder_embed_dim'] @@ -173,15 +178,20 @@ def set_sample_config(self, config:dict): # Caution: this is a list for all layers self.sample_self_attention_heads = config['encoder']['encoder_self_attention_heads'] - self.sample_dropout = calc_dropout(self.super_dropout, self.sample_embed_dim, self.super_embed_dim) - self.sample_activation_dropout = calc_dropout(self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_dropout = calc_dropout( + self.super_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_activation_dropout = 
calc_dropout( + self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) - self.sample_embed_scale = math.sqrt(self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale + self.sample_embed_scale = math.sqrt( + self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale - self.embed_tokens.set_sample_config(sample_embed_dim=self.sample_embed_dim, part='encoder') + self.embed_tokens.set_sample_config( + sample_embed_dim=self.sample_embed_dim, part='encoder') if self.layer_norm is not None: - self.layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + self.layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) for i, layer in enumerate(self.layers): # not exceed sample layer number @@ -189,14 +199,14 @@ def set_sample_config(self, config:dict): layer.set_sample_config(is_identity_layer=False, sample_embed_dim=self.sample_embed_dim, sample_ffn_embed_dim_this_layer=self.sample_ffn_embed_dim[i], - sample_self_attention_heads_this_layer=self.sample_self_attention_heads[i], + sample_self_attention_heads_this_layer=self.sample_self_attention_heads[ + i], sample_dropout=self.sample_dropout, sample_activation_dropout=self.sample_activation_dropout) # exceeds sample layer number else: layer.set_sample_config(is_identity_layer=True) - def forward(self, src_tokens, src_lengths): """ Args: @@ -213,7 +223,8 @@ def forward(self, src_tokens, src_lengths): padding elements of shape `(batch, src_len)` """ # embed tokens and positions - x = self.sample_embed_scale * self.embed_tokens(src_tokens, part='encoder') + x = self.sample_embed_scale * \ + self.embed_tokens(src_tokens, part='encoder') if self.embed_positions is not None: positions = self.embed_positions(src_tokens) @@ -236,14 +247,13 @@ def forward(self, src_tokens, src_lengths): x = layer(x, encoder_padding_mask) all_x.append(x) - if self.layer_norm: x = self.layer_norm(x) return { - 'encoder_out': x, - 'encoder_out_all' : all_x, - 'encoder_padding_mask': encoder_padding_mask, + 'encoder_out': x, + 'encoder_out_all': all_x, + 'encoder_padding_mask': encoder_padding_mask, } def reorder_encoder_out(self, encoder_out, new_order): @@ -267,7 +277,8 @@ def reorder_encoder_out(self, encoder_out, new_order): if 'encoder_out_all' in encoder_out.keys(): new_encoder_out_all = [] for encoder_out_one_layer in encoder_out['encoder_out_all']: - new_encoder_out_all.append(encoder_out_one_layer.index_select(1, new_order)) + new_encoder_out_all.append( + encoder_out_one_layer.index_select(1, new_order)) encoder_out['encoder_out_all'] = new_encoder_out_all return encoder_out @@ -284,10 +295,12 @@ def upgrade_state_dict_named(self, state_dict, name): weights_key = '{}.embed_positions.weights'.format(name) if weights_key in state_dict: del state_dict[weights_key] - state_dict['{}.embed_positions._float_tensor'.format(name)] = torch.FloatTensor(1) + state_dict['{}.embed_positions._float_tensor'.format( + name)] = torch.FloatTensor(1) for i in range(len(self.layers)): # update layer norms - self.layers[i].upgrade_state_dict_named(state_dict, "{}.layers.{}".format(name, i)) + self.layers[i].upgrade_state_dict_named( + state_dict, "{}.layers.{}".format(name, i)) version_key = '{}.version'.format(name) if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) < 2: @@ -316,11 +329,16 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal # the configs of super arch self.super_embed_dim = decoder_config['decoder_embed_dim'] - 
self.super_ffn_embed_dim = decoder_config['decoder_ffn_embed_dim'] * decoder_config['decoder_layers'] + self.super_ffn_embed_dim = decoder_config['decoder_ffn_embed_dim'] * \ + decoder_config['decoder_layers'] self.super_layer_num = decoder_config['decoder_layers'] - self.super_self_attention_heads = 8*[decoder_config['decoder_attention_heads']] * decoder_config['decoder_layers'] - self.super_ende_attention_heads = [decoder_config['decoder_attention_heads']] * decoder_config['decoder_layers'] - self.super_arbitrary_ende_attn = [-1] * decoder_config['decoder_layers'] + self.super_self_attention_heads = 8 * \ + [decoder_config['decoder_attention_heads']] * \ + decoder_config['decoder_layers'] + self.super_ende_attention_heads = [ + decoder_config['decoder_attention_heads']] * decoder_config['decoder_layers'] + self.super_arbitrary_ende_attn = [-1] * \ + decoder_config['decoder_layers'] self.super_dropout = 0.3 self.super_activation_dropout = 0.0 @@ -340,7 +358,6 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.sample_embed_scale = None - # the configs of current sampled arch self.register_buffer('version', torch.Tensor([3])) @@ -353,15 +370,15 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.embed_tokens = embed_tokens - self.embed_positions = PositionalEmbedding( self.max_target_positions, self.super_embed_dim, padding_idx, learned=False, - ) if not False else None + ) if not False else None self.layers = nn.ModuleList([]) self.layers.extend([ - TransformerDecoderLayer(decoder_config, layer_idx=i, no_encoder_attn=no_encoder_attn) + TransformerDecoderLayer( + decoder_config, layer_idx=i, no_encoder_attn=no_encoder_attn) for i in range(self.super_layer_num) ]) @@ -370,7 +387,7 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.project_out_dim = Linear(self.super_embed_dim, self.output_embed_dim, bias=False) \ if self.super_embed_dim != self.output_embed_dim and not args.tie_adaptive_weights else None - if False:# args.adaptive_softmax_cutoff is not None: + if False: # args.adaptive_softmax_cutoff is not None: self.adaptive_softmax = AdaptiveSoftmax( len(dictionary), self.output_embed_dim, @@ -381,15 +398,17 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal tie_proj=args.tie_adaptive_proj, ) elif not self.share_input_output_embed: - self.embed_out = nn.Parameter(torch.Tensor(len(dictionary), self.output_embed_dim)) - nn.init.normal_(self.embed_out, mean=0, std=self.output_embed_dim ** -0.5) + self.embed_out = nn.Parameter(torch.Tensor( + len(dictionary), self.output_embed_dim)) + nn.init.normal_(self.embed_out, mean=0, + std=self.output_embed_dim ** -0.5) self.layer_norm = None self.get_attn = False self.vocab_original_scaling = False - def set_sample_config(self, config:dict): + def set_sample_config(self, config: dict): self.sample_embed_dim = config['decoder']['decoder_embed_dim'] self.sample_encoder_embed_dim = config['encoder']['encoder_embed_dim'] @@ -407,15 +426,20 @@ def set_sample_config(self, config:dict): self.sample_layer_num = config['decoder']['decoder_layer_num'] - self.sample_dropout = calc_dropout(self.super_dropout, self.sample_embed_dim, self.super_embed_dim) - self.sample_activation_dropout = calc_dropout(self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_dropout = calc_dropout( + self.super_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_activation_dropout = 
calc_dropout( + self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) - self.sample_embed_scale = math.sqrt(self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale + self.sample_embed_scale = math.sqrt( + self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale - self.embed_tokens.set_sample_config(sample_embed_dim=self.sample_embed_dim, part='decoder') + self.embed_tokens.set_sample_config( + sample_embed_dim=self.sample_embed_dim, part='decoder') if self.layer_norm is not None: - self.layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + self.layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) for i, layer in enumerate(self.layers): # not exceed sample layer number @@ -424,16 +448,16 @@ def set_sample_config(self, config:dict): sample_embed_dim=self.sample_embed_dim, sample_encoder_embed_dim=self.sample_encoder_embed_dim, sample_ffn_embed_dim_this_layer=self.sample_ffn_embed_dim[i], - sample_self_attention_heads_this_layer=self.sample_self_attention_heads[i], - sample_ende_attention_heads_this_layer=self.sample_ende_attention_heads[i], + sample_self_attention_heads_this_layer=self.sample_self_attention_heads[ + i], + sample_ende_attention_heads_this_layer=self.sample_ende_attention_heads[ + i], sample_dropout=self.sample_dropout, sample_activation_dropout=self.sample_activation_dropout) # exceeds sample layer number else: layer.set_sample_config(is_identity_layer=True) - - def forward(self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused): """ Args: @@ -449,7 +473,8 @@ def forward(self, prev_output_tokens, encoder_out=None, incremental_state=None, - the decoder's output of shape `(batch, tgt_len, vocab)` - a dictionary with any model-specific outputs """ - x, extra = self.extract_features(prev_output_tokens, encoder_out, incremental_state) + x, extra = self.extract_features( + prev_output_tokens, encoder_out, incremental_state) x = self.output_layer(x) return x, extra @@ -478,7 +503,8 @@ def extract_features(self, prev_output_tokens, encoder_out=None, incremental_sta positions = positions[:, -1:] # embed tokens and positions - x = self.sample_embed_scale * self.embed_tokens(prev_output_tokens, part='decoder') + x = self.sample_embed_scale * \ + self.embed_tokens(prev_output_tokens, part='decoder') if positions is not None: x += positions @@ -501,36 +527,41 @@ def extract_features(self, prev_output_tokens, encoder_out=None, incremental_sta encoder_out_feed = encoder_out['encoder_out'] # concat one second last output layer elif self.sample_arbitrary_ende_attn[i] == 1: - encoder_out_feed = torch.cat([encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2]], dim=0) + encoder_out_feed = torch.cat( + [encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2]], dim=0) elif self.sample_arbitrary_ende_attn[i] == 2: - encoder_out_feed = torch.cat([encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2], encoder_out['encoder_out_all'][-3]], dim=0) + encoder_out_feed = torch.cat( + [encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2], encoder_out['encoder_out_all'][-3]], dim=0) else: - raise NotImplementedError("arbitrary_ende_attn should in [-1, 1, 2]") + raise NotImplementedError( + "arbitrary_ende_attn should in [-1, 1, 2]") if encoder_out['encoder_padding_mask'] is not None: if i >= self.sample_layer_num or self.sample_arbitrary_ende_attn[i] == -1: encoder_padding_mask_feed = encoder_out['encoder_padding_mask'] # concat one 
more elif self.sample_arbitrary_ende_attn[i] == 1: - encoder_padding_mask_feed = torch.cat([encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], dim=1) + encoder_padding_mask_feed = torch.cat( + [encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], dim=1) # concat two more elif self.sample_arbitrary_ende_attn[i] == 2: - encoder_padding_mask_feed = torch.cat([encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], dim=1) + encoder_padding_mask_feed = torch.cat( + [encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], dim=1) else: - raise NotImplementedError("arbitrary_ende_attn should in [-1, 1, 2]") - + raise NotImplementedError( + "arbitrary_ende_attn should in [-1, 1, 2]") x, attn = layer( x, encoder_out_feed, encoder_padding_mask_feed, incremental_state, - self_attn_mask=self.buffered_future_mask(x) if incremental_state is None else None, + self_attn_mask=self.buffered_future_mask( + x) if incremental_state is None else None, ) inner_states.append(x) attns.append(attn) - if self.layer_norm: x = self.layer_norm(x) @@ -558,13 +589,15 @@ def max_positions(self): """Maximum output length supported by the decoder.""" if self.embed_positions is None: return self.max_target_positions - import ipdb;ipdb.set_trace() + import ipdb + ipdb.set_trace() return min(self.max_target_positions, self.embed_positions.max_positions()) def buffered_future_mask(self, tensor): dim = tensor.size(0) if not hasattr(self, '_future_mask') or self._future_mask is None or self._future_mask.device != tensor.device or self._future_mask.size(0) < dim: - self._future_mask = torch.triu(utils.fill_with_neg_inf(tensor.new(dim, dim)), 1) + self._future_mask = torch.triu( + utils.fill_with_neg_inf(tensor.new(dim, dim)), 1) return self._future_mask[:dim, :dim] def upgrade_state_dict_named(self, state_dict, name): @@ -573,7 +606,8 @@ def upgrade_state_dict_named(self, state_dict, name): weights_key = '{}.embed_positions.weights'.format(name) if weights_key in state_dict: del state_dict[weights_key] - state_dict['{}.embed_positions._float_tensor'.format(name)] = torch.FloatTensor(1) + state_dict['{}.embed_positions._float_tensor'.format( + name)] = torch.FloatTensor(1) for i in range(len(self.layers)): # update layer norms @@ -584,9 +618,11 @@ def upgrade_state_dict_named(self, state_dict, name): } for old, new in layer_norm_map.items(): for m in ('weight', 'bias'): - k = '{}.layers.{}.layer_norms.{}.{}'.format(name, i, old, m) + k = '{}.layers.{}.layer_norms.{}.{}'.format( + name, i, old, m) if k in state_dict: - state_dict['{}.layers.{}.{}.{}'.format(name, i, new, m)] = state_dict[k] + state_dict['{}.layers.{}.{}.{}'.format( + name, i, new, m)] = state_dict[k] del state_dict[k] version_key = '{}.version'.format(name) @@ -623,7 +659,7 @@ def __init__(self, encoder_config, layer_idx): self.super_self_attention_heads_this_layer = encoder_config['encoder_attention_heads'] self.super_dropout = 0.3 - self.super_activation_dropout =0 + self.super_activation_dropout = 0 # the configs of current sampled arch self.sample_embed_dim = None @@ -635,8 +671,7 @@ def __init__(self, encoder_config, layer_idx): self.is_identity_layer = None - self.qkv_dim= 512 - + self.qkv_dim = 512 self.self_attn = MultiheadAttentionSuper( super_embed_dim=self.super_embed_dim, num_heads=self.super_self_attention_heads_this_layer, is_encoder=True, @@ -650,11 +685,12 @@ def __init__(self, encoder_config, 
layer_idx): ) self.normalize_before = False - self.fc1 = LinearSuper(super_in_dim=self.super_embed_dim, super_out_dim=self.super_ffn_embed_dim_this_layer, uniform_=None, non_linear='relu') #init.uniform_ - self.fc2 = LinearSuper(super_in_dim=self.super_ffn_embed_dim_this_layer, super_out_dim=self.super_embed_dim, uniform_=None, non_linear='linear') + self.fc1 = LinearSuper(super_in_dim=self.super_embed_dim, super_out_dim=self.super_ffn_embed_dim_this_layer, + uniform_=None, non_linear='relu') # init.uniform_ + self.fc2 = LinearSuper(super_in_dim=self.super_ffn_embed_dim_this_layer, + super_out_dim=self.super_embed_dim, uniform_=None, non_linear='linear') self.final_layer_norm = LayerNormSuper(self.super_embed_dim) - def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_ffn_embed_dim_this_layer=None, sample_self_attention_heads_this_layer=None, sample_dropout=None, sample_activation_dropout=None): if is_identity_layer: @@ -670,15 +706,19 @@ def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_ffn self.sample_dropout = sample_dropout self.sample_activation_dropout = sample_activation_dropout - self.self_attn_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) - - self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_attention_heads=self.sample_self_attention_heads_this_layer) + self.self_attn_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) - self.fc1.set_sample_config(sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) - self.fc2.set_sample_config(sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) + self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, + sample_attention_heads=self.sample_self_attention_heads_this_layer) - self.final_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + self.fc1.set_sample_config( + sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) + self.fc2.set_sample_config( + sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) + self.final_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) def upgrade_state_dict_named(self, state_dict, name): """ @@ -728,15 +768,17 @@ def forward(self, x, encoder_padding_mask, attn_mask=None): # will become -inf, which results in NaN in model parameters # TODO: to formally solve this problem, we need to change fairseq's # MultiheadAttention. We will do this later on. 
- x, _ = self.self_attn(query=x, key=x, value=x, key_padding_mask=encoder_padding_mask) + x, _ = self.self_attn(query=x, key=x, value=x, + key_padding_mask=encoder_padding_mask) x = F.dropout(x, p=self.dropout, training=self.training) - x[:residual.size(0),:,:] = residual + x[:residual.size(0),:,:] + x[:residual.size(0), :, :] = residual + x[:residual.size(0), :, :] x = self.maybe_layer_norm(self.self_attn_layer_norm, x, after=True) residual = x x = self.maybe_layer_norm(self.final_layer_norm, x, before=True) x = self.activation_fn(self.fc1(x)) - x = F.dropout(x, p=self.sample_activation_dropout, training=self.training) + x = F.dropout(x, p=self.sample_activation_dropout, + training=self.training) x = self.fc2(x) x = F.dropout(x, p=self.sample_dropout, training=self.training) x = residual + x @@ -793,7 +835,6 @@ def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv self.qkv_dim = 512 self.layer_idx = layer_idx - self.self_attn = MultiheadAttentionSuper( is_encoder=False, super_embed_dim=self.super_embed_dim, @@ -841,7 +882,6 @@ def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv self.onnx_trace = False - def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_encoder_embed_dim=None, sample_ffn_embed_dim_this_layer=None, sample_self_attention_heads_this_layer=None, sample_ende_attention_heads_this_layer=None, sample_dropout=None, sample_activation_dropout=None): if is_identity_layer: @@ -859,18 +899,23 @@ def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_enc self.sample_dropout = sample_dropout self.sample_activation_dropout = sample_activation_dropout + self.self_attn_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) + self.encoder_attn_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) - self.self_attn_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) - self.encoder_attn_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, + sample_attention_heads=self.sample_self_attention_heads_this_layer) + self.encoder_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_kv_embed_dim=self.sample_encoder_embed_dim, + sample_attention_heads=self.sample_ende_attention_heads_this_layer) - self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_attention_heads=self.sample_self_attention_heads_this_layer) - self.encoder_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_kv_embed_dim=self.sample_encoder_embed_dim, sample_attention_heads=self.sample_ende_attention_heads_this_layer) - - self.fc1.set_sample_config(sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) - self.fc2.set_sample_config(sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) - - self.final_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + self.fc1.set_sample_config( + sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) + self.fc2.set_sample_config( + sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) + self.final_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) def prepare_for_onnx_export_(self): self.onnx_trace = True @@ -921,13 +966,15 @@ def forward( if self.encoder_attn is not None: residual = x - x = 
self.maybe_layer_norm(self.encoder_attn_layer_norm, x, before=True) + x = self.maybe_layer_norm( + self.encoder_attn_layer_norm, x, before=True) if prev_attn_state is not None: if incremental_state is None: incremental_state = {} prev_key, prev_value = prev_attn_state saved_state = {"prev_key": prev_key, "prev_value": prev_value} - self.encoder_attn._set_input_buffer(incremental_state, saved_state) + self.encoder_attn._set_input_buffer( + incremental_state, saved_state) x, attn = self.encoder_attn( query=x, key=encoder_out, @@ -939,12 +986,14 @@ def forward( ) x = F.dropout(x, p=self.sample_dropout, training=self.training) x = residual + x - x = self.maybe_layer_norm(self.encoder_attn_layer_norm, x, after=True) + x = self.maybe_layer_norm( + self.encoder_attn_layer_norm, x, after=True) residual = x x = self.maybe_layer_norm(self.final_layer_norm, x, before=True) x = self.activation_fn(self.fc1(x)) - x = F.dropout(x, p=self.sample_activation_dropout, training=self.training) + x = F.dropout(x, p=self.sample_activation_dropout, + training=self.training) x = self.fc2(x) x = F.dropout(x, p=self.sample_dropout, training=self.training) x = residual + x @@ -965,28 +1014,36 @@ def maybe_layer_norm(self, layer_norm, x, before=False, after=False): def make_generation_fast_(self, need_attn=False, **kwargs): self.need_attn = need_attn + def calc_dropout(dropout, sample_embed_dim, super_embed_dim): return dropout * 1.0 * sample_embed_dim / super_embed_dim + def Embedding(num_embeddings, embedding_dim, padding_idx): return EmbeddingSuper(num_embeddings, embedding_dim, padding_idx=padding_idx) + def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): m = nn.Linear(in_features, out_features, bias) - nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_(m.weight, non_linear=non_linear) + nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_( + m.weight, non_linear=non_linear) if bias: nn.init.constant_(m.bias, 0.) return m + def calc_dropout(dropout, sample_embed_dim, super_embed_dim): return dropout * 1.0 * sample_embed_dim / super_embed_dim + def Embedding(num_embeddings, embedding_dim, padding_idx): return EmbeddingSuper(num_embeddings, embedding_dim, padding_idx=padding_idx) + def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): m = nn.Linear(in_features, out_features, bias) - nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_(m.weight, non_linear=non_linear) + nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_( + m.weight, non_linear=non_linear) if bias: nn.init.constant_(m.bias, 0.) 
return m From 4b05fd240d09a888fc0e1af12f257800490bd28c Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 17 Nov 2022 22:06:23 -0800 Subject: [PATCH 07/60] Cleanup imports --- neural_compressor/experimental/nas/dynas.py | 2 +- .../machine_translation/transformer_interface.py | 10 +--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 7ba96ef169e..7cd280ff6c1 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -75,7 +75,7 @@ def __init__(self, conf_fname_or_obj): 'ofa_resnet50': OFARunner, 'ofa_mbv3_d234_e346_k357_w1.0': OFARunner, 'ofa_mbv3_d234_e346_k357_w1.2': OFARunner, - 'transformer_lt_wmt_en_de': TransformerLTRunner + 'transformer_lt_wmt_en_de': TransformerLTRunner, } self.EVALUATION_INTERFACE = {'ofa_resnet50': EvaluationInterfaceResNet50, diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index 554b7619a7a..a7ca55a153d 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -3,25 +3,17 @@ """ import torch -from fairseq import checkpoint_utils, options, progress_bar, tasks, utils +from fairseq import options, progress_bar, tasks, utils from fairseq.meters import StopwatchMeter, TimeMeter import sys -import pdb import numpy as np -import subprocess import os from fairseq.data import dictionary -import csv -import json import warnings from .transformer_supernetwork import TransformerSuperNetwork import sys -import logging -import tqdm import time -import copy -from datetime import datetime import ctypes import math warnings.filterwarnings("ignore") From 1e19ff8d88698e5603cbf44a7f420acc9415c1d4 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 17 Nov 2022 22:12:14 -0800 Subject: [PATCH 08/60] Cleanup imports --- neural_compressor/experimental/nas/dynas.py | 12 ++++---- .../experimental/nas/dynast/dynas_manager.py | 3 +- .../experimental/nas/dynast/dynas_search.py | 6 ++-- .../experimental/nas/dynast/dynas_utils.py | 18 +++++------ .../modules_supernetwork.py | 11 +++---- .../transformer_interface.py | 20 ++++++------- .../transformer_supernetwork.py | 30 ++++++------------- 7 files changed, 43 insertions(+), 57 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 7cd280ff6c1..bf8e7482213 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -13,8 +13,8 @@ # limitations under the License. 
import os -import pandas as pd +import pandas as pd from neural_compressor.conf.config import Conf, NASConfig from neural_compressor.utils import logger @@ -31,12 +31,14 @@ class DyNAS(NASBase): """ def __init__(self, conf_fname_or_obj): - from .dynast.dynas_manager import ParameterManager - from .dynast.dynas_manager import TransformerLTEncoding + from .dynast.dynas_manager import (ParameterManager, + TransformerLTEncoding) from .dynast.dynas_predictor import Predictor - from .dynast.dynas_search import ProblemMultiObjective, SearchAlgoManager + from .dynast.dynas_search import (ProblemMultiObjective, + SearchAlgoManager) from .dynast.dynas_utils import (EvaluationInterfaceMobileNetV3, - EvaluationInterfaceResNet50, EvaluationInterfaceTransformerLT, + EvaluationInterfaceResNet50, + EvaluationInterfaceTransformerLT, OFARunner, TransformerLTRunner) self.ParameterManager = ParameterManager diff --git a/neural_compressor/experimental/nas/dynast/dynas_manager.py b/neural_compressor/experimental/nas/dynast/dynas_manager.py index 61956464e97..6908e8d2d73 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_manager.py +++ b/neural_compressor/experimental/nas/dynast/dynas_manager.py @@ -21,9 +21,8 @@ import numpy as np import pandas as pd -from sklearn.model_selection import train_test_split - from neural_compressor.utils import logger +from sklearn.model_selection import train_test_split class ParameterManager: diff --git a/neural_compressor/experimental/nas/dynast/dynas_search.py b/neural_compressor/experimental/nas/dynast/dynas_search.py index fc3d5123450..e74075aac74 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_search.py +++ b/neural_compressor/experimental/nas/dynast/dynas_search.py @@ -20,15 +20,15 @@ import autograd.numpy as anp import numpy as np import pymoo +from neural_compressor.experimental.nas.dynast.dynas_utils import \ + EvaluationInterface +from neural_compressor.utils import logger from pymoo.algorithms.moo.age import AGEMOEA from pymoo.algorithms.moo.nsga2 import NSGA2 from pymoo.core.problem import Problem from pymoo.factory import get_crossover, get_mutation, get_sampling from pymoo.optimize import minimize -from neural_compressor.experimental.nas.dynast.dynas_utils import EvaluationInterface -from neural_compressor.utils import logger - class SearchAlgoManager: """ diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 2149860ff8e..7940834d49f 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -25,18 +25,18 @@ import numpy as np import ofa from fvcore.nn import FlopCountAnalysis -from ofa.imagenet_classification.data_providers.imagenet import ImagenetDataProvider -from ofa.imagenet_classification.run_manager import ImagenetRunConfig, RunManager -from ofa.tutorial.flops_table import rm_bn_from_net - -from neural_compressor.experimental.nas.dynast.dynas_manager import ParameterManager +from neural_compressor.experimental.nas.dynast.dynas_manager import \ + ParameterManager from neural_compressor.experimental.nas.dynast.dynas_predictor import Predictor +from neural_compressor.experimental.nas.supernetwork.machine_translation.transformer_interface import ( + compute_bleu, compute_latency) from neural_compressor.utils.utility import LazyImport, logger +from ofa.imagenet_classification.data_providers.imagenet import \ + ImagenetDataProvider +from ofa.imagenet_classification.run_manager import 
(ImagenetRunConfig, + RunManager) +from ofa.tutorial.flops_table import rm_bn_from_net -from neural_compressor.experimental.nas.supernetwork.machine_translation.transformer_interface import ( - compute_bleu, - compute_latency -) torch = LazyImport('torch') torchvision = LazyImport('torchvision') diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py index 61aeb409ae5..19a7f8b2b8f 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py @@ -1,14 +1,11 @@ +from collections import defaultdict + import torch import torch.nn as nn import torch.nn.functional as F - -import numpy as np -from torch.nn.modules.module import _addindent -from torch.nn import Parameter - from fairseq import utils -from collections import defaultdict - +from torch.nn import Parameter +from torch.nn.modules.module import _addindent INCREMENTAL_STATE_INSTANCE_ID = defaultdict(lambda: 0) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index a7ca55a153d..8caef91a202 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -1,21 +1,21 @@ """ Translate pre-processed data with a trained model. """ -import torch - -from fairseq import options, progress_bar, tasks, utils -from fairseq.meters import StopwatchMeter, TimeMeter +import ctypes +import math +import os import sys +import time +import warnings + import numpy as np -import os +import torch +from fairseq import options, progress_bar, tasks, utils from fairseq.data import dictionary -import warnings +from fairseq.meters import StopwatchMeter, TimeMeter + from .transformer_supernetwork import TransformerSuperNetwork -import sys -import time -import ctypes -import math warnings.filterwarnings("ignore") diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py index a47837bb36f..edf70234ea4 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py @@ -1,27 +1,15 @@ +import math + import torch -from torch import nn -from torch.nn import Parameter import torch.nn.functional as F - -from fairseq import utils from fairseq import options, utils -from fairseq.models import ( - FairseqEncoder, - FairseqIncrementalDecoder, - BaseFairseqModel -) - -from fairseq.modules import ( - PositionalEmbedding, SinusoidalPositionalEmbedding) -from .modules_supernetwork import ( - - MultiheadAttentionSuper, - EmbeddingSuper, - LinearSuper, - LayerNormSuper - -) -import math +from fairseq.models import (BaseFairseqModel, FairseqEncoder, + FairseqIncrementalDecoder) +from fairseq.modules import PositionalEmbedding, SinusoidalPositionalEmbedding +from torch import nn + +from .modules_supernetwork import (EmbeddingSuper, LayerNormSuper, LinearSuper, + MultiheadAttentionSuper) DEFAULT_MAX_SOURCE_POSITIONS = 1024 
DEFAULT_MAX_TARGET_POSITIONS = 1024 From 8d7f56be8a5939c17f6e97e7e795302ae5b5c9f0 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 10:46:46 -0800 Subject: [PATCH 09/60] Replace print with logger --- .../experimental/nas/dynast/dynas_search.py | 2 - .../transformer_interface.py | 45 ++++++------------- .../transformer_supernetwork.py | 4 +- 3 files changed, 17 insertions(+), 34 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_search.py b/neural_compressor/experimental/nas/dynast/dynas_search.py index e74075aac74..6a0a07b7f89 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_search.py +++ b/neural_compressor/experimental/nas/dynast/dynas_search.py @@ -211,7 +211,5 @@ def _evaluate( objective_x_arr.append(objective_x) objective_y_arr.append(objective_y) - print('.', end='', flush=True) - # Update PyMoo with evaluation data out["F"] = anp.column_stack([objective_x_arr, objective_y_arr]) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index 8caef91a202..c9fce76927d 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -14,6 +14,8 @@ from fairseq.data import dictionary from fairseq.meters import StopwatchMeter, TimeMeter +from neural_compressor.utils import logger + from .transformer_supernetwork import TransformerSuperNetwork warnings.filterwarnings("ignore") @@ -23,8 +25,7 @@ from fairseq import libbleu except ImportError as e: import sys - sys.stderr.write( - 'ERROR: missing libbleu.so. run `pip install --editable .`\n') + logger.error('missing libbleu.so. 
run `pip install --editable .`') raise e @@ -120,7 +121,6 @@ def result_string(self, order=4): def get_bleu_score(args, ref, sys): dict = dictionary.Dictionary() order = 4 - sacrebleu = False sentence_bleu = False ignore_case = False @@ -140,7 +140,6 @@ def score(fdsys): sys_tok = dict.encode_line(sys_tok) ref_tok = dict.encode_line(ref_tok) scorer.add(ref_tok, sys_tok) - print(i, scorer.result_string(order)) else: def score(fdsys): with open(ref) as fdref: @@ -149,7 +148,6 @@ def score(fdsys): sys_tok = dict.encode_line(sys_tok) ref_tok = dict.encode_line(ref_tok) scorer.add(ref_tok, sys_tok) - print(scorer.result_string(order)) return(scorer.score(order)) if sys == '-': @@ -157,6 +155,7 @@ def score(fdsys): else: with open(sys, 'r') as f: score = score(f) + logger.debug('Achieved BLEU score: {}'.format(score)) return score @@ -175,7 +174,6 @@ def compute_bleu(config, dataset_path, checkpoint_path): args.target_lang = 'de' args.batch_size = 128 utils.import_user_module(args) - max_tokens = 12000 use_cuda = torch.cuda.is_available() and not args.cpu @@ -197,7 +195,7 @@ def compute_bleu(config, dataset_path, checkpoint_path): tgt_dict = task.target_dictionary # Load ensemble - print('| loading model(s) from {}'.format(args.path)) + logger.info('Loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) state = torch.load(checkpoint_path, map_location=torch.device('cpu')) @@ -206,7 +204,6 @@ def compute_bleu(config, dataset_path, checkpoint_path): if use_cuda: model.cuda() - print(config) model.set_sample_config(config) model.make_generation_fast_( beamable_mm_beam_size=None if args.no_beamable_mm else args.beam, @@ -217,7 +214,6 @@ def compute_bleu(config, dataset_path, checkpoint_path): if use_cuda: model.cuda() - print(args.path, file=sys.stderr) # Load alignment dictionary for unknown word replacement # (None if no unknown word replacement, empty if no path to align dictionary) @@ -297,13 +293,11 @@ def compute_bleu(config, dataset_path, checkpoint_path): if not args.quiet: if src_dict is not None: - #print('S-{}\t{}'.format(sample_id, src_str)) fname_translations.write( 'S-{}\t{}'.format(sample_id, src_str)) fname_translations.write('\n') if has_target: - #print('T-{}\t{}'.format(sample_id, target_str)) fname_translations.write( 'T-{}\t{}'.format(sample_id, target_str)) fname_translations.write('\n') @@ -351,7 +345,6 @@ def compute_bleu(config, dataset_path, checkpoint_path): os.system( "grep ^T translations_out.txt | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > ref.txt") bleu_score = get_bleu_score(args, "ref.txt", "sys.txt") - print(bleu_score) os.system("rm ref.txt") os.system("rm sys.txt") @@ -373,7 +366,6 @@ def compute_latency(config, dataset_path, get_model_parameters=False): args.target_lang = 'de' args.batch_size = 128 utils.import_user_module(args) - max_tokens = 12000 args.latgpu = False args.latcpu = True args.latiter = 100 @@ -387,15 +379,9 @@ def compute_latency(config, dataset_path, get_model_parameters=False): # Load dataset splits task = tasks.setup_task(args) task.load_dataset(args.gen_subset) - # Set dictionaries - try: - src_dict = getattr(task, 'source_dictionary', None) - except NotImplementedError: - src_dict = None - tgt_dict = task.target_dictionary # Load ensemble - print('| loading model(s) from {}'.format(args.path)) + logger.info('Loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) # specify the length of the dummy input for profile @@ -417,13 +403,13 @@ def compute_latency(config, 
dataset_path, get_model_parameters=False): args.beam).view(-1).long() # .cuda() if args.latcpu: model.cpu() - print('Measuring model latency on CPU for dataset generation...') + logger.info('Measuring model latency on CPU for dataset generation...') elif args.latgpu: model.cuda() src_tokens_test = src_tokens_test # .cuda() src_lengths_test = src_lengths_test # .cuda() prev_output_tokens_test_with_beam = prev_output_tokens_test_with_beam # .cuda() - print('Measuring model latency on GPU for dataset generation...') + logger.info('Measuring model latency on GPU for dataset generation...') start = torch.cuda.Event(enable_timing=True) end = torch.cuda.Event(enable_timing=True) @@ -439,10 +425,9 @@ def compute_latency(config, dataset_path, get_model_parameters=False): src_tokens=src_tokens_test, src_lengths=src_lengths_test) encoder_latencies = [] - print('Measuring encoder for dataset generation...') + logger.info('Measuring encoder for dataset generation...') for _ in range(args.latiter): if args.latgpu: - # start.record() start = time.time() elif args.latcpu: start = time.time() @@ -460,8 +445,8 @@ def compute_latency(config, dataset_path, get_model_parameters=False): encoder_latencies.sort() encoder_latencies = encoder_latencies[int( args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] - print( - f'Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; \t Std: {np.std(encoder_latencies)} ms') + logger.info( + f'Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; Std: {np.std(encoder_latencies)} ms') encoder_out_test_with_beam = model.encoder.reorder_encoder_out( encoder_out_test, new_order) @@ -475,13 +460,12 @@ def compute_latency(config, dataset_path, get_model_parameters=False): decoder_iterations_dict = {'iwslt': 23, 'wmt': 30} decoder_iterations = decoder_iterations_dict['wmt'] - print(decoder_iterations) decoder_latencies = [] - print('Measuring decoder for dataset generation...') + + logger.info('Measuring decoder for dataset generation...') for _ in range(args.latiter): if args.latgpu: start = time.time() - # start.record() elif args.latcpu: start = time.time() incre_states = {} @@ -501,8 +485,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): decoder_latencies = decoder_latencies[int( args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] - print(decoder_latencies) - print( + logger.info( f'Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') lat_mean = np.mean(encoder_latencies)+np.mean(decoder_latencies) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py index edf70234ea4..af11dbd3720 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py @@ -8,6 +8,8 @@ from fairseq.modules import PositionalEmbedding, SinusoidalPositionalEmbedding from torch import nn +from neural_compressor.utils import logger + from .modules_supernetwork import (EmbeddingSuper, LayerNormSuper, LinearSuper, MultiheadAttentionSuper) @@ -88,6 +90,7 @@ def get_sampled_params_numel(self, config): return sum(numels) def set_sample_config(self, config): + logger.debug('Setting active configuration to {}'.format(config)) self.encoder.set_sample_config(config) 
self.decoder.set_sample_config(config) @@ -231,7 +234,6 @@ def forward(self, src_tokens, src_lengths): all_x = [] # encoder layers for layer in self.layers: - # print(x.shape) x = layer(x, encoder_padding_mask) all_x.append(x) From 55e7725d503cb7eef7232bd9c6e8d2868a029674 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 10:50:36 -0800 Subject: [PATCH 10/60] Replace `os.system('rm ...')` with `os.remove` --- .../machine_translation/transformer_interface.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index c9fce76927d..bbf7bbfb841 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -346,9 +346,9 @@ def compute_bleu(config, dataset_path, checkpoint_path): "grep ^T translations_out.txt | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > ref.txt") bleu_score = get_bleu_score(args, "ref.txt", "sys.txt") - os.system("rm ref.txt") - os.system("rm sys.txt") - os.system("rm translations_out.txt") + os.remove("ref.txt") + os.remove("sys.txt") + os.remove("translations_out.txt") return bleu_score From 16cccb4a83923822c14d892fdd2fc634f45b2542 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 11:04:26 -0800 Subject: [PATCH 11/60] Remove unused logs --- .../supernetwork/machine_translation/transformer_interface.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index bbf7bbfb841..befef3803f1 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -195,7 +195,6 @@ def compute_bleu(config, dataset_path, checkpoint_path): tgt_dict = task.target_dictionary # Load ensemble - logger.info('Loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) state = torch.load(checkpoint_path, map_location=torch.device('cpu')) @@ -381,7 +380,6 @@ def compute_latency(config, dataset_path, get_model_parameters=False): task.load_dataset(args.gen_subset) # Load ensemble - logger.info('Loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) # specify the length of the dummy input for profile From cb121e26c5debe453905e1fa51e441435a73377e Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 11:09:10 -0800 Subject: [PATCH 12/60] Make variable names more descriptive --- .../machine_translation/transformer_interface.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index befef3803f1..a4c4e431959 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -118,7 +118,7 @@ def result_string(self, order=4): self.stat.predlen, self.stat.reflen) -def 
get_bleu_score(args, ref, sys): +def get_bleu_score(args, reference_sentences_fpath, translated_sentences_fpath): dict = dictionary.Dictionary() order = 4 sentence_bleu = False @@ -133,7 +133,7 @@ def readlines(fd): if sentence_bleu: def score(fdsys): - with open(ref) as fdref: + with open(reference_sentences_fpath) as fdref: scorer = Scorer(dict.pad(), dict.eos(), dict.unk()) for i, (sys_tok, ref_tok) in enumerate(zip(readlines(fdsys), readlines(fdref))): scorer.reset(one_init=True) @@ -142,7 +142,7 @@ def score(fdsys): scorer.add(ref_tok, sys_tok) else: def score(fdsys): - with open(ref) as fdref: + with open(reference_sentences_fpath) as fdref: scorer = Scorer(dict.pad(), dict.eos(), dict.unk()) for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)): sys_tok = dict.encode_line(sys_tok) @@ -150,10 +150,10 @@ def score(fdsys): scorer.add(ref_tok, sys_tok) return(scorer.score(order)) - if sys == '-': + if translated_sentences_fpath == '-': score = score(sys.stdin) else: - with open(sys, 'r') as f: + with open(translated_sentences_fpath, 'r') as f: score = score(f) logger.debug('Achieved BLEU score: {}'.format(score)) return score From 1e9b6b8a9f001a18f8a089142a3c8dfad991bc1c Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 11:51:10 -0800 Subject: [PATCH 13/60] Remove duplicate definitions --- .../transformer_supernetwork.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py index af11dbd3720..083a3492c0c 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py @@ -1020,20 +1020,3 @@ def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='line if bias: nn.init.constant_(m.bias, 0.) return m - - -def calc_dropout(dropout, sample_embed_dim, super_embed_dim): - return dropout * 1.0 * sample_embed_dim / super_embed_dim - - -def Embedding(num_embeddings, embedding_dim, padding_idx): - return EmbeddingSuper(num_embeddings, embedding_dim, padding_idx=padding_idx) - - -def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): - m = nn.Linear(in_features, out_features, bias) - nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_( - m.weight, non_linear=non_linear) - if bias: - nn.init.constant_(m.bias, 0.) 
- return m From 322c277432fca3b9512157126c3087a7fd02212e Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 11:52:02 -0800 Subject: [PATCH 14/60] Remove unused code --- .../transformer_supernetwork.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py index 083a3492c0c..69b9ad6ed25 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py @@ -2,7 +2,7 @@ import torch import torch.nn.functional as F -from fairseq import options, utils +from fairseq import utils from fairseq.models import (BaseFairseqModel, FairseqEncoder, FairseqIncrementalDecoder) from fairseq.modules import PositionalEmbedding, SinusoidalPositionalEmbedding @@ -377,17 +377,7 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.project_out_dim = Linear(self.super_embed_dim, self.output_embed_dim, bias=False) \ if self.super_embed_dim != self.output_embed_dim and not args.tie_adaptive_weights else None - if False: # args.adaptive_softmax_cutoff is not None: - self.adaptive_softmax = AdaptiveSoftmax( - len(dictionary), - self.output_embed_dim, - options.eval_str_list(args.adaptive_softmax_cutoff, type=int), - dropout=args.adaptive_softmax_dropout, - adaptive_inputs=embed_tokens if args.tie_adaptive_weights else None, - factor=args.adaptive_softmax_factor, - tie_proj=args.tie_adaptive_proj, - ) - elif not self.share_input_output_embed: + if not self.share_input_output_embed: self.embed_out = nn.Parameter(torch.Tensor( len(dictionary), self.output_embed_dim)) nn.init.normal_(self.embed_out, mean=0, @@ -843,7 +833,7 @@ def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv # use layerNorm rather than FusedLayerNorm for exporting. # char_inputs can be used to determint this. # TODO remove this once we update apex with the fix - export = False + self.self_attn_layer_norm = LayerNormSuper(self.super_embed_dim) if no_encoder_attn: From abddec7713f9aa00e4dcbcccd6aac5a62fc62fb7 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 11:52:20 -0800 Subject: [PATCH 15/60] Add TODOs --- .../machine_translation/transformer_interface.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index a4c4e431959..d0f700eaa15 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -24,7 +24,6 @@ try: from fairseq import libbleu except ImportError as e: - import sys logger.error('missing libbleu.so. 
run `pip install --editable .`') raise e @@ -155,7 +154,7 @@ def score(fdsys): else: with open(translated_sentences_fpath, 'r') as f: score = score(f) - logger.debug('Achieved BLEU score: {}'.format(score)) + logger.info('Achieved BLEU score: {}'.format(score)) return score @@ -164,7 +163,7 @@ def compute_bleu(config, dataset_path, checkpoint_path): parser = options.get_generation_parser() args = options.parse_args_and_arch(parser, [dataset_path]) - + # TODO(macsz) Un-hardcode args args.data = dataset_path args.beam = 5 args.remove_bpe = '@@ ' @@ -240,7 +239,6 @@ def compute_bleu(config, dataset_path, checkpoint_path): num_sentences = 0 has_target = True - decoder_times_all = [] input_len_all = [] with open('translations_out.txt', 'a') as fname_translations: with progress_bar.build_progress_bar(args, itr) as t: @@ -356,6 +354,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): args = options.parse_args_and_arch(parser, [dataset_path]) + # TODO(macsz) Un-hardcode args args.data = dataset_path args.beam = 5 args.remove_bpe = '@@ ' From 051ae2626fb6832d6f86ea616b78170084fba4d5 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 11:59:25 -0800 Subject: [PATCH 16/60] Update progress tracking --- neural_compressor/experimental/nas/dynas.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index bf8e7482213..af1903bc0f4 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -152,10 +152,11 @@ def search(self): # Start Lightweight Iterative Neural Architecture Search (LINAS) num_loops = round(self.num_evals/self.population) for loop in range(num_loops): - logger.info( - '[DyNAS-T] Starting LINAS loop {} of {}.'.format(loop+1, num_loops)) - for individual in latest_population: + for i, individual in enumerate(latest_population): + logger.info( + '[DyNAS-T] Starting eval {} of {} in LINAS loop {} of {}.'.format( + i+1, max(self.population - df.shape[0], 0), loop+1, num_loops)) self.validation_interface.eval_subnet(individual) self.create_acc_predictor() From d70c55e4bfbdbae7493d0b5fe96e6de4d388c2aa Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 12:23:29 -0800 Subject: [PATCH 17/60] Move supernetwork dir under dynast dir --- neural_compressor/experimental/nas/dynas.py | 4 ---- neural_compressor/experimental/nas/dynast/dynas_utils.py | 2 +- .../supernetwork/machine_translation/modules_supernetwork.py | 0 .../supernetwork/machine_translation/transformer_interface.py | 0 .../machine_translation/transformer_supernetwork.py | 2 +- 5 files changed, 2 insertions(+), 6 deletions(-) rename neural_compressor/experimental/nas/{ => dynast}/supernetwork/machine_translation/modules_supernetwork.py (100%) rename neural_compressor/experimental/nas/{ => dynast}/supernetwork/machine_translation/transformer_interface.py (100%) rename neural_compressor/experimental/nas/{ => dynast}/supernetwork/machine_translation/transformer_supernetwork.py (99%) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index af1903bc0f4..7821bca9e7b 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -112,10 +112,6 @@ def init_for_search(self): self.supernet_manager = self.SUPERNET_ENCODING[self.supernet]( param_dict=self.SUPERNET_PARAMETERS[self.supernet], seed=self.seed ) - # self.supernet_manager = 
self.ParameterManager( - # param_dict=self.SUPERNET_PARAMETERS[self.supernet], - # seed=self.seed - # ) # Validation High-Fidelity Measurement Runner self.runner_validate = self.RUNNERS[self.supernet]( diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 7940834d49f..a62838dfa2e 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -28,7 +28,7 @@ from neural_compressor.experimental.nas.dynast.dynas_manager import \ ParameterManager from neural_compressor.experimental.nas.dynast.dynas_predictor import Predictor -from neural_compressor.experimental.nas.supernetwork.machine_translation.transformer_interface import ( +from neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface import ( compute_bleu, compute_latency) from neural_compressor.utils.utility import LazyImport, logger from ofa.imagenet_classification.data_providers.imagenet import \ diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py similarity index 100% rename from neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py rename to neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py similarity index 100% rename from neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py rename to neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py similarity index 99% rename from neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py rename to neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index 69b9ad6ed25..b0619f62184 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -90,7 +90,7 @@ def get_sampled_params_numel(self, config): return sum(numels) def set_sample_config(self, config): - logger.debug('Setting active configuration to {}'.format(config)) + logger.info('[DyNAS-T] Setting active configuration to {}'.format(config)) self.encoder.set_sample_config(config) self.decoder.set_sample_config(config) From 4a60f47be59752f59b22b6ff48b0b0b708ad5631 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 12:30:37 -0800 Subject: [PATCH 18/60] Update logging and TODos --- neural_compressor/experimental/nas/dynas.py | 6 +++--- neural_compressor/experimental/nas/dynast/dynas_utils.py | 4 ++-- .../machine_translation/transformer_interface.py | 9 +++++---- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 7821bca9e7b..eef3fcdbf20 100644 --- 
a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -217,7 +217,7 @@ def select_model_arch(self): # pragma: no cover def create_acc_predictor(self): if 'acc' in self.metrics: - logger.info('Building Accuracy Predictor') + logger.info('[DyNAS-T] Building Accuracy Predictor') df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='acc', @@ -230,7 +230,7 @@ def create_acc_predictor(self): def create_macs_predictor(self): if 'macs' in self.metrics: - logger.info('Building MACs Predictor') + logger.info('[DyNAS-T] Building MACs Predictor') df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='macs', @@ -243,7 +243,7 @@ def create_macs_predictor(self): def create_latency_predictor(self): if 'lat' in self.metrics: - logger.info('Building Latency Predictor') + logger.info('[DyNAS-T] Building Latency Predictor') df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='lat', diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index a62838dfa2e..65b5b478a75 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -227,7 +227,7 @@ def validate_macs( model = self.get_subnet(subnet_cfg) input_size = (self.batch_size, 3, 224, 224) macs = get_macs(model=model, input_size=input_size, device=self.device) - logger.info('Model\'s macs: {}'.format(macs)) + logger.info('[DyNAS-T] Model\'s macs: {}'.format(macs)) return macs @torch.no_grad() @@ -254,7 +254,7 @@ def measure_latency( device=self.device, ) logger.info( - 'Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) + '[DyNAS-T] Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) return latency_mean, latency_std diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index d0f700eaa15..940ab7e47f5 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -337,6 +337,7 @@ def compute_bleu(config, dataset_path, checkpoint_path): t.log({'wps': round(wps_meter.avg)}) num_sentences += sample['nsentences'] + # TODO(macsz) Try to convert this system call to Python code os.system( "grep ^H translations_out.txt | cut -f3- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > sys.txt") os.system( @@ -422,7 +423,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): src_tokens=src_tokens_test, src_lengths=src_lengths_test) encoder_latencies = [] - logger.info('Measuring encoder for dataset generation...') + logger.info('[DyNAS-T] Measuring encoder for dataset generation...') for _ in range(args.latiter): if args.latgpu: start = time.time() @@ -443,7 +444,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): encoder_latencies = encoder_latencies[int( args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] logger.info( - f'Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; Std: {np.std(encoder_latencies)} ms') + f'[DyNAS-T] Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; Std: {np.std(encoder_latencies)} ms') encoder_out_test_with_beam = 
model.encoder.reorder_encoder_out( encoder_out_test, new_order) @@ -459,7 +460,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): decoder_iterations = decoder_iterations_dict['wmt'] decoder_latencies = [] - logger.info('Measuring decoder for dataset generation...') + logger.info('[DyNAS-T] Measuring decoder for dataset generation...') for _ in range(args.latiter): if args.latgpu: start = time.time() @@ -483,7 +484,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] logger.info( - f'Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') + f'[DyNAS-T] Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') lat_mean = np.mean(encoder_latencies)+np.mean(decoder_latencies) lat_std = np.std(encoder_latencies)+np.std(decoder_latencies) From d8866a9d5c73af3c75d6dbc2a664e18e76dca94e Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 12:31:45 -0800 Subject: [PATCH 19/60] Log warning when measuring MACs for transformer LT (not supported) --- neural_compressor/experimental/nas/dynast/dynas_utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 65b5b478a75..a7a01f8f6a7 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -342,12 +342,9 @@ def validate_macs( Returns: `macs` """ - - #model = self.get_subnet(subnet_cfg) - #input_size = (self.batch_size, 3, 224, 224) - #macs = get_macs(model=model, input_size=input_size, device=self.device) + logger.warning('Transformer LT search space does not currently support MACs metric.') + # TODO(macsz) Provide fix for MACs measurement for Transformer LT search space. 
macs = 0 - #logger.info('Model\'s macs: {}'.format(macs)) return macs @torch.no_grad() From 41bebb4f18a677600ad86774ac203b45c6b717a0 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 12:34:09 -0800 Subject: [PATCH 20/60] Update LINAS loop --- neural_compressor/experimental/nas/dynas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index eef3fcdbf20..53e18a1afb5 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -152,7 +152,7 @@ def search(self): for i, individual in enumerate(latest_population): logger.info( '[DyNAS-T] Starting eval {} of {} in LINAS loop {} of {}.'.format( - i+1, max(self.population - df.shape[0], 0), loop+1, num_loops)) + i+1, len(latest_population), loop+1, num_loops)) self.validation_interface.eval_subnet(individual) self.create_acc_predictor() From e773a55a2681ed34af66b49190e0e2eff1cb0faf Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 12:36:57 -0800 Subject: [PATCH 21/60] Fix error when CSV file does not exist --- neural_compressor/experimental/nas/dynas.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 53e18a1afb5..6b205e4e269 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -140,7 +140,11 @@ def search(self): # Randomly sample search space for initial population # if number of results in results_csv_path smaller than population. - # TODO(macsz) Create empty CSV if it does not exists. + + if not os.path.exists(self.results_csv_path): + # Clear also creates empty CSV file. + self.validation_interface.clear_csv() + df = pd.read_csv(self.results_csv_path) latest_population = [self.supernet_manager.random_sample() for _ in range(max(self.population - df.shape[0], 0))] From aba049551dad4dc6c80aae88cd3a1d3b0e780738 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 12:45:25 -0800 Subject: [PATCH 22/60] Change column names in CSV file for Transformer LT --- .../experimental/nas/dynast/dynas_utils.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index a7a01f8f6a7..ef03435426f 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -277,10 +277,6 @@ def get_subnet( class TransformerLTRunner(Runner): - """The OFARunner class manages the sub-network selection from the OFA super-network and - the validation measurements of the sub-networks. ResNet50, MobileNetV3 w1.0, and MobileNetV3 w1.2 - are currently supported. Imagenet is required for these super-networks `imagenet-ilsvrc2012`. - """ def __init__( self, @@ -354,7 +350,7 @@ def measure_latency( warmup_steps: int = None, measure_steps: int = None, ) -> Tuple[float, float]: - """Measure OFA model's latency. + """Measure model's latency. 
Args: subnet_cfg: sub-network Torch model Returns: @@ -408,7 +404,7 @@ def clear_csv(self) -> None: f = open(self.csv_path, "w") writer = csv.writer(f) result = ['Sub-network', 'Date', - 'Latency (ms)', ' MACs', 'Top-1 Acc (%)'] + 'Latency (ms)', 'MACs', 'Top-1 Acc (%)'] writer.writerow(result) f.close() @@ -601,6 +597,15 @@ def eval_subnet( else: return sample, macs, -bleu + def clear_csv(self) -> None: + if self.csv_path: + f = open(self.csv_path, "w") + writer = csv.writer(f) + result = ['Sub-network', 'Date', + 'Latency (ms)', 'MACs', 'BLEU'] + writer.writerow(result) + f.close() + def get_torchvision_model( model_name: str, From ead69995bafcfd1b2ff3faa940404ccf59f6c36a Mon Sep 17 00:00:00 2001 From: "Nittur Sridhar, Sharath" Date: Fri, 18 Nov 2022 18:10:28 -0800 Subject: [PATCH 23/60] add macs computation for transformers --- .../experimental/nas/dynast/dynas_utils.py | 11 ++-- .../modules_supernetwork.py | 2 +- .../transformer_interface.py | 61 +++++++++++++++++++ .../transformer_supernetwork.py | 5 ++ 4 files changed, 73 insertions(+), 6 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index ef03435426f..7cfe7298b92 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -29,7 +29,7 @@ ParameterManager from neural_compressor.experimental.nas.dynast.dynas_predictor import Predictor from neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface import ( - compute_bleu, compute_latency) + compute_bleu, compute_latency, compute_macs) from neural_compressor.utils.utility import LazyImport, logger from ofa.imagenet_classification.data_providers.imagenet import \ ImagenetDataProvider @@ -338,9 +338,10 @@ def validate_macs( Returns: `macs` """ - logger.warning('Transformer LT search space does not currently support MACs metric.') - # TODO(macsz) Provide fix for MACs measurement for Transformer LT search space. 
- macs = 0 + + macs = compute_macs(subnet_cfg, self.dataset_path) + logger.info('[DyNAS-T] Model\'s macs: {}'.format(macs)) + return macs @torch.no_grad() @@ -360,7 +361,7 @@ def measure_latency( latency_mean, latency_std = compute_latency( subnet_cfg, self.dataset_path) logger.info( - 'Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) + '[DyNAS-T] Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) return latency_mean, latency_std diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index 19a7f8b2b8f..bdb44d25a54 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -381,7 +381,7 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No k = self.in_proj_k(key) v = self.in_proj_v(value) - q *= self.scaling + q = q * self.scaling if self.bias_k is not None: assert self.bias_v is not None diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 940ab7e47f5..d9a6f07b475 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -17,6 +17,7 @@ from neural_compressor.utils import logger from .transformer_supernetwork import TransformerSuperNetwork +from fvcore.nn import FlopCountAnalysis warnings.filterwarnings("ignore") @@ -489,3 +490,63 @@ def compute_latency(config, dataset_path, get_model_parameters=False): lat_mean = np.mean(encoder_latencies)+np.mean(decoder_latencies) lat_std = np.std(encoder_latencies)+np.std(decoder_latencies) return lat_mean, lat_std + + +def compute_macs(config,dataset_path): + parser = options.get_generation_parser() + + args = options.parse_args_and_arch(parser,[dataset_path]) + + args.data = dataset_path + args.beam = 5 + args.remove_bpe = '@@ ' + args.gen_subset = 'test' + args.lenpen = 0.6 + args.source_lang = 'en' + args.target_lang = 'de' + args.batch_size = 128 + utils.import_user_module(args) + max_tokens = 12000 + args.latgpu=False + args.latcpu=True + args.latiter=100 + + # Initialize CUDA and distributed training + if torch.cuda.is_available() and not args.cpu: + torch.cuda.set_device(args.device_id) + torch.manual_seed(args.seed) + + #Optimize ensemble for generation + # Load dataset splits + task = tasks.setup_task(args) + task.load_dataset(args.gen_subset) + # Set dictionaries + try: + src_dict = getattr(task, 'source_dictionary', None) + except NotImplementedError: + src_dict = None + tgt_dict = task.target_dictionary + + # Load model + print('| loading model(s) from {}'.format(args.path)) + model = TransformerSuperNetwork(task) + + # specify the length of the dummy input for profile + # for iwslt, the average length is 23, for wmt, that is 30 + dummy_sentence_length_dict = {'iwslt': 23, 'wmt': 30} + + dummy_sentence_length = dummy_sentence_length_dict['wmt'] + + + dummy_src_tokens = [2] + [7] * (dummy_sentence_length - 1) + dummy_prev = [7] * (dummy_sentence_length - 1) + [2] + + model.set_sample_config(config) + + model.profile(mode=True) + macs = 
FlopCountAnalysis(model, (torch.tensor([dummy_src_tokens], dtype=torch.long), + torch.tensor([30]), torch.tensor([dummy_prev], dtype=torch.long))) + macs_tot = macs.total() + model.profile(mode=False) + + return macs_tot diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index b0619f62184..7890e0605d3 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -94,6 +94,11 @@ def set_sample_config(self, config): self.encoder.set_sample_config(config) self.decoder.set_sample_config(config) + def forward(self,src_tokens,src_lengths,prev_output_token): + encoder_output = self.encoder.forward(src_tokens,src_lengths) + output = self.decoder(prev_output_token,encoder_output) + return output + class TransformerEncoder(FairseqEncoder): """ From e222eb7c7a339b5fc8c31aeb3ec9c2a4665733ad Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Mon, 21 Nov 2022 12:52:13 -0800 Subject: [PATCH 24/60] Add `batch size` to compute latency for TransformerLT --- neural_compressor/experimental/nas/dynast/dynas_utils.py | 6 ++---- .../machine_translation/transformer_interface.py | 7 +++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 7cfe7298b92..2fa203fe9a2 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -338,7 +338,7 @@ def validate_macs( Returns: `macs` """ - + macs = compute_macs(subnet_cfg, self.dataset_path) logger.info('[DyNAS-T] Model\'s macs: {}'.format(macs)) @@ -348,8 +348,6 @@ def validate_macs( def measure_latency( self, subnet_cfg: dict, - warmup_steps: int = None, - measure_steps: int = None, ) -> Tuple[float, float]: """Measure model's latency. 
Args: @@ -359,7 +357,7 @@ def measure_latency( """ latency_mean, latency_std = compute_latency( - subnet_cfg, self.dataset_path) + subnet_cfg, self.dataset_path, self.batch_size) logger.info( '[DyNAS-T] Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index d9a6f07b475..8e38a457152 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -164,7 +164,7 @@ def compute_bleu(config, dataset_path, checkpoint_path): parser = options.get_generation_parser() args = options.parse_args_and_arch(parser, [dataset_path]) - # TODO(macsz) Un-hardcode args + args.data = dataset_path args.beam = 5 args.remove_bpe = '@@ ' @@ -351,12 +351,11 @@ def compute_bleu(config, dataset_path, checkpoint_path): return bleu_score -def compute_latency(config, dataset_path, get_model_parameters=False): +def compute_latency(config, dataset_path, batch_size, get_model_parameters=False): parser = options.get_generation_parser() args = options.parse_args_and_arch(parser, [dataset_path]) - # TODO(macsz) Un-hardcode args args.data = dataset_path args.beam = 5 args.remove_bpe = '@@ ' @@ -364,7 +363,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): args.lenpen = 0.6 args.source_lang = 'en' args.target_lang = 'de' - args.batch_size = 128 + args.batch_size = batch_size utils.import_user_module(args) args.latgpu = False args.latcpu = True From f5b79223cb0210aeddf47828696c7c340cf38188 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Mon, 21 Nov 2022 13:09:48 -0800 Subject: [PATCH 25/60] Remove old cuda calls --- .../transformer_interface.py | 23 +++++++------------ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 8e38a457152..99e5fb0bf3e 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -392,21 +392,21 @@ def compute_latency(config, dataset_path, batch_size, get_model_parameters=False dummy_prev = [7] * (dummy_sentence_length - 1) + [2] src_tokens_test = torch.tensor( - [dummy_src_tokens], dtype=torch.long) # .cuda() - src_lengths_test = torch.tensor([dummy_sentence_length]) # .cuda() + [dummy_src_tokens], dtype=torch.long) + src_lengths_test = torch.tensor([dummy_sentence_length]) prev_output_tokens_test_with_beam = torch.tensor( - [dummy_prev] * args.beam, dtype=torch.long) # .cuda() + [dummy_prev] * args.beam, dtype=torch.long) bsz = 1 new_order = torch.arange(bsz).view(-1, 1).repeat(1, - args.beam).view(-1).long() # .cuda() + args.beam).view(-1).long() if args.latcpu: model.cpu() logger.info('Measuring model latency on CPU for dataset generation...') elif args.latgpu: model.cuda() - src_tokens_test = src_tokens_test # .cuda() - src_lengths_test = src_lengths_test # .cuda() - prev_output_tokens_test_with_beam = prev_output_tokens_test_with_beam # .cuda() + src_tokens_test = src_tokens_test + src_lengths_test = src_lengths_test + 
prev_output_tokens_test_with_beam = prev_output_tokens_test_with_beam logger.info('Measuring model latency on GPU for dataset generation...') start = torch.cuda.Event(enable_timing=True) end = torch.cuda.Event(enable_timing=True) @@ -505,7 +505,6 @@ def compute_macs(config,dataset_path): args.target_lang = 'de' args.batch_size = 128 utils.import_user_module(args) - max_tokens = 12000 args.latgpu=False args.latcpu=True args.latiter=100 @@ -519,15 +518,9 @@ def compute_macs(config,dataset_path): # Load dataset splits task = tasks.setup_task(args) task.load_dataset(args.gen_subset) - # Set dictionaries - try: - src_dict = getattr(task, 'source_dictionary', None) - except NotImplementedError: - src_dict = None - tgt_dict = task.target_dictionary # Load model - print('| loading model(s) from {}'.format(args.path)) + logger.info('[DyNAS-T] loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) # specify the length of the dummy input for profile From 4ef8f0543bce4aa196e20837c6343263c5743e89 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Tue, 22 Nov 2022 07:45:23 -0800 Subject: [PATCH 26/60] Remove comment --- .../supernetwork/machine_translation/modules_supernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index bdb44d25a54..8650a024db7 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -561,7 +561,7 @@ def reorder_incremental_state(self, incremental_state, new_order): self._set_input_buffer(incremental_state, input_buffer) def _get_input_buffer(self, incremental_state): - return get_incremental_state( # utils. + return get_incremental_state( self, incremental_state, 'attn_state', From f022e2a90a2e57f62c14ecfbd9280f6bfb3102de Mon Sep 17 00:00:00 2001 From: "Nittur Sridhar, Sharath" Date: Sun, 27 Nov 2022 22:19:31 -0800 Subject: [PATCH 27/60] replace bleu with sacrebleu --- .../transformer_interface.py | 266 ++---------------- 1 file changed, 20 insertions(+), 246 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 99e5fb0bf3e..dac058d24ec 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -13,6 +13,7 @@ from fairseq import options, progress_bar, tasks, utils from fairseq.data import dictionary from fairseq.meters import StopwatchMeter, TimeMeter +from fairseq.data.encoders.moses_tokenizer import MosesTokenizer from neural_compressor.utils import logger @@ -22,143 +23,6 @@ warnings.filterwarnings("ignore") -try: - from fairseq import libbleu -except ImportError as e: - logger.error('missing libbleu.so. 
run `pip install --editable .`') - raise e - - -C = ctypes.cdll.LoadLibrary(libbleu.__file__) - - -class BleuStat(ctypes.Structure): - _fields_ = [ - ('reflen', ctypes.c_size_t), - ('predlen', ctypes.c_size_t), - ('match1', ctypes.c_size_t), - ('count1', ctypes.c_size_t), - ('match2', ctypes.c_size_t), - ('count2', ctypes.c_size_t), - ('match3', ctypes.c_size_t), - ('count3', ctypes.c_size_t), - ('match4', ctypes.c_size_t), - ('count4', ctypes.c_size_t), - ] - - -class Scorer(object): - def __init__(self, pad, eos, unk): - self.stat = BleuStat() - self.pad = pad - self.eos = eos - self.unk = unk - self.reset() - - def reset(self, one_init=False): - if one_init: - C.bleu_one_init(ctypes.byref(self.stat)) - else: - C.bleu_zero_init(ctypes.byref(self.stat)) - - def add(self, ref, pred): - if not isinstance(ref, torch.IntTensor): - raise TypeError('ref must be a torch.IntTensor (got {})' - .format(type(ref))) - if not isinstance(pred, torch.IntTensor): - raise TypeError('pred must be a torch.IntTensor(got {})' - .format(type(pred))) - - # don't match unknown words - rref = ref.clone() - assert not rref.lt(0).any() - rref[rref.eq(self.unk)] = -999 - - rref = rref.contiguous().view(-1) - pred = pred.contiguous().view(-1) - - C.bleu_add( - ctypes.byref(self.stat), - ctypes.c_size_t(rref.size(0)), - ctypes.c_void_p(rref.data_ptr()), - ctypes.c_size_t(pred.size(0)), - ctypes.c_void_p(pred.data_ptr()), - ctypes.c_int(self.pad), - ctypes.c_int(self.eos)) - - def score(self, order=4): - psum = sum(math.log(p) if p > 0 else float('-Inf') - for p in self.precision()[:order]) - return self.brevity() * math.exp(psum / order) * 100 - - def precision(self): - def ratio(a, b): - return a / b if b > 0 else 0 - - return [ - ratio(self.stat.match1, self.stat.count1), - ratio(self.stat.match2, self.stat.count2), - ratio(self.stat.match3, self.stat.count3), - ratio(self.stat.match4, self.stat.count4), - ] - - def brevity(self): - r = self.stat.reflen / self.stat.predlen - return min(1, math.exp(1 - r)) - - def result_string(self, order=4): - assert order <= 4, "BLEU scores for order > 4 aren't supported" - fmt = 'BLEU{} = {:2.2f}, {:2.1f}' - for _ in range(1, order): - fmt += '/{:2.1f}' - fmt += ' (BP={:.3f}, ratio={:.3f}, syslen={}, reflen={})' - bleup = [p * 100 for p in self.precision()[:order]] - return fmt.format(order, self.score(order=order), *bleup, - self.brevity(), self.stat.predlen/self.stat.reflen, - self.stat.predlen, self.stat.reflen) - - -def get_bleu_score(args, reference_sentences_fpath, translated_sentences_fpath): - dict = dictionary.Dictionary() - order = 4 - sentence_bleu = False - ignore_case = False - - def readlines(fd): - for line in fd.readlines(): - if ignore_case: - yield line.lower() - else: - yield line - - if sentence_bleu: - def score(fdsys): - with open(reference_sentences_fpath) as fdref: - scorer = Scorer(dict.pad(), dict.eos(), dict.unk()) - for i, (sys_tok, ref_tok) in enumerate(zip(readlines(fdsys), readlines(fdref))): - scorer.reset(one_init=True) - sys_tok = dict.encode_line(sys_tok) - ref_tok = dict.encode_line(ref_tok) - scorer.add(ref_tok, sys_tok) - else: - def score(fdsys): - with open(reference_sentences_fpath) as fdref: - scorer = Scorer(dict.pad(), dict.eos(), dict.unk()) - for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)): - sys_tok = dict.encode_line(sys_tok) - ref_tok = dict.encode_line(ref_tok) - scorer.add(ref_tok, sys_tok) - return(scorer.score(order)) - - if translated_sentences_fpath == '-': - score = score(sys.stdin) - else: - with 
open(translated_sentences_fpath, 'r') as f: - score = score(f) - logger.info('Achieved BLEU score: {}'.format(score)) - return score - - def compute_bleu(config, dataset_path, checkpoint_path): parser = options.get_generation_parser() @@ -173,6 +37,9 @@ def compute_bleu(config, dataset_path, checkpoint_path): args.source_lang = 'en' args.target_lang = 'de' args.batch_size = 128 + args.eval_bleu_remove_bpe = '@@ ' + args.eval_bleu_detok = 'moses' + utils.import_user_module(args) use_cuda = torch.cuda.is_available() and not args.cpu @@ -187,6 +54,9 @@ def compute_bleu(config, dataset_path, checkpoint_path): # Load dataset splits task = tasks.setup_task(args) task.load_dataset(args.gen_subset) + + tokenizer = MosesTokenizer(args) + task.tokenizer=tokenizer # Set dictionaries try: src_dict = getattr(task, 'source_dictionary', None) @@ -239,115 +109,19 @@ def compute_bleu(config, dataset_path, checkpoint_path): generator = task.build_generator([model], args) num_sentences = 0 - has_target = True - input_len_all = [] - with open('translations_out.txt', 'a') as fname_translations: - with progress_bar.build_progress_bar(args, itr) as t: - wps_meter = TimeMeter() - for sample in t: - - sample = utils.move_to_cuda(sample) if use_cuda else sample - if 'net_input' not in sample: - continue - - prefix_tokens = None - if args.prefix_size > 0: - prefix_tokens = sample['target'][:, :args.prefix_size] - - gen_timer.start() - hypos = task.inference_step( - generator, [model], sample, prefix_tokens) - input_len_all.append( - np.mean(sample['net_input']['src_lengths'].cpu().numpy())) - num_generated_tokens = sum(len(h[0]['tokens']) for h in hypos) - gen_timer.stop(num_generated_tokens) - - for i, sample_id in enumerate(sample['id'].tolist()): - has_target = sample['target'] is not None - - # Remove padding - src_tokens = utils.strip_pad( - sample['net_input']['src_tokens'][i, :], tgt_dict.pad()) - target_tokens = None - if has_target: - target_tokens = utils.strip_pad( - sample['target'][i, :], tgt_dict.pad()).int().cpu() - - # Either retrieve the original sentences or regenerate them from tokens. 
- if align_dict is not None: - src_str = task.dataset( - args.gen_subset).src.get_original_text(sample_id) - target_str = task.dataset( - args.gen_subset).tgt.get_original_text(sample_id) - else: - if src_dict is not None: - src_str = src_dict.string( - src_tokens, args.remove_bpe) - else: - src_str = "" - if has_target: - target_str = tgt_dict.string( - target_tokens, args.remove_bpe, escape_unk=True) - - if not args.quiet: - if src_dict is not None: - fname_translations.write( - 'S-{}\t{}'.format(sample_id, src_str)) - fname_translations.write('\n') - - if has_target: - fname_translations.write( - 'T-{}\t{}'.format(sample_id, target_str)) - fname_translations.write('\n') - - # Process top predictions - for j, hypo in enumerate(hypos[i][:args.nbest]): - hypo_tokens, hypo_str, alignment = utils.post_process_prediction( - hypo_tokens=hypo['tokens'].int().cpu(), - src_str=src_str, - alignment=hypo['alignment'].int().cpu( - ) if hypo['alignment'] is not None else None, - align_dict=align_dict, - tgt_dict=tgt_dict, - remove_bpe=args.remove_bpe, - ) - - if not args.quiet: - - fname_translations.write( - 'H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str)) - fname_translations.write('\n') - fname_translations.write('P-{}\t{}'.format( - sample_id, - ' '.join(map( - lambda x: '{:.4f}'.format(x), - hypo['positional_scores'].tolist(), - )) - )) - fname_translations.write('\n') - - if args.print_alignment: - fname_translations.write('A-{}\t{}'.format( - sample_id, - ' '.join( - map(lambda x: str(utils.item(x)), alignment)) - )) - fname_translations.write('\n') - - wps_meter.update(num_generated_tokens) - t.log({'wps': round(wps_meter.avg)}) - num_sentences += sample['nsentences'] - - # TODO(macsz) Try to convert this system call to Python code - os.system( - "grep ^H translations_out.txt | cut -f3- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > sys.txt") - os.system( - "grep ^T translations_out.txt | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > ref.txt") - bleu_score = get_bleu_score(args, "ref.txt", "sys.txt") - - os.remove("ref.txt") - os.remove("sys.txt") - os.remove("translations_out.txt") + bleu_list = [] + with progress_bar.build_progress_bar(args, itr) as t: + for sample in t: + sample = utils.move_to_cuda(sample) if use_cuda else sample + if 'net_input' not in sample: + continue + + bleu = task._inference_with_bleu(generator,sample,model) + bleu_list.append(bleu.score) + + num_sentences += sample['nsentences'] + + bleu_score = np.mean(np.array(bleu_list)) return bleu_score From a35901b48afd27f4811d8513d02617f3c51f15cd Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Tue, 29 Nov 2022 11:55:16 -0800 Subject: [PATCH 28/60] Fix problem with `dataset` for OFA --- neural_compressor/experimental/nas/dynas.py | 2 +- neural_compressor/experimental/nas/dynast/dynas_utils.py | 8 +++++--- .../machine_translation/transformer_interface.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index dc11928c0fb..8430cbab8ea 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -131,7 +131,7 @@ def init_for_search(self): latency_predictor=None, datasetpath=self.dataset_path, batch_size=self.batch_size, - checkpoint_path=self.supernet_ckpt_path + checkpoint_path=self.supernet_ckpt_path, ) # Setup validation interface diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py 
b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 2bf204deab5..e1902a73c93 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -175,8 +175,9 @@ def __init__( acc_predictor: Predictor, macs_predictor: Predictor, latency_predictor: Predictor, - imagenetpath: str, + datasetpath: str, batch_size: int, + **kwargs, ) -> None: """Initialize the attributes.""" self.supernet = supernet @@ -185,7 +186,7 @@ def __init__( self.latency_predictor = latency_predictor self.device = 'cpu' self.test_size = None - ImagenetDataProvider.DEFAULT_PATH = imagenetpath + ImagenetDataProvider.DEFAULT_PATH = datasetpath self.ofa_network = ofa.model_zoo.ofa_net(supernet, pretrained=True) self.run_config = ImagenetRunConfig(test_batch_size=64, n_worker=20) self.batch_size = batch_size @@ -345,7 +346,8 @@ def __init__( latency_predictor: Predictor, datasetpath: str, batch_size: int, - checkpoint_path: str + checkpoint_path: str, + **kwargs, ) -> None: self.supernet = supernet self.acc_predictor = acc_predictor diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index dac058d24ec..336a26fad47 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -120,7 +120,7 @@ def compute_bleu(config, dataset_path, checkpoint_path): bleu_list.append(bleu.score) num_sentences += sample['nsentences'] - + bleu_score = np.mean(np.array(bleu_list)) return bleu_score From 8b3a795e3aeb7f2c31a4e4d02eb1aeb6a0da49f8 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Tue, 29 Nov 2022 12:07:58 -0800 Subject: [PATCH 29/60] Remove unused imports --- .../machine_translation/transformer_interface.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 336a26fad47..34ce0a7e96b 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -1,18 +1,13 @@ """ Translate pre-processed data with a trained model. 
""" -import ctypes -import math -import os -import sys import time import warnings import numpy as np import torch from fairseq import options, progress_bar, tasks, utils -from fairseq.data import dictionary -from fairseq.meters import StopwatchMeter, TimeMeter +from fairseq.meters import StopwatchMeter from fairseq.data.encoders.moses_tokenizer import MosesTokenizer from neural_compressor.utils import logger @@ -265,7 +260,7 @@ def compute_latency(config, dataset_path, batch_size, get_model_parameters=False return lat_mean, lat_std -def compute_macs(config,dataset_path): +def compute_macs(config, dataset_path): parser = options.get_generation_parser() args = options.parse_args_and_arch(parser,[dataset_path]) From 53f4af78555211443cb9acbb646dab58bd3f1b84 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Tue, 29 Nov 2022 18:33:38 -0800 Subject: [PATCH 30/60] Fix indentation --- neural_compressor/experimental/nas/dynast/dynas_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_predictor.py b/neural_compressor/experimental/nas/dynast/dynas_predictor.py index 0042d1df729..e9b7764d3de 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_predictor.py +++ b/neural_compressor/experimental/nas/dynast/dynas_predictor.py @@ -75,7 +75,7 @@ def train(self, examples, labels): examples: Examples to be used for training. labels: Labels to be used for training. """ - # Compute normalization factor + # Compute normalization factor max_label = np.amax(np.abs(labels)) if max_label > 0.0: self.normalization_factor = 10 ** (np.floor(np.log10(max_label)) - 1.0) From d255b68b5982436e8ed699f911ca2462fc782aa5 Mon Sep 17 00:00:00 2001 From: "Nittur Sridhar, Sharath" Date: Tue, 29 Nov 2022 20:38:38 -0800 Subject: [PATCH 31/60] replace fvcore with torchprofile to avoid mem leaks --- .../machine_translation/transformer_interface.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 34ce0a7e96b..5ad471ec858 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -13,7 +13,7 @@ from neural_compressor.utils import logger from .transformer_supernetwork import TransformerSuperNetwork -from fvcore.nn import FlopCountAnalysis +import torchprofile warnings.filterwarnings("ignore") @@ -301,13 +301,13 @@ def compute_macs(config, dataset_path): dummy_src_tokens = [2] + [7] * (dummy_sentence_length - 1) dummy_prev = [7] * (dummy_sentence_length - 1) + [2] - - model.set_sample_config(config) - + + model.eval() model.profile(mode=True) - macs = FlopCountAnalysis(model, (torch.tensor([dummy_src_tokens], dtype=torch.long), - torch.tensor([30]), torch.tensor([dummy_prev], dtype=torch.long))) - macs_tot = macs.total() + model.set_sample_config(config) + macs = torchprofile.profile_macs(model, args=(torch.tensor([dummy_src_tokens], dtype=torch.long), + torch.tensor([30]), torch.tensor([dummy_prev], dtype=torch.long))) + model.profile(mode=False) - return macs_tot + return macs From bb2a53aaa6e587e598fc99b48ceb6f6ddd259375 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Wed, 30 Nov 2022 11:24:34 -0800 Subject: [PATCH 32/60] Add DyNAS Transformer LT 
example Signed-off-by: Maciej Szankin --- ..._Supernet_NAS.ipynb => Supernet_NAS.ipynb} | 53 ++++++++++++++----- 1 file changed, 40 insertions(+), 13 deletions(-) rename examples/notebook/dynas/{MobileNetV3_Supernet_NAS.ipynb => Supernet_NAS.ipynb} (95%) diff --git a/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb b/examples/notebook/dynas/Supernet_NAS.ipynb similarity index 95% rename from examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb rename to examples/notebook/dynas/Supernet_NAS.ipynb index 4fdbc291284..ffe71eaa4b1 100644 --- a/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb +++ b/examples/notebook/dynas/Supernet_NAS.ipynb @@ -13,7 +13,7 @@ "\n", "#### Super-Networks\n", "\n", - "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification task on ImageNet-ilsvrc2012.\n", + "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. 
This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification on ImageNet-ilsvrc2012 as well as Transformer Language Translation (based on [6]) for the language translation tasks.\n", "\n", "#### Methodology\n", "\n", @@ -29,23 +29,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Prerequisites" + "## Prerequisites\n", + "\n", + "For released version of Neural Compressor:" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "!pip install neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2" + "!pip install -q neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Import Packages" + "Alternatievely, if you have a local copy of https://github.com/intel/neural-compressor, you can uncomment and run the code below:" ] }, { @@ -53,6 +55,24 @@ "execution_count": 1, "metadata": {}, "outputs": [], + "source": [ + "# import sys\n", + "# sys.path.insert(0,'')\n", + "# !pip install -q autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import Packages" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], "source": [ "from neural_compressor.conf.config import NASConfig\n", "from neural_compressor.experimental.nas import NAS\n", @@ -72,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -84,12 +104,16 @@ "metadata": {}, "source": [ "### Define Architecture\n", - "We currently leverage pre-trained Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path. \n", + "We currently support pre-trained super-networks:\n", + "\n", + "1. Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path.\n", + "2. Hardware-Aware-Transformers (HAT) supernetwork [6] for language translation task on WMT14 En-De. To run this supernetwork you have to manually download preprocessed dataset from https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/configs/wmt14.en-de/get_preprocessed.sh and pretrained model from https://www.dropbox.com/s/pkdddxvvpw9a4vq/HAT_wmt14ende_super_space0.pt?dl=0\n", "\n", "Super-network options (choose 1): \n", "- `ofa_resnet50` - based on the ResNet50 architecture [4]. 
Search space of ~$10^{15}$ architectures.\n", "- `ofa_mbv3_d234_e346_k357_w1.0` - based on the MobileNetV3 architecture [5], width multiplier 1.0. Search space of ~$10^{19}$ architectures.\n", - "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. " + "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. \n", + "- `transformer_lt_wmt_en_de` - based on the Transformer architecture [7]." ] }, { @@ -113,7 +137,7 @@ "* `['acc', 'lat']` \n", "\n", "Description:\n", - "* `'acc'` - ImageNet Top-1 Accuracy (%)\n", + "* `'acc'` - ImageNet Top-1 Accuracy (%) (for OFA supetnetworks) and Bleu (for Transformer LT)\n", "* `'macs'` - Multiply-and-accumulates as measured from FVCore. \n", "* `'lat'` - Latency (inference time) measurement (ms)" ] @@ -137,7 +161,8 @@ "* `config.dynas.num_evals` - Validation measurement count, a higher count comes with greater computational cost but a higher chance of finding optimal sub-networks\n", "* `config.dynas.results_csv_path` - Location of the search (validation measurement) results. This file is also used to provide training data to the metric predictors. \n", "* `config.dynas.batch_size` - Batch size used during latency measurements.\n", - "* `config.dynas.dataset_path` - Path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php" + "* `config.dynas.dataset_path` - For OFA it's a path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php; For Transformer LT it's a path to preprocessed WMT EnDe directory (`(...)/data/binary/wmt16_en_de`)\n", + "* `config.dynas.supernet_ckpt_path` - Transformer LT only. Path to downloaded pretrained super-network (`HAT_wmt14ende_super_space0.pt` file)." ] }, { @@ -272,8 +297,10 @@ "[1] Cai, H., Gan, C., & Han, S. (2020). Once for All: Train One Network and Specialize it for Efficient Deployment. ArXiv, abs/1908.09791. \n", "[2] K. Deb, A. Pratap, S. Agarwal and T. Meyarivan, \"A fast and elitist multiobjective genetic algorithm: NSGA-II,\" in IEEE Transactions on Evolutionary Computation, vol. 6, no. 2, pp. 182-197, April 2002, doi: 10.1109/4235.996017. \n", "[3] Cummings, D., Sarah, A., Sridhar, S.N., Szankin, M., Muñoz, J.P., & Sundaresan, S. (2022). A Hardware-Aware Framework for Accelerating Neural Architecture Search Across Modalities. ArXiv, abs/2205.10358. \n", - "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", - "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. " + "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", + "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. \n", + "[6] Wang, H., Wu, Z., Liu, Z., Cai, H., Zhu, L., Gan, C. and Han, S., 2020. 
Hat: Hardware-aware transformers for efficient natural language processing. arXiv preprint arXiv:2005.14187. \n", + "[7] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I., 2017. Attention is all you need. Advances in neural information processing systems, 30." ] }, { @@ -300,7 +327,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.7.11" } }, "nbformat": 4, From 1cd635fd6f848c7928f0c223f9b1e56d8771ae6c Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Wed, 30 Nov 2022 11:25:40 -0800 Subject: [PATCH 33/60] Code cleanup Signed-off-by: Maciej Szankin --- neural_compressor/experimental/nas/dynas.py | 6 ++++-- .../experimental/nas/dynast/dynas_predictor.py | 2 -- .../machine_translation/transformer_interface.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 8430cbab8ea..3c63e19d27f 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -37,7 +37,8 @@ class DyNAS(NASBase): def __init__(self, conf_fname_or_obj): """Initialize the attributes.""" - from .dynast.dynas_manager import ParameterManager, TransformerLTEncoding + from .dynast.dynas_manager import (ParameterManager, + TransformerLTEncoding) from .dynast.dynas_predictor import Predictor from .dynast.dynas_search import (ProblemMultiObjective, SearchAlgoManager) @@ -121,7 +122,8 @@ def estimate(self, individual): def init_for_search(self): """Initialize the search configuration.""" self.supernet_manager = self.SUPERNET_ENCODING[self.supernet]( - param_dict=self.SUPERNET_PARAMETERS[self.supernet], seed=self.seed) + param_dict=self.SUPERNET_PARAMETERS[self.supernet], seed=self.seed + ) # Validation High-Fidelity Measurement Runner self.runner_validate = self.RUNNERS[self.supernet]( diff --git a/neural_compressor/experimental/nas/dynast/dynas_predictor.py b/neural_compressor/experimental/nas/dynast/dynas_predictor.py index e9b7764d3de..477e4fcf7ca 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_predictor.py +++ b/neural_compressor/experimental/nas/dynast/dynas_predictor.py @@ -70,7 +70,6 @@ def __init__(self, alphas=DEFAULT_ALPHAS, cost_factors=DEFAULT_COST_FACTORS, def train(self, examples, labels): """Train the predictor on the specified examples and labels using the underlying regressor. - Args: examples: Examples to be used for training. labels: Labels to be used for training. @@ -94,7 +93,6 @@ def train(self, examples, labels): # Determine index of best searcher self.best_index = np.argmax(scores) - def predict(self, examples): """Predict the output values of the specified examples using the underlying regressor. 
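For readers unfamiliar with torchprofile, which the patches above adopt for MAC counting (and whose import is touched in the next hunk), the following is a minimal, self-contained sketch of the profile_macs call pattern only. The toy two-layer model and the (1, 30, 512) dummy input are illustrative assumptions standing in for the project's TransformerSuperNetwork and its dummy token tensors; they are not part of the patch series.

import torch
import torchprofile

# Toy stand-in module (assumption); the actual code profiles TransformerSuperNetwork.
model = torch.nn.Sequential(
    torch.nn.Linear(512, 2048),
    torch.nn.ReLU(),
    torch.nn.Linear(2048, 512),
).eval()

# Dummy input playing the role of the dummy sentence tensors built in compute_macs.
dummy_input = torch.randn(1, 30, 512)

# profile_macs traces the module and sums multiply-accumulate counts over the traced ops.
with torch.no_grad():
    macs = torchprofile.profile_macs(model, args=(dummy_input,))
print('MACs for the toy model: {}'.format(macs))
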
diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 5ad471ec858..cffd80246cd 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -6,14 +6,14 @@ import numpy as np import torch +import torchprofile from fairseq import options, progress_bar, tasks, utils -from fairseq.meters import StopwatchMeter from fairseq.data.encoders.moses_tokenizer import MosesTokenizer +from fairseq.meters import StopwatchMeter from neural_compressor.utils import logger from .transformer_supernetwork import TransformerSuperNetwork -import torchprofile warnings.filterwarnings("ignore") @@ -301,13 +301,13 @@ def compute_macs(config, dataset_path): dummy_src_tokens = [2] + [7] * (dummy_sentence_length - 1) dummy_prev = [7] * (dummy_sentence_length - 1) + [2] - + model.eval() model.profile(mode=True) model.set_sample_config(config) macs = torchprofile.profile_macs(model, args=(torch.tensor([dummy_src_tokens], dtype=torch.long), torch.tensor([30]), torch.tensor([dummy_prev], dtype=torch.long))) - + model.profile(mode=False) return macs From 1c6ddf68c059dafb67dd0cdbb6bf3dc3a2143963 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Wed, 30 Nov 2022 15:04:45 -0800 Subject: [PATCH 34/60] Add fairseq and HAT license Signed-off-by: Maciej Szankin --- .../supernetwork/machine_translation/LICENSE | 51 +++++++++++++++++++ .../modules_supernetwork.py | 3 ++ .../transformer_interface.py | 3 ++ .../transformer_supernetwork.py | 3 ++ 4 files changed, 60 insertions(+) create mode 100644 neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE new file mode 100644 index 00000000000..4c15682134a --- /dev/null +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE @@ -0,0 +1,51 @@ +MIT License +------------ LICENSE For Hardware-Aware Transformer software --------------- +Copyright (c) 2020, Hanrui Wang, Zhanghao Wu, Zhijian Liu, Han Cai, +Ligeng Zhu, Chuang Gan and Song Han +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +------------------------- LICENSE FOR Fairseq ------------------------------ +MIT License + +Copyright (c) Facebook, Inc. and its affiliates. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index 8650a024db7..7ffac3351ab 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -1,3 +1,6 @@ +# Part of this source code is licensed under the MIT license found in the +# LICENSE file in the same directory as this file. + from collections import defaultdict import torch diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index cffd80246cd..4ebfb7e215e 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -1,3 +1,6 @@ +# Part of this source code is licensed under the MIT license found in the +# LICENSE file in the same directory as this file. + """ Translate pre-processed data with a trained model. 
""" diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index 7890e0605d3..ee6b7b3a861 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -1,3 +1,6 @@ +# Part of this source code is licensed under the MIT license found in the +# LICENSE file in the same directory as this file. + import math import torch From 3afbcc74d8bf89051dd06304a1983db3c99fecdb Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Tue, 6 Dec 2022 03:34:58 -0800 Subject: [PATCH 35/60] Update license headers in transformer t supernet code Signed-off-by: Maciej Szankin --- .../supernetwork/machine_translation/LICENSE | 51 ------------------- .../modules_supernetwork.py | 17 ++++++- .../transformer_interface.py | 17 ++++++- .../transformer_supernetwork.py | 17 ++++++- 4 files changed, 45 insertions(+), 57 deletions(-) delete mode 100644 neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE deleted file mode 100644 index 4c15682134a..00000000000 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE +++ /dev/null @@ -1,51 +0,0 @@ -MIT License ------------- LICENSE For Hardware-Aware Transformer software --------------- -Copyright (c) 2020, Hanrui Wang, Zhanghao Wu, Zhijian Liu, Han Cai, -Ligeng Zhu, Chuang Gan and Song Han -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -------------------------- LICENSE FOR Fairseq ------------------------------ -MIT License - -Copyright (c) Facebook, Inc. and its affiliates. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index 7ffac3351ab..4c750e45c5c 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -1,5 +1,18 @@ -# Part of this source code is licensed under the MIT license found in the -# LICENSE file in the same directory as this file. +# https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/LICENSE +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from collections import defaultdict diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 4ebfb7e215e..7ca6532d17d 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -1,5 +1,18 @@ -# Part of this source code is licensed under the MIT license found in the -# LICENSE file in the same directory as this file. +# https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/LICENSE +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """
 Translate pre-processed data with a trained model.
diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py
index ee6b7b3a861..6ccc8b9c4a2 100644
--- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py
+++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py
@@ -1,5 +1,18 @@
-# Part of this source code is licensed under the MIT license found in the
-# LICENSE file in the same directory as this file.
+# https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/LICENSE
+#
+# Copyright (c) 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 import math

From 2ea3572c001ec9f5c3062a3bf14ed57096036271 Mon Sep 17 00:00:00 2001
From: Maciej Szankin
Date: Wed, 7 Dec 2022 14:34:45 -0800
Subject: [PATCH 36/60] Revert "Add DyNAS Transformer LT example"

This reverts commit bb2a53aaa6e587e598fc99b48ceb6f6ddd259375.
---
 ...S.ipynb => MobileNetV3_Supernet_NAS.ipynb} | 53 +++++--------------
 1 file changed, 13 insertions(+), 40 deletions(-)
 rename examples/notebook/dynas/{Supernet_NAS.ipynb => MobileNetV3_Supernet_NAS.ipynb} (95%)

diff --git a/examples/notebook/dynas/Supernet_NAS.ipynb b/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb
similarity index 95%
rename from examples/notebook/dynas/Supernet_NAS.ipynb
rename to examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb
index ffe71eaa4b1..4fdbc291284 100644
--- a/examples/notebook/dynas/Supernet_NAS.ipynb
+++ b/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb
@@ -13,7 +13,7 @@
     "\n",
     "#### Super-Networks\n",
     "\n",
-    "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification on ImageNet-ilsvrc2012 as well as Transformer Language Translation (based on [6]) for the language translation tasks.\n",
+    "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification task on ImageNet-ilsvrc2012.\n",
     "\n",
     "#### Methodology\n",
     "\n",
@@ -29,36 +29,16 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Prerequisites\n",
-    "\n",
-    "For released version of Neural Compressor:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install -q neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Alternatievely, if you have a local copy of https://github.com/intel/neural-compressor, you can uncomment and run the code below:"
+    "## Prerequisites"
    ]
  },
  {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# import sys\n",
-    "# sys.path.insert(0,'')\n",
-    "# !pip install -q autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2"
+    "!pip install neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2"
    ]
  },
  {
@@ -70,7 +50,7 @@
  },
  {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -92,7 +72,7 @@
  },
  {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -104,16 +84,12 @@
    "metadata": {},
    "source": [
     "### Define Architecture\n",
-    "We currently support pre-trained super-networks:\n",
-    "\n",
-    "1. Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path.\n",
-    "2. Hardware-Aware-Transformers (HAT) supernetwork [6] for language translation task on WMT14 En-De. To run this supernetwork you have to manually download preprocessed dataset from https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/configs/wmt14.en-de/get_preprocessed.sh and pretrained model from https://www.dropbox.com/s/pkdddxvvpw9a4vq/HAT_wmt14ende_super_space0.pt?dl=0\n",
+    "We currently leverage pre-trained Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path. \n",
     "\n",
     "Super-network options (choose 1): \n",
     "- `ofa_resnet50` - based on the ResNet50 architecture [4]. Search space of ~$10^{15}$ architectures.\n",
     "- `ofa_mbv3_d234_e346_k357_w1.0` - based on the MobileNetV3 architecture [5], width multiplier 1.0. Search space of ~$10^{19}$ architectures.\n",
-    "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. \n",
-    "- `transformer_lt_wmt_en_de` - based on the Transformer architecture [7]."
+    "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. "
    ]
  },
  {
@@ -137,7 +113,7 @@
     "* `['acc', 'lat']` \n",
     "\n",
     "Description:\n",
-    "* `'acc'` - ImageNet Top-1 Accuracy (%) (for OFA supetnetworks) and Bleu (for Transformer LT)\n",
+    "* `'acc'` - ImageNet Top-1 Accuracy (%)\n",
     "* `'macs'` - Multiply-and-accumulates as measured from FVCore. \n",
     "* `'lat'` - Latency (inference time) measurement (ms)"
    ]
  },
  {
@@ -161,8 +137,7 @@
     "* `config.dynas.num_evals` - Validation measurement count, a higher count comes with greater computational cost but a higher chance of finding optimal sub-networks\n",
     "* `config.dynas.results_csv_path` - Location of the search (validation measurement) results. This file is also used to provide training data to the metric predictors. \n",
     "* `config.dynas.batch_size` - Batch size used during latency measurements.\n",
-    "* `config.dynas.dataset_path` - For OFA it's a path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php; For Transformer LT it's a path to preprocessed WMT EnDe directory (`(...)/data/binary/wmt16_en_de`)\n",
-    "* `config.dynas.supernet_ckpt_path` - Transformer LT only. Path to downloaded pretrained super-network (`HAT_wmt14ende_super_space0.pt` file)."
+    "* `config.dynas.dataset_path` - Path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php"
    ]
  },
  {
@@ -297,10 +272,8 @@
     "[1] Cai, H., Gan, C., & Han, S. (2020). Once for All: Train One Network and Specialize it for Efficient Deployment. ArXiv, abs/1908.09791. \n",
     "[2] K. Deb, A. Pratap, S. Agarwal and T. Meyarivan, \"A fast and elitist multiobjective genetic algorithm: NSGA-II,\" in IEEE Transactions on Evolutionary Computation, vol. 6, no. 2, pp. 182-197, April 2002, doi: 10.1109/4235.996017. \n",
     "[3] Cummings, D., Sarah, A., Sridhar, S.N., Szankin, M., Muñoz, J.P., & Sundaresan, S. (2022). A Hardware-Aware Framework for Accelerating Neural Architecture Search Across Modalities. ArXiv, abs/2205.10358. \n",
-    "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n",
-    "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. \n",
-    "[6] Wang, H., Wu, Z., Liu, Z., Cai, H., Zhu, L., Gan, C. and Han, S., 2020. Hat: Hardware-aware transformers for efficient natural language processing. arXiv preprint arXiv:2005.14187. 
\n", - "[7] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I., 2017. Attention is all you need. Advances in neural information processing systems, 30." + "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", + "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. " ] }, { @@ -327,7 +300,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.11" + "version": "3.8.10" } }, "nbformat": 4, From 6dba11432aac7d2c20672a4d6f3d8e77d6a5440b Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 8 Dec 2022 02:30:33 -0800 Subject: [PATCH 37/60] Add example results for Transformer LT search space Signed-off-by: Maciej Szankin --- .../dynas/results_transformerlt_macs.csv | 501 ++++++++++++++++++ 1 file changed, 501 insertions(+) create mode 100644 examples/notebook/dynas/results_transformerlt_macs.csv diff --git a/examples/notebook/dynas/results_transformerlt_macs.csv b/examples/notebook/dynas/results_transformerlt_macs.csv new file mode 100644 index 00000000000..326d9894762 --- /dev/null +++ b/examples/notebook/dynas/results_transformerlt_macs.csv @@ -0,0 +1,501 @@ +Sub-network,Date,Latency (ms),MACs,BLEU +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 3072, 2048, 3072, 2048, 2048], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, -1, -1]}",2022-11-29 22:54:58.796773,0,1397702484,23.35221720436182 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 3072, 2048, 2048, 1024, 3072], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 1024, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, 2, 2, -1]}",2022-11-29 22:55:36.708362,0,2117790828,25.699488742308187 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 3072, 1024, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 3072, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, 1]}",2022-11-29 22:56:06.143948,0,1700582490,25.0628359775166 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 2048, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [2, 2, 2, -1, 1, -1]}",2022-11-29 22:56:40.372306,0,1593972576,25.51774692114225 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 
2048, 2048, 1024, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_self_attention_heads': [4, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-29 22:57:09.483908,0,1234590804,22.56186718543443 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 3072, 3072, 2048, 3072, 3072], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 1, 1, 1, -1, 1]}",2022-11-29 22:57:47.479253,0,2320469868,26.46877217919795 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 1024, 3072, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 1, 1, 1, -1]}",2022-11-29 22:58:16.629295,0,1269811290,24.64774544301779 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 3072, 2048, 2048, 3072, 3072], 'decoder_ffn_embed_dim': [3072, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, -1, 1, -1, 2]}",2022-11-29 22:58:57.917629,0,2481530994,26.07415311884126 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 3072, 1024, 1024, 1024, 2048], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 1024, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 1, 2, 1, 1, -1]}",2022-11-29 22:59:26.905633,0,1319024724,22.493311676649537 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 3072, 1024, 1024, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 1, 2, 2, 1]}",2022-11-29 23:00:07.328829,0,1880709234,26.00344571579533 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 3072, 1024, 2048, 2048, 2048], 'decoder_ffn_embed_dim': [3072, 2048, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, 2, -1, 1]}",2022-11-29 23:00:38.792088,0,1671939936,25.692425623480723 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 2048, 3072], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_self_attention_heads': 
[4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [2, 2, 2, -1, 1, -1]}",2022-11-29 23:01:14.719074,0,1804297062,26.07342689295033 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 2048], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 3072, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, 2, 1, 2, -1]}",2022-11-29 23:01:56.128203,0,2350798194,26.332192395799687 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [2, 2, -1, 1, -1, 1]}",2022-11-29 23:02:26.540354,0,1397483610,25.69929087830039 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, 1, 1, 2]}",2022-11-29 23:03:07.262003,0,2119699314,26.35980541802738 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 1024, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 1, -1, 2, -1, -1]}",2022-11-29 23:03:35.089457,0,1110604884,22.97494000005183 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 2048, 2048], 'decoder_ffn_embed_dim': [3072, 3072, 1024, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 4, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, 1, -1, 1, 2]}",2022-11-29 23:04:06.642167,0,1801651290,25.757473996484833 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 2048, 2048], 'decoder_ffn_embed_dim': [1024, 3072, 3072, 1024, 1024, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 2, 2, -1, -1, -1]}",2022-11-29 23:04:41.625679,0,1888961382,25.85426108217189 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, 1, 1, 1]}",2022-11-29 23:05:11.833000,0,1490960730,25.63143521434478 
+"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 3072, 2048, 3072, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, 1, 1, 2]}",2022-11-29 23:05:41.444763,0,1364183130,25.072061221515387 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 2048, 2048, 1024, 1024, 3072], 'decoder_ffn_embed_dim': [3072, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, 1, 1, -1]}",2022-11-29 23:06:20.219204,0,2281236594,26.08920225424034 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 1024, 3072], 'decoder_ffn_embed_dim': [1024, 2048, 1024, 3072, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 2, 1, -1, -1]}",2022-11-29 23:06:54.484344,0,1688332896,25.54971935098368 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 1024, 3072, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, 1, 1, 2]}",2022-11-29 23:07:31.477666,0,1540039776,25.66937359699742 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 2, 2, 1, 1]}",2022-11-29 23:08:00.927883,0,1543246170,25.23650526106691 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 1024, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 1024, 3072, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, 2, 2, -1, 2]}",2022-11-29 23:08:37.748235,0,1840608102,25.84950449942653 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 2048, 3072], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 1024, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 2, -1]}",2022-11-29 23:09:14.377541,0,1662908256,25.748175360241753 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 3072, 2048, 1024, 3072, 3072], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 3072, 1024], 
'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [2, 1, 1, 2, 1, -1]}",2022-11-29 23:09:53.068127,0,2074525548,26.02259252150837 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 3072, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, -1, 2, 2]}",2022-11-29 23:10:31.871226,0,2061818988,26.071718195164653 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 1024, 3072], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 1, -1, 2, 1, 2]}",2022-11-29 23:11:01.734997,0,1412290650,25.399141175298542 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 1024, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 2, -1, -1, -1, 1]}",2022-11-29 23:11:40.315729,0,1971394674,26.21330617046487 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [2, 1, 1, 1, 1, -1]}",2022-11-29 23:12:16.427543,0,1830900582,26.184771020867597 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [3072, 3072, 3072, 2048, 3072, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, 1, 2, 2, 1]}",2022-11-29 23:12:57.379424,0,2421538668,26.175261088262666 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, 2, 1, -1]}",2022-11-29 23:13:25.381877,0,1251290964,23.320679652947288 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 1024, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [1024, 3072, 2048, 1024, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 
'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 1]}",2022-11-29 23:13:58.935098,0,1588224102,25.972021275557776 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 3072, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 1, -1, 1, 2]}",2022-11-29 23:14:35.478235,0,1960738668,26.14494989795422 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 2, 2]}",2022-11-29 23:15:03.906597,0,1419452244,22.7811520313731 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 3072, 3072, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 3072, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_self_attention_heads': [4, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 2, 2, 2]}",2022-11-29 23:15:33.346144,0,1316075610,24.898481627702125 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 2048, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 1, 2]}",2022-11-29 23:16:12.674460,0,1893596268,26.420978678385804 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 2048], 'decoder_ffn_embed_dim': [1024, 3072, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, 2, -1]}",2022-11-29 23:16:45.186860,0,1642325856,25.982442735663543 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 1024, 1024, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 2, 2, 2, 2, 2]}",2022-11-29 23:17:21.824602,0,1856336742,25.94442144683277 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 3072, 3072, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, 1]}",2022-11-29 23:17:58.260936,0,2023653228,26.07457768169323 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': 
[1024, 3072, 2048, 1024, 3072, 2048], 'decoder_ffn_embed_dim': [3072, 1024, 3072, 3072, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, -1, 2, 2]}",2022-11-29 23:18:27.089691,0,1582529364,23.950252879196924 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 3072, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 3072, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [2, 1, 2, -1, 2, -1]}",2022-11-29 23:19:02.726800,0,2035372902,26.447028779186226 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 1024, 3072, 2048, 2048, 3072], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 2, 2, 2, 2, 2]}",2022-11-29 23:19:39.439886,0,1945854822,26.003986822056245 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, 1, 1]}",2022-11-29 23:20:17.584270,0,2299887468,26.442901941442834 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 3072], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 1024, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 2]}",2022-11-29 23:20:58.503235,0,2382305394,26.517600251211515 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 1, 2, -1, 1, -1]}",2022-11-29 23:21:34.739346,0,1693274982,25.604765879724265 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 3072, 3072, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 4, 4, 8, 8, 4], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 8], 'decoder_ende_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 2, 2, 2, 1]}",2022-11-29 23:22:15.291570,0,2189491308,25.71548559680124 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 3072, 3072, 3072, 1024, 3072], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 3072, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 
'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 2, -1, 1, -1, 2]}",2022-11-29 23:22:53.173666,0,2225126508,26.022116504070834 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 4, 4, 8, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 2, 2, 1, 1, 1]}",2022-11-29 23:23:30.358756,0,1890804582,25.70580338518658 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, 1, 1, -1]}",2022-11-29 23:24:17.798475,0,1156869204,23.410008497520735 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:24:59.431502,0,2226789234,26.12743322887944 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, 1, 2, -1]}",2022-11-29 23:25:28.533368,0,1253161050,24.92578691671575 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:25:56.800152,0,1173519444,23.28780146013261 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:26:33.876814,0,1510425696,25.417654573154596 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 
23:27:12.966148,0,1784417388,25.760464304216683 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:27:50.697432,0,1478968416,25.19809949508387 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:28:21.688106,0,1334569050,24.839033226584537 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:28:58.223688,0,1765847142,25.798742018362613 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 2, -1]}",2022-11-29 23:29:28.855348,0,1334569050,24.797771375743167 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:30:10.461923,0,1943623794,25.731563707029 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:30:51.410770,0,2006538354,25.93412075285396 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:31:27.644610,0,1640018022,25.48925301539262 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 
3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:31:58.151027,0,1303111770,24.636635975207156 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 2, -1]}",2022-11-29 23:32:34.035788,0,1623367782,25.55649399276896 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 2, -1]}",2022-11-29 23:33:09.644672,0,1591910502,25.363108576481086 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:33:50.627277,0,1943623794,25.471976693004432 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:34:27.530520,0,1734389862,26.022537181002058 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:35:08.733562,0,2148146034,25.76800258460572 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:35:48.076930,0,1784417388,25.746269422993464 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 
4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:36:18.846293,0,1397483610,24.957529704052245 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:37:00.512837,0,2226789234,26.00635517076823 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:37:41.235777,0,1912166514,25.566969946150067 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 2, -1]}",2022-11-29 23:38:18.262099,0,1541882976,25.33456038818163 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:38:57.094675,0,1815874668,25.598195468771692 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:39:33.146325,0,1640018022,25.552118216389637 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 1, 2, -1]}",2022-11-29 23:40:09.706551,0,1462318176,25.29731961246495 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, -1]}",2022-11-29 23:40:38.756573,0,1253161050,24.86012555534481 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': 
[2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, 1, 2, -1]}",2022-11-29 23:41:07.530840,0,1253161050,24.9440878831812 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 2, -1, 2, 1, -1]}",2022-11-29 23:41:34.610233,0,1156869204,23.274975491818346 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:42:01.811817,0,1173519444,23.245718341488995 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 2, -1]}",2022-11-29 23:42:37.425300,0,1591910502,25.413674094921433 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:43:13.878058,0,1608560742,25.365752658832324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 2, -1]}",2022-11-29 23:43:50.046834,0,1608560742,25.36745967365502 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, 1, 1, -1]}",2022-11-29 23:44:18.851277,0,1253161050,25.005251094503805 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 
'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 1, 2, -1]}",2022-11-29 23:44:46.110775,0,1156869204,23.277213964898888 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 2, -1]}",2022-11-29 23:45:14.844805,0,1253161050,24.834074772858695 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 1, 2, -1]}",2022-11-29 23:45:42.403812,0,1156869204,23.178044546083612 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:46:23.812450,0,2116688754,25.77854866366567 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:47:03.270121,0,1815874668,25.683722427952674 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 1, -1]}",2022-11-29 23:47:43.427381,0,1926973554,25.6886566287921 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:48:14.256367,0,1303111770,24.786229852100394 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 
23:48:50.842520,0,1702932582,25.571343061345555 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:49:31.529598,0,1975081074,25.722542126362086 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:50:13.472772,0,2163874674,26.02132010597597 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:50:49.447197,0,1671475302,25.60807605923095 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:51:20.051939,0,1303111770,24.844209249533108 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:52:01.725725,0,2116688754,25.774689748379366 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:52:43.523756,0,2037995634,25.839133901981196 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:53:22.095038,0,1573340256,25.369471344799734 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 
1024, 1024, 3072, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, -1]}",2022-11-29 23:54:11.017282,0,982932564,21.75634266526977 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:54:50.536084,0,2047077234,26.357977252559444 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, -1]}",2022-11-29 23:55:18.749444,0,1079224410,23.671710345039983 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:55:45.598303,0,1062497364,22.630106123134603 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:56:23.927095,0,1799301234,26.237657577301754 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:57:02.821107,0,1799301234,26.235752390680105 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:57:41.722481,0,1976298354,26.409237334320427 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 
8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:58:20.422891,0,1862215794,26.41037129035317 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-29 23:58:51.406394,0,1334645856,25.47756540184132 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:59:24.731374,0,1479045222,25.751217013602062 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-29 23:59:57.442326,0,1462394982,25.872011121812324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 1, -1, 1, 1]}",2022-11-30 00:00:30.249710,0,1399480422,25.17096109375348 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:01:04.136585,0,1573417062,26.100985377959983 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:01:43.420676,0,2015619954,26.590522766056612 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:02:22.331936,0,1830758514,26.2075294199728 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 
'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:02:51.964958,0,1269811290,25.475704676620662 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:03:29.946063,0,1751193714,26.105643502743355 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:04:07.937498,0,1751193714,25.93245158011976 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:04:37.760555,0,1206896730,24.91733058699306 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:05:12.938706,0,1541959782,25.94168073173522 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:05:50.741638,0,1719736434,26.03682929942318 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:06:25.032545,0,1447587942,25.535328515473058 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 
4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:06:58.207200,0,1319838816,25.37435112751941 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:07:37.942726,0,2007755634,26.51729964072713 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:08:09.189292,0,1271731296,25.166134982432663 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:08:43.141792,0,1416130662,25.449204232481016 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-30 00:09:11.020741,0,999582804,21.951713272811833 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-30 00:09:40.649180,0,1238354010,24.98037680186457 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:10:13.227827,0,1303188576,25.448120268873193 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 
00:10:43.351246,0,1175439450,24.62386693490815 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:11:23.467151,0,2007755634,26.232304249661833 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-30 00:11:51.316006,0,1031040084,22.17006946878601 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:12:21.453576,0,1238354010,24.98730164778759 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:12:51.154193,0,1175439450,24.370328478496752 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:13:21.253989,0,1143982170,24.242154307788027 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:13:48.722881,0,1062497364,22.741194138868078 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, 1]}",2022-11-30 00:14:18.241161,0,1127331930,24.234335725130748 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': 
[1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:14:57.044258,0,1830758514,26.44666962861665 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:15:32.368557,0,1541959782,26.050701171511186 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, 1]}",2022-11-30 00:16:01.654068,0,1221703770,24.996951152986856 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:16:33.903998,0,1288381536,25.159811999467085 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-30 00:17:07.162316,0,1399480422,25.443390153441023 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, -1]}",2022-11-30 00:17:35.512007,0,982932564,21.871299717957186 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:18:09.340609,0,1416130662,25.447142262784027 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 
'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, 1, -1, 1, 1]}",2022-11-30 00:18:37.410682,0,1045847124,22.47970882205465 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, 1]}",2022-11-30 00:19:06.749493,0,1095874650,23.814093767809908 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:19:39.893348,0,1319838816,25.478955124339844 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, -1]}",2022-11-30 00:20:09.251167,0,1095874650,23.750972433853825 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, -1]}",2022-11-30 00:20:38.348608,0,1127331930,23.993329632521927 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, 1, -1, 1, 1]}",2022-11-30 00:21:06.810591,0,1014389844,22.26199233258481 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, 1, 1]}",2022-11-30 00:22:10.464888,0,2500270194,26.490267758415033 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, 2]}",2022-11-30 00:22:38.706298,0,1077304404,23.03639520261316 +"{'encoder_embed_dim': 
[512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, 2]}",2022-11-30 00:23:08.174166,0,1142138970,24.176833511635046 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:23:37.202278,0,1156869204,23.451597618181914 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:24:12.037672,0,1894982502,26.570692923549505 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:24:48.692945,0,1795538028,26.656160779307765 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:25:18.655715,0,1221703770,24.876611482664103 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:25:53.952187,0,1571573862,26.243286807849493 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 2]}",2022-11-30 00:26:28.198620,0,1397560416,25.575803459047332 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 
'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:27:03.351000,0,1619681382,26.29731327668079 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, -1, 1]}",2022-11-30 00:27:32.454206,0,1205053530,24.652752198325597 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:28:00.103253,0,1125411924,23.2436921421545 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:28:38.079240,0,2318626668,26.646036771408653 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:29:17.534918,0,2097273714,26.452370488677506 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:29:52.746931,0,1651138662,26.151231823189608 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 00:30:22.172110,0,1284618330,25.49140283243417 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 
-1, -1, -1, -1, 1]}",2022-11-30 00:30:52.032834,0,1284618330,25.483324803555185 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:31:26.157137,0,1682595942,26.243800774630134 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:32:02.794238,0,1412367456,25.73672237614388 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:32:38.498129,0,1412367456,25.694815754009024 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 2]}",2022-11-30 00:33:14.792750,0,1460474976,25.87514706229428 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:33:55.738731,0,2419783794,26.642604563885367 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:34:35.635995,0,2168052594,26.669695273057105 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:35:13.689253,0,2015788908,26.70123813916216 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 
3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:35:50.004608,0,1460474976,25.829650262816568 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:36:19.755425,0,1221703770,24.917846107035597 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:36:48.610538,0,1093954644,23.08549976883442 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:37:23.140196,0,1571573862,26.495457209152665 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:37:58.602077,0,1349452896,25.103371841310192 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:38:34.600291,0,1523389536,26.028214026012492 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:39:10.001278,0,1349452896,24.972561656938478 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 
'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:39:44.443816,0,1714053222,26.16022517555216 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:40:21.100754,0,1491932256,25.983857779417527 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:41:01.553620,0,2459105394,26.362258167185217 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:41:41.438856,0,2136595314,26.728063906094565 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:42:09.810325,0,1093954644,22.9713116425012 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:42:38.643170,0,1156869204,23.4523958088576 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:43:07.305868,0,1093954644,23.166873916592454 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 
00:43:36.751041,0,1316075610,25.51338789773949 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:44:07.237775,0,1253161050,25.004771948649424 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 00:44:43.589272,0,1747430508,26.62802459531885 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, 1]}",2022-11-30 00:45:13.240897,0,1142138970,24.356415715383232 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:45:43.655137,0,1301268570,25.49676417511692 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, 2]}",2022-11-30 00:46:13.344754,0,1142138970,24.224366625169438 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:46:50.784832,0,1945010028,26.612602269295685 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:47:21.162395,0,1238354010,24.912296082302873 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 
'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:47:50.863610,0,1190246490,24.44551868443464 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:48:20.655219,0,1190246490,24.332970362633144 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:48:57.884646,0,1984331628,26.656284353019814 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:49:36.098176,0,1976467308,26.4326896541673 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 00:50:27.405530,0,982932564,21.902795709633452 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:51:08.032064,0,2180770674,26.599782674768583 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:51:35.807356,0,1062497364,22.371131012623422 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 
'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, 2, 1, 2]}",2022-11-30 00:52:05.763177,0,1079224410,23.52409715160256 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:52:39.045430,0,1571573862,26.256831208948178 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:53:12.642643,0,1508659302,26.00462272595473 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:53:42.368319,0,1158789210,24.50401996100462 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:54:12.059480,0,1127331930,24.087230950865788 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:54:51.957601,0,1956587634,26.41692984127425 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:55:29.312918,0,1826995308,26.542101655879033 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:56:07.198728,0,2007924588,26.616730692591272 +"{'encoder_embed_dim': [640], 
'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:56:48.032095,0,2149313394,26.572361617382416 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:57:18.152332,0,1095874650,23.782891130205417 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 00:57:47.584518,0,1173596250,24.547205997391597 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 00:58:17.345898,0,1110681690,23.94407626548769 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:58:47.056374,0,1190246490,24.491800170896195 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:59:16.745616,0,1253161050,25.254425153820556 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 00:59:47.189514,0,1236510810,25.16579743713827 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 
'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:00:16.840252,0,1347532890,25.676132056093994 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:00:46.759507,0,1221703770,24.768354572582403 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:01:21.302832,0,1412367456,25.746119527915003 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:01:55.146488,0,1634488422,26.23053149772474 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:02:23.005266,0,1062497364,22.439008935256105 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:02:59.282910,0,1778887788,26.56040601873973 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:03:36.281891,0,1826995308,26.583100181248412 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 
-1, -1, -1, 1, 2]}",2022-11-30 01:04:12.687089,0,1715973228,26.303092956441024 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 3072, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:04:48.509486,0,1475282016,25.672530277500815 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:05:25.099950,0,1747430508,26.33985155284849 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:05:53.329815,0,1031040084,22.262405116542624 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:06:30.890827,0,1976467308,26.580438511490023 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 01:07:00.560411,0,1079224410,23.728343099659895 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:07:28.617193,0,999582804,21.72906013722468 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 2048, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 01:07:57.530176,0,1014389844,22.471135438983048 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 
1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:08:32.633405,0,1682595942,26.278826842615302 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:09:03.054659,0,1284618330,25.50001897765904 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:09:31.687974,0,999582804,21.967989901189092 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 3072, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:10:06.993955,0,1506739296,25.812328256697807 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:10:36.948650,0,1284618330,25.440948757479756 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:11:11.600524,0,1380910176,25.703810906558953 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:11:46.811777,0,1380910176,25.597256331992767 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 
4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 01:12:14.807619,0,982932564,21.644880427454428 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:12:45.339890,0,1378990170,25.719053744735888 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:13:19.554133,0,1651138662,26.296467387644455 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:13:55.039207,0,1443824736,25.92682154046769 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:14:25.339862,0,1378990170,25.618097500353073 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:14:58.311883,0,1443824736,26.05836967026129 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:15:35.880349,0,1826995308,26.665846653894516 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:16:05.964065,0,1316075610,25.603591515089885 
+"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:16:35.543255,0,1316075610,25.726771790425637 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:17:09.354304,0,1588224102,26.44933722401916 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:18:13.173994,0,2136595314,26.65406908362065 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:18:40.903167,0,982932564,21.753935985565093 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 2, 1]}",2022-11-30 01:19:09.095827,0,1077304404,22.592892971887863 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, -1]}",2022-11-30 01:19:38.452316,0,1079224410,23.708705837824187 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:20:15.347627,0,1651138662,26.467237911633077 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 
'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:20:53.408627,0,2015788908,26.720937123019265 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:21:30.489630,0,1732623468,26.539145946856273 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:22:04.657299,0,1588224102,26.359690429140286 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:22:41.410352,0,1795538028,26.7016667761388 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:23:11.191925,0,1045847124,22.30727022285813 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:23:48.495707,0,1945010028,26.92853123605158 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 2, 1]}",2022-11-30 01:24:18.247397,0,1142138970,24.635872154023456 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 
'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:24:48.348618,0,1127331930,24.1530263191461 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, -1]}",2022-11-30 01:25:26.938625,0,1860372594,26.730853774460698 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:25:59.999243,0,1366103136,25.823752934217687 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:26:38.109367,0,1460474976,25.87027954805624 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:27:14.992048,0,1826995308,26.80291796821574 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 2, 1]}",2022-11-30 01:27:44.593561,0,1110681690,24.51106684934533 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:28:13.315260,0,999582804,21.754138520655324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:28:43.054739,0,1095874650,23.72840125694677 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': 
[1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 01:29:16.453346,0,1540116582,26.383471679803876 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:29:44.596431,0,1014389844,21.82697530935213 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:30:22.103158,0,1523389536,25.605380278566003 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:30:52.162081,0,1158789210,24.84491236430293 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 01:31:27.756932,0,1571573862,26.52406333073416 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:31:58.252949,0,1253161050,25.329648091984946 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:32:34.692819,0,1669708908,26.5312087342803 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 
'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:33:04.851690,0,1316075610,25.720879408387013 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 2, 1]}",2022-11-30 01:33:34.153425,0,1079224410,24.103822174703968 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:34:13.928399,0,2073680754,26.650073433583966 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:34:52.003469,0,1491932256,25.68166831220549 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:35:22.255164,0,1316075610,25.63882428357699 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, -1]}",2022-11-30 01:35:51.968182,0,1347532890,25.833426111273635 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:36:31.293492,0,1877022834,26.470763981649853 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 
01:37:01.112958,0,1062497364,22.484020387193706 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:37:41.228117,0,2105138034,26.83517275846072 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 01:38:19.780658,0,1443824736,25.801099953730787 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:38:50.000595,0,1077304404,22.347079305577246 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 01:39:21.802091,0,1380910176,25.69437359008145 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, -1]}",2022-11-30 01:39:51.661673,0,1253161050,25.278872112854447 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:40:28.495428,0,1701166188,26.376182230389777 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:40:58.659095,0,1284618330,25.38778340159123 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 
'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 01:41:29.227300,0,1284618330,25.42396262714132 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:42:01.693769,0,1491932256,26.049173770932143 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 01:42:39.003275,0,1412367456,25.450957445704358 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:43:08.560591,0,1205053530,24.982224495498585 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:43:38.655869,0,1190246490,24.935927330385375 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 2, 1]}",2022-11-30 01:44:07.301698,0,1045847124,22.58554082987191 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:44:37.461976,0,1190246490,24.671958393096833 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 
'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:45:07.650100,0,1221703770,24.956348430879675 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:46:01.404524,0,1031040084,22.152066191159324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:46:38.986518,0,2082647148,26.632249118046865 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:47:07.608826,0,1125411924,23.429789667811992 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, -1, 2]}",2022-11-30 01:47:37.867613,0,1127331930,24.145917266173598 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:48:15.090054,0,1826995308,26.770198001318732 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:48:53.000294,0,2011868268,26.415120168334965 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:49:26.441014,0,1382830182,25.316083834952625 +"{'encoder_embed_dim': 
[512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 2]}",2022-11-30 01:49:59.544683,0,1414287462,25.711571467826957 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 01:50:30.849009,0,1223546970,24.831513194585497 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, -1, 1, 2]}",2022-11-30 01:51:01.107149,0,1079147604,22.675323576984212 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, 1, -1, 2]}",2022-11-30 01:51:31.946530,0,1143982170,24.30020519062015 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:52:01.944178,0,1158789210,24.760354519096623 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:52:32.644831,0,1175439450,24.745534135337422 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 2]}",2022-11-30 01:53:01.116235,0,1062497364,22.613029220889654 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [4], 
'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:53:35.041431,0,1619681382,26.2082672911056 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 01:54:05.943378,0,1255004250,25.072608209842993 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, -1, 1, 2]}",2022-11-30 01:54:35.408367,0,1110604884,23.1627621167102 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 01:55:06.650719,0,1317918810,25.578281446432214 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:55:36.475238,0,1127331930,24.221637357380914 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:56:12.790186,0,1669708908,26.54140556368842 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:56:43.360950,0,1206896730,24.98000861493876 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 
-1, -1, 1, 2]}",2022-11-30 01:57:13.953244,0,1364183130,25.71705181089393 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:57:51.090401,0,1701166188,26.74516092088886 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:58:27.981238,0,1732623468,26.832353130261968 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, -1, 1, 2]}",2022-11-30 01:58:56.696835,0,1047690324,21.97995176988117 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:59:31.345276,0,1588224102,26.3358070373563 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 02:00:00.113102,0,1062497364,22.362618000056415 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 2]}",2022-11-30 02:00:29.945995,0,1127331930,24.648057370763627 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:01:00.265931,0,1301268570,25.436935433814305 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 
'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 02:01:31.151981,0,1380833370,25.654131643295717 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:02:09.301256,0,1795538028,26.84364156318439 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 02:02:37.517073,0,1031040084,21.937041004808965 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:03:12.651254,0,1556766822,26.361614746242726 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 02:03:43.953623,0,1286461530,25.33590186847325 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, -1, 2]}",2022-11-30 02:04:14.640998,0,1127331930,24.26800692018226 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 02:04:48.236910,0,1445744742,25.8605605687801 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 
'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, -1, 2]}",2022-11-30 02:05:22.538414,0,1430937702,25.736368197822884 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 02:05:51.408503,0,1031040084,22.160851653528795 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:06:26.648873,0,1525309542,26.283505470568386 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:07:01.841145,0,1651138662,26.200923396852648 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:07:36.826535,0,1493852262,26.022938040890846 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:08:11.743622,0,1651138662,26.323507870111342 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 02:08:40.810079,0,1093954644,23.10914434974903 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 02:09:10.497316,0,1093954644,22.93525727677559 
+"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:09:45.035662,0,1525309542,26.303765162822714 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, -1, 1, 2]}",2022-11-30 02:10:15.853041,0,1269811290,25.14969354058586 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:10:53.034327,0,1764080748,26.719747425528375 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, -1, 2]}",2022-11-30 02:11:27.448102,0,1493852262,26.147631963028342 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, -1, 2]}",2022-11-30 02:12:01.745228,0,1462394982,25.806638854264037 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, 1, 1, 2]}",2022-11-30 02:12:33.064042,0,1192089690,24.67646662934568 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 1024, 3072, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, 1, -1, 1]}",2022-11-30 02:13:31.905219,0,1014389844,22.07444316375671 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 
'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:14:13.239756,0,2196499314,26.808937868203532 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 2, 1]}",2022-11-30 02:14:45.016226,0,1108761684,22.99185474514479 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 2, 1]}",2022-11-30 02:15:16.151531,0,1110681690,24.451311787293157 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:15:52.689296,0,1653058668,26.388890341662112 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:16:30.799650,0,1945010028,26.832269839482606 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:17:09.628703,0,1810345068,26.87035240576452 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:17:41.163075,0,1158789210,24.73994162716646 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 
'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:18:12.461543,0,1253161050,25.18416515633912 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:18:46.947088,0,1445744742,25.761961494652613 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:19:21.548056,0,1540116582,26.36816465236295 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:19:55.543285,0,1477202022,26.259735292233113 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:20:26.257628,0,1410447450,25.769535189140193 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:21:00.935996,0,1508659302,26.45714906422908 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:21:39.693771,0,2064818028,27.05141228767061 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:22:16.858717,0,1634488422,26.322548970809848 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 
'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:22:53.592330,0,1715973228,26.657027696998984 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 2, 1]}",2022-11-30 02:23:23.753177,0,1142138970,24.539293515717535 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:24:02.314804,0,1860295788,27.020732069299534 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:24:40.894881,0,2023653228,26.816836839483663 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:25:19.593071,0,1860295788,26.957189752523504 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:25:50.458625,0,1284618330,25.61733025863949 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, 1]}",2022-11-30 02:26:20.744436,0,1236510810,25.25415313294958 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 3072, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 
8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, -1]}",2022-11-30 02:26:53.298888,0,1045847124,22.08090715962085 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, 1]}",2022-11-30 02:27:27.137456,0,1492009062,26.052263535617904 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:28:01.945411,0,1571573862,26.118237209470863 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:28:38.594669,0,1747430508,26.903183105861135 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:29:18.253776,0,2076691314,26.879803077690028 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, -1]}",2022-11-30 02:29:56.298060,0,2003070828,26.839090098499142 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:30:33.772050,0,1603031142,26.482417706814886 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 
-1]}",2022-11-30 02:31:13.542551,0,2155334514,26.87436116692265 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, -1]}",2022-11-30 02:31:47.410084,0,1077304404,22.604758748152854 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 2, -1]}",2022-11-30 02:32:17.856179,0,1173596250,24.7651262424877 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, -1]}",2022-11-30 02:32:48.975305,0,1127331930,24.704098583912252 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, -1]}",2022-11-30 02:33:19.260398,0,1205053530,25.137127615371046 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 2, 1]}",2022-11-30 02:33:51.759557,0,1364183130,25.966442456302147 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:34:21.926894,0,1093954644,23.220261036558885 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:34:53.358603,0,1190246490,25.1359189611972 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 
'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:35:22.930318,0,1062497364,22.7755095138295 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:35:53.858887,0,1284618330,25.537416313175953 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:36:31.595724,0,1764080748,26.882073104251866 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:37:02.267669,0,1316075610,25.570143211664274 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, -1]}",2022-11-30 02:37:41.647836,0,2134752114,26.898452516723687 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:38:13.005215,0,1031040084,22.282220573694513 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:38:44.151950,0,1347532890,25.784134961825835 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 
'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:39:15.266274,0,1221703770,25.202375572779086 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, 2, 1]}",2022-11-30 02:39:46.953641,0,1332725850,25.851297056506393 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, -1]}",2022-11-30 02:40:17.653376,0,1173596250,24.972344992193374 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 3072, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 02:40:49.609980,0,1062497364,22.535667407831653 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 2, 1]}",2022-11-30 02:41:20.761069,0,1110681690,24.40576457445596 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:42:27.496173,0,1828838508,26.85145321742662 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, -1]}",2022-11-30 02:42:57.919093,0,982932564,21.761604389716634 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, -1]}",2022-11-30 02:43:28.655608,0,1079224410,23.718368472465446 +"{'encoder_embed_dim': 
[512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:43:58.512911,0,1062497364,22.74182664332034 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 02:44:32.816447,0,1445667936,26.19212122197997 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 02:45:10.899684,0,1477125216,26.010153160637532 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, -1]}",2022-11-30 02:45:41.313907,0,1045847124,22.827474314422002 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 02:46:19.207836,0,1749273708,26.60418713411001 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 02:46:53.592587,0,1414210656,26.00726305388265 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, 1]}",2022-11-30 02:47:23.850237,0,1142138970,24.6489542661405 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 
'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 02:47:54.746779,0,1316075610,25.689218653291295 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:48:25.548201,0,1347532890,25.751911210030475 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 02:49:04.198324,0,1780730988,27.00579454971298 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 02:49:42.700124,0,1812188268,26.874801178813083 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:50:13.662203,0,1158789210,24.921368325030492 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:50:43.904715,0,999582804,22.026558500552152 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 02:51:22.015853,0,1686359148,26.62215586535193 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 
-1, 2, -1, -1, 1]}",2022-11-30 02:52:00.755827,0,1667788902,26.61057212093059 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:52:37.454316,0,1636331622,26.6512297923577 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, 1]}",2022-11-30 02:53:07.714810,0,1014389844,22.393618015178536 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 02:53:43.144481,0,1510502502,25.993038659880476 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:54:19.052536,0,1573417062,26.421662873667255 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:54:49.657459,0,1095874650,23.883133083803862 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, -1]}",2022-11-30 02:55:21.070738,0,1206896730,25.148678288815 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, -1]}",2022-11-30 02:55:52.009648,0,1014389844,22.06913488473211 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 
2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:56:25.509329,0,1382753376,25.54875613062997 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, -1]}",2022-11-30 02:56:56.957773,0,1301268570,25.649734062232383 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, 1, 1]}",2022-11-30 02:57:31.518411,0,1493775456,26.10832383442528 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:58:01.613773,0,1031040084,22.362002479551165 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:58:31.705698,0,1062497364,22.819328594276566 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, 1]}",2022-11-30 02:59:02.697733,0,1173596250,24.988685667678173 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:59:33.659490,0,1221703770,25.302962108892185 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': 
[8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:00:09.446922,0,1541959782,26.112077468690867 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, -1]}",2022-11-30 03:00:39.473024,0,1079224410,23.955772671551667 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:01:15.155182,0,1541959782,26.31776013053777 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:01:46.072247,0,1253161050,25.489643989727142 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, 1]}",2022-11-30 03:02:17.596453,0,1364183130,25.815981278057624 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, -1]}",2022-11-30 03:02:48.933726,0,1045847124,22.32432265753782 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, -1]}",2022-11-30 03:03:21.469495,0,1269811290,25.513369223069635 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:03:53.160286,0,1284618330,25.46582706181441 
+"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 03:04:31.684525,0,1717816428,26.710511535879036 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 03:05:10.042233,0,1717816428,26.84283267727433 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:05:41.855922,0,1190246490,24.970583908771232 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 03:06:20.184503,0,1636331622,26.504345633971067 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:06:51.460598,0,1127331930,24.268248941796838 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:07:28.142691,0,1573417062,26.430998131687453 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:07:59.749480,0,1253161050,25.38845190370804 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 1024], 
'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:08:30.871157,0,1190246490,25.041571400304573 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, -1]}",2022-11-30 03:09:02.664589,0,1110681690,24.006237352801346 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, -1]}",2022-11-30 03:09:34.238610,0,1238354010,25.34276030239052 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 2]}",2022-11-30 03:10:34.311288,0,999582804,21.80118151877014 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:11:13.593310,0,1797381228,27.043140669460854 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:11:43.503387,0,1093954644,23.007414463752117 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 2]}",2022-11-30 03:12:14.163528,0,1095874650,23.79086433289906 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 
'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:12:50.848022,0,1621524582,26.025864778342207 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:13:21.556759,0,1190246490,25.077358466768757 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:13:55.398877,0,1429017696,26.12059006524399 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, -1, 1]}",2022-11-30 03:14:26.326007,0,999582804,22.127524836343532 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 1]}",2022-11-30 03:14:57.762642,0,1221703770,25.21914419948441 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:15:36.252929,0,1667788902,26.6927938370969 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:16:14.254276,0,1652981862,26.66556123105007 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 2]}",2022-11-30 03:16:45.445260,0,1127331930,24.13548008816253 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': 
[1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 2]}",2022-11-30 03:17:15.639603,0,1031040084,22.152066191159324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:17:54.414138,0,1477125216,26.149095479536744 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 2, 1, 1]}",2022-11-30 03:18:25.964419,0,1347532890,25.790673448942552 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:18:56.523414,0,1031040084,22.518290658576667 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, 2, 1, 1]}",2022-11-30 03:19:27.666139,0,1316075610,25.42608049975173 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:19:59.551900,0,1221703770,25.355575788780996 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 2]}",2022-11-30 03:20:30.914307,0,1062497364,22.58490426133199 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 
'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 2, 1, 1]}",2022-11-30 03:21:03.259812,0,1316075610,25.632790518407255 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:21:37.308205,0,1445667936,26.01769230789309 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 03:22:13.654111,0,1493852262,26.206006379852774 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:22:44.661526,0,1127331930,24.704098583912252 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 03:23:16.791537,0,1158789210,24.4930221168223 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, -1, 1]}",2022-11-30 03:23:48.818752,0,1062497364,22.608186762946183 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:24:26.151864,0,1590067302,26.365434081423825 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 
03:25:04.997659,0,1686359148,26.898935796393708 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:25:38.612617,0,1397560416,26.061090394566637 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:26:18.290877,0,1734466668,26.887018361917384 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:26:50.457197,0,1284618330,25.456784215173144 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:27:24.405212,0,1397560416,26.060964017387292 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:28:00.762632,0,1558610022,26.24033664128142 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:28:30.909491,0,1093954644,22.91490934985308 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:29:06.861622,0,1573417062,26.51638836290174 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': 
[1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:29:38.763394,0,1253161050,25.32548928774925 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:30:14.354144,0,1541959782,26.36876929191619 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:30:45.758366,0,1095874650,24.13357019256553 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:31:22.344668,0,1527152742,26.27025764535305 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:31:54.461060,0,1253161050,25.41836880480659 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:32:26.060470,0,1284618330,25.472978060833974 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:33:02.551992,0,1510502502,26.241380252439953 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': 
[8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:33:33.960758,0,1347532890,25.707442276004617 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:34:12.877897,0,1717816428,26.937139740910155 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 03:34:43.973196,0,1095874650,23.88536811980862 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:35:22.799736,0,1749273708,27.035785346779857 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:36:01.488138,0,1780730988,26.94260959266693 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:36:41.098531,0,1765923948,27.1368398440508 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, 1, 1, 1]}",2022-11-30 03:37:11.314304,0,1062497364,22.486957414672982 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:37:43.678526,0,1190246490,24.977323840246868 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 
'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 2]}",2022-11-30 03:38:15.756806,0,1095874650,23.82580846204717 From 0b2fa19de0fe11841d67bca16f8755c7d484daf6 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 8 Dec 2022 03:07:52 -0800 Subject: [PATCH 38/60] Update MobileNetV3 example Signed-off-by: Maciej Szankin --- .../dynas/MobileNetV3_Supernet_NAS.ipynb | 43 +++++++++++++++---- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb b/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb index 4fdbc291284..cbbd678b4bc 100644 --- a/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb +++ b/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb @@ -13,7 +13,7 @@ "\n", "#### Super-Networks\n", "\n", - "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification task on ImageNet-ilsvrc2012.\n", + "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. 
This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification on ImageNet-ilsvrc2012 as well as Transformer Language Translation (based on [6]) for the language translation tasks.\n", "\n", "#### Methodology\n", "\n", @@ -38,7 +38,25 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2" + "!pip -q install neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatievely, if you have a local copy of https://github.com/intel/neural-compressor, you can uncomment and run the code below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import sys\n", + "# sys.path.insert(0,'')\n", + "# !pip install -q autograd==1.4 fvcore==0.1.5.post20220119 numpy ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2" ] }, { @@ -84,12 +102,16 @@ "metadata": {}, "source": [ "### Define Architecture\n", - "We currently leverage pre-trained Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path. \n", + "We currently support pre-trained super-networks:\n", + "\n", + "1. Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path.\n", + "2. Hardware-Aware-Transformers (HAT) supernetwork [6] for language translation task on WMT14 En-De. To run this supernetwork you have to manually download preprocessed dataset from https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/configs/wmt14.en-de/get_preprocessed.sh and pretrained model from https://www.dropbox.com/s/pkdddxvvpw9a4vq/HAT_wmt14ende_super_space0.pt?dl=0\n", "\n", "Super-network options (choose 1): \n", "- `ofa_resnet50` - based on the ResNet50 architecture [4]. Search space of ~$10^{15}$ architectures.\n", "- `ofa_mbv3_d234_e346_k357_w1.0` - based on the MobileNetV3 architecture [5], width multiplier 1.0. Search space of ~$10^{19}$ architectures.\n", - "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. " + "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. \n", + "- `transformer_lt_wmt_en_de` - based on the Transformer architecture [7]." 
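Because the Transformer LT flow depends on assets that must be fetched manually (the preprocessed WMT data directory and the `HAT_wmt14ende_super_space0.pt` checkpoint linked above), a quick pre-flight check can save an aborted search run. The sketch below is illustrative only: the paths mirror the `# example` values used later in the notebooks, and the helper function is not part of the neural-compressor API.

```python
import os

# Placeholder locations -- adjust to wherever the WMT16 En-De binaries and the
# HAT checkpoint were downloaded (see the links above).
DATASET_PATH = '/datasets/hat_dataset/data/binary/wmt16_en_de'
SUPERNET_CKPT_PATH = '/datasets/hat_dataset/HAT_wmt14ende_super_space0.pt'

def check_transformer_lt_assets(dataset_path, ckpt_path):
    """Raise early if the manually downloaded Transformer LT assets are missing."""
    missing = [p for p in (dataset_path, ckpt_path) if not os.path.exists(p)]
    if missing:
        raise FileNotFoundError('Missing Transformer LT assets: %s' % ', '.join(missing))

check_transformer_lt_assets(DATASET_PATH, SUPERNET_CKPT_PATH)
```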
] }, { @@ -113,7 +135,7 @@ "* `['acc', 'lat']` \n", "\n", "Description:\n", - "* `'acc'` - ImageNet Top-1 Accuracy (%)\n", + "* `'acc'` - ImageNet Top-1 Accuracy (%) (for OFA supetnetworks) and Bleu (for Transformer LT)\n", "* `'macs'` - Multiply-and-accumulates as measured from FVCore. \n", "* `'lat'` - Latency (inference time) measurement (ms)" ] @@ -137,7 +159,8 @@ "* `config.dynas.num_evals` - Validation measurement count, a higher count comes with greater computational cost but a higher chance of finding optimal sub-networks\n", "* `config.dynas.results_csv_path` - Location of the search (validation measurement) results. This file is also used to provide training data to the metric predictors. \n", "* `config.dynas.batch_size` - Batch size used during latency measurements.\n", - "* `config.dynas.dataset_path` - Path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php" + "* `config.dynas.dataset_path` - For OFA it's a path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php; For Transformer LT it's a path to preprocessed WMT EnDe directory (`(...)/data/binary/wmt16_en_de`)\n", + "* `config.dynas.supernet_ckpt_path` - Transformer LT only. Path to downloaded pretrained super-network (`HAT_wmt14ende_super_space0.pt` file)." ] }, { @@ -272,8 +295,10 @@ "[1] Cai, H., Gan, C., & Han, S. (2020). Once for All: Train One Network and Specialize it for Efficient Deployment. ArXiv, abs/1908.09791. \n", "[2] K. Deb, A. Pratap, S. Agarwal and T. Meyarivan, \"A fast and elitist multiobjective genetic algorithm: NSGA-II,\" in IEEE Transactions on Evolutionary Computation, vol. 6, no. 2, pp. 182-197, April 2002, doi: 10.1109/4235.996017. \n", "[3] Cummings, D., Sarah, A., Sridhar, S.N., Szankin, M., Muñoz, J.P., & Sundaresan, S. (2022). A Hardware-Aware Framework for Accelerating Neural Architecture Search Across Modalities. ArXiv, abs/2205.10358. \n", - "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", - "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. " + "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", + "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. \n", + "[6] Wang, H., Wu, Z., Liu, Z., Cai, H., Zhu, L., Gan, C. and Han, S., 2020. Hat: Hardware-aware transformers for efficient natural language processing. arXiv preprint arXiv:2005.14187. \n", + "[7] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I., 2017. Attention is all you need. Advances in neural information processing systems, 30." 
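Taken together, the search parameters documented above translate into a configuration along the following lines for the MobileNetV3 super-network. This is a minimal sketch rather than the notebook's exact cells: the CSV name, batch size, and dataset path are placeholder values, while `NASConfig`/`NAS` and the `config.dynas.*` fields are used exactly as the notebooks describe them.

```python
from neural_compressor.conf.config import NASConfig
from neural_compressor.experimental.nas import NAS

config = NASConfig(approach='dynas', search_algorithm='nsga2')
config.dynas.supernet = 'ofa_mbv3_d234_e346_k357_w1.2'
config.dynas.metrics = ['acc', 'macs']
config.dynas.population = 50              # evolutionary population size
config.dynas.num_evals = 250              # total validation measurements
config.dynas.results_csv_path = 'results_mbv3_macs.csv'      # placeholder name
config.dynas.batch_size = 128                                 # placeholder value
config.dynas.dataset_path = '/datasets/imagenet-ilsvrc2012'   # placeholder path

agent = NAS(config)
results = agent.search()
```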
] }, { @@ -300,7 +325,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.7.11" } }, "nbformat": 4, From bfd3dc37529d818b8b066bad2ef26a83f48871ea Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 8 Dec 2022 03:08:20 -0800 Subject: [PATCH 39/60] Add Transformer LT example notebook Signed-off-by: Maciej Szankin --- .../dynas/TransformerLT_Supernet_NAS.ipynb | 310 ++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb diff --git a/examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb b/examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb new file mode 100644 index 00000000000..1e7ffcd71b5 --- /dev/null +++ b/examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction\n", + "\n", + "This tutorial demonstrates how to perform a multi-objective neural architecture search (NAS) on a MobileNetV3 one-shot weight-sharing super-network [1] using the Intel® Neural Compressor Dynamic NAS (DyNAS) search approach. \n", + "\n", + "#### Background\n", + "Neural architecture search, the study of automating the discovery of optimal deep neural network architectures for tasks in domains such as computer vision and natural language processing, has seen rapid growth in the machine learning research community. While there have been many recent advancements in NAS, there is still a significant focus on reducing the computational cost incurred when validating discovered architectures by making search more efficient. Evolutionary algorithms, specifically genetic algorithms, have a history of usage in NAS and continue to gain popularity as a highly efficient way to explore the architecture objective space. In this tutorial, we show how evolutionary algorithms [2] can be paired with lightly trained objective predictors in an iterative cycle to accelerate multi-objective architectural exploration. Specifically, we use a bi-level optimization approach [3] denoted as `dynas`. This technique is ~4x more sample efficient than typical one-shot predictor-based NAS approaches. \n", + "\n", + "#### Super-Networks\n", + "\n", + "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification on ImageNet-ilsvrc2012 as well as Transformer Language Translation (based on [6]) for the language translation tasks.\n", + "\n", + "#### Methodology\n", + "\n", + "The flow of the DyNAS approach (`approach='dynas'`) is shown in the following figure. Currently, three pre-trained super-network options for the image classification task are provided. 
In the first phase of the search, a small population (`config.dynas.population`) of sub-networks are randomly sampled and evaluated (validation measurement) to provide the initial training set for the inner predictor loop. After the predictors are trained, a multi-objective evolutionary search (`search_algorithm`) is performed in the predictor objective space. After an extensive search is performed, the best performing sub-network configurations are selected to be the next iteration's validation population. The cycle continues until the search concludes when the user defined evaluation count (`config.dynas.num_evals`) is met. \n", + " \n", + "
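The bi-level loop described above can be summarized with a toy sketch. Everything below is illustrative: the function names are placeholders, the "validation" and "predictors" are random stand-ins, and the toy ranking uses a single predicted metric where the real flow runs a multi-objective evolutionary algorithm such as NSGA-II in the predictor space.

```python
import random

def sample_random_subnetwork():
    # Stand-in for sampling the super-network's configuration space.
    return {'depth': random.choice([2, 3, 4]), 'width': random.choice([1, 2, 3])}

def validate(subnet):
    # Stand-in for the expensive high-fidelity measurement (accuracy/BLEU, MACs).
    return {'acc': random.random(), 'macs': random.random()}

def train_predictors(history):
    # Stand-in for fitting cheap surrogate predictors on the measurements so far.
    return lambda subnet: {'acc': random.random(), 'macs': random.random()}

def predictor_space_search(predictor, n):
    # Stand-in for the evolutionary search in predictor space; this toy version
    # ranks by one predicted metric, whereas the real flow is multi-objective.
    candidates = [sample_random_subnetwork() for _ in range(10 * n)]
    return sorted(candidates, key=lambda s: predictor(s)['acc'], reverse=True)[:n]

population_size, num_evals = 5, 20
history = []                                               # (subnet, metrics) pairs
population = [sample_random_subnetwork() for _ in range(population_size)]

while len(history) < num_evals:
    history.extend((s, validate(s)) for s in population)   # costly outer loop
    predictor = train_predictors(history)                  # cheap inner loop
    population = predictor_space_search(predictor, population_size)

print('validated', len(history), 'sub-networks')
```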
\n", + "
\n", + "\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "\n", + "For released version of Neural Compressor:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -q neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatievely, if you have a local copy of https://github.com/intel/neural-compressor, you can uncomment and run the code below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import sys\n", + "# sys.path.insert(0,'')\n", + "# !pip install -q autograd==1.4 fvcore==0.1.5.post20220119 numpy ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import Packages" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from neural_compressor.conf.config import NASConfig\n", + "from neural_compressor.experimental.nas import NAS\n", + "from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Configure NAS Algorithm\n", + "\n", + "The `NASConfig` class allows us to define the appropriate paramenters for determining how the neural architecture search is performed. Currently, the following multi-objective evolutionary algorithms are supported by the `dynas` approach: \n", + "* `'nsga2'`\n", + "* `'age'`" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "config = NASConfig(approach='dynas', search_algorithm='nsga2')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define Architecture\n", + "We currently support pre-trained super-networks:\n", + "\n", + "1. Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path.\n", + "2. Hardware-Aware-Transformers (HAT) supernetwork [6] for language translation task on WMT14 En-De. To run this supernetwork you have to manually download preprocessed dataset from https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/configs/wmt14.en-de/get_preprocessed.sh and pretrained model from https://www.dropbox.com/s/pkdddxvvpw9a4vq/HAT_wmt14ende_super_space0.pt?dl=0\n", + "\n", + "Super-network options (choose 1): \n", + "- `ofa_resnet50` - based on the ResNet50 architecture [4]. Search space of ~$10^{15}$ architectures.\n", + "- `ofa_mbv3_d234_e346_k357_w1.0` - based on the MobileNetV3 architecture [5], width multiplier 1.0. Search space of ~$10^{19}$ architectures.\n", + "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. 
\n", + "- `transformer_lt_wmt_en_de` - based on the Transformer architecture [7]." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "config.dynas.supernet = 'transformer_lt_wmt_en_de'\n", + "config.seed = 42" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Select performance metrics\n", + "\n", + "Performance metric options are as follows. Currently, the `dynas` approach supports the use exactly 2 objectives.\n", + "* `['acc', 'macs'] `\n", + "* `['acc', 'lat']` \n", + "\n", + "Description:\n", + "* `'acc'` - ImageNet Top-1 Accuracy (%) (for OFA supetnetworks) and Bleu (for Transformer LT)\n", + "* `'macs'` - Multiply-and-accumulates as measured from FVCore. \n", + "* `'lat'` - Latency (inference time) measurement (ms)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "config.dynas.metrics = ['acc', 'macs']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Search parameters\n", + "\n", + "* `config.dynas.population` - Size of the population for evolutionary/genetic algorithm (50 recommended)\n", + "* `config.dynas.num_evals` - Validation measurement count, a higher count comes with greater computational cost but a higher chance of finding optimal sub-networks\n", + "* `config.dynas.results_csv_path` - Location of the search (validation measurement) results. This file is also used to provide training data to the metric predictors. \n", + "* `config.dynas.batch_size` - Batch size used during latency measurements.\n", + "* `config.dynas.dataset_path` - For OFA it's a path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php; For Transformer LT it's a path to preprocessed WMT EnDe directory (`(...)/data/binary/wmt16_en_de`)\n", + "* `config.dynas.supernet_ckpt_path` - Transformer LT only. Path to downloaded pretrained super-network (`HAT_wmt14ende_super_space0.pt` file)." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "config.dynas.population = 50\n", + "config.dynas.num_evals = 250\n", + "config.dynas.results_csv_path = 'results_transformerlt_macs.csv'\n", + "config.dynas.batch_size = 64\n", + "config.dynas.dataset_path = '/datasets/hat_dataset/data/binary/wmt16_en_de' # example\n", + "config.dynas.supernet_ckpt_path ='/datasets/hat_dataset/HAT_wmt14ende_super_space0.pt' # example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Perform Search\n", + "\n", + "After the DyNAS configuration parameters are set, the search process can be started. Depending on how many evaluations `config.dynas.num_evals` were defined, the search time can vary from hours to days. \n", + "The search process will populate the `config.dynas.results_csv_path` file and will also return a list of the final iteration's best sub-network population recommondation. \n", + "\n", + "Note: example search results are provided for the plotting section if you wish to skip this step for now. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent = NAS(config)\n", + "results = agent.search()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot Search Results in the Multi-Objective Space" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcMAAAFOCAYAAAD6qHbYAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAACjpElEQVR4nOydd3gc1fWw37NFq94s925jg00zYNOLgdAh9JoQIAmE8Ev7QnolvRcSOqGDaQYbAwZccO+9d1myem/bd2fO98eMZclWtSVbtud9nnm0O7edmV3N2XvvKaKqODg4ODg4HM+4jrQADg4ODg4ORxpHGTo4ODg4HPc4ytDBwcHB4bjHUYYODg4ODsc9jjJ0cHBwcDjucZShg4ODg8Nxj6MMHY4oIvKyiPz+SMvRXYiIisgJR1oOBweHtnGUoUO7iEieiHyhg3XnisjXu2DMr4rIShGpFpEdIvJ7EUlup839tvL50X7nC0Vk4qHK1B2IyFUiMl9EGkSkQkTmicgXj7RcDg7HG44ydOhxiMh/gYnAl1Q1G7gAaAA+FZGkdppXAz8SkbTulRJExHOI7W8D3gVeBQYBfYFfATccunSHJJeIyBF9NoiI+0iO73D84ShDh05hz74WisjfRaRGRHaLyDV22R+Ai4AnRMQvIk/Y508SkZn2LG+biNzRRv9XAgL8ElgjIjXAJ8D7wL+Bn7Uj4hZgCfD9Vvp3ichPRGSXiFSJyDsikm2XTRSRwv3qN86KReQxEZksIq+LSD1wv4icLSJLRKRWREpE5AkRSWhHRkREgH8Cv1PV/6lqnaqaqjpPVR9sIusvRCRfRMpF5FURybDLhtmz4AdEpMD+LB4WkQkist6W54km490vIots+epEZKuIXN6kfK6I/EFEFgFBYERbn5uIXCsim+0ZbZGI/MA+nyMiH9njV4vIgr2KVUTG2OPUisimpjNge7n8aRGZLiIB4NL27qGDQ5eiqs7hHG0eQB7wBfv1/UAMeBBwA98EigGxy+cCX2/SNgUoAB4APMAZQCUw1i5/Gfh9k/rPA1nAMGCufW4S8Kj9+t025LwfWAiMA2qAbPt8ITDRfv1dYCnWTMwHPAu8aZdNBArbuPbH7Gu/CeuHZBJwFnCufW3DsJTx95q0V+CEFmQ9yS4b3sb1fBXYCYwAUrF+ELxmlw2z2z8DJAJXAmFgKtAHGAiUA5c0uTdx4P8BXuBOoK7JPZoL7AFOtq8lo53PrQS4yH6dBZxpv/6TLZPXPi7C+nHjta/lZ0ACcBnWbP/EJt+DOqxVABeQeKS/985xfB3OzNDhYMhX1edV1QBeAfpjLfG1xPVAnqq+pKpxVV0DvAfc3kp9r6rW2K8vFJEwMAR43T4XaE84VV0LzAR+3ELxw8DPVbVQVSNYCu62Tix5LlHVqWrN4kKqukpVl9rXloelXC/pQD+97L8lbdT5EvBPVc1VVT/wU+Cu/WT9naqGVXUG1r15U1XLVbUIWIClxPZSDvxbVWOq+jawDbiuSfnLqrpJVePA1bT9ucWAsSKSrqo1qrq6yfn+wFB7nAWqqlg/GFKBP6tqVFU/Bz4C7m4y/gequsi+t+H2bqCDQ1fiKEOHg6F07wtVDdovU1upOxQ4x14aqxWRWqyHfL9W6sdEJMt+vRAYA2RizU7Amml2hF8B3xSR/ZX0UGBKE1m2AAatK/P9KWj6RkRG28uCpfbS6R+BnA70U2X/7d9GnQFAfpP3+Vj3oamsZU1eh1p43/RzKbIVU9P+BjR53/Ta2vvcbgWuBfJto5/z7PN/w5oBzhCRXBH5SZNrKVBVc7/xB7YyvoPDYcVRhg5dzf5pUAqAeaqa2eRIVdVvttJ+MpZCiQDbVHU38HPgW7bBybYOCaG6FWtZ8ectyHPNfvIk2jOpANBosWobcfRu5/qeBrYCo1Q1HWsZUDog4jZbllvbqFOMpZT2MgRrqbOs5ertMtDeq2zaX3GT902vrc3PTVVXqOqNWEuyU4F37PMNqvqoqo4Avgh8396bLAYG72eYMwQoamV8B4fDiqMMHbqaMqw9rr18BIwWkXtFxGsfE0RkTEuNVfUzIAr8HfiX/fBcCtQD3wL+0AlZfoO155XZ5NwzwB9EZCiAiPQWkRvtsu1AoohcJyJe4BdY+4ptkWbL5heRk7D2UNvFnqF9H/ilbQSTbhvMXCgiz9nV3gT+n4gMF5FUrB8Jb9vLmAdDH+A79mdwO9ase3ordVv93EQkQUS+JCIZqhqzr98EEJHrReQEW+nWYc26TWAZlmHOj+y+JmJZzb51kNfi4NClOMrQoat5HGsPrkZE/qOqDVjGHXdhzQ5Kgb/QhpJR1e8Cs4A3sFwlFmItj16rqqGOCmLPKl+j+dLq48A0rGW8BixFe45dvw54BPgf1owlgGV80xY/AO7BMgZ5Hni7E/JNxjJk+SrWvSkDfg98YFd50ZZ/PrAby0Dm2x3tvwWWAaOwDGH+ANymqlUtVezA53YvkGcvDT+MtYSK3f8swI9l1fuUqs5R1SiW8rvGHv8p4Cv2DN7B4Yiz1wLQwcHhGEZE7sey8r3wSMvi4NATcWaGDg4ODg7HPY4ydHBwcHA47nGWSR0cHBwcjnucmaGDg4ODw3GPowwdjltE5BkR+eWRlqMj2C4gW6X9QOU9BhF5T+y4tQ4OPR1HGTp0KWIFtg7ZAZxrRWSxHUC6x33XVPVhVf1dZ9vZ11guIilNzn1dRObuV0/sKCybW+jjZBGZYQezrhWRVSJybRvD/gQrXFrIbj9XRML2fa632/9ERNrzi9w7fofTXTWpe2cL/fxMrGDtfrttU9eSv2C5ijg49Hh63APK4ZjgBlVNw4qe8mesGKEvHFmRuhw3VtDvtrgYy9F9hIhM2K/sQ6z4qf3sOt/Bcl4/AFvB3ce++Kx7+ZZ9n/sDj2L5BE7fL8pMW3Q03dV9dt2v7CfXfVj+hl9Q1VRgPDB7b7mqLgfSRWR8B+VxcDhiOMrQodtQKy3RNCzH8vtE5BQ7ikmZNMlXJyK3iMg6+/VjYqVVetWe9Wxq+jCVfemXGsRKIXRzk7K9aYr+Zc+2ckXkfPt8gT2bu69J/ZdF5PdN3t8oImvtmdYuEbm6jcv7G/ADEclso859WA700+3Xe8fJAYYD
z9tBq6N2gOqFrfRzDlCrqi0GAFDVgKrOxQp/dh5wnYj0E5GgiOwNCI6InClWAmGvfarNdFd2m6FYgccfAq4SkaYxZScAn6nqLluOUlV9br8u5tI8GLiDQ4/EUYYO3Y49QyjESvmzAitI9ZVNqtyLleB2L1/ECtOViRUt5okmZbuw0gJlYIVbe11Emga7PgdYj5UVYpLdzwTgBODLWLkWDwgqLiJn2zL80B73Yqz0Ta2xEutB/4OWCkUkGbgNK4rOG1jZJvbmOazCCmb9uojcJAcGE9+fU+lATFZV3WPLdZGqltryNc0deS/wlh1CbS+/BL4ndk7HFvgKsFJV38NSnl9qUrYU+IqI/FBExkvLCXm3AKe3J7uDw5HGUYYOh4tiYO8D9xUsxYT9EL4KS3HtZaGqTrdTRL1Gk4epqr6rqsV2mp+3gR3A2U3a7rbTDhlYodEGA79V1Yid5iiKpRj352vAi6o60+67qAOhwn4FfFtE9g/mDXALVrDxGcDHWPn8rrOvQbGS1+YB/wBKRGS+iIxqZZxMrHBvHaG1++zGSpf0WtPK7aS7AksZ7v1sJtFkqVRVX8cKD3cVMA8oF5H9+2mgeWxYB4ceiaMMHQ4XA7H2ncDa+7rBNkC5A1igqk3z+pU2eR3ECp7tARCRr9hLmXvTCp1C85RJ+6cwQlXbSmu0l8FYs84Oo6obsQJa/6SF4vuAd+xcgGGsXID3NWlbqKrfUtWRWHurAZrPjptSgxUQvCM0vc8fYOUcHA5cAdTZs/T9aTHdlYhcgLWcuzeY9iTgVBEZ1+Q63lDVL2ApvIeB34nIVU26SQNqOyi7g8MRw1GGDt2ObTwyECvgNna6pCVYs6d72W+20kY/Q7GCYX8L6KWqmcBGOpYyqT0KgJEH0e7XwIM0ycsnIoOwMrl/Waw8h6VYS6bX2vuFzVDVAuBJLMXeEuuB0e0JIiKDgbOwkvpiK+F3sGaHrd7nNtJd3Yd1b9fa17Csyfn9+4ip6ru2rE2vYwywrj3ZHRyONI4ydOg2xEpLdD3WzOJ1Vd3QpPhV4EdY+2Hvd7DLFKycdxV2/w/QugLpLC8AD4jI5WKlUhooVkqmNlHVnVjLsd9pcvperHRQJwLj7GM01r7p3SKSJSK/ESvVkctWkF/F2oNrieVApogMbKlQRJJF5BKsmeBymqdlehW4H2sftq0fHc3SXYlIItas/aEm1zAOa1n0HhHx2IZJ14lImn0d1wAns09pgmV880kb4zo49AgcZejQHXwoVnqkAqzZxj+xHrRNmYKddV5Vgx3pVFU3Y+2xLcFaDj0VWNQVAtvLhw8A/8LKwzeP5ol12+K3NE8TdR9W6qLSpgdWLsX7sPYth2GlOqrHmt1GsJRWS7JFgZex9/+a8IR9n8uAf2MtxV7dNJu8qi7Cyie4WlXzW7uAFtJd3YS1pPzqftfwIuABrrZl/xmwB2sp9K/AN/daxdorAv5WlmYdHHoUTmxShyOGiOwCvqGqs460LD0d20hnAXBGZ3I62m0/Byap6v+6RbjWx30PeEFVW0sg7ODQY3CUocMRQURuxYpQMrrpTMaha7FnZzOBwXbCXgcHhxbwHGkBHI4/xApbNha411GE3YeIvIK13PldRxE6OLSNMzN0cHBwcDjucQxoHBwcHByOexxl6ODQBBG5QER22FkYbjrS8nSU/eOsOjRHrEwjXzjScjj0XBxleBQhIheKlRKpTqzUP4vkwGwIPRqxUgG1FA5tb/lEEWkxIPVh4rfAE6qaqqpTj6AcB00PuIeO8nE46nAMaI4SRCQdK/TXN7GiiiRgBayOHEYZBGuf+YgavYiIR1Xj3dT9UGDTwTTsZrkQEbcdc9XBwaGLcWaGRw+jAVT1TVU1VDWkqjNUdb1YaY8ac92JyDB7BrY3nudcEfmTiCy30xN90DRLgYica884a0VknTRJ7mq3/YOILMKKEzrC7vthezmxVkSetBXl3jZfFZEtIlIjIp/ZYdQQkfl2lXX2MmSzZLF2rNJPgAF2uV9EBtjXN1lEXheReuB+ETlbRJbY45eIyBOyLysEbcloR36ZZ8+wK8VOSGv7PY7AChrgFxGfPf40eya+U0QebDJGS3LNFZHf2/fTLyIfikgvEXnDvvcrRGRYkz5OEpGZdv/bROSOJmUvi8jTIjJdRAJYwb3bpLV72ErdRLESMefY738uInH7hxci8jsR+XcTWZ4SkU/sPheJlSbq3/bnvFVEzrDrvgYMaXIff9TS+E3kaO/79zt7vAaxEiIfENKuhT7vFZF8EakSkZ/vV+aSfanAqsRKGdZa1g6H4wVVdY6j4ADSsVL/vAJcA2Q1KXsMK9zZ3vfDsMKWeez3c4EirNBlKViRSl63ywba/V6L9ePoCvt97yZt92CF2fJgZV9QrFlqJtZDrwIr8gnAjVjpicbY9X8BLG4imwIn7HdttcCF9uuJQOF+5Y8BMSw3AReQhBWD81x7jGFYqYK+t984rcn4JlZkHBeQuHdsuywPK1nt3vfzgafseuPsfi5rQ6659vWPxEoztRkrNNsXbFlfBV6y26dgRel5wC47A6gExtrlL2NFw7lgr6xtfD9eBn7f2j1so9184Fb79QysYOXXNCm7uUn/lfZ9TwQ+B3ZjZbFwY2W0n9PafWxj/I58/3Zh/Rjce3//3E6fYwE/VhouH1YEpPheebCSMi8FBtnlzwJvHun/cec4soczMzxKUNV64EKsh/zzQIU9Y2kvF95eXlPVjaoawMphd4dYaX2+DExXK2WSqaozsXLiXduk7cuqukmtDAx7c+H9WVVr1cqhNwdLUYCVueBPqrpFrSXDPwLj9s4OW7m2TG09se1elqjqVFvGkKquUtWltkx5WA+0S/Zr05qMMazl0AGqGm5tbLECX18A/Niutxb4H80zvjeTyz73kqruUtU6rFnaLlWdZd+Pd7GUHsD1QJ5aKafiqroG64fK7U36/0CtxL+mWoG3u5p5wCVirSKcBvzHfp+IlQdyfpO6U+z7HsYKpxdW1Vd1X7qsM+g8Hfn+vaSq2+37+w77PsfWuA34SFXnq2oE6/vedGn/YeDnamUOiWD9qLnNvgcOxymOMjyKsBXM/ao6CGuWNwArJmVHKGjyOh9rhpeDpRRut5eo9qZFuhDo30rbveyfZmlvWqShwONN+qrGynzQYpDpTtBMBhEZLSIfiZUVoh5L6e6/fNaajD+yZVouIptE5KutjDkAqNbmDuv5NL+Wlu7N/imjWkshNRQ4Z797/yWgaTb5lvrvSuZhzSTPBDZgRau5BGvWvVNVq5rU7eh1dYaOfP9a+xxbYwBN7pv9A7DpdQwFpjQZbwtgAB39YelwDOL8EjpKUdWtIvIy8A1gNZDcpLhfC00GN3k9BGt2VIn10HhNVR9soU3jcJ0QrQD4g6q+0Yk2HRlr//NPA2uAu1W1QUS+hzUjaH8AK+D0g2BZ6AKzRGS+WhkomlIMZItIWhOFOARrybk9eTtCATBPVa9oS9yD6LczbRZjZde42ZZls4gMwZqZzTuIsTsrQ0e+f52lBGuZHrCyegC99hv
zq2oFMXdwAJyZ4VGDbWjxqFi58vYu4d2NtfexFrhYRIaISAbw0xa6+LKIjLUfDL8FJtvLW3sT7V4lIm7bqGLi3nEOgmeAn4rIybacGSLSdNmvDMtIpTXKgF72dbRFGlbWBL9YqZa+2VEBReT2JtdXg/XgPsBCVq08g4uBP9n35TTga1j3rCv4CBhtG3t47WOCiIxpt2XbdPQeolbGkFXA/7FP+S3GWko8FGXY3ue8l67+/gFMBq4XyxUpAev73vRZ9wzwB9ln2NVbRG48hPEcjgEcZXj00ACcAyyzLQuXYqX+edTeZ3kbK7HqKqyH7P68hmUEUYplAPEdaHzg34iViqcC61fzDznI74aqTsEKwP2WvXy5EcvgZy+PAa/YS1R3ANgWhxfZ7bdiGbjk2nVatIQEfgDcg3Vfnse6/o4yAes++oFpWLE7c1upezeWgU4x1j7Zr7WLsmzYs80rgbvs/kux7p3vEPvt6D3cyzysZfPlTd6n0Xy/sLP8CfiFPf4P2pC1S79/dp+bsJT7JKxZYg1WLsm9PI71uc8QKwXWUqz/LYfjGCc26XGAWIGxX9fDnMLHwcHB4WjBmRk6ODg4OBwytk/oPPvv+51s2541+f717xcRl/36JyJyqAZ6jjJ0cDhasC1f/S0cX2qn3SettPvZYZL7S62Mf1CRfrqrT4cu4XJVnaiqt3TzOPdj6y9V/bOqFrVdvX2cZVIHBwcHh0PG3o75gu1Piz1b+4eq3mX7NM9W1Yki8g6WG0sEuE1V60VkoapeuPfv3v7s+j8FrsaydXgYa397BrAOy+/3cqygDxXAG1gBStaq6ndE5H7gBqyADQDXaStKz5kZOjg4ODh0FbPtZdKn7NlallgBHC5in0HW/ap6CVYAhTtb66gJj9v1vwT8QFWXY1nQX66qrzWp9xDwtqpeDCSLyF6jqEJVvRbLJeq01gZx/AwdHBwcHLqKy7V5sPrPsKzJLwOet2eIfxORU7FmcFNa6kRkX6xj4F57K8Ckbf/VkcB0+/VKYG92nI323yKs8IwtctQpw5ycHB02bFiX9BWPx/F4evYtcGTsGhwZuwZHxq6hp8m4atWqSlXt3Q1dT8Zys+mvVlKBs4AUVb1YrKD3+xu+iIj4sAJB7OURrFB/I7HcqMAKGuLGijm7l11YsXM3AeOxllBPorkCbapkm9FzPo0OMmzYMFauXNklfVVWVpKT024A/COKI2PX4MjYNTgydg09TUYRye+irmaLiAKGql6uqntEZDhWgHWAbcAJIvIplk/p/oYvLwMLsWL67mU51hJrU7/Xj4GpItLUXex5YJKtZNer6lI7IEeHOOqUoYODg4NDz0NVJ7Zy/vwmr/1YsWf3r3Oh/fd59s3+9pYdEKpPVf/NvrjM7zUpuna/ei83ef1YW/I7BjQODg4ODsc9jjJ0cHBwcDjucZShg4ODg0OXICKX264V80Vkioj0ar9Vm/3dJCLZXSVfWzh7hg4ODg4Oh4yI9AZ+BVxvp1UbDSQcYrc3YblGVB9iP+3izAwdHBwcHLqCa7FyUzYAqOp2VS0RkdNFZJGILBWRL0NjHFPP3tf235dF5BkRWSgivxYrr+bVwBsi8sPuFt5Rhg4Oh4mS3DJCgfCRFsPBobvoj5Uya39+hxU95iLg2yLibaOPz2zL0mtVdQ/wKfAlVf1bl0u7H44ydHA4DMSicf74pf8w+Z8tpZp0cDgmKAFayp2Zpap5qhoDdgN9aN0Rfm+0mFD3iNg6jjJ0cDgMLJyyjJqyOj6ftJCa8rojLY6DQ3cwHfiyiKQBiMgJItIfqBWRYfaMcARQDtQB/UUkB+jXpI/9w63tjTTT7TjK0MGhm4lF40z+x0eIWwgHI3z07MwjLZKDQ5ejqhVYS6Ifich84O9AFMuoZhJWZJkn7Rnic8CHwGNY2SZa4zPgKRF5uBtFBxxrUgeHbsM0TQq3lZCWnULvITmk56QBEAlGmtWrqWxg/dKdnHzu4CMhZrcy7fMN1PtDfPmLZx9pURwOA6o6C5i13+kq4Pz96k1nX1Dtvefub/J6ov13MlZ8027nsChDERkMvIqVw0qB51T1cRF5m30BWTOBWlUddzhkcnDobtbN28x/Hvkff/z4pzw2+dFmZdFIHI/XjcslTH1pAZ9/sJpfPHdPh+NV7syvYNvuMq6beEp3iN4lBIIR3py+CsMwuPbik8nOTDnSIjk4tMrhWiaNA4+q6ljgXOD/RGSsqt6pquNsBfge8P5hksfBoVsxTZO3/zqNQH2Q9//zSbMyVeVPv53Kh1NXUVlax8JP1+Nyu1j4yfoO9a2qPPfWIl58dwmVNf7uEL9L+Hj+JmKxOKapvD9r3ZEWx+EoRkTyRGSDiKwVkZX2uWwRmSkiO+y/WfZ5EZH/iMhOEVkvImd2ZIzDogxVtURVV9uvG4AtNEndYeeuugN483DI4+DQ3ayfv4WCbUWkZaex5MOVlOSWNZZt3ljEjm2lfPj+Kqa8vAB/fQg1TTYsz6WssH3f4o3bi8ktqEAV3p+xthuv4tD4ZP5myxpC4POl24nG4u01cXBoi0vtydN4+/1PgNmqOgqYbb8HK3/iKPt4CHi6I50f9j1DERmGlZtqWZPTFwFlqrqjlTYPYV0UgwYNorKysktkqavr+VZ9joxdQ3fKWFNWi8vtJsPeEwQoLytn7CV2blGBPXmFeNMto7iPpy0hMyGKOwFIhi99/1KrnitOKBqgstJsc7zpn68iJcHEMJS1G3aSf95wUpJ9ByW7qYpLWk3xdgCduY8/f/BiolEDAI/HTX1dbWfFOyiO9+/jccSNwET79StYaaJ+bJ9/VVUVWCoimSLSX1Vb8oFs5LAqQxFJxVoO/Z6q1jcpups2ZoWq+hyW9RHjx4/XrswD1pNyirWGI2PX0B0yqir/+uqLJKX4+Pmk77A3QfeVd17KFXdMRBVcrn3KpqYmwNZ1ZZTMWI87I4WM3n148DvXAx3PcXfJeaewZG0JsbjJZReOpn//viR4O/+vXOb388gH03jyizfQLy2t/QY2Hb2PR/Ircbx+H49hFJhh50p81tYJfZsouFIsmxSwVh0LmrQttM+1qQwPm2uF7WPyHvCGqr7f5LwHuAV4+3DJ4uDQWUL+MH99+AXqq5vv0W1YuJWiHSXsXJvHjtW7MeIGW1bsAmDS1BU89ercxrq1tUGyslK47vzh9OmTTq8kD1++YzydZU9hDR63i5RkH2s3F+JxH5wb1utr1pJXU8ura9YeVPvOEonEOly3pLSOvz/+KYbR9izZoeeSI/00XbLbPURko4isbHI81EJ3F6rqmVhLoP8nIhc3LbRngfv7KHaKw6IM7T3BF4AtqvrP/Yq/AGxV1cLDIYuDw8Hw+eRlrJy1kemv7Eu2raq8/bdpRMMxIqEob/9tGounr+EvDz7PlnV5fDR7AwuW76KkvI4du8r47o/eZPeuMj56bibRcIxAXfCAiDSvv7eUH//wTUxTG8fYn227y0j0eUnwuAmFY1TXBTp9PWV+Px9v206f1FQ+3b6D0oaGTvfRGQqKa3j4p5OoqOrYOO
9OWcHCxTtYtjIXsBTpY//4kMrqnmsw5NCcKFHO9VzZ7gGEVXV8k+O5/ftS1SL7bzkwBTgbKLOd+rH/ltvVi4CmfkqD7HNtcriWSS8A7gU2iMha+9zPbF+Tu3AMZxx6MCF/mA+fn0NGTiqz3lzMtfddTHp2KqZh0ndIDtGoicstZPZO5+1/f0IkHOPJX0/G6J8BwLsfraKuIkBtXZBpn67j9kdvwIhZe2lZ/TKZtWY7A3tlkOIymPTaYoz6CCuW72L8hBH85jdTuOuucxk7ttHejF9++9oW5ewMi/LyiZkGpt/ASIAFefncfmrbbhrRWJyN24qZeBDLe+98uJKKygben76Gb9y770d9NG6wp7qWE/rsy/RTUlrLshW7SE1LZNI7Szln/AhmLdzKynX5vP/xah669+KWhnDoaYiAHPp8S0RSAJedCSMFuBL4LTANuA/4s/33A7vJNOBbIvIWcA5Q195+IRw+a9KFqiqqetpeVwpbEaKq96vqM4dDDgeHg2HFzA3464KgEA5GWfDBSgDcHjcP//M+wqlpxDIyOf3yU8nPLSdoGuxeuRujPoQqLFq2i42bi8jOTmHpyt2cef14bnj4Sm54+EpOv/I0nvhgEf/9YBGffL6ReEMUdQnPPzuHZctz2bSlmNffWNziDPFQuOnksTxz5Q30ynPz9BXXc+spJ7fbZs6ibbw9bSU786yAIarKh7M3EAhF22xXUFzD8rX55GSnMXfJ9mazww/Xb+U773xEbXBfAPPPZm4kGjVwCZSV1bNy9W7e/XAVmelJzFm8rdXZYa0/RG3DYQ9p6dAKAohL2j06QF9goYisA5YDH6vqp1hK8AoR2YG1wvhnu/50IBfYCTwPPNKRQZwINA4O7XDW5afw6xF9Gt/3G7pvZrRoxkYC/jCCsGLpDuL9UlDgzLNHcte3r2HEKYOZPXcz0z5eiwIpyT5y8yro19eaNU5dsgnDVArKaghvLgVVQCgqqObpZ2dRb0TIz69k06YiTjllUJddk0uEqbPW4w9GmDprA6c82L/N+tFYnLenrcLngbemruAX37uWTdtLeP7tRURjcW69+oxW267dVIBpmPiDYUSEDVuLuOyCkwjH4ry6bDWBSJR3V2/gwQsnAHDDteM45+wRje2Ly+to8IdJ9HmJRg1mL9jCnTdOOGCcf74+B1OV791+zkHeFYcupwtmhqqaC5zewvkq4PIWzivwf50dx1GGDg7tkJKexKjThx5w3jBMpryykEgohqqydPUezCtHIiK4Rw3mtHMs14obrzuDG687UFmEIjHeX7iBmGEQj5ukj8jkGzecC0B9dYBJ7y4lapj4XVFmf765S5VhXnE1KzbtITsjmZWb95BbVMWIga0nJZ+7eDtVNQEG90ti1fp8du4u57Wpy/G63Uz5bB1XX3IyKUkt53G94YrTuOGK0w44//GGbdSGwiR6Pby7agO3n3kqmcmJ9OqVSq9eqY31hg/rzbDB+36A9Mk50PJ1+54KNu4qQYE9pTWOpWZPQATcR0/4a0cZOjh0kphh8N+PFvHVyydw61cvJhqOoQpvLFyLOzURgIr6AHHDxNPGw8DjdvHIDecTty0mk1xxJp5l7dutKizi6R1riQTiePolc98DFzZra5gm/miUjMTEg7qG3MJKMlITUYWEVDe7CyvbVIYZ6UlcNXEsCe44J5/kIa+omk07SkjwuqmpDzJjwWZuvnJcp2TwRyKM6dsbALfbRXUgSGbyvuuJmnESXB4SfV5GDuvdZl+vT19BLG7dxxnLtnHmKaM6JYtD9yAuRxk6OByzzNuQy7Rlm+mVlsx914ynvLgWl1u48pazWm3jD0V4+ePlfOOm8/F6LFcIr8fNFWeOxlRFgKqqqsb6H27YSihugE+o9ofYVlPFOWnJjeVvrFnHzJ07eeWO2zrlNL+Xy84ezWVnj+5w/XPOGM45Zwxv9IV8felqzr5kBKf2tbLvnDi8Tzs9HMh9553Jfee1HCkrbho8vOQN7j/hfC7u27ZiM00lZpgM7ptpvzcxTBP3UfQgPmY5iO/mkcJRhg4OrfDx64uYcOlY+gzMajwXMwxenr2StKQE3l+8kZvOOZn//vYDEpMT+Nk/7mp0uj+gr0Wb+WDBRk4c0ocrzj6xWdkvZ87ilH59uWLAvryovdxJZCclkZyQQDgWI6nJv2pDJMKkdesIxWIsyMvjkuHDu/jK2yYQjfKfjUsR4AeXXkayt63E5QfHvNLt5DZU8vz2BVzQZyTuNvaeXC7hT/93feP7yspKRxH2BETgKPocjh5JHRy6ib0+fQBzpq6irtpPwc4y3nx8Bu8993mzuku37qG0tgERF4FwlFfeXkT+rnJ2bilmx6aWXZn8oQjvzV1HWpKP1z5dSSxuNJZtr6xkfn4eL69aTSi+L3bnDWeM4Sc3TuTiM4bz21uvYHjv7MayKRs3E4rF8bjcPLt0BWYXW5o2pSzUcIAl6+sb1lAfCVMXCfPGhjUd6qcz1rBx0+B/OxaRJAmUh+pZWLazWfnWumIm7V7c4f4cjiBuV/tHD6HnSOLgcATYsHYPv/j+JOJxg8JdZbzwx2l8+MpC3ntuDp4ENyvmbKEkf18s3LFD+vKbu6/kx7dM5LG7ryB3bi4VVQ34G8K8++KCZg99Q02ipsHsFdtpCEQQESpr/SzekNdY538rVlIX81MZbmBhXn7j+WG9s9gdqeWDgm306pVCWtK+2KObystJSfDi87iJGHHK/d3jiF4YqOXeua+zsWafi5YCL61dhaql4F5Ys7JDyvhXH8/i0y07KKquwzDbjiqzq6GC6kiA/PwYtXUmc0u37RtflWd2zOb13YsoDtYc9LUdLDNL1jG9aFWX9vlu/lJKQrVd2mePQEDE1e7RU3CWSR2OW1SVSS8vJHdnGcsW7mDVp+twu13Menc5sUgMcbuIB6N89NpCvvz9a/n3X6fzyPeu5IKxwwAw4gavmwaamgApCaTvl6/viS3zqKht4P/GTiSnSdmJQ60QinXhMEuK9hA3DeKYLN6Tz93nWm4BNaEQ723ZhMclPL9mJf+6cp+j/V+uuapLrt9UZXbxNi4fcGKL+46vbF9ObTTEc1uX8J/zbkFECEQjkBFiQG9FEJK8cVRNkNZDwm0tq2Bhbj7rC0vRoMkjl5/H1ae3vl95YkY/Hh5yBX/Zs4D+Rio/OeXqxrL1tQXk+itwi4tJeUt4dMw1rS5NdzWheJSXd3+OqcolfU8mxXNwxktN2e0v54Vdc9kdqORHY69vv8FRhUDH/Ah7BD1HLTs4HGY2rNlDcWE1aWlJvP7cHJbN2oSqEvSHGTVuKPG0JM686jQuvHYcs2dsZOWyXUz/YHVj++r6IBVZHrInDMQYkcEFt57R+GAuDzXw0Yr1LPnXZsqCdVxw2ojGIyfDUozpPh8TThdOPjXKmFNDnDF6n4HM25s3UBMOETVMFhXsYWtlRZdf/9LyPH639jOWlO8+oGxrTTmfFW6lly+F9dXFjbPDeSW5GHj46tgz+M/F1/DOFd/A7Wo7NuqLS1bicbkoq26gvCHASwtWEm2yV
Lw/UcPg5TVrSEvwURMKMzcvr7HsrbwlRI04IsKc0k08svA9FpYeKH938FnJGiJGnJgZ5+Mumh2+tnshCS43iyu2URhsP33XUYdI+0cPwVGGDsctC+ZuwYgbxOMGDf4wl95+Nrd/83LuffQaMgZl40pLYlt+FTmDsvlg8goyslKY8ckG6mqDABSW1tEnO4205ER6Z6dRULpv2W5S7krMJRHwm/z7jRnNxlVVFpfk0xALYbriZCUl0SsphdrYvsgs4/sP5KpRozC9Jl8/8yyyEpM6fF25/hKe2fFRm/t0pirPbluEojy7ddEBS52/WjSLUFDwuT1k+ZLZUVdJfTTM7MKdJLo9PL95MTPLJpPkbtt4psIfYG1RKaapBANRYoZBlT/I55t2tdpmd00NoXiMuGkiInyWu4P/t/BDDNPkvhEX8avTbuYnJ9/AHUMuZF11MU9sWtju0uuhoqpMLVyGqSaKMq1oBXGzdYXeEQoCVSyp2IEiBOJR3s1f2kXS9hAEcLvbP3oIzjKpwzHD3od/R5fNHvr2FXzl65c0tklJ9SEi1NUGeffrL2IClTV+XnxmDn5/mKSkBCLhKAvmbOH6m8/ijLGDePo3dzXrc2NZGYZpsmDddrTEgFShcHkFZRV19O1tRZ1ZVlrADxd8wuOX3MC/z/pmY9umeTpP69uPXy6fSUiiBDRC39RUOsrrebNZV5PLxL6ncVL6kBbrLKvIY2d9BUluL7saKllSvpsL+lpRXzZUlFLqD5Asifz33NsYnGbJ/crWlfhjEaISoSEi7KxtYEv9ek7OGNeqLL1TU3jz/jvZVlLBUzOXoqq4XS6Ka1vP53diTg4z7r2/8f2Pl0xnVsFuPi/axRWDLTcLVeXFLWtIcnspD/mZX5rLpQNO6PA96iwiwk9PvpWwYWXeSHR727Rw7QiZCSn8vzHXsjfZwoCkrLYbHHX0rJlfezjK0OGY4X9zlrF52lr+8c+HcHXApNvrdePxJFJfGyQja9+eXjgU5bQzhzJz7XbCmAwc1otb79wX4quvHYB7fwzT5Pdz5xI3Te7xncG0NGtJ1ZUk5OVW0rd3BqrKU+uXEjUMnlm/jOe/cHOLynvGnh3sabBmSG9uX8eXx5xB76SUA+rtz/b6QrbWF+Bze3kzfy6PnXJvi/0nu718ccipje9TPPuixzyzfrk1A1J4eeMqfnneZQBM6DsYbzjE1JqPcEscryfCrLIP21SGAL1Skjn/hKGcf8KBUXzaY2ddFUtL95DpS+SZTUu5bOBI3C4XeQ3V7KqrxCVC3DSYlr+pW5UhwInpA9uv1AK1oWXEzGp6p1zT7HyaN5Er+5/aSqtjBEcZOjgcXmpDYd547lMS5hcx98o1XHbtPgd4VaW4tLbFEF2LZ23mrWc/52+vfYNEO5xY3/6ZnPnFsUyqyUXVzY7EEF8+sV+7MszPy2u07EyfkM0rtz58QJ1lpQVsra4kxZvAhqpSVpUXM77vgQ/ZPfU1GJh43W5MlPKgv0PK8KPipcTMOInuBLbWF5AfKGNY6oGyn95rEKf3OjC8WzgeozoUIsNnGYfk19c2lo3N6svWksVkp/u5ou9F9Evsg8916EYkbTEldyNR08DjclESbGBlRSHn9B3CsLRs3vnCVzDtWVVTZd6TMDVGXs3fMNRPVtJFeFwdn+F3lNJQNVvq87i0b8sBDI4ojjJ0cDg0guEodQ0h+vdueRa2P6/MX4Z7eSmmR3juj+8x8eozQASXCJu3l/D8Gwv57jfSOWHYvkgp8ZjB5BfmU13ewNyP1nLGNWOZsnkz35gwgX99vKDR/3D6qq08esNFpCe1/uBXVZ5dsYJAzFpGe3bFCi4fOfIAK01DlYsG7p0hSat7XYbLJC3JS7LHS8SIo23kLY3G4zz28Wy+d9kF3D30Uq7sZyUMFoGByZ2L0Zno8fLm9Xe2WFYXq2Nzw1ZSfankB8u4ZeCdh2TJGYpHSWpHiX197Nl8cdjYxvfD0y1/SxGhd1LXK5b2UKOSuP85POk/RtqwoN1LdXAOMdMyjCn3T2FA+r1dLtOk/BmsqtnOqZkjyPFldnn/B43Qo/wI28NRhg49ktc+WM7qzQU8+cs78Hjaf+hMe30erpCBuoXq3ZXM+HQVr4Z28/R1N/DG+8uIRGJ2toXrGtssm7uVmsoGUtISmfraYlZlBZi2axvnDRlM/37pxLyWAkr0egjF4qS3Y8Ny45gxBKJWOqPWorJcMGAoFwxof7nw8kEnMDw9m50N+ZRHqhiQkt5q3U8372DO9lz6pqfy3UsvoH9S6zFGO0pxsB6fy02vxH2z0QUVi4iaEURc5Af2kBvYzchUa5+xtP51IE6/9Ps71H/YiPLtVc/zjROuYkKv1pc3s3xJZPk6bjzU1fijm6gPr2RA+n0AGMHXMUPvY/rOxZ14Wbvti+pfwtQogouShjfpl3YHLvG1266j7AmUsa52J4IwtXABXx95Q5f1feh0TT7Dw4WjDB16HJU1fmYv3UbcMFm4OpeJZ7cfdDnjsmT82gv3kgjuG9KYHyphW1UFT81YzLZdZQzul8SajQXsyqtoDPq8c1MRScnWzETdwuxVW0nMSeCZFSt49ist7+W1hohw92kHZmY4WMZm9WVURjZz1n9AQIPECQH7XC9UlW0N6xiWNJaXl6wiMymJ6Ru3c/f40xHewy3JZKfe3uHxJu9az6jMHE7vNQBV5YfL3qdvUhr/PPfWxjrjMk8jK5JJnWmSW9JAH591H+NGHWUNLwNKr5Qb8brbNwSZWbKOklANr+yew1nZI3D1wIemqlJQ+28C0S1kJ38BnyRghqaBKx3D/ywu3yXtzg6HZn6HuGktnbskAaFrQ9dNKZpPwAiTIF7mlK/hlkGXkO1r/YfTYUVw/AwdHA6F92asJRyNI8AbH60gvp9P2prC4gOWF/9085fpVZyOJCQypKYPS/wV9E5JYcbmHQwamEV2ZgrDB/eiujbQ2Oa+713J09O+x9PTvseJPxxPIFMwyqOsyS9mc0Xbfn2RWLzN8q5gSdVagoaVA/Cj4rnNygpCu5hc+DzvbJxHddBKaBuJx/lw/VqqGl6ivP4ZDLNjkWkqwwH+u3ERf187D1OVJeW72VJbwILS7eyo23cfBiUP4sS00Sxc6WfagiJCQdun0v82SgxVg3L/W+2OFzaivLNnEZneZEpDNaysbt3N4kjSEFlNKLYTEQ8l9S9jhKaiGgRcqLEHjbbvCpGZdB45KVeQk3IF2cmXdHnElYtyTuPrI27gK8Ov5r7hV+Nz97C906PIz9CZGTr0OMLROCMGWUt9SYkJ+IMRMtOtWdHWsgq+983nuOWKcXz3Ozexo8hyRyhdV059eQCP103+qhJip/bH2FWHDkvjqi+fwdlZ2W3muLtm9Gj6uVN48dXFjDt9EIPSW/91XR0I8uBrU/jLLVdzQp9DX5JsjVllizHVxC0uVtVs5I74NaR6klFVPi//AFMNqhIX8t87Hm58yHpd72IacZA4NYH3yEm7r91xJu2w4osW+GtZUprHvzbNIK5KzIzx382f85/z9u0hFlbVsSav
GBHhzcXr+M7V51EZmGoVilLh/wDTcymDU05qdbxd/jIipuVHCLCyaidn9xqFmn7isc14fWcf5B3rWkoaXsHUCEIC1cEZDEj5D97MMY3l4j3lsMixoGwHJ2X0o3figXkcz8w+sYUWPYWjK1C3owwdehzfvXdiq2V/nTQT78pipm8s42sPXMVf3/4cEWFIYRyXx2W5VKhyflUGaz7bwqnnjuLqH42iukl6pKaoKiLCSTm9+eCjdSQnJJC7o4JoKA6t2Mu8vWI9JbUNvLRoFX+4+cpWZV1VVkRDNMLEwSNardMW3xn1FUJGGAC3uElxW3tnBaFd7AnswOvy0WCW4kkrZUz6GajG2F461VqeUqXK/xa9Ur/S5nJvfTTMlN0bMVWJmQb/27qckJaRnRhDUcrDBc3qz9ywnUAkSoLbzcdrt/Lli87gxD7PY6ol507/Tt4ueJz7h/+SHN+Alobk5IzBvHPhDw84Hw68QTj4Khk5U3C5O58SqqsZmP4gMfNmAAQP3oST2tzvi8c24vaM7dLZX1XEz+/XT+ey/ify4yZh6Y4KjrJlUkcZOhwVVNT4qY6G2DllI764iek3+dVjb1CeYj147r77Iu791hcAy9/vke8/jZqwbfVudq3fQ9bAfYYgexUgwJNvzGfYwF6ccfIgPl+xg0A4SrLPy3uz1/LwbRceIEd1IMi0dVvonZbMirxCdpZXkZWWRHZiUjOlY5gmf1w+j0Asyrn9B5Po6fxeUY6v5b03QTglY0Lje7fs/Tf2MCznuUbF5JKkdvc9E91efnnWFxpnaaleN6VxP1EzAkCGt/nM99xRQxk9fKjdv5Dk9eDzWK4hphosqXqRmBllYcU0bhp0oGtJa5hmLeHQm6jGCAVeJSX9Bx1u212k+jruA2jE8whWP0xSxm/xdsCwpqO8s3slqsrc0u18ecQ5DEw+yhzze9AyaHs4ytChx1NcWcdDf3mHS88YgW9nFRJXQNn03mr0ztMBeHPBWp757m24XMIHy9ZhrKtAVIkGYrz0+Md8/693NPb3+PZpnJw+hLGu4cxdtoNE327OPHkQqVlJ1JaFufCskYwf2zxyS3kwQHZiEmsLSogZZmNszZnbdvJ++Sb+eOGVTOi7z29vTkEuFaEAKEzbtZU7Tuw65+rBySMZnDzygPMigs/buVlogtvNZQP3t+Y8sO+9jBnYp9Xl5u0Na6iJluFzJbGtYRWVkeJWZ4f7Ewm8jZp1IElEQ++RlPKVHjE77CgR/wuoBgn7n8TTAcOajlAdCTClYC2GmoRiUd7IXcaPjrLZoTrK0MGhY6zctIcxI/qRktT6xv9TUxZRt2gXywrruPCOC9iwZBeC0P+MgZx0wckApCQmUBHyY6DsrqgldFY/XAKmCQMv3Pdw3xMoZ1HFZtbW5HLK5ioEiETjTP18A6V+P/EkWJVfzE/v+0Jjm4gR58FPp3D3mNO486RTmTh6n8L595pFVIWDPLNuGeOvGNg4E3tuwwpCsRgiwkubVnPzqLF42wlofbQTNcIMsfcKBSFkBNppsQ9xZ5GQeLXd1oNqrFtk7A6MeB6x8AxE0jDjBcQj87pkdugS4Y6h4zHVmrUPSc1up0UPQwC3owwdHNqlrKqePz73GbdfeQZ3Xze+WVk0EuO1373HZQ9dxrxFW/DtqqQ6v5rc04WUXumoqUiDwZcvPI3pL83ljv93LffMeZVAPMqvRl+D3mv9EwrCFRP2GXO8mT/fSswbjDJ39XYS8ILCR/M2EnHHcYmL0toG1ucWc9bowQBM37WN3LpKnl23nBtPGEOS7UNYEQowdddmeiUlsbWmslk0ma+fOh6/7XPoc3tw0fZDYVrRy5zqupAcOuckf7gx1GCXP4+RqcMOKDst60JOyzpwabkjJCbfAcl3tF+xB6JmDe6E08BWWqbZetzVzpCZkMzXRx/c/ewZ9Cxr0fZwlKHDEWPyZ2sxTOWDORu4fuIpSNzEXxuk39AcFry/nM9enkthJIJrcxkgEDdJynTz0Hcsx+Kk5AQ+fnEOHzw7G8+J6awPFQMQTIvw/266uNlYlZWV1EUDrKnJJVInuBJMht7hoqoimWHpWXhNL7P35JKAi5gohr3FGDHiPLFmCWGNUhaq54Odm7lrjLU0u73GCqtmqkmqN4GNlWWNyvDKoe37Ru5lU90KFlV+Qp2rjuH9v9usrCFWR8AI0C+xY8uNXYE/NBuvuz++hLEHlG2p387Uwk/5xdjvk+PrPkvaowlPwhmkZj9/pMXomRxFyvDosXt1OGrJy6vktVcXNjtXXtXAzCVb8bhcNATCfDRvI5P+8TF//84rhIMR3n98OimZyez6ZB0Z5QGSE70k+7zEcis489yRnHnuSAYMzGT2m0tISvbx0t+mYpiKofCXdTNbTF+UkZDCv8c9hHdLP06qPI0bh11KsfpZUlfI3eNP5yfjLqRPiY8XvngTZ/W3lFqp30+9EcTrdoHLZFnZvmz0FwwYyic3389nt3yVz255gPtPPrjYkB8VvwpAQXAX1ZHyZmUfFL3GK7v/haHd79cIYJp+qup+S2Xd7w+4h3EzzpKqFUTNKJ+Wfn5Y5DkeUDUbjZ6OKcQKZtHe0VNwZoYO3c4bry9i3do9nH/+KEaeYGV5N0zl0nNGE48auDwuvFGTZTM2YBrKm3/9kOqyWpJSE4n5w3z521cz4apxACSl7fN3mDVpMYH6IEkpicTyGsjMTyE+OomaWB3BeJQU74Fm8Gu2lREIxVm/q5Qd3hpbgRq8vX0dZUvqqKkLsnlrKSddbBtveOIMGBDG6/IQMWNk92q+l1UZ9rOroZxzeh+c+8SW+lVUR62Zr4nJtOKXuH/4jwEoC5ewpX4FpkZYV7OMM7MvOKgxOkN98D1MjRKN5xGKLiXZd15j2bLq1fjjQXwJPpZWreLqfpc5s8MuoLrhKSKxrQzo9cSRFqXrOYpmho4ydOhWduwoZeuWYnw+D2+/vYyf/fyLAPTvnc43b7+An9/zFPf+4FqWfrKWurogcdNkzaIdfPeJr+Gyg/wOGzuIPkMO3Esbf8Up9B1iPYxLQtUMGj+A1JwUElxeklsIAB2LG7wydxWGaRA2TKIFJiPGWEYJJUX15JdXEXCHmPT5Gq45ZwxJPi8Dk7L447i7G8NkZyU0zxzx4s6FzCvbxhsXPUhmQjKdJdmdxqg0a9nV60pmSPLwxrIZpW8RNSO4UD4rfZPTs85p4kbRtaiaFFZ/j1h0OWgEMKhpeKqZMjTVYHDyQCLJMVwiBI1Qt8jS1ZgaJ27WkeDueYrbMKqpC7yNaoxwdCOJCYfHkf+w4ShDh+OdJ345mbMvHcuitflEowa+RA8b1hdQUFDF4MHWQ2nBR2spyi3n7SdmMmBoNuEkD6ZhktY/g1MuPInUjJaVy4ZVu0lM8jFq7ECGnrQv/dGM+Zsp3FHB7de0HMEkEo8zblh/QnZmiUHZGTx4hZWn8IkpC9kYKyCuJnWRIDuLKjl1RH88LjcnZw5usb+iYA1zS7dhYjI5byVfH33xAXVKwyVkejNJdLccbHpoymi+PuLngLWv2dRtwWN
sYXBCA+DCJ0VE4vUke7vHojAQWYI/vACvuz85aV9GRHC7MpvVuaj3eYyRUW1G8umJFDVMpsQ/jfH9X8YlPStcWa3/DVTjCC6q659iQM5TR1qkLkMRx7XC4fhm95Ziln++mZ0bC/nuP+5h4kQrhJW4hD59rDBn0UiMKc/PIT0rhZK8SgZcMAKzfzYajOG+YFirijAWi/Ps3z8lMSmBv/7vgcYkvsFQlBcnL6QmXM0F5w5lQFbfA9qmJvr4yS2XttjvhIv68HmvKD6XFwgzckj7SmdS7jIC8QgJLg/v7VnN7cPGk9Fkdhg1o7yw+ylOzziT6wfc3G5/0Hxf8Oo+5xEzCgEQ8ZHYTXEnVU0q6p/CJWkYZh0J3jEk+87oUFvDDOF2tZ5VQtXs8nicnSFuBiion4Rh+qkIzKFv6lVt1jeMSoLBt0lNfeSQ0lN1lGBkEWCgQCS2FcNswO06MOza/qhGkC7MftEtHGWuFY4BjcNBU1fRwL+/8/IBhhaTn5+Dx+umviZA8fZS3AMTGTA6h3HjhuLzWW4JuZuKCDaECYeixOMGc6auwqgNY0YN1i7ZRZ2/5SW4hbM2428IU1Vez6rFOxvPz5i3mbpQPYZh8syHH3f6WtbV5uMRN4aaiAi7/eXtthmT2Z/bh57FjYNP58bB4w7IOLiqehnBuJ/l1Yupj7Vtbq8aIVz3O+KRZY3nstIeoE/mL+mT+Ut6Z/wIVzckhgUIxzYTie8CEZQ4NYF3OtQubtaxtex2ApH1LZbH4nmUVNyOaTZ0pbidosT/IYYZQCSBvLoXMNvxXwwEXiXgf45odPlhkW9wn7cYOWA5IwcsZ3j/zzukCM3oOiJVd6BH8L52FBVp9+gpHJaZoYgMBl4F+gIKPKeqj9tl3wb+DzCAj1X1R4dDJodD55mfvcXcyUsZfc5grv3S5QBUltayZXUeIoIRN/j03WWsrE7g9L79+PPl+36Vn3TmMJ6d87PG93/8xWQWzdtKYqKXQTHLcnQvYSNGXTREjjeF915dRI2/HhTeeWkhEy4cDcDkGSuJmTFcuFi2uIzq26vJTmp5dmeaJoH6MGmZ+2ZxD51wBQ+dcEWnrv/6QdZeXzAeZEXNcjK81gzJ1DhxNZld/ikgRM0I8ys+b3N2GAtNxzSKififwJ1w9mGZlewl0TuG4b3faEwg7HF1bCm2ouFtokYxJfXPcELvA5f36hqeJRrfQn3gbTJSvwrQJbPEzsw2A7GdeN0ZgCDiJRIvJ8k7sMW6hlFJKPg+Ikn4G54k4TB/Dh1BVYn5n0HjhcRD7+FNuf9Ii9Q2Pev2tcnhWiaNA4+q6moRSQNWichMLOV4I3C6qkZE5OiJv3ScU1tRz8IPViIifPbaQq6++1K2byul/4BM/vzGIxiG5YA8q3g3C2YvZuX7uewcdxYn9Nq33+RN8DB/3lbOOHMoJcEgsWQPcRecetZQakMhQpE4Q1KzeC13MQvLdvDcOfdx0W0nMS2/BBAuGLkvYv+NX+nN8jIrFZDbC6XRglaV4YKP1zHlpQX89c1HSPAd+r/AoqoFfFo6ncFJQ8j0xlhV8Q/O7P0b+ib2J25aM5G2MtWrRogGngdXL8x4HkZ0GR7fuZ2SQdVAjQJcnmGdaheK5ZLoGYLP23oItpaIm3VUBt7G4+5DMLaJQHQDKQn7Qs7F4nkEw3Nxu/rQEHgV06xANUavzF91apz9KQ7tYE7ZG9w15Oe4Xe3Hez2p1y/YWPEDEtwjGZ39zTbrBgNvodqASAqx2AZisTUkJBycu0x3obF1mPHN4O6NEXgDT9KtSAdmk0eMHvZjoi0OizJU1RKgxH7dICJbgIHAg8CfVTVil7W/NuXQI3juF28Tj8YRl9BQ4+fjl+Yxdf4OzrtgFF9/yNqXC8djTFqwEc/sYlx7Avxn8iz+8427GvsoKKjiqSdnceU1p7Glvp5Y70RQKElR/rd5KQv+t47//v5ePixYS9Q0WFy5k/LTCkkdCmBS06u0sa/bx93I7dzYTEZV44AYkbFonPf+N4/qigYWTF/H5TefhapSH55LemLn880F4wHmVczFIx4+Lf2Y01JKqY/mUx1eyoMjvtWhPozYZtSsB00FjREPz+q0MoyHZxDz/52k7LcRd8cMXGJGFVvLv8GgjO/QO7VzGdIDkXWoxlANA0J9eFEzZRgIfQrEUBVUw9QH3sYlSaSnPoDX07JBUnuoKgsrJlMa2sXW+mWcnNl+dJaGyGbyGpbhN5cxJP0OEj2tW5QmJl2Oxzus8b3bPfSg5OxOjMgiROOgfhAXZmw9bl/3u9wcFLaf4dHCYd8zFJFhwBnAMmA0cJGILBOReSIyoc3GDoedaCzOP56cQV198z28k88dxbnXjuOcq09n7LmjKCirJxyKsWDuViorrL2MmGlyZiyd1Mo4yck+gjPym+0vvjd5BSLCp5+sw4gZqIC6YNGaXcycvoHYrhCP/W8KYcMy5f/fzvkUBsvwujx4XV5y/YVtyL2bvPKbMYyaZucXf7aB+pogyck+prwwj2gkTiCygj3VP6UhsqjT92dx1SL8cT+qsN2/lVz/dhI92Wyve5eYGexQH56EM0jtM5vkrMdJ7TsPX/rP2m/UBNUY8cCzqFlPLPhGh9uVNbxJ3PRT3PA/TI12asyMpIs5beB8Th+4iNMHLqR/evMMFRmpX2Vgn08Z2GcaKUk3IVjGHvX+Fzo1TlOKQzsoC+8myZ3OkqopGGb78Utza5+i3jQx1GBd5T/arOv1jiEp6frGw90DXTE8qY/g67MAX5/5+HrP7bmK0Eal/aOjiIhbRNaIyEf2++G27tgpIm+LWKbCIuKz3++0y4d1pP/Dak0qIqnAe8D3VLVeRDxANnAuMAF4R0RG6H4WGSLyEPAQwKBBg6isrOwSeerquiaGYHdypGVcsTqPbdvzmT4jkSsm7gvPdc4Np3DODZZPVEVFFS88t4icPgnE4wYff7iU675oWSMOzgvjH55Ngs+LETPYvHY7fQf3orKygZ0799CnTwLRmEFSmpeGZBNB8IkHd0EMGZGC7g5y8lm9cCe5SFAvX+t9DXnB7fhcPk5KH9Pqd6Gq4S2CERMzMpmM5Jsb7+PuXQX0G2FZtHq8bnZsySWe+jaR+EByg+8wKOskOrPR0TfanxtSrRlpZWg9hOLEIy7AJLdkFREzncyELJLd7fsg1tUFgfaX/iLxQir8bzEw83sIHozIMuLBBJAToWEpvtD14Mposw/DrGdP9RLgBKIaJTf+KZlJ57c7dnlNPnWRHWT4Oh5urqImFzWtjB7+hp1ovJSDefRsqFqGN5iB2zZ02la8nj6JB87e9n7WcbOegvJqxBiIAPnBfIa5C3FLK4kqDyNH+v/68NDlsUm/C2wB9mbe/gvwL1V9S0SeAb4GPG3/rVHVE0TkLrvenS112JTDpgxFxIulCN9Q1fft04XA+7byWy4iJpADVDRtq6rPAc8BjB8/XrvSz+lo8Jk6UjJGY3Hem76JcET4eNZ2rr1yAhnpB5rR+/1hhg0bRMxOa5Sekdko881fv4q5701lbO++3HPKaQwbMx
CP142qjwsvPBUF/LEwbwZW4hrmIWLEydmaTLgwjCS4IKp8MTCG26+xlg39cT8vbZ2NW9xMGHo2PveB5uXR2G5qoh+SnpiE8hpZWbcBGeTk5HD/977YXPbwcnIr55HiSsY080lI3Up64kUdvkdWYG1redDUSzGsFX8AYib8ZevvOCXjNG4ffHfH+uvAZ51b9Sc0cS7upIvJTrmWaKAcI9GFtTXvISE9hLudPcBAtIJM9aHEAR/JSTXkpLc9tqqypfoZIuZ6Ls1+FberY0qlV69nUbW+G4gL10Eqoy9k30nE3Lecm+zOaNXAxbqPOQz1XEVNxLI6douP9KwkktpYKj2cHA3PnkNC6DIDGhEZBFwH/AH4vlgf/GXAPXaVV4DHsJThjfZrgMnAEyIi+0+y9udwWZMK8AKwRVX/2aRoKnApMEdERgMJQNdM+xwOmeWrdlNbGyQpMYFgOMrnC7Zy83UH+p+lpibyre+2nPF9m9lAcZZSbpbzyAk5uDxCeaiWPr0zuetuK7pJeaiecJ5i2kYmI8fkMObGfX6C/fpnNr5eWLEAwzQwxGBZ9VIu7n3JAWM2hD9HiWNqCDAIRBYD57RylW4yk/fliJMOzMxawyVuXLJvBrigYgZRM8ra2tVc1ucKevkO/eEXjG7FH1mJx5VFacOzZCZfQULKA5DyQKf6SUkYw9i+L3aqTW1kM/5YPi5fiAL/ZwxLv7H9Rlg+kl0xQfC4EvC4OudreWr2fYc+sMNB04V7hv8GfgTstRbqBdSqNgbtLcSyQ8H+WwCgqnERqbPrt6lbDtfM8ALgXmCDiKy1z/0MeBF4UUQ2AlHgvva0t0P3EInECAajZGXtCzd2+imD+fWP9v0SH9i/c1m2TVWeXrUcjZoEaiK8sXEd7oQIzy1byqQ7rueULGuZtU9SOt8e037+t7gZZ3HVQssyU5X5FXNbVIbZqfeTmbIvHZBLUqgKVrfYZ2riWaQmntWp6+oIISPE3IrPUZSIEebz8pkdnh22hGoc1RBVgWlWrj9xEzOq8EdWkJ64b3nTMMqAOG53y+4Dh8L22leJawi3RthZ9waDU6/G7erhjt8ORwylw8l9c0RkZZP3z9mrgQCIyPVAuaquEpGJXSpkEw6XNelCWp8wf/lwyODQNm+8uYQtW0v4yx/voLShgYKaOs4ZNphTxw4iFIvhdrlIcHcuOW1tOIRbBHeZYpbH2V5cxqKynYQCHv60YhqvX3Fyp/y43OLmwRHfIGYbTrS0RAog4sYtR9bc3FCD0zLGYdjLg719h+Y1VNfwDJHocvpnPUmftH1KNcHdPLWTv+4x0CDp2S93uY9cn+Rz8aTUkpwewy0JKGZjWdyMMavsDSb2uZ1Ed0obvTgcN3R8mbRSVce3UX4B8EURuRZIxNozfBzIFBGPPTscBBTZ9YuAwUChbZeSAVS1J4QTjs2Bqio/c+ZtxTBM1qzLZ/KebawtKuHdr92NV1187ZFnOPmWk/n59Z3L3p2dlMzfLr6KB1e8S3JKCjWFDQTjiicAa7a62ThhI6dmndpi21c3rmHikOEMSc9sPCciDE4eciiXethI9aRy2+C7mp2LmX48ktxp9w3DrMEffMtyu4hvIrEVt4tYdAPxmBUNJh5dgdfXcozWg2V4+s2kRSvJyT5wuXdz/VLW1Mwh3duL83M656bhcOzSGWvRVvtQ/SnwUwB7ZvgDVf2SiLwL3Aa8BdwHfGA3mWa/X2KXf96RFUcnHJsDH3y4mljMmsG88OoCls/cROyTfKau28K7k5dQtbGCuR+spbTB32ofqtpiDsG356ylrM5PQyjC9u2ViF9IKnAh5W6m7lzaYl+5tdU8uXoZT61e1mL50YiqycrS77On4f32K+9Hg/8N0Cgg1NU/0eJ9Bgj6nwINg0YI+luv19XEzRiLKj4gyZ3CyuoZhI2OuZTsT55/XWOQAodjA3VJu8ch8GMsY5qdWHuCe/12XgB62ee/D/ykI505ytCB8ooGsrNTyMxMpqi2Hvfnxbg31vD6tEW8++pCNNGFbKvnhXmtx2t8c/pKnn574QHn07OSSMjwYKrB+ecPJlsVr0dIbRDuG3UzYSNEw35xO/+3biUJLhdLigrYVdPyXl93Y5oNXapMKkKLCcTy2V03ibgZRFXJrX2dmB1f0jCKiMe2tNg2GluPSCIiCRhmNabZ8j1xuTLwJJyJJ+EMxN2H/QN/dxc7/GtoiNcAQtgIsrHuwO9Be1SEC3g9/zfMLz/QTzKvYQZrKp/sAkkdDjvSgaMTqOpcVb3efp2rqmer6gmqenuT4C1h+/0JdnluR/p2lkmPY7ZXVTI8M4uf/PC6xnO//et7LJ67A0xImVdBwADxCsRMVs/dhl536QF7UYFghHdmrqEs0MDlF4/mxAF9KayoZdmWPczdk4cETOK1capqQ7jLvHi8ghlXPlu6jd6n7KI8UsKDI36EiFBQX8uc/N14XS6CsSivblzDby66/LDeF9UoZZX3kJ76TVKSr+2C/kx21L6ASxIwNESRfzqJnv4sr3ySQLycU3O+T6D+n4QCZZjmGQcE5O7T67lWem5OWuafOyVXxKihIZpLTtKBBkRRI0RCK2mn9mdw8mhuGfTtxvc5vs4b73xW+gyGmiyvns7Ffe7GYxvmxM0wm2teJW4GGJVxE6mtxBV16IEIcGgzv8OKMzM8TqkKBrj9xTf598LmUVe2TN2IxuJoLEqoqI4f/vJGRlw5msf+8SX+88O7uO29N1lWVABYS6OmUcHCNblUhgLE48qPX54GwAufLOe5j5YSrotAg4Hb7aIhL8ItF4zjpnNO45YLTqdPRgKb6ldRHikiN7AVgKzEZH594aX85LyL+c1Fl3Pz6LEcbgLBj4nHC6lreNKy3DxEQkYZcdMPYgWLrg6vYW3lvzBU2V73AaHIGmLRJahGiYTeO6SxTI1SFVrQobo7a19lbcVviBq1zc5HjCCv5/2M/EDL2Sj2J9WTyQlp4xqPzITenZK5IlzAbv9m3AhRM8r88jcby/L9s4ibIUDYWvtWi+1NM8LWip8SNRyvrJ5GV0ag6W4cZXic8sfZ8/BtCPLmnNWNyW4Bhp49EsGEYIi0Uf0pdsfZHKhlVXUFn5fnsyG3jG9OnoaqEo8uxV91N0s27iIaMxBDKN5ez/IdeazaXojX48ZboaT7fPRLT4WAwe2Xns6j91zKo/dcijloM8GYganKrLIPWLAzD9M0uXrEaK4deSLXjjyRcX37HyC7qnnAua5CNUq9/xnElYZpVlBTfhNmvPWwbx0h2dOfSwa9y2WDp3HZ4GkMTL2BqmgRLoSIGWFzxWOoBkDjhAIvURbczJ4OKqL9qQjMZGvlL/FHt7ZYvrRyMqWhnYTiZZQEZmFqnLz65vuYG2vnUB+rYHHF5MOy71gW3kWCKxGPmCSIUBkpbizb3fCxbbUqlASXEDXqD2hfEfyU6tDnFDe8eUCZw5FFXe0fPQVnmfQ4o7K8Hk128fnCrWQWG4RjYf67aAk/mmhlac9MSUDCEdQNqRi8v2Qj2anJfLJ6G5WFEVx+CAYivL1xA9cPe
BI1q0g/s456NXE3uFGBP02dQygSw+f1UBEI8I1bzyMrNRkEstP3OaWvrahl1o6B3Hl6BH/Qxa/mzOLOM0/j4Qtbt4I0zToqq+4nK+ufeD3Du/z+xOK5qEbB9uuLxrcQCbxEUsYvu2yMQLwGrysd1MpmEZXeJCVfQDiaREJinJnlrxAy/Hx5+L865WRuapQ99S+iGOTXvcjJvf/arLw6UsTK6mnsCW7k5OQs4mYAl/jIb3ifYem3kuDOIGIEWVnzMUnuDKqjxewJbmBoymlddu0tcUrmRE5OTkbrfgJ4kOyHGssu7PsHYmoZ5LjwkOBOb9bWNCMU1L+Ix5VFuf9DBqTdg0MPQbo8HFu34ijD4wBVRURoqA/xi++8ztjrRpOyK4o0BPHhY9m6PLCVoVlWieEyUI9QvauQ4FlZJHoSaQhHaCgM421QcMHfP/+cq+/chcuVzfisrXwcPQF8ghqQlujj4TssR3ARuOCU4aQkNn+om6psLemDYVQzQC6koKwONdcxZd0m7jjzVLKTW96v8gffIhbbht//HFmZf+rye5XgPYmB/eagZohA5Y0oBvHwTMyUB3B5BnXJGMPSrmZI6r59UJd4EHERilZS5S6kLvYfALbVL+DkzI7vl1YEZhIxynBLIjXhJfijW0lNOKmxfHn1FNzioSpSQCi5P4PSLBcIF24Uy5q4IpKPqQamHdgj39/9ylBVUf+z9ppZFA2+haRZGT8SPdkk0np+xerwQuJGDS5JxtAwFYGP8XHo+7wOh47ldH+kpeg4jjI8xtm4rZh3P1nNr79zHZ9NW01dbZDN03eQ0yCEiutw5aRxmatfY/3qYQlELu+HW1xEExP4+wPXk5qeTDge50vPvYVhGIgJ1Ct74+XWaRLZJ0VwSwImysWjhnDV+BNbFshmQVEexYF6Mn2JPL92JbGSEKI11IeVd1ZvaDY7rPAHKKqr57T+yQQCb+By9yUUnkNqfHe3zA4B4pEFxM0GtoRTGeuLEgt9hC/tYQLRHbgliUTvwStGEcEtLc/4llW+S9QMAi5WVL3PSRkX45aOhYhzu1Lp0xharrmpXn2sklz/KlziJm5GKQrXcuOgHx/Qx6DkMXxz1LOdvKJDROssl5C9efmMDhn/AZCZeA5j+zzR+D7RM4j6aPctozt0EkcZOvQEVJWX31vC9t3lLFq2g+nvryYlNRF/Qxgj6Mf0gvoDnHT6vigmqVcMpf8EK4JIisdL/0FZ5CSlYJrKoIQ08l21uEW4oPcw0vt+QmGwkmtPgNvG74v4kuJpP0TXstICRISoaSnX8YMLyfKWI64MhmY3j3P61KJlLN9TyCt3ZKMaQvAAcUKhGXjTvtE1N2s/PIkTKQ79hCU175Kd+T1Gp0xE1WBn1a/wurMY1etfuF0eXNK5qDztcXLGZXiIEDP8nJR1PZ15muQkX0JO8oHh6VSVFE8G1w34f41RY1LcmV0k8aEjrkykV8dTTzXF40ol3bf/zNUxpOkRCIfqR3hY6bAytMPaZNI8OKpDD2bdliL2FNeQnJjA6+8sIT0zGdMwMYw40T211mM2bvDmpLlcfrEVJ/Sxc65osa+GSJgyM4CRrGjEwJvkYWX1Tv61bSq/GXY7fb2dCzf2o/EX86Px1tJsJLKIuuqnQJJAg2RkXQ9Yy3t7ampZkJuHaSozdw3mjnEzGvtwSWpLXXeIQv9s+iWfj8eVRCheg6FRUr37goObuFhSsxC3JLK4egGjMq6mOvAx0XgJUaOMD4t+T+/EsVzU5+DjjbbE0JRT2FLzPC5RBiaNwC2H9ntVVVlU9ntGpl/L0JSuj8Hq4NAWR9MyaZu2PCLSX0Qes4Nrh4EyICwi60TkNyJyoKmfQ49hxvzNREIxGqqDVEUiPPKL6/nPKw/y6N9uI3xqNvHeaUQHZCD928+1t7ykCH9vE5E42euD5EoNr+fNIRAPM7d8Q5ttVZXpJdOpjx1oCQjgkjSSku8iKelGkpLvRmRfLr6Xl68mGI1hqvLqyrVE4qm4XVm4XVlIB5YPW7I8rYvsYG3l38lv+BiAZRVPsrDsr80sJ3fUr6QmWgpYhic761eRV/0YcbOK2liU/MAGNtbNIRCvbVeGzrC5djKmxlE12VA96ZD7qwhvpCS4kvXVL3erFa6DQ4t0sdN9d9Lqz04R+S3wf8AnwL+ADUA91kbRKcAVWFkonlTVXx8GWR06SO62EuZ/toFHvnYJ9ZUB1m4s4PsPXMYJw/pQHw2TlJmIKzmD6KA4LhX2BKMYponb1fpvo4uHDGN4dibhTwuRKPjXVFAwLESaJ4m1NbspDdfQL7HlrBY7/DuYUTqDuMb54oAvHlDuTTgNb0LLRhojc7K5bqy1/5jgdhMzDDqSABdANYRR8zDu9F8A+2TbVvsa4GJn3TukJZxEWXgjACWhtQxItlJUZfn6clHvfZkvkikjKjESgF3mYFwSx1SD1dWfdOnssDaSh8dl/TgJGhXEzTCeDuYN3B9VZUONlXcwEC+jOLiCgSmtpbJycOh6epLrRHu0tQbjBUaqam0LZauBV0UkEyvHlMMR5sV/f8ZZF4zitPHDef3p2Wxev4cRpw5ixeY8AsEg02ds4LRxg7lvwevclTqOBFzExQoQ0TvmIxKLUxLbxYiUk1rMduDzePjNkAt43PgQX18PdTuD9NFsPIluMkmgIlzXojLcOytMcCWwuHIxE3tPJN2bfkC91vjSWeMO+p6YoY8gthkz8CLwKAB10V2UhZbhkgQiRg3LKv5D1KhHUdZVv07/pHGICH0Th9M3cXjjNVRXfxWvN4moqQTCcdyudBTID2zgIrpOGV456G9d1ld9bA+1kd24xIOhMXY1fNJMGaqaxEz/Ae4KDg5dghwjytCOFN4mtqL8WVcK5NB58neVM2f6erauLyThux42b9lN2BXihf98RjS3mKT8SpZ5Enhl2VJKQ/V84N7A0G/1Zlt9KTVRPz+acBrl8Xwm7XmKe4Y8wsjUMS2Pk1tOeqY1a+mf4uOhzKs46bRB1FRVk53ei3WFRWRmKUNT9lla5gZyyQvmkeBKIGSEWFi5kGv7d7/pu2oIDb4Crhw0ugzMQiAHt/gYmX5rY73C4HbcxFBMEt1pmBo7wNJTNQRq4HL3J9ENt+f0Jjv7qW6/hkMl3TuEqwY9AXbS5IT9wrztbviI3fUfcunAp3Ed4t6kg0OLHMt+hiIyCjgd2Kmqa7tcIodO894rC3G7hYqyOl55cibBcAjxmZSVVuHdXYHEDRqqSplSvYHYDoMtA4vxpfrQggbMHOGZHZ9z41AhZsaYXfZBq7PDW+45j1vuOa/xvakmf9r8AhclnoanNsD/mzyF/meW8Pblv23MNdjb15svDflSY5t+if0O6Lc1VM1OpztqbBtdDmYDiAc0jhldAYwj1TuIsU2cupM876PGNgD6Jw7C3YKTu8uVTK+cVw9KjiOJiJDqbfl+x80wO2rfImLWU+Sfz+C0zqXncnBoj6PNz7BTTxoR+RrwIXArMEVEftMtUjl0mIrSOtatyKV2cz7+4kpqYjUM+06QkQ/HCWX4IW6iQMqa
[base64-encoded PNG image data omitted: notebook output figure showing the DyNAS search-progress scatter plot of MACs vs. BLEU score, colored by evaluation count]\n",
+      "text/plain": [
+       "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib.cm import ScalarMappable\n", + "\n", + "fig, ax = plt.subplots(figsize=(7,5))\n", + "\n", + "number_of_evals = 500\n", + "df_dynas = pd.read_csv(config.dynas.results_csv_path)[:number_of_evals]\n", + "df_dynas.columns = ['config', 'date', 'lat', 'macs', 'top1']\n", + "\n", + "cm = plt.cm.get_cmap('viridis_r')\n", + "count = [x for x in range(len(df_dynas))]\n", + "\n", + "ax.scatter(df_dynas['macs'].values, df_dynas['top1'].values, marker='^', alpha=0.8, c=count, \n", + " cmap=cm, label='Discovered DNN Model', s=10)\n", + "ax.set_title(f'Intel® Neural Compressor\\nDynamic NAS (DyNAS)\\nSupernet:{config.dynas.supernet}')\n", + "ax.set_xlabel('MACs', fontsize=13)\n", + "ax.set_ylabel('BLEU Score (%)', fontsize=13)\n", + "ax.legend(fancybox=True, fontsize=10, framealpha=1, borderpad=0.2, loc='lower right')\n", + "ax.grid(True, alpha=0.3)\n", + "\n", + "# Eval Count bar\n", + "norm = plt.Normalize(0, len(df_dynas))\n", + "sm = ScalarMappable(norm=norm, cmap=cm)\n", + "cbar = fig.colorbar(sm, ax=ax, shrink=0.85)\n", + "cbar.ax.set_title(\" Evaluation\\n Count\", fontsize=8)\n", + "\n", + "fig.tight_layout(pad=2)\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# References" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[1] Cai, H., Gan, C., & Han, S. (2020). Once for All: Train One Network and Specialize it for Efficient Deployment. ArXiv, abs/1908.09791. \n", + "[2] K. Deb, A. Pratap, S. Agarwal and T. Meyarivan, \"A fast and elitist multiobjective genetic algorithm: NSGA-II,\" in IEEE Transactions on Evolutionary Computation, vol. 6, no. 2, pp. 182-197, April 2002, doi: 10.1109/4235.996017. \n", + "[3] Cummings, D., Sarah, A., Sridhar, S.N., Szankin, M., Muñoz, J.P., & Sundaresan, S. (2022). A Hardware-Aware Framework for Accelerating Neural Architecture Search Across Modalities. ArXiv, abs/2205.10358. \n", + "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", + "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. \n", + "[6] Wang, H., Wu, Z., Liu, Z., Cai, H., Zhu, L., Gan, C. and Han, S., 2020. Hat: Hardware-aware transformers for efficient natural language processing. arXiv preprint arXiv:2005.14187. \n", + "[7] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I., 2017. Attention is all you need. Advances in neural information processing systems, 30." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 8d8aaab752dfe3add98d0f07660bb28a99de49e3 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 8 Dec 2022 03:15:04 -0800 Subject: [PATCH 40/60] Update NAAS.md Signed-off-by: Maciej Szankin --- docs/NAS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/NAS.md b/docs/NAS.md index a2eb0eb456b..2255bc5ab6b 100644 --- a/docs/NAS.md +++ b/docs/NAS.md @@ -81,7 +81,7 @@ class NASBase(object): def search(self, res_save_path=None): # NAS search process. - ... + ... def estimate(self, model): # pragma: no cover # Estimate performance of the model. Depends on specific NAS algorithm. @@ -175,3 +175,5 @@ Following examples are supported in Intel® Neural Compressor: - DyNAS MobileNetV3 supernet Example: - [DyNAS MobileNetV3 supernet Example](../examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb): DyNAS with MobileNetV3 supernet on ImageNet dataset. +- DyNAS Transformer LT supernet Example: + - [DyNAS Transformer LT supernet Example](../examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb): DyNAS with Transformer LT supernet on WMT En-De dataset. From 9d95d770fdf32f4d53fa7cec6e1b73b3d40330e0 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 00:39:42 -0800 Subject: [PATCH 41/60] Fix UT Signed-off-by: Maciej Szankin --- .../nas/dynast/supernetwork/__init__.py | 13 +++++++++++++ .../supernetwork/machine_translation/__init__.py | 13 +++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 neural_compressor/experimental/nas/dynast/supernetwork/__init__.py create mode 100644 neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py b/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py new file mode 100644 index 00000000000..e833188cc78 --- /dev/null +++ b/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py new file mode 100644 index 00000000000..e833188cc78 --- /dev/null +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. From 0c5dea88e836c135ce1c81d716f35519897766a7 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 01:02:29 -0800 Subject: [PATCH 42/60] Change "TransformerLT" to "Transformer_LT" to fix pyspelling error Signed-off-by: Maciej Szankin --- docs/source/NAS.md | 2 +- ...rLT_Supernet_NAS.ipynb => Transformer_LT_Supernet_NAS.ipynb} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename examples/notebook/dynas/{TransformerLT_Supernet_NAS.ipynb => Transformer_LT_Supernet_NAS.ipynb} (100%) diff --git a/docs/source/NAS.md b/docs/source/NAS.md index 9e78bce7c93..8ad4a43554a 100644 --- a/docs/source/NAS.md +++ b/docs/source/NAS.md @@ -176,4 +176,4 @@ Following examples are supported in Intel® Neural Compressor: - DyNAS MobileNetV3 supernet Example: - [DyNAS MobileNetV3 supernet Example](../examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb): DyNAS with MobileNetV3 supernet on ImageNet dataset. - DyNAS Transformer LT supernet Example: - - [DyNAS Transformer LT supernet Example](../examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb): DyNAS with Transformer LT supernet on WMT En-De dataset. + - [DyNAS Transformer LT supernet Example](../examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb): DyNAS with Transformer LT supernet on WMT En-De dataset. diff --git a/examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb b/examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb similarity index 100% rename from examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb rename to examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb From 51ee7562b9565b61be5893571f5f54b7fbff8b96 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 03:56:30 -0800 Subject: [PATCH 43/60] Update imports Signed-off-by: Maciej Szankin --- .../experimental/nas/dynast/dynas_manager.py | 6 ++-- .../nas/dynast/supernetwork/__init__.py | 2 ++ .../machine_translation/__init__.py | 2 ++ .../transformer_interface.py | 32 +++++++++++++------ .../transformer_supernetwork.py | 2 +- 5 files changed, 30 insertions(+), 14 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_manager.py b/neural_compressor/experimental/nas/dynast/dynas_manager.py index 0a0792fc222..f1b7fc15e4d 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_manager.py +++ b/neural_compressor/experimental/nas/dynast/dynas_manager.py @@ -390,9 +390,7 @@ def import_csv( column_names: List[str] = None, drop_duplicates: bool = True, ) -> pd.DataFrame: - ''' - Import a csv file generated from a supernetwork search for the purpose - of training a predictor. 
+ """Import a csv file generated from a supernetwork search for the purpose of training a predictor. filepath - path of the csv to be imported. config - the subnetwork configuration @@ -400,7 +398,7 @@ def import_csv( column_names - a list of column names for the dataframe df - the output dataframe that contains the original config dict, pymoo, and 1-hot equivalent vector for training. - ''' + """ if column_names == None: df = pd.read_csv(filepath) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py b/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py index e833188cc78..451e864f2c7 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""This module contains all code related to the supernets.""" \ No newline at end of file diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py index e833188cc78..9003687dcb2 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""This module contains all code related to the machine translation (Transformer LT) supernet.""" diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 7ca6532d17d..6ba50ddfcc8 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -14,27 +14,31 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -Translate pre-processed data with a trained model. 
-""" +"""Translate pre-processed data with a trained model.""" import time import warnings import numpy as np -import torch -import torchprofile -from fairseq import options, progress_bar, tasks, utils -from fairseq.data.encoders.moses_tokenizer import MosesTokenizer -from fairseq.meters import StopwatchMeter -from neural_compressor.utils import logger +from neural_compressor.utils.utility import logger, LazyImport from .transformer_supernetwork import TransformerSuperNetwork +torch = LazyImport('torch') +torchprofile = LazyImport('torchprofile') +fairseq = LazyImport('fairseq') + warnings.filterwarnings("ignore") def compute_bleu(config, dataset_path, checkpoint_path): + """Measure BLEU score of the Transformer-based model.""" + options = fairseq.options + utils = fairseq.utils + tasks = fairseq.tasks + MosesTokenizer = fairseq.data.encoders.moses_tokenizer.MosesTokenizer + StopwatchMeter = fairseq.meters.StopwatchMeter + progress_bar = fairseq.progress_bar parser = options.get_generation_parser() @@ -137,6 +141,11 @@ def compute_bleu(config, dataset_path, checkpoint_path): def compute_latency(config, dataset_path, batch_size, get_model_parameters=False): + """Measure latency of the Transformer-based model.""" + options = fairseq.options + utils = fairseq.utils + tasks = fairseq.tasks + parser = options.get_generation_parser() args = options.parse_args_and_arch(parser, [dataset_path]) @@ -277,6 +286,11 @@ def compute_latency(config, dataset_path, batch_size, get_model_parameters=False def compute_macs(config, dataset_path): + """Calculate MACs for Transformer-based models.""" + options = fairseq.options + utils = fairseq.utils + tasks = fairseq.tasks + parser = options.get_generation_parser() args = options.parse_args_and_arch(parser,[dataset_path]) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index 6ccc8b9c4a2..33dc3b125b5 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -220,7 +220,7 @@ def set_sample_config(self, config: dict): layer.set_sample_config(is_identity_layer=True) def forward(self, src_tokens, src_lengths): - """ + """Forward function. 
Args: src_tokens (LongTensor): tokens in the source language of shape `(batch, src_len)` From d6223a2f11f04b2b6306d6a54b064887e1cd6df0 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 06:26:14 -0800 Subject: [PATCH 44/60] Update requirements Signed-off-by: Maciej Szankin --- test/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/requirements.txt b/test/requirements.txt index 32535567cc6..fd9a37c7328 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -18,6 +18,8 @@ tensorflow-addons onnxruntime-extensions; python_version < '3.10' autograd==1.4 fvcore==0.1.5.post20220119 +fairseq==0.12.2 +torchprofile==0.0.4 ofa==0.1.0.post202203231606 pymoo==0.5.0 intel-extension-for-pytorch From 95b7d39ad524163ac955de7f7463b5885fa37fd3 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 07:18:59 -0800 Subject: [PATCH 45/60] Fix styling check Signed-off-by: Maciej Szankin --- .../experimental/nas/dynast/dynas_manager.py | 14 ++- .../nas/dynast/dynas_predictor.py | 1 + .../experimental/nas/dynast/dynas_utils.py | 26 ++--- .../modules_supernetwork.py | 80 +++++++------- .../transformer_supernetwork.py | 101 +++++++++++------- 5 files changed, 121 insertions(+), 101 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_manager.py b/neural_compressor/experimental/nas/dynast/dynas_manager.py index f1b7fc15e4d..0e64ccaaf2e 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_manager.py +++ b/neural_compressor/experimental/nas/dynast/dynas_manager.py @@ -284,11 +284,11 @@ def create_training_set( return features_train, features_test, labels_train, labels_test -class TransformerLTEncoding(ParameterManager): - def __init__(self, param_dict: dict, verbose: bool = False, seed: int = 0): +class TransformerLTEncoding(ParameterManager): #noqa: D101 + def __init__(self, param_dict: dict, verbose: bool = False, seed: int = 0): #noqa: D107 super().__init__(param_dict, verbose, seed) - def onehot_custom(self, subnet_cfg, provide_onehot=True): + def onehot_custom(self, subnet_cfg, provide_onehot=True): #noqa: D102 features = [] #import ipdb;ipdb.set_trace() @@ -399,7 +399,6 @@ def import_csv( df - the output dataframe that contains the original config dict, pymoo, and 1-hot equivalent vector for training. """ - if column_names == None: df = pd.read_csv(filepath) else: @@ -442,11 +441,10 @@ def create_training_set( split: float = 0.33, seed: bool = None, ) -> Tuple[list, list, list, list]: - ''' - Create a sklearn compatible test/train set from an imported results csv - after "import_csv" method is run. - ''' + """Create a sklearn compatible test/train. + The set is created from an imported results csv after "import_csv" method is run. + """ collect_rows = list() for i in range(len(dataframe)): collect_rows.append(np.asarray(dataframe['config_onehot'].iloc[i])) diff --git a/neural_compressor/experimental/nas/dynast/dynas_predictor.py b/neural_compressor/experimental/nas/dynast/dynas_predictor.py index 477e4fcf7ca..15b167bb86d 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_predictor.py +++ b/neural_compressor/experimental/nas/dynast/dynas_predictor.py @@ -70,6 +70,7 @@ def __init__(self, alphas=DEFAULT_ALPHAS, cost_factors=DEFAULT_COST_FACTORS, def train(self, examples, labels): """Train the predictor on the specified examples and labels using the underlying regressor. + Args: examples: Examples to be used for training. labels: Labels to be used for training. 
diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index e1902a73c93..8387fdd800a 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -336,7 +336,7 @@ def get_subnet( return self.subnet -class TransformerLTRunner(Runner): +class TransformerLTRunner(Runner): #noqa: D101 def __init__( self, @@ -348,7 +348,7 @@ def __init__( batch_size: int, checkpoint_path: str, **kwargs, - ) -> None: + ) -> None: #noqa: D107 self.supernet = supernet self.acc_predictor = acc_predictor self.macs_predictor = macs_predictor @@ -362,28 +362,28 @@ def __init__( def estimate_accuracy_bleu( self, subnet_cfg: dict, - ) -> float: + ) -> float: #noqa: D102 top1 = self.acc_predictor.predict(subnet_cfg) return top1 def estimate_macs( self, subnet_cfg: dict, - ) -> int: + ) -> int: #noqa: D102 macs = self.macs_predictor.predict(subnet_cfg) return macs def estimate_latency( self, subnet_cfg: dict, - ) -> float: + ) -> float: #noqa: D102 latency = self.latency_predictor.predict(subnet_cfg) return latency def validate_bleu( self, subnet_cfg: dict, - ) -> float: # pragma: no cover + ) -> float: #noqa: D102 bleu = compute_bleu(subnet_cfg, self.dataset_path, self.checkpoint_path) @@ -393,13 +393,13 @@ def validate_macs( self, subnet_cfg: dict, ) -> float: - """Measure Torch model's FLOPs/MACs as per FVCore calculation + """Measure Torch model's FLOPs/MACs as per FVCore calculation. + Args: subnet_cfg: sub-network Torch model Returns: `macs` """ - macs = compute_macs(subnet_cfg, self.dataset_path) logger.info('[DyNAS-T] Model\'s macs: {}'.format(macs)) @@ -411,12 +411,12 @@ def measure_latency( subnet_cfg: dict, ) -> Tuple[float, float]: """Measure model's latency. 
+ Args: subnet_cfg: sub-network Torch model Returns: mean latency; std latency """ - latency_mean, latency_std = compute_latency( subnet_cfg, self.dataset_path, self.batch_size) logger.info( @@ -610,7 +610,7 @@ def eval_subnet( return sample, macs, -top1 -class EvaluationInterfaceTransformerLT(EvaluationInterface): +class EvaluationInterfaceTransformerLT(EvaluationInterface): #noqa: D101 def __init__( self, evaluator: Runner, @@ -618,13 +618,13 @@ def __init__( metrics=['acc', 'macs'], predictor_mode=False, csv_path=None, - ) -> None: + ) -> None: #noqa: D107 super().__init__(evaluator, manager, metrics, predictor_mode, csv_path) def eval_subnet( self, x: list, - ) -> Tuple[dict, float, float]: + ) -> Tuple[dict, float, float]: #noqa: D102 # PyMoo vector to Elastic Parameter Mapping param_dict = self.manager.translate2param(x) @@ -678,7 +678,7 @@ def eval_subnet( else: return sample, macs, -bleu - def clear_csv(self) -> None: + def clear_csv(self) -> None: #noqa: D102 if self.csv_path: f = open(self.csv_path, "w") writer = csv.writer(f) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index 4c750e45c5c..41d0a41fef7 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -1,3 +1,4 @@ +#noqa: D100 # https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/LICENSE # # Copyright (c) 2022 Intel Corporation @@ -38,7 +39,7 @@ def _get_full_incremental_state_key(module_instance, key): return '{}.{}.{}'.format(module_name, module_instance._fairseq_instance_id, key) -def get_incremental_state(module, incremental_state, key): +def get_incremental_state(module, incremental_state, key): #noqa: D102 """Helper for getting incremental state for an nn.Module.""" full_key = _get_full_incremental_state_key(module, key) if incremental_state is None or full_key not in incremental_state: @@ -46,15 +47,15 @@ def get_incremental_state(module, incremental_state, key): return incremental_state[full_key] -def set_incremental_state(module, incremental_state, key, value): +def set_incremental_state(module, incremental_state, key, value): #noqa: D102 """Helper for setting incremental state for an nn.Module.""" if incremental_state is not None: full_key = _get_full_incremental_state_key(module, key) incremental_state[full_key] = value -class EmbeddingSuper(nn.Embedding): - def __init__(self, num_embeddings, super_embed_dim, padding_idx, *args, **kwargs): +class EmbeddingSuper(nn.Embedding): #noqa: D101 + def __init__(self, num_embeddings, super_embed_dim, padding_idx, *args, **kwargs): #noqa: D107 super().__init__(num_embeddings, super_embed_dim, padding_idx, *args, **kwargs) # the largest embed dim @@ -68,15 +69,15 @@ def __init__(self, num_embeddings, super_embed_dim, padding_idx, *args, **kwargs self.profiling = False self.reset_parameters() - def profile(self, mode=True): + def profile(self, mode=True): #noqa: D102 self.profiling = mode - def reset_parameters(self): + def reset_parameters(self): #noqa: D102 super().reset_parameters() nn.init.normal_(self.weight, mean=0, std=self.embedding_dim ** -0.5) nn.init.constant_(self.weight[self.padding_idx], 0) - def set_sample_config(self, sample_embed_dim, part): + def set_sample_config(self, sample_embed_dim, part): #noqa: D102 
self.sample_embed_dim[part] = sample_embed_dim self._sample_parameters(part) @@ -86,18 +87,18 @@ def _sample_parameters(self, part): return self.samples - def sample_parameters(self, part, resample=False): + def sample_parameters(self, part, resample=False): #noqa: D102 return self._sample_parameters(part) if self.profiling or resample else self.samples - def sampled_weight(self, part): + def sampled_weight(self, part): #noqa: D102 return self.sample_parameters(part)[part]['weight'] - def forward(self, input, part='encoder'): + def forward(self, input, part='encoder'): #noqa: D102 return F.embedding(input, self.sampled_weight(part), self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse) -class LinearSuper(nn.Linear): - def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear'): +class LinearSuper(nn.Linear): #noqa: D101 + def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear'): #noqa: D107 super().__init__(super_in_dim, super_out_dim, bias=bias) # super_in_dim and super_out_dim indicate the largest network! @@ -113,10 +114,10 @@ def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_li self._reset_parameters(bias, uniform_, non_linear) self.profiling = False - def profile(self, mode=True): + def profile(self, mode=True): #noqa: D102 self.profiling = mode - def sample_parameters(self, resample=False): + def sample_parameters(self, resample=False): #noqa: D102 if self.profiling or resample: return self._sample_parameters() return self.samples @@ -127,7 +128,7 @@ def _reset_parameters(self, bias, uniform_, non_linear): if bias: nn.init.constant_(self.bias, 0.) - def set_sample_config(self, sample_in_dim, sample_out_dim): + def set_sample_config(self, sample_in_dim, sample_out_dim): #noqa: D102 self.sample_in_dim = sample_in_dim self.sample_out_dim = sample_out_dim @@ -141,11 +142,11 @@ def _sample_parameters(self): self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) return self.samples - def forward(self, x): + def forward(self, x): #noqa: D102 self.sample_parameters() return F.linear(x, self.samples['weight'], self.samples['bias']) - def calc_sampled_param_num(self): + def calc_sampled_param_num(self): #noqa: D102 assert 'weight' in self.samples.keys() weight_numel = self.samples['weight'].numel() @@ -157,20 +158,20 @@ def calc_sampled_param_num(self): return weight_numel + bias_numel -def sample_weight(weight, sample_in_dim, sample_out_dim): +def sample_weight(weight, sample_in_dim, sample_out_dim): #noqa: D103 sample_weight = weight[:, :sample_in_dim] sample_weight = sample_weight[:sample_out_dim, :] return sample_weight -def sample_bias(bias, sample_out_dim): +def sample_bias(bias, sample_out_dim): #noqa: D103 sample_bias = bias[:sample_out_dim] return sample_bias -def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False): +def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False): #noqa: D103 if not export and torch.cuda.is_available(): try: from apex.normalization import FusedLayerNorm @@ -180,8 +181,8 @@ def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False) return torch.nn.LayerNorm(normalized_shape, eps, elementwise_affine) -class LayerNormSuper(torch.nn.LayerNorm): - def __init__(self, super_embed_dim): +class LayerNormSuper(torch.nn.LayerNorm): #noqa: D101 + def __init__(self, super_embed_dim): #noqa: D107 super().__init__(super_embed_dim) # the largest embed dim @@ 
-193,10 +194,10 @@ def __init__(self, super_embed_dim): self.samples = {} self.profiling = False - def profile(self, mode=True): + def profile(self, mode=True): #noqa: D102 self.profiling = mode - def sample_parameters(self, resample=False): + def sample_parameters(self, resample=False): #noqa: D102 if self.profiling or resample: return self._sample_parameters() return self.samples @@ -206,15 +207,15 @@ def _sample_parameters(self): self.samples['bias'] = self.bias[:self.sample_embed_dim] return self.samples - def set_sample_config(self, sample_embed_dim): + def set_sample_config(self, sample_embed_dim): # noqa: D102 self.sample_embed_dim = sample_embed_dim self._sample_parameters() - def forward(self, x): + def forward(self, x): # noqa: D102 self.sample_parameters() return F.layer_norm(x, (self.sample_embed_dim,), weight=self.samples['weight'], bias=self.samples['bias'], eps=self.eps) - def calc_sampled_param_num(self): + def calc_sampled_param_num(self): # noqa: D102 assert 'weight' in self.samples.keys() assert 'bias' in self.samples.keys() return self.samples['weight'].numel() + self.samples['bias'].numel() @@ -228,7 +229,7 @@ class MultiheadAttentionSuper(nn.Module): def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, super_vdim=None, dropout=0., bias=True, add_bias_kv=False, add_zero_attn=False, self_attention=False, - encoder_decoder_attention=False, out_dim=None, qkv_dim=None): + encoder_decoder_attention=False, out_dim=None, qkv_dim=None): # noqa: D107 super().__init__() # the configs of super arch @@ -308,7 +309,7 @@ def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, supe self.enable_torch_version = False self.enable_torch_version = False - def calc_sampled_param_num(self): + def calc_sampled_param_num(self): # noqa: D102 assert self.in_proj_weight is not None and self.in_proj_bias is not None in_proj_q_weight_numel = self.sample_q_embed_dim * self.qkv_dim in_proj_v_weight_numel = in_proj_k_weight_numel = self.sample_kv_embed_dim * self.qkv_dim @@ -320,7 +321,7 @@ def calc_sampled_param_num(self): return in_proj_q_weight_numel + in_proj_k_weight_numel + in_proj_v_weight_numel + in_proj_bias_numel - def set_sample_config(self, sample_q_embed_dim, sample_attention_heads, sample_kv_embed_dim=None): + def set_sample_config(self, sample_q_embed_dim, sample_attention_heads, sample_kv_embed_dim=None): # noqa: D102 self.sample_q_embed_dim = sample_q_embed_dim if sample_kv_embed_dim is None: self.sample_kv_embed_dim = sample_q_embed_dim @@ -336,10 +337,10 @@ def set_sample_config(self, sample_q_embed_dim, sample_attention_heads, sample_k self.out_proj.set_sample_config( sample_in_dim=self.qkv_dim, sample_out_dim=self.sample_q_embed_dim) - def prepare_for_onnx_export_(self): + def prepare_for_onnx_export_(self): # noqa: D102 self.onnx_trace = True - def reset_parameters(self): + def reset_parameters(self): # noqa: D102 if self.qkv_same_dim: nn.init.xavier_uniform_(self.in_proj_weight) else: @@ -358,14 +359,13 @@ def reset_parameters(self): def forward(self, query, key, value, key_padding_mask=None, incremental_state=None, need_weights=True, static_kv=False, attn_mask=None): - """Input shape: Time x Batch x Channel + """Input shape: Time x Batch x Channel. Timesteps can be masked by supplying a T x T mask in the `attn_mask` argument. Padding elements can be excluded from the key by passing a binary ByteTensor (`key_padding_mask`) with shape: batch x src_len, where padding elements are indicated by 1s. 
""" - tgt_len, bsz, embed_dim = query.size() if incremental_state is not None: @@ -528,10 +528,10 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No return attn, attn_weights - def in_proj_qkv(self, query): + def in_proj_qkv(self, query): # noqa: D102 return self._in_proj(query, sample_dim=self.sample_q_embed_dim).chunk(3, dim=-1) - def in_proj_q(self, query): + def in_proj_q(self, query): # noqa: D102 if self.qkv_same_dim: return self._in_proj(query, end=self.qkv_dim, sample_dim=self.sample_q_embed_dim) else: @@ -540,7 +540,7 @@ def in_proj_q(self, query): bias = bias[:self.qkv_dim] return F.linear(query, self.q_proj_weight[..., :self.sample_q_embed_dim], bias) - def in_proj_k(self, key): + def in_proj_k(self, key): # noqa: D102 if self.qkv_same_dim: return self._in_proj(key, start=self.qkv_dim, end=2 * self.qkv_dim, sample_dim=self.sample_kv_embed_dim) else: @@ -550,7 +550,7 @@ def in_proj_k(self, key): bias = bias[self.qkv_dim:2 * self.qkv_dim] return F.linear(key, weight[..., :self.sample_kv_embed_dim], bias) - def in_proj_v(self, value): + def in_proj_v(self, value): # noqa: D102 if self.qkv_same_dim: return self._in_proj(value, start=2 * self.qkv_dim, sample_dim=self.sample_kv_embed_dim) else: @@ -591,10 +591,10 @@ def _set_input_buffer(self, incremental_state, buffer): buffer, ) - def apply_sparse_mask(self, attn_weights, tgt_len, src_len, bsz): + def apply_sparse_mask(self, attn_weights, tgt_len, src_len, bsz): # noqa: D102 return attn_weights - def __repr__(self): + def __repr__(self): # noqa: D105 # We treat the extra repr like the sub-module, one item per line extra_lines = [] extra_repr = self.extra_repr() diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index 33dc3b125b5..fef5c49ece3 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -1,3 +1,4 @@ +#noqa: D100 # https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/LICENSE # # Copyright (c) 2022 Intel Corporation @@ -34,9 +35,9 @@ class TransformerSuperNetwork(BaseFairseqModel): - """ - Transformer model from `"Attention Is All You Need" (Vaswani, et al, 2017) - `_. + """Transformer model from `"Attention Is All You Need" (Vaswani, et al, 2017)`. 
+ + Args: encoder (TransformerEncoder): the encoder @@ -50,7 +51,7 @@ class TransformerSuperNetwork(BaseFairseqModel): :prog: """ - def __init__(self, task): + def __init__(self, task): #noqa: D107 super().__init__() src_dict, tgt_dict = task.source_dictionary, task.target_dictionary @@ -76,7 +77,7 @@ def __init__(self, task): self.decoder = TransformerDecoder( decoder_config, tgt_dict, decoder_embed_tokens) - def build_embedding(self, dictionary, embed_dim, path=None): + def build_embedding(self, dictionary, embed_dim, path=None): #noqa: D102 num_embeddings = len(dictionary) padding_idx = dictionary.pad() emb = Embedding(num_embeddings, embed_dim, padding_idx) @@ -86,12 +87,12 @@ def build_embedding(self, dictionary, embed_dim, path=None): utils.load_embedding(embed_dict, dictionary, emb) return emb - def profile(self, mode=True): + def profile(self, mode=True): #noqa: D102 for module in self.modules(): if hasattr(module, 'profile') and self != module: module.profile(mode) - def get_sampled_params_numel(self, config): + def get_sampled_params_numel(self, config): #noqa: D102 self.set_sample_config(config) numels = [] for name, module in self.named_modules(): @@ -105,21 +106,21 @@ def get_sampled_params_numel(self, config): numels.append(module.calc_sampled_param_num()) return sum(numels) - def set_sample_config(self, config): + def set_sample_config(self, config): #noqa: D102 logger.info('[DyNAS-T] Setting active configuration to {}'.format(config)) self.encoder.set_sample_config(config) self.decoder.set_sample_config(config) - def forward(self,src_tokens,src_lengths,prev_output_token): + def forward(self,src_tokens,src_lengths,prev_output_token): #noqa: D102 encoder_output = self.encoder.forward(src_tokens,src_lengths) output = self.decoder(prev_output_token,encoder_output) return output class TransformerEncoder(FairseqEncoder): - """ - Transformer encoder consisting of *args.encoder_layers* layers. Each layer - is a :class:`TransformerEncoderLayer`. + """Transformer encoder consisting of *args.encoder_layers* layers. + + Each layer is a :class:`TransformerEncoderLayer`. Args: args (argparse.Namespace): parsed command-line arguments @@ -127,7 +128,7 @@ class TransformerEncoder(FairseqEncoder): embed_tokens (torch.nn.Embedding): input embedding """ - def __init__(self, encoder_config, dictionary, embed_tokens): + def __init__(self, encoder_config, dictionary, embed_tokens): #noqa: D107 super().__init__(dictionary) # the configs of super arch self.super_embed_dim = encoder_config['encoder_embed_dim'] @@ -178,7 +179,7 @@ def __init__(self, encoder_config, dictionary, embed_tokens): self.vocab_original_scaling = False - def set_sample_config(self, config: dict): + def set_sample_config(self, config: dict): #noqa: D102 self.sample_embed_dim = config['encoder']['encoder_embed_dim'] @@ -221,6 +222,7 @@ def set_sample_config(self, config: dict): def forward(self, src_tokens, src_lengths): """Forward function. + Args: src_tokens (LongTensor): tokens in the source language of shape `(batch, src_len)` @@ -268,8 +270,7 @@ def forward(self, src_tokens, src_lengths): } def reorder_encoder_out(self, encoder_out, new_order): - """ - Reorder encoder output according to *new_order*. + """Reorder encoder output according to *new_order*. Args: encoder_out: output from the ``forward()`` method @@ -323,9 +324,9 @@ def upgrade_state_dict_named(self, state_dict, name): class TransformerDecoder(FairseqIncrementalDecoder): - """ - Transformer decoder consisting of *args.decoder_layers* layers. 
Each layer - is a :class:`TransformerDecoderLayer`. + """Transformer decoder consisting of *args.decoder_layers* layers. + + Each layer is a :class:`TransformerDecoderLayer`. Args: args (argparse.Namespace): parsed command-line arguments @@ -335,7 +336,7 @@ class TransformerDecoder(FairseqIncrementalDecoder): (default: False). """ - def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=False): + def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=False): #noqa: D107 super().__init__(dictionary) # the configs of super arch @@ -409,7 +410,7 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.vocab_original_scaling = False - def set_sample_config(self, config: dict): + def set_sample_config(self, config: dict): #noqa: D102 self.sample_embed_dim = config['decoder']['decoder_embed_dim'] self.sample_encoder_embed_dim = config['encoder']['encoder_embed_dim'] @@ -460,7 +461,8 @@ def set_sample_config(self, config: dict): layer.set_sample_config(is_identity_layer=True) def forward(self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused): - """ + """Forward pass. + Args: prev_output_tokens (LongTensor): previous decoder outputs of shape `(batch, tgt_len)`, for teacher forcing @@ -480,8 +482,7 @@ def forward(self, prev_output_tokens, encoder_out=None, incremental_state=None, return x, extra def extract_features(self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused): - """ - Similar to *forward* but only return features. + """Similar to *forward* but only return features. Returns: tuple: @@ -594,7 +595,7 @@ def max_positions(self): ipdb.set_trace() return min(self.max_target_positions, self.embed_positions.max_positions()) - def buffered_future_mask(self, tensor): + def buffered_future_mask(self, tensor): #noqa: D102 dim = tensor.size(0) if not hasattr(self, '_future_mask') or self._future_mask is None or self._future_mask.device != tensor.device or self._future_mask.size(0) < dim: self._future_mask = torch.triu( @@ -651,7 +652,7 @@ class TransformerEncoderLayer(nn.Module): args (argparse.Namespace): parsed command-line arguments """ - def __init__(self, encoder_config, layer_idx): + def __init__(self, encoder_config, layer_idx): #noqa: D107 super().__init__() # the configs of super arch @@ -692,7 +693,15 @@ def __init__(self, encoder_config, layer_idx): super_out_dim=self.super_embed_dim, uniform_=None, non_linear='linear') self.final_layer_norm = LayerNormSuper(self.super_embed_dim) - def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_ffn_embed_dim_this_layer=None, sample_self_attention_heads_this_layer=None, sample_dropout=None, sample_activation_dropout=None): + def set_sample_config( + self, + is_identity_layer, + sample_embed_dim=None, + sample_ffn_embed_dim_this_layer=None, + sample_self_attention_heads_this_layer=None, + sample_dropout=None, + sample_activation_dropout=None, + ): #noqa: D102 if is_identity_layer: self.is_identity_layer = True @@ -722,7 +731,8 @@ def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_ffn sample_embed_dim=self.sample_embed_dim) def upgrade_state_dict_named(self, state_dict, name): - """ + """Renames keys in state dict. 
+ Rename layer norm states from `...layer_norms.0.weight` to `...self_attn_layer_norm.weight` and `...layer_norms.1.weight` to `...final_layer_norm.weight` @@ -741,7 +751,8 @@ def upgrade_state_dict_named(self, state_dict, name): del state_dict[k] def forward(self, x, encoder_padding_mask, attn_mask=None): - """ + """Forward pass. + Args: x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` encoder_padding_mask (ByteTensor): binary ByteTensor of shape @@ -786,7 +797,7 @@ def forward(self, x, encoder_padding_mask, attn_mask=None): x = self.maybe_layer_norm(self.final_layer_norm, x, after=True) return x - def maybe_layer_norm(self, layer_norm, x, before=False, after=False): + def maybe_layer_norm(self, layer_norm, x, before=False, after=False): #noqa: D102 assert before ^ after if after ^ self.normalize_before: return layer_norm(x) @@ -811,7 +822,7 @@ class TransformerDecoderLayer(nn.Module): (default: False). """ - def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False): + def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False): #noqa: D107 super().__init__() # the configs of super arch @@ -883,7 +894,16 @@ def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv self.onnx_trace = False - def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_encoder_embed_dim=None, sample_ffn_embed_dim_this_layer=None, sample_self_attention_heads_this_layer=None, sample_ende_attention_heads_this_layer=None, sample_dropout=None, sample_activation_dropout=None): + def set_sample_config(self, + is_identity_layer, + sample_embed_dim=None, + sample_encoder_embed_dim=None, + sample_ffn_embed_dim_this_layer=None, + sample_self_attention_heads_this_layer=None, + sample_ende_attention_heads_this_layer=None, + sample_dropout=None, + sample_activation_dropout=None, + ): #noqa: D102 if is_identity_layer: self.is_identity_layer = True @@ -918,7 +938,7 @@ def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_enc self.final_layer_norm.set_sample_config( sample_embed_dim=self.sample_embed_dim) - def prepare_for_onnx_export_(self): + def prepare_for_onnx_export_(self): #noqa: D102 self.onnx_trace = True def forward( @@ -932,7 +952,8 @@ def forward( self_attn_mask=None, self_attn_padding_mask=None, ): - """ + """Forward pass. 
+ Args: x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` encoder_padding_mask (ByteTensor): binary ByteTensor of shape @@ -1005,28 +1026,28 @@ def forward( return x, attn, self_attn_state return x, attn - def maybe_layer_norm(self, layer_norm, x, before=False, after=False): + def maybe_layer_norm(self, layer_norm, x, before=False, after=False): #noqa: D102 assert before ^ after if after ^ self.normalize_before: return layer_norm(x) else: return x - def make_generation_fast_(self, need_attn=False, **kwargs): + def make_generation_fast_(self, need_attn=False, **kwargs): #noqa: D102 self.need_attn = need_attn -def calc_dropout(dropout, sample_embed_dim, super_embed_dim): +def calc_dropout(dropout, sample_embed_dim, super_embed_dim): #noqa: D103 return dropout * 1.0 * sample_embed_dim / super_embed_dim -def Embedding(num_embeddings, embedding_dim, padding_idx): +def Embedding(num_embeddings, embedding_dim, padding_idx): #noqa: D103 return EmbeddingSuper(num_embeddings, embedding_dim, padding_idx=padding_idx) -def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): +def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): #noqa: D103 m = nn.Linear(in_features, out_features, bias) - nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_( + nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_( #noqa: D103 m.weight, non_linear=non_linear) if bias: nn.init.constant_(m.bias, 0.) From 6f1672207ca65310f6da4f63e4a2650afa4435a7 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 07:47:08 -0800 Subject: [PATCH 46/60] Ignore fairseq in pylint Signed-off-by: Maciej Szankin --- .azure-pipelines/scripts/codeScan/pylint/pylint.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/scripts/codeScan/pylint/pylint.sh b/.azure-pipelines/scripts/codeScan/pylint/pylint.sh index b15da8c91b3..938c5ecdc6c 100644 --- a/.azure-pipelines/scripts/codeScan/pylint/pylint.sh +++ b/.azure-pipelines/scripts/codeScan/pylint/pylint.sh @@ -10,13 +10,13 @@ pip install -r /neural-compressor/requirements.txt pip install torch==1.12.0 python -m pylint -f json --disable=R,C,W,E1129 --enable=line-too-long --max-line-length=120 --extension-pkg-whitelist=numpy --ignored-classes=TensorProto,NodeProto \ ---ignored-modules=tensorflow,torch,torch.quantization,torch.tensor,torchvision,mxnet,onnx,onnxruntime,intel_extension_for_pytorch /neural-compressor/neural_compressor \ +--ignored-modules=tensorflow,torch,torch.quantization,torch.tensor,torchvision,fairseq,mxnet,onnx,onnxruntime,intel_extension_for_pytorch /neural-compressor/neural_compressor \ > $log_dir/pylint.json exit_code=$? 
$BOLD_YELLOW && echo " ----------------- Current pylint cmd start --------------------------" && $RESET -echo "python -m pylint -f json --disable=R,C,W,E1129 --enable=line-too-long --max-line-length=120 --extension-pkg-whitelist=numpy --ignored-classes=TensorProto,NodeProto --ignored-modules=tensorflow,torch,torch.quantization,torch.tensor,torchvision,mxnet,onnx,onnxruntime,intel_extension_for_pytorch /neural-compressor/neural_compressor > $log_dir/pylint.json" +echo "python -m pylint -f json --disable=R,C,W,E1129 --enable=line-too-long --max-line-length=120 --extension-pkg-whitelist=numpy --ignored-classes=TensorProto,NodeProto --ignored-modules=tensorflow,torch,torch.quantization,torch.tensor,torchvision,fairseq,mxnet,onnx,onnxruntime,intel_extension_for_pytorch /neural-compressor/neural_compressor > $log_dir/pylint.json" $BOLD_YELLOW && echo " ----------------- Current pylint cmd end --------------------------" && $RESET $BOLD_YELLOW && echo " ----------------- Current log file output start --------------------------" && $RESET From 2299d9d14023a791d6c1df97a33b21950c4b7450 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 07:54:55 -0800 Subject: [PATCH 47/60] Address styling issues Signed-off-by: Maciej Szankin --- .../modules_supernetwork.py | 18 +++++- .../transformer_interface.py | 8 ++- .../transformer_supernetwork.py | 57 +++++++++++++------ 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index 41d0a41fef7..6de3afa7698 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -94,7 +94,15 @@ def sampled_weight(self, part): #noqa: D102 return self.sample_parameters(part)[part]['weight'] def forward(self, input, part='encoder'): #noqa: D102 - return F.embedding(input, self.sampled_weight(part), self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse) + return F.embedding( + input, + self.sampled_weight(part), + self.padding_idx, + self.max_norm, + self.norm_type, + self.scale_grad_by_freq, + self.sparse, + ) class LinearSuper(nn.Linear): #noqa: D101 @@ -213,7 +221,13 @@ def set_sample_config(self, sample_embed_dim): # noqa: D102 def forward(self, x): # noqa: D102 self.sample_parameters() - return F.layer_norm(x, (self.sample_embed_dim,), weight=self.samples['weight'], bias=self.samples['bias'], eps=self.eps) + return F.layer_norm( + x, + (self.sample_embed_dim,), + weight=self.samples['weight'], + bias=self.samples['bias'], + eps=self.eps, + ) def calc_sampled_param_num(self): # noqa: D102 assert 'weight' in self.samples.keys() diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 6ba50ddfcc8..0b76b052b21 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -238,7 +238,9 @@ def compute_latency(config, dataset_path, batch_size, get_model_parameters=False encoder_latencies = encoder_latencies[int( args.latiter * 0.1): -max(1, 
int(args.latiter * 0.1))] logger.info( - f'[DyNAS-T] Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; Std: {np.std(encoder_latencies)} ms') + f'[DyNAS-T] Encoder latency for dataset generation: Mean: ' + '{np.mean(encoder_latencies)} ms; Std: {np.std(encoder_latencies)} ms' + ) encoder_out_test_with_beam = model.encoder.reorder_encoder_out( encoder_out_test, new_order) @@ -278,7 +280,9 @@ def compute_latency(config, dataset_path, batch_size, get_model_parameters=False args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] logger.info( - f'[DyNAS-T] Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') + f'[DyNAS-T] Decoder latency for dataset generation: Mean: ' + '{np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms' + ) lat_mean = np.mean(encoder_latencies)+np.mean(decoder_latencies) lat_std = np.std(encoder_latencies)+np.std(decoder_latencies) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index fef5c49ece3..cd2d3e0377e 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -98,9 +98,15 @@ def get_sampled_params_numel(self, config): #noqa: D102 for name, module in self.named_modules(): if hasattr(module, 'calc_sampled_param_num'): # a hacky way to skip the layers that exceed encoder-layer-num or decoder-layer-num - if name.split('.')[0] == 'encoder' and eval(name.split('.')[2]) >= config['encoder']['encoder_layer_num']: + if ( + name.split('.')[0] == 'encoder' + and eval(name.split('.')[2]) >= config['encoder']['encoder_layer_num'] + ): continue - if name.split('.')[0] == 'decoder' and eval(name.split('.')[2]) >= config['decoder']['decoder_layer_num']: + if ( + name.split('.')[0] == 'decoder' + and eval(name.split('.')[2]) >= config['decoder']['decoder_layer_num'] + ): continue numels.append(module.calc_sampled_param_num()) @@ -397,7 +403,7 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.adaptive_softmax = None self.project_out_dim = Linear(self.super_embed_dim, self.output_embed_dim, bias=False) \ - if self.super_embed_dim != self.output_embed_dim and not args.tie_adaptive_weights else None + if self.super_embed_dim != self.output_embed_dim else None if not self.share_input_output_embed: self.embed_out = nn.Parameter(torch.Tensor( @@ -533,7 +539,10 @@ def extract_features(self, prev_output_tokens, encoder_out=None, incremental_sta [encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2]], dim=0) elif self.sample_arbitrary_ende_attn[i] == 2: encoder_out_feed = torch.cat( - [encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2], encoder_out['encoder_out_all'][-3]], dim=0) + [encoder_out['encoder_out'], + encoder_out['encoder_out_all'][-2], + encoder_out['encoder_out_all'][-3]], + dim=0) else: raise NotImplementedError( "arbitrary_ende_attn should in [-1, 1, 2]") @@ -548,7 +557,10 @@ def extract_features(self, prev_output_tokens, encoder_out=None, incremental_sta # concat two more elif self.sample_arbitrary_ende_attn[i] == 2: encoder_padding_mask_feed = torch.cat( - [encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], 
dim=1) + [encoder_out['encoder_padding_mask'], + encoder_out['encoder_padding_mask'], + encoder_out['encoder_padding_mask']], + dim=1) else: raise NotImplementedError( "arbitrary_ende_attn should in [-1, 1, 2]") @@ -565,7 +577,7 @@ def extract_features(self, prev_output_tokens, encoder_out=None, incremental_sta attns.append(attn) if self.layer_norm: - x = self.layer_norm(x) + x = self.layer_norm(x) # pylint: disable=not-callable # T x B x C -> B x T x C x = x.transpose(0, 1) @@ -591,16 +603,19 @@ def max_positions(self): """Maximum output length supported by the decoder.""" if self.embed_positions is None: return self.max_target_positions - import ipdb - ipdb.set_trace() return min(self.max_target_positions, self.embed_positions.max_positions()) def buffered_future_mask(self, tensor): #noqa: D102 dim = tensor.size(0) - if not hasattr(self, '_future_mask') or self._future_mask is None or self._future_mask.device != tensor.device or self._future_mask.size(0) < dim: - self._future_mask = torch.triu( + if ( + not hasattr(self, '_future_mask') + or self._future_mask is None # pylint: disable=access-member-before-definition + or self._future_mask.device != tensor.device # pylint: disable=access-member-before-definition + or self._future_mask.size(0) < dim # pylint: disable=access-member-before-definition + ): + self._future_mask = torch.triu( # pylint: disable=access-member-before-definition utils.fill_with_neg_inf(tensor.new(dim, dim)), 1) - return self._future_mask[:dim, :dim] + return self._future_mask[:dim, :dim] # pylint: disable=access-member-before-definition def upgrade_state_dict_named(self, state_dict, name): """Upgrade a (possibly old) state dict for new versions of fairseq.""" @@ -676,8 +691,8 @@ def __init__(self, encoder_config, layer_idx): #noqa: D107 self.qkv_dim = 512 self.self_attn = MultiheadAttentionSuper( - super_embed_dim=self.super_embed_dim, num_heads=self.super_self_attention_heads_this_layer, is_encoder=True, - dropout=0.1, self_attention=True, qkv_dim=self.qkv_dim + super_embed_dim=self.super_embed_dim, num_heads=self.super_self_attention_heads_this_layer, + is_encoder=True, dropout=0.1, self_attention=True, qkv_dim=self.qkv_dim, ) self.self_attn_layer_norm = LayerNormSuper(self.super_embed_dim) @@ -822,7 +837,14 @@ class TransformerDecoderLayer(nn.Module): (default: False). 
""" - def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False): #noqa: D107 + def __init__( + self, + decoder_config, + layer_idx, + no_encoder_attn=False, + add_bias_kv=False, + add_zero_attn=False, + ): #noqa: D107 super().__init__() # the configs of super arch @@ -927,8 +949,11 @@ def set_sample_config(self, self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_attention_heads=self.sample_self_attention_heads_this_layer) - self.encoder_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_kv_embed_dim=self.sample_encoder_embed_dim, - sample_attention_heads=self.sample_ende_attention_heads_this_layer) + self.encoder_attn.set_sample_config( + sample_q_embed_dim=self.sample_embed_dim, + sample_kv_embed_dim=self.sample_encoder_embed_dim, + sample_attention_heads=self.sample_ende_attention_heads_this_layer, + ) self.fc1.set_sample_config( sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) From 7e46cadab43b262e43eed2f2a39b7af98974414f Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 11:45:37 -0800 Subject: [PATCH 48/60] Pin torch version Signed-off-by: Maciej Szankin --- test/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/requirements.txt b/test/requirements.txt index fd9a37c7328..a17c2623b5f 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -2,7 +2,7 @@ intel-tensorflow onnx onnxruntime --find-links https://download.pytorch.org/whl/torch_stable.html -torch +torch==1.12.1 torchvision mxnet-mkl numpy From d8226113a7db129d92fe3f14739bb40aa646354f Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 12:47:43 -0800 Subject: [PATCH 49/60] LazyImport fairseq Signed-off-by: Maciej Szankin --- .../modules_supernetwork.py | 7 +++-- .../transformer_supernetwork.py | 31 ++++++++++++------- test/requirements.txt | 4 +-- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index 6de3afa7698..1a5c9739372 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -20,10 +20,13 @@ import torch import torch.nn as nn import torch.nn.functional as F -from fairseq import utils from torch.nn import Parameter from torch.nn.modules.module import _addindent +from neural_compressor.utils.utility import LazyImport + +fairseq = LazyImport("fairseq") + INCREMENTAL_STATE_INSTANCE_ID = defaultdict(lambda: 0) @@ -511,7 +514,7 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No attn_weights = attn_weights.view( bsz * self.num_heads, tgt_len, src_len) - attn_weights = utils.softmax( + attn_weights = fairseq.utils.softmax( attn_weights, dim=-1, onnx_trace=self.onnx_trace, ).type_as(attn_weights) attn_weights = F.dropout( diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index cd2d3e0377e..1034c6519a8 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py +++ 
b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -19,22 +19,21 @@ import torch import torch.nn.functional as F -from fairseq import utils -from fairseq.models import (BaseFairseqModel, FairseqEncoder, - FairseqIncrementalDecoder) -from fairseq.modules import PositionalEmbedding, SinusoidalPositionalEmbedding from torch import nn from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport from .modules_supernetwork import (EmbeddingSuper, LayerNormSuper, LinearSuper, MultiheadAttentionSuper) +fairseq = LazyImport("fairseq") + DEFAULT_MAX_SOURCE_POSITIONS = 1024 DEFAULT_MAX_TARGET_POSITIONS = 1024 -class TransformerSuperNetwork(BaseFairseqModel): +class TransformerSuperNetwork(fairseq.models.BaseFairseqModel): """Transformer model from `"Attention Is All You Need" (Vaswani, et al, 2017)`. @@ -78,6 +77,8 @@ def __init__(self, task): #noqa: D107 decoder_config, tgt_dict, decoder_embed_tokens) def build_embedding(self, dictionary, embed_dim, path=None): #noqa: D102 + utils = fairseq.utils + num_embeddings = len(dictionary) padding_idx = dictionary.pad() emb = Embedding(num_embeddings, embed_dim, padding_idx) @@ -123,7 +124,7 @@ def forward(self,src_tokens,src_lengths,prev_output_token): #noqa: D102 return output -class TransformerEncoder(FairseqEncoder): +class TransformerEncoder(fairseq.models.FairseqEncoder): """Transformer encoder consisting of *args.encoder_layers* layers. Each layer is a :class:`TransformerEncoderLayer`. @@ -167,7 +168,7 @@ def __init__(self, encoder_config, dictionary, embed_tokens): #noqa: D107 self.embed_tokens = embed_tokens - self.embed_positions = PositionalEmbedding( + self.embed_positions = fairseq.modules.PositionalEmbedding( self.max_source_positions, self.super_embed_dim, self.padding_idx, learned=False, ) @@ -309,7 +310,8 @@ def max_positions(self): def upgrade_state_dict_named(self, state_dict, name): """Upgrade a (possibly old) state dict for new versions of fairseq.""" - if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + utils = fairseq.utils + if isinstance(self.embed_positions, fairseq.modules.SinusoidalPositionalEmbedding): weights_key = '{}.embed_positions.weights'.format(name) if weights_key in state_dict: del state_dict[weights_key] @@ -329,7 +331,7 @@ def upgrade_state_dict_named(self, state_dict, name): return state_dict -class TransformerDecoder(FairseqIncrementalDecoder): +class TransformerDecoder(fairseq.models.FairseqIncrementalDecoder): """Transformer decoder consisting of *args.decoder_layers* layers. Each layer is a :class:`TransformerDecoderLayer`. 
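The hunks in this file swap direct `fairseq` imports for attribute access through a `LazyImport` handle. As a rough illustration of that idea only (not the project's actual `LazyImport` implementation), a lazy module proxy defers the `importlib` work until the first attribute lookup, so merely importing this file no longer requires `fairseq` to be installed:

import importlib


class _LazyModule:
    """Minimal, illustrative stand-in for a lazy import handle."""

    def __init__(self, module_name):
        self._module_name = module_name
        self._module = None

    def __getattr__(self, name):
        # The real import happens on first attribute access, not at definition time.
        if self._module is None:
            self._module = importlib.import_module(self._module_name)
        return getattr(self._module, name)


# Hypothetical usage mirroring the diff: nothing is imported until a member is touched.
fairseq = _LazyModule("fairseq")
# fairseq.utils.softmax(...)  # first access would trigger the actual import

Note that when such a handle is used in a base-class position, the import still fires as soon as the class statement executes, so the laziness mainly benefits callers that never import this module at all.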
@@ -388,7 +390,7 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.embed_tokens = embed_tokens - self.embed_positions = PositionalEmbedding( + self.embed_positions = fairseq.modules.PositionalEmbedding( self.max_target_positions, self.super_embed_dim, padding_idx, learned=False, ) if not False else None @@ -606,6 +608,8 @@ def max_positions(self): return min(self.max_target_positions, self.embed_positions.max_positions()) def buffered_future_mask(self, tensor): #noqa: D102 + utils = fairseq.utils + dim = tensor.size(0) if ( not hasattr(self, '_future_mask') @@ -619,7 +623,8 @@ def buffered_future_mask(self, tensor): #noqa: D102 def upgrade_state_dict_named(self, state_dict, name): """Upgrade a (possibly old) state dict for new versions of fairseq.""" - if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + utils = fairseq.utils + if isinstance(self.embed_positions, fairseq.modules.SinusoidalPositionalEmbedding): weights_key = '{}.embed_positions.weights'.format(name) if weights_key in state_dict: del state_dict[weights_key] @@ -670,6 +675,8 @@ class TransformerEncoderLayer(nn.Module): def __init__(self, encoder_config, layer_idx): #noqa: D107 super().__init__() + utils = fairseq.utils + # the configs of super arch self.super_embed_dim = encoder_config['encoder_embed_dim'] self.super_ffn_embed_dim_this_layer = encoder_config['encoder_ffn_embed_dim'] @@ -847,6 +854,8 @@ def __init__( ): #noqa: D107 super().__init__() + utils = fairseq.utils + # the configs of super arch self.super_embed_dim = decoder_config['decoder_embed_dim'] self.super_encoder_embed_dim = decoder_config['decoder_embed_dim'] diff --git a/test/requirements.txt b/test/requirements.txt index a17c2623b5f..32535567cc6 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -2,7 +2,7 @@ intel-tensorflow onnx onnxruntime --find-links https://download.pytorch.org/whl/torch_stable.html -torch==1.12.1 +torch torchvision mxnet-mkl numpy @@ -18,8 +18,6 @@ tensorflow-addons onnxruntime-extensions; python_version < '3.10' autograd==1.4 fvcore==0.1.5.post20220119 -fairseq==0.12.2 -torchprofile==0.0.4 ofa==0.1.0.post202203231606 pymoo==0.5.0 intel-extension-for-pytorch From 79a47586b46b136366329476048e2f09fcc1aa84 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 14:41:48 -0800 Subject: [PATCH 50/60] Remove reference to DyNAS from UTs Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 68 -------------------------------------------- 1 file changed, 68 deletions(-) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index 4d22673d578..47dfaf19c6b 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -9,7 +9,6 @@ from neural_compressor.data import DATASETS from neural_compressor.experimental import common, NAS from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.experimental.nas.dynas import DyNAS def build_fake_yaml(approach=None, search_algorithm=None, metrics=['acc']): fake_yaml = """ @@ -58,41 +57,6 @@ def build_fake_yaml(approach=None, search_algorithm=None, metrics=['acc']): with open('fake.yaml', 'w', encoding="utf-8") as f: f.write(fake_yaml) -def build_dynas_fake_yaml(): - fake_yaml = """ - model: - name: imagenet_nas - framework: pytorch - - nas: - approach: dynas - search: - search_algorithm: nsga2 - dynas: - supernet: ofa_resnet50 - metrics: ['acc', 'macs'] - results_csv_path: './search_results.csv' - """ - with open('dynas_fake.yaml', 'w', encoding="utf-8") as 
f: - f.write(fake_yaml) - -def build_dynas_results_csv(): - results_csv = """ -Sub-network,Date,Latency (ms), MACs,Top-1 Acc (%) -"{'wid': None, 'ks': [7, 7, 3, 3, 5, 7, 7, 3, 5, 5, 3, 3, 7, 3, 5, 5, 5, 7, 5, 7], 'e': [3, 4, 4, 4, 4, 6, 6, 4, 4, 3, 4, 4, 3, 6, 4, 3, 4, 6, 3, 3], 'd': [2, 4, 4, 2, 3], 'r': [224]}",2022-07-07 03:13:06.306540,39,391813792,77.416 -"{'wid': None, 'ks': [3, 5, 5, 7, 5, 5, 3, 3, 7, 7, 7, 5, 7, 3, 7, 5, 3, 5, 3, 3], 'e': [4, 6, 3, 4, 4, 4, 4, 6, 3, 6, 4, 3, 4, 3, 4, 3, 6, 4, 4, 6], 'd': [4, 3, 3, 2, 3], 'r': [224]}",2022-07-07 03:14:50.398553,41,412962768,77.234 -"{'wid': None, 'ks': [5, 5, 5, 3, 7, 5, 7, 5, 7, 3, 3, 7, 7, 5, 7, 3, 5, 5, 7, 3], 'e': [6, 4, 3, 3, 3, 3, 4, 4, 3, 4, 3, 6, 4, 4, 3, 6, 4, 3, 4, 6], 'd': [4, 4, 4, 2, 4], 'r': [224]}",2022-07-07 03:16:53.105436,44,444295456,77.632 -"{'wid': None, 'ks': [3, 5, 3, 7, 3, 5, 7, 5, 3, 3, 3, 7, 3, 5, 3, 5, 3, 3, 7, 3], 'e': [4, 6, 3, 3, 6, 3, 3, 6, 6, 4, 4, 6, 3, 4, 3, 6, 3, 6, 3, 4], 'd': [4, 4, 2, 2, 4], 'r': [224]}",2022-07-07 03:18:47.301137,41,410969240,76.79 -"{'wid': None, 'ks': [3, 3, 3, 3, 7, 5, 3, 5, 3, 5, 5, 7, 7, 7, 3, 5, 7, 5, 3, 7], 'e': [3, 6, 6, 4, 6, 3, 3, 4, 3, 6, 3, 4, 4, 6, 3, 6, 4, 3, 6, 3], 'd': [2, 3, 4, 4, 2], 'r': [224]}",2022-07-07 03:20:35.391443,40,405868672,77.338 -"{'wid': None, 'ks': [3, 3, 3, 7, 5, 7, 7, 3, 3, 3, 3, 5, 7, 3, 7, 5, 3, 7, 5, 5], 'e': [4, 6, 3, 6, 4, 3, 3, 6, 3, 6, 4, 6, 4, 4, 3, 6, 4, 3, 4, 4], 'd': [3, 4, 4, 2, 2], 'r': [224]}",2022-07-07 03:22:14.504855,37,370501152,76.448 -"{'wid': None, 'ks': [7, 5, 3, 5, 7, 5, 3, 3, 5, 3, 3, 7, 7, 3, 5, 3, 3, 5, 5, 7], 'e': [3, 3, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 4, 3, 6, 3, 3, 3, 4], 'd': [4, 4, 3, 4, 2], 'r': [224]}",2022-07-07 03:24:12.500905,48,482299704,77.7 -"{'wid': None, 'ks': [7, 3, 5, 7, 5, 5, 7, 5, 3, 3, 3, 5, 5, 3, 7, 5, 5, 7, 3, 7], 'e': [3, 6, 4, 6, 6, 3, 3, 3, 6, 3, 6, 4, 4, 6, 4, 4, 4, 4, 6, 6], 'd': [4, 4, 2, 2, 2], 'r': [224]}",2022-07-07 03:25:50.198665,42,423721952,76.506 -"{'wid': None, 'ks': [7, 7, 3, 7, 5, 7, 5, 5, 5, 3, 5, 3, 3, 7, 3, 5, 3, 7, 7, 3], 'e': [3, 3, 3, 4, 4, 3, 4, 4, 4, 4, 4, 6, 6, 4, 3, 3, 3, 6, 3, 4], 'd': [4, 2, 2, 3, 3], 'r': [224]}",2022-07-07 03:27:26.901886,37,373770104,77.258 -"{'wid': None, 'ks': [3, 7, 5, 5, 7, 3, 5, 3, 5, 5, 5, 3, 5, 5, 3, 5, 7, 3, 7, 5], 'e': [3, 4, 6, 6, 4, 3, 6, 6, 6, 3, 3, 3, 3, 6, 3, 6, 6, 3, 6, 3], 'd': [3, 2, 3, 2, 3], 'r': [224]}",2022-07-07 03:29:00.989578,36,369186480,77.096 -"{'wid': None, 'ks': [7, 7, 5, 5, 7, 5, 3, 3, 3, 5, 7, 3, 7, 7, 5, 5, 3, 7, 3, 7], 'e': [6, 3, 6, 3, 4, 3, 3, 3, 4, 3, 6, 4, 3, 3, 6, 4, 4, 3, 4, 3], 'd': [4, 4, 3, 4, 4], 'r': [224]}",2022-07-07 03:31:07.608402,51,518341312,78.104 - """ - with open('search_results.csv', 'w', encoding="utf-8") as f: - f.write(results_csv) def model_builder(model_arch_params): channels = model_arch_params['channels'] @@ -123,14 +87,10 @@ class TestNAS(unittest.TestCase): @classmethod def setUpClass(cls): build_fake_yaml() - build_dynas_fake_yaml() - build_dynas_results_csv() @classmethod def tearDownClass(cls): os.remove('fake.yaml') - os.remove('dynas_fake.yaml') - os.remove('search_results.csv') shutil.rmtree(os.path.join(os.getcwd(), 'NASResults'), ignore_errors=True) shutil.rmtree('runs', ignore_errors=True) @@ -184,34 +144,6 @@ def eval_func(model): best_model_archs = nas_agent() self.assertTrue(len(best_model_archs) > 0) - def test_dynas(self): - nas_agent = NAS('dynas_fake.yaml') - for search_algorithm, supernet in [('nsga2','ofa_mbv3_d234_e346_k357_w1.2'), ('age', 
'ofa_mbv3_d234_e346_k357_w1.2')]: - config = NASConfig(approach='dynas', search_algorithm=search_algorithm) - config.dynas.supernet = supernet - config.seed = 42 - config.dynas.metrics = ['acc', 'macs', 'lat'] - config.dynas.population = 10 - config.dynas.num_evals = 10 - config.dynas.results_csv_path = 'search_results.csv' - config.dynas.batch_size = 64 - nas_agent = NAS(config) - best_model_archs = nas_agent.search() - nas_agent.acc_predictor.get_parameters() - nas_agent.acc_predictor.save('tmp.pickle') - nas_agent.acc_predictor.load('tmp.pickle') - samples = nas_agent.supernet_manager.random_samples(10) - subnet_cfg = nas_agent.supernet_manager.translate2param(samples[0]) - nas_agent.runner_validate.validate_macs(subnet_cfg) - nas_agent.runner_validate.measure_latency(subnet_cfg) - nas_agent.validation_interface.clear_csv() - os.remove('tmp.pickle') - from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference - reference = TorchVisionReference('ofa_resnet50_ofa_mbv3', dataset_path=None, batch_size=1) - reference.validate_macs() - reference.measure_latency() - self.assertTrue(len(best_model_archs) > 0) - if __name__ == "__main__": unittest.main() From 07943de43b016aee945215803191e0250b317b71 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 15:46:39 -0800 Subject: [PATCH 51/60] Revert "Remove reference to DyNAS from UTs" This reverts commit 79a47586b46b136366329476048e2f09fcc1aa84. --- test/nas/test_nas.py | 68 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index 47dfaf19c6b..4d22673d578 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -9,6 +9,7 @@ from neural_compressor.data import DATASETS from neural_compressor.experimental import common, NAS from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.experimental.nas.dynas import DyNAS def build_fake_yaml(approach=None, search_algorithm=None, metrics=['acc']): fake_yaml = """ @@ -57,6 +58,41 @@ def build_fake_yaml(approach=None, search_algorithm=None, metrics=['acc']): with open('fake.yaml', 'w', encoding="utf-8") as f: f.write(fake_yaml) +def build_dynas_fake_yaml(): + fake_yaml = """ + model: + name: imagenet_nas + framework: pytorch + + nas: + approach: dynas + search: + search_algorithm: nsga2 + dynas: + supernet: ofa_resnet50 + metrics: ['acc', 'macs'] + results_csv_path: './search_results.csv' + """ + with open('dynas_fake.yaml', 'w', encoding="utf-8") as f: + f.write(fake_yaml) + +def build_dynas_results_csv(): + results_csv = """ +Sub-network,Date,Latency (ms), MACs,Top-1 Acc (%) +"{'wid': None, 'ks': [7, 7, 3, 3, 5, 7, 7, 3, 5, 5, 3, 3, 7, 3, 5, 5, 5, 7, 5, 7], 'e': [3, 4, 4, 4, 4, 6, 6, 4, 4, 3, 4, 4, 3, 6, 4, 3, 4, 6, 3, 3], 'd': [2, 4, 4, 2, 3], 'r': [224]}",2022-07-07 03:13:06.306540,39,391813792,77.416 +"{'wid': None, 'ks': [3, 5, 5, 7, 5, 5, 3, 3, 7, 7, 7, 5, 7, 3, 7, 5, 3, 5, 3, 3], 'e': [4, 6, 3, 4, 4, 4, 4, 6, 3, 6, 4, 3, 4, 3, 4, 3, 6, 4, 4, 6], 'd': [4, 3, 3, 2, 3], 'r': [224]}",2022-07-07 03:14:50.398553,41,412962768,77.234 +"{'wid': None, 'ks': [5, 5, 5, 3, 7, 5, 7, 5, 7, 3, 3, 7, 7, 5, 7, 3, 5, 5, 7, 3], 'e': [6, 4, 3, 3, 3, 3, 4, 4, 3, 4, 3, 6, 4, 4, 3, 6, 4, 3, 4, 6], 'd': [4, 4, 4, 2, 4], 'r': [224]}",2022-07-07 03:16:53.105436,44,444295456,77.632 +"{'wid': None, 'ks': [3, 5, 3, 7, 3, 5, 7, 5, 3, 3, 3, 7, 3, 5, 3, 5, 3, 3, 7, 3], 'e': [4, 6, 3, 3, 6, 3, 3, 6, 6, 4, 4, 6, 3, 4, 3, 6, 3, 6, 3, 4], 'd': 
[4, 4, 2, 2, 4], 'r': [224]}",2022-07-07 03:18:47.301137,41,410969240,76.79 +"{'wid': None, 'ks': [3, 3, 3, 3, 7, 5, 3, 5, 3, 5, 5, 7, 7, 7, 3, 5, 7, 5, 3, 7], 'e': [3, 6, 6, 4, 6, 3, 3, 4, 3, 6, 3, 4, 4, 6, 3, 6, 4, 3, 6, 3], 'd': [2, 3, 4, 4, 2], 'r': [224]}",2022-07-07 03:20:35.391443,40,405868672,77.338 +"{'wid': None, 'ks': [3, 3, 3, 7, 5, 7, 7, 3, 3, 3, 3, 5, 7, 3, 7, 5, 3, 7, 5, 5], 'e': [4, 6, 3, 6, 4, 3, 3, 6, 3, 6, 4, 6, 4, 4, 3, 6, 4, 3, 4, 4], 'd': [3, 4, 4, 2, 2], 'r': [224]}",2022-07-07 03:22:14.504855,37,370501152,76.448 +"{'wid': None, 'ks': [7, 5, 3, 5, 7, 5, 3, 3, 5, 3, 3, 7, 7, 3, 5, 3, 3, 5, 5, 7], 'e': [3, 3, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 4, 3, 6, 3, 3, 3, 4], 'd': [4, 4, 3, 4, 2], 'r': [224]}",2022-07-07 03:24:12.500905,48,482299704,77.7 +"{'wid': None, 'ks': [7, 3, 5, 7, 5, 5, 7, 5, 3, 3, 3, 5, 5, 3, 7, 5, 5, 7, 3, 7], 'e': [3, 6, 4, 6, 6, 3, 3, 3, 6, 3, 6, 4, 4, 6, 4, 4, 4, 4, 6, 6], 'd': [4, 4, 2, 2, 2], 'r': [224]}",2022-07-07 03:25:50.198665,42,423721952,76.506 +"{'wid': None, 'ks': [7, 7, 3, 7, 5, 7, 5, 5, 5, 3, 5, 3, 3, 7, 3, 5, 3, 7, 7, 3], 'e': [3, 3, 3, 4, 4, 3, 4, 4, 4, 4, 4, 6, 6, 4, 3, 3, 3, 6, 3, 4], 'd': [4, 2, 2, 3, 3], 'r': [224]}",2022-07-07 03:27:26.901886,37,373770104,77.258 +"{'wid': None, 'ks': [3, 7, 5, 5, 7, 3, 5, 3, 5, 5, 5, 3, 5, 5, 3, 5, 7, 3, 7, 5], 'e': [3, 4, 6, 6, 4, 3, 6, 6, 6, 3, 3, 3, 3, 6, 3, 6, 6, 3, 6, 3], 'd': [3, 2, 3, 2, 3], 'r': [224]}",2022-07-07 03:29:00.989578,36,369186480,77.096 +"{'wid': None, 'ks': [7, 7, 5, 5, 7, 5, 3, 3, 3, 5, 7, 3, 7, 7, 5, 5, 3, 7, 3, 7], 'e': [6, 3, 6, 3, 4, 3, 3, 3, 4, 3, 6, 4, 3, 3, 6, 4, 4, 3, 4, 3], 'd': [4, 4, 3, 4, 4], 'r': [224]}",2022-07-07 03:31:07.608402,51,518341312,78.104 + """ + with open('search_results.csv', 'w', encoding="utf-8") as f: + f.write(results_csv) def model_builder(model_arch_params): channels = model_arch_params['channels'] @@ -87,10 +123,14 @@ class TestNAS(unittest.TestCase): @classmethod def setUpClass(cls): build_fake_yaml() + build_dynas_fake_yaml() + build_dynas_results_csv() @classmethod def tearDownClass(cls): os.remove('fake.yaml') + os.remove('dynas_fake.yaml') + os.remove('search_results.csv') shutil.rmtree(os.path.join(os.getcwd(), 'NASResults'), ignore_errors=True) shutil.rmtree('runs', ignore_errors=True) @@ -144,6 +184,34 @@ def eval_func(model): best_model_archs = nas_agent() self.assertTrue(len(best_model_archs) > 0) + def test_dynas(self): + nas_agent = NAS('dynas_fake.yaml') + for search_algorithm, supernet in [('nsga2','ofa_mbv3_d234_e346_k357_w1.2'), ('age', 'ofa_mbv3_d234_e346_k357_w1.2')]: + config = NASConfig(approach='dynas', search_algorithm=search_algorithm) + config.dynas.supernet = supernet + config.seed = 42 + config.dynas.metrics = ['acc', 'macs', 'lat'] + config.dynas.population = 10 + config.dynas.num_evals = 10 + config.dynas.results_csv_path = 'search_results.csv' + config.dynas.batch_size = 64 + nas_agent = NAS(config) + best_model_archs = nas_agent.search() + nas_agent.acc_predictor.get_parameters() + nas_agent.acc_predictor.save('tmp.pickle') + nas_agent.acc_predictor.load('tmp.pickle') + samples = nas_agent.supernet_manager.random_samples(10) + subnet_cfg = nas_agent.supernet_manager.translate2param(samples[0]) + nas_agent.runner_validate.validate_macs(subnet_cfg) + nas_agent.runner_validate.measure_latency(subnet_cfg) + nas_agent.validation_interface.clear_csv() + os.remove('tmp.pickle') + from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference + reference = 
TorchVisionReference('ofa_resnet50_ofa_mbv3', dataset_path=None, batch_size=1) + reference.validate_macs() + reference.measure_latency() + self.assertTrue(len(best_model_archs) > 0) + if __name__ == "__main__": unittest.main() From e3e67e28ca4b5a274b0b4758d647efd300f1fbdf Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 15:53:57 -0800 Subject: [PATCH 52/60] LazyLoad transformer_interface Signed-off-by: Maciej Szankin --- .../experimental/nas/dynast/dynas_utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 8387fdd800a..4a771b41357 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -30,8 +30,8 @@ from neural_compressor.experimental.nas.dynast.dynas_manager import \ ParameterManager from neural_compressor.experimental.nas.dynast.dynas_predictor import Predictor -from neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface import ( - compute_bleu, compute_latency, compute_macs) +# from neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface import ( +# compute_bleu, compute_latency, compute_macs) from neural_compressor.utils.utility import LazyImport, logger from ofa.imagenet_classification.data_providers.imagenet import \ ImagenetDataProvider @@ -41,6 +41,7 @@ torch = LazyImport('torch') torchvision = LazyImport('torchvision') +transformer_interface = LazyImport('neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface') def get_macs( @@ -385,7 +386,7 @@ def validate_bleu( subnet_cfg: dict, ) -> float: #noqa: D102 - bleu = compute_bleu(subnet_cfg, self.dataset_path, + bleu = transformer_interface.compute_bleu(subnet_cfg, self.dataset_path, self.checkpoint_path) return bleu @@ -400,7 +401,7 @@ def validate_macs( Returns: `macs` """ - macs = compute_macs(subnet_cfg, self.dataset_path) + macs = transformer_interface.compute_macs(subnet_cfg, self.dataset_path) logger.info('[DyNAS-T] Model\'s macs: {}'.format(macs)) return macs @@ -417,7 +418,7 @@ def measure_latency( Returns: mean latency; std latency """ - latency_mean, latency_std = compute_latency( + latency_mean, latency_std = transformer_interface.compute_latency( subnet_cfg, self.dataset_path, self.batch_size) logger.info( '[DyNAS-T] Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) From eb5b063c74de774cfdddbb60f94ee47e3bc208e9 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 16:05:57 -0800 Subject: [PATCH 53/60] Removed commented code Signed-off-by: Maciej Szankin --- .../experimental/nas/dynast/dynas_manager.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_manager.py b/neural_compressor/experimental/nas/dynast/dynas_manager.py index 0e64ccaaf2e..4ba28d087be 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_manager.py +++ b/neural_compressor/experimental/nas/dynast/dynas_manager.py @@ -291,12 +291,9 @@ def __init__(self, param_dict: dict, verbose: bool = False, seed: int = 0): #no def onehot_custom(self, subnet_cfg, provide_onehot=True): #noqa: D102 features = [] - #import ipdb;ipdb.set_trace() features.extend(subnet_cfg['encoder_embed_dim']) - #encoder_layer_num = subnet_cfg['encoder_layer_num'] - encode_layer_num_int = 6 # 
encoder_layer_num[0] - # features.extend(encoder_layer_num) + encode_layer_num_int = 6 # Encoder FFN Embed Dim encoder_ffn_embed_dim = subnet_cfg['encoder_ffn_embed_dim'] @@ -360,10 +357,6 @@ def onehot_custom(self, subnet_cfg, provide_onehot=True): #noqa: D102 one_hot_count = 0 unique_values = self.unique_values - # uncomment - # with open(self.onehot_unique,'rb') as f: - # load_unique_values = pickle.load(f) - # unique_values = load_unique_values.tolist() for unique in unique_values: one_hot_count += len(unique.tolist()) @@ -380,8 +373,6 @@ def onehot_custom(self, subnet_cfg, provide_onehot=True): #noqa: D102 else: return features - # return np.array(ks_onehot + ex_onehot) - def import_csv( self, filepath: str, @@ -426,7 +417,6 @@ def import_csv( config_as_onehot = self.onehot_custom( config_as_dict, provide_onehot=False) convert_to_onehot.append(config_as_onehot) - #import ipdb;ipdb.set_trace() df[config] = convert_to_dict df['config_pymoo'] = convert_to_pymoo df['config_onehot'] = convert_to_onehot From 8f1e03ef0317d3442f510ede3a00f698fede58a4 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 16:07:10 -0800 Subject: [PATCH 54/60] Shorten import line Signed-off-by: Maciej Szankin --- neural_compressor/experimental/nas/dynast/dynas_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 4a771b41357..9c9e4b1f4ce 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -41,7 +41,9 @@ torch = LazyImport('torch') torchvision = LazyImport('torchvision') -transformer_interface = LazyImport('neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface') +transformer_interface = LazyImport( + 'neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface' +) def get_macs( From 6951d7ad91c9a9c3be97a9d964d62388c85b58d3 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Sat, 10 Dec 2022 12:49:39 -0800 Subject: [PATCH 55/60] Remove cached torch files when running tests Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index 4d22673d578..4e1c87db185 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -126,6 +126,12 @@ def setUpClass(cls): build_dynas_fake_yaml() build_dynas_results_csv() + try: + shutil.rmtree('.torch/') + except: + pass + + @classmethod def tearDownClass(cls): os.remove('fake.yaml') From e9f872a132a16b2979f5bc9061dc639ba031f2cc Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Sat, 10 Dec 2022 13:30:46 -0800 Subject: [PATCH 56/60] Split NAS tests Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index 4e1c87db185..be6c6405ff0 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -203,6 +203,8 @@ def test_dynas(self): config.dynas.batch_size = 64 nas_agent = NAS(config) best_model_archs = nas_agent.search() + self.assertTrue(len(best_model_archs) > 0) + nas_agent.acc_predictor.get_parameters() nas_agent.acc_predictor.save('tmp.pickle') nas_agent.acc_predictor.load('tmp.pickle') @@ -212,11 +214,18 @@ def test_dynas(self): nas_agent.runner_validate.measure_latency(subnet_cfg) nas_agent.validation_interface.clear_csv() 
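            # Clean up the predictor checkpoint pickled earlier in this test.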
os.remove('tmp.pickle') + + def test_vision_reference(self): from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference - reference = TorchVisionReference('ofa_resnet50_ofa_mbv3', dataset_path=None, batch_size=1) - reference.validate_macs() - reference.measure_latency() - self.assertTrue(len(best_model_archs) > 0) + reference = TorchVisionReference('ofa_mbv3', dataset_path=None, batch_size=1) + macs = reference.validate_macs() + + self.assertEqual(macs, 217234208) + + reference.measure_latency( + warmup_steps=1, + measure_steps=1, + ) if __name__ == "__main__": From ce29fe42ee4bdf90f0d87c148c89a430861ad81e Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Sat, 10 Dec 2022 13:34:37 -0800 Subject: [PATCH 57/60] Catch network timeout in tests Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 49 +++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index be6c6405ff0..d8365af2827 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -191,29 +191,32 @@ def eval_func(model): self.assertTrue(len(best_model_archs) > 0) def test_dynas(self): - nas_agent = NAS('dynas_fake.yaml') - for search_algorithm, supernet in [('nsga2','ofa_mbv3_d234_e346_k357_w1.2'), ('age', 'ofa_mbv3_d234_e346_k357_w1.2')]: - config = NASConfig(approach='dynas', search_algorithm=search_algorithm) - config.dynas.supernet = supernet - config.seed = 42 - config.dynas.metrics = ['acc', 'macs', 'lat'] - config.dynas.population = 10 - config.dynas.num_evals = 10 - config.dynas.results_csv_path = 'search_results.csv' - config.dynas.batch_size = 64 - nas_agent = NAS(config) - best_model_archs = nas_agent.search() - self.assertTrue(len(best_model_archs) > 0) - - nas_agent.acc_predictor.get_parameters() - nas_agent.acc_predictor.save('tmp.pickle') - nas_agent.acc_predictor.load('tmp.pickle') - samples = nas_agent.supernet_manager.random_samples(10) - subnet_cfg = nas_agent.supernet_manager.translate2param(samples[0]) - nas_agent.runner_validate.validate_macs(subnet_cfg) - nas_agent.runner_validate.measure_latency(subnet_cfg) - nas_agent.validation_interface.clear_csv() - os.remove('tmp.pickle') + try: + nas_agent = NAS('dynas_fake.yaml') + for search_algorithm, supernet in [('nsga2','ofa_mbv3_d234_e346_k357_w1.2'), ('age', 'ofa_mbv3_d234_e346_k357_w1.2')]: + config = NASConfig(approach='dynas', search_algorithm=search_algorithm) + config.dynas.supernet = supernet + config.seed = 42 + config.dynas.metrics = ['acc', 'macs', 'lat'] + config.dynas.population = 10 + config.dynas.num_evals = 10 + config.dynas.results_csv_path = 'search_results.csv' + config.dynas.batch_size = 64 + nas_agent = NAS(config) + best_model_archs = nas_agent.search() + self.assertTrue(len(best_model_archs) > 0) + + nas_agent.acc_predictor.get_parameters() + nas_agent.acc_predictor.save('tmp.pickle') + nas_agent.acc_predictor.load('tmp.pickle') + samples = nas_agent.supernet_manager.random_samples(10) + subnet_cfg = nas_agent.supernet_manager.translate2param(samples[0]) + nas_agent.runner_validate.validate_macs(subnet_cfg) + nas_agent.runner_validate.measure_latency(subnet_cfg) + nas_agent.validation_interface.clear_csv() + os.remove('tmp.pickle') + except TimeoutError: + pass def test_vision_reference(self): from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference From 36365ed85c4b1de3563e8f2a3c51f176b7171416 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Sat, 10 Dec 2022 15:49:36 -0800 
Subject: [PATCH 58/60] Remove try catch from tests Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 52 +++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index d8365af2827..b6cc41ead22 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -191,35 +191,33 @@ def eval_func(model): self.assertTrue(len(best_model_archs) > 0) def test_dynas(self): - try: - nas_agent = NAS('dynas_fake.yaml') - for search_algorithm, supernet in [('nsga2','ofa_mbv3_d234_e346_k357_w1.2'), ('age', 'ofa_mbv3_d234_e346_k357_w1.2')]: - config = NASConfig(approach='dynas', search_algorithm=search_algorithm) - config.dynas.supernet = supernet - config.seed = 42 - config.dynas.metrics = ['acc', 'macs', 'lat'] - config.dynas.population = 10 - config.dynas.num_evals = 10 - config.dynas.results_csv_path = 'search_results.csv' - config.dynas.batch_size = 64 - nas_agent = NAS(config) - best_model_archs = nas_agent.search() - self.assertTrue(len(best_model_archs) > 0) - - nas_agent.acc_predictor.get_parameters() - nas_agent.acc_predictor.save('tmp.pickle') - nas_agent.acc_predictor.load('tmp.pickle') - samples = nas_agent.supernet_manager.random_samples(10) - subnet_cfg = nas_agent.supernet_manager.translate2param(samples[0]) - nas_agent.runner_validate.validate_macs(subnet_cfg) - nas_agent.runner_validate.measure_latency(subnet_cfg) - nas_agent.validation_interface.clear_csv() - os.remove('tmp.pickle') - except TimeoutError: - pass + nas_agent = NAS('dynas_fake.yaml') + for search_algorithm, supernet in [('nsga2','ofa_mbv3_d234_e346_k357_w1.2'), ('age', 'ofa_mbv3_d234_e346_k357_w1.2')]: + config = NASConfig(approach='dynas', search_algorithm=search_algorithm) + config.dynas.supernet = supernet + config.seed = 42 + config.dynas.metrics = ['acc', 'macs', 'lat'] + config.dynas.population = 10 + config.dynas.num_evals = 10 + config.dynas.results_csv_path = 'search_results.csv' + config.dynas.batch_size = 64 + nas_agent = NAS(config) + best_model_archs = nas_agent.search() + self.assertTrue(len(best_model_archs) > 0) + + nas_agent.acc_predictor.get_parameters() + nas_agent.acc_predictor.save('tmp.pickle') + nas_agent.acc_predictor.load('tmp.pickle') + samples = nas_agent.supernet_manager.random_samples(10) + subnet_cfg = nas_agent.supernet_manager.translate2param(samples[0]) + nas_agent.runner_validate.validate_macs(subnet_cfg) + nas_agent.runner_validate.measure_latency(subnet_cfg) + nas_agent.validation_interface.clear_csv() + os.remove('tmp.pickle') def test_vision_reference(self): - from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference + from neural_compressor.experimental.nas.dynast.dynas_utils import \ + TorchVisionReference reference = TorchVisionReference('ofa_mbv3', dataset_path=None, batch_size=1) macs = reference.validate_macs() From 858d7b044a8092fe4d6ba572b3ec853c1660e7c3 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Sat, 10 Dec 2022 15:50:02 -0800 Subject: [PATCH 59/60] Move `.torch` cleanup to teardown Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index b6cc41ead22..018ade52bfe 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -1,15 +1,17 @@ -from multiprocessing.spawn import import_main_path import os import shutil import unittest +from pathlib import Path + import numpy as np import torch from 
neural_compressor.conf.config import NASConfig from neural_compressor.data import DATASETS -from neural_compressor.experimental import common, NAS -from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.experimental.nas.dynas import DyNAS +from neural_compressor.experimental import NAS, common +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import \ + PyTorchDataLoader + def build_fake_yaml(approach=None, search_algorithm=None, metrics=['acc']): fake_yaml = """ @@ -126,12 +128,6 @@ def setUpClass(cls): build_dynas_fake_yaml() build_dynas_results_csv() - try: - shutil.rmtree('.torch/') - except: - pass - - @classmethod def tearDownClass(cls): os.remove('fake.yaml') @@ -139,6 +135,7 @@ def tearDownClass(cls): os.remove('search_results.csv') shutil.rmtree(os.path.join(os.getcwd(), 'NASResults'), ignore_errors=True) shutil.rmtree('runs', ignore_errors=True) + shutil.rmtree(os.path.join(os.getcwd(), '.torch'), ignore_errors=True) def test_basic_nas(self): # Built-in train, evaluation From 7dd6615dea460deef599b369c1247b9c50da6670 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Mon, 12 Dec 2022 01:09:20 -0800 Subject: [PATCH 60/60] Skip cleanup Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index bd2ef78559f..10673939388 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -135,7 +135,6 @@ def tearDownClass(cls): os.remove('search_results.csv') shutil.rmtree(os.path.join(os.getcwd(), 'NASResults'), ignore_errors=True) shutil.rmtree('runs', ignore_errors=True) - shutil.rmtree(os.path.join(os.getcwd(), '.torch'), ignore_errors=True) def test_basic_nas(self): # Built-in train, evaluation
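
Taken together, the test changes above exercise the DyNAS flow end to end. As a rough standalone sketch, assuming the same config fields the tests set and a results CSV pre-populated the way `build_dynas_results_csv` does it, the search can be driven directly:

from neural_compressor.conf.config import NASConfig
from neural_compressor.experimental import NAS

# Values mirror those used in test_dynas above; the CSV path is a placeholder
# and must already contain previously evaluated sub-networks.
config = NASConfig(approach='dynas', search_algorithm='nsga2')
config.dynas.supernet = 'ofa_mbv3_d234_e346_k357_w1.2'
config.seed = 42
config.dynas.metrics = ['acc', 'macs', 'lat']
config.dynas.population = 10
config.dynas.num_evals = 10
config.dynas.results_csv_path = 'search_results.csv'
config.dynas.batch_size = 64

nas_agent = NAS(config)
best_model_archs = nas_agent.search()  # best sub-network configurations found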