From f419a7ff8d124b0c16bc92d7dea692e9d66ffebc Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 4 Nov 2022 15:44:08 -0700 Subject: [PATCH 01/60] Update normalization in predictors --- .../nas/dynast/dynas_predictor.py | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_predictor.py b/neural_compressor/experimental/nas/dynast/dynas_predictor.py index 8f142bfffec..a92c0bef5c2 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_predictor.py +++ b/neural_compressor/experimental/nas/dynast/dynas_predictor.py @@ -30,8 +30,13 @@ class Predictor: DEFAULT_COST_FACTORS = np.arange(1.0, 101.0, 1.0) DEFAULT_MAX_ITERATIONS = 1000000 - def __init__(self, alphas=DEFAULT_ALPHAS, cost_factors=DEFAULT_COST_FACTORS, - max_iterations=DEFAULT_MAX_ITERATIONS, verbose=False): + def __init__( + self, + alphas=DEFAULT_ALPHAS, + cost_factors=DEFAULT_COST_FACTORS, + max_iterations=DEFAULT_MAX_ITERATIONS, + verbose=False, + ): SEARCHER_VERBOSITY = 10 @@ -42,15 +47,24 @@ def __init__(self, alphas=DEFAULT_ALPHAS, cost_factors=DEFAULT_COST_FACTORS, self.best_index = 0 # Create lists of regressors and associated hyper-parameters - regressors = [linear_model.Ridge(max_iter=max_iterations), - svm.SVR(kernel='rbf', gamma='auto', epsilon=0.0, max_iter=max_iterations)] + regressors = [ + linear_model.Ridge(max_iter=max_iterations), + svm.SVR(kernel='rbf', gamma='auto', epsilon=0.0, max_iter=max_iterations), + ] hyper_parameters = [{'alpha': alphas}, {'C': cost_factors}] # Create list of hyper-parameter searchers self.searchers = [] for regressor, parameters in zip(regressors, hyper_parameters): - self.searchers.append(GridSearchCV(estimator=regressor, param_grid=parameters, n_jobs=-1, - scoring='neg_mean_absolute_percentage_error', verbose=SEARCHER_VERBOSITY if (verbose) else 0)) + self.searchers.append( + GridSearchCV( + estimator=regressor, + param_grid=parameters, + n_jobs=-1, + scoring='neg_mean_absolute_percentage_error', + verbose=SEARCHER_VERBOSITY if (verbose) else 0, + ) + ) def train(self, examples, labels): @@ -65,8 +79,14 @@ def train(self, examples, labels): None ''' + # Compute normalization factor + max_label = np.amax(np.abs(labels)) + if max_label > 0.0: + self.normalization_factor = 10 ** (np.floor(np.log10(max_label)) - 1.0) + else: + self.normalization_factor = 1.0 + # Compute normalized labels - self.normalization_factor = 10 ** (np.floor(np.log10(np.amax(labels))) - 1.0) normalized_labels = labels / self.normalization_factor # Train regressors with optimal parameters @@ -108,7 +128,7 @@ def get_parameters(self): ''' # Retrieve optimal parameters - parameters = {} + parameters = {'best_index': self.best_index} for searcher in self.searchers: regressor_name = searcher.best_estimator_.__class__.__name__ for key in searcher.best_params_: From 363bdff46541e48634792e66f682872dcd87cb4e Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 4 Nov 2022 15:48:50 -0700 Subject: [PATCH 02/60] Add notebook's checkpoint to gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d039b651c1f..509d3f1d1a9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ .idea /venv/ */__pycache__ +.ipynb_checkpoints/ *.snapshot *.csv *.pb @@ -17,4 +18,4 @@ build/ _build lpot_workspace/ .torch/ -node_modules \ No newline at end of file +node_modules From 9d7f1f4eb972521e7a693ec1ab843cf841b34745 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 4 Nov 2022 
16:02:23 -0700 Subject: [PATCH 03/60] Add TODO note. --- neural_compressor/experimental/nas/dynas.py | 1 + 1 file changed, 1 insertion(+) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index b3e4d2cc37e..f70dcf1ec1c 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -103,6 +103,7 @@ def search(self): # Randomly sample search space for initial population # if number of results in results_csv_path smaller than population. + # TODO(macsz) Create empty CSV if it does not exists. df = pd.read_csv(self.results_csv_path) latest_population = [self.supernet_manager.random_sample() \ for _ in range(max(self.population - df.shape[0], 0))] From 1dceb8a6b79df7aa7cc8b77bd6d59bc8f483cea6 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 4 Nov 2022 16:20:30 -0700 Subject: [PATCH 04/60] Add Runners dictionary --- neural_compressor/experimental/nas/dynas.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index f70dcf1ec1c..2353d23eacf 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -39,7 +39,6 @@ def __init__(self, conf_fname_or_obj): self.Predictor = Predictor self.ProblemMultiObjective = ProblemMultiObjective self.SearchAlgoManager = SearchAlgoManager - self.OFARunner = OFARunner self.SUPERNET_PARAMETERS = { 'ofa_resnet50': {'d' : {'count' : 5, 'vars' : [0, 1, 2]}, @@ -54,6 +53,12 @@ def __init__(self, conf_fname_or_obj): 'e' : {'count' : 20, 'vars' : [3, 4, 6]}, 'd' : {'count' : 5, 'vars' : [2, 3, 4]} } } + self.RUNNERS = { + 'ofa_resnet50': OFARunner, + 'ofa_mbv3_d234_e346_k357_w1.0': OFARunner, + 'ofa_mbv3_d234_e346_k357_w1.2': OFARunner, + } + self.EVALUATION_INTERFACE = {'ofa_resnet50': EvaluationInterfaceResNet50, 'ofa_mbv3_d234_e346_k357_w1.0': EvaluationInterfaceMobileNetV3, 'ofa_mbv3_d234_e346_k357_w1.2': EvaluationInterfaceMobileNetV3} @@ -78,7 +83,7 @@ def init_for_search(self): ) # Validation High-Fidelity Measurement Runner - self.runner_validate = self.OFARunner( + self.runner_validate = self.RUNNERS[self.supernet]( supernet=self.supernet, acc_predictor=None, macs_predictor=None, @@ -121,7 +126,7 @@ def search(self): self.create_latency_predictor() # Inner-loop Low-Fidelity Predictor Runner, need to re-instantiate every loop - runner_predict = self.OFARunner( + runner_predict = self.RUNNERS[self.supernet]( supernet=self.supernet, acc_predictor=self.acc_predictor, macs_predictor=self.macs_predictor, From 2c17af6dcbddcf842364ff18f961ae4b0a32bd79 Mon Sep 17 00:00:00 2001 From: "Nittur Sridhar, Sharath" Date: Sun, 6 Nov 2022 22:54:56 -0800 Subject: [PATCH 05/60] add transformer example --- neural_compressor/conf/config.py | 1 + neural_compressor/experimental/nas/dynas.py | 54 +- .../experimental/nas/dynast/dynas_manager.py | 211 ++++ .../experimental/nas/dynast/dynas_utils.py | 171 +++ .../modules_supernetwork.py | 590 +++++++++++ .../transformer_interface.py | 498 +++++++++ .../transformer_supernetwork.py | 992 ++++++++++++++++++ 7 files changed, 2506 insertions(+), 11 deletions(-) create mode 100644 neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py create mode 100644 neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py create mode 100644 
neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index f06560cc56e..d165298be29 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -1066,6 +1066,7 @@ def percent_to_float(data): Optional("num_evals", default=100000): int, Optional("results_csv_path", default=None): str, Optional("dataset_path", default=None): str, + Optional("supernet_ckpt_path", default=None): str, Optional("batch_size", default=64): int, }, }, diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 2353d23eacf..a5d70dde6ae 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -21,7 +21,6 @@ from .nas import NASBase from .nas_utils import nas_registry - @nas_registry("DyNAS") class DyNAS(NASBase): """ @@ -31,10 +30,13 @@ class DyNAS(NASBase): """ def __init__(self, conf_fname_or_obj): from .dynast.dynas_manager import ParameterManager + from .dynast.dynas_manager import TransformerLTEncoding from .dynast.dynas_predictor import Predictor from .dynast.dynas_search import ProblemMultiObjective, SearchAlgoManager from .dynast.dynas_utils import (EvaluationInterfaceMobileNetV3, - EvaluationInterfaceResNet50, OFARunner) + EvaluationInterfaceResNet50, EvaluationInterfaceTransformerLT, + OFARunner,TransformerLTRunner) + self.ParameterManager = ParameterManager self.Predictor = Predictor self.ProblemMultiObjective = ProblemMultiObjective @@ -51,20 +53,44 @@ def __init__(self, conf_fname_or_obj): 'ofa_mbv3_d234_e346_k357_w1.2': {'ks' : {'count' : 20, 'vars' : [3, 5, 7]}, 'e' : {'count' : 20, 'vars' : [3, 4, 6]}, - 'd' : {'count' : 5, 'vars' : [2, 3, 4]} } - } + 'd' : {'count' : 5, 'vars' : [2, 3, 4]} }, + + 'transformer_lt_wmt_en_de': + {'encoder_embed_dim': {'count':1,'vars':[640, 512]}, + 'decoder_embed_dim': {'count':1, 'vars': [640, 512]}, + 'encoder_ffn_embed_dim': {'count':6, 'vars':[3072, 2048, 1024]}, + 'decoder_ffn_embed_dim' : {'count':6,'vars': [3072, 2048, 1024]}, + 'decoder_layer_num': {'count':1,'vars':[6, 5, 4, 3, 2, 1]}, + 'encoder_self_attention_heads': {'count':6, 'vars':[8, 4]}, + 'decoder_self_attention_heads': {'count':6, 'vars':[8, 4]}, + 'decoder_ende_attention_heads': {'count':6, 'vars':[8, 4]}, + 'decoder_arbitrary_ende_attn': {'count':6, 'vars':[-1, 1, 2]}} + } self.RUNNERS = { 'ofa_resnet50': OFARunner, 'ofa_mbv3_d234_e346_k357_w1.0': OFARunner, 'ofa_mbv3_d234_e346_k357_w1.2': OFARunner, + 'transformer_lt_wmt_en_de': TransformerLTRunner } self.EVALUATION_INTERFACE = {'ofa_resnet50': EvaluationInterfaceResNet50, 'ofa_mbv3_d234_e346_k357_w1.0': EvaluationInterfaceMobileNetV3, - 'ofa_mbv3_d234_e346_k357_w1.2': EvaluationInterfaceMobileNetV3} + 'ofa_mbv3_d234_e346_k357_w1.2': EvaluationInterfaceMobileNetV3, + 'transformer_lt_wmt_en_de': EvaluationInterfaceTransformerLT} + self.LINAS_INNERLOOP_EVALS = {'ofa_resnet50': 5000, 'ofa_mbv3_d234_e346_k357_w1.0': 20000, - 'ofa_mbv3_d234_e346_k357_w1.2': 20000} + 'ofa_mbv3_d234_e346_k357_w1.2': 20000, + 'transformer_lt_wmt_en_de': 10000} + + self.SUPERNET_ENCODING = { + 'ofa_resnet50': ParameterManager, + 'ofa_mbv3_d234_e346_k357_w1.0': ParameterManager, + 'ofa_mbv3_d234_e346_k357_w1.2': ParameterManager, + 'ofa_proxyless_d234_e346_k357_w1.3': ParameterManager, + 'transformer_lt_wmt_en_de': TransformerLTEncoding, + } + super().__init__() self.acc_predictor = None self.macs_predictor = None @@ -77,10 
+103,13 @@ def estimate(self, individual): self.validation_interface.eval_subnet(individual) def init_for_search(self): - self.supernet_manager = self.ParameterManager( - param_dict=self.SUPERNET_PARAMETERS[self.supernet], - seed=self.seed + self.supernet_manager = self.SUPERNET_ENCODING[self.supernet]( + param_dict=self.SUPERNET_PARAMETERS[self.supernet], seed=self.seed ) + #self.supernet_manager = self.ParameterManager( + # param_dict=self.SUPERNET_PARAMETERS[self.supernet], + # seed=self.seed + #) # Validation High-Fidelity Measurement Runner self.runner_validate = self.RUNNERS[self.supernet]( @@ -88,8 +117,9 @@ def init_for_search(self): acc_predictor=None, macs_predictor=None, latency_predictor=None, - imagenetpath=self.dataset_path, + datasetpath=self.dataset_path, batch_size=self.batch_size, + checkpoint_path=self.supernet_ckpt_path ) # Setup validation interface @@ -131,8 +161,9 @@ def search(self): acc_predictor=self.acc_predictor, macs_predictor=self.macs_predictor, latency_predictor=self.latency_predictor, - imagenetpath=self.dataset_path, + datasetpath=self.dataset_path, batch_size=self.batch_size, + checkpoint_path=self.supernet_ckpt_path ) # Setup validation interface @@ -236,6 +267,7 @@ def init_cfg(self, conf_fname_or_obj): self.num_evals = dynas_config.num_evals self.results_csv_path = dynas_config.results_csv_path self.dataset_path = dynas_config.dataset_path + self.supernet_ckpt_path = dynas_config.supernet_ckpt_path self.batch_size = dynas_config.batch_size if dynas_config.population < 10: # pragma: no cover raise NotImplementedError( diff --git a/neural_compressor/experimental/nas/dynast/dynas_manager.py b/neural_compressor/experimental/nas/dynast/dynas_manager.py index 0394dc2117b..898d85e3ab0 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_manager.py +++ b/neural_compressor/experimental/nas/dynast/dynas_manager.py @@ -276,3 +276,214 @@ def create_training_set( ) ) return features_train, features_test, labels_train, labels_test + + + + + + +class TransformerLTEncoding(ParameterManager): + def __init__(self, param_dict: dict, verbose: bool = False, seed: int = 0): + super().__init__(param_dict, verbose, seed) + + def onehot_custom(self,subnet_cfg,provide_onehot=True): + + features = [] + #import ipdb;ipdb.set_trace() + features.extend(subnet_cfg['encoder_embed_dim']) + + #encoder_layer_num = subnet_cfg['encoder_layer_num'] + encode_layer_num_int = 6#encoder_layer_num[0] + #features.extend(encoder_layer_num) + + #Encoder FFN Embed Dim + encoder_ffn_embed_dim = subnet_cfg['encoder_ffn_embed_dim'] + + if encode_layer_num_int < 6: + encoder_ffn_embed_dim.extend([0]*(6-encode_layer_num_int)) + features.extend(encoder_ffn_embed_dim) + + #Encoder Self-Attn Heads + + encoder_self_attention_heads = subnet_cfg['encoder_self_attention_heads'][:encode_layer_num_int] + + if encode_layer_num_int < 6: + encoder_self_attention_heads.extend([0]*(6-encode_layer_num_int)) + features.extend(encoder_self_attention_heads) + + + features.extend(subnet_cfg['decoder_embed_dim']) + + decoder_layer_num = subnet_cfg['decoder_layer_num'] + decoder_layer_num_int = decoder_layer_num[0] + features.extend(decoder_layer_num) + + #Decoder FFN Embed Dim + decoder_ffn_embed_dim = subnet_cfg['decoder_ffn_embed_dim'][:decoder_layer_num_int] + + if decoder_layer_num_int < 6: + decoder_ffn_embed_dim.extend([0]*(6-decoder_layer_num_int)) + features.extend(decoder_ffn_embed_dim) + + + #Decoder Attn Heads + decoder_self_attention_heads = 
subnet_cfg['decoder_self_attention_heads'][:decoder_layer_num_int] + + if decoder_layer_num_int < 6: + decoder_self_attention_heads.extend([0]*(6-decoder_layer_num_int)) + features.extend(decoder_self_attention_heads) + + #Decoder ENDE HEADS + + decoder_ende_attention_heads = subnet_cfg['decoder_ende_attention_heads'][:decoder_layer_num_int] + + if decoder_layer_num_int < 6: + decoder_ende_attention_heads.extend([0]*(6-decoder_layer_num_int)) + + features.extend(decoder_ende_attention_heads) + + arbitrary_ende_attn_trans = [] + for i in range(decoder_layer_num_int): + if subnet_cfg['decoder_arbitrary_ende_attn'][i] == -1: + arbitrary_ende_attn_trans.append(1) + elif subnet_cfg['decoder_arbitrary_ende_attn'][i] == 1: + arbitrary_ende_attn_trans.append(2) + elif subnet_cfg['decoder_arbitrary_ende_attn'][i] == 2: + arbitrary_ende_attn_trans.append(3) + + if decoder_layer_num_int < 6: + arbitrary_ende_attn_trans.extend([0]*(6-decoder_layer_num_int)) + features.extend(arbitrary_ende_attn_trans) + + if provide_onehot==True: + examples = np.array([features]) + one_hot_count = 0 + unique_values = self.unique_values + + #uncomment + #with open(self.onehot_unique,'rb') as f: + # load_unique_values = pickle.load(f) + # unique_values = load_unique_values.tolist() + for unique in unique_values: + one_hot_count += len(unique.tolist()) + + + one_hot_examples = np.zeros((examples.shape[0], one_hot_count)) + for e, example in enumerate(examples): + offset = 0 + for f in range(len(example)): + index = np.where(unique_values[f] == example[f])[0] + offset + one_hot_examples[e, index] = 1.0 + offset += len(unique_values[f]) + return one_hot_examples + + else: + return features + + #return np.array(ks_onehot + ex_onehot) + + + def import_csv( + self, + filepath: str, + config: str, + objective: str, + column_names: List[str] = None, + drop_duplicates: bool = True, + ) -> pd.DataFrame: + ''' + Import a csv file generated from a supernetwork search for the purpose + of training a predictor. + + filepath - path of the csv to be imported. + config - the subnetwork configuration + objective - target/label for the subnet configuration (e.g. accuracy, latency) + column_names - a list of column names for the dataframe + df - the output dataframe that contains the original config dict, pymoo, and 1-hot + equivalent vector for training. 
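The positional one-hot expansion used by onehot_custom and create_training_set above can be summarized in isolation: every feature position keeps its own list of observed unique values, and a sub-network's value at that position switches on exactly one slot inside that position's block. A minimal sketch with toy values (the dimensions below are illustrative placeholders, not taken from the actual search space):

import numpy as np

# Per-position unique values, e.g. an embed-dim choice and one padded FFN-dim slot (0 = layer absent).
unique_values = [np.array([512, 640]), np.array([0, 1024, 2048, 3072])]

example = [640, 2048]                       # one sub-network's raw feature vector
one_hot = np.zeros(sum(len(u) for u in unique_values))
offset = 0
for value, uniques in zip(example, unique_values):
    one_hot[offset + np.where(uniques == value)[0][0]] = 1.0  # switch on this position's slot
    offset += len(uniques)                                     # move to the next position's block

print(one_hot)                              # [0. 1. 0. 0. 1. 0.]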
+ ''' + + if column_names == None: + df = pd.read_csv(filepath) + else: + df = pd.read_csv(filepath) + df.columns = column_names + df = df[[config, objective]] + # Old corner case coverage + df[config] = df[config].replace({'null': 'None'}, regex=True) + + if drop_duplicates: + df.drop_duplicates(subset=[config], inplace=True) + df.reset_index(drop=True, inplace=True) + + convert_to_dict = list() + convert_to_pymoo = list() + convert_to_onehot = list() + for i in range(len(df)): + # Elastic Param Config format + config_as_dict = ast.literal_eval(df[config].iloc[i]) + convert_to_dict.append(config_as_dict) + # PyMoo 1-D vector format + config_as_pymoo = self.translate2pymoo(config_as_dict) + convert_to_pymoo.append(config_as_pymoo) + # Onehot predictor format + config_as_onehot = self.onehot_custom(config_as_dict,provide_onehot=False) + convert_to_onehot.append(config_as_onehot) + #import ipdb;ipdb.set_trace() + df[config] = convert_to_dict + df['config_pymoo'] = convert_to_pymoo + df['config_onehot'] = convert_to_onehot + + return df + + #@staticmethod + def create_training_set( + self, + dataframe: pd.DataFrame, + train_with_all: bool = True, + split: float = 0.33, + seed: bool = None, + ) -> Tuple[list, list, list, list]: + ''' + Create a sklearn compatible test/train set from an imported results csv + after "import_csv" method is run. + ''' + + collect_rows = list() + for i in range(len(dataframe)): + collect_rows.append(np.asarray(dataframe['config_onehot'].iloc[i])) + features = np.asarray(collect_rows) + labels = dataframe.drop(columns=['config', 'config_pymoo', 'config_onehot']).values + + assert len(features) == len(labels) + one_hot_count = 0 + unique_values = [] + + for c in range(features.shape[1]): + unique_values.append(np.unique(features[:, c])) + one_hot_count += len(unique_values[-1]) + one_hot_examples = np.zeros((features.shape[0], one_hot_count)) + for e, example in enumerate(features): + offset = 0 + for f in range(len(example)): + index = np.where(unique_values[f] == example[f])[0] + offset + one_hot_examples[e, index] = 1.0 + offset += len(unique_values[f]) + + features = one_hot_examples + self.unique_values = unique_values + if train_with_all: + logger.info('[DyNAS-T] Training set size={}'.format(len(labels))) + return features, labels + else: + features_train, features_test, labels_train, labels_test = train_test_split( + features, labels, test_size=split, random_state=seed + ) + logger.info( + '[DyNAS-T] Test ({}) Train ({}) ratio is {}.'.format( + len(labels_train), len(labels_test), split + ) + ) + return features_train, features_test, labels_train, labels_test + + diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 8fe2608c67e..e15ca455005 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -33,6 +33,10 @@ from neural_compressor.experimental.nas.dynast.dynas_predictor import Predictor from neural_compressor.utils.utility import LazyImport, logger +from neural_compressor.experimental.nas.supernetwork.machine_translation.transformer_interface import ( + compute_bleu, + compute_latency +) torch = LazyImport('torch') torchvision = LazyImport('torchvision') @@ -270,6 +274,103 @@ def get_subnet( return self.subnet + + +class TransformerLTRunner(Runner): + """The OFARunner class manages the sub-network selection from the OFA super-network and + the validation measurements of the sub-networks. 
ResNet50, MobileNetV3 w1.0, and MobileNetV3 w1.2 + are currently supported. Imagenet is required for these super-networks `imagenet-ilsvrc2012`. + """ + + def __init__( + self, + supernet: str, + acc_predictor: Predictor, + macs_predictor: Predictor, + latency_predictor: Predictor, + datasetpath: str, + batch_size: int, + checkpoint_path: str + ) -> None: + self.supernet = supernet + self.acc_predictor = acc_predictor + self.macs_predictor = macs_predictor + self.latency_predictor = latency_predictor + self.device = 'cpu' + self.test_size = None + self.batch_size = batch_size + self.dataset_path = datasetpath + self.checkpoint_path = checkpoint_path + + def estimate_accuracy_bleu( + self, + subnet_cfg: dict, + ) -> float: + top1 = self.acc_predictor.predict(subnet_cfg) + return top1 + + def estimate_macs( + self, + subnet_cfg: dict, + ) -> int: + macs = self.macs_predictor.predict(subnet_cfg) + return macs + + def estimate_latency( + self, + subnet_cfg: dict, + ) -> float: + latency = self.latency_predictor.predict(subnet_cfg) + return latency + + def validate_bleu( + self, + subnet_cfg: dict, + ) -> float: # pragma: no cover + + bleu = compute_bleu(subnet_cfg,self.dataset_path,self.checkpoint_path) + return bleu + + def validate_macs( + self, + subnet_cfg: dict, + ) -> float: + """Measure Torch model's FLOPs/MACs as per FVCore calculation + Args: + subnet_cfg: sub-network Torch model + Returns: + `macs` + """ + + #model = self.get_subnet(subnet_cfg) + #input_size = (self.batch_size, 3, 224, 224) + #macs = get_macs(model=model, input_size=input_size, device=self.device) + macs = 0 + #logger.info('Model\'s macs: {}'.format(macs)) + return macs + + @torch.no_grad() + def measure_latency( + self, + subnet_cfg: dict, + warmup_steps: int = None, + measure_steps: int = None, + ) -> Tuple[float, float]: + """Measure OFA model's latency. 
+ Args: + subnet_cfg: sub-network Torch model + Returns: + mean latency; std latency + """ + + latency_mean, latency_std = compute_latency(subnet_cfg,self.dataset_path) + logger.info('Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) + + return latency_mean, latency_std + + + + class EvaluationInterface: """ The interface class update is required to be updated for each unique SuperNetwork @@ -430,6 +531,76 @@ def eval_subnet( return sample, macs, -top1 + +class EvaluationInterfaceTransformerLT(EvaluationInterface): + def __init__( + self, + evaluator: Runner, + manager: ParameterManager, + metrics=['acc', 'macs'], + predictor_mode=False, + csv_path=None, + ) -> None: + super().__init__(evaluator, manager, metrics, predictor_mode, csv_path) + + def eval_subnet( + self, + x: list, + ) -> Tuple[dict, float, float]: + # PyMoo vector to Elastic Parameter Mapping + param_dict = self.manager.translate2param(x) + + sample = { + 'encoder': { + 'encoder_embed_dim': param_dict['encoder_embed_dim'][0], + 'encoder_layer_num': 6,#param_dict['encoder_layer_num'][0], + 'encoder_ffn_embed_dim': param_dict['encoder_ffn_embed_dim'], + 'encoder_self_attention_heads': param_dict['encoder_self_attention_heads'], + }, + 'decoder': { + 'decoder_embed_dim': param_dict['decoder_embed_dim'][0], + 'decoder_layer_num': param_dict['decoder_layer_num'][0], + 'decoder_ffn_embed_dim': param_dict['decoder_ffn_embed_dim'], + 'decoder_self_attention_heads': param_dict['decoder_self_attention_heads'], + 'decoder_ende_attention_heads': param_dict['decoder_ende_attention_heads'], + 'decoder_arbitrary_ende_attn':param_dict['decoder_arbitrary_ende_attn'] + } + } + + subnet_sample = copy.deepcopy(sample) + + # Always evaluate/predict top1 + lat, macs = 0, 0 + if self.predictor_mode == True: + bleu = self.evaluator.estimate_accuracy_bleu(self.manager.onehot_custom(param_dict).reshape(1,-1))[0] + if 'macs' in self.metrics: + macs = self.evaluator.estimate_macs(self.manager.onehot_custom(param_dict).reshape(1,-1))[0] + if 'lat' in self.metrics: + lat = self.evaluator.estimate_latency(self.manager.onehot_custom(param_dict).reshape(1,-1))[0] + else: + bleu = self.evaluator.validate_bleu(subnet_sample) + macs = self.evaluator.validate_macs(subnet_sample) + if 'lat' in self.metrics: + lat, _ = self.evaluator.measure_latency(subnet_sample) + + if self.csv_path: + with open(self.csv_path, 'a') as f: + writer = csv.writer(f) + date = str(datetime.now()) + result = [param_dict, date, lat, macs, bleu,] + writer.writerow(result) + + # PyMoo only minimizes objectives, thus accuracy needs to be negative + # Requires format: subnetwork, objective x, objective y + if 'lat' in self.metrics: + return sample, lat, -bleu + else: + return sample, macs, -bleu + + + + + def get_torchvision_model( model_name: str, ) -> torch.nn.Module: diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py new file mode 100644 index 00000000000..ef4cbddc952 --- /dev/null +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py @@ -0,0 +1,590 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +import numpy as np +from torch.nn.modules.module import _addindent +from torch.nn import Parameter + +from fairseq import utils +from collections import defaultdict + + +INCREMENTAL_STATE_INSTANCE_ID = defaultdict(lambda: 0) + + +def 
_get_full_incremental_state_key(module_instance, key): + module_name = module_instance.__class__.__name__ + + # assign a unique ID to each module instance, so that incremental state is + # not shared across module instances + if not hasattr(module_instance, '_fairseq_instance_id'): + INCREMENTAL_STATE_INSTANCE_ID[module_name] += 1 + module_instance._fairseq_instance_id = INCREMENTAL_STATE_INSTANCE_ID[module_name] + + return '{}.{}.{}'.format(module_name, module_instance._fairseq_instance_id, key) + + +def get_incremental_state(module, incremental_state, key): + """Helper for getting incremental state for an nn.Module.""" + full_key = _get_full_incremental_state_key(module, key) + if incremental_state is None or full_key not in incremental_state: + return None + return incremental_state[full_key] + + +def set_incremental_state(module, incremental_state, key, value): + """Helper for setting incremental state for an nn.Module.""" + if incremental_state is not None: + full_key = _get_full_incremental_state_key(module, key) + incremental_state[full_key] = value + +class EmbeddingSuper(nn.Embedding): + def __init__(self, num_embeddings, super_embed_dim, padding_idx, *args, **kwargs): + super().__init__(num_embeddings, super_embed_dim, padding_idx, *args, **kwargs) + + # the largest embed dim + self.super_embed_dim = {'encoder': super_embed_dim, 'decoder': super_embed_dim} + + # the current sampled embed dim + self.sample_embed_dim = {'encoder': None, 'decoder': None} + + self.samples = {'encoder': {}, 'decoder': {}} + self.profiling = False + self.reset_parameters() + + def profile(self, mode=True): + self.profiling = mode + + def reset_parameters(self): + super().reset_parameters() + nn.init.normal_(self.weight, mean=0, std=self.embedding_dim ** -0.5) + nn.init.constant_(self.weight[self.padding_idx], 0) + + def set_sample_config(self, sample_embed_dim, part): + self.sample_embed_dim[part] = sample_embed_dim + self._sample_parameters(part) + + def _sample_parameters(self, part): + weight = self.weight[..., :self.sample_embed_dim[part]] + self.samples[part]['weight'] = weight + + return self.samples + + def sample_parameters(self, part, resample=False): + return self._sample_parameters(part) if self.profiling or resample else self.samples + + def sampled_weight(self, part): + return self.sample_parameters(part)[part]['weight'] + + def forward(self, input, part='encoder'): + return F.embedding(input, self.sampled_weight(part), self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse) + + +class LinearSuper(nn.Linear): + def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear'): + super().__init__(super_in_dim, super_out_dim, bias=bias) + + # super_in_dim and super_out_dim indicate the largest network! + self.super_in_dim = super_in_dim + self.super_out_dim = super_out_dim + + # input_dim and output_dim indicate the current sampled size + self.sample_in_dim = None + self.sample_out_dim = None + + self.samples = {} + + self._reset_parameters(bias, uniform_, non_linear) + self.profiling = False + + def profile(self, mode=True): + self.profiling = mode + + def sample_parameters(self, resample=False): + if self.profiling or resample: + return self._sample_parameters() + return self.samples + + def _reset_parameters(self, bias, uniform_, non_linear): + nn.init.xavier_uniform_(self.weight) if uniform_ is None else uniform_( + self.weight, non_linear=non_linear) + if bias: + nn.init.constant_(self.bias, 0.) 
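EmbeddingSuper, LinearSuper and LayerNormSuper all follow the same elastic pattern: parameters are allocated once at the largest (super) size, and each sampled sub-network reads only a leading slice of them. A minimal standalone sketch of that idea (class and dimension names are illustrative, not part of fairseq or this patch):

import torch
import torch.nn as nn
import torch.nn.functional as F

class ElasticLinear(nn.Linear):
    # A linear layer whose effective width can be shrunk at sample time.
    def __init__(self, super_in, super_out):
        super().__init__(super_in, super_out)
        self.sample_in, self.sample_out = super_in, super_out  # default: full super width

    def set_sample_config(self, sample_in, sample_out):
        self.sample_in, self.sample_out = sample_in, sample_out

    def forward(self, x):
        weight = self.weight[:self.sample_out, :self.sample_in]  # leading slice of the super weight
        bias = self.bias[:self.sample_out]
        return F.linear(x, weight, bias)

layer = ElasticLinear(640, 3072)
layer.set_sample_config(512, 1024)
out = layer(torch.randn(2, 512))   # shape (2, 1024); only the sliced parameters are used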
+ + def set_sample_config(self, sample_in_dim, sample_out_dim): + self.sample_in_dim = sample_in_dim + self.sample_out_dim = sample_out_dim + + self._sample_parameters() + + def _sample_parameters(self): + self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) + self.samples['bias'] = self.bias + if self.bias is not None: + self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) + return self.samples + + def forward(self, x): + self.sample_parameters() + return F.linear(x, self.samples['weight'], self.samples['bias']) + + def calc_sampled_param_num(self): + assert 'weight' in self.samples.keys() + weight_numel = self.samples['weight'].numel() + + if self.samples['bias'] is not None: + bias_numel = self.samples['bias'].numel() + else: + bias_numel = 0 + + return weight_numel + bias_numel + + +def sample_weight(weight, sample_in_dim, sample_out_dim): + sample_weight = weight[:, :sample_in_dim] + sample_weight = sample_weight[:sample_out_dim, :] + + return sample_weight + + +def sample_bias(bias, sample_out_dim): + sample_bias = bias[:sample_out_dim] + + return sample_bias + +def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False): + if not export and torch.cuda.is_available(): + try: + from apex.normalization import FusedLayerNorm + return FusedLayerNorm(normalized_shape, eps, elementwise_affine) + except ImportError: + pass + return torch.nn.LayerNorm(normalized_shape, eps, elementwise_affine) + + +class LayerNormSuper(torch.nn.LayerNorm): + def __init__(self, super_embed_dim): + super().__init__(super_embed_dim) + + # the largest embed dim + self.super_embed_dim = super_embed_dim + + # the current sampled embed dim + self.sample_embed_dim = None + + self.samples = {} + self.profiling = False + + def profile(self, mode=True): + self.profiling = mode + + def sample_parameters(self, resample=False): + if self.profiling or resample: + return self._sample_parameters() + return self.samples + + def _sample_parameters(self): + self.samples['weight'] = self.weight[:self.sample_embed_dim] + self.samples['bias'] = self.bias[:self.sample_embed_dim] + return self.samples + + def set_sample_config(self, sample_embed_dim): + self.sample_embed_dim = sample_embed_dim + self._sample_parameters() + + def forward(self, x): + self.sample_parameters() + return F.layer_norm(x, (self.sample_embed_dim,), weight=self.samples['weight'], bias=self.samples['bias'], eps=self.eps) + + def calc_sampled_param_num(self): + assert 'weight' in self.samples.keys() + assert 'bias' in self.samples.keys() + return self.samples['weight'].numel() + self.samples['bias'].numel() + + + +class MultiheadAttentionSuper(nn.Module): + """Multi-headed attention. + + See "Attention Is All You Need" for more details. 
+ """ + + def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, super_vdim=None, dropout=0., bias=True, + add_bias_kv=False, add_zero_attn=False, self_attention=False, + encoder_decoder_attention=False, out_dim=None, qkv_dim=None): + super().__init__() + + + + # the configs of super arch + self.super_q_embed_dim = super_embed_dim + self.super_kv_embed_dim = None + + # the configs of current sampled arch + self.sample_q_embed_dim = None + self.sample_kv_embed_dim = None + + if super_kdim is not None: + assert super_kdim == super_vdim + self.super_kv_embed_dim = super_kdim + else: + self.super_kv_embed_dim = self.super_q_embed_dim + + if qkv_dim is None: + self.qkv_dim = self.super_q_embed_dim + else: + self.qkv_dim = qkv_dim + + # this qkv same dim means the input dim for qkv are the same, not the output dim + # self.qkv_same_dim = self.kdim == self.super_embed_dim and self.vdim == self.super_embed_dim + self.qkv_same_dim = self.super_kv_embed_dim == self.super_q_embed_dim + self.encoder = is_encoder + + # Caution! these actually are the sampled num_heads, head_dim and scaling + self.num_heads = num_heads + self.dropout = dropout + self.head_dim = self.qkv_dim // num_heads + assert self.head_dim * num_heads == self.qkv_dim, "qkv must be divisible by num_heads" + self.scaling = self.head_dim ** -0.5 + + self.self_attention = self_attention + self.encoder_decoder_attention = encoder_decoder_attention + + assert not self.self_attention or self.qkv_same_dim, 'Self-attention requires query, key and ' \ + 'value to be of the same size' + + if self.qkv_same_dim: + self.in_proj_weight = Parameter(torch.Tensor(3 * self.qkv_dim, self.super_q_embed_dim)) + else: + self.k_proj_weight = Parameter(torch.Tensor(self.qkv_dim, self.super_kv_embed_dim)) + self.v_proj_weight = Parameter(torch.Tensor(self.qkv_dim, self.super_kv_embed_dim)) + self.q_proj_weight = Parameter(torch.Tensor(self.qkv_dim, self.super_q_embed_dim)) + + if bias: + self.in_proj_bias = Parameter(torch.Tensor(3 * self.qkv_dim)) + else: + self.register_parameter('in_proj_bias', None) + + if out_dim is None: + out_dim = self.super_q_embed_dim + self.out_proj = LinearSuper(super_in_dim=self.qkv_dim, super_out_dim=out_dim, bias=bias) + + if add_bias_kv: + self.bias_k = Parameter(torch.Tensor(1, 1, self.super_q_embed_dim)) + self.bias_v = Parameter(torch.Tensor(1, 1, self.super_q_embed_dim)) + else: + self.bias_k = self.bias_v = None + + self.add_zero_attn = add_zero_attn + + + self.reset_parameters() + + self.onnx_trace = False + + self.enable_torch_version = False + if hasattr(F, "multi_head_attention_forward"): + self.enable_torch_version = True + else: + self.enable_torch_version = False + self.enable_torch_version = False + + def calc_sampled_param_num(self): + assert self.in_proj_weight is not None and self.in_proj_bias is not None + in_proj_q_weight_numel = self.sample_q_embed_dim * self.qkv_dim + in_proj_v_weight_numel = in_proj_k_weight_numel = self.sample_kv_embed_dim * self.qkv_dim + in_proj_bias_numel = self.in_proj_bias.numel() + + # does not count in the output proj because it will be counted in LinearSuper layer + # out_proj_weight_numel = self.qkv_dim * self.sample_q_embed_dim + # out_proj_bias_numel = self. 
+ + return in_proj_q_weight_numel + in_proj_k_weight_numel + in_proj_v_weight_numel + in_proj_bias_numel + + + + + def set_sample_config(self, sample_q_embed_dim, sample_attention_heads, sample_kv_embed_dim=None): + self.sample_q_embed_dim = sample_q_embed_dim + if sample_kv_embed_dim is None: + self.sample_kv_embed_dim = sample_q_embed_dim + else: + self.sample_kv_embed_dim = sample_kv_embed_dim + + self.num_heads = sample_attention_heads + self.head_dim = self.qkv_dim // self.num_heads + assert self.head_dim * self.num_heads == self.qkv_dim, "qkv_dim must be divisible by sampled num_heads" + self.scaling = self.head_dim ** -0.5 + + self.out_proj.set_sample_config(sample_in_dim=self.qkv_dim, sample_out_dim=self.sample_q_embed_dim) + + + def prepare_for_onnx_export_(self): + self.onnx_trace = True + + def reset_parameters(self): + if self.qkv_same_dim: + nn.init.xavier_uniform_(self.in_proj_weight) + else: + nn.init.xavier_uniform_(self.k_proj_weight) + nn.init.xavier_uniform_(self.v_proj_weight) + nn.init.xavier_uniform_(self.q_proj_weight) + + nn.init.xavier_uniform_(self.out_proj.weight) + if self.in_proj_bias is not None: + nn.init.constant_(self.in_proj_bias, 0.) + nn.init.constant_(self.out_proj.bias, 0.) + if self.bias_k is not None: + nn.init.xavier_normal_(self.bias_k) + if self.bias_v is not None: + nn.init.xavier_normal_(self.bias_v) + + + def forward(self, query, key, value, key_padding_mask=None, incremental_state=None, + need_weights=True, static_kv=False, attn_mask=None): + """Input shape: Time x Batch x Channel + + Timesteps can be masked by supplying a T x T mask in the + `attn_mask` argument. Padding elements can be excluded from + the key by passing a binary ByteTensor (`key_padding_mask`) with shape: + batch x src_len, where padding elements are indicated by 1s. 
+ """ + + + tgt_len, bsz, embed_dim = query.size() + + if incremental_state is not None: + saved_state = self._get_input_buffer(incremental_state) + if 'prev_key' in saved_state: + # previous time steps are cached - no need to recompute + # key and value if they are static + if static_kv: + assert self.encoder_decoder_attention and not self.self_attention + key = value = None + else: + saved_state = None + + if self.self_attention: + # self-attention + q, k, v = self.in_proj_qkv(query) + elif self.encoder_decoder_attention: + # encoder-decoder attention + q = self.in_proj_q(query) + if key is None: + assert value is None + k = v = None + else: + k = self.in_proj_k(key) + v = self.in_proj_v(key) + + else: + q = self.in_proj_q(query) + k = self.in_proj_k(key) + v = self.in_proj_v(value) + + q *= self.scaling + + if self.bias_k is not None: + assert self.bias_v is not None + k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) + v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) + if attn_mask is not None: + attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [key_padding_mask, key_padding_mask.new_zeros(key_padding_mask.size(0), 1)], dim=1) + + q = q.contiguous().view(tgt_len, bsz * self.num_heads, self.head_dim).transpose(0, 1) + + if k is not None: + k = k.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1) + if v is not None: + v = v.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1) + + if saved_state is not None: + # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) + if 'prev_key' in saved_state: + prev_key = saved_state['prev_key'].view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + k = prev_key + else: + k = torch.cat((prev_key, k), dim=1) + if 'prev_value' in saved_state: + prev_value = saved_state['prev_value'].view(bsz * self.num_heads, -1, self.head_dim) + if static_kv: + v = prev_value + else: + v = torch.cat((prev_value, v), dim=1) + saved_state['prev_key'] = k.view(bsz, self.num_heads, -1, self.head_dim) + saved_state['prev_value'] = v.view(bsz, self.num_heads, -1, self.head_dim) + + self._set_input_buffer(incremental_state, saved_state) + + src_len = k.size(1) + + # This is part of a workaround to get around fork/join parallelism + # not supporting Optional types. 
+ if key_padding_mask is not None and key_padding_mask.shape == torch.Size([]): + key_padding_mask = None + + if key_padding_mask is not None: + fil = key_padding_mask.new_ones(key_padding_mask.size(0), src_len-key_padding_mask.size(1)) + key_padding_mask = torch.cat((key_padding_mask, fil), dim=1) + assert key_padding_mask.size(0) == bsz + assert key_padding_mask.size(1) == src_len + + + if self.add_zero_attn: + src_len += 1 + k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + if attn_mask is not None: + attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + if key_padding_mask is not None: + key_padding_mask = torch.cat( + [key_padding_mask, torch.zeros(key_padding_mask.size(0), 1).type_as(key_padding_mask)], dim=1) + + attn_weights = torch.bmm(q, k.transpose(1, 2)) + attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + + assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + + if attn_mask is not None: + attn_mask = attn_mask.unsqueeze(0) + if self.onnx_trace: + attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1) + attn_weights += attn_mask + + if key_padding_mask is not None: + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + if self.onnx_trace: + attn_weights = torch.where( + key_padding_mask.unsqueeze(1).unsqueeze(2), + torch.Tensor([float("-Inf")]), + attn_weights.float() + ).type_as(attn_weights) + else: + attn_weights = attn_weights.masked_fill( + key_padding_mask.unsqueeze(1).unsqueeze(2), + float('-inf'), + ) + attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + + attn_weights = utils.softmax( + attn_weights, dim=-1, onnx_trace=self.onnx_trace, + ).type_as(attn_weights) + attn_weights = F.dropout(attn_weights, p=self.dropout, training=self.training) + + attn = torch.bmm(attn_weights, v) + + assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + + if (self.onnx_trace and attn.size(1) == 1): + # when ONNX tracing a single decoder step (sequence length == 1) + # the transpose is a no-op copy before view, thus unnecessary + attn = attn.contiguous().view(tgt_len, bsz, self.qkv_dim) + else: + attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, self.qkv_dim) + attn = self.out_proj(attn) + + if need_weights: + # average attention weights over heads + attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + + attn_weights = attn_weights.sum(dim=1) / self.num_heads + else: + attn_weights = None + + return attn, attn_weights + + def in_proj_qkv(self, query): + return self._in_proj(query, sample_dim=self.sample_q_embed_dim).chunk(3, dim=-1) + + def in_proj_q(self, query): + if self.qkv_same_dim: + return self._in_proj(query, end=self.qkv_dim, sample_dim=self.sample_q_embed_dim) + else: + bias = self.in_proj_bias + if bias is not None: + bias = bias[:self.qkv_dim] + return F.linear(query, self.q_proj_weight[..., :self.sample_q_embed_dim], bias) + + def in_proj_k(self, key): + if self.qkv_same_dim: + return self._in_proj(key, start=self.qkv_dim, end=2 * self.qkv_dim, sample_dim=self.sample_kv_embed_dim) + else: + weight = self.k_proj_weight + bias = self.in_proj_bias + if bias is not None: + bias = bias[self.qkv_dim:2 * self.qkv_dim] + return F.linear(key, weight[..., :self.sample_kv_embed_dim], bias) + + def in_proj_v(self, value): + if self.qkv_same_dim: + return self._in_proj(value, start=2 * self.qkv_dim, 
sample_dim=self.sample_kv_embed_dim) + else: + weight = self.v_proj_weight + bias = self.in_proj_bias + if bias is not None: + bias = bias[2 * self.qkv_dim:] + return F.linear(value, weight[..., :self.sample_kv_embed_dim], bias) + + def _in_proj(self, input, sample_dim, start=0, end=None): + weight = self.in_proj_weight + bias = self.in_proj_bias + weight = weight[start:end, :sample_dim] + if bias is not None: + bias = bias[start:end] + return F.linear(input, weight, bias) + + def reorder_incremental_state(self, incremental_state, new_order): + """Reorder buffered internal state (for incremental generation).""" + input_buffer = self._get_input_buffer(incremental_state) + if input_buffer is not None: + for k in input_buffer.keys(): + input_buffer[k] = input_buffer[k].index_select(0, new_order) + self._set_input_buffer(incremental_state, input_buffer) + + def _get_input_buffer(self, incremental_state): + return get_incremental_state( #utils. + self, + incremental_state, + 'attn_state', + ) or {} + + def _set_input_buffer(self, incremental_state, buffer): + set_incremental_state( + self, + incremental_state, + 'attn_state', + buffer, + ) + + def apply_sparse_mask(self, attn_weights, tgt_len, src_len, bsz): + return attn_weights + + def __repr__(self): + # We treat the extra repr like the sub-module, one item per line + extra_lines = [] + extra_repr = self.extra_repr() + # empty string will be split into list [''] + if extra_repr: + extra_lines = extra_repr.split('\n') + child_lines = [] + for key, module in self._modules.items(): + mod_str = repr(module) + mod_str = _addindent(mod_str, 2) + child_lines.append('(' + key + '): ' + mod_str) + lines = extra_lines + child_lines + + main_str = self._get_name() + '\tnum_heads:' + str(self.num_heads) + '\t qkv_dim:' + str(self.qkv_dim) + if lines: + # simple one-liner info, which most builtin Modules will use + if len(extra_lines) == 1 and not child_lines: + main_str += extra_lines[0] + else: + main_str += '\n ' + '\n '.join(lines) + '\n' + + main_str += ')' + return main_str diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py new file mode 100644 index 00000000000..75d990f26dd --- /dev/null +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -0,0 +1,498 @@ +""" +Translate pre-processed data with a trained model. +""" +import torch + +from fairseq import checkpoint_utils, options, progress_bar, tasks, utils +from fairseq.meters import StopwatchMeter, TimeMeter +import sys +import pdb +import numpy as np +import subprocess +import os +from fairseq.data import dictionary +import csv +import json +import warnings +from .transformer_supernetwork import TransformerSuperNetwork + +import sys +import logging +import tqdm +import time +import copy +from datetime import datetime +import ctypes +import math +warnings.filterwarnings("ignore") + + +try: + from fairseq import libbleu +except ImportError as e: + import sys + sys.stderr.write('ERROR: missing libbleu.so. 
run `pip install --editable .`\n') + raise e + + +C = ctypes.cdll.LoadLibrary(libbleu.__file__) + + +class BleuStat(ctypes.Structure): + _fields_ = [ + ('reflen', ctypes.c_size_t), + ('predlen', ctypes.c_size_t), + ('match1', ctypes.c_size_t), + ('count1', ctypes.c_size_t), + ('match2', ctypes.c_size_t), + ('count2', ctypes.c_size_t), + ('match3', ctypes.c_size_t), + ('count3', ctypes.c_size_t), + ('match4', ctypes.c_size_t), + ('count4', ctypes.c_size_t), + ] + + +class Scorer(object): + def __init__(self, pad, eos, unk): + self.stat = BleuStat() + self.pad = pad + self.eos = eos + self.unk = unk + self.reset() + + def reset(self, one_init=False): + if one_init: + C.bleu_one_init(ctypes.byref(self.stat)) + else: + C.bleu_zero_init(ctypes.byref(self.stat)) + + def add(self, ref, pred): + if not isinstance(ref, torch.IntTensor): + raise TypeError('ref must be a torch.IntTensor (got {})' + .format(type(ref))) + if not isinstance(pred, torch.IntTensor): + raise TypeError('pred must be a torch.IntTensor(got {})' + .format(type(pred))) + + # don't match unknown words + rref = ref.clone() + assert not rref.lt(0).any() + rref[rref.eq(self.unk)] = -999 + + rref = rref.contiguous().view(-1) + pred = pred.contiguous().view(-1) + + C.bleu_add( + ctypes.byref(self.stat), + ctypes.c_size_t(rref.size(0)), + ctypes.c_void_p(rref.data_ptr()), + ctypes.c_size_t(pred.size(0)), + ctypes.c_void_p(pred.data_ptr()), + ctypes.c_int(self.pad), + ctypes.c_int(self.eos)) + + def score(self, order=4): + psum = sum(math.log(p) if p > 0 else float('-Inf') + for p in self.precision()[:order]) + return self.brevity() * math.exp(psum / order) * 100 + + def precision(self): + def ratio(a, b): + return a / b if b > 0 else 0 + + return [ + ratio(self.stat.match1, self.stat.count1), + ratio(self.stat.match2, self.stat.count2), + ratio(self.stat.match3, self.stat.count3), + ratio(self.stat.match4, self.stat.count4), + ] + + def brevity(self): + r = self.stat.reflen / self.stat.predlen + return min(1, math.exp(1 - r)) + + def result_string(self, order=4): + assert order <= 4, "BLEU scores for order > 4 aren't supported" + fmt = 'BLEU{} = {:2.2f}, {:2.1f}' + for _ in range(1, order): + fmt += '/{:2.1f}' + fmt += ' (BP={:.3f}, ratio={:.3f}, syslen={}, reflen={})' + bleup = [p * 100 for p in self.precision()[:order]] + return fmt.format(order, self.score(order=order), *bleup, + self.brevity(), self.stat.predlen/self.stat.reflen, + self.stat.predlen, self.stat.reflen) + + +def get_bleu_score(args,ref,sys): + dict = dictionary.Dictionary() + order =4 + sacrebleu = False + sentence_bleu = False + ignore_case = False + def readlines(fd): + for line in fd.readlines(): + if ignore_case: + yield line.lower() + else: + yield line + + + if sentence_bleu: + def score(fdsys): + with open(ref) as fdref: + scorer = Scorer(dict.pad(), dict.eos(), dict.unk()) + for i, (sys_tok, ref_tok) in enumerate(zip(readlines(fdsys), readlines(fdref))): + scorer.reset(one_init=True) + sys_tok = dict.encode_line(sys_tok) + ref_tok = dict.encode_line(ref_tok) + scorer.add(ref_tok, sys_tok) + print(i, scorer.result_string(order)) + else: + def score(fdsys): + with open(ref) as fdref: + scorer = Scorer(dict.pad(), dict.eos(), dict.unk()) + for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)): + sys_tok = dict.encode_line(sys_tok) + ref_tok = dict.encode_line(ref_tok) + scorer.add(ref_tok, sys_tok) + print(scorer.result_string(order)) + return(scorer.score(order)) + + + if sys == '-': + score = score(sys.stdin) + else: + with open(sys, 'r') as f: + 
score = score(f) + return score + +def compute_bleu(config,dataset_path,checkpoint_path): + + parser = options.get_generation_parser() + + args = options.parse_args_and_arch(parser,[dataset_path]) + + args.data = dataset_path + args.beam = 5 + args.remove_bpe = '@@ ' + args.gen_subset = 'test' + args.lenpen = 0.6 + args.source_lang = 'en' + args.target_lang = 'de' + args.batch_size = 128 + utils.import_user_module(args) + max_tokens = 12000 + + + use_cuda = torch.cuda.is_available() and not args.cpu + + # when running on CPU, use fp32 as default + if not use_cuda: + args.fp16 = False + + torch.manual_seed(args.seed) + + # Optimize ensemble for generation + # Load dataset splits + task = tasks.setup_task(args) + task.load_dataset(args.gen_subset) + # Set dictionaries + try: + src_dict = getattr(task, 'source_dictionary', None) + except NotImplementedError: + src_dict = None + tgt_dict = task.target_dictionary + + + # Load ensemble + print('| loading model(s) from {}'.format(args.path)) + model = TransformerSuperNetwork(task) + state = torch.load(checkpoint_path,map_location=torch.device('cpu')) + + model.load_state_dict(state['model'], + strict=True) + + if use_cuda: + model.cuda() + print(config) + model.set_sample_config(config) + model.make_generation_fast_( + beamable_mm_beam_size=None if args.no_beamable_mm else args.beam, + need_attn=args.print_alignment, + ) + if args.fp16: + model.half() + if use_cuda: + model.cuda() + + print(args.path, file=sys.stderr) + + # Load alignment dictionary for unknown word replacement + # (None if no unknown word replacement, empty if no path to align dictionary) + align_dict = utils.load_align_dict(args.replace_unk) + + # Load dataset (possibly sharded) + itr = task.get_batch_iterator( + dataset=task.dataset(args.gen_subset), + max_tokens=args.max_tokens, + max_sentences=128, + max_positions=utils.resolve_max_positions( + task.max_positions(), + *[model.max_positions()] + ), + ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test, + required_batch_size_multiple=args.required_batch_size_multiple, + num_shards=args.num_shards, + shard_id=args.shard_id, + num_workers=args.num_workers, + ).next_epoch_itr(shuffle=False) + + # Initialize generator + gen_timer = StopwatchMeter() + generator = task.build_generator([model],args) + + num_sentences = 0 + has_target = True + decoder_times_all = [] + input_len_all = [] + with open('translations_out.txt','a') as fname_translations: + with progress_bar.build_progress_bar(args, itr) as t: + wps_meter = TimeMeter() + for sample in t: + + sample = utils.move_to_cuda(sample) if use_cuda else sample + if 'net_input' not in sample: + continue + + prefix_tokens = None + if args.prefix_size > 0: + prefix_tokens = sample['target'][:, :args.prefix_size] + + gen_timer.start() + hypos = task.inference_step(generator, [model], sample, prefix_tokens) + input_len_all.append(np.mean(sample['net_input']['src_lengths'].cpu().numpy())) + num_generated_tokens = sum(len(h[0]['tokens']) for h in hypos) + gen_timer.stop(num_generated_tokens) + + for i, sample_id in enumerate(sample['id'].tolist()): + has_target = sample['target'] is not None + + # Remove padding + src_tokens = utils.strip_pad(sample['net_input']['src_tokens'][i, :], tgt_dict.pad()) + target_tokens = None + if has_target: + target_tokens = utils.strip_pad(sample['target'][i, :], tgt_dict.pad()).int().cpu() + + # Either retrieve the original sentences or regenerate them from tokens. 
+ if align_dict is not None: + src_str = task.dataset(args.gen_subset).src.get_original_text(sample_id) + target_str = task.dataset(args.gen_subset).tgt.get_original_text(sample_id) + else: + if src_dict is not None: + src_str = src_dict.string(src_tokens, args.remove_bpe) + else: + src_str = "" + if has_target: + target_str = tgt_dict.string(target_tokens, args.remove_bpe, escape_unk=True) + + if not args.quiet: + if src_dict is not None: + #print('S-{}\t{}'.format(sample_id, src_str)) + fname_translations.write('S-{}\t{}'.format(sample_id, src_str)) + fname_translations.write('\n') + + if has_target: + #print('T-{}\t{}'.format(sample_id, target_str)) + fname_translations.write('T-{}\t{}'.format(sample_id, target_str)) + fname_translations.write('\n') + + # Process top predictions + for j, hypo in enumerate(hypos[i][:args.nbest]): + hypo_tokens, hypo_str, alignment = utils.post_process_prediction( + hypo_tokens=hypo['tokens'].int().cpu(), + src_str=src_str, + alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None, + align_dict=align_dict, + tgt_dict=tgt_dict, + remove_bpe=args.remove_bpe, + ) + + if not args.quiet: + + fname_translations.write('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str)) + fname_translations.write('\n') + fname_translations.write('P-{}\t{}'.format( + sample_id, + ' '.join(map( + lambda x: '{:.4f}'.format(x), + hypo['positional_scores'].tolist(), + )) + )) + fname_translations.write('\n') + + if args.print_alignment: + fname_translations.write('A-{}\t{}'.format( + sample_id, + ' '.join(map(lambda x: str(utils.item(x)), alignment)) + )) + fname_translations.write('\n') + + wps_meter.update(num_generated_tokens) + t.log({'wps': round(wps_meter.avg)}) + num_sentences += sample['nsentences'] + + + os.system("grep ^H translations_out.txt | cut -f3- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > sys.txt") + os.system("grep ^T translations_out.txt | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > ref.txt") + bleu_score = get_bleu_score(args,"ref.txt","sys.txt") + print(bleu_score) + + os.system("rm ref.txt") + os.system("rm sys.txt") + os.system("rm translations_out.txt") + return bleu_score + +def compute_latency(config,dataset_path,get_model_parameters=False): + parser = options.get_generation_parser() + + args = options.parse_args_and_arch(parser,[dataset_path]) + + args.data = dataset_path + args.beam = 5 + args.remove_bpe = '@@ ' + args.gen_subset = 'test' + args.lenpen = 0.6 + args.source_lang = 'en' + args.target_lang = 'de' + args.batch_size = 128 + utils.import_user_module(args) + max_tokens = 12000 + args.latgpu=False + args.latcpu=True + args.latiter=100 + + # Initialize CUDA and distributed training + if torch.cuda.is_available() and not args.cpu: + torch.cuda.set_device(args.device_id) + torch.manual_seed(args.seed) + + #Optimize ensemble for generation + # Load dataset splits + task = tasks.setup_task(args) + task.load_dataset(args.gen_subset) + # Set dictionaries + try: + src_dict = getattr(task, 'source_dictionary', None) + except NotImplementedError: + src_dict = None + tgt_dict = task.target_dictionary + + + # Load ensemble + print('| loading model(s) from {}'.format(args.path)) + model = TransformerSuperNetwork(task) + + # specify the length of the dummy input for profile + # for iwslt, the average length is 23, for wmt, that is 30 + dummy_sentence_length_dict = {'iwslt': 23, 'wmt': 30} + + dummy_sentence_length = dummy_sentence_length_dict['wmt'] + + + dummy_src_tokens = [2] + [7] * 
(dummy_sentence_length - 1) + dummy_prev = [7] * (dummy_sentence_length - 1) + [2] + + src_tokens_test = torch.tensor([dummy_src_tokens], dtype=torch.long)#.cuda() + src_lengths_test = torch.tensor([dummy_sentence_length])#.cuda() + prev_output_tokens_test_with_beam = torch.tensor([dummy_prev] * args.beam, dtype=torch.long)#.cuda() + bsz = 1 + new_order = torch.arange(bsz).view(-1, 1).repeat(1, args.beam).view(-1).long()#.cuda() + if args.latcpu: + model.cpu() + print('Measuring model latency on CPU for dataset generation...') + elif args.latgpu: + model.cuda() + src_tokens_test = src_tokens_test#.cuda() + src_lengths_test = src_lengths_test#.cuda() + prev_output_tokens_test_with_beam = prev_output_tokens_test_with_beam#.cuda() + print('Measuring model latency on GPU for dataset generation...') + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + + + model.set_sample_config(config) + + model.eval() + + with torch.no_grad(): + + # dry runs + for _ in range(15): + encoder_out_test = model.encoder(src_tokens=src_tokens_test, src_lengths=src_lengths_test) + + encoder_latencies = [] + print('Measuring encoder for dataset generation...') + for _ in range(args.latiter): + if args.latgpu: + #start.record() + start = time.time() + elif args.latcpu: + start = time.time() + + model.encoder(src_tokens=src_tokens_test, src_lengths=src_lengths_test) + + if args.latgpu: + end = time.time() + encoder_latencies.append((end - start) * 1000) + elif args.latcpu: + end = time.time() + encoder_latencies.append((end - start) * 1000) + + encoder_latencies.sort() + encoder_latencies = encoder_latencies[int(args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] + print(f'Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; \t Std: {np.std(encoder_latencies)} ms') + + + encoder_out_test_with_beam = model.encoder.reorder_encoder_out(encoder_out_test, new_order) + + # dry runs + for _ in range(15): + model.decoder(prev_output_tokens=prev_output_tokens_test_with_beam, + encoder_out=encoder_out_test_with_beam) + + # decoder is more complicated because we need to deal with incremental states and auto regressive things + decoder_iterations_dict = {'iwslt': 23, 'wmt': 30} + + decoder_iterations = decoder_iterations_dict['wmt'] + print(decoder_iterations) + decoder_latencies = [] + print('Measuring decoder for dataset generation...') + for _ in range(args.latiter): + if args.latgpu: + start = time.time() + #start.record() + elif args.latcpu: + start = time.time() + incre_states = {} + for k_regressive in range(decoder_iterations): + model.decoder(prev_output_tokens=prev_output_tokens_test_with_beam[:, :k_regressive + 1], + encoder_out=encoder_out_test_with_beam, incremental_state=incre_states) + if args.latgpu: + end = time.time() + decoder_latencies.append((end - start) * 1000) + + elif args.latcpu: + end = time.time() + decoder_latencies.append((end - start) * 1000) + + # only use the 10% to 90% latencies to avoid outliers + decoder_latencies.sort() + decoder_latencies = decoder_latencies[int(args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] + + print(decoder_latencies) + print(f'Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') + + lat_mean = np.mean(encoder_latencies)+np.mean(decoder_latencies) + lat_std = np.std(encoder_latencies)+np.std(decoder_latencies) + return lat_mean, lat_std diff --git 
a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py new file mode 100644 index 00000000000..58f29a94aa3 --- /dev/null +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py @@ -0,0 +1,992 @@ +import torch +from torch import nn +from torch.nn import Parameter +import torch.nn.functional as F + +from fairseq import utils +from fairseq import options, utils +from fairseq.models import ( + FairseqEncoder, + FairseqIncrementalDecoder, + BaseFairseqModel +) + +from fairseq.modules import (PositionalEmbedding,SinusoidalPositionalEmbedding) +from .modules_supernetwork import ( + + MultiheadAttentionSuper, + EmbeddingSuper, + LinearSuper, + LayerNormSuper + +) +import math + +DEFAULT_MAX_SOURCE_POSITIONS = 1024 +DEFAULT_MAX_TARGET_POSITIONS = 1024 +class TransformerSuperNetwork(BaseFairseqModel): + """ + Transformer model from `"Attention Is All You Need" (Vaswani, et al, 2017) + `_. + + Args: + encoder (TransformerEncoder): the encoder + decoder (TransformerDecoder): the decoder + + The Transformer model provides the following named architectures and + command-line arguments: + + .. argparse:: + :ref: fairseq.models.transformer_parser + :prog: + """ + + + def __init__(self,task): + super().__init__() + + src_dict, tgt_dict = task.source_dictionary, task.target_dictionary + encoder_config ={'encoder_embed_dim': 640, + 'encoder_layers': 6, + 'encoder_attention_heads': 8, + 'encoder_ffn_embed_dim':3072, + 'encoder_embed_path': None} + + decoder_config ={'decoder_embed_dim': 640, + 'decoder_layers': 6, + 'decoder_attention_heads': 8, + 'decoder_ffn_embed_dim':3072} + + encoder_embed_tokens = self.build_embedding( + src_dict, encoder_config['encoder_embed_dim'], encoder_config['encoder_embed_path'] + ) + decoder_embed_tokens = encoder_embed_tokens + self.share_decoder_input_output_embed = True + + self.encoder = TransformerEncoder(encoder_config, src_dict, encoder_embed_tokens) + self.decoder = TransformerDecoder(decoder_config, tgt_dict, decoder_embed_tokens) + + def build_embedding(self,dictionary, embed_dim, path=None): + num_embeddings = len(dictionary) + padding_idx = dictionary.pad() + emb = Embedding(num_embeddings, embed_dim, padding_idx) + # if provided, load from preloaded dictionaries + if path: + embed_dict = utils.parse_embedding(path) + utils.load_embedding(embed_dict, dictionary, emb) + return emb + + def profile(self, mode=True): + for module in self.modules(): + if hasattr(module, 'profile') and self != module: + module.profile(mode) + + def get_sampled_params_numel(self, config): + self.set_sample_config(config) + numels = [] + for name, module in self.named_modules(): + if hasattr(module, 'calc_sampled_param_num'): + # a hacky way to skip the layers that exceed encoder-layer-num or decoder-layer-num + if name.split('.')[0] == 'encoder' and eval(name.split('.')[2]) >= config['encoder']['encoder_layer_num']: + continue + if name.split('.')[0] == 'decoder' and eval(name.split('.')[2]) >= config['decoder']['decoder_layer_num']: + continue + + numels.append(module.calc_sampled_param_num()) + return sum(numels) + + def set_sample_config(self, config): + self.encoder.set_sample_config(config) + self.decoder.set_sample_config(config) + + +class TransformerEncoder(FairseqEncoder): + """ + Transformer encoder consisting of *args.encoder_layers* layers. Each layer + is a :class:`TransformerEncoderLayer`. 
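+
+    A sub-network of the encoder is activated with :meth:`set_sample_config`,
+    which takes an ``{'encoder': {...}}`` dictionary of elastic dimensions.
+    Illustrative sketch only; ``encoder`` is assumed to be an instance of this
+    class and the values below are placeholders rather than the actual DyNAS
+    search space::
+
+        encoder.set_sample_config({'encoder': {
+            'encoder_embed_dim': 512,
+            'encoder_layer_num': 6,
+            'encoder_ffn_embed_dim': [2048] * 6,
+            'encoder_self_attention_heads': [8] * 6,
+        }})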
+ + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): encoding dictionary + embed_tokens (torch.nn.Embedding): input embedding + """ + + def __init__(self, encoder_config, dictionary, embed_tokens): + super().__init__(dictionary) + # the configs of super arch + self.super_embed_dim = encoder_config['encoder_embed_dim'] + self.super_ffn_embed_dim = [encoder_config['encoder_ffn_embed_dim']] * encoder_config['encoder_layers'] + self.super_layer_num = encoder_config['encoder_layers'] + self.super_self_attention_heads = [encoder_config['encoder_attention_heads']] * encoder_config['encoder_layers'] + + self.super_dropout = 0.3 + self.super_activation_dropout = 0 + + self.super_embed_scale = math.sqrt(self.super_embed_dim) + + # the configs of current sampled arch + self.sample_embed_dim = None + self.sample_ffn_embed_dim = None + self.sample_layer_num = None + self.sample_self_attention_heads = None + + self.sample_dropout = None + self.sample_activation_dropout = None + + self.sample_embed_scale = None + + self.register_buffer('version', torch.Tensor([3])) + + self.padding_idx = embed_tokens.padding_idx + self.max_source_positions = DEFAULT_MAX_SOURCE_POSITIONS + + self.embed_tokens = embed_tokens + + self.embed_positions = PositionalEmbedding( + self.max_source_positions, self.super_embed_dim, self.padding_idx, + learned= False, + ) + + self.layers = nn.ModuleList([]) + self.layers.extend([ + TransformerEncoderLayer(encoder_config, layer_idx=i) + for i in range(self.super_layer_num) + ]) + + if False: + self.layer_norm = LayerNormSuper(self.super_embed_dim) + else: + self.layer_norm = None + + self.vocab_original_scaling = False + + + def set_sample_config(self, config:dict): + + self.sample_embed_dim = config['encoder']['encoder_embed_dim'] + + # Caution: this is a list for all layers + self.sample_ffn_embed_dim = config['encoder']['encoder_ffn_embed_dim'] + + self.sample_layer_num = config['encoder']['encoder_layer_num'] + + # Caution: this is a list for all layers + self.sample_self_attention_heads = config['encoder']['encoder_self_attention_heads'] + + self.sample_dropout = calc_dropout(self.super_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_activation_dropout = calc_dropout(self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) + + self.sample_embed_scale = math.sqrt(self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale + + self.embed_tokens.set_sample_config(sample_embed_dim=self.sample_embed_dim, part='encoder') + + if self.layer_norm is not None: + self.layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + + for i, layer in enumerate(self.layers): + # not exceed sample layer number + if i < self.sample_layer_num: + layer.set_sample_config(is_identity_layer=False, + sample_embed_dim=self.sample_embed_dim, + sample_ffn_embed_dim_this_layer=self.sample_ffn_embed_dim[i], + sample_self_attention_heads_this_layer=self.sample_self_attention_heads[i], + sample_dropout=self.sample_dropout, + sample_activation_dropout=self.sample_activation_dropout) + # exceeds sample layer number + else: + layer.set_sample_config(is_identity_layer=True) + + + def forward(self, src_tokens, src_lengths): + """ + Args: + src_tokens (LongTensor): tokens in the source language of shape + `(batch, src_len)` + src_lengths (torch.LongTensor): lengths of each source sentence of + shape `(batch)` + + Returns: + dict: + - **encoder_out** (Tensor): the last encoder layer's 
output of + shape `(src_len, batch, embed_dim)` + - **encoder_padding_mask** (ByteTensor): the positions of + padding elements of shape `(batch, src_len)` + """ + # embed tokens and positions + x = self.sample_embed_scale * self.embed_tokens(src_tokens, part='encoder') + if self.embed_positions is not None: + positions = self.embed_positions(src_tokens) + + # sample the positional embedding and add + x += positions[..., :self.sample_embed_dim] + x = F.dropout(x, p=self.sample_dropout, training=self.training) + + # B x T x C -> T x B x C + x = x.transpose(0, 1) + + # compute padding mask + encoder_padding_mask = src_tokens.eq(self.padding_idx) + if not encoder_padding_mask.any(): + encoder_padding_mask = None + + all_x = [] + # encoder layers + for layer in self.layers: + # print(x.shape) + x = layer(x, encoder_padding_mask) + all_x.append(x) + + + if self.layer_norm: + x = self.layer_norm(x) + + return { + 'encoder_out': x, + 'encoder_out_all' : all_x, + 'encoder_padding_mask': encoder_padding_mask, + } + + def reorder_encoder_out(self, encoder_out, new_order): + """ + Reorder encoder output according to *new_order*. + + Args: + encoder_out: output from the ``forward()`` method + new_order (LongTensor): desired order + + Returns: + *encoder_out* rearranged according to *new_order* + """ + if encoder_out['encoder_out'] is not None: + encoder_out['encoder_out'] = \ + encoder_out['encoder_out'].index_select(1, new_order) + if encoder_out['encoder_padding_mask'] is not None: + encoder_out['encoder_padding_mask'] = \ + encoder_out['encoder_padding_mask'].index_select(0, new_order) + # need to reorder each layer of output + if 'encoder_out_all' in encoder_out.keys(): + new_encoder_out_all = [] + for encoder_out_one_layer in encoder_out['encoder_out_all']: + new_encoder_out_all.append(encoder_out_one_layer.index_select(1, new_order)) + encoder_out['encoder_out_all'] = new_encoder_out_all + + return encoder_out + + def max_positions(self): + """Maximum input length supported by the encoder.""" + if self.embed_positions is None: + return self.max_source_positions + return min(self.max_source_positions, self.embed_positions.max_positions()) + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = '{}.embed_positions.weights'.format(name) + if weights_key in state_dict: + del state_dict[weights_key] + state_dict['{}.embed_positions._float_tensor'.format(name)] = torch.FloatTensor(1) + for i in range(len(self.layers)): + # update layer norms + self.layers[i].upgrade_state_dict_named(state_dict, "{}.layers.{}".format(name, i)) + + version_key = '{}.version'.format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) < 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + return state_dict + + +class TransformerDecoder(FairseqIncrementalDecoder): + """ + Transformer decoder consisting of *args.decoder_layers* layers. Each layer + is a :class:`TransformerDecoderLayer`. + + Args: + args (argparse.Namespace): parsed command-line arguments + dictionary (~fairseq.data.Dictionary): decoding dictionary + embed_tokens (torch.nn.Embedding): output embedding + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). 
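+
+    Note:
+        ``decoder_arbitrary_ende_attn`` (one value per decoder layer) selects which
+        encoder outputs a decoder layer attends to: ``-1`` uses only the last
+        encoder layer, ``1`` additionally concatenates the second-to-last encoder
+        layer, and ``2`` also concatenates the third-to-last one (see
+        :meth:`extract_features`).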
+ """ + + def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=False): + super().__init__(dictionary) + + # the configs of super arch + self.super_embed_dim = decoder_config['decoder_embed_dim'] + self.super_ffn_embed_dim = decoder_config['decoder_ffn_embed_dim'] * decoder_config['decoder_layers'] + self.super_layer_num = decoder_config['decoder_layers'] + self.super_self_attention_heads = 8*[decoder_config['decoder_attention_heads']] * decoder_config['decoder_layers'] + self.super_ende_attention_heads = [decoder_config['decoder_attention_heads']] * decoder_config['decoder_layers'] + self.super_arbitrary_ende_attn = [-1] * decoder_config['decoder_layers'] + + self.super_dropout = 0.3 + self.super_activation_dropout = 0.0 + + self.super_embed_scale = math.sqrt(self.super_embed_dim) + + # the configs of current sampled arch + self.sample_embed_dim = None + self.sample_ffn_embed_dim = None + self.sample_layer_num = None + self.sample_self_attention_heads = None + self.sample_ende_attention_heads = None + self.sample_arbitrary_ende_attn = None + + self.sample_dropout = None + self.sample_activation_dropout = None + + self.sample_embed_scale = None + + + # the configs of current sampled arch + self.register_buffer('version', torch.Tensor([3])) + + self.share_input_output_embed = True + + self.output_embed_dim = decoder_config['decoder_embed_dim'] + + padding_idx = embed_tokens.padding_idx + self.max_target_positions = DEFAULT_MAX_TARGET_POSITIONS + + self.embed_tokens = embed_tokens + + + self.embed_positions = PositionalEmbedding( + self.max_target_positions, self.super_embed_dim, padding_idx, + learned=False, + ) if not False else None + + self.layers = nn.ModuleList([]) + self.layers.extend([ + TransformerDecoderLayer(decoder_config, layer_idx=i, no_encoder_attn=no_encoder_attn) + for i in range(self.super_layer_num) + ]) + + self.adaptive_softmax = None + + self.project_out_dim = Linear(self.super_embed_dim, self.output_embed_dim, bias=False) \ + if self.super_embed_dim != self.output_embed_dim and not args.tie_adaptive_weights else None + + if False:# args.adaptive_softmax_cutoff is not None: + self.adaptive_softmax = AdaptiveSoftmax( + len(dictionary), + self.output_embed_dim, + options.eval_str_list(args.adaptive_softmax_cutoff, type=int), + dropout=args.adaptive_softmax_dropout, + adaptive_inputs=embed_tokens if args.tie_adaptive_weights else None, + factor=args.adaptive_softmax_factor, + tie_proj=args.tie_adaptive_proj, + ) + elif not self.share_input_output_embed: + self.embed_out = nn.Parameter(torch.Tensor(len(dictionary), self.output_embed_dim)) + nn.init.normal_(self.embed_out, mean=0, std=self.output_embed_dim ** -0.5) + + self.layer_norm = None + self.get_attn = False + + self.vocab_original_scaling = False + + def set_sample_config(self, config:dict): + + self.sample_embed_dim = config['decoder']['decoder_embed_dim'] + self.sample_encoder_embed_dim = config['encoder']['encoder_embed_dim'] + + # Caution: this is a list for all layers + self.sample_ffn_embed_dim = config['decoder']['decoder_ffn_embed_dim'] + + # Caution: this is a list for all layers + self.sample_self_attention_heads = config['decoder']['decoder_self_attention_heads'] + + # Caution: this is a list for all layers + self.sample_ende_attention_heads = config['decoder']['decoder_ende_attention_heads'] + + self.sample_arbitrary_ende_attn = config['decoder']['decoder_arbitrary_ende_attn'] + + self.sample_layer_num = config['decoder']['decoder_layer_num'] + + self.sample_dropout = 
calc_dropout(self.super_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_activation_dropout = calc_dropout(self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) + + self.sample_embed_scale = math.sqrt(self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale + + self.embed_tokens.set_sample_config(sample_embed_dim=self.sample_embed_dim, part='decoder') + + if self.layer_norm is not None: + self.layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + + for i, layer in enumerate(self.layers): + # not exceed sample layer number + if i < self.sample_layer_num: + layer.set_sample_config(is_identity_layer=False, + sample_embed_dim=self.sample_embed_dim, + sample_encoder_embed_dim=self.sample_encoder_embed_dim, + sample_ffn_embed_dim_this_layer=self.sample_ffn_embed_dim[i], + sample_self_attention_heads_this_layer=self.sample_self_attention_heads[i], + sample_ende_attention_heads_this_layer=self.sample_ende_attention_heads[i], + sample_dropout=self.sample_dropout, + sample_activation_dropout=self.sample_activation_dropout) + # exceeds sample layer number + else: + layer.set_sample_config(is_identity_layer=True) + + + + def forward(self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused): + """ + Args: + prev_output_tokens (LongTensor): previous decoder outputs of shape + `(batch, tgt_len)`, for teacher forcing + encoder_out (Tensor, optional): output from the encoder, used for + encoder-side attention + incremental_state (dict): dictionary used for storing state during + :ref:`Incremental decoding` + + Returns: + tuple: + - the decoder's output of shape `(batch, tgt_len, vocab)` + - a dictionary with any model-specific outputs + """ + x, extra = self.extract_features(prev_output_tokens, encoder_out, incremental_state) + x = self.output_layer(x) + return x, extra + + def extract_features(self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused): + """ + Similar to *forward* but only return features. 
+
+        Returns:
+            tuple:
+                - the decoder's features of shape `(batch, tgt_len, embed_dim)`
+                - a dictionary with any model-specific outputs
+        """
+        # embed positions
+        positions = self.embed_positions(
+            prev_output_tokens,
+            incremental_state=incremental_state,
+        ) if self.embed_positions is not None else None
+
+        if positions is not None:
+            positions = positions[..., :self.sample_embed_dim]
+
+        if incremental_state is not None:
+            # only feed the last token into the decoder
+            prev_output_tokens = prev_output_tokens[:, -1:]
+            if positions is not None:
+                positions = positions[:, -1:]
+
+        # embed tokens and positions
+        x = self.sample_embed_scale * self.embed_tokens(prev_output_tokens, part='decoder')
+
+        if positions is not None:
+            x += positions
+        x = F.dropout(x, p=self.sample_dropout, training=self.training)
+
+        # B x T x C -> T x B x C
+        x = x.transpose(0, 1)
+        attn = None
+        attns = []
+        inner_states = [x]
+
+        # decoder layers
+        for i, layer in enumerate(self.layers):
+            encoder_out_feed = None
+            encoder_padding_mask_feed = None
+
+            if encoder_out is not None:
+                # only use the last encoder layer's output
+                if i >= self.sample_layer_num or self.sample_arbitrary_ende_attn[i] == -1:
+                    encoder_out_feed = encoder_out['encoder_out']
+                # also concatenate the second-to-last encoder layer's output
+                elif self.sample_arbitrary_ende_attn[i] == 1:
+                    encoder_out_feed = torch.cat([encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2]], dim=0)
+                # also concatenate the second-to-last and third-to-last encoder layer outputs
+                elif self.sample_arbitrary_ende_attn[i] == 2:
+                    encoder_out_feed = torch.cat([encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2], encoder_out['encoder_out_all'][-3]], dim=0)
+                else:
+                    raise NotImplementedError("arbitrary_ende_attn should be in [-1, 1, 2]")
+
+                if encoder_out['encoder_padding_mask'] is not None:
+                    if i >= self.sample_layer_num or self.sample_arbitrary_ende_attn[i] == -1:
+                        encoder_padding_mask_feed = encoder_out['encoder_padding_mask']
+                    # repeat the padding mask once to match the two concatenated encoder outputs
+                    elif self.sample_arbitrary_ende_attn[i] == 1:
+                        encoder_padding_mask_feed = torch.cat([encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], dim=1)
+                    # repeat the padding mask twice to match the three concatenated encoder outputs
+                    elif self.sample_arbitrary_ende_attn[i] == 2:
+                        encoder_padding_mask_feed = torch.cat([encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], dim=1)
+                    else:
+                        raise NotImplementedError("arbitrary_ende_attn should be in [-1, 1, 2]")
+
+            x, attn = layer(
+                x,
+                encoder_out_feed,
+                encoder_padding_mask_feed,
+                incremental_state,
+                self_attn_mask=self.buffered_future_mask(x) if incremental_state is None else None,
+            )
+            inner_states.append(x)
+            attns.append(attn)
+
+        if self.layer_norm:
+            x = self.layer_norm(x)
+
+        # T x B x C -> B x T x C
+        x = x.transpose(0, 1)
+
+        if self.project_out_dim is not None:
+            x = self.project_out_dim(x)
+        if not self.get_attn:
+            attns = attns[-1]
+        return x, {'attn': attns, 'inner_states': inner_states}
+
+    def output_layer(self, features, **kwargs):
+        """Project features to the vocabulary size."""
+        if self.adaptive_softmax is None:
+            # project back to size of vocabulary
+            if self.share_input_output_embed:
+                return F.linear(features, self.embed_tokens.sampled_weight('decoder'))
+            else:
+                return F.linear(features, self.embed_out[:, :self.sample_embed_dim])
+        else:
+            return features
+
+    def max_positions(self):
+        """Maximum output length supported by the decoder."""
+        if self.embed_positions is None:
+            return self.max_target_positions
+        return min(self.max_target_positions, self.embed_positions.max_positions())
+
+    
def buffered_future_mask(self, tensor): + dim = tensor.size(0) + if not hasattr(self, '_future_mask') or self._future_mask is None or self._future_mask.device != tensor.device or self._future_mask.size(0) < dim: + self._future_mask = torch.triu(utils.fill_with_neg_inf(tensor.new(dim, dim)), 1) + return self._future_mask[:dim, :dim] + + def upgrade_state_dict_named(self, state_dict, name): + """Upgrade a (possibly old) state dict for new versions of fairseq.""" + if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + weights_key = '{}.embed_positions.weights'.format(name) + if weights_key in state_dict: + del state_dict[weights_key] + state_dict['{}.embed_positions._float_tensor'.format(name)] = torch.FloatTensor(1) + + for i in range(len(self.layers)): + # update layer norms + layer_norm_map = { + '0': 'self_attn_layer_norm', + '1': 'encoder_attn_layer_norm', + '2': 'final_layer_norm' + } + for old, new in layer_norm_map.items(): + for m in ('weight', 'bias'): + k = '{}.layers.{}.layer_norms.{}.{}'.format(name, i, old, m) + if k in state_dict: + state_dict['{}.layers.{}.{}.{}'.format(name, i, new, m)] = state_dict[k] + del state_dict[k] + + version_key = '{}.version'.format(name) + if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) <= 2: + # earlier checkpoints did not normalize after the stack of layers + self.layer_norm = None + self.normalize = False + state_dict[version_key] = torch.Tensor([1]) + + return state_dict + + +class TransformerEncoderLayer(nn.Module): + """Encoder layer block. + + In the original paper each operation (multi-head attention or FFN) is + postprocessed with: `dropout -> add residual -> layernorm`. In the + tensor2tensor code they suggest that learning is more robust when + preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *args.encoder_normalize_before* to ``True``. 
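+
+    A layer configured with ``is_identity_layer=True`` via :meth:`set_sample_config`
+    simply returns its input in :meth:`forward`, which is how the supernetwork
+    realizes elastic encoder depth.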
+ + Args: + args (argparse.Namespace): parsed command-line arguments + """ + + def __init__(self, encoder_config, layer_idx): + super().__init__() + + # the configs of super arch + self.super_embed_dim = encoder_config['encoder_embed_dim'] + self.super_ffn_embed_dim_this_layer = encoder_config['encoder_ffn_embed_dim'] + self.super_self_attention_heads_this_layer = encoder_config['encoder_attention_heads'] + + self.super_dropout = 0.3 + self.super_activation_dropout =0 + + # the configs of current sampled arch + self.sample_embed_dim = None + self.sample_ffn_embed_dim_this_layer = None + self.sample_self_attention_heads_this_layer = None + + self.sample_dropout = None + self.sample_activation_dropout = None + + self.is_identity_layer = None + + self.qkv_dim= 512 + + + self.self_attn = MultiheadAttentionSuper( + super_embed_dim=self.super_embed_dim, num_heads=self.super_self_attention_heads_this_layer, is_encoder=True, + dropout=0.1, self_attention=True, qkv_dim=self.qkv_dim + ) + + self.self_attn_layer_norm = LayerNormSuper(self.super_embed_dim) + self.dropout = 0.1 + self.activation_fn = utils.get_activation_fn( + activation='relu' + ) + self.normalize_before = False + + self.fc1 = LinearSuper(super_in_dim=self.super_embed_dim, super_out_dim=self.super_ffn_embed_dim_this_layer, uniform_=None, non_linear='relu') #init.uniform_ + self.fc2 = LinearSuper(super_in_dim=self.super_ffn_embed_dim_this_layer, super_out_dim=self.super_embed_dim, uniform_=None, non_linear='linear') + self.final_layer_norm = LayerNormSuper(self.super_embed_dim) + + + def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_ffn_embed_dim_this_layer=None, sample_self_attention_heads_this_layer=None, sample_dropout=None, sample_activation_dropout=None): + + if is_identity_layer: + self.is_identity_layer = True + return + + self.is_identity_layer = False + + self.sample_embed_dim = sample_embed_dim + self.sample_ffn_embed_dim_this_layer = sample_ffn_embed_dim_this_layer + self.sample_self_attention_heads_this_layer = sample_self_attention_heads_this_layer + + self.sample_dropout = sample_dropout + self.sample_activation_dropout = sample_activation_dropout + + self.self_attn_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + + self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_attention_heads=self.sample_self_attention_heads_this_layer) + + self.fc1.set_sample_config(sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) + self.fc2.set_sample_config(sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) + + self.final_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + + + def upgrade_state_dict_named(self, state_dict, name): + """ + Rename layer norm states from `...layer_norms.0.weight` to + `...self_attn_layer_norm.weight` and `...layer_norms.1.weight` to + `...final_layer_norm.weight` + """ + layer_norm_map = { + '0': 'self_attn_layer_norm', + '1': 'final_layer_norm' + } + for old, new in layer_norm_map.items(): + for m in ('weight', 'bias'): + k = '{}.layer_norms.{}.{}'.format(name, old, m) + if k in state_dict: + state_dict[ + '{}.{}.{}'.format(name, new, m) + ] = state_dict[k] + del state_dict[k] + + def forward(self, x, encoder_padding_mask, attn_mask=None): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor): binary ByteTensor of shape + `(batch, src_len)` where padding elements are 
indicated by ``1``. + attn_mask (ByteTensor): binary tensor of shape (T_tgt, T_src), where + T_tgt is the length of query, while T_src is the length of key, + though here both query and key is x here, + attn_mask[t_tgt, t_src] = 1 means when calculating embedding + for t_tgt, t_src is excluded (or masked out), =0 means it is + included in attention + + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + if self.is_identity_layer: + return x + residual = x + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, before=True) + if attn_mask is not None: + attn_mask = attn_mask.masked_fill(attn_mask.byte(), -1e8) + # anything in original attn_mask = 1, becomes -1e8 + # anything in original attn_mask = 0, becomes 0 + # Note that we cannot use -inf here, because at some edge cases, + # the attention weight (before softmax) for some padded element in query + # will become -inf, which results in NaN in model parameters + # TODO: to formally solve this problem, we need to change fairseq's + # MultiheadAttention. We will do this later on. + x, _ = self.self_attn(query=x, key=x, value=x, key_padding_mask=encoder_padding_mask) + x = F.dropout(x, p=self.dropout, training=self.training) + x[:residual.size(0),:,:] = residual + x[:residual.size(0),:,:] + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, after=True) + + residual = x + x = self.maybe_layer_norm(self.final_layer_norm, x, before=True) + x = self.activation_fn(self.fc1(x)) + x = F.dropout(x, p=self.sample_activation_dropout, training=self.training) + x = self.fc2(x) + x = F.dropout(x, p=self.sample_dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.final_layer_norm, x, after=True) + return x + + def maybe_layer_norm(self, layer_norm, x, before=False, after=False): + assert before ^ after + if after ^ self.normalize_before: + return layer_norm(x) + else: + return x + + +class TransformerDecoderLayer(nn.Module): + """Decoder layer block. + + In the original paper each operation (multi-head attention, encoder + attention or FFN) is postprocessed with: `dropout -> add residual -> + layernorm`. In the tensor2tensor code they suggest that learning is more + robust when preprocessing each layer with layernorm and postprocessing with: + `dropout -> add residual`. We default to the approach in the paper, but the + tensor2tensor approach can be enabled by setting + *args.decoder_normalize_before* to ``True``. + + Args: + args (argparse.Namespace): parsed command-line arguments + no_encoder_attn (bool, optional): whether to attend to encoder outputs + (default: False). 
+ """ + + def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False): + super().__init__() + + # the configs of super arch + self.super_embed_dim = decoder_config['decoder_embed_dim'] + self.super_encoder_embed_dim = decoder_config['decoder_embed_dim'] + self.super_ffn_embed_dim_this_layer = decoder_config['decoder_ffn_embed_dim'] + self.super_self_attention_heads_this_layer = decoder_config['decoder_attention_heads'] + self.super_ende_attention_heads_this_layer = decoder_config['decoder_attention_heads'] + + self.super_dropout = 0.3 + self.super_activation_dropout = 0 + + # the configs of current sampled arch + self.sample_embed_dim = None + self.sample_encoder_embed_dim = None + self.sample_ffn_embed_dim_this_layer = None + self.sample_self_attention_heads_this_layer = None + self.sample_ende_attention_heads_this_layer = None + self.sample_dropout = None + self.sample_activation_dropout = None + self.is_identity_layer = None + self.qkv_dim = 512 + self.layer_idx = layer_idx + + + self.self_attn = MultiheadAttentionSuper( + is_encoder=False, + super_embed_dim=self.super_embed_dim, + num_heads=self.super_self_attention_heads_this_layer, + dropout=0.1, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + self_attention=True, + qkv_dim=self.qkv_dim + ) + self.activation_fn = utils.get_activation_fn( + activation='relu' + ) + self.normalize_before = False + + # use layerNorm rather than FusedLayerNorm for exporting. + # char_inputs can be used to determint this. + # TODO remove this once we update apex with the fix + export = False + self.self_attn_layer_norm = LayerNormSuper(self.super_embed_dim) + + if no_encoder_attn: + self.encoder_attn = None + self.encoder_attn_layer_norm = None + else: + self.encoder_attn = MultiheadAttentionSuper( + super_embed_dim=self.super_embed_dim, + num_heads=self.super_ende_attention_heads_this_layer, + is_encoder=False, + super_kdim=self.super_encoder_embed_dim, + super_vdim=self.super_encoder_embed_dim, + dropout=0.1, + encoder_decoder_attention=True, + qkv_dim=self.qkv_dim + ) + self.encoder_attn_layer_norm = LayerNormSuper(self.super_embed_dim) + + self.fc1 = LinearSuper(super_in_dim=self.super_embed_dim, super_out_dim=self.super_ffn_embed_dim_this_layer, + uniform_=None, non_linear='relu') + self.fc2 = LinearSuper(super_in_dim=self.super_ffn_embed_dim_this_layer, super_out_dim=self.super_embed_dim, + uniform_=None, non_linear='linear') + + self.final_layer_norm = LayerNormSuper(self.super_embed_dim) + self.need_attn = True + + self.onnx_trace = False + + + def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_encoder_embed_dim=None, sample_ffn_embed_dim_this_layer=None, sample_self_attention_heads_this_layer=None, sample_ende_attention_heads_this_layer=None, sample_dropout=None, sample_activation_dropout=None): + + if is_identity_layer: + self.is_identity_layer = True + return + + self.is_identity_layer = False + + self.sample_embed_dim = sample_embed_dim + self.sample_encoder_embed_dim = sample_encoder_embed_dim + self.sample_ffn_embed_dim_this_layer = sample_ffn_embed_dim_this_layer + self.sample_self_attention_heads_this_layer = sample_self_attention_heads_this_layer + self.sample_ende_attention_heads_this_layer = sample_ende_attention_heads_this_layer + + self.sample_dropout = sample_dropout + self.sample_activation_dropout = sample_activation_dropout + + + self.self_attn_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + 
self.encoder_attn_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + + self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_attention_heads=self.sample_self_attention_heads_this_layer) + self.encoder_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_kv_embed_dim=self.sample_encoder_embed_dim, sample_attention_heads=self.sample_ende_attention_heads_this_layer) + + self.fc1.set_sample_config(sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) + self.fc2.set_sample_config(sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) + + self.final_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + + + def prepare_for_onnx_export_(self): + self.onnx_trace = True + + def forward( + self, + x, + encoder_out=None, + encoder_padding_mask=None, + incremental_state=None, + prev_self_attn_state=None, + prev_attn_state=None, + self_attn_mask=None, + self_attn_padding_mask=None, + ): + """ + Args: + x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` + encoder_padding_mask (ByteTensor): binary ByteTensor of shape + `(batch, src_len)` where padding elements are indicated by ``1``. + + Returns: + encoded output of shape `(seq_len, batch, embed_dim)` + """ + if self.is_identity_layer: + return x, None + + residual = x + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, before=True) + if prev_self_attn_state is not None: + if incremental_state is None: + incremental_state = {} + prev_key, prev_value = prev_self_attn_state + saved_state = {"prev_key": prev_key, "prev_value": prev_value} + self.self_attn._set_input_buffer(incremental_state, saved_state) + x, attn = self.self_attn( + query=x, + key=x, + value=x, + key_padding_mask=self_attn_padding_mask, + incremental_state=incremental_state, + need_weights=False, + attn_mask=self_attn_mask, + ) + x = F.dropout(x, p=self.sample_dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.self_attn_layer_norm, x, after=True) + + if self.encoder_attn is not None: + residual = x + x = self.maybe_layer_norm(self.encoder_attn_layer_norm, x, before=True) + if prev_attn_state is not None: + if incremental_state is None: + incremental_state = {} + prev_key, prev_value = prev_attn_state + saved_state = {"prev_key": prev_key, "prev_value": prev_value} + self.encoder_attn._set_input_buffer(incremental_state, saved_state) + x, attn = self.encoder_attn( + query=x, + key=encoder_out, + value=encoder_out, + key_padding_mask=encoder_padding_mask, + incremental_state=incremental_state, + static_kv=True, + need_weights=(not self.training and self.need_attn), + ) + x = F.dropout(x, p=self.sample_dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.encoder_attn_layer_norm, x, after=True) + + residual = x + x = self.maybe_layer_norm(self.final_layer_norm, x, before=True) + x = self.activation_fn(self.fc1(x)) + x = F.dropout(x, p=self.sample_activation_dropout, training=self.training) + x = self.fc2(x) + x = F.dropout(x, p=self.sample_dropout, training=self.training) + x = residual + x + x = self.maybe_layer_norm(self.final_layer_norm, x, after=True) + if self.onnx_trace and incremental_state is not None: + saved_state = self.self_attn._get_input_buffer(incremental_state) + self_attn_state = saved_state["prev_key"], saved_state["prev_value"] + return x, attn, self_attn_state + return x, attn + + def maybe_layer_norm(self, layer_norm, x, 
before=False, after=False): + assert before ^ after + if after ^ self.normalize_before: + return layer_norm(x) + else: + return x + + def make_generation_fast_(self, need_attn=False, **kwargs): + self.need_attn = need_attn + +def calc_dropout(dropout, sample_embed_dim, super_embed_dim): + return dropout * 1.0 * sample_embed_dim / super_embed_dim + +def Embedding(num_embeddings, embedding_dim, padding_idx): + return EmbeddingSuper(num_embeddings, embedding_dim, padding_idx=padding_idx) + +def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_(m.weight, non_linear=non_linear) + if bias: + nn.init.constant_(m.bias, 0.) + return m + +def calc_dropout(dropout, sample_embed_dim, super_embed_dim): + return dropout * 1.0 * sample_embed_dim / super_embed_dim + +def Embedding(num_embeddings, embedding_dim, padding_idx): + return EmbeddingSuper(num_embeddings, embedding_dim, padding_idx=padding_idx) + +def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): + m = nn.Linear(in_features, out_features, bias) + nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_(m.weight, non_linear=non_linear) + if bias: + nn.init.constant_(m.bias, 0.) + return m From 7b205272ddb09060e1ce5a87520285dbbd41bbb0 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 17 Nov 2022 15:50:39 -0800 Subject: [PATCH 06/60] Run `autopep8` --- neural_compressor/experimental/nas/dynas.py | 110 +++---- .../experimental/nas/dynast/__init__.py | 2 +- .../experimental/nas/dynast/dynas_manager.py | 73 +++-- .../nas/dynast/dynas_predictor.py | 7 +- .../experimental/nas/dynast/dynas_search.py | 22 +- .../experimental/nas/dynast/dynas_utils.py | 69 +++-- neural_compressor/experimental/nas/nas.py | 104 ++++--- .../experimental/nas/nas_utils.py | 3 +- .../experimental/nas/search_algorithms.py | 16 +- .../modules_supernetwork.py | 96 +++--- .../transformer_interface.py | 152 +++++----- .../transformer_supernetwork.py | 275 +++++++++++------- 12 files changed, 535 insertions(+), 394 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index a5d70dde6ae..7ba96ef169e 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -21,6 +21,7 @@ from .nas import NASBase from .nas_utils import nas_registry + @nas_registry("DyNAS") class DyNAS(NASBase): """ @@ -28,43 +29,47 @@ class DyNAS(NASBase): conf_fname_or_obj (string or obj): The path to the YAML configuration file or the object of NASConfig. 
""" + def __init__(self, conf_fname_or_obj): from .dynast.dynas_manager import ParameterManager from .dynast.dynas_manager import TransformerLTEncoding from .dynast.dynas_predictor import Predictor from .dynast.dynas_search import ProblemMultiObjective, SearchAlgoManager from .dynast.dynas_utils import (EvaluationInterfaceMobileNetV3, - EvaluationInterfaceResNet50, EvaluationInterfaceTransformerLT, - OFARunner,TransformerLTRunner) + EvaluationInterfaceResNet50, EvaluationInterfaceTransformerLT, + OFARunner, TransformerLTRunner) self.ParameterManager = ParameterManager self.Predictor = Predictor self.ProblemMultiObjective = ProblemMultiObjective self.SearchAlgoManager = SearchAlgoManager self.SUPERNET_PARAMETERS = { - 'ofa_resnet50': - {'d' : {'count' : 5, 'vars' : [0, 1, 2]}, - 'e' : {'count' : 18, 'vars' : [0.2, 0.25, 0.35]}, - 'w' : {'count' : 6, 'vars' : [0, 1, 2]} }, - 'ofa_mbv3_d234_e346_k357_w1.0': - {'ks' : {'count' : 20, 'vars' : [3, 5, 7]}, - 'e' : {'count' : 20, 'vars' : [3, 4, 6]}, - 'd' : {'count' : 5, 'vars' : [2, 3, 4]} }, - 'ofa_mbv3_d234_e346_k357_w1.2': - {'ks' : {'count' : 20, 'vars' : [3, 5, 7]}, - 'e' : {'count' : 20, 'vars' : [3, 4, 6]}, - 'd' : {'count' : 5, 'vars' : [2, 3, 4]} }, - - 'transformer_lt_wmt_en_de': - {'encoder_embed_dim': {'count':1,'vars':[640, 512]}, - 'decoder_embed_dim': {'count':1, 'vars': [640, 512]}, - 'encoder_ffn_embed_dim': {'count':6, 'vars':[3072, 2048, 1024]}, - 'decoder_ffn_embed_dim' : {'count':6,'vars': [3072, 2048, 1024]}, - 'decoder_layer_num': {'count':1,'vars':[6, 5, 4, 3, 2, 1]}, - 'encoder_self_attention_heads': {'count':6, 'vars':[8, 4]}, - 'decoder_self_attention_heads': {'count':6, 'vars':[8, 4]}, - 'decoder_ende_attention_heads': {'count':6, 'vars':[8, 4]}, - 'decoder_arbitrary_ende_attn': {'count':6, 'vars':[-1, 1, 2]}} + 'ofa_resnet50': { + 'd': {'count': 5, 'vars': [0, 1, 2]}, + 'e': {'count': 18, 'vars': [0.2, 0.25, 0.35]}, + 'w': {'count': 6, 'vars': [0, 1, 2]}, + }, + 'ofa_mbv3_d234_e346_k357_w1.0': { + 'ks': {'count': 20, 'vars': [3, 5, 7]}, + 'e': {'count': 20, 'vars': [3, 4, 6]}, + 'd': {'count': 5, 'vars': [2, 3, 4]}, + }, + 'ofa_mbv3_d234_e346_k357_w1.2': { + 'ks': {'count': 20, 'vars': [3, 5, 7]}, + 'e': {'count': 20, 'vars': [3, 4, 6]}, + 'd': {'count': 5, 'vars': [2, 3, 4]}, + }, + 'transformer_lt_wmt_en_de': { + 'encoder_embed_dim': {'count': 1, 'vars': [640, 512]}, + 'decoder_embed_dim': {'count': 1, 'vars': [640, 512]}, + 'encoder_ffn_embed_dim': {'count': 6, 'vars': [3072, 2048, 1024]}, + 'decoder_ffn_embed_dim': {'count': 6, 'vars': [3072, 2048, 1024]}, + 'decoder_layer_num': {'count': 1, 'vars': [6, 5, 4, 3, 2, 1]}, + 'encoder_self_attention_heads': {'count': 6, 'vars': [8, 4]}, + 'decoder_self_attention_heads': {'count': 6, 'vars': [8, 4]}, + 'decoder_ende_attention_heads': {'count': 6, 'vars': [8, 4]}, + 'decoder_arbitrary_ende_attn': {'count': 6, 'vars': [-1, 1, 2]}, + }, } self.RUNNERS = { 'ofa_resnet50': OFARunner, @@ -76,19 +81,19 @@ def __init__(self, conf_fname_or_obj): self.EVALUATION_INTERFACE = {'ofa_resnet50': EvaluationInterfaceResNet50, 'ofa_mbv3_d234_e346_k357_w1.0': EvaluationInterfaceMobileNetV3, 'ofa_mbv3_d234_e346_k357_w1.2': EvaluationInterfaceMobileNetV3, - 'transformer_lt_wmt_en_de': EvaluationInterfaceTransformerLT} + 'transformer_lt_wmt_en_de': EvaluationInterfaceTransformerLT} self.LINAS_INNERLOOP_EVALS = {'ofa_resnet50': 5000, 'ofa_mbv3_d234_e346_k357_w1.0': 20000, 'ofa_mbv3_d234_e346_k357_w1.2': 20000, 'transformer_lt_wmt_en_de': 10000} - + self.SUPERNET_ENCODING = { - 
'ofa_resnet50': ParameterManager, - 'ofa_mbv3_d234_e346_k357_w1.0': ParameterManager, - 'ofa_mbv3_d234_e346_k357_w1.2': ParameterManager, - 'ofa_proxyless_d234_e346_k357_w1.3': ParameterManager, - 'transformer_lt_wmt_en_de': TransformerLTEncoding, + 'ofa_resnet50': ParameterManager, + 'ofa_mbv3_d234_e346_k357_w1.0': ParameterManager, + 'ofa_mbv3_d234_e346_k357_w1.2': ParameterManager, + 'ofa_proxyless_d234_e346_k357_w1.3': ParameterManager, + 'transformer_lt_wmt_en_de': TransformerLTEncoding, } super().__init__() @@ -98,7 +103,6 @@ def __init__(self, conf_fname_or_obj): self.results_csv_path = None self.init_cfg(conf_fname_or_obj) - def estimate(self, individual): self.validation_interface.eval_subnet(individual) @@ -106,10 +110,10 @@ def init_for_search(self): self.supernet_manager = self.SUPERNET_ENCODING[self.supernet]( param_dict=self.SUPERNET_PARAMETERS[self.supernet], seed=self.seed ) - #self.supernet_manager = self.ParameterManager( + # self.supernet_manager = self.ParameterManager( # param_dict=self.SUPERNET_PARAMETERS[self.supernet], # seed=self.seed - #) + # ) # Validation High-Fidelity Measurement Runner self.runner_validate = self.RUNNERS[self.supernet]( @@ -140,13 +144,14 @@ def search(self): # if number of results in results_csv_path smaller than population. # TODO(macsz) Create empty CSV if it does not exists. df = pd.read_csv(self.results_csv_path) - latest_population = [self.supernet_manager.random_sample() \ - for _ in range(max(self.population - df.shape[0], 0))] + latest_population = [self.supernet_manager.random_sample() + for _ in range(max(self.population - df.shape[0], 0))] # Start Lightweight Iterative Neural Architecture Search (LINAS) num_loops = round(self.num_evals/self.population) for loop in range(num_loops): - logger.info('[DyNAS-T] Starting LINAS loop {} of {}.'.format(loop+1, num_loops)) + logger.info( + '[DyNAS-T] Starting LINAS loop {} of {}.'.format(loop+1, num_loops)) for individual in latest_population: self.validation_interface.eval_subnet(individual) @@ -172,7 +177,7 @@ def search(self): manager=self.supernet_manager, metrics=self.metrics, csv_path=None, - predictor_mode = True + predictor_mode=True ) problem = self.ProblemMultiObjective( @@ -182,19 +187,22 @@ def search(self): ) if self.search_algo == 'age': - search_manager = self.SearchAlgoManager(algorithm='age', seed=self.seed) + search_manager = self.SearchAlgoManager( + algorithm='age', seed=self.seed) search_manager.configure_age(population=self.population, - num_evals=self.LINAS_INNERLOOP_EVALS[self.supernet]) + num_evals=self.LINAS_INNERLOOP_EVALS[self.supernet]) else: - search_manager = self.SearchAlgoManager(algorithm='nsga2', seed=self.seed) + search_manager = self.SearchAlgoManager( + algorithm='nsga2', seed=self.seed) search_manager.configure_nsga2(population=self.population, - num_evals=self.LINAS_INNERLOOP_EVALS[self.supernet]) + num_evals=self.LINAS_INNERLOOP_EVALS[self.supernet]) results = search_manager.run_search(problem) latest_population = results.pop.get('X') - logger.info("[DyNAS-T] Validated model architectures in file: {}".format(self.results_csv_path)) + logger.info( + "[DyNAS-T] Validated model architectures in file: {}".format(self.results_csv_path)) output = list() for individual in latest_population: @@ -202,7 +210,7 @@ def search(self): return output - def select_model_arch(self): # pragma: no cover + def select_model_arch(self): # pragma: no cover # model_arch_proposition intrinsically contained in # pymoo.minimize API of search_manager.run_search method, # don't 
have to implement it explicitly. @@ -214,7 +222,7 @@ def create_acc_predictor(self): df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='acc', - column_names=['config','date','lat','macs','acc']) + column_names=['config', 'date', 'lat', 'macs', 'acc']) features, labels = self.supernet_manager.create_training_set(df) self.acc_predictor = self.Predictor() self.acc_predictor.train(features, labels.ravel()) @@ -227,7 +235,7 @@ def create_macs_predictor(self): df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='macs', - column_names=['config','date','lat','macs','acc']) + column_names=['config', 'date', 'lat', 'macs', 'acc']) features, labels = self.supernet_manager.create_training_set(df) self.macs_predictor = self.Predictor() self.macs_predictor.train(features, labels.ravel()) @@ -240,7 +248,7 @@ def create_latency_predictor(self): df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='lat', - column_names=['config','date','lat','macs','acc']) + column_names=['config', 'date', 'lat', 'macs', 'acc']) features, labels = self.supernet_manager.create_training_set(df) self.latency_predictor = self.Predictor() self.latency_predictor.train(features, labels.ravel()) @@ -254,11 +262,11 @@ def init_cfg(self, conf_fname_or_obj): elif isinstance(conf_fname_or_obj, NASConfig): conf_fname_or_obj.validate() self.conf = conf_fname_or_obj.usr_cfg - else: # pragma: no cover + else: # pragma: no cover raise NotImplementedError( "Please provide a str path to the config file or an object of NASConfig." ) - #self.init_search_cfg(self.conf.nas) + # self.init_search_cfg(self.conf.nas) assert 'dynas' in self.conf.nas, "Must specify dynas section." dynas_config = self.conf.nas.dynas self.search_algo = self.conf.nas.search.search_algorithm @@ -269,7 +277,7 @@ def init_cfg(self, conf_fname_or_obj): self.dataset_path = dynas_config.dataset_path self.supernet_ckpt_path = dynas_config.supernet_ckpt_path self.batch_size = dynas_config.batch_size - if dynas_config.population < 10: # pragma: no cover + if dynas_config.population < 10: # pragma: no cover raise NotImplementedError( "Please specify a population size >= 10" ) diff --git a/neural_compressor/experimental/nas/dynast/__init__.py b/neural_compressor/experimental/nas/dynast/__init__.py index 1c73aaf4901..369707c0ef6 100644 --- a/neural_compressor/experimental/nas/dynast/__init__.py +++ b/neural_compressor/experimental/nas/dynast/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. 
diff --git a/neural_compressor/experimental/nas/dynast/dynas_manager.py b/neural_compressor/experimental/nas/dynast/dynas_manager.py index 898d85e3ab0..61956464e97 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_manager.py +++ b/neural_compressor/experimental/nas/dynast/dynas_manager.py @@ -61,21 +61,25 @@ def process_param_dict(self) -> Tuple[list, list, int]: for i in range(options['count']): parameter_upperbound.append(len(options['vars']) - 1) index_simple = [x for x in range(len(options['vars']))] - parameter_mapper.append(dict(zip(index_simple, options['vars']))) + parameter_mapper.append( + dict(zip(index_simple, options['vars']))) - if self.verbose: # pragma: no cover + if self.verbose: # pragma: no cover logger.info( - '[DyNAS-T] Problem definition variables: {}'.format(parameter_count) + '[DyNAS-T] Problem definition variables: {}'.format( + parameter_count) ) logger.info( - '[DyNAS-T] Variable Upper Bound array: {}'.format(parameter_upperbound) + '[DyNAS-T] Variable Upper Bound array: {}'.format( + parameter_upperbound) ) logger.info( '[DyNAS-T] Mapping dictionary created of length: {}'.format( len(parameter_mapper) ) ) - logger.info('[DyNAS-T] Parameter Bound: {}'.format(parameter_bound)) + logger.info( + '[DyNAS-T] Parameter Bound: {}'.format(parameter_bound)) return parameter_mapper, parameter_upperbound, parameter_count @@ -138,7 +142,8 @@ def random_samples(self, size: int = 100, trial_limit: int = 100000) -> List[lis trials += 1 if trials >= trial_limit: - logger.warning('[DyNAS-T] Unable to create unique list of samples.') + logger.warning( + '[DyNAS-T] Unable to create unique list of samples.') return pymoo_vector_list @@ -172,7 +177,8 @@ def translate2pymoo(self, parameters: dict) -> list: param_counter = 0 for i in range(value['count']): output.append( - self.inv_mapper[mapper_counter][parameters[key][param_counter]] + self.inv_mapper[mapper_counter][parameters[key] + [param_counter]] ) mapper_counter += 1 param_counter += 1 @@ -278,32 +284,28 @@ def create_training_set( return features_train, features_test, labels_train, labels_test - - - - class TransformerLTEncoding(ParameterManager): def __init__(self, param_dict: dict, verbose: bool = False, seed: int = 0): super().__init__(param_dict, verbose, seed) - def onehot_custom(self,subnet_cfg,provide_onehot=True): + def onehot_custom(self, subnet_cfg, provide_onehot=True): features = [] #import ipdb;ipdb.set_trace() features.extend(subnet_cfg['encoder_embed_dim']) #encoder_layer_num = subnet_cfg['encoder_layer_num'] - encode_layer_num_int = 6#encoder_layer_num[0] - #features.extend(encoder_layer_num) + encode_layer_num_int = 6 # encoder_layer_num[0] + # features.extend(encoder_layer_num) - #Encoder FFN Embed Dim + # Encoder FFN Embed Dim encoder_ffn_embed_dim = subnet_cfg['encoder_ffn_embed_dim'] if encode_layer_num_int < 6: encoder_ffn_embed_dim.extend([0]*(6-encode_layer_num_int)) features.extend(encoder_ffn_embed_dim) - #Encoder Self-Attn Heads + # Encoder Self-Attn Heads encoder_self_attention_heads = subnet_cfg['encoder_self_attention_heads'][:encode_layer_num_int] @@ -311,34 +313,32 @@ def onehot_custom(self,subnet_cfg,provide_onehot=True): encoder_self_attention_heads.extend([0]*(6-encode_layer_num_int)) features.extend(encoder_self_attention_heads) - features.extend(subnet_cfg['decoder_embed_dim']) decoder_layer_num = subnet_cfg['decoder_layer_num'] decoder_layer_num_int = decoder_layer_num[0] features.extend(decoder_layer_num) - #Decoder FFN Embed Dim + # Decoder FFN Embed Dim decoder_ffn_embed_dim 
= subnet_cfg['decoder_ffn_embed_dim'][:decoder_layer_num_int] if decoder_layer_num_int < 6: decoder_ffn_embed_dim.extend([0]*(6-decoder_layer_num_int)) features.extend(decoder_ffn_embed_dim) - - #Decoder Attn Heads + # Decoder Attn Heads decoder_self_attention_heads = subnet_cfg['decoder_self_attention_heads'][:decoder_layer_num_int] if decoder_layer_num_int < 6: - decoder_self_attention_heads.extend([0]*(6-decoder_layer_num_int)) + decoder_self_attention_heads.extend([0]*(6-decoder_layer_num_int)) features.extend(decoder_self_attention_heads) - #Decoder ENDE HEADS + # Decoder ENDE HEADS decoder_ende_attention_heads = subnet_cfg['decoder_ende_attention_heads'][:decoder_layer_num_int] if decoder_layer_num_int < 6: - decoder_ende_attention_heads.extend([0]*(6-decoder_layer_num_int)) + decoder_ende_attention_heads.extend([0]*(6-decoder_layer_num_int)) features.extend(decoder_ende_attention_heads) @@ -352,27 +352,27 @@ def onehot_custom(self,subnet_cfg,provide_onehot=True): arbitrary_ende_attn_trans.append(3) if decoder_layer_num_int < 6: - arbitrary_ende_attn_trans.extend([0]*(6-decoder_layer_num_int)) + arbitrary_ende_attn_trans.extend([0]*(6-decoder_layer_num_int)) features.extend(arbitrary_ende_attn_trans) - if provide_onehot==True: + if provide_onehot == True: examples = np.array([features]) one_hot_count = 0 unique_values = self.unique_values - #uncomment - #with open(self.onehot_unique,'rb') as f: + # uncomment + # with open(self.onehot_unique,'rb') as f: # load_unique_values = pickle.load(f) # unique_values = load_unique_values.tolist() for unique in unique_values: one_hot_count += len(unique.tolist()) - one_hot_examples = np.zeros((examples.shape[0], one_hot_count)) for e, example in enumerate(examples): offset = 0 for f in range(len(example)): - index = np.where(unique_values[f] == example[f])[0] + offset + index = np.where(unique_values[f] == example[f])[ + 0] + offset one_hot_examples[e, index] = 1.0 offset += len(unique_values[f]) return one_hot_examples @@ -380,8 +380,7 @@ def onehot_custom(self,subnet_cfg,provide_onehot=True): else: return features - #return np.array(ks_onehot + ex_onehot) - + # return np.array(ks_onehot + ex_onehot) def import_csv( self, @@ -390,7 +389,7 @@ def import_csv( objective: str, column_names: List[str] = None, drop_duplicates: bool = True, - ) -> pd.DataFrame: + ) -> pd.DataFrame: ''' Import a csv file generated from a supernetwork search for the purpose of training a predictor. 
@@ -427,7 +426,8 @@ def import_csv( config_as_pymoo = self.translate2pymoo(config_as_dict) convert_to_pymoo.append(config_as_pymoo) # Onehot predictor format - config_as_onehot = self.onehot_custom(config_as_dict,provide_onehot=False) + config_as_onehot = self.onehot_custom( + config_as_dict, provide_onehot=False) convert_to_onehot.append(config_as_onehot) #import ipdb;ipdb.set_trace() df[config] = convert_to_dict @@ -436,7 +436,7 @@ def import_csv( return df - #@staticmethod + # @staticmethod def create_training_set( self, dataframe: pd.DataFrame, @@ -453,7 +453,8 @@ def create_training_set( for i in range(len(dataframe)): collect_rows.append(np.asarray(dataframe['config_onehot'].iloc[i])) features = np.asarray(collect_rows) - labels = dataframe.drop(columns=['config', 'config_pymoo', 'config_onehot']).values + labels = dataframe.drop( + columns=['config', 'config_pymoo', 'config_onehot']).values assert len(features) == len(labels) one_hot_count = 0 @@ -485,5 +486,3 @@ def create_training_set( ) ) return features_train, features_test, labels_train, labels_test - - diff --git a/neural_compressor/experimental/nas/dynast/dynas_predictor.py b/neural_compressor/experimental/nas/dynast/dynas_predictor.py index a92c0bef5c2..1a47837d153 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_predictor.py +++ b/neural_compressor/experimental/nas/dynast/dynas_predictor.py @@ -49,7 +49,8 @@ def __init__( # Create lists of regressors and associated hyper-parameters regressors = [ linear_model.Ridge(max_iter=max_iterations), - svm.SVR(kernel='rbf', gamma='auto', epsilon=0.0, max_iter=max_iterations), + svm.SVR(kernel='rbf', gamma='auto', + epsilon=0.0, max_iter=max_iterations), ] hyper_parameters = [{'alpha': alphas}, {'C': cost_factors}] @@ -67,7 +68,6 @@ def __init__( ) def train(self, examples, labels): - ''' Trains the predictor on the specified examples and labels using the underlying regressor. 
Parameters @@ -82,7 +82,8 @@ def train(self, examples, labels): # Compute normalization factor max_label = np.amax(np.abs(labels)) if max_label > 0.0: - self.normalization_factor = 10 ** (np.floor(np.log10(max_label)) - 1.0) + self.normalization_factor = 10 ** ( + np.floor(np.log10(max_label)) - 1.0) else: self.normalization_factor = 1.0 diff --git a/neural_compressor/experimental/nas/dynast/dynas_search.py b/neural_compressor/experimental/nas/dynast/dynas_search.py index a19ec5075f5..fc3d5123450 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_search.py +++ b/neural_compressor/experimental/nas/dynast/dynas_search.py @@ -62,9 +62,10 @@ def __init__( elif self.algorithm == 'age': self.configure_age() self.engine = 'pymoo' - else: # pragma: no cover + else: # pragma: no cover logger.error( - '[DyNAS-T] algorithm "{}" not implemented.'.format(self.algorithm) + '[DyNAS-T] algorithm "{}" not implemented.'.format( + self.algorithm) ) raise NotImplementedError @@ -89,8 +90,10 @@ def configure_nsga2( self.algorithm_def = NSGA2( pop_size=population, sampling=sample_strategy, - crossover=get_crossover("int_sbx", prob=crossover_prob, eta=crossover_eta), - mutation=get_mutation("int_pm", prob=mutation_prob, eta=mutation_eta), + crossover=get_crossover( + "int_sbx", prob=crossover_prob, eta=crossover_eta), + mutation=get_mutation( + "int_pm", prob=mutation_prob, eta=mutation_eta), eliminate_duplicates=True, ) @@ -116,8 +119,10 @@ def configure_age( self.algorithm_def = AGEMOEA( pop_size=population, sampling=sample_strategy, - crossover=get_crossover("int_sbx", prob=crossover_prob, eta=crossover_eta), - mutation=get_mutation("int_pm", prob=mutation_prob, eta=mutation_eta), + crossover=get_crossover( + "int_sbx", prob=crossover_prob, eta=crossover_eta), + mutation=get_mutation( + "int_pm", prob=mutation_prob, eta=mutation_eta), eliminate_duplicates=True, ) @@ -143,7 +148,7 @@ def run_search( save_history=save_history, verbose=self.verbose, ) - else: # pragma: no cover + else: # pragma: no cover logger.error('[DyNAS-T] Invalid algorithm engine configuration!') raise NotImplementedError @@ -200,7 +205,8 @@ def _evaluate( # Measure new individuals for i in range(len(x)): - _, objective_x, objective_y = self.evaluation_interface.eval_subnet(x[i]) + _, objective_x, objective_y = self.evaluation_interface.eval_subnet( + x[i]) objective_x_arr.append(objective_x) objective_y_arr.append(objective_y) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index e15ca455005..2149860ff8e 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -36,7 +36,7 @@ from neural_compressor.experimental.nas.supernetwork.machine_translation.transformer_interface import ( compute_bleu, compute_latency -) +) torch = LazyImport('torch') torchvision = LazyImport('torchvision') @@ -198,7 +198,7 @@ def estimate_latency( def validate_top1( self, subnet_cfg: dict, - ) -> float: # pragma: no cover + ) -> float: # pragma: no cover subnet = self.get_subnet(subnet_cfg) folder_name = '.torch/tmp-{}'.format(uuid.uuid1().hex) run_manager = RunManager( @@ -207,7 +207,8 @@ def validate_top1( run_manager.reset_running_statistics(net=subnet) # Test sampled subnet - self.run_config.data_provider.assign_active_img_size(subnet_cfg['r'][0]) + self.run_config.data_provider.assign_active_img_size( + subnet_cfg['r'][0]) loss, acc = run_manager.validate(net=subnet, no_logs=False) top1 = 
acc[0] return top1 @@ -252,7 +253,8 @@ def measure_latency( measure_steps=measure_steps, device=self.device, ) - logger.info('Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) + logger.info( + 'Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) return latency_mean, latency_std @@ -274,8 +276,6 @@ def get_subnet( return self.subnet - - class TransformerLTRunner(Runner): """The OFARunner class manages the sub-network selection from the OFA super-network and the validation measurements of the sub-networks. ResNet50, MobileNetV3 w1.0, and MobileNetV3 w1.2 @@ -326,9 +326,10 @@ def estimate_latency( def validate_bleu( self, subnet_cfg: dict, - ) -> float: # pragma: no cover - - bleu = compute_bleu(subnet_cfg,self.dataset_path,self.checkpoint_path) + ) -> float: # pragma: no cover + + bleu = compute_bleu(subnet_cfg, self.dataset_path, + self.checkpoint_path) return bleu def validate_macs( @@ -362,13 +363,13 @@ def measure_latency( Returns: mean latency; std latency """ - - latency_mean, latency_std = compute_latency(subnet_cfg,self.dataset_path) - logger.info('Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) - - return latency_mean, latency_std + latency_mean, latency_std = compute_latency( + subnet_cfg, self.dataset_path) + logger.info( + 'Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) + return latency_mean, latency_std class EvaluationInterface: @@ -409,7 +410,8 @@ def clear_csv(self) -> None: if self.csv_path: f = open(self.csv_path, "w") writer = csv.writer(f) - result = ['Sub-network', 'Date', 'Latency (ms)', ' MACs', 'Top-1 Acc (%)'] + result = ['Sub-network', 'Date', + 'Latency (ms)', ' MACs', 'Top-1 Acc (%)'] writer.writerow(result) f.close() @@ -505,11 +507,14 @@ def eval_subnet( # Always evaluate/predict top1 lat, macs = 0, 0 if self.predictor_mode == True: - top1 = self.evaluator.estimate_accuracy_top1(self.manager.onehot_generic(x).reshape(1,-1))[0] + top1 = self.evaluator.estimate_accuracy_top1( + self.manager.onehot_generic(x).reshape(1, -1))[0] if 'macs' in self.metrics: - macs = self.evaluator.estimate_macs(self.manager.onehot_generic(x).reshape(1,-1))[0] + macs = self.evaluator.estimate_macs( + self.manager.onehot_generic(x).reshape(1, -1))[0] if 'lat' in self.metrics: - lat = self.evaluator.estimate_latency(self.manager.onehot_generic(x).reshape(1,-1))[0] + lat = self.evaluator.estimate_latency( + self.manager.onehot_generic(x).reshape(1, -1))[0] else: top1 = self.evaluator.validate_top1(subnet_sample) macs = self.evaluator.validate_macs(subnet_sample) @@ -531,7 +536,6 @@ def eval_subnet( return sample, macs, -top1 - class EvaluationInterfaceTransformerLT(EvaluationInterface): def __init__( self, @@ -553,7 +557,7 @@ def eval_subnet( sample = { 'encoder': { 'encoder_embed_dim': param_dict['encoder_embed_dim'][0], - 'encoder_layer_num': 6,#param_dict['encoder_layer_num'][0], + 'encoder_layer_num': 6, # param_dict['encoder_layer_num'][0], 'encoder_ffn_embed_dim': param_dict['encoder_ffn_embed_dim'], 'encoder_self_attention_heads': param_dict['encoder_self_attention_heads'], }, @@ -563,20 +567,23 @@ def eval_subnet( 'decoder_ffn_embed_dim': param_dict['decoder_ffn_embed_dim'], 'decoder_self_attention_heads': param_dict['decoder_self_attention_heads'], 'decoder_ende_attention_heads': param_dict['decoder_ende_attention_heads'], - 'decoder_arbitrary_ende_attn':param_dict['decoder_arbitrary_ende_attn'] - } + 'decoder_arbitrary_ende_attn': param_dict['decoder_arbitrary_ende_attn'] } + } subnet_sample = copy.deepcopy(sample) # 
Always evaluate/predict top1 lat, macs = 0, 0 if self.predictor_mode == True: - bleu = self.evaluator.estimate_accuracy_bleu(self.manager.onehot_custom(param_dict).reshape(1,-1))[0] + bleu = self.evaluator.estimate_accuracy_bleu( + self.manager.onehot_custom(param_dict).reshape(1, -1))[0] if 'macs' in self.metrics: - macs = self.evaluator.estimate_macs(self.manager.onehot_custom(param_dict).reshape(1,-1))[0] + macs = self.evaluator.estimate_macs( + self.manager.onehot_custom(param_dict).reshape(1, -1))[0] if 'lat' in self.metrics: - lat = self.evaluator.estimate_latency(self.manager.onehot_custom(param_dict).reshape(1,-1))[0] + lat = self.evaluator.estimate_latency( + self.manager.onehot_custom(param_dict).reshape(1, -1))[0] else: bleu = self.evaluator.validate_bleu(subnet_sample) macs = self.evaluator.validate_macs(subnet_sample) @@ -587,7 +594,7 @@ def eval_subnet( with open(self.csv_path, 'a') as f: writer = csv.writer(f) date = str(datetime.now()) - result = [param_dict, date, lat, macs, bleu,] + result = [param_dict, date, lat, macs, bleu, ] writer.writerow(result) # PyMoo only minimizes objectives, thus accuracy needs to be negative @@ -598,9 +605,6 @@ def eval_subnet( return sample, macs, -bleu - - - def get_torchvision_model( model_name: str, ) -> torch.nn.Module: @@ -608,14 +612,15 @@ def get_torchvision_model( model = getattr(torchvision.models, model_name)(pretrained=True) model.eval() return model - except AttributeError as ae: # pragma: no cover + except AttributeError as ae: # pragma: no cover logger.error( 'Model {model_name} not available. This can be due to either a typo or the model is not ' 'available in torchvision=={torchvision_version}. \nAvailable models: {available_models}'.format( model_name=model_name, torchvision_version=torchvision.__version__, available_models=', '.join( - [m for m in dir(torchvision.models) if not m.startswith('_')] + [m for m in dir(torchvision.models) + if not m.startswith('_')] ), ) ) @@ -652,7 +657,7 @@ def __init__( # separately to avoid modifications to the model being passed between calls. get_torchvision_model(model_name=self.model_name) - def validate_top1(self) -> Tuple[float, float, float]: # pragma: no cover + def validate_top1(self) -> Tuple[float, float, float]: # pragma: no cover ImagenetDataProvider.DEFAULT_PATH = self.dataset_path model = get_torchvision_model(model_name=self.model_name) run_config = ImagenetRunConfig(test_batch_size=64, n_worker=20) diff --git a/neural_compressor/experimental/nas/nas.py b/neural_compressor/experimental/nas/nas.py index cbdf9c17ae3..390ef8ada46 100644 --- a/neural_compressor/experimental/nas/nas.py +++ b/neural_compressor/experimental/nas/nas.py @@ -39,13 +39,13 @@ def __new__(self, conf_fname_or_obj, *args, **kwargs): elif isinstance(conf_fname_or_obj, Config): self.conf = NASConfig() self.conf.map_pyconfig_to_cfg(conf_fname_or_obj) - else: # pragma: no cover + else: # pragma: no cover raise NotImplementedError( "Please provide a str path to the config file." 
) assert self.conf.usr_cfg.nas is not None, "nas section must be set" if isinstance(self.conf.usr_cfg.nas.approach, str) and \ - self.conf.usr_cfg.nas.approach.lower() in NASMethods: + self.conf.usr_cfg.nas.approach.lower() in NASMethods: method = self.conf.usr_cfg.nas.approach.lower() else: logger.warning( @@ -110,29 +110,38 @@ def search(self, res_save_path=None): ) ) model_arch_paras = self.select_model_arch() - logger.info("Model architecture {} proposed.".format(model_arch_paras)) + logger.info( + "Model architecture {} proposed.".format(model_arch_paras)) model = self._model_builder(model_arch_paras) model_paras = self.count_model_parameters(model) logger.info( - "***** Number of model parameters: {:.2f}M *****".format(model_paras / 10**6) + "***** Number of model parameters: {:.2f}M *****".format( + model_paras / 10**6) ) - self.model_paras_num[tuple(model_arch_paras.values())] = model_paras + self.model_paras_num[tuple( + model_arch_paras.values())] = model_paras if tuple(model_arch_paras.values()) in self.search_results: - logger.info("Skip evaluated model architecture {}.".format(model_arch_paras)) + logger.info( + "Skip evaluated model architecture {}.".format(model_arch_paras)) continue if tuple(model_arch_paras.values()) in self.resumed_search_results: logger.info( - "Find previous results of model architecture: {}.".format(model_arch_paras) + "Find previous results of model architecture: {}.".format( + model_arch_paras) ) - metrics = self.resumed_search_results[tuple(model_arch_paras.values())] + metrics = self.resumed_search_results[tuple( + model_arch_paras.values())] else: - logger.info("Assessing model architecture: {}.".format(model_arch_paras)) + logger.info( + "Assessing model architecture: {}.".format(model_arch_paras)) metrics = self.estimate(model) logger.info( - "Metrics of model architecture {} is {}.".format(model_arch_paras, metrics) + "Metrics of model architecture {} is {}.".format( + model_arch_paras, metrics) ) self.search_results[tuple(model_arch_paras.values())] = metrics - self._search_algorithm.get_feedback(sum(self.metrics_conversion(metrics))) + self._search_algorithm.get_feedback( + sum(self.metrics_conversion(metrics))) self.dump_search_results( os.path.join(save_path, 'Trial_{}_results.txt'.format(i+1)) ) @@ -141,9 +150,12 @@ def search(self, res_save_path=None): if model_arch_vec not in self.search_results: self.search_results[model_arch_vec] = \ self.resumed_search_results[model_arch_vec] - model = self._model_builder(self.params_vec2params_dict(model_arch_vec)) - self.model_paras_num[model_arch_vec] = self.count_model_parameters(model) - self.dump_search_results(os.path.join(save_path, 'Final_results.txt'.format(i+1))) + model = self._model_builder( + self.params_vec2params_dict(model_arch_vec)) + self.model_paras_num[model_arch_vec] = self.count_model_parameters( + model) + self.dump_search_results(os.path.join( + save_path, 'Final_results.txt'.format(i+1))) self.find_best_model_archs() logger.info( "{fix} Found {n} best model architectures {fix}".format( @@ -151,10 +163,11 @@ def search(self, res_save_path=None): ) ) for i, model_arch in enumerate(self.best_model_archs): - logger.info("Best model architecture {}: {}".format(i+1, model_arch)) + logger.info( + "Best model architecture {}: {}".format(i+1, model_arch)) return self.best_model_archs - def estimate(self, model): # pragma: no cover + def estimate(self, model): # pragma: no cover """Estimate performance of the model. Depends on specific NAS algorithm. 
Returns: @@ -166,57 +179,63 @@ def count_model_parameters(self, model): if isinstance(model, torch.nn.Module): return sum(p.numel() for p in model.parameters()) else: - raise NotImplementedError("Only support torch model now.") # pragma: no cover + raise NotImplementedError( + "Only support torch model now.") # pragma: no cover def load_search_results(self, path): self.resumed_search_results = {} lastest_results_record = os.path.join(path, 'lastest_results.npy') if not os.path.exists(path) or not os.path.exists(lastest_results_record): return - self.resumed_search_results = np.load(lastest_results_record, allow_pickle=True).item() + self.resumed_search_results = np.load( + lastest_results_record, allow_pickle=True).item() os.makedirs(os.path.join(path, 'previous_results'), exist_ok=True) for f in os.listdir(path): if os.path.isfile(os.path.join(path, f)): - shutil.move(os.path.join(path, f), os.path.join(path, 'previous_results', f)) + shutil.move(os.path.join(path, f), os.path.join( + path, 'previous_results', f)) logger.info("Loaded previous results.") def dump_search_results(self, path): - lastest_results_record = os.path.join(os.path.dirname(path), 'lastest_results.npy') + lastest_results_record = os.path.join( + os.path.dirname(path), 'lastest_results.npy') np.save(lastest_results_record, self.search_results, allow_pickle=True) write_contents = '=' * 30 + ' All Search Results ' + '=' * 30 + '\n\n' for model_arch_vec in self.search_results: - tmp = ','.join(['{}_{}'.format(k, v) \ - for k, v in zip(self.search_space_keys, model_arch_vec)]) + tmp = ','.join(['{}_{}'.format(k, v) + for k, v in zip(self.search_space_keys, model_arch_vec)]) write_contents += '{}: {} Paras: {}M\n'.format( tmp, self.search_results[model_arch_vec], self.model_paras_num[model_arch_vec] / 10**6 ) - write_contents += '\n\n\n' + '=' * 30 + ' Best Search Results ' + '=' * 30 + '\n\n' + write_contents += '\n\n\n' + '=' * 30 + \ + ' Best Search Results ' + '=' * 30 + '\n\n' self.find_best_model_archs() for i, model_arch in enumerate(self.best_model_archs): model_arch_vec = tuple(model_arch.values()) - tmp = ','.join(['{}_{}'.format(k, v) \ - for k, v in zip(self.search_space_keys, model_arch_vec)]) + tmp = ','.join(['{}_{}'.format(k, v) + for k, v in zip(self.search_space_keys, model_arch_vec)]) write_contents += \ '{}. {}: {} Paras: {}M\n'.format( i+1, tmp, self.search_results[model_arch_vec], self.model_paras_num[model_arch_vec] / 10**6 - ) + ) with open(path, mode='w') as f: f.write(write_contents) def params_vec2params_dict(self, paras_vec): assert len(paras_vec) == len(self.search_space_keys), \ "Length of paras_vec and search_space_keys should be the same." - return {k:v for k, v in zip(self.search_space_keys, paras_vec)} + return {k: v for k, v in zip(self.search_space_keys, paras_vec)} def find_best_model_archs(self): assert len(self.search_results) > 0, "Zero result in search_results." 
model_arches = list(self.search_results.keys()) - metrics = [self.metrics_conversion(self.search_results[ma]) for ma in model_arches] + metrics = [self.metrics_conversion( + self.search_results[ma]) for ma in model_arches] pareto_front_indices = find_pareto_front(metrics) - self.best_model_archs = [self.params_vec2params_dict(model_arches[i]) \ - for i in pareto_front_indices] + self.best_model_archs = [self.params_vec2params_dict(model_arches[i]) + for i in pareto_front_indices] def metrics_conversion(self, metrics): if not isinstance(metrics, Iterable): @@ -228,11 +247,11 @@ def metrics_conversion(self, metrics): "Keys of metrics not match with metrics in the configuration." metrics = list(metrics.values()) if self.higher_is_better is None: - self.higher_is_better = [True,] * len(metrics) - logger.warning("higher_is_better not set in the configuration, " + \ - "set it to all True for every metric entry by default.") - converted_metrics = [metric if higher_is_better else -metric \ - for metric, higher_is_better in zip(metrics, self.higher_is_better)] + self.higher_is_better = [True, ] * len(metrics) + logger.warning("higher_is_better not set in the configuration, " + + "set it to all True for every metric entry by default.") + converted_metrics = [metric if higher_is_better else -metric + for metric, higher_is_better in zip(metrics, self.higher_is_better)] return converted_metrics def init_search_cfg(self, config): @@ -260,18 +279,21 @@ def init_search_cfg(self, config): if self.search_cfg.higher_is_better else None self.seed = self.search_cfg.seed self.max_trials = self.search_cfg.max_trials \ - if self.search_cfg.max_trials is not None else 3 # set default 3 for max_trials + if self.search_cfg.max_trials is not None else 3 # set default 3 for max_trials self.search_algorithm_type = self.search_cfg.search_algorithm \ if self.search_cfg.search_algorithm else None if not self.search_algorithm_type: - self._search_algorithm = BayesianOptimizationSearcher(self.search_space, self.seed) + self._search_algorithm = BayesianOptimizationSearcher( + self.search_space, self.seed) elif self.search_algorithm_type.lower() == 'grid': self._search_algorithm = GridSearcher(self.search_space) elif self.search_algorithm_type.lower() == 'random': - self._search_algorithm = RandomSearcher(self.search_space, self.seed) + self._search_algorithm = RandomSearcher( + self.search_space, self.seed) elif self.search_algorithm_type.lower() == 'bo': - self._search_algorithm = BayesianOptimizationSearcher(self.search_space, self.seed) - else: # pragma: no cover + self._search_algorithm = BayesianOptimizationSearcher( + self.search_space, self.seed) + else: # pragma: no cover logger.warning( 'Please be aware that \'{}\' is not a built-in search algorithm.'.format( self.search_algorithm_type @@ -303,4 +325,4 @@ def model_builder(self, model_builder): self._model_builder = model_builder def __repr__(self): - return 'Base Class of NAS' # pragma: no cover \ No newline at end of file + return 'Base Class of NAS' # pragma: no cover diff --git a/neural_compressor/experimental/nas/nas_utils.py b/neural_compressor/experimental/nas/nas_utils.py index 1b19f0cd871..139cf818338 100644 --- a/neural_compressor/experimental/nas/nas_utils.py +++ b/neural_compressor/experimental/nas/nas_utils.py @@ -30,6 +30,7 @@ def nas_registry(nas_method): cls: The class of register. """ assert isinstance(nas_method, str), "Expect nas_method to be a string." 
+ def decorator(cls): NASMethods[nas_method.lower()] = cls return cls @@ -68,4 +69,4 @@ def find_pareto_front(metrics): pareto_front_point_indices = pareto_front_point_indices[nondominated_points] metrics = metrics[nondominated_points] next_point_idx = np.sum(nondominated_points[:next_point_idx+1]) - return pareto_front_point_indices \ No newline at end of file + return pareto_front_point_indices diff --git a/neural_compressor/experimental/nas/search_algorithms.py b/neural_compressor/experimental/nas/search_algorithms.py index c3475c53eee..35cb8a0b3b3 100644 --- a/neural_compressor/experimental/nas/search_algorithms.py +++ b/neural_compressor/experimental/nas/search_algorithms.py @@ -29,10 +29,12 @@ def __init__(self, search_space) -> None: self.search_space_keys = sorted(search_space.keys()) for k in self.search_space_keys: assert isinstance(self.search_space[k], (list, tuple)), \ - "Value of key \'{}\' must be a list or tuple to specify choices".format(k) + "Value of key \'{}\' must be a list or tuple to specify choices".format( + k) def suggest(self): - raise NotImplementedError('Depends on specific search algorithm.') # pragma: no cover + raise NotImplementedError( + 'Depends on specific search algorithm.') # pragma: no cover def get_feedback(self, metric): pass @@ -74,8 +76,10 @@ def suggest(self): class BayesianOptimizationSearcher(Searcher): def __init__(self, search_space, seed=42) -> None: super(BayesianOptimizationSearcher, self).__init__(search_space) - idx_search_space = {k: (0, len(search_space[k])-1) for k in self.search_space_keys} - self.bo_agent = BayesianOptimization(idx_search_space, random_seed=seed) + idx_search_space = { + k: (0, len(search_space[k])-1) for k in self.search_space_keys} + self.bo_agent = BayesianOptimization( + idx_search_space, random_seed=seed) self.last_param_indices = None def suggest(self): @@ -88,7 +92,7 @@ def get_feedback(self, metric): "to get parameters and the input metric is corresponding to this parameters." 
try: self.bo_agent._space.register(self.last_param_indices, metric) - except KeyError: # pragma: no cover + except KeyError: # pragma: no cover logger.debug("Find registered params, skip it.") pass self.last_param_indices = None @@ -99,4 +103,4 @@ def indices2params_vec(self, indices): # keep ind within the index range of self.search_space[key] ind = int(min(max(round(ind), 0), len(self.search_space[key])-1)) res.append(self.search_space[key][ind]) - return res \ No newline at end of file + return res diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py index ef4cbddc952..61aeb409ae5 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py @@ -39,12 +39,14 @@ def set_incremental_state(module, incremental_state, key, value): full_key = _get_full_incremental_state_key(module, key) incremental_state[full_key] = value + class EmbeddingSuper(nn.Embedding): def __init__(self, num_embeddings, super_embed_dim, padding_idx, *args, **kwargs): super().__init__(num_embeddings, super_embed_dim, padding_idx, *args, **kwargs) # the largest embed dim - self.super_embed_dim = {'encoder': super_embed_dim, 'decoder': super_embed_dim} + self.super_embed_dim = { + 'encoder': super_embed_dim, 'decoder': super_embed_dim} # the current sampled embed dim self.sample_embed_dim = {'encoder': None, 'decoder': None} @@ -119,7 +121,8 @@ def set_sample_config(self, sample_in_dim, sample_out_dim): self._sample_parameters() def _sample_parameters(self): - self.samples['weight'] = sample_weight(self.weight, self.sample_in_dim, self.sample_out_dim) + self.samples['weight'] = sample_weight( + self.weight, self.sample_in_dim, self.sample_out_dim) self.samples['bias'] = self.bias if self.bias is not None: self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) @@ -153,6 +156,7 @@ def sample_bias(bias, sample_out_dim): return sample_bias + def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False): if not export and torch.cuda.is_available(): try: @@ -203,7 +207,6 @@ def calc_sampled_param_num(self): return self.samples['weight'].numel() + self.samples['bias'].numel() - class MultiheadAttentionSuper(nn.Module): """Multi-headed attention. 
@@ -215,8 +218,6 @@ def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, supe encoder_decoder_attention=False, out_dim=None, qkv_dim=None): super().__init__() - - # the configs of super arch self.super_q_embed_dim = super_embed_dim self.super_kv_embed_dim = None @@ -255,11 +256,15 @@ def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, supe 'value to be of the same size' if self.qkv_same_dim: - self.in_proj_weight = Parameter(torch.Tensor(3 * self.qkv_dim, self.super_q_embed_dim)) + self.in_proj_weight = Parameter(torch.Tensor( + 3 * self.qkv_dim, self.super_q_embed_dim)) else: - self.k_proj_weight = Parameter(torch.Tensor(self.qkv_dim, self.super_kv_embed_dim)) - self.v_proj_weight = Parameter(torch.Tensor(self.qkv_dim, self.super_kv_embed_dim)) - self.q_proj_weight = Parameter(torch.Tensor(self.qkv_dim, self.super_q_embed_dim)) + self.k_proj_weight = Parameter(torch.Tensor( + self.qkv_dim, self.super_kv_embed_dim)) + self.v_proj_weight = Parameter(torch.Tensor( + self.qkv_dim, self.super_kv_embed_dim)) + self.q_proj_weight = Parameter(torch.Tensor( + self.qkv_dim, self.super_q_embed_dim)) if bias: self.in_proj_bias = Parameter(torch.Tensor(3 * self.qkv_dim)) @@ -268,7 +273,8 @@ def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, supe if out_dim is None: out_dim = self.super_q_embed_dim - self.out_proj = LinearSuper(super_in_dim=self.qkv_dim, super_out_dim=out_dim, bias=bias) + self.out_proj = LinearSuper( + super_in_dim=self.qkv_dim, super_out_dim=out_dim, bias=bias) if add_bias_kv: self.bias_k = Parameter(torch.Tensor(1, 1, self.super_q_embed_dim)) @@ -278,7 +284,6 @@ def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, supe self.add_zero_attn = add_zero_attn - self.reset_parameters() self.onnx_trace = False @@ -302,9 +307,6 @@ def calc_sampled_param_num(self): return in_proj_q_weight_numel + in_proj_k_weight_numel + in_proj_v_weight_numel + in_proj_bias_numel - - - def set_sample_config(self, sample_q_embed_dim, sample_attention_heads, sample_kv_embed_dim=None): self.sample_q_embed_dim = sample_q_embed_dim if sample_kv_embed_dim is None: @@ -314,11 +316,12 @@ def set_sample_config(self, sample_q_embed_dim, sample_attention_heads, sample_k self.num_heads = sample_attention_heads self.head_dim = self.qkv_dim // self.num_heads - assert self.head_dim * self.num_heads == self.qkv_dim, "qkv_dim must be divisible by sampled num_heads" + assert self.head_dim * \ + self.num_heads == self.qkv_dim, "qkv_dim must be divisible by sampled num_heads" self.scaling = self.head_dim ** -0.5 - self.out_proj.set_sample_config(sample_in_dim=self.qkv_dim, sample_out_dim=self.sample_q_embed_dim) - + self.out_proj.set_sample_config( + sample_in_dim=self.qkv_dim, sample_out_dim=self.sample_q_embed_dim) def prepare_for_onnx_export_(self): self.onnx_trace = True @@ -340,7 +343,6 @@ def reset_parameters(self): if self.bias_v is not None: nn.init.xavier_normal_(self.bias_v) - def forward(self, query, key, value, key_padding_mask=None, incremental_state=None, need_weights=True, static_kv=False, attn_mask=None): """Input shape: Time x Batch x Channel @@ -351,7 +353,6 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No batch x src_len, where padding elements are indicated by 1s. 
""" - tgt_len, bsz, embed_dim = query.size() if incremental_state is not None: @@ -390,7 +391,8 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) if attn_mask is not None: - attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) if key_padding_mask is not None: key_padding_mask = torch.cat( [key_padding_mask, key_padding_mask.new_zeros(key_padding_mask.size(0), 1)], dim=1) @@ -405,19 +407,23 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No if saved_state is not None: # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) if 'prev_key' in saved_state: - prev_key = saved_state['prev_key'].view(bsz * self.num_heads, -1, self.head_dim) + prev_key = saved_state['prev_key'].view( + bsz * self.num_heads, -1, self.head_dim) if static_kv: k = prev_key else: k = torch.cat((prev_key, k), dim=1) if 'prev_value' in saved_state: - prev_value = saved_state['prev_value'].view(bsz * self.num_heads, -1, self.head_dim) + prev_value = saved_state['prev_value'].view( + bsz * self.num_heads, -1, self.head_dim) if static_kv: v = prev_value else: v = torch.cat((prev_value, v), dim=1) - saved_state['prev_key'] = k.view(bsz, self.num_heads, -1, self.head_dim) - saved_state['prev_value'] = v.view(bsz, self.num_heads, -1, self.head_dim) + saved_state['prev_key'] = k.view( + bsz, self.num_heads, -1, self.head_dim) + saved_state['prev_value'] = v.view( + bsz, self.num_heads, -1, self.head_dim) self._set_input_buffer(incremental_state, saved_state) @@ -429,26 +435,31 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No key_padding_mask = None if key_padding_mask is not None: - fil = key_padding_mask.new_ones(key_padding_mask.size(0), src_len-key_padding_mask.size(1)) + fil = key_padding_mask.new_ones( + key_padding_mask.size(0), src_len-key_padding_mask.size(1)) key_padding_mask = torch.cat((key_padding_mask, fil), dim=1) assert key_padding_mask.size(0) == bsz assert key_padding_mask.size(1) == src_len - if self.add_zero_attn: src_len += 1 - k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) - v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) + k = torch.cat( + [k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) + v = torch.cat( + [v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) if attn_mask is not None: - attn_mask = torch.cat([attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) + attn_mask = torch.cat( + [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) if key_padding_mask is not None: key_padding_mask = torch.cat( [key_padding_mask, torch.zeros(key_padding_mask.size(0), 1).type_as(key_padding_mask)], dim=1) attn_weights = torch.bmm(q, k.transpose(1, 2)) - attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) + attn_weights = self.apply_sparse_mask( + attn_weights, tgt_len, src_len, bsz) - assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] + assert list(attn_weights.size()) == [ + bsz * self.num_heads, tgt_len, src_len] if attn_mask is not None: attn_mask = attn_mask.unsqueeze(0) @@ -457,7 +468,8 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No attn_weights += attn_mask if key_padding_mask is not None: - attn_weights = 
attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attn_weights = attn_weights.view( + bsz, self.num_heads, tgt_len, src_len) if self.onnx_trace: attn_weights = torch.where( key_padding_mask.unsqueeze(1).unsqueeze(2), @@ -469,28 +481,33 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No key_padding_mask.unsqueeze(1).unsqueeze(2), float('-inf'), ) - attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) + attn_weights = attn_weights.view( + bsz * self.num_heads, tgt_len, src_len) attn_weights = utils.softmax( attn_weights, dim=-1, onnx_trace=self.onnx_trace, ).type_as(attn_weights) - attn_weights = F.dropout(attn_weights, p=self.dropout, training=self.training) + attn_weights = F.dropout( + attn_weights, p=self.dropout, training=self.training) attn = torch.bmm(attn_weights, v) - assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] + assert list(attn.size()) == [ + bsz * self.num_heads, tgt_len, self.head_dim] if (self.onnx_trace and attn.size(1) == 1): # when ONNX tracing a single decoder step (sequence length == 1) # the transpose is a no-op copy before view, thus unnecessary attn = attn.contiguous().view(tgt_len, bsz, self.qkv_dim) else: - attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, self.qkv_dim) + attn = attn.transpose(0, 1).contiguous().view( + tgt_len, bsz, self.qkv_dim) attn = self.out_proj(attn) if need_weights: # average attention weights over heads - attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attn_weights = attn_weights.view( + bsz, self.num_heads, tgt_len, src_len) attn_weights = attn_weights.sum(dim=1) / self.num_heads else: @@ -547,7 +564,7 @@ def reorder_incremental_state(self, incremental_state, new_order): self._set_input_buffer(incremental_state, input_buffer) def _get_input_buffer(self, incremental_state): - return get_incremental_state( #utils. + return get_incremental_state( # utils. self, incremental_state, 'attn_state', @@ -578,7 +595,8 @@ def __repr__(self): child_lines.append('(' + key + '): ' + mod_str) lines = extra_lines + child_lines - main_str = self._get_name() + '\tnum_heads:' + str(self.num_heads) + '\t qkv_dim:' + str(self.qkv_dim) + main_str = self._get_name() + '\tnum_heads:' + str(self.num_heads) + \ + '\t qkv_dim:' + str(self.qkv_dim) if lines: # simple one-liner info, which most builtin Modules will use if len(extra_lines) == 1 and not child_lines: diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index 75d990f26dd..554b7619a7a 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -31,7 +31,8 @@ from fairseq import libbleu except ImportError as e: import sys - sys.stderr.write('ERROR: missing libbleu.so. run `pip install --editable .`\n') + sys.stderr.write( + 'ERROR: missing libbleu.so. 
run `pip install --editable .`\n') raise e @@ -124,12 +125,13 @@ def result_string(self, order=4): self.stat.predlen, self.stat.reflen) -def get_bleu_score(args,ref,sys): +def get_bleu_score(args, ref, sys): dict = dictionary.Dictionary() - order =4 + order = 4 sacrebleu = False sentence_bleu = False ignore_case = False + def readlines(fd): for line in fd.readlines(): if ignore_case: @@ -137,7 +139,6 @@ def readlines(fd): else: yield line - if sentence_bleu: def score(fdsys): with open(ref) as fdref: @@ -159,7 +160,6 @@ def score(fdsys): print(scorer.result_string(order)) return(scorer.score(order)) - if sys == '-': score = score(sys.stdin) else: @@ -167,11 +167,12 @@ def score(fdsys): score = score(f) return score -def compute_bleu(config,dataset_path,checkpoint_path): + +def compute_bleu(config, dataset_path, checkpoint_path): parser = options.get_generation_parser() - args = options.parse_args_and_arch(parser,[dataset_path]) + args = options.parse_args_and_arch(parser, [dataset_path]) args.data = dataset_path args.beam = 5 @@ -184,7 +185,6 @@ def compute_bleu(config,dataset_path,checkpoint_path): utils.import_user_module(args) max_tokens = 12000 - use_cuda = torch.cuda.is_available() and not args.cpu # when running on CPU, use fp32 as default @@ -204,14 +204,13 @@ def compute_bleu(config,dataset_path,checkpoint_path): src_dict = None tgt_dict = task.target_dictionary - # Load ensemble print('| loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) - state = torch.load(checkpoint_path,map_location=torch.device('cpu')) + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) model.load_state_dict(state['model'], - strict=True) + strict=True) if use_cuda: model.cuda() @@ -250,13 +249,13 @@ def compute_bleu(config,dataset_path,checkpoint_path): # Initialize generator gen_timer = StopwatchMeter() - generator = task.build_generator([model],args) + generator = task.build_generator([model], args) num_sentences = 0 has_target = True decoder_times_all = [] input_len_all = [] - with open('translations_out.txt','a') as fname_translations: + with open('translations_out.txt', 'a') as fname_translations: with progress_bar.build_progress_bar(args, itr) as t: wps_meter = TimeMeter() for sample in t: @@ -270,8 +269,10 @@ def compute_bleu(config,dataset_path,checkpoint_path): prefix_tokens = sample['target'][:, :args.prefix_size] gen_timer.start() - hypos = task.inference_step(generator, [model], sample, prefix_tokens) - input_len_all.append(np.mean(sample['net_input']['src_lengths'].cpu().numpy())) + hypos = task.inference_step( + generator, [model], sample, prefix_tokens) + input_len_all.append( + np.mean(sample['net_input']['src_lengths'].cpu().numpy())) num_generated_tokens = sum(len(h[0]['tokens']) for h in hypos) gen_timer.stop(num_generated_tokens) @@ -279,32 +280,40 @@ def compute_bleu(config,dataset_path,checkpoint_path): has_target = sample['target'] is not None # Remove padding - src_tokens = utils.strip_pad(sample['net_input']['src_tokens'][i, :], tgt_dict.pad()) + src_tokens = utils.strip_pad( + sample['net_input']['src_tokens'][i, :], tgt_dict.pad()) target_tokens = None if has_target: - target_tokens = utils.strip_pad(sample['target'][i, :], tgt_dict.pad()).int().cpu() + target_tokens = utils.strip_pad( + sample['target'][i, :], tgt_dict.pad()).int().cpu() # Either retrieve the original sentences or regenerate them from tokens. 
if align_dict is not None: - src_str = task.dataset(args.gen_subset).src.get_original_text(sample_id) - target_str = task.dataset(args.gen_subset).tgt.get_original_text(sample_id) + src_str = task.dataset( + args.gen_subset).src.get_original_text(sample_id) + target_str = task.dataset( + args.gen_subset).tgt.get_original_text(sample_id) else: if src_dict is not None: - src_str = src_dict.string(src_tokens, args.remove_bpe) + src_str = src_dict.string( + src_tokens, args.remove_bpe) else: src_str = "" if has_target: - target_str = tgt_dict.string(target_tokens, args.remove_bpe, escape_unk=True) + target_str = tgt_dict.string( + target_tokens, args.remove_bpe, escape_unk=True) if not args.quiet: if src_dict is not None: #print('S-{}\t{}'.format(sample_id, src_str)) - fname_translations.write('S-{}\t{}'.format(sample_id, src_str)) + fname_translations.write( + 'S-{}\t{}'.format(sample_id, src_str)) fname_translations.write('\n') if has_target: #print('T-{}\t{}'.format(sample_id, target_str)) - fname_translations.write('T-{}\t{}'.format(sample_id, target_str)) + fname_translations.write( + 'T-{}\t{}'.format(sample_id, target_str)) fname_translations.write('\n') # Process top predictions @@ -312,29 +321,32 @@ def compute_bleu(config,dataset_path,checkpoint_path): hypo_tokens, hypo_str, alignment = utils.post_process_prediction( hypo_tokens=hypo['tokens'].int().cpu(), src_str=src_str, - alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None, + alignment=hypo['alignment'].int().cpu( + ) if hypo['alignment'] is not None else None, align_dict=align_dict, tgt_dict=tgt_dict, remove_bpe=args.remove_bpe, ) if not args.quiet: - - fname_translations.write('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str)) + + fname_translations.write( + 'H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str)) fname_translations.write('\n') fname_translations.write('P-{}\t{}'.format( - sample_id, - ' '.join(map( - lambda x: '{:.4f}'.format(x), - hypo['positional_scores'].tolist(), - )) + sample_id, + ' '.join(map( + lambda x: '{:.4f}'.format(x), + hypo['positional_scores'].tolist(), )) + )) fname_translations.write('\n') if args.print_alignment: fname_translations.write('A-{}\t{}'.format( sample_id, - ' '.join(map(lambda x: str(utils.item(x)), alignment)) + ' '.join( + map(lambda x: str(utils.item(x)), alignment)) )) fname_translations.write('\n') @@ -342,10 +354,11 @@ def compute_bleu(config,dataset_path,checkpoint_path): t.log({'wps': round(wps_meter.avg)}) num_sentences += sample['nsentences'] - - os.system("grep ^H translations_out.txt | cut -f3- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > sys.txt") - os.system("grep ^T translations_out.txt | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > ref.txt") - bleu_score = get_bleu_score(args,"ref.txt","sys.txt") + os.system( + "grep ^H translations_out.txt | cut -f3- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > sys.txt") + os.system( + "grep ^T translations_out.txt | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > ref.txt") + bleu_score = get_bleu_score(args, "ref.txt", "sys.txt") print(bleu_score) os.system("rm ref.txt") @@ -353,10 +366,11 @@ def compute_bleu(config,dataset_path,checkpoint_path): os.system("rm translations_out.txt") return bleu_score -def compute_latency(config,dataset_path,get_model_parameters=False): + +def compute_latency(config, dataset_path, get_model_parameters=False): parser = options.get_generation_parser() - args = options.parse_args_and_arch(parser,[dataset_path]) + args = 
options.parse_args_and_arch(parser, [dataset_path]) args.data = dataset_path args.beam = 5 @@ -368,16 +382,16 @@ def compute_latency(config,dataset_path,get_model_parameters=False): args.batch_size = 128 utils.import_user_module(args) max_tokens = 12000 - args.latgpu=False - args.latcpu=True - args.latiter=100 + args.latgpu = False + args.latcpu = True + args.latiter = 100 # Initialize CUDA and distributed training if torch.cuda.is_available() and not args.cpu: torch.cuda.set_device(args.device_id) torch.manual_seed(args.seed) - #Optimize ensemble for generation + # Optimize ensemble for generation # Load dataset splits task = tasks.setup_task(args) task.load_dataset(args.gen_subset) @@ -388,7 +402,6 @@ def compute_latency(config,dataset_path,get_model_parameters=False): src_dict = None tgt_dict = task.target_dictionary - # Load ensemble print('| loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) @@ -399,48 +412,51 @@ def compute_latency(config,dataset_path,get_model_parameters=False): dummy_sentence_length = dummy_sentence_length_dict['wmt'] - dummy_src_tokens = [2] + [7] * (dummy_sentence_length - 1) dummy_prev = [7] * (dummy_sentence_length - 1) + [2] - src_tokens_test = torch.tensor([dummy_src_tokens], dtype=torch.long)#.cuda() - src_lengths_test = torch.tensor([dummy_sentence_length])#.cuda() - prev_output_tokens_test_with_beam = torch.tensor([dummy_prev] * args.beam, dtype=torch.long)#.cuda() + src_tokens_test = torch.tensor( + [dummy_src_tokens], dtype=torch.long) # .cuda() + src_lengths_test = torch.tensor([dummy_sentence_length]) # .cuda() + prev_output_tokens_test_with_beam = torch.tensor( + [dummy_prev] * args.beam, dtype=torch.long) # .cuda() bsz = 1 - new_order = torch.arange(bsz).view(-1, 1).repeat(1, args.beam).view(-1).long()#.cuda() + new_order = torch.arange(bsz).view(-1, 1).repeat(1, + args.beam).view(-1).long() # .cuda() if args.latcpu: model.cpu() print('Measuring model latency on CPU for dataset generation...') elif args.latgpu: model.cuda() - src_tokens_test = src_tokens_test#.cuda() - src_lengths_test = src_lengths_test#.cuda() - prev_output_tokens_test_with_beam = prev_output_tokens_test_with_beam#.cuda() + src_tokens_test = src_tokens_test # .cuda() + src_lengths_test = src_lengths_test # .cuda() + prev_output_tokens_test_with_beam = prev_output_tokens_test_with_beam # .cuda() print('Measuring model latency on GPU for dataset generation...') start = torch.cuda.Event(enable_timing=True) end = torch.cuda.Event(enable_timing=True) - model.set_sample_config(config) - + model.eval() - + with torch.no_grad(): # dry runs for _ in range(15): - encoder_out_test = model.encoder(src_tokens=src_tokens_test, src_lengths=src_lengths_test) + encoder_out_test = model.encoder( + src_tokens=src_tokens_test, src_lengths=src_lengths_test) encoder_latencies = [] print('Measuring encoder for dataset generation...') for _ in range(args.latiter): if args.latgpu: - #start.record() + # start.record() start = time.time() elif args.latcpu: start = time.time() - model.encoder(src_tokens=src_tokens_test, src_lengths=src_lengths_test) + model.encoder(src_tokens=src_tokens_test, + src_lengths=src_lengths_test) if args.latgpu: end = time.time() @@ -450,16 +466,18 @@ def compute_latency(config,dataset_path,get_model_parameters=False): encoder_latencies.append((end - start) * 1000) encoder_latencies.sort() - encoder_latencies = encoder_latencies[int(args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] - print(f'Encoder latency for dataset generation: Mean: 
{np.mean(encoder_latencies)} ms; \t Std: {np.std(encoder_latencies)} ms') - + encoder_latencies = encoder_latencies[int( + args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] + print( + f'Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; \t Std: {np.std(encoder_latencies)} ms') - encoder_out_test_with_beam = model.encoder.reorder_encoder_out(encoder_out_test, new_order) + encoder_out_test_with_beam = model.encoder.reorder_encoder_out( + encoder_out_test, new_order) # dry runs for _ in range(15): model.decoder(prev_output_tokens=prev_output_tokens_test_with_beam, - encoder_out=encoder_out_test_with_beam) + encoder_out=encoder_out_test_with_beam) # decoder is more complicated because we need to deal with incremental states and auto regressive things decoder_iterations_dict = {'iwslt': 23, 'wmt': 30} @@ -471,13 +489,13 @@ def compute_latency(config,dataset_path,get_model_parameters=False): for _ in range(args.latiter): if args.latgpu: start = time.time() - #start.record() + # start.record() elif args.latcpu: start = time.time() incre_states = {} for k_regressive in range(decoder_iterations): model.decoder(prev_output_tokens=prev_output_tokens_test_with_beam[:, :k_regressive + 1], - encoder_out=encoder_out_test_with_beam, incremental_state=incre_states) + encoder_out=encoder_out_test_with_beam, incremental_state=incre_states) if args.latgpu: end = time.time() decoder_latencies.append((end - start) * 1000) @@ -488,10 +506,12 @@ def compute_latency(config,dataset_path,get_model_parameters=False): # only use the 10% to 90% latencies to avoid outliers decoder_latencies.sort() - decoder_latencies = decoder_latencies[int(args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] + decoder_latencies = decoder_latencies[int( + args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] print(decoder_latencies) - print(f'Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') + print( + f'Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') lat_mean = np.mean(encoder_latencies)+np.mean(decoder_latencies) lat_std = np.std(encoder_latencies)+np.std(decoder_latencies) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py index 58f29a94aa3..a47837bb36f 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py @@ -11,7 +11,8 @@ BaseFairseqModel ) -from fairseq.modules import (PositionalEmbedding,SinusoidalPositionalEmbedding) +from fairseq.modules import ( + PositionalEmbedding, SinusoidalPositionalEmbedding) from .modules_supernetwork import ( MultiheadAttentionSuper, @@ -24,6 +25,8 @@ DEFAULT_MAX_SOURCE_POSITIONS = 1024 DEFAULT_MAX_TARGET_POSITIONS = 1024 + + class TransformerSuperNetwork(BaseFairseqModel): """ Transformer model from `"Attention Is All You Need" (Vaswani, et al, 2017) @@ -41,32 +44,33 @@ class TransformerSuperNetwork(BaseFairseqModel): :prog: """ - - def __init__(self,task): + def __init__(self, task): super().__init__() src_dict, tgt_dict = task.source_dictionary, task.target_dictionary - encoder_config ={'encoder_embed_dim': 640, - 'encoder_layers': 6, - 'encoder_attention_heads': 8, - 'encoder_ffn_embed_dim':3072, - 
'encoder_embed_path': None} + encoder_config = {'encoder_embed_dim': 640, + 'encoder_layers': 6, + 'encoder_attention_heads': 8, + 'encoder_ffn_embed_dim': 3072, + 'encoder_embed_path': None} - decoder_config ={'decoder_embed_dim': 640, - 'decoder_layers': 6, - 'decoder_attention_heads': 8, - 'decoder_ffn_embed_dim':3072} + decoder_config = {'decoder_embed_dim': 640, + 'decoder_layers': 6, + 'decoder_attention_heads': 8, + 'decoder_ffn_embed_dim': 3072} encoder_embed_tokens = self.build_embedding( src_dict, encoder_config['encoder_embed_dim'], encoder_config['encoder_embed_path'] - ) + ) decoder_embed_tokens = encoder_embed_tokens self.share_decoder_input_output_embed = True - self.encoder = TransformerEncoder(encoder_config, src_dict, encoder_embed_tokens) - self.decoder = TransformerDecoder(decoder_config, tgt_dict, decoder_embed_tokens) + self.encoder = TransformerEncoder( + encoder_config, src_dict, encoder_embed_tokens) + self.decoder = TransformerDecoder( + decoder_config, tgt_dict, decoder_embed_tokens) - def build_embedding(self,dictionary, embed_dim, path=None): + def build_embedding(self, dictionary, embed_dim, path=None): num_embeddings = len(dictionary) padding_idx = dictionary.pad() emb = Embedding(num_embeddings, embed_dim, padding_idx) @@ -98,7 +102,7 @@ def get_sampled_params_numel(self, config): def set_sample_config(self, config): self.encoder.set_sample_config(config) self.decoder.set_sample_config(config) - + class TransformerEncoder(FairseqEncoder): """ @@ -115,9 +119,11 @@ def __init__(self, encoder_config, dictionary, embed_tokens): super().__init__(dictionary) # the configs of super arch self.super_embed_dim = encoder_config['encoder_embed_dim'] - self.super_ffn_embed_dim = [encoder_config['encoder_ffn_embed_dim']] * encoder_config['encoder_layers'] + self.super_ffn_embed_dim = [ + encoder_config['encoder_ffn_embed_dim']] * encoder_config['encoder_layers'] self.super_layer_num = encoder_config['encoder_layers'] - self.super_self_attention_heads = [encoder_config['encoder_attention_heads']] * encoder_config['encoder_layers'] + self.super_self_attention_heads = [ + encoder_config['encoder_attention_heads']] * encoder_config['encoder_layers'] self.super_dropout = 0.3 self.super_activation_dropout = 0 @@ -141,11 +147,11 @@ def __init__(self, encoder_config, dictionary, embed_tokens): self.max_source_positions = DEFAULT_MAX_SOURCE_POSITIONS self.embed_tokens = embed_tokens - + self.embed_positions = PositionalEmbedding( self.max_source_positions, self.super_embed_dim, self.padding_idx, - learned= False, - ) + learned=False, + ) self.layers = nn.ModuleList([]) self.layers.extend([ @@ -158,10 +164,9 @@ def __init__(self, encoder_config, dictionary, embed_tokens): else: self.layer_norm = None - self.vocab_original_scaling = False - + self.vocab_original_scaling = False - def set_sample_config(self, config:dict): + def set_sample_config(self, config: dict): self.sample_embed_dim = config['encoder']['encoder_embed_dim'] @@ -173,15 +178,20 @@ def set_sample_config(self, config:dict): # Caution: this is a list for all layers self.sample_self_attention_heads = config['encoder']['encoder_self_attention_heads'] - self.sample_dropout = calc_dropout(self.super_dropout, self.sample_embed_dim, self.super_embed_dim) - self.sample_activation_dropout = calc_dropout(self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_dropout = calc_dropout( + self.super_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_activation_dropout = 
calc_dropout( + self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) - self.sample_embed_scale = math.sqrt(self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale + self.sample_embed_scale = math.sqrt( + self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale - self.embed_tokens.set_sample_config(sample_embed_dim=self.sample_embed_dim, part='encoder') + self.embed_tokens.set_sample_config( + sample_embed_dim=self.sample_embed_dim, part='encoder') if self.layer_norm is not None: - self.layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + self.layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) for i, layer in enumerate(self.layers): # not exceed sample layer number @@ -189,14 +199,14 @@ def set_sample_config(self, config:dict): layer.set_sample_config(is_identity_layer=False, sample_embed_dim=self.sample_embed_dim, sample_ffn_embed_dim_this_layer=self.sample_ffn_embed_dim[i], - sample_self_attention_heads_this_layer=self.sample_self_attention_heads[i], + sample_self_attention_heads_this_layer=self.sample_self_attention_heads[ + i], sample_dropout=self.sample_dropout, sample_activation_dropout=self.sample_activation_dropout) # exceeds sample layer number else: layer.set_sample_config(is_identity_layer=True) - def forward(self, src_tokens, src_lengths): """ Args: @@ -213,7 +223,8 @@ def forward(self, src_tokens, src_lengths): padding elements of shape `(batch, src_len)` """ # embed tokens and positions - x = self.sample_embed_scale * self.embed_tokens(src_tokens, part='encoder') + x = self.sample_embed_scale * \ + self.embed_tokens(src_tokens, part='encoder') if self.embed_positions is not None: positions = self.embed_positions(src_tokens) @@ -236,14 +247,13 @@ def forward(self, src_tokens, src_lengths): x = layer(x, encoder_padding_mask) all_x.append(x) - if self.layer_norm: x = self.layer_norm(x) return { - 'encoder_out': x, - 'encoder_out_all' : all_x, - 'encoder_padding_mask': encoder_padding_mask, + 'encoder_out': x, + 'encoder_out_all': all_x, + 'encoder_padding_mask': encoder_padding_mask, } def reorder_encoder_out(self, encoder_out, new_order): @@ -267,7 +277,8 @@ def reorder_encoder_out(self, encoder_out, new_order): if 'encoder_out_all' in encoder_out.keys(): new_encoder_out_all = [] for encoder_out_one_layer in encoder_out['encoder_out_all']: - new_encoder_out_all.append(encoder_out_one_layer.index_select(1, new_order)) + new_encoder_out_all.append( + encoder_out_one_layer.index_select(1, new_order)) encoder_out['encoder_out_all'] = new_encoder_out_all return encoder_out @@ -284,10 +295,12 @@ def upgrade_state_dict_named(self, state_dict, name): weights_key = '{}.embed_positions.weights'.format(name) if weights_key in state_dict: del state_dict[weights_key] - state_dict['{}.embed_positions._float_tensor'.format(name)] = torch.FloatTensor(1) + state_dict['{}.embed_positions._float_tensor'.format( + name)] = torch.FloatTensor(1) for i in range(len(self.layers)): # update layer norms - self.layers[i].upgrade_state_dict_named(state_dict, "{}.layers.{}".format(name, i)) + self.layers[i].upgrade_state_dict_named( + state_dict, "{}.layers.{}".format(name, i)) version_key = '{}.version'.format(name) if utils.item(state_dict.get(version_key, torch.Tensor([1]))[0]) < 2: @@ -316,11 +329,16 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal # the configs of super arch self.super_embed_dim = decoder_config['decoder_embed_dim'] - 
self.super_ffn_embed_dim = decoder_config['decoder_ffn_embed_dim'] * decoder_config['decoder_layers'] + self.super_ffn_embed_dim = decoder_config['decoder_ffn_embed_dim'] * \ + decoder_config['decoder_layers'] self.super_layer_num = decoder_config['decoder_layers'] - self.super_self_attention_heads = 8*[decoder_config['decoder_attention_heads']] * decoder_config['decoder_layers'] - self.super_ende_attention_heads = [decoder_config['decoder_attention_heads']] * decoder_config['decoder_layers'] - self.super_arbitrary_ende_attn = [-1] * decoder_config['decoder_layers'] + self.super_self_attention_heads = 8 * \ + [decoder_config['decoder_attention_heads']] * \ + decoder_config['decoder_layers'] + self.super_ende_attention_heads = [ + decoder_config['decoder_attention_heads']] * decoder_config['decoder_layers'] + self.super_arbitrary_ende_attn = [-1] * \ + decoder_config['decoder_layers'] self.super_dropout = 0.3 self.super_activation_dropout = 0.0 @@ -340,7 +358,6 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.sample_embed_scale = None - # the configs of current sampled arch self.register_buffer('version', torch.Tensor([3])) @@ -353,15 +370,15 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.embed_tokens = embed_tokens - self.embed_positions = PositionalEmbedding( self.max_target_positions, self.super_embed_dim, padding_idx, learned=False, - ) if not False else None + ) if not False else None self.layers = nn.ModuleList([]) self.layers.extend([ - TransformerDecoderLayer(decoder_config, layer_idx=i, no_encoder_attn=no_encoder_attn) + TransformerDecoderLayer( + decoder_config, layer_idx=i, no_encoder_attn=no_encoder_attn) for i in range(self.super_layer_num) ]) @@ -370,7 +387,7 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.project_out_dim = Linear(self.super_embed_dim, self.output_embed_dim, bias=False) \ if self.super_embed_dim != self.output_embed_dim and not args.tie_adaptive_weights else None - if False:# args.adaptive_softmax_cutoff is not None: + if False: # args.adaptive_softmax_cutoff is not None: self.adaptive_softmax = AdaptiveSoftmax( len(dictionary), self.output_embed_dim, @@ -381,15 +398,17 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal tie_proj=args.tie_adaptive_proj, ) elif not self.share_input_output_embed: - self.embed_out = nn.Parameter(torch.Tensor(len(dictionary), self.output_embed_dim)) - nn.init.normal_(self.embed_out, mean=0, std=self.output_embed_dim ** -0.5) + self.embed_out = nn.Parameter(torch.Tensor( + len(dictionary), self.output_embed_dim)) + nn.init.normal_(self.embed_out, mean=0, + std=self.output_embed_dim ** -0.5) self.layer_norm = None self.get_attn = False self.vocab_original_scaling = False - def set_sample_config(self, config:dict): + def set_sample_config(self, config: dict): self.sample_embed_dim = config['decoder']['decoder_embed_dim'] self.sample_encoder_embed_dim = config['encoder']['encoder_embed_dim'] @@ -407,15 +426,20 @@ def set_sample_config(self, config:dict): self.sample_layer_num = config['decoder']['decoder_layer_num'] - self.sample_dropout = calc_dropout(self.super_dropout, self.sample_embed_dim, self.super_embed_dim) - self.sample_activation_dropout = calc_dropout(self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_dropout = calc_dropout( + self.super_dropout, self.sample_embed_dim, self.super_embed_dim) + self.sample_activation_dropout = 
calc_dropout( + self.super_activation_dropout, self.sample_embed_dim, self.super_embed_dim) - self.sample_embed_scale = math.sqrt(self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale + self.sample_embed_scale = math.sqrt( + self.sample_embed_dim) if not self.vocab_original_scaling else self.super_embed_scale - self.embed_tokens.set_sample_config(sample_embed_dim=self.sample_embed_dim, part='decoder') + self.embed_tokens.set_sample_config( + sample_embed_dim=self.sample_embed_dim, part='decoder') if self.layer_norm is not None: - self.layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + self.layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) for i, layer in enumerate(self.layers): # not exceed sample layer number @@ -424,16 +448,16 @@ def set_sample_config(self, config:dict): sample_embed_dim=self.sample_embed_dim, sample_encoder_embed_dim=self.sample_encoder_embed_dim, sample_ffn_embed_dim_this_layer=self.sample_ffn_embed_dim[i], - sample_self_attention_heads_this_layer=self.sample_self_attention_heads[i], - sample_ende_attention_heads_this_layer=self.sample_ende_attention_heads[i], + sample_self_attention_heads_this_layer=self.sample_self_attention_heads[ + i], + sample_ende_attention_heads_this_layer=self.sample_ende_attention_heads[ + i], sample_dropout=self.sample_dropout, sample_activation_dropout=self.sample_activation_dropout) # exceeds sample layer number else: layer.set_sample_config(is_identity_layer=True) - - def forward(self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused): """ Args: @@ -449,7 +473,8 @@ def forward(self, prev_output_tokens, encoder_out=None, incremental_state=None, - the decoder's output of shape `(batch, tgt_len, vocab)` - a dictionary with any model-specific outputs """ - x, extra = self.extract_features(prev_output_tokens, encoder_out, incremental_state) + x, extra = self.extract_features( + prev_output_tokens, encoder_out, incremental_state) x = self.output_layer(x) return x, extra @@ -478,7 +503,8 @@ def extract_features(self, prev_output_tokens, encoder_out=None, incremental_sta positions = positions[:, -1:] # embed tokens and positions - x = self.sample_embed_scale * self.embed_tokens(prev_output_tokens, part='decoder') + x = self.sample_embed_scale * \ + self.embed_tokens(prev_output_tokens, part='decoder') if positions is not None: x += positions @@ -501,36 +527,41 @@ def extract_features(self, prev_output_tokens, encoder_out=None, incremental_sta encoder_out_feed = encoder_out['encoder_out'] # concat one second last output layer elif self.sample_arbitrary_ende_attn[i] == 1: - encoder_out_feed = torch.cat([encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2]], dim=0) + encoder_out_feed = torch.cat( + [encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2]], dim=0) elif self.sample_arbitrary_ende_attn[i] == 2: - encoder_out_feed = torch.cat([encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2], encoder_out['encoder_out_all'][-3]], dim=0) + encoder_out_feed = torch.cat( + [encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2], encoder_out['encoder_out_all'][-3]], dim=0) else: - raise NotImplementedError("arbitrary_ende_attn should in [-1, 1, 2]") + raise NotImplementedError( + "arbitrary_ende_attn should in [-1, 1, 2]") if encoder_out['encoder_padding_mask'] is not None: if i >= self.sample_layer_num or self.sample_arbitrary_ende_attn[i] == -1: encoder_padding_mask_feed = encoder_out['encoder_padding_mask'] # concat one 
more elif self.sample_arbitrary_ende_attn[i] == 1: - encoder_padding_mask_feed = torch.cat([encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], dim=1) + encoder_padding_mask_feed = torch.cat( + [encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], dim=1) # concat two more elif self.sample_arbitrary_ende_attn[i] == 2: - encoder_padding_mask_feed = torch.cat([encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], dim=1) + encoder_padding_mask_feed = torch.cat( + [encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], dim=1) else: - raise NotImplementedError("arbitrary_ende_attn should in [-1, 1, 2]") - + raise NotImplementedError( + "arbitrary_ende_attn should in [-1, 1, 2]") x, attn = layer( x, encoder_out_feed, encoder_padding_mask_feed, incremental_state, - self_attn_mask=self.buffered_future_mask(x) if incremental_state is None else None, + self_attn_mask=self.buffered_future_mask( + x) if incremental_state is None else None, ) inner_states.append(x) attns.append(attn) - if self.layer_norm: x = self.layer_norm(x) @@ -558,13 +589,15 @@ def max_positions(self): """Maximum output length supported by the decoder.""" if self.embed_positions is None: return self.max_target_positions - import ipdb;ipdb.set_trace() + import ipdb + ipdb.set_trace() return min(self.max_target_positions, self.embed_positions.max_positions()) def buffered_future_mask(self, tensor): dim = tensor.size(0) if not hasattr(self, '_future_mask') or self._future_mask is None or self._future_mask.device != tensor.device or self._future_mask.size(0) < dim: - self._future_mask = torch.triu(utils.fill_with_neg_inf(tensor.new(dim, dim)), 1) + self._future_mask = torch.triu( + utils.fill_with_neg_inf(tensor.new(dim, dim)), 1) return self._future_mask[:dim, :dim] def upgrade_state_dict_named(self, state_dict, name): @@ -573,7 +606,8 @@ def upgrade_state_dict_named(self, state_dict, name): weights_key = '{}.embed_positions.weights'.format(name) if weights_key in state_dict: del state_dict[weights_key] - state_dict['{}.embed_positions._float_tensor'.format(name)] = torch.FloatTensor(1) + state_dict['{}.embed_positions._float_tensor'.format( + name)] = torch.FloatTensor(1) for i in range(len(self.layers)): # update layer norms @@ -584,9 +618,11 @@ def upgrade_state_dict_named(self, state_dict, name): } for old, new in layer_norm_map.items(): for m in ('weight', 'bias'): - k = '{}.layers.{}.layer_norms.{}.{}'.format(name, i, old, m) + k = '{}.layers.{}.layer_norms.{}.{}'.format( + name, i, old, m) if k in state_dict: - state_dict['{}.layers.{}.{}.{}'.format(name, i, new, m)] = state_dict[k] + state_dict['{}.layers.{}.{}.{}'.format( + name, i, new, m)] = state_dict[k] del state_dict[k] version_key = '{}.version'.format(name) @@ -623,7 +659,7 @@ def __init__(self, encoder_config, layer_idx): self.super_self_attention_heads_this_layer = encoder_config['encoder_attention_heads'] self.super_dropout = 0.3 - self.super_activation_dropout =0 + self.super_activation_dropout = 0 # the configs of current sampled arch self.sample_embed_dim = None @@ -635,8 +671,7 @@ def __init__(self, encoder_config, layer_idx): self.is_identity_layer = None - self.qkv_dim= 512 - + self.qkv_dim = 512 self.self_attn = MultiheadAttentionSuper( super_embed_dim=self.super_embed_dim, num_heads=self.super_self_attention_heads_this_layer, is_encoder=True, @@ -650,11 +685,12 @@ def __init__(self, encoder_config, 
layer_idx): ) self.normalize_before = False - self.fc1 = LinearSuper(super_in_dim=self.super_embed_dim, super_out_dim=self.super_ffn_embed_dim_this_layer, uniform_=None, non_linear='relu') #init.uniform_ - self.fc2 = LinearSuper(super_in_dim=self.super_ffn_embed_dim_this_layer, super_out_dim=self.super_embed_dim, uniform_=None, non_linear='linear') + self.fc1 = LinearSuper(super_in_dim=self.super_embed_dim, super_out_dim=self.super_ffn_embed_dim_this_layer, + uniform_=None, non_linear='relu') # init.uniform_ + self.fc2 = LinearSuper(super_in_dim=self.super_ffn_embed_dim_this_layer, + super_out_dim=self.super_embed_dim, uniform_=None, non_linear='linear') self.final_layer_norm = LayerNormSuper(self.super_embed_dim) - def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_ffn_embed_dim_this_layer=None, sample_self_attention_heads_this_layer=None, sample_dropout=None, sample_activation_dropout=None): if is_identity_layer: @@ -670,15 +706,19 @@ def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_ffn self.sample_dropout = sample_dropout self.sample_activation_dropout = sample_activation_dropout - self.self_attn_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) - - self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_attention_heads=self.sample_self_attention_heads_this_layer) + self.self_attn_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) - self.fc1.set_sample_config(sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) - self.fc2.set_sample_config(sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) + self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, + sample_attention_heads=self.sample_self_attention_heads_this_layer) - self.final_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + self.fc1.set_sample_config( + sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) + self.fc2.set_sample_config( + sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) + self.final_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) def upgrade_state_dict_named(self, state_dict, name): """ @@ -728,15 +768,17 @@ def forward(self, x, encoder_padding_mask, attn_mask=None): # will become -inf, which results in NaN in model parameters # TODO: to formally solve this problem, we need to change fairseq's # MultiheadAttention. We will do this later on. 
- x, _ = self.self_attn(query=x, key=x, value=x, key_padding_mask=encoder_padding_mask) + x, _ = self.self_attn(query=x, key=x, value=x, + key_padding_mask=encoder_padding_mask) x = F.dropout(x, p=self.dropout, training=self.training) - x[:residual.size(0),:,:] = residual + x[:residual.size(0),:,:] + x[:residual.size(0), :, :] = residual + x[:residual.size(0), :, :] x = self.maybe_layer_norm(self.self_attn_layer_norm, x, after=True) residual = x x = self.maybe_layer_norm(self.final_layer_norm, x, before=True) x = self.activation_fn(self.fc1(x)) - x = F.dropout(x, p=self.sample_activation_dropout, training=self.training) + x = F.dropout(x, p=self.sample_activation_dropout, + training=self.training) x = self.fc2(x) x = F.dropout(x, p=self.sample_dropout, training=self.training) x = residual + x @@ -793,7 +835,6 @@ def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv self.qkv_dim = 512 self.layer_idx = layer_idx - self.self_attn = MultiheadAttentionSuper( is_encoder=False, super_embed_dim=self.super_embed_dim, @@ -841,7 +882,6 @@ def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv self.onnx_trace = False - def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_encoder_embed_dim=None, sample_ffn_embed_dim_this_layer=None, sample_self_attention_heads_this_layer=None, sample_ende_attention_heads_this_layer=None, sample_dropout=None, sample_activation_dropout=None): if is_identity_layer: @@ -859,18 +899,23 @@ def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_enc self.sample_dropout = sample_dropout self.sample_activation_dropout = sample_activation_dropout + self.self_attn_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) + self.encoder_attn_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) - self.self_attn_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) - self.encoder_attn_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, + sample_attention_heads=self.sample_self_attention_heads_this_layer) + self.encoder_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_kv_embed_dim=self.sample_encoder_embed_dim, + sample_attention_heads=self.sample_ende_attention_heads_this_layer) - self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_attention_heads=self.sample_self_attention_heads_this_layer) - self.encoder_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_kv_embed_dim=self.sample_encoder_embed_dim, sample_attention_heads=self.sample_ende_attention_heads_this_layer) - - self.fc1.set_sample_config(sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) - self.fc2.set_sample_config(sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) - - self.final_layer_norm.set_sample_config(sample_embed_dim=self.sample_embed_dim) + self.fc1.set_sample_config( + sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) + self.fc2.set_sample_config( + sample_in_dim=self.sample_ffn_embed_dim_this_layer, sample_out_dim=self.sample_embed_dim) + self.final_layer_norm.set_sample_config( + sample_embed_dim=self.sample_embed_dim) def prepare_for_onnx_export_(self): self.onnx_trace = True @@ -921,13 +966,15 @@ def forward( if self.encoder_attn is not None: residual = x - x = 
self.maybe_layer_norm(self.encoder_attn_layer_norm, x, before=True) + x = self.maybe_layer_norm( + self.encoder_attn_layer_norm, x, before=True) if prev_attn_state is not None: if incremental_state is None: incremental_state = {} prev_key, prev_value = prev_attn_state saved_state = {"prev_key": prev_key, "prev_value": prev_value} - self.encoder_attn._set_input_buffer(incremental_state, saved_state) + self.encoder_attn._set_input_buffer( + incremental_state, saved_state) x, attn = self.encoder_attn( query=x, key=encoder_out, @@ -939,12 +986,14 @@ def forward( ) x = F.dropout(x, p=self.sample_dropout, training=self.training) x = residual + x - x = self.maybe_layer_norm(self.encoder_attn_layer_norm, x, after=True) + x = self.maybe_layer_norm( + self.encoder_attn_layer_norm, x, after=True) residual = x x = self.maybe_layer_norm(self.final_layer_norm, x, before=True) x = self.activation_fn(self.fc1(x)) - x = F.dropout(x, p=self.sample_activation_dropout, training=self.training) + x = F.dropout(x, p=self.sample_activation_dropout, + training=self.training) x = self.fc2(x) x = F.dropout(x, p=self.sample_dropout, training=self.training) x = residual + x @@ -965,28 +1014,36 @@ def maybe_layer_norm(self, layer_norm, x, before=False, after=False): def make_generation_fast_(self, need_attn=False, **kwargs): self.need_attn = need_attn + def calc_dropout(dropout, sample_embed_dim, super_embed_dim): return dropout * 1.0 * sample_embed_dim / super_embed_dim + def Embedding(num_embeddings, embedding_dim, padding_idx): return EmbeddingSuper(num_embeddings, embedding_dim, padding_idx=padding_idx) + def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): m = nn.Linear(in_features, out_features, bias) - nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_(m.weight, non_linear=non_linear) + nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_( + m.weight, non_linear=non_linear) if bias: nn.init.constant_(m.bias, 0.) return m + def calc_dropout(dropout, sample_embed_dim, super_embed_dim): return dropout * 1.0 * sample_embed_dim / super_embed_dim + def Embedding(num_embeddings, embedding_dim, padding_idx): return EmbeddingSuper(num_embeddings, embedding_dim, padding_idx=padding_idx) + def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): m = nn.Linear(in_features, out_features, bias) - nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_(m.weight, non_linear=non_linear) + nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_( + m.weight, non_linear=non_linear) if bias: nn.init.constant_(m.bias, 0.) 
return m From 4b05fd240d09a888fc0e1af12f257800490bd28c Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 17 Nov 2022 22:06:23 -0800 Subject: [PATCH 07/60] Cleanup imports --- neural_compressor/experimental/nas/dynas.py | 2 +- .../machine_translation/transformer_interface.py | 10 +--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 7ba96ef169e..7cd280ff6c1 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -75,7 +75,7 @@ def __init__(self, conf_fname_or_obj): 'ofa_resnet50': OFARunner, 'ofa_mbv3_d234_e346_k357_w1.0': OFARunner, 'ofa_mbv3_d234_e346_k357_w1.2': OFARunner, - 'transformer_lt_wmt_en_de': TransformerLTRunner + 'transformer_lt_wmt_en_de': TransformerLTRunner, } self.EVALUATION_INTERFACE = {'ofa_resnet50': EvaluationInterfaceResNet50, diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index 554b7619a7a..a7ca55a153d 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -3,25 +3,17 @@ """ import torch -from fairseq import checkpoint_utils, options, progress_bar, tasks, utils +from fairseq import options, progress_bar, tasks, utils from fairseq.meters import StopwatchMeter, TimeMeter import sys -import pdb import numpy as np -import subprocess import os from fairseq.data import dictionary -import csv -import json import warnings from .transformer_supernetwork import TransformerSuperNetwork import sys -import logging -import tqdm import time -import copy -from datetime import datetime import ctypes import math warnings.filterwarnings("ignore") From 1e19ff8d88698e5603cbf44a7f420acc9415c1d4 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 17 Nov 2022 22:12:14 -0800 Subject: [PATCH 08/60] Cleanup imports --- neural_compressor/experimental/nas/dynas.py | 12 ++++---- .../experimental/nas/dynast/dynas_manager.py | 3 +- .../experimental/nas/dynast/dynas_search.py | 6 ++-- .../experimental/nas/dynast/dynas_utils.py | 18 +++++------ .../modules_supernetwork.py | 11 +++---- .../transformer_interface.py | 20 ++++++------- .../transformer_supernetwork.py | 30 ++++++------------- 7 files changed, 43 insertions(+), 57 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 7cd280ff6c1..bf8e7482213 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -13,8 +13,8 @@ # limitations under the License. 
import os -import pandas as pd +import pandas as pd from neural_compressor.conf.config import Conf, NASConfig from neural_compressor.utils import logger @@ -31,12 +31,14 @@ class DyNAS(NASBase): """ def __init__(self, conf_fname_or_obj): - from .dynast.dynas_manager import ParameterManager - from .dynast.dynas_manager import TransformerLTEncoding + from .dynast.dynas_manager import (ParameterManager, + TransformerLTEncoding) from .dynast.dynas_predictor import Predictor - from .dynast.dynas_search import ProblemMultiObjective, SearchAlgoManager + from .dynast.dynas_search import (ProblemMultiObjective, + SearchAlgoManager) from .dynast.dynas_utils import (EvaluationInterfaceMobileNetV3, - EvaluationInterfaceResNet50, EvaluationInterfaceTransformerLT, + EvaluationInterfaceResNet50, + EvaluationInterfaceTransformerLT, OFARunner, TransformerLTRunner) self.ParameterManager = ParameterManager diff --git a/neural_compressor/experimental/nas/dynast/dynas_manager.py b/neural_compressor/experimental/nas/dynast/dynas_manager.py index 61956464e97..6908e8d2d73 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_manager.py +++ b/neural_compressor/experimental/nas/dynast/dynas_manager.py @@ -21,9 +21,8 @@ import numpy as np import pandas as pd -from sklearn.model_selection import train_test_split - from neural_compressor.utils import logger +from sklearn.model_selection import train_test_split class ParameterManager: diff --git a/neural_compressor/experimental/nas/dynast/dynas_search.py b/neural_compressor/experimental/nas/dynast/dynas_search.py index fc3d5123450..e74075aac74 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_search.py +++ b/neural_compressor/experimental/nas/dynast/dynas_search.py @@ -20,15 +20,15 @@ import autograd.numpy as anp import numpy as np import pymoo +from neural_compressor.experimental.nas.dynast.dynas_utils import \ + EvaluationInterface +from neural_compressor.utils import logger from pymoo.algorithms.moo.age import AGEMOEA from pymoo.algorithms.moo.nsga2 import NSGA2 from pymoo.core.problem import Problem from pymoo.factory import get_crossover, get_mutation, get_sampling from pymoo.optimize import minimize -from neural_compressor.experimental.nas.dynast.dynas_utils import EvaluationInterface -from neural_compressor.utils import logger - class SearchAlgoManager: """ diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 2149860ff8e..7940834d49f 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -25,18 +25,18 @@ import numpy as np import ofa from fvcore.nn import FlopCountAnalysis -from ofa.imagenet_classification.data_providers.imagenet import ImagenetDataProvider -from ofa.imagenet_classification.run_manager import ImagenetRunConfig, RunManager -from ofa.tutorial.flops_table import rm_bn_from_net - -from neural_compressor.experimental.nas.dynast.dynas_manager import ParameterManager +from neural_compressor.experimental.nas.dynast.dynas_manager import \ + ParameterManager from neural_compressor.experimental.nas.dynast.dynas_predictor import Predictor +from neural_compressor.experimental.nas.supernetwork.machine_translation.transformer_interface import ( + compute_bleu, compute_latency) from neural_compressor.utils.utility import LazyImport, logger +from ofa.imagenet_classification.data_providers.imagenet import \ + ImagenetDataProvider +from ofa.imagenet_classification.run_manager import 
(ImagenetRunConfig, + RunManager) +from ofa.tutorial.flops_table import rm_bn_from_net -from neural_compressor.experimental.nas.supernetwork.machine_translation.transformer_interface import ( - compute_bleu, - compute_latency -) torch = LazyImport('torch') torchvision = LazyImport('torchvision') diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py index 61aeb409ae5..19a7f8b2b8f 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py @@ -1,14 +1,11 @@ +from collections import defaultdict + import torch import torch.nn as nn import torch.nn.functional as F - -import numpy as np -from torch.nn.modules.module import _addindent -from torch.nn import Parameter - from fairseq import utils -from collections import defaultdict - +from torch.nn import Parameter +from torch.nn.modules.module import _addindent INCREMENTAL_STATE_INSTANCE_ID = defaultdict(lambda: 0) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index a7ca55a153d..8caef91a202 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -1,21 +1,21 @@ """ Translate pre-processed data with a trained model. """ -import torch - -from fairseq import options, progress_bar, tasks, utils -from fairseq.meters import StopwatchMeter, TimeMeter +import ctypes +import math +import os import sys +import time +import warnings + import numpy as np -import os +import torch +from fairseq import options, progress_bar, tasks, utils from fairseq.data import dictionary -import warnings +from fairseq.meters import StopwatchMeter, TimeMeter + from .transformer_supernetwork import TransformerSuperNetwork -import sys -import time -import ctypes -import math warnings.filterwarnings("ignore") diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py index a47837bb36f..edf70234ea4 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py @@ -1,27 +1,15 @@ +import math + import torch -from torch import nn -from torch.nn import Parameter import torch.nn.functional as F - -from fairseq import utils from fairseq import options, utils -from fairseq.models import ( - FairseqEncoder, - FairseqIncrementalDecoder, - BaseFairseqModel -) - -from fairseq.modules import ( - PositionalEmbedding, SinusoidalPositionalEmbedding) -from .modules_supernetwork import ( - - MultiheadAttentionSuper, - EmbeddingSuper, - LinearSuper, - LayerNormSuper - -) -import math +from fairseq.models import (BaseFairseqModel, FairseqEncoder, + FairseqIncrementalDecoder) +from fairseq.modules import PositionalEmbedding, SinusoidalPositionalEmbedding +from torch import nn + +from .modules_supernetwork import (EmbeddingSuper, LayerNormSuper, LinearSuper, + MultiheadAttentionSuper) DEFAULT_MAX_SOURCE_POSITIONS = 1024 
DEFAULT_MAX_TARGET_POSITIONS = 1024 From 8d7f56be8a5939c17f6e97e7e795302ae5b5c9f0 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 10:46:46 -0800 Subject: [PATCH 09/60] Replace print with logger --- .../experimental/nas/dynast/dynas_search.py | 2 - .../transformer_interface.py | 45 ++++++------------- .../transformer_supernetwork.py | 4 +- 3 files changed, 17 insertions(+), 34 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_search.py b/neural_compressor/experimental/nas/dynast/dynas_search.py index e74075aac74..6a0a07b7f89 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_search.py +++ b/neural_compressor/experimental/nas/dynast/dynas_search.py @@ -211,7 +211,5 @@ def _evaluate( objective_x_arr.append(objective_x) objective_y_arr.append(objective_y) - print('.', end='', flush=True) - # Update PyMoo with evaluation data out["F"] = anp.column_stack([objective_x_arr, objective_y_arr]) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index 8caef91a202..c9fce76927d 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -14,6 +14,8 @@ from fairseq.data import dictionary from fairseq.meters import StopwatchMeter, TimeMeter +from neural_compressor.utils import logger + from .transformer_supernetwork import TransformerSuperNetwork warnings.filterwarnings("ignore") @@ -23,8 +25,7 @@ from fairseq import libbleu except ImportError as e: import sys - sys.stderr.write( - 'ERROR: missing libbleu.so. run `pip install --editable .`\n') + logger.error('missing libbleu.so. 
run `pip install --editable .`') raise e @@ -120,7 +121,6 @@ def result_string(self, order=4): def get_bleu_score(args, ref, sys): dict = dictionary.Dictionary() order = 4 - sacrebleu = False sentence_bleu = False ignore_case = False @@ -140,7 +140,6 @@ def score(fdsys): sys_tok = dict.encode_line(sys_tok) ref_tok = dict.encode_line(ref_tok) scorer.add(ref_tok, sys_tok) - print(i, scorer.result_string(order)) else: def score(fdsys): with open(ref) as fdref: @@ -149,7 +148,6 @@ def score(fdsys): sys_tok = dict.encode_line(sys_tok) ref_tok = dict.encode_line(ref_tok) scorer.add(ref_tok, sys_tok) - print(scorer.result_string(order)) return(scorer.score(order)) if sys == '-': @@ -157,6 +155,7 @@ def score(fdsys): else: with open(sys, 'r') as f: score = score(f) + logger.debug('Achieved BLEU score: {}'.format(score)) return score @@ -175,7 +174,6 @@ def compute_bleu(config, dataset_path, checkpoint_path): args.target_lang = 'de' args.batch_size = 128 utils.import_user_module(args) - max_tokens = 12000 use_cuda = torch.cuda.is_available() and not args.cpu @@ -197,7 +195,7 @@ def compute_bleu(config, dataset_path, checkpoint_path): tgt_dict = task.target_dictionary # Load ensemble - print('| loading model(s) from {}'.format(args.path)) + logger.info('Loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) state = torch.load(checkpoint_path, map_location=torch.device('cpu')) @@ -206,7 +204,6 @@ def compute_bleu(config, dataset_path, checkpoint_path): if use_cuda: model.cuda() - print(config) model.set_sample_config(config) model.make_generation_fast_( beamable_mm_beam_size=None if args.no_beamable_mm else args.beam, @@ -217,7 +214,6 @@ def compute_bleu(config, dataset_path, checkpoint_path): if use_cuda: model.cuda() - print(args.path, file=sys.stderr) # Load alignment dictionary for unknown word replacement # (None if no unknown word replacement, empty if no path to align dictionary) @@ -297,13 +293,11 @@ def compute_bleu(config, dataset_path, checkpoint_path): if not args.quiet: if src_dict is not None: - #print('S-{}\t{}'.format(sample_id, src_str)) fname_translations.write( 'S-{}\t{}'.format(sample_id, src_str)) fname_translations.write('\n') if has_target: - #print('T-{}\t{}'.format(sample_id, target_str)) fname_translations.write( 'T-{}\t{}'.format(sample_id, target_str)) fname_translations.write('\n') @@ -351,7 +345,6 @@ def compute_bleu(config, dataset_path, checkpoint_path): os.system( "grep ^T translations_out.txt | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > ref.txt") bleu_score = get_bleu_score(args, "ref.txt", "sys.txt") - print(bleu_score) os.system("rm ref.txt") os.system("rm sys.txt") @@ -373,7 +366,6 @@ def compute_latency(config, dataset_path, get_model_parameters=False): args.target_lang = 'de' args.batch_size = 128 utils.import_user_module(args) - max_tokens = 12000 args.latgpu = False args.latcpu = True args.latiter = 100 @@ -387,15 +379,9 @@ def compute_latency(config, dataset_path, get_model_parameters=False): # Load dataset splits task = tasks.setup_task(args) task.load_dataset(args.gen_subset) - # Set dictionaries - try: - src_dict = getattr(task, 'source_dictionary', None) - except NotImplementedError: - src_dict = None - tgt_dict = task.target_dictionary # Load ensemble - print('| loading model(s) from {}'.format(args.path)) + logger.info('Loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) # specify the length of the dummy input for profile @@ -417,13 +403,13 @@ def compute_latency(config, 
dataset_path, get_model_parameters=False): args.beam).view(-1).long() # .cuda() if args.latcpu: model.cpu() - print('Measuring model latency on CPU for dataset generation...') + logger.info('Measuring model latency on CPU for dataset generation...') elif args.latgpu: model.cuda() src_tokens_test = src_tokens_test # .cuda() src_lengths_test = src_lengths_test # .cuda() prev_output_tokens_test_with_beam = prev_output_tokens_test_with_beam # .cuda() - print('Measuring model latency on GPU for dataset generation...') + logger.info('Measuring model latency on GPU for dataset generation...') start = torch.cuda.Event(enable_timing=True) end = torch.cuda.Event(enable_timing=True) @@ -439,10 +425,9 @@ def compute_latency(config, dataset_path, get_model_parameters=False): src_tokens=src_tokens_test, src_lengths=src_lengths_test) encoder_latencies = [] - print('Measuring encoder for dataset generation...') + logger.info('Measuring encoder for dataset generation...') for _ in range(args.latiter): if args.latgpu: - # start.record() start = time.time() elif args.latcpu: start = time.time() @@ -460,8 +445,8 @@ def compute_latency(config, dataset_path, get_model_parameters=False): encoder_latencies.sort() encoder_latencies = encoder_latencies[int( args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] - print( - f'Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; \t Std: {np.std(encoder_latencies)} ms') + logger.info( + f'Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; Std: {np.std(encoder_latencies)} ms') encoder_out_test_with_beam = model.encoder.reorder_encoder_out( encoder_out_test, new_order) @@ -475,13 +460,12 @@ def compute_latency(config, dataset_path, get_model_parameters=False): decoder_iterations_dict = {'iwslt': 23, 'wmt': 30} decoder_iterations = decoder_iterations_dict['wmt'] - print(decoder_iterations) decoder_latencies = [] - print('Measuring decoder for dataset generation...') + + logger.info('Measuring decoder for dataset generation...') for _ in range(args.latiter): if args.latgpu: start = time.time() - # start.record() elif args.latcpu: start = time.time() incre_states = {} @@ -501,8 +485,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): decoder_latencies = decoder_latencies[int( args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] - print(decoder_latencies) - print( + logger.info( f'Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') lat_mean = np.mean(encoder_latencies)+np.mean(decoder_latencies) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py index edf70234ea4..af11dbd3720 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py @@ -8,6 +8,8 @@ from fairseq.modules import PositionalEmbedding, SinusoidalPositionalEmbedding from torch import nn +from neural_compressor.utils import logger + from .modules_supernetwork import (EmbeddingSuper, LayerNormSuper, LinearSuper, MultiheadAttentionSuper) @@ -88,6 +90,7 @@ def get_sampled_params_numel(self, config): return sum(numels) def set_sample_config(self, config): + logger.debug('Setting active configuration to {}'.format(config)) self.encoder.set_sample_config(config) 
self.decoder.set_sample_config(config) @@ -231,7 +234,6 @@ def forward(self, src_tokens, src_lengths): all_x = [] # encoder layers for layer in self.layers: - # print(x.shape) x = layer(x, encoder_padding_mask) all_x.append(x) From 55e7725d503cb7eef7232bd9c6e8d2868a029674 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 10:50:36 -0800 Subject: [PATCH 10/60] Replace `os.system('rm ...')` with `os.remove` --- .../machine_translation/transformer_interface.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index c9fce76927d..bbf7bbfb841 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -346,9 +346,9 @@ def compute_bleu(config, dataset_path, checkpoint_path): "grep ^T translations_out.txt | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > ref.txt") bleu_score = get_bleu_score(args, "ref.txt", "sys.txt") - os.system("rm ref.txt") - os.system("rm sys.txt") - os.system("rm translations_out.txt") + os.remove("ref.txt") + os.remove("sys.txt") + os.remove("translations_out.txt") return bleu_score From 16cccb4a83923822c14d892fdd2fc634f45b2542 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 11:04:26 -0800 Subject: [PATCH 11/60] Remove unused logs --- .../supernetwork/machine_translation/transformer_interface.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index bbf7bbfb841..befef3803f1 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -195,7 +195,6 @@ def compute_bleu(config, dataset_path, checkpoint_path): tgt_dict = task.target_dictionary # Load ensemble - logger.info('Loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) state = torch.load(checkpoint_path, map_location=torch.device('cpu')) @@ -381,7 +380,6 @@ def compute_latency(config, dataset_path, get_model_parameters=False): task.load_dataset(args.gen_subset) # Load ensemble - logger.info('Loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) # specify the length of the dummy input for profile From cb121e26c5debe453905e1fa51e441435a73377e Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 11:09:10 -0800 Subject: [PATCH 12/60] Make variable names more descriptive --- .../machine_translation/transformer_interface.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index befef3803f1..a4c4e431959 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -118,7 +118,7 @@ def result_string(self, order=4): self.stat.predlen, self.stat.reflen) -def 
get_bleu_score(args, ref, sys): +def get_bleu_score(args, reference_sentences_fpath, translated_sentences_fpath): dict = dictionary.Dictionary() order = 4 sentence_bleu = False @@ -133,7 +133,7 @@ def readlines(fd): if sentence_bleu: def score(fdsys): - with open(ref) as fdref: + with open(reference_sentences_fpath) as fdref: scorer = Scorer(dict.pad(), dict.eos(), dict.unk()) for i, (sys_tok, ref_tok) in enumerate(zip(readlines(fdsys), readlines(fdref))): scorer.reset(one_init=True) @@ -142,7 +142,7 @@ def score(fdsys): scorer.add(ref_tok, sys_tok) else: def score(fdsys): - with open(ref) as fdref: + with open(reference_sentences_fpath) as fdref: scorer = Scorer(dict.pad(), dict.eos(), dict.unk()) for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)): sys_tok = dict.encode_line(sys_tok) @@ -150,10 +150,10 @@ def score(fdsys): scorer.add(ref_tok, sys_tok) return(scorer.score(order)) - if sys == '-': + if translated_sentences_fpath == '-': score = score(sys.stdin) else: - with open(sys, 'r') as f: + with open(translated_sentences_fpath, 'r') as f: score = score(f) logger.debug('Achieved BLEU score: {}'.format(score)) return score From 1e9b6b8a9f001a18f8a089142a3c8dfad991bc1c Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 11:51:10 -0800 Subject: [PATCH 13/60] Remove duplicate definitions --- .../transformer_supernetwork.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py index af11dbd3720..083a3492c0c 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py @@ -1020,20 +1020,3 @@ def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='line if bias: nn.init.constant_(m.bias, 0.) return m - - -def calc_dropout(dropout, sample_embed_dim, super_embed_dim): - return dropout * 1.0 * sample_embed_dim / super_embed_dim - - -def Embedding(num_embeddings, embedding_dim, padding_idx): - return EmbeddingSuper(num_embeddings, embedding_dim, padding_idx=padding_idx) - - -def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): - m = nn.Linear(in_features, out_features, bias) - nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_( - m.weight, non_linear=non_linear) - if bias: - nn.init.constant_(m.bias, 0.) 
- return m From 322c277432fca3b9512157126c3087a7fd02212e Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 11:52:02 -0800 Subject: [PATCH 14/60] Remove unused code --- .../transformer_supernetwork.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py index 083a3492c0c..69b9ad6ed25 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py @@ -2,7 +2,7 @@ import torch import torch.nn.functional as F -from fairseq import options, utils +from fairseq import utils from fairseq.models import (BaseFairseqModel, FairseqEncoder, FairseqIncrementalDecoder) from fairseq.modules import PositionalEmbedding, SinusoidalPositionalEmbedding @@ -377,17 +377,7 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.project_out_dim = Linear(self.super_embed_dim, self.output_embed_dim, bias=False) \ if self.super_embed_dim != self.output_embed_dim and not args.tie_adaptive_weights else None - if False: # args.adaptive_softmax_cutoff is not None: - self.adaptive_softmax = AdaptiveSoftmax( - len(dictionary), - self.output_embed_dim, - options.eval_str_list(args.adaptive_softmax_cutoff, type=int), - dropout=args.adaptive_softmax_dropout, - adaptive_inputs=embed_tokens if args.tie_adaptive_weights else None, - factor=args.adaptive_softmax_factor, - tie_proj=args.tie_adaptive_proj, - ) - elif not self.share_input_output_embed: + if not self.share_input_output_embed: self.embed_out = nn.Parameter(torch.Tensor( len(dictionary), self.output_embed_dim)) nn.init.normal_(self.embed_out, mean=0, @@ -843,7 +833,7 @@ def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv # use layerNorm rather than FusedLayerNorm for exporting. # char_inputs can be used to determint this. # TODO remove this once we update apex with the fix - export = False + self.self_attn_layer_norm = LayerNormSuper(self.super_embed_dim) if no_encoder_attn: From abddec7713f9aa00e4dcbcccd6aac5a62fc62fb7 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 11:52:20 -0800 Subject: [PATCH 15/60] Add TODOs --- .../machine_translation/transformer_interface.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py index a4c4e431959..d0f700eaa15 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py @@ -24,7 +24,6 @@ try: from fairseq import libbleu except ImportError as e: - import sys logger.error('missing libbleu.so. 
run `pip install --editable .`') raise e @@ -155,7 +154,7 @@ def score(fdsys): else: with open(translated_sentences_fpath, 'r') as f: score = score(f) - logger.debug('Achieved BLEU score: {}'.format(score)) + logger.info('Achieved BLEU score: {}'.format(score)) return score @@ -164,7 +163,7 @@ def compute_bleu(config, dataset_path, checkpoint_path): parser = options.get_generation_parser() args = options.parse_args_and_arch(parser, [dataset_path]) - + # TODO(macsz) Un-hardcode args args.data = dataset_path args.beam = 5 args.remove_bpe = '@@ ' @@ -240,7 +239,6 @@ def compute_bleu(config, dataset_path, checkpoint_path): num_sentences = 0 has_target = True - decoder_times_all = [] input_len_all = [] with open('translations_out.txt', 'a') as fname_translations: with progress_bar.build_progress_bar(args, itr) as t: @@ -356,6 +354,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): args = options.parse_args_and_arch(parser, [dataset_path]) + # TODO(macsz) Un-hardcode args args.data = dataset_path args.beam = 5 args.remove_bpe = '@@ ' From 051ae2626fb6832d6f86ea616b78170084fba4d5 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 11:59:25 -0800 Subject: [PATCH 16/60] Update progress tracking --- neural_compressor/experimental/nas/dynas.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index bf8e7482213..af1903bc0f4 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -152,10 +152,11 @@ def search(self): # Start Lightweight Iterative Neural Architecture Search (LINAS) num_loops = round(self.num_evals/self.population) for loop in range(num_loops): - logger.info( - '[DyNAS-T] Starting LINAS loop {} of {}.'.format(loop+1, num_loops)) - for individual in latest_population: + for i, individual in enumerate(latest_population): + logger.info( + '[DyNAS-T] Starting eval {} of {} in LINAS loop {} of {}.'.format( + i+1, max(self.population - df.shape[0], 0), loop+1, num_loops)) self.validation_interface.eval_subnet(individual) self.create_acc_predictor() From d70c55e4bfbdbae7493d0b5fe96e6de4d388c2aa Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 12:23:29 -0800 Subject: [PATCH 17/60] Move supernetwork dir under dynast dir --- neural_compressor/experimental/nas/dynas.py | 4 ---- neural_compressor/experimental/nas/dynast/dynas_utils.py | 2 +- .../supernetwork/machine_translation/modules_supernetwork.py | 0 .../supernetwork/machine_translation/transformer_interface.py | 0 .../machine_translation/transformer_supernetwork.py | 2 +- 5 files changed, 2 insertions(+), 6 deletions(-) rename neural_compressor/experimental/nas/{ => dynast}/supernetwork/machine_translation/modules_supernetwork.py (100%) rename neural_compressor/experimental/nas/{ => dynast}/supernetwork/machine_translation/transformer_interface.py (100%) rename neural_compressor/experimental/nas/{ => dynast}/supernetwork/machine_translation/transformer_supernetwork.py (99%) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index af1903bc0f4..7821bca9e7b 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -112,10 +112,6 @@ def init_for_search(self): self.supernet_manager = self.SUPERNET_ENCODING[self.supernet]( param_dict=self.SUPERNET_PARAMETERS[self.supernet], seed=self.seed ) - # self.supernet_manager = 
self.ParameterManager( - # param_dict=self.SUPERNET_PARAMETERS[self.supernet], - # seed=self.seed - # ) # Validation High-Fidelity Measurement Runner self.runner_validate = self.RUNNERS[self.supernet]( diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 7940834d49f..a62838dfa2e 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -28,7 +28,7 @@ from neural_compressor.experimental.nas.dynast.dynas_manager import \ ParameterManager from neural_compressor.experimental.nas.dynast.dynas_predictor import Predictor -from neural_compressor.experimental.nas.supernetwork.machine_translation.transformer_interface import ( +from neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface import ( compute_bleu, compute_latency) from neural_compressor.utils.utility import LazyImport, logger from ofa.imagenet_classification.data_providers.imagenet import \ diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py similarity index 100% rename from neural_compressor/experimental/nas/supernetwork/machine_translation/modules_supernetwork.py rename to neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py similarity index 100% rename from neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_interface.py rename to neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py diff --git a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py similarity index 99% rename from neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py rename to neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index 69b9ad6ed25..b0619f62184 100644 --- a/neural_compressor/experimental/nas/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -90,7 +90,7 @@ def get_sampled_params_numel(self, config): return sum(numels) def set_sample_config(self, config): - logger.debug('Setting active configuration to {}'.format(config)) + logger.info('[DyNAS-T] Setting active configuration to {}'.format(config)) self.encoder.set_sample_config(config) self.decoder.set_sample_config(config) From 4a60f47be59752f59b22b6ff48b0b0b708ad5631 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 12:30:37 -0800 Subject: [PATCH 18/60] Update logging and TODos --- neural_compressor/experimental/nas/dynas.py | 6 +++--- neural_compressor/experimental/nas/dynast/dynas_utils.py | 4 ++-- .../machine_translation/transformer_interface.py | 9 +++++---- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 7821bca9e7b..eef3fcdbf20 100644 --- 
a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -217,7 +217,7 @@ def select_model_arch(self): # pragma: no cover def create_acc_predictor(self): if 'acc' in self.metrics: - logger.info('Building Accuracy Predictor') + logger.info('[DyNAS-T] Building Accuracy Predictor') df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='acc', @@ -230,7 +230,7 @@ def create_acc_predictor(self): def create_macs_predictor(self): if 'macs' in self.metrics: - logger.info('Building MACs Predictor') + logger.info('[DyNAS-T] Building MACs Predictor') df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='macs', @@ -243,7 +243,7 @@ def create_macs_predictor(self): def create_latency_predictor(self): if 'lat' in self.metrics: - logger.info('Building Latency Predictor') + logger.info('[DyNAS-T] Building Latency Predictor') df = self.supernet_manager.import_csv(self.results_csv_path, config='config', objective='lat', diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index a62838dfa2e..65b5b478a75 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -227,7 +227,7 @@ def validate_macs( model = self.get_subnet(subnet_cfg) input_size = (self.batch_size, 3, 224, 224) macs = get_macs(model=model, input_size=input_size, device=self.device) - logger.info('Model\'s macs: {}'.format(macs)) + logger.info('[DyNAS-T] Model\'s macs: {}'.format(macs)) return macs @torch.no_grad() @@ -254,7 +254,7 @@ def measure_latency( device=self.device, ) logger.info( - 'Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) + '[DyNAS-T] Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) return latency_mean, latency_std diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index d0f700eaa15..940ab7e47f5 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -337,6 +337,7 @@ def compute_bleu(config, dataset_path, checkpoint_path): t.log({'wps': round(wps_meter.avg)}) num_sentences += sample['nsentences'] + # TODO(macsz) Try to convert this system call to Python code os.system( "grep ^H translations_out.txt | cut -f3- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > sys.txt") os.system( @@ -422,7 +423,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): src_tokens=src_tokens_test, src_lengths=src_lengths_test) encoder_latencies = [] - logger.info('Measuring encoder for dataset generation...') + logger.info('[DyNAS-T] Measuring encoder for dataset generation...') for _ in range(args.latiter): if args.latgpu: start = time.time() @@ -443,7 +444,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): encoder_latencies = encoder_latencies[int( args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] logger.info( - f'Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; Std: {np.std(encoder_latencies)} ms') + f'[DyNAS-T] Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; Std: {np.std(encoder_latencies)} ms') encoder_out_test_with_beam = 
model.encoder.reorder_encoder_out( encoder_out_test, new_order) @@ -459,7 +460,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): decoder_iterations = decoder_iterations_dict['wmt'] decoder_latencies = [] - logger.info('Measuring decoder for dataset generation...') + logger.info('[DyNAS-T] Measuring decoder for dataset generation...') for _ in range(args.latiter): if args.latgpu: start = time.time() @@ -483,7 +484,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] logger.info( - f'Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') + f'[DyNAS-T] Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') lat_mean = np.mean(encoder_latencies)+np.mean(decoder_latencies) lat_std = np.std(encoder_latencies)+np.std(decoder_latencies) From d8866a9d5c73af3c75d6dbc2a664e18e76dca94e Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 12:31:45 -0800 Subject: [PATCH 19/60] Log warning when measuring MACs for transformer LT (not supported) --- neural_compressor/experimental/nas/dynast/dynas_utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 65b5b478a75..a7a01f8f6a7 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -342,12 +342,9 @@ def validate_macs( Returns: `macs` """ - - #model = self.get_subnet(subnet_cfg) - #input_size = (self.batch_size, 3, 224, 224) - #macs = get_macs(model=model, input_size=input_size, device=self.device) + logger.warning('Transformer LT search space does not currently support MACs metric.') + # TODO(macsz) Provide fix for MACs measurement for Transformer LT search space. 
macs = 0 - #logger.info('Model\'s macs: {}'.format(macs)) return macs @torch.no_grad() From 41bebb4f18a677600ad86774ac203b45c6b717a0 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 12:34:09 -0800 Subject: [PATCH 20/60] Update LINAS loop --- neural_compressor/experimental/nas/dynas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index eef3fcdbf20..53e18a1afb5 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -152,7 +152,7 @@ def search(self): for i, individual in enumerate(latest_population): logger.info( '[DyNAS-T] Starting eval {} of {} in LINAS loop {} of {}.'.format( - i+1, max(self.population - df.shape[0], 0), loop+1, num_loops)) + i+1, len(latest_population), loop+1, num_loops)) self.validation_interface.eval_subnet(individual) self.create_acc_predictor() From e773a55a2681ed34af66b49190e0e2eff1cb0faf Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 12:36:57 -0800 Subject: [PATCH 21/60] Fix error when CSV file does not exist --- neural_compressor/experimental/nas/dynas.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 53e18a1afb5..6b205e4e269 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -140,7 +140,11 @@ def search(self): # Randomly sample search space for initial population # if number of results in results_csv_path smaller than population. - # TODO(macsz) Create empty CSV if it does not exists. + + if not os.path.exists(self.results_csv_path): + # Clear also creates empty CSV file. + self.validation_interface.clear_csv() + df = pd.read_csv(self.results_csv_path) latest_population = [self.supernet_manager.random_sample() for _ in range(max(self.population - df.shape[0], 0))] From aba049551dad4dc6c80aae88cd3a1d3b0e780738 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 18 Nov 2022 12:45:25 -0800 Subject: [PATCH 22/60] Change column names in CSV file for Transformer LT --- .../experimental/nas/dynast/dynas_utils.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index a7a01f8f6a7..ef03435426f 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -277,10 +277,6 @@ def get_subnet( class TransformerLTRunner(Runner): - """The OFARunner class manages the sub-network selection from the OFA super-network and - the validation measurements of the sub-networks. ResNet50, MobileNetV3 w1.0, and MobileNetV3 w1.2 - are currently supported. Imagenet is required for these super-networks `imagenet-ilsvrc2012`. - """ def __init__( self, @@ -354,7 +350,7 @@ def measure_latency( warmup_steps: int = None, measure_steps: int = None, ) -> Tuple[float, float]: - """Measure OFA model's latency. + """Measure model's latency. 
Args: subnet_cfg: sub-network Torch model Returns: @@ -408,7 +404,7 @@ def clear_csv(self) -> None: f = open(self.csv_path, "w") writer = csv.writer(f) result = ['Sub-network', 'Date', - 'Latency (ms)', ' MACs', 'Top-1 Acc (%)'] + 'Latency (ms)', 'MACs', 'Top-1 Acc (%)'] writer.writerow(result) f.close() @@ -601,6 +597,15 @@ def eval_subnet( else: return sample, macs, -bleu + def clear_csv(self) -> None: + if self.csv_path: + f = open(self.csv_path, "w") + writer = csv.writer(f) + result = ['Sub-network', 'Date', + 'Latency (ms)', 'MACs', 'BLEU'] + writer.writerow(result) + f.close() + def get_torchvision_model( model_name: str, From ead69995bafcfd1b2ff3faa940404ccf59f6c36a Mon Sep 17 00:00:00 2001 From: "Nittur Sridhar, Sharath" Date: Fri, 18 Nov 2022 18:10:28 -0800 Subject: [PATCH 23/60] add macs computation for transformers --- .../experimental/nas/dynast/dynas_utils.py | 11 ++-- .../modules_supernetwork.py | 2 +- .../transformer_interface.py | 61 +++++++++++++++++++ .../transformer_supernetwork.py | 5 ++ 4 files changed, 73 insertions(+), 6 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index ef03435426f..7cfe7298b92 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -29,7 +29,7 @@ ParameterManager from neural_compressor.experimental.nas.dynast.dynas_predictor import Predictor from neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface import ( - compute_bleu, compute_latency) + compute_bleu, compute_latency, compute_macs) from neural_compressor.utils.utility import LazyImport, logger from ofa.imagenet_classification.data_providers.imagenet import \ ImagenetDataProvider @@ -338,9 +338,10 @@ def validate_macs( Returns: `macs` """ - logger.warning('Transformer LT search space does not currently support MACs metric.') - # TODO(macsz) Provide fix for MACs measurement for Transformer LT search space. 
- macs = 0 + + macs = compute_macs(subnet_cfg, self.dataset_path) + logger.info('[DyNAS-T] Model\'s macs: {}'.format(macs)) + return macs @torch.no_grad() @@ -360,7 +361,7 @@ def measure_latency( latency_mean, latency_std = compute_latency( subnet_cfg, self.dataset_path) logger.info( - 'Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) + '[DyNAS-T] Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) return latency_mean, latency_std diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index 19a7f8b2b8f..bdb44d25a54 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -381,7 +381,7 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No k = self.in_proj_k(key) v = self.in_proj_v(value) - q *= self.scaling + q = q * self.scaling if self.bias_k is not None: assert self.bias_v is not None diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 940ab7e47f5..d9a6f07b475 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -17,6 +17,7 @@ from neural_compressor.utils import logger from .transformer_supernetwork import TransformerSuperNetwork +from fvcore.nn import FlopCountAnalysis warnings.filterwarnings("ignore") @@ -489,3 +490,63 @@ def compute_latency(config, dataset_path, get_model_parameters=False): lat_mean = np.mean(encoder_latencies)+np.mean(decoder_latencies) lat_std = np.std(encoder_latencies)+np.std(decoder_latencies) return lat_mean, lat_std + + +def compute_macs(config,dataset_path): + parser = options.get_generation_parser() + + args = options.parse_args_and_arch(parser,[dataset_path]) + + args.data = dataset_path + args.beam = 5 + args.remove_bpe = '@@ ' + args.gen_subset = 'test' + args.lenpen = 0.6 + args.source_lang = 'en' + args.target_lang = 'de' + args.batch_size = 128 + utils.import_user_module(args) + max_tokens = 12000 + args.latgpu=False + args.latcpu=True + args.latiter=100 + + # Initialize CUDA and distributed training + if torch.cuda.is_available() and not args.cpu: + torch.cuda.set_device(args.device_id) + torch.manual_seed(args.seed) + + #Optimize ensemble for generation + # Load dataset splits + task = tasks.setup_task(args) + task.load_dataset(args.gen_subset) + # Set dictionaries + try: + src_dict = getattr(task, 'source_dictionary', None) + except NotImplementedError: + src_dict = None + tgt_dict = task.target_dictionary + + # Load model + print('| loading model(s) from {}'.format(args.path)) + model = TransformerSuperNetwork(task) + + # specify the length of the dummy input for profile + # for iwslt, the average length is 23, for wmt, that is 30 + dummy_sentence_length_dict = {'iwslt': 23, 'wmt': 30} + + dummy_sentence_length = dummy_sentence_length_dict['wmt'] + + + dummy_src_tokens = [2] + [7] * (dummy_sentence_length - 1) + dummy_prev = [7] * (dummy_sentence_length - 1) + [2] + + model.set_sample_config(config) + + model.profile(mode=True) + macs = 
FlopCountAnalysis(model, (torch.tensor([dummy_src_tokens], dtype=torch.long), + torch.tensor([30]), torch.tensor([dummy_prev], dtype=torch.long))) + macs_tot = macs.total() + model.profile(mode=False) + + return macs_tot diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index b0619f62184..7890e0605d3 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -94,6 +94,11 @@ def set_sample_config(self, config): self.encoder.set_sample_config(config) self.decoder.set_sample_config(config) + def forward(self,src_tokens,src_lengths,prev_output_token): + encoder_output = self.encoder.forward(src_tokens,src_lengths) + output = self.decoder(prev_output_token,encoder_output) + return output + class TransformerEncoder(FairseqEncoder): """ From e222eb7c7a339b5fc8c31aeb3ec9c2a4665733ad Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Mon, 21 Nov 2022 12:52:13 -0800 Subject: [PATCH 24/60] Add `batch size` to compute latency for TransformerLT --- neural_compressor/experimental/nas/dynast/dynas_utils.py | 6 ++---- .../machine_translation/transformer_interface.py | 7 +++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 7cfe7298b92..2fa203fe9a2 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -338,7 +338,7 @@ def validate_macs( Returns: `macs` """ - + macs = compute_macs(subnet_cfg, self.dataset_path) logger.info('[DyNAS-T] Model\'s macs: {}'.format(macs)) @@ -348,8 +348,6 @@ def validate_macs( def measure_latency( self, subnet_cfg: dict, - warmup_steps: int = None, - measure_steps: int = None, ) -> Tuple[float, float]: """Measure model's latency. 
Args: @@ -359,7 +357,7 @@ def measure_latency( """ latency_mean, latency_std = compute_latency( - subnet_cfg, self.dataset_path) + subnet_cfg, self.dataset_path, self.batch_size) logger.info( '[DyNAS-T] Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index d9a6f07b475..8e38a457152 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -164,7 +164,7 @@ def compute_bleu(config, dataset_path, checkpoint_path): parser = options.get_generation_parser() args = options.parse_args_and_arch(parser, [dataset_path]) - # TODO(macsz) Un-hardcode args + args.data = dataset_path args.beam = 5 args.remove_bpe = '@@ ' @@ -351,12 +351,11 @@ def compute_bleu(config, dataset_path, checkpoint_path): return bleu_score -def compute_latency(config, dataset_path, get_model_parameters=False): +def compute_latency(config, dataset_path, batch_size, get_model_parameters=False): parser = options.get_generation_parser() args = options.parse_args_and_arch(parser, [dataset_path]) - # TODO(macsz) Un-hardcode args args.data = dataset_path args.beam = 5 args.remove_bpe = '@@ ' @@ -364,7 +363,7 @@ def compute_latency(config, dataset_path, get_model_parameters=False): args.lenpen = 0.6 args.source_lang = 'en' args.target_lang = 'de' - args.batch_size = 128 + args.batch_size = batch_size utils.import_user_module(args) args.latgpu = False args.latcpu = True From f5b79223cb0210aeddf47828696c7c340cf38188 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Mon, 21 Nov 2022 13:09:48 -0800 Subject: [PATCH 25/60] Remove old cuda calls --- .../transformer_interface.py | 23 +++++++------------ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 8e38a457152..99e5fb0bf3e 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -392,21 +392,21 @@ def compute_latency(config, dataset_path, batch_size, get_model_parameters=False dummy_prev = [7] * (dummy_sentence_length - 1) + [2] src_tokens_test = torch.tensor( - [dummy_src_tokens], dtype=torch.long) # .cuda() - src_lengths_test = torch.tensor([dummy_sentence_length]) # .cuda() + [dummy_src_tokens], dtype=torch.long) + src_lengths_test = torch.tensor([dummy_sentence_length]) prev_output_tokens_test_with_beam = torch.tensor( - [dummy_prev] * args.beam, dtype=torch.long) # .cuda() + [dummy_prev] * args.beam, dtype=torch.long) bsz = 1 new_order = torch.arange(bsz).view(-1, 1).repeat(1, - args.beam).view(-1).long() # .cuda() + args.beam).view(-1).long() if args.latcpu: model.cpu() logger.info('Measuring model latency on CPU for dataset generation...') elif args.latgpu: model.cuda() - src_tokens_test = src_tokens_test # .cuda() - src_lengths_test = src_lengths_test # .cuda() - prev_output_tokens_test_with_beam = prev_output_tokens_test_with_beam # .cuda() + src_tokens_test = src_tokens_test + src_lengths_test = src_lengths_test + 
prev_output_tokens_test_with_beam = prev_output_tokens_test_with_beam logger.info('Measuring model latency on GPU for dataset generation...') start = torch.cuda.Event(enable_timing=True) end = torch.cuda.Event(enable_timing=True) @@ -505,7 +505,6 @@ def compute_macs(config,dataset_path): args.target_lang = 'de' args.batch_size = 128 utils.import_user_module(args) - max_tokens = 12000 args.latgpu=False args.latcpu=True args.latiter=100 @@ -519,15 +518,9 @@ def compute_macs(config,dataset_path): # Load dataset splits task = tasks.setup_task(args) task.load_dataset(args.gen_subset) - # Set dictionaries - try: - src_dict = getattr(task, 'source_dictionary', None) - except NotImplementedError: - src_dict = None - tgt_dict = task.target_dictionary # Load model - print('| loading model(s) from {}'.format(args.path)) + logger.info('[DyNAS-T] loading model(s) from {}'.format(args.path)) model = TransformerSuperNetwork(task) # specify the length of the dummy input for profile From 4ef8f0543bce4aa196e20837c6343263c5743e89 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Tue, 22 Nov 2022 07:45:23 -0800 Subject: [PATCH 26/60] Remove comment --- .../supernetwork/machine_translation/modules_supernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index bdb44d25a54..8650a024db7 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -561,7 +561,7 @@ def reorder_incremental_state(self, incremental_state, new_order): self._set_input_buffer(incremental_state, input_buffer) def _get_input_buffer(self, incremental_state): - return get_incremental_state( # utils. + return get_incremental_state( self, incremental_state, 'attn_state', From f022e2a90a2e57f62c14ecfbd9280f6bfb3102de Mon Sep 17 00:00:00 2001 From: "Nittur Sridhar, Sharath" Date: Sun, 27 Nov 2022 22:19:31 -0800 Subject: [PATCH 27/60] replace bleu with sacrebleu --- .../transformer_interface.py | 266 ++---------------- 1 file changed, 20 insertions(+), 246 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 99e5fb0bf3e..dac058d24ec 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -13,6 +13,7 @@ from fairseq import options, progress_bar, tasks, utils from fairseq.data import dictionary from fairseq.meters import StopwatchMeter, TimeMeter +from fairseq.data.encoders.moses_tokenizer import MosesTokenizer from neural_compressor.utils import logger @@ -22,143 +23,6 @@ warnings.filterwarnings("ignore") -try: - from fairseq import libbleu -except ImportError as e: - logger.error('missing libbleu.so. 
run `pip install --editable .`') - raise e - - -C = ctypes.cdll.LoadLibrary(libbleu.__file__) - - -class BleuStat(ctypes.Structure): - _fields_ = [ - ('reflen', ctypes.c_size_t), - ('predlen', ctypes.c_size_t), - ('match1', ctypes.c_size_t), - ('count1', ctypes.c_size_t), - ('match2', ctypes.c_size_t), - ('count2', ctypes.c_size_t), - ('match3', ctypes.c_size_t), - ('count3', ctypes.c_size_t), - ('match4', ctypes.c_size_t), - ('count4', ctypes.c_size_t), - ] - - -class Scorer(object): - def __init__(self, pad, eos, unk): - self.stat = BleuStat() - self.pad = pad - self.eos = eos - self.unk = unk - self.reset() - - def reset(self, one_init=False): - if one_init: - C.bleu_one_init(ctypes.byref(self.stat)) - else: - C.bleu_zero_init(ctypes.byref(self.stat)) - - def add(self, ref, pred): - if not isinstance(ref, torch.IntTensor): - raise TypeError('ref must be a torch.IntTensor (got {})' - .format(type(ref))) - if not isinstance(pred, torch.IntTensor): - raise TypeError('pred must be a torch.IntTensor(got {})' - .format(type(pred))) - - # don't match unknown words - rref = ref.clone() - assert not rref.lt(0).any() - rref[rref.eq(self.unk)] = -999 - - rref = rref.contiguous().view(-1) - pred = pred.contiguous().view(-1) - - C.bleu_add( - ctypes.byref(self.stat), - ctypes.c_size_t(rref.size(0)), - ctypes.c_void_p(rref.data_ptr()), - ctypes.c_size_t(pred.size(0)), - ctypes.c_void_p(pred.data_ptr()), - ctypes.c_int(self.pad), - ctypes.c_int(self.eos)) - - def score(self, order=4): - psum = sum(math.log(p) if p > 0 else float('-Inf') - for p in self.precision()[:order]) - return self.brevity() * math.exp(psum / order) * 100 - - def precision(self): - def ratio(a, b): - return a / b if b > 0 else 0 - - return [ - ratio(self.stat.match1, self.stat.count1), - ratio(self.stat.match2, self.stat.count2), - ratio(self.stat.match3, self.stat.count3), - ratio(self.stat.match4, self.stat.count4), - ] - - def brevity(self): - r = self.stat.reflen / self.stat.predlen - return min(1, math.exp(1 - r)) - - def result_string(self, order=4): - assert order <= 4, "BLEU scores for order > 4 aren't supported" - fmt = 'BLEU{} = {:2.2f}, {:2.1f}' - for _ in range(1, order): - fmt += '/{:2.1f}' - fmt += ' (BP={:.3f}, ratio={:.3f}, syslen={}, reflen={})' - bleup = [p * 100 for p in self.precision()[:order]] - return fmt.format(order, self.score(order=order), *bleup, - self.brevity(), self.stat.predlen/self.stat.reflen, - self.stat.predlen, self.stat.reflen) - - -def get_bleu_score(args, reference_sentences_fpath, translated_sentences_fpath): - dict = dictionary.Dictionary() - order = 4 - sentence_bleu = False - ignore_case = False - - def readlines(fd): - for line in fd.readlines(): - if ignore_case: - yield line.lower() - else: - yield line - - if sentence_bleu: - def score(fdsys): - with open(reference_sentences_fpath) as fdref: - scorer = Scorer(dict.pad(), dict.eos(), dict.unk()) - for i, (sys_tok, ref_tok) in enumerate(zip(readlines(fdsys), readlines(fdref))): - scorer.reset(one_init=True) - sys_tok = dict.encode_line(sys_tok) - ref_tok = dict.encode_line(ref_tok) - scorer.add(ref_tok, sys_tok) - else: - def score(fdsys): - with open(reference_sentences_fpath) as fdref: - scorer = Scorer(dict.pad(), dict.eos(), dict.unk()) - for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)): - sys_tok = dict.encode_line(sys_tok) - ref_tok = dict.encode_line(ref_tok) - scorer.add(ref_tok, sys_tok) - return(scorer.score(order)) - - if translated_sentences_fpath == '-': - score = score(sys.stdin) - else: - with 
open(translated_sentences_fpath, 'r') as f: - score = score(f) - logger.info('Achieved BLEU score: {}'.format(score)) - return score - - def compute_bleu(config, dataset_path, checkpoint_path): parser = options.get_generation_parser() @@ -173,6 +37,9 @@ def compute_bleu(config, dataset_path, checkpoint_path): args.source_lang = 'en' args.target_lang = 'de' args.batch_size = 128 + args.eval_bleu_remove_bpe = '@@ ' + args.eval_bleu_detok = 'moses' + utils.import_user_module(args) use_cuda = torch.cuda.is_available() and not args.cpu @@ -187,6 +54,9 @@ def compute_bleu(config, dataset_path, checkpoint_path): # Load dataset splits task = tasks.setup_task(args) task.load_dataset(args.gen_subset) + + tokenizer = MosesTokenizer(args) + task.tokenizer=tokenizer # Set dictionaries try: src_dict = getattr(task, 'source_dictionary', None) @@ -239,115 +109,19 @@ def compute_bleu(config, dataset_path, checkpoint_path): generator = task.build_generator([model], args) num_sentences = 0 - has_target = True - input_len_all = [] - with open('translations_out.txt', 'a') as fname_translations: - with progress_bar.build_progress_bar(args, itr) as t: - wps_meter = TimeMeter() - for sample in t: - - sample = utils.move_to_cuda(sample) if use_cuda else sample - if 'net_input' not in sample: - continue - - prefix_tokens = None - if args.prefix_size > 0: - prefix_tokens = sample['target'][:, :args.prefix_size] - - gen_timer.start() - hypos = task.inference_step( - generator, [model], sample, prefix_tokens) - input_len_all.append( - np.mean(sample['net_input']['src_lengths'].cpu().numpy())) - num_generated_tokens = sum(len(h[0]['tokens']) for h in hypos) - gen_timer.stop(num_generated_tokens) - - for i, sample_id in enumerate(sample['id'].tolist()): - has_target = sample['target'] is not None - - # Remove padding - src_tokens = utils.strip_pad( - sample['net_input']['src_tokens'][i, :], tgt_dict.pad()) - target_tokens = None - if has_target: - target_tokens = utils.strip_pad( - sample['target'][i, :], tgt_dict.pad()).int().cpu() - - # Either retrieve the original sentences or regenerate them from tokens. 
- if align_dict is not None: - src_str = task.dataset( - args.gen_subset).src.get_original_text(sample_id) - target_str = task.dataset( - args.gen_subset).tgt.get_original_text(sample_id) - else: - if src_dict is not None: - src_str = src_dict.string( - src_tokens, args.remove_bpe) - else: - src_str = "" - if has_target: - target_str = tgt_dict.string( - target_tokens, args.remove_bpe, escape_unk=True) - - if not args.quiet: - if src_dict is not None: - fname_translations.write( - 'S-{}\t{}'.format(sample_id, src_str)) - fname_translations.write('\n') - - if has_target: - fname_translations.write( - 'T-{}\t{}'.format(sample_id, target_str)) - fname_translations.write('\n') - - # Process top predictions - for j, hypo in enumerate(hypos[i][:args.nbest]): - hypo_tokens, hypo_str, alignment = utils.post_process_prediction( - hypo_tokens=hypo['tokens'].int().cpu(), - src_str=src_str, - alignment=hypo['alignment'].int().cpu( - ) if hypo['alignment'] is not None else None, - align_dict=align_dict, - tgt_dict=tgt_dict, - remove_bpe=args.remove_bpe, - ) - - if not args.quiet: - - fname_translations.write( - 'H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str)) - fname_translations.write('\n') - fname_translations.write('P-{}\t{}'.format( - sample_id, - ' '.join(map( - lambda x: '{:.4f}'.format(x), - hypo['positional_scores'].tolist(), - )) - )) - fname_translations.write('\n') - - if args.print_alignment: - fname_translations.write('A-{}\t{}'.format( - sample_id, - ' '.join( - map(lambda x: str(utils.item(x)), alignment)) - )) - fname_translations.write('\n') - - wps_meter.update(num_generated_tokens) - t.log({'wps': round(wps_meter.avg)}) - num_sentences += sample['nsentences'] - - # TODO(macsz) Try to convert this system call to Python code - os.system( - "grep ^H translations_out.txt | cut -f3- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > sys.txt") - os.system( - "grep ^T translations_out.txt | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > ref.txt") - bleu_score = get_bleu_score(args, "ref.txt", "sys.txt") - - os.remove("ref.txt") - os.remove("sys.txt") - os.remove("translations_out.txt") + bleu_list = [] + with progress_bar.build_progress_bar(args, itr) as t: + for sample in t: + sample = utils.move_to_cuda(sample) if use_cuda else sample + if 'net_input' not in sample: + continue + + bleu = task._inference_with_bleu(generator,sample,model) + bleu_list.append(bleu.score) + + num_sentences += sample['nsentences'] + + bleu_score = np.mean(np.array(bleu_list)) return bleu_score From a35901b48afd27f4811d8513d02617f3c51f15cd Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Tue, 29 Nov 2022 11:55:16 -0800 Subject: [PATCH 28/60] Fix problem with `dataset` for OFA --- neural_compressor/experimental/nas/dynas.py | 2 +- neural_compressor/experimental/nas/dynast/dynas_utils.py | 8 +++++--- .../machine_translation/transformer_interface.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index dc11928c0fb..8430cbab8ea 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -131,7 +131,7 @@ def init_for_search(self): latency_predictor=None, datasetpath=self.dataset_path, batch_size=self.batch_size, - checkpoint_path=self.supernet_ckpt_path + checkpoint_path=self.supernet_ckpt_path, ) # Setup validation interface diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py 
b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 2bf204deab5..e1902a73c93 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -175,8 +175,9 @@ def __init__( acc_predictor: Predictor, macs_predictor: Predictor, latency_predictor: Predictor, - imagenetpath: str, + datasetpath: str, batch_size: int, + **kwargs, ) -> None: """Initialize the attributes.""" self.supernet = supernet @@ -185,7 +186,7 @@ def __init__( self.latency_predictor = latency_predictor self.device = 'cpu' self.test_size = None - ImagenetDataProvider.DEFAULT_PATH = imagenetpath + ImagenetDataProvider.DEFAULT_PATH = datasetpath self.ofa_network = ofa.model_zoo.ofa_net(supernet, pretrained=True) self.run_config = ImagenetRunConfig(test_batch_size=64, n_worker=20) self.batch_size = batch_size @@ -345,7 +346,8 @@ def __init__( latency_predictor: Predictor, datasetpath: str, batch_size: int, - checkpoint_path: str + checkpoint_path: str, + **kwargs, ) -> None: self.supernet = supernet self.acc_predictor = acc_predictor diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index dac058d24ec..336a26fad47 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -120,7 +120,7 @@ def compute_bleu(config, dataset_path, checkpoint_path): bleu_list.append(bleu.score) num_sentences += sample['nsentences'] - + bleu_score = np.mean(np.array(bleu_list)) return bleu_score From 8b3a795e3aeb7f2c31a4e4d02eb1aeb6a0da49f8 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Tue, 29 Nov 2022 12:07:58 -0800 Subject: [PATCH 29/60] Remove unused imports --- .../machine_translation/transformer_interface.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 336a26fad47..34ce0a7e96b 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -1,18 +1,13 @@ """ Translate pre-processed data with a trained model. 
""" -import ctypes -import math -import os -import sys import time import warnings import numpy as np import torch from fairseq import options, progress_bar, tasks, utils -from fairseq.data import dictionary -from fairseq.meters import StopwatchMeter, TimeMeter +from fairseq.meters import StopwatchMeter from fairseq.data.encoders.moses_tokenizer import MosesTokenizer from neural_compressor.utils import logger @@ -265,7 +260,7 @@ def compute_latency(config, dataset_path, batch_size, get_model_parameters=False return lat_mean, lat_std -def compute_macs(config,dataset_path): +def compute_macs(config, dataset_path): parser = options.get_generation_parser() args = options.parse_args_and_arch(parser,[dataset_path]) From 53f4af78555211443cb9acbb646dab58bd3f1b84 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Tue, 29 Nov 2022 18:33:38 -0800 Subject: [PATCH 30/60] Fix indentation --- neural_compressor/experimental/nas/dynast/dynas_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_predictor.py b/neural_compressor/experimental/nas/dynast/dynas_predictor.py index 0042d1df729..e9b7764d3de 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_predictor.py +++ b/neural_compressor/experimental/nas/dynast/dynas_predictor.py @@ -75,7 +75,7 @@ def train(self, examples, labels): examples: Examples to be used for training. labels: Labels to be used for training. """ - # Compute normalization factor + # Compute normalization factor max_label = np.amax(np.abs(labels)) if max_label > 0.0: self.normalization_factor = 10 ** (np.floor(np.log10(max_label)) - 1.0) From d255b68b5982436e8ed699f911ca2462fc782aa5 Mon Sep 17 00:00:00 2001 From: "Nittur Sridhar, Sharath" Date: Tue, 29 Nov 2022 20:38:38 -0800 Subject: [PATCH 31/60] replace fvcore with torchprofile to avoid mem leaks --- .../machine_translation/transformer_interface.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 34ce0a7e96b..5ad471ec858 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -13,7 +13,7 @@ from neural_compressor.utils import logger from .transformer_supernetwork import TransformerSuperNetwork -from fvcore.nn import FlopCountAnalysis +import torchprofile warnings.filterwarnings("ignore") @@ -301,13 +301,13 @@ def compute_macs(config, dataset_path): dummy_src_tokens = [2] + [7] * (dummy_sentence_length - 1) dummy_prev = [7] * (dummy_sentence_length - 1) + [2] - - model.set_sample_config(config) - + + model.eval() model.profile(mode=True) - macs = FlopCountAnalysis(model, (torch.tensor([dummy_src_tokens], dtype=torch.long), - torch.tensor([30]), torch.tensor([dummy_prev], dtype=torch.long))) - macs_tot = macs.total() + model.set_sample_config(config) + macs = torchprofile.profile_macs(model, args=(torch.tensor([dummy_src_tokens], dtype=torch.long), + torch.tensor([30]), torch.tensor([dummy_prev], dtype=torch.long))) + model.profile(mode=False) - return macs_tot + return macs From bb2a53aaa6e587e598fc99b48ceb6f6ddd259375 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Wed, 30 Nov 2022 11:24:34 -0800 Subject: [PATCH 32/60] Add DyNAS Transformer LT 
example Signed-off-by: Maciej Szankin --- ..._Supernet_NAS.ipynb => Supernet_NAS.ipynb} | 53 ++++++++++++++----- 1 file changed, 40 insertions(+), 13 deletions(-) rename examples/notebook/dynas/{MobileNetV3_Supernet_NAS.ipynb => Supernet_NAS.ipynb} (95%) diff --git a/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb b/examples/notebook/dynas/Supernet_NAS.ipynb similarity index 95% rename from examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb rename to examples/notebook/dynas/Supernet_NAS.ipynb index 4fdbc291284..ffe71eaa4b1 100644 --- a/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb +++ b/examples/notebook/dynas/Supernet_NAS.ipynb @@ -13,7 +13,7 @@ "\n", "#### Super-Networks\n", "\n", - "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification task on ImageNet-ilsvrc2012.\n", + "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. 
This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification on ImageNet-ilsvrc2012 as well as Transformer Language Translation (based on [6]) for the language translation tasks.\n", "\n", "#### Methodology\n", "\n", @@ -29,23 +29,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Prerequisites" + "## Prerequisites\n", + "\n", + "For released version of Neural Compressor:" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "!pip install neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2" + "!pip install -q neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Import Packages" + "Alternatievely, if you have a local copy of https://github.com/intel/neural-compressor, you can uncomment and run the code below:" ] }, { @@ -53,6 +55,24 @@ "execution_count": 1, "metadata": {}, "outputs": [], + "source": [ + "# import sys\n", + "# sys.path.insert(0,'')\n", + "# !pip install -q autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import Packages" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], "source": [ "from neural_compressor.conf.config import NASConfig\n", "from neural_compressor.experimental.nas import NAS\n", @@ -72,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -84,12 +104,16 @@ "metadata": {}, "source": [ "### Define Architecture\n", - "We currently leverage pre-trained Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path. \n", + "We currently support pre-trained super-networks:\n", + "\n", + "1. Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path.\n", + "2. Hardware-Aware-Transformers (HAT) supernetwork [6] for language translation task on WMT14 En-De. To run this supernetwork you have to manually download preprocessed dataset from https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/configs/wmt14.en-de/get_preprocessed.sh and pretrained model from https://www.dropbox.com/s/pkdddxvvpw9a4vq/HAT_wmt14ende_super_space0.pt?dl=0\n", "\n", "Super-network options (choose 1): \n", "- `ofa_resnet50` - based on the ResNet50 architecture [4]. 
Search space of ~$10^{15}$ architectures.\n", "- `ofa_mbv3_d234_e346_k357_w1.0` - based on the MobileNetV3 architecture [5], width multiplier 1.0. Search space of ~$10^{19}$ architectures.\n", - "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. " + "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. \n", + "- `transformer_lt_wmt_en_de` - based on the Transformer architecture [7]." ] }, { @@ -113,7 +137,7 @@ "* `['acc', 'lat']` \n", "\n", "Description:\n", - "* `'acc'` - ImageNet Top-1 Accuracy (%)\n", + "* `'acc'` - ImageNet Top-1 Accuracy (%) (for OFA supetnetworks) and Bleu (for Transformer LT)\n", "* `'macs'` - Multiply-and-accumulates as measured from FVCore. \n", "* `'lat'` - Latency (inference time) measurement (ms)" ] @@ -137,7 +161,8 @@ "* `config.dynas.num_evals` - Validation measurement count, a higher count comes with greater computational cost but a higher chance of finding optimal sub-networks\n", "* `config.dynas.results_csv_path` - Location of the search (validation measurement) results. This file is also used to provide training data to the metric predictors. \n", "* `config.dynas.batch_size` - Batch size used during latency measurements.\n", - "* `config.dynas.dataset_path` - Path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php" + "* `config.dynas.dataset_path` - For OFA it's a path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php; For Transformer LT it's a path to preprocessed WMT EnDe directory (`(...)/data/binary/wmt16_en_de`)\n", + "* `config.dynas.supernet_ckpt_path` - Transformer LT only. Path to downloaded pretrained super-network (`HAT_wmt14ende_super_space0.pt` file)." ] }, { @@ -272,8 +297,10 @@ "[1] Cai, H., Gan, C., & Han, S. (2020). Once for All: Train One Network and Specialize it for Efficient Deployment. ArXiv, abs/1908.09791. \n", "[2] K. Deb, A. Pratap, S. Agarwal and T. Meyarivan, \"A fast and elitist multiobjective genetic algorithm: NSGA-II,\" in IEEE Transactions on Evolutionary Computation, vol. 6, no. 2, pp. 182-197, April 2002, doi: 10.1109/4235.996017. \n", "[3] Cummings, D., Sarah, A., Sridhar, S.N., Szankin, M., Muñoz, J.P., & Sundaresan, S. (2022). A Hardware-Aware Framework for Accelerating Neural Architecture Search Across Modalities. ArXiv, abs/2205.10358. \n", - "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", - "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. " + "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", + "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. \n", + "[6] Wang, H., Wu, Z., Liu, Z., Cai, H., Zhu, L., Gan, C. and Han, S., 2020. 
Hat: Hardware-aware transformers for efficient natural language processing. arXiv preprint arXiv:2005.14187. \n", + "[7] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I., 2017. Attention is all you need. Advances in neural information processing systems, 30." ] }, { @@ -300,7 +327,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.7.11" } }, "nbformat": 4, From 1cd635fd6f848c7928f0c223f9b1e56d8771ae6c Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Wed, 30 Nov 2022 11:25:40 -0800 Subject: [PATCH 33/60] Code cleanup Signed-off-by: Maciej Szankin --- neural_compressor/experimental/nas/dynas.py | 6 ++++-- .../experimental/nas/dynast/dynas_predictor.py | 2 -- .../machine_translation/transformer_interface.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/neural_compressor/experimental/nas/dynas.py b/neural_compressor/experimental/nas/dynas.py index 8430cbab8ea..3c63e19d27f 100644 --- a/neural_compressor/experimental/nas/dynas.py +++ b/neural_compressor/experimental/nas/dynas.py @@ -37,7 +37,8 @@ class DyNAS(NASBase): def __init__(self, conf_fname_or_obj): """Initialize the attributes.""" - from .dynast.dynas_manager import ParameterManager, TransformerLTEncoding + from .dynast.dynas_manager import (ParameterManager, + TransformerLTEncoding) from .dynast.dynas_predictor import Predictor from .dynast.dynas_search import (ProblemMultiObjective, SearchAlgoManager) @@ -121,7 +122,8 @@ def estimate(self, individual): def init_for_search(self): """Initialize the search configuration.""" self.supernet_manager = self.SUPERNET_ENCODING[self.supernet]( - param_dict=self.SUPERNET_PARAMETERS[self.supernet], seed=self.seed) + param_dict=self.SUPERNET_PARAMETERS[self.supernet], seed=self.seed + ) # Validation High-Fidelity Measurement Runner self.runner_validate = self.RUNNERS[self.supernet]( diff --git a/neural_compressor/experimental/nas/dynast/dynas_predictor.py b/neural_compressor/experimental/nas/dynast/dynas_predictor.py index e9b7764d3de..477e4fcf7ca 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_predictor.py +++ b/neural_compressor/experimental/nas/dynast/dynas_predictor.py @@ -70,7 +70,6 @@ def __init__(self, alphas=DEFAULT_ALPHAS, cost_factors=DEFAULT_COST_FACTORS, def train(self, examples, labels): """Train the predictor on the specified examples and labels using the underlying regressor. - Args: examples: Examples to be used for training. labels: Labels to be used for training. @@ -94,7 +93,6 @@ def train(self, examples, labels): # Determine index of best searcher self.best_index = np.argmax(scores) - def predict(self, examples): """Predict the output values of the specified examples using the underlying regressor. 
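For readers unfamiliar with torchprofile, which the patches above adopt for MAC counting (and whose import is touched in the next hunk), the following is a minimal, self-contained sketch of the profile_macs call pattern only. The toy two-layer model and the (1, 30, 512) dummy input are illustrative assumptions standing in for the project's TransformerSuperNetwork and its dummy token tensors; they are not part of the patch series.

import torch
import torchprofile

# Toy stand-in module (assumption); the actual code profiles TransformerSuperNetwork.
model = torch.nn.Sequential(
    torch.nn.Linear(512, 2048),
    torch.nn.ReLU(),
    torch.nn.Linear(2048, 512),
).eval()

# Dummy input playing the role of the dummy sentence tensors built in compute_macs.
dummy_input = torch.randn(1, 30, 512)

# profile_macs traces the module and sums multiply-accumulate counts over the traced ops.
with torch.no_grad():
    macs = torchprofile.profile_macs(model, args=(dummy_input,))
print('MACs for the toy model: {}'.format(macs))
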
diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 5ad471ec858..cffd80246cd 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -6,14 +6,14 @@ import numpy as np import torch +import torchprofile from fairseq import options, progress_bar, tasks, utils -from fairseq.meters import StopwatchMeter from fairseq.data.encoders.moses_tokenizer import MosesTokenizer +from fairseq.meters import StopwatchMeter from neural_compressor.utils import logger from .transformer_supernetwork import TransformerSuperNetwork -import torchprofile warnings.filterwarnings("ignore") @@ -301,13 +301,13 @@ def compute_macs(config, dataset_path): dummy_src_tokens = [2] + [7] * (dummy_sentence_length - 1) dummy_prev = [7] * (dummy_sentence_length - 1) + [2] - + model.eval() model.profile(mode=True) model.set_sample_config(config) macs = torchprofile.profile_macs(model, args=(torch.tensor([dummy_src_tokens], dtype=torch.long), torch.tensor([30]), torch.tensor([dummy_prev], dtype=torch.long))) - + model.profile(mode=False) return macs From 1c6ddf68c059dafb67dd0cdbb6bf3dc3a2143963 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Wed, 30 Nov 2022 15:04:45 -0800 Subject: [PATCH 34/60] Add fairseq and HAT license Signed-off-by: Maciej Szankin --- .../supernetwork/machine_translation/LICENSE | 51 +++++++++++++++++++ .../modules_supernetwork.py | 3 ++ .../transformer_interface.py | 3 ++ .../transformer_supernetwork.py | 3 ++ 4 files changed, 60 insertions(+) create mode 100644 neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE new file mode 100644 index 00000000000..4c15682134a --- /dev/null +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE @@ -0,0 +1,51 @@ +MIT License +------------ LICENSE For Hardware-Aware Transformer software --------------- +Copyright (c) 2020, Hanrui Wang, Zhanghao Wu, Zhijian Liu, Han Cai, +Ligeng Zhu, Chuang Gan and Song Han +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +------------------------- LICENSE FOR Fairseq ------------------------------ +MIT License + +Copyright (c) Facebook, Inc. and its affiliates. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index 8650a024db7..7ffac3351ab 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -1,3 +1,6 @@ +# Part of this source code is licensed under the MIT license found in the +# LICENSE file in the same directory as this file. + from collections import defaultdict import torch diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index cffd80246cd..4ebfb7e215e 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -1,3 +1,6 @@ +# Part of this source code is licensed under the MIT license found in the +# LICENSE file in the same directory as this file. + """ Translate pre-processed data with a trained model. 
""" diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index 7890e0605d3..ee6b7b3a861 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -1,3 +1,6 @@ +# Part of this source code is licensed under the MIT license found in the +# LICENSE file in the same directory as this file. + import math import torch From 3afbcc74d8bf89051dd06304a1983db3c99fecdb Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Tue, 6 Dec 2022 03:34:58 -0800 Subject: [PATCH 35/60] Update license headers in transformer t supernet code Signed-off-by: Maciej Szankin --- .../supernetwork/machine_translation/LICENSE | 51 ------------------- .../modules_supernetwork.py | 17 ++++++- .../transformer_interface.py | 17 ++++++- .../transformer_supernetwork.py | 17 ++++++- 4 files changed, 45 insertions(+), 57 deletions(-) delete mode 100644 neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE deleted file mode 100644 index 4c15682134a..00000000000 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/LICENSE +++ /dev/null @@ -1,51 +0,0 @@ -MIT License ------------- LICENSE For Hardware-Aware Transformer software --------------- -Copyright (c) 2020, Hanrui Wang, Zhanghao Wu, Zhijian Liu, Han Cai, -Ligeng Zhu, Chuang Gan and Song Han -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -------------------------- LICENSE FOR Fairseq ------------------------------ -MIT License - -Copyright (c) Facebook, Inc. and its affiliates. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index 7ffac3351ab..4c750e45c5c 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -1,5 +1,18 @@ -# Part of this source code is licensed under the MIT license found in the -# LICENSE file in the same directory as this file. +# https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/LICENSE +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from collections import defaultdict diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 4ebfb7e215e..7ca6532d17d 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -1,5 +1,18 @@ -# Part of this source code is licensed under the MIT license found in the -# LICENSE file in the same directory as this file. +# https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/LICENSE +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """
 Translate pre-processed data with a trained model.
diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py
index ee6b7b3a861..6ccc8b9c4a2 100644
--- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py
+++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py
@@ -1,5 +1,18 @@
-# Part of this source code is licensed under the MIT license found in the
-# LICENSE file in the same directory as this file.
+# https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/LICENSE
+#
+# Copyright (c) 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 import math

From 2ea3572c001ec9f5c3062a3bf14ed57096036271 Mon Sep 17 00:00:00 2001
From: Maciej Szankin
Date: Wed, 7 Dec 2022 14:34:45 -0800
Subject: [PATCH 36/60] Revert "Add DyNAS Transformer LT example"

This reverts commit bb2a53aaa6e587e598fc99b48ceb6f6ddd259375.
---
 ...S.ipynb => MobileNetV3_Supernet_NAS.ipynb} | 53 +++++--------------
 1 file changed, 13 insertions(+), 40 deletions(-)
 rename examples/notebook/dynas/{Supernet_NAS.ipynb => MobileNetV3_Supernet_NAS.ipynb} (95%)

diff --git a/examples/notebook/dynas/Supernet_NAS.ipynb b/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb
similarity index 95%
rename from examples/notebook/dynas/Supernet_NAS.ipynb
rename to examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb
index ffe71eaa4b1..4fdbc291284 100644
--- a/examples/notebook/dynas/Supernet_NAS.ipynb
+++ b/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb
@@ -13,7 +13,7 @@
     "\n",
     "#### Super-Networks\n",
     "\n",
-    "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification on ImageNet-ilsvrc2012 as well as Transformer Language Translation (based on [6]) for the language translation tasks.\n",
+    "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification task on ImageNet-ilsvrc2012.\n",
     "\n",
     "#### Methodology\n",
     "\n",
@@ -29,36 +29,16 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Prerequisites\n",
-    "\n",
-    "For released version of Neural Compressor:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install -q neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Alternatievely, if you have a local copy of https://github.com/intel/neural-compressor, you can uncomment and run the code below:"
+    "## Prerequisites"
    ]
  },
  {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# import sys\n",
-    "# sys.path.insert(0,'')\n",
-    "# !pip install -q autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2"
+    "!pip install neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2"
    ]
  },
  {
@@ -70,7 +50,7 @@
  },
  {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -92,7 +72,7 @@
  },
  {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -104,16 +84,12 @@
    "metadata": {},
    "source": [
     "### Define Architecture\n",
-    "We currently support pre-trained super-networks:\n",
-    "\n",
-    "1. Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path.\n",
-    "2. Hardware-Aware-Transformers (HAT) supernetwork [6] for language translation task on WMT14 En-De. To run this supernetwork you have to manually download preprocessed dataset from https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/configs/wmt14.en-de/get_preprocessed.sh and pretrained model from https://www.dropbox.com/s/pkdddxvvpw9a4vq/HAT_wmt14ende_super_space0.pt?dl=0\n",
+    "We currently leverage pre-trained Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path. \n",
     "\n",
     "Super-network options (choose 1): \n",
     "- `ofa_resnet50` - based on the ResNet50 architecture [4]. Search space of ~$10^{15}$ architectures.\n",
     "- `ofa_mbv3_d234_e346_k357_w1.0` - based on the MobileNetV3 architecture [5], width multiplier 1.0. Search space of ~$10^{19}$ architectures.\n",
-    "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. \n",
-    "- `transformer_lt_wmt_en_de` - based on the Transformer architecture [7]."
+    "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. "
    ]
  },
  {
@@ -137,7 +113,7 @@
     "* `['acc', 'lat']` \n",
     "\n",
     "Description:\n",
-    "* `'acc'` - ImageNet Top-1 Accuracy (%) (for OFA supetnetworks) and Bleu (for Transformer LT)\n",
+    "* `'acc'` - ImageNet Top-1 Accuracy (%)\n",
     "* `'macs'` - Multiply-and-accumulates as measured from FVCore. \n",
     "* `'lat'` - Latency (inference time) measurement (ms)"
    ]
  },
  {
@@ -161,8 +137,7 @@
     "* `config.dynas.num_evals` - Validation measurement count, a higher count comes with greater computational cost but a higher chance of finding optimal sub-networks\n",
     "* `config.dynas.results_csv_path` - Location of the search (validation measurement) results. This file is also used to provide training data to the metric predictors. \n",
     "* `config.dynas.batch_size` - Batch size used during latency measurements.\n",
-    "* `config.dynas.dataset_path` - For OFA it's a path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php; For Transformer LT it's a path to preprocessed WMT EnDe directory (`(...)/data/binary/wmt16_en_de`)\n",
-    "* `config.dynas.supernet_ckpt_path` - Transformer LT only. Path to downloaded pretrained super-network (`HAT_wmt14ende_super_space0.pt` file)."
+    "* `config.dynas.dataset_path` - Path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php"
    ]
  },
  {
@@ -297,10 +272,8 @@
     "[1] Cai, H., Gan, C., & Han, S. (2020). Once for All: Train One Network and Specialize it for Efficient Deployment. ArXiv, abs/1908.09791. \n",
     "[2] K. Deb, A. Pratap, S. Agarwal and T. Meyarivan, \"A fast and elitist multiobjective genetic algorithm: NSGA-II,\" in IEEE Transactions on Evolutionary Computation, vol. 6, no. 2, pp. 182-197, April 2002, doi: 10.1109/4235.996017. \n",
     "[3] Cummings, D., Sarah, A., Sridhar, S.N., Szankin, M., Muñoz, J.P., & Sundaresan, S. (2022). A Hardware-Aware Framework for Accelerating Neural Architecture Search Across Modalities. ArXiv, abs/2205.10358. \n",
-    "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n",
-    "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. \n",
-    "[6] Wang, H., Wu, Z., Liu, Z., Cai, H., Zhu, L., Gan, C. and Han, S., 2020. Hat: Hardware-aware transformers for efficient natural language processing. arXiv preprint arXiv:2005.14187. 
\n", - "[7] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I., 2017. Attention is all you need. Advances in neural information processing systems, 30." + "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", + "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. " ] }, { @@ -327,7 +300,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.11" + "version": "3.8.10" } }, "nbformat": 4, From 6dba11432aac7d2c20672a4d6f3d8e77d6a5440b Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 8 Dec 2022 02:30:33 -0800 Subject: [PATCH 37/60] Add example results for Transformer LT search space Signed-off-by: Maciej Szankin --- .../dynas/results_transformerlt_macs.csv | 501 ++++++++++++++++++ 1 file changed, 501 insertions(+) create mode 100644 examples/notebook/dynas/results_transformerlt_macs.csv diff --git a/examples/notebook/dynas/results_transformerlt_macs.csv b/examples/notebook/dynas/results_transformerlt_macs.csv new file mode 100644 index 00000000000..326d9894762 --- /dev/null +++ b/examples/notebook/dynas/results_transformerlt_macs.csv @@ -0,0 +1,501 @@ +Sub-network,Date,Latency (ms),MACs,BLEU +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 3072, 2048, 3072, 2048, 2048], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, -1, -1]}",2022-11-29 22:54:58.796773,0,1397702484,23.35221720436182 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 3072, 2048, 2048, 1024, 3072], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 1024, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, 2, 2, -1]}",2022-11-29 22:55:36.708362,0,2117790828,25.699488742308187 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 3072, 1024, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 3072, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, 1]}",2022-11-29 22:56:06.143948,0,1700582490,25.0628359775166 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 2048, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [2, 2, 2, -1, 1, -1]}",2022-11-29 22:56:40.372306,0,1593972576,25.51774692114225 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 
2048, 2048, 1024, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_self_attention_heads': [4, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-29 22:57:09.483908,0,1234590804,22.56186718543443 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 3072, 3072, 2048, 3072, 3072], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 1, 1, 1, -1, 1]}",2022-11-29 22:57:47.479253,0,2320469868,26.46877217919795 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 1024, 3072, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 1, 1, 1, -1]}",2022-11-29 22:58:16.629295,0,1269811290,24.64774544301779 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 3072, 2048, 2048, 3072, 3072], 'decoder_ffn_embed_dim': [3072, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, -1, 1, -1, 2]}",2022-11-29 22:58:57.917629,0,2481530994,26.07415311884126 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 3072, 1024, 1024, 1024, 2048], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 1024, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 1, 2, 1, 1, -1]}",2022-11-29 22:59:26.905633,0,1319024724,22.493311676649537 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 3072, 1024, 1024, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 1, 2, 2, 1]}",2022-11-29 23:00:07.328829,0,1880709234,26.00344571579533 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 3072, 1024, 2048, 2048, 2048], 'decoder_ffn_embed_dim': [3072, 2048, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, 2, -1, 1]}",2022-11-29 23:00:38.792088,0,1671939936,25.692425623480723 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 2048, 3072], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_self_attention_heads': 
[4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [2, 2, 2, -1, 1, -1]}",2022-11-29 23:01:14.719074,0,1804297062,26.07342689295033 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 2048], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 3072, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, 2, 1, 2, -1]}",2022-11-29 23:01:56.128203,0,2350798194,26.332192395799687 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [2, 2, -1, 1, -1, 1]}",2022-11-29 23:02:26.540354,0,1397483610,25.69929087830039 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, 1, 1, 2]}",2022-11-29 23:03:07.262003,0,2119699314,26.35980541802738 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 1024, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 1, -1, 2, -1, -1]}",2022-11-29 23:03:35.089457,0,1110604884,22.97494000005183 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 2048, 2048], 'decoder_ffn_embed_dim': [3072, 3072, 1024, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 4, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, 1, -1, 1, 2]}",2022-11-29 23:04:06.642167,0,1801651290,25.757473996484833 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 2048, 2048], 'decoder_ffn_embed_dim': [1024, 3072, 3072, 1024, 1024, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 2, 2, -1, -1, -1]}",2022-11-29 23:04:41.625679,0,1888961382,25.85426108217189 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, 1, 1, 1]}",2022-11-29 23:05:11.833000,0,1490960730,25.63143521434478 
+"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 3072, 2048, 3072, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, 1, 1, 2]}",2022-11-29 23:05:41.444763,0,1364183130,25.072061221515387 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 2048, 2048, 1024, 1024, 3072], 'decoder_ffn_embed_dim': [3072, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, 1, 1, -1]}",2022-11-29 23:06:20.219204,0,2281236594,26.08920225424034 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 1024, 3072], 'decoder_ffn_embed_dim': [1024, 2048, 1024, 3072, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 2, 1, -1, -1]}",2022-11-29 23:06:54.484344,0,1688332896,25.54971935098368 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 1024, 3072, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, 1, 1, 2]}",2022-11-29 23:07:31.477666,0,1540039776,25.66937359699742 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 2, 2, 1, 1]}",2022-11-29 23:08:00.927883,0,1543246170,25.23650526106691 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 1024, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 1024, 3072, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, 2, 2, -1, 2]}",2022-11-29 23:08:37.748235,0,1840608102,25.84950449942653 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 2048, 3072], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 1024, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 2, -1]}",2022-11-29 23:09:14.377541,0,1662908256,25.748175360241753 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 3072, 2048, 1024, 3072, 3072], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 3072, 1024], 
'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [2, 1, 1, 2, 1, -1]}",2022-11-29 23:09:53.068127,0,2074525548,26.02259252150837 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 3072, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, -1, 2, 2]}",2022-11-29 23:10:31.871226,0,2061818988,26.071718195164653 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 1024, 3072], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 1, -1, 2, 1, 2]}",2022-11-29 23:11:01.734997,0,1412290650,25.399141175298542 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 1024, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 2, -1, -1, -1, 1]}",2022-11-29 23:11:40.315729,0,1971394674,26.21330617046487 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [2, 1, 1, 1, 1, -1]}",2022-11-29 23:12:16.427543,0,1830900582,26.184771020867597 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [3072, 3072, 3072, 2048, 3072, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, 1, 2, 2, 1]}",2022-11-29 23:12:57.379424,0,2421538668,26.175261088262666 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, 2, 1, -1]}",2022-11-29 23:13:25.381877,0,1251290964,23.320679652947288 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 1024, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [1024, 3072, 2048, 1024, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 
'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 1]}",2022-11-29 23:13:58.935098,0,1588224102,25.972021275557776 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 3072, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 1, -1, 1, 2]}",2022-11-29 23:14:35.478235,0,1960738668,26.14494989795422 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 2, 2]}",2022-11-29 23:15:03.906597,0,1419452244,22.7811520313731 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 3072, 3072, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 3072, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_self_attention_heads': [4, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 2, 2, 2]}",2022-11-29 23:15:33.346144,0,1316075610,24.898481627702125 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 2048, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 1, 2]}",2022-11-29 23:16:12.674460,0,1893596268,26.420978678385804 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 2048], 'decoder_ffn_embed_dim': [1024, 3072, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, 2, -1]}",2022-11-29 23:16:45.186860,0,1642325856,25.982442735663543 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 1024, 1024, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 2, 2, 2, 2, 2]}",2022-11-29 23:17:21.824602,0,1856336742,25.94442144683277 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 3072, 3072, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, 1]}",2022-11-29 23:17:58.260936,0,2023653228,26.07457768169323 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': 
[1024, 3072, 2048, 1024, 3072, 2048], 'decoder_ffn_embed_dim': [3072, 1024, 3072, 3072, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, -1, 2, 2]}",2022-11-29 23:18:27.089691,0,1582529364,23.950252879196924 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 3072, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 3072, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [2, 1, 2, -1, 2, -1]}",2022-11-29 23:19:02.726800,0,2035372902,26.447028779186226 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [3072, 1024, 3072, 2048, 2048, 3072], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 2, 2, 2, 2, 2]}",2022-11-29 23:19:39.439886,0,1945854822,26.003986822056245 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, 1, 1]}",2022-11-29 23:20:17.584270,0,2299887468,26.442901941442834 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 3072], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 1024, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 4, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 2]}",2022-11-29 23:20:58.503235,0,2382305394,26.517600251211515 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [2, 1, 2, -1, 1, -1]}",2022-11-29 23:21:34.739346,0,1693274982,25.604765879724265 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 1024, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 3072, 3072, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 4, 4, 8, 8, 4], 'decoder_self_attention_heads': [8, 8, 4, 8, 4, 8], 'decoder_ende_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 2, 2, 2, 1]}",2022-11-29 23:22:15.291570,0,2189491308,25.71548559680124 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [1024, 3072, 3072, 3072, 1024, 3072], 'decoder_ffn_embed_dim': [3072, 2048, 3072, 2048, 3072, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 
'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 2, -1, 1, -1, 2]}",2022-11-29 23:22:53.173666,0,2225126508,26.022116504070834 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 4, 4, 8, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 2, 2, 1, 1, 1]}",2022-11-29 23:23:30.358756,0,1890804582,25.70580338518658 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, 1, 1, -1]}",2022-11-29 23:24:17.798475,0,1156869204,23.410008497520735 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:24:59.431502,0,2226789234,26.12743322887944 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, 1, 2, -1]}",2022-11-29 23:25:28.533368,0,1253161050,24.92578691671575 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:25:56.800152,0,1173519444,23.28780146013261 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:26:33.876814,0,1510425696,25.417654573154596 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 
23:27:12.966148,0,1784417388,25.760464304216683 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:27:50.697432,0,1478968416,25.19809949508387 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:28:21.688106,0,1334569050,24.839033226584537 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:28:58.223688,0,1765847142,25.798742018362613 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 2, -1]}",2022-11-29 23:29:28.855348,0,1334569050,24.797771375743167 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:30:10.461923,0,1943623794,25.731563707029 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:30:51.410770,0,2006538354,25.93412075285396 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:31:27.644610,0,1640018022,25.48925301539262 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 
3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:31:58.151027,0,1303111770,24.636635975207156 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 2, -1]}",2022-11-29 23:32:34.035788,0,1623367782,25.55649399276896 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 2, -1]}",2022-11-29 23:33:09.644672,0,1591910502,25.363108576481086 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:33:50.627277,0,1943623794,25.471976693004432 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:34:27.530520,0,1734389862,26.022537181002058 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:35:08.733562,0,2148146034,25.76800258460572 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:35:48.076930,0,1784417388,25.746269422993464 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 
4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:36:18.846293,0,1397483610,24.957529704052245 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:37:00.512837,0,2226789234,26.00635517076823 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:37:41.235777,0,1912166514,25.566969946150067 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 2, -1]}",2022-11-29 23:38:18.262099,0,1541882976,25.33456038818163 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:38:57.094675,0,1815874668,25.598195468771692 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:39:33.146325,0,1640018022,25.552118216389637 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 1, 2, -1]}",2022-11-29 23:40:09.706551,0,1462318176,25.29731961246495 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, -1]}",2022-11-29 23:40:38.756573,0,1253161050,24.86012555534481 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': 
[2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, 1, 2, -1]}",2022-11-29 23:41:07.530840,0,1253161050,24.9440878831812 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 2, -1, 2, 1, -1]}",2022-11-29 23:41:34.610233,0,1156869204,23.274975491818346 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:42:01.811817,0,1173519444,23.245718341488995 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 2, 2, -1]}",2022-11-29 23:42:37.425300,0,1591910502,25.413674094921433 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:43:13.878058,0,1608560742,25.365752658832324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 2, -1]}",2022-11-29 23:43:50.046834,0,1608560742,25.36745967365502 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, 1, 1, -1]}",2022-11-29 23:44:18.851277,0,1253161050,25.005251094503805 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 
'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 1, 2, -1]}",2022-11-29 23:44:46.110775,0,1156869204,23.277213964898888 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 2, -1]}",2022-11-29 23:45:14.844805,0,1253161050,24.834074772858695 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 2, 1, 1, 2, -1]}",2022-11-29 23:45:42.403812,0,1156869204,23.178044546083612 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:46:23.812450,0,2116688754,25.77854866366567 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:47:03.270121,0,1815874668,25.683722427952674 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 1, -1]}",2022-11-29 23:47:43.427381,0,1926973554,25.6886566287921 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:48:14.256367,0,1303111770,24.786229852100394 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 
23:48:50.842520,0,1702932582,25.571343061345555 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:49:31.529598,0,1975081074,25.722542126362086 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:50:13.472772,0,2163874674,26.02132010597597 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:50:49.447197,0,1671475302,25.60807605923095 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:51:20.051939,0,1303111770,24.844209249533108 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 1024, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:52:01.725725,0,2116688754,25.774689748379366 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 3072, 2048, 2048, 3072], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 2, 1, 2, 1, -1]}",2022-11-29 23:52:43.523756,0,2037995634,25.839133901981196 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 3072, 1024, 2048, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [2, 2, 1, 1, 2, -1]}",2022-11-29 23:53:22.095038,0,1573340256,25.369471344799734 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 
1024, 1024, 3072, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, -1]}",2022-11-29 23:54:11.017282,0,982932564,21.75634266526977 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:54:50.536084,0,2047077234,26.357977252559444 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, -1]}",2022-11-29 23:55:18.749444,0,1079224410,23.671710345039983 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:55:45.598303,0,1062497364,22.630106123134603 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:56:23.927095,0,1799301234,26.237657577301754 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:57:02.821107,0,1799301234,26.235752390680105 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:57:41.722481,0,1976298354,26.409237334320427 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 
8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:58:20.422891,0,1862215794,26.41037129035317 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-29 23:58:51.406394,0,1334645856,25.47756540184132 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-29 23:59:24.731374,0,1479045222,25.751217013602062 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-29 23:59:57.442326,0,1462394982,25.872011121812324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 1, -1, 1, 1]}",2022-11-30 00:00:30.249710,0,1399480422,25.17096109375348 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:01:04.136585,0,1573417062,26.100985377959983 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:01:43.420676,0,2015619954,26.590522766056612 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:02:22.331936,0,1830758514,26.2075294199728 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 
'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:02:51.964958,0,1269811290,25.475704676620662 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:03:29.946063,0,1751193714,26.105643502743355 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:04:07.937498,0,1751193714,25.93245158011976 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:04:37.760555,0,1206896730,24.91733058699306 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:05:12.938706,0,1541959782,25.94168073173522 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:05:50.741638,0,1719736434,26.03682929942318 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:06:25.032545,0,1447587942,25.535328515473058 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 
4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:06:58.207200,0,1319838816,25.37435112751941 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:07:37.942726,0,2007755634,26.51729964072713 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:08:09.189292,0,1271731296,25.166134982432663 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:08:43.141792,0,1416130662,25.449204232481016 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-30 00:09:11.020741,0,999582804,21.951713272811833 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-30 00:09:40.649180,0,1238354010,24.98037680186457 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:10:13.227827,0,1303188576,25.448120268873193 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 
00:10:43.351246,0,1175439450,24.62386693490815 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:11:23.467151,0,2007755634,26.232304249661833 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-30 00:11:51.316006,0,1031040084,22.17006946878601 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:12:21.453576,0,1238354010,24.98730164778759 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:12:51.154193,0,1175439450,24.370328478496752 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:13:21.253989,0,1143982170,24.242154307788027 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:13:48.722881,0,1062497364,22.741194138868078 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, 1]}",2022-11-30 00:14:18.241161,0,1127331930,24.234335725130748 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': 
[1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:14:57.044258,0,1830758514,26.44666962861665 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:15:32.368557,0,1541959782,26.050701171511186 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, 1]}",2022-11-30 00:16:01.654068,0,1221703770,24.996951152986856 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:16:33.903998,0,1288381536,25.159811999467085 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, -1]}",2022-11-30 00:17:07.162316,0,1399480422,25.443390153441023 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, -1]}",2022-11-30 00:17:35.512007,0,982932564,21.871299717957186 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, 1]}",2022-11-30 00:18:09.340609,0,1416130662,25.447142262784027 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 
'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, 1, -1, 1, 1]}",2022-11-30 00:18:37.410682,0,1045847124,22.47970882205465 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, 1]}",2022-11-30 00:19:06.749493,0,1095874650,23.814093767809908 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 1, -1, 1, -1]}",2022-11-30 00:19:39.893348,0,1319838816,25.478955124339844 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, -1]}",2022-11-30 00:20:09.251167,0,1095874650,23.750972433853825 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, -1]}",2022-11-30 00:20:38.348608,0,1127331930,23.993329632521927 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, 1, 1, -1, 1, 1]}",2022-11-30 00:21:06.810591,0,1014389844,22.26199233258481 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, 1, 1]}",2022-11-30 00:22:10.464888,0,2500270194,26.490267758415033 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [-1, 1, -1, -1, 1, 2]}",2022-11-30 00:22:38.706298,0,1077304404,23.03639520261316 +"{'encoder_embed_dim': 
[512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, 2]}",2022-11-30 00:23:08.174166,0,1142138970,24.176833511635046 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:23:37.202278,0,1156869204,23.451597618181914 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:24:12.037672,0,1894982502,26.570692923549505 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:24:48.692945,0,1795538028,26.656160779307765 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:25:18.655715,0,1221703770,24.876611482664103 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:25:53.952187,0,1571573862,26.243286807849493 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 2]}",2022-11-30 00:26:28.198620,0,1397560416,25.575803459047332 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 
'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:27:03.351000,0,1619681382,26.29731327668079 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, -1, 1]}",2022-11-30 00:27:32.454206,0,1205053530,24.652752198325597 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:28:00.103253,0,1125411924,23.2436921421545 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:28:38.079240,0,2318626668,26.646036771408653 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:29:17.534918,0,2097273714,26.452370488677506 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:29:52.746931,0,1651138662,26.151231823189608 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 00:30:22.172110,0,1284618330,25.49140283243417 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 
-1, -1, -1, -1, 1]}",2022-11-30 00:30:52.032834,0,1284618330,25.483324803555185 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:31:26.157137,0,1682595942,26.243800774630134 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:32:02.794238,0,1412367456,25.73672237614388 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:32:38.498129,0,1412367456,25.694815754009024 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 2]}",2022-11-30 00:33:14.792750,0,1460474976,25.87514706229428 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:33:55.738731,0,2419783794,26.642604563885367 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:34:35.635995,0,2168052594,26.669695273057105 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:35:13.689253,0,2015788908,26.70123813916216 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 
3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:35:50.004608,0,1460474976,25.829650262816568 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:36:19.755425,0,1221703770,24.917846107035597 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:36:48.610538,0,1093954644,23.08549976883442 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:37:23.140196,0,1571573862,26.495457209152665 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:37:58.602077,0,1349452896,25.103371841310192 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:38:34.600291,0,1523389536,26.028214026012492 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 1]}",2022-11-30 00:39:10.001278,0,1349452896,24.972561656938478 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 
'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:39:44.443816,0,1714053222,26.16022517555216 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:40:21.100754,0,1491932256,25.983857779417527 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:41:01.553620,0,2459105394,26.362258167185217 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:41:41.438856,0,2136595314,26.728063906094565 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:42:09.810325,0,1093954644,22.9713116425012 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, -1, 1]}",2022-11-30 00:42:38.643170,0,1156869204,23.4523958088576 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:43:07.305868,0,1093954644,23.166873916592454 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 
00:43:36.751041,0,1316075610,25.51338789773949 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:44:07.237775,0,1253161050,25.004771948649424 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 00:44:43.589272,0,1747430508,26.62802459531885 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, 1]}",2022-11-30 00:45:13.240897,0,1142138970,24.356415715383232 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:45:43.655137,0,1301268570,25.49676417511692 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, 2]}",2022-11-30 00:46:13.344754,0,1142138970,24.224366625169438 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:46:50.784832,0,1945010028,26.612602269295685 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 4, 4, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 1]}",2022-11-30 00:47:21.162395,0,1238354010,24.912296082302873 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 
'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:47:50.863610,0,1190246490,24.44551868443464 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 4, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 4, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:48:20.655219,0,1190246490,24.332970362633144 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:48:57.884646,0,1984331628,26.656284353019814 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 4, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 1, 2]}",2022-11-30 00:49:36.098176,0,1976467308,26.4326896541673 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 00:50:27.405530,0,982932564,21.902795709633452 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:51:08.032064,0,2180770674,26.599782674768583 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:51:35.807356,0,1062497364,22.371131012623422 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 
'decoder_ende_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, 2, 1, 2]}",2022-11-30 00:52:05.763177,0,1079224410,23.52409715160256 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:52:39.045430,0,1571573862,26.256831208948178 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:53:12.642643,0,1508659302,26.00462272595473 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:53:42.368319,0,1158789210,24.50401996100462 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:54:12.059480,0,1127331930,24.087230950865788 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:54:51.957601,0,1956587634,26.41692984127425 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:55:29.312918,0,1826995308,26.542101655879033 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:56:07.198728,0,2007924588,26.616730692591272 +"{'encoder_embed_dim': [640], 
'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:56:48.032095,0,2149313394,26.572361617382416 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 00:57:18.152332,0,1095874650,23.782891130205417 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 00:57:47.584518,0,1173596250,24.547205997391597 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 00:58:17.345898,0,1110681690,23.94407626548769 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:58:47.056374,0,1190246490,24.491800170896195 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 00:59:16.745616,0,1253161050,25.254425153820556 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 00:59:47.189514,0,1236510810,25.16579743713827 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 
'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:00:16.840252,0,1347532890,25.676132056093994 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:00:46.759507,0,1221703770,24.768354572582403 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:01:21.302832,0,1412367456,25.746119527915003 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:01:55.146488,0,1634488422,26.23053149772474 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:02:23.005266,0,1062497364,22.439008935256105 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:02:59.282910,0,1778887788,26.56040601873973 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:03:36.281891,0,1826995308,26.583100181248412 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, 
-1, -1, -1, 1, 2]}",2022-11-30 01:04:12.687089,0,1715973228,26.303092956441024 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 3072, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:04:48.509486,0,1475282016,25.672530277500815 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:05:25.099950,0,1747430508,26.33985155284849 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:05:53.329815,0,1031040084,22.262405116542624 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:06:30.890827,0,1976467308,26.580438511490023 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 2048, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 01:07:00.560411,0,1079224410,23.728343099659895 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:07:28.617193,0,999582804,21.72906013722468 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 2048, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 01:07:57.530176,0,1014389844,22.471135438983048 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 
1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:08:32.633405,0,1682595942,26.278826842615302 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:09:03.054659,0,1284618330,25.50001897765904 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:09:31.687974,0,999582804,21.967989901189092 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 3072, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:10:06.993955,0,1506739296,25.812328256697807 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:10:36.948650,0,1284618330,25.440948757479756 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:11:11.600524,0,1380910176,25.703810906558953 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:11:46.811777,0,1380910176,25.597256331992767 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 2048, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 
4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 2]}",2022-11-30 01:12:14.807619,0,982932564,21.644880427454428 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:12:45.339890,0,1378990170,25.719053744735888 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:13:19.554133,0,1651138662,26.296467387644455 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:13:55.039207,0,1443824736,25.92682154046769 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:14:25.339862,0,1378990170,25.618097500353073 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:14:58.311883,0,1443824736,26.05836967026129 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [3072, 2048, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:15:35.880349,0,1826995308,26.665846653894516 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:16:05.964065,0,1316075610,25.603591515089885 
+"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 4, 4, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:16:35.543255,0,1316075610,25.726771790425637 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 4, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:17:09.354304,0,1588224102,26.44933722401916 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:18:13.173994,0,2136595314,26.65406908362065 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:18:40.903167,0,982932564,21.753935985565093 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 2, 1]}",2022-11-30 01:19:09.095827,0,1077304404,22.592892971887863 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, -1]}",2022-11-30 01:19:38.452316,0,1079224410,23.708705837824187 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:20:15.347627,0,1651138662,26.467237911633077 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 
'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:20:53.408627,0,2015788908,26.720937123019265 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:21:30.489630,0,1732623468,26.539145946856273 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:22:04.657299,0,1588224102,26.359690429140286 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:22:41.410352,0,1795538028,26.7016667761388 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:23:11.191925,0,1045847124,22.30727022285813 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:23:48.495707,0,1945010028,26.92853123605158 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 2, 1]}",2022-11-30 01:24:18.247397,0,1142138970,24.635872154023456 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 
'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:24:48.348618,0,1127331930,24.1530263191461 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, -1]}",2022-11-30 01:25:26.938625,0,1860372594,26.730853774460698 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:25:59.999243,0,1366103136,25.823752934217687 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:26:38.109367,0,1460474976,25.87027954805624 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:27:14.992048,0,1826995308,26.80291796821574 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 2, 1]}",2022-11-30 01:27:44.593561,0,1110681690,24.51106684934533 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:28:13.315260,0,999582804,21.754138520655324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:28:43.054739,0,1095874650,23.72840125694677 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': 
[1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 01:29:16.453346,0,1540116582,26.383471679803876 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:29:44.596431,0,1014389844,21.82697530935213 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:30:22.103158,0,1523389536,25.605380278566003 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:30:52.162081,0,1158789210,24.84491236430293 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 01:31:27.756932,0,1571573862,26.52406333073416 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:31:58.252949,0,1253161050,25.329648091984946 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:32:34.692819,0,1669708908,26.5312087342803 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 
'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:33:04.851690,0,1316075610,25.720879408387013 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 2, 1]}",2022-11-30 01:33:34.153425,0,1079224410,24.103822174703968 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:34:13.928399,0,2073680754,26.650073433583966 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:34:52.003469,0,1491932256,25.68166831220549 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:35:22.255164,0,1316075610,25.63882428357699 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, -1]}",2022-11-30 01:35:51.968182,0,1347532890,25.833426111273635 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:36:31.293492,0,1877022834,26.470763981649853 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 
01:37:01.112958,0,1062497364,22.484020387193706 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:37:41.228117,0,2105138034,26.83517275846072 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 01:38:19.780658,0,1443824736,25.801099953730787 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:38:50.000595,0,1077304404,22.347079305577246 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 01:39:21.802091,0,1380910176,25.69437359008145 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, -1]}",2022-11-30 01:39:51.661673,0,1253161050,25.278872112854447 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:40:28.495428,0,1701166188,26.376182230389777 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:40:58.659095,0,1284618330,25.38778340159123 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 
'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 01:41:29.227300,0,1284618330,25.42396262714132 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 2048], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:42:01.693769,0,1491932256,26.049173770932143 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 01:42:39.003275,0,1412367456,25.450957445704358 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 1, 1]}",2022-11-30 01:43:08.560591,0,1205053530,24.982224495498585 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:43:38.655869,0,1190246490,24.935927330385375 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 1, -1, 2, 1]}",2022-11-30 01:44:07.301698,0,1045847124,22.58554082987191 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 01:44:37.461976,0,1190246490,24.671958393096833 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 4, 4, 8, 4], 
'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 2, 1]}",2022-11-30 01:45:07.650100,0,1221703770,24.956348430879675 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:46:01.404524,0,1031040084,22.152066191159324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:46:38.986518,0,2082647148,26.632249118046865 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:47:07.608826,0,1125411924,23.429789667811992 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, -1, 2]}",2022-11-30 01:47:37.867613,0,1127331930,24.145917266173598 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:48:15.090054,0,1826995308,26.770198001318732 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [640], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:48:53.000294,0,2011868268,26.415120168334965 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:49:26.441014,0,1382830182,25.316083834952625 +"{'encoder_embed_dim': 
[512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, -1, 2]}",2022-11-30 01:49:59.544683,0,1414287462,25.711571467826957 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 01:50:30.849009,0,1223546970,24.831513194585497 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, -1, 1, 2]}",2022-11-30 01:51:01.107149,0,1079147604,22.675323576984212 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, 1, -1, 2]}",2022-11-30 01:51:31.946530,0,1143982170,24.30020519062015 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 01:52:01.944178,0,1158789210,24.760354519096623 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:52:32.644831,0,1175439450,24.745534135337422 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 2]}",2022-11-30 01:53:01.116235,0,1062497364,22.613029220889654 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [4], 
'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:53:35.041431,0,1619681382,26.2082672911056 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 01:54:05.943378,0,1255004250,25.072608209842993 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, -1, 1, 2]}",2022-11-30 01:54:35.408367,0,1110604884,23.1627621167102 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 01:55:06.650719,0,1317918810,25.578281446432214 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 01:55:36.475238,0,1127331930,24.221637357380914 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:56:12.790186,0,1669708908,26.54140556368842 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:56:43.360950,0,1206896730,24.98000861493876 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 
-1, -1, 1, 2]}",2022-11-30 01:57:13.953244,0,1364183130,25.71705181089393 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:57:51.090401,0,1701166188,26.74516092088886 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:58:27.981238,0,1732623468,26.832353130261968 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, -1, 1, 2]}",2022-11-30 01:58:56.696835,0,1047690324,21.97995176988117 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 01:59:31.345276,0,1588224102,26.3358070373563 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 4, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 02:00:00.113102,0,1062497364,22.362618000056415 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 2]}",2022-11-30 02:00:29.945995,0,1127331930,24.648057370763627 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:01:00.265931,0,1301268570,25.436935433814305 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 
'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 02:01:31.151981,0,1380833370,25.654131643295717 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:02:09.301256,0,1795538028,26.84364156318439 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 02:02:37.517073,0,1031040084,21.937041004808965 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:03:12.651254,0,1556766822,26.361614746242726 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, -1, 1, 2]}",2022-11-30 02:03:43.953623,0,1286461530,25.33590186847325 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, -1, 2]}",2022-11-30 02:04:14.640998,0,1127331930,24.26800692018226 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 02:04:48.236910,0,1445744742,25.8605605687801 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 
'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, -1, 2]}",2022-11-30 02:05:22.538414,0,1430937702,25.736368197822884 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 02:05:51.408503,0,1031040084,22.160851653528795 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:06:26.648873,0,1525309542,26.283505470568386 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:07:01.841145,0,1651138662,26.200923396852648 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:07:36.826535,0,1493852262,26.022938040890846 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:08:11.743622,0,1651138662,26.323507870111342 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 02:08:40.810079,0,1093954644,23.10914434974903 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 3072, 2048, 1024, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 4, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 2]}",2022-11-30 02:09:10.497316,0,1093954644,22.93525727677559 
+"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:09:45.035662,0,1525309542,26.303765162822714 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, 1, -1, 1, 2]}",2022-11-30 02:10:15.853041,0,1269811290,25.14969354058586 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 3072, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, 1, 2]}",2022-11-30 02:10:53.034327,0,1764080748,26.719747425528375 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, -1, 2]}",2022-11-30 02:11:27.448102,0,1493852262,26.147631963028342 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, -1, -1, -1, -1, 2]}",2022-11-30 02:12:01.745228,0,1462394982,25.806638854264037 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [2, 1, -1, 1, 1, 2]}",2022-11-30 02:12:33.064042,0,1192089690,24.67646662934568 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 1024, 3072, 3072, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 4, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, 1, -1, 1]}",2022-11-30 02:13:31.905219,0,1014389844,22.07444316375671 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 
'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:14:13.239756,0,2196499314,26.808937868203532 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 2, 1]}",2022-11-30 02:14:45.016226,0,1108761684,22.99185474514479 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 2, 1]}",2022-11-30 02:15:16.151531,0,1110681690,24.451311787293157 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:15:52.689296,0,1653058668,26.388890341662112 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:16:30.799650,0,1945010028,26.832269839482606 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:17:09.628703,0,1810345068,26.87035240576452 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:17:41.163075,0,1158789210,24.73994162716646 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 3072, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 
'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:18:12.461543,0,1253161050,25.18416515633912 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:18:46.947088,0,1445744742,25.761961494652613 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:19:21.548056,0,1540116582,26.36816465236295 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:19:55.543285,0,1477202022,26.259735292233113 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:20:26.257628,0,1410447450,25.769535189140193 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:21:00.935996,0,1508659302,26.45714906422908 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:21:39.693771,0,2064818028,27.05141228767061 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:22:16.858717,0,1634488422,26.322548970809848 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 
'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:22:53.592330,0,1715973228,26.657027696998984 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 3072, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 2, 1]}",2022-11-30 02:23:23.753177,0,1142138970,24.539293515717535 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:24:02.314804,0,1860295788,27.020732069299534 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:24:40.894881,0,2023653228,26.816836839483663 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:25:19.593071,0,1860295788,26.957189752523504 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:25:50.458625,0,1284618330,25.61733025863949 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, 1]}",2022-11-30 02:26:20.744436,0,1236510810,25.25415313294958 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 3072, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 
8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, -1]}",2022-11-30 02:26:53.298888,0,1045847124,22.08090715962085 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, 1]}",2022-11-30 02:27:27.137456,0,1492009062,26.052263535617904 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:28:01.945411,0,1571573862,26.118237209470863 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, 1]}",2022-11-30 02:28:38.594669,0,1747430508,26.903183105861135 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:29:18.253776,0,2076691314,26.879803077690028 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, -1]}",2022-11-30 02:29:56.298060,0,2003070828,26.839090098499142 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 1]}",2022-11-30 02:30:33.772050,0,1603031142,26.482417706814886 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, 
-1]}",2022-11-30 02:31:13.542551,0,2155334514,26.87436116692265 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 4, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, -1]}",2022-11-30 02:31:47.410084,0,1077304404,22.604758748152854 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 2, -1]}",2022-11-30 02:32:17.856179,0,1173596250,24.7651262424877 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, -1]}",2022-11-30 02:32:48.975305,0,1127331930,24.704098583912252 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [-1, -1, -1, -1, 1, -1]}",2022-11-30 02:33:19.260398,0,1205053530,25.137127615371046 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, -1, -1, 2, 1]}",2022-11-30 02:33:51.759557,0,1364183130,25.966442456302147 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:34:21.926894,0,1093954644,23.220261036558885 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:34:53.358603,0,1190246490,25.1359189611972 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 
'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:35:22.930318,0,1062497364,22.7755095138295 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:35:53.858887,0,1284618330,25.537416313175953 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 2, -1]}",2022-11-30 02:36:31.595724,0,1764080748,26.882073104251866 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:37:02.267669,0,1316075610,25.570143211664274 +"{'encoder_embed_dim': [640], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 3072], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [6], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, -1, -1, 1, -1]}",2022-11-30 02:37:41.647836,0,2134752114,26.898452516723687 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 4, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:38:13.005215,0,1031040084,22.282220573694513 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:38:44.151950,0,1347532890,25.784134961825835 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 4], 
'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:39:15.266274,0,1221703770,25.202375572779086 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, 2, 1]}",2022-11-30 02:39:46.953641,0,1332725850,25.851297056506393 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 3072, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 4], 'decoder_ende_attention_heads': [8, 8, 4, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, -1]}",2022-11-30 02:40:17.653376,0,1173596250,24.972344992193374 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 2048, 3072, 3072, 3072, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 02:40:49.609980,0,1062497364,22.535667407831653 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 2, 1]}",2022-11-30 02:41:20.761069,0,1110681690,24.40576457445596 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:42:27.496173,0,1828838508,26.85145321742662 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, -1]}",2022-11-30 02:42:57.919093,0,982932564,21.761604389716634 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, -1]}",2022-11-30 02:43:28.655608,0,1079224410,23.718368472465446 +"{'encoder_embed_dim': 
[512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:43:58.512911,0,1062497364,22.74182664332034 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 02:44:32.816447,0,1445667936,26.19212122197997 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 02:45:10.899684,0,1477125216,26.010153160637532 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, -1]}",2022-11-30 02:45:41.313907,0,1045847124,22.827474314422002 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 02:46:19.207836,0,1749273708,26.60418713411001 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 3072], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 02:46:53.592587,0,1414210656,26.00726305388265 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 4, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, 1, 1]}",2022-11-30 02:47:23.850237,0,1142138970,24.6489542661405 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 
'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 02:47:54.746779,0,1316075610,25.689218653291295 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, 1]}",2022-11-30 02:48:25.548201,0,1347532890,25.751911210030475 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 02:49:04.198324,0,1780730988,27.00579454971298 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 02:49:42.700124,0,1812188268,26.874801178813083 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:50:13.662203,0,1158789210,24.921368325030492 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:50:43.904715,0,999582804,22.026558500552152 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 02:51:22.015853,0,1686359148,26.62215586535193 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 3072, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, 
-1, 2, -1, -1, 1]}",2022-11-30 02:52:00.755827,0,1667788902,26.61057212093059 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 2048, 3072], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 02:52:37.454316,0,1636331622,26.6512297923577 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, 1]}",2022-11-30 02:53:07.714810,0,1014389844,22.393618015178536 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 02:53:43.144481,0,1510502502,25.993038659880476 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:54:19.052536,0,1573417062,26.421662873667255 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:54:49.657459,0,1095874650,23.883133083803862 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, -1]}",2022-11-30 02:55:21.070738,0,1206896730,25.148678288815 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 2048, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, -1]}",2022-11-30 02:55:52.009648,0,1014389844,22.06913488473211 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 
2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:56:25.509329,0,1382753376,25.54875613062997 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, -1]}",2022-11-30 02:56:56.957773,0,1301268570,25.649734062232383 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, 1, 1]}",2022-11-30 02:57:31.518411,0,1493775456,26.10832383442528 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:58:01.613773,0,1031040084,22.362002479551165 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:58:31.705698,0,1062497364,22.819328594276566 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, 1]}",2022-11-30 02:59:02.697733,0,1173596250,24.988685667678173 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 02:59:33.659490,0,1221703770,25.302962108892185 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': 
[8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:00:09.446922,0,1541959782,26.112077468690867 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, -1]}",2022-11-30 03:00:39.473024,0,1079224410,23.955772671551667 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:01:15.155182,0,1541959782,26.31776013053777 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:01:46.072247,0,1253161050,25.489643989727142 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, 1]}",2022-11-30 03:02:17.596453,0,1364183130,25.815981278057624 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, 1, 2, -1, -1, -1]}",2022-11-30 03:02:48.933726,0,1045847124,22.32432265753782 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, -1]}",2022-11-30 03:03:21.469495,0,1269811290,25.513369223069635 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:03:53.160286,0,1284618330,25.46582706181441 
+"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 03:04:31.684525,0,1717816428,26.710511535879036 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, -1]}",2022-11-30 03:05:10.042233,0,1717816428,26.84283267727433 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:05:41.855922,0,1190246490,24.970583908771232 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 2, -1]}",2022-11-30 03:06:20.184503,0,1636331622,26.504345633971067 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 1024, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:06:51.460598,0,1127331930,24.268248941796838 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:07:28.142691,0,1573417062,26.430998131687453 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:07:59.749480,0,1253161050,25.38845190370804 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 1024], 
'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, -1]}",2022-11-30 03:08:30.871157,0,1190246490,25.041571400304573 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 2048, 3072], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [4, 8, 4, 4, 4, 4], 'decoder_arbitrary_ende_attn': [-1, -1, 2, -1, -1, -1]}",2022-11-30 03:09:02.664589,0,1110681690,24.006237352801346 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 1024, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 4], 'decoder_ende_attention_heads': [8, 8, 8, 4, 4, 4], 'decoder_arbitrary_ende_attn': [1, 1, 2, -1, -1, -1]}",2022-11-30 03:09:34.238610,0,1238354010,25.34276030239052 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 2]}",2022-11-30 03:10:34.311288,0,999582804,21.80118151877014 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:11:13.593310,0,1797381228,27.043140669460854 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:11:43.503387,0,1093954644,23.007414463752117 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 2]}",2022-11-30 03:12:14.163528,0,1095874650,23.79086433289906 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 
'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:12:50.848022,0,1621524582,26.025864778342207 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [8, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:13:21.556759,0,1190246490,25.077358466768757 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:13:55.398877,0,1429017696,26.12059006524399 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, -1, 1]}",2022-11-30 03:14:26.326007,0,999582804,22.127524836343532 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 1]}",2022-11-30 03:14:57.762642,0,1221703770,25.21914419948441 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:15:36.252929,0,1667788902,26.6927938370969 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:16:14.254276,0,1652981862,26.66556123105007 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 2]}",2022-11-30 03:16:45.445260,0,1127331930,24.13548008816253 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': 
[1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 2]}",2022-11-30 03:17:15.639603,0,1031040084,22.152066191159324 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:17:54.414138,0,1477125216,26.149095479536744 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 2, 1, 1]}",2022-11-30 03:18:25.964419,0,1347532890,25.790673448942552 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:18:56.523414,0,1031040084,22.518290658576667 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, 2, 1, 1]}",2022-11-30 03:19:27.666139,0,1316075610,25.42608049975173 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:19:59.551900,0,1221703770,25.355575788780996 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 2]}",2022-11-30 03:20:30.914307,0,1062497364,22.58490426133199 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 
'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 2, 1, 1]}",2022-11-30 03:21:03.259812,0,1316075610,25.632790518407255 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:21:37.308205,0,1445667936,26.01769230789309 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 03:22:13.654111,0,1493852262,26.206006379852774 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:22:44.661526,0,1127331930,24.704098583912252 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 3072, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 4, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 03:23:16.791537,0,1158789210,24.4930221168223 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 1024, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, -1, 1]}",2022-11-30 03:23:48.818752,0,1062497364,22.608186762946183 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:24:26.151864,0,1590067302,26.365434081423825 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 
03:25:04.997659,0,1686359148,26.898935796393708 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:25:38.612617,0,1397560416,26.061090394566637 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:26:18.290877,0,1734466668,26.887018361917384 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:26:50.457197,0,1284618330,25.456784215173144 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [3], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:27:24.405212,0,1397560416,26.060964017387292 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:28:00.762632,0,1558610022,26.24033664128142 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, -1, 1]}",2022-11-30 03:28:30.909491,0,1093954644,22.91490934985308 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:29:06.861622,0,1573417062,26.51638836290174 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': 
[1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:29:38.763394,0,1253161050,25.32548928774925 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:30:14.354144,0,1541959782,26.36876929191619 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:30:45.758366,0,1095874650,24.13357019256553 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:31:22.344668,0,1527152742,26.27025764535305 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 1024, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, 1, 1, 1]}",2022-11-30 03:31:54.461060,0,1253161050,25.41836880480659 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 4, 8, 8, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:32:26.060470,0,1284618330,25.472978060833974 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [4], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:33:02.551992,0,1510502502,26.241380252439953 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 1024, 2048, 2048, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': 
[8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:33:33.960758,0,1347532890,25.707442276004617 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 4, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:34:12.877897,0,1717816428,26.937139740910155 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 2048], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 2]}",2022-11-30 03:34:43.973196,0,1095874650,23.88536811980862 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:35:22.799736,0,1749273708,27.035785346779857 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [2048, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, -1, 1, 1]}",2022-11-30 03:36:01.488138,0,1780730988,26.94260959266693 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [2048, 2048, 2048, 3072, 3072, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 3072, 2048, 1024, 1024], 'decoder_layer_num': [5], 'encoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [2, -1, 2, -1, 1, 1]}",2022-11-30 03:36:41.098531,0,1765923948,27.1368398440508 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 2048], 'decoder_layer_num': [1], 'encoder_self_attention_heads': [8, 8, 8, 4, 8, 4], 'decoder_self_attention_heads': [8, 4, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 2, 1, 1, 1]}",2022-11-30 03:37:11.314304,0,1062497364,22.486957414672982 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 'encoder_ffn_embed_dim': [1024, 1024, 2048, 2048, 2048, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 2048, 1024, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 8, 8, 8], 'decoder_self_attention_heads': [4, 8, 8, 8, 8, 8], 'decoder_ende_attention_heads': [8, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, 1, 1]}",2022-11-30 03:37:43.678526,0,1190246490,24.977323840246868 +"{'encoder_embed_dim': [512], 'decoder_embed_dim': [512], 
'encoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 1024, 1024], 'decoder_ffn_embed_dim': [1024, 1024, 1024, 1024, 3072, 1024], 'decoder_layer_num': [2], 'encoder_self_attention_heads': [4, 4, 8, 4, 8, 8], 'decoder_self_attention_heads': [8, 8, 8, 4, 4, 8], 'decoder_ende_attention_heads': [4, 8, 8, 8, 8, 4], 'decoder_arbitrary_ende_attn': [1, -1, 1, -1, -1, 2]}",2022-11-30 03:38:15.756806,0,1095874650,23.82580846204717 From 0b2fa19de0fe11841d67bca16f8755c7d484daf6 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 8 Dec 2022 03:07:52 -0800 Subject: [PATCH 38/60] Update MobileNetV3 example Signed-off-by: Maciej Szankin --- .../dynas/MobileNetV3_Supernet_NAS.ipynb | 43 +++++++++++++++---- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb b/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb index 4fdbc291284..cbbd678b4bc 100644 --- a/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb +++ b/examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb @@ -13,7 +13,7 @@ "\n", "#### Super-Networks\n", "\n", - "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification task on ImageNet-ilsvrc2012.\n", + "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. 
This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification on ImageNet-ilsvrc2012 as well as Transformer Language Translation (based on [6]) for the language translation tasks.\n", "\n", "#### Methodology\n", "\n", @@ -38,7 +38,25 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy==1.19.2 ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2" + "!pip -q install neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatievely, if you have a local copy of https://github.com/intel/neural-compressor, you can uncomment and run the code below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import sys\n", + "# sys.path.insert(0,'')\n", + "# !pip install -q autograd==1.4 fvcore==0.1.5.post20220119 numpy ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2" ] }, { @@ -84,12 +102,16 @@ "metadata": {}, "source": [ "### Define Architecture\n", - "We currently leverage pre-trained Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path. \n", + "We currently support pre-trained super-networks:\n", + "\n", + "1. Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path.\n", + "2. Hardware-Aware-Transformers (HAT) supernetwork [6] for language translation task on WMT14 En-De. To run this supernetwork you have to manually download preprocessed dataset from https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/configs/wmt14.en-de/get_preprocessed.sh and pretrained model from https://www.dropbox.com/s/pkdddxvvpw9a4vq/HAT_wmt14ende_super_space0.pt?dl=0\n", "\n", "Super-network options (choose 1): \n", "- `ofa_resnet50` - based on the ResNet50 architecture [4]. Search space of ~$10^{15}$ architectures.\n", "- `ofa_mbv3_d234_e346_k357_w1.0` - based on the MobileNetV3 architecture [5], width multiplier 1.0. Search space of ~$10^{19}$ architectures.\n", - "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. " + "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. \n", + "- `transformer_lt_wmt_en_de` - based on the Transformer architecture [7]." 
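Because the Transformer LT flow depends on assets that must be fetched manually (the preprocessed WMT data directory and the `HAT_wmt14ende_super_space0.pt` checkpoint linked above), a quick pre-flight check can save an aborted search run. The sketch below is illustrative only: the paths mirror the `# example` values used later in the notebooks, and the helper function is not part of the neural-compressor API.

```python
import os

# Placeholder locations -- adjust to wherever the WMT16 En-De binaries and the
# HAT checkpoint were downloaded (see the links above).
DATASET_PATH = '/datasets/hat_dataset/data/binary/wmt16_en_de'
SUPERNET_CKPT_PATH = '/datasets/hat_dataset/HAT_wmt14ende_super_space0.pt'

def check_transformer_lt_assets(dataset_path, ckpt_path):
    """Raise early if the manually downloaded Transformer LT assets are missing."""
    missing = [p for p in (dataset_path, ckpt_path) if not os.path.exists(p)]
    if missing:
        raise FileNotFoundError('Missing Transformer LT assets: %s' % ', '.join(missing))

check_transformer_lt_assets(DATASET_PATH, SUPERNET_CKPT_PATH)
```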
] }, { @@ -113,7 +135,7 @@ "* `['acc', 'lat']` \n", "\n", "Description:\n", - "* `'acc'` - ImageNet Top-1 Accuracy (%)\n", + "* `'acc'` - ImageNet Top-1 Accuracy (%) (for OFA supetnetworks) and Bleu (for Transformer LT)\n", "* `'macs'` - Multiply-and-accumulates as measured from FVCore. \n", "* `'lat'` - Latency (inference time) measurement (ms)" ] @@ -137,7 +159,8 @@ "* `config.dynas.num_evals` - Validation measurement count, a higher count comes with greater computational cost but a higher chance of finding optimal sub-networks\n", "* `config.dynas.results_csv_path` - Location of the search (validation measurement) results. This file is also used to provide training data to the metric predictors. \n", "* `config.dynas.batch_size` - Batch size used during latency measurements.\n", - "* `config.dynas.dataset_path` - Path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php" + "* `config.dynas.dataset_path` - For OFA it's a path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php; For Transformer LT it's a path to preprocessed WMT EnDe directory (`(...)/data/binary/wmt16_en_de`)\n", + "* `config.dynas.supernet_ckpt_path` - Transformer LT only. Path to downloaded pretrained super-network (`HAT_wmt14ende_super_space0.pt` file)." ] }, { @@ -272,8 +295,10 @@ "[1] Cai, H., Gan, C., & Han, S. (2020). Once for All: Train One Network and Specialize it for Efficient Deployment. ArXiv, abs/1908.09791. \n", "[2] K. Deb, A. Pratap, S. Agarwal and T. Meyarivan, \"A fast and elitist multiobjective genetic algorithm: NSGA-II,\" in IEEE Transactions on Evolutionary Computation, vol. 6, no. 2, pp. 182-197, April 2002, doi: 10.1109/4235.996017. \n", "[3] Cummings, D., Sarah, A., Sridhar, S.N., Szankin, M., Muñoz, J.P., & Sundaresan, S. (2022). A Hardware-Aware Framework for Accelerating Neural Architecture Search Across Modalities. ArXiv, abs/2205.10358. \n", - "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", - "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. " + "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", + "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. \n", + "[6] Wang, H., Wu, Z., Liu, Z., Cai, H., Zhu, L., Gan, C. and Han, S., 2020. Hat: Hardware-aware transformers for efficient natural language processing. arXiv preprint arXiv:2005.14187. \n", + "[7] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I., 2017. Attention is all you need. Advances in neural information processing systems, 30." 
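Taken together, the search parameters documented above translate into a configuration along the following lines for the MobileNetV3 super-network. This is a minimal sketch rather than the notebook's exact cells: the CSV name, batch size, and dataset path are placeholder values, while `NASConfig`/`NAS` and the `config.dynas.*` fields are used exactly as the notebooks describe them.

```python
from neural_compressor.conf.config import NASConfig
from neural_compressor.experimental.nas import NAS

config = NASConfig(approach='dynas', search_algorithm='nsga2')
config.dynas.supernet = 'ofa_mbv3_d234_e346_k357_w1.2'
config.dynas.metrics = ['acc', 'macs']
config.dynas.population = 50              # evolutionary population size
config.dynas.num_evals = 250              # total validation measurements
config.dynas.results_csv_path = 'results_mbv3_macs.csv'      # placeholder name
config.dynas.batch_size = 128                                 # placeholder value
config.dynas.dataset_path = '/datasets/imagenet-ilsvrc2012'   # placeholder path

agent = NAS(config)
results = agent.search()
```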
] }, { @@ -300,7 +325,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.7.11" } }, "nbformat": 4, From bfd3dc37529d818b8b066bad2ef26a83f48871ea Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 8 Dec 2022 03:08:20 -0800 Subject: [PATCH 39/60] Add Transformer LT example notebook Signed-off-by: Maciej Szankin --- .../dynas/TransformerLT_Supernet_NAS.ipynb | 310 ++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb diff --git a/examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb b/examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb new file mode 100644 index 00000000000..1e7ffcd71b5 --- /dev/null +++ b/examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction\n", + "\n", + "This tutorial demonstrates how to perform a multi-objective neural architecture search (NAS) on a MobileNetV3 one-shot weight-sharing super-network [1] using the Intel® Neural Compressor Dynamic NAS (DyNAS) search approach. \n", + "\n", + "#### Background\n", + "Neural architecture search, the study of automating the discovery of optimal deep neural network architectures for tasks in domains such as computer vision and natural language processing, has seen rapid growth in the machine learning research community. While there have been many recent advancements in NAS, there is still a significant focus on reducing the computational cost incurred when validating discovered architectures by making search more efficient. Evolutionary algorithms, specifically genetic algorithms, have a history of usage in NAS and continue to gain popularity as a highly efficient way to explore the architecture objective space. In this tutorial, we show how evolutionary algorithms [2] can be paired with lightly trained objective predictors in an iterative cycle to accelerate multi-objective architectural exploration. Specifically, we use a bi-level optimization approach [3] denoted as `dynas`. This technique is ~4x more sample efficient than typical one-shot predictor-based NAS approaches. \n", + "\n", + "#### Super-Networks\n", + "\n", + "The computational overhead of evaluating DNN architectures during the neural architecture search process can be very costly due to the training and validation cycles. To address the training overhead, novel weight-sharing approaches known as one-shot or super-networks have offered a way to mitigate the training overhead by reducing training times from thousands to a few GPU days. These approaches train a task-specific super-network architecture with a weight-sharing mechanism that allows the sub-networks to be treated as unique individual architectures. This enables sub-network model extraction and validation without a separate training cycle. This tutorial offers pre-trained Once-for-All (OFA) super-networks [1] for the image classification on ImageNet-ilsvrc2012 as well as Transformer Language Translation (based on [6]) for the language translation tasks.\n", + "\n", + "#### Methodology\n", + "\n", + "The flow of the DyNAS approach (`approach='dynas'`) is shown in the following figure. Currently, three pre-trained super-network options for the image classification task are provided. 
In the first phase of the search, a small population (`config.dynas.population`) of sub-networks are randomly sampled and evaluated (validation measurement) to provide the initial training set for the inner predictor loop. After the predictors are trained, a multi-objective evolutionary search (`search_algorithm`) is performed in the predictor objective space. After an extensive search is performed, the best performing sub-network configurations are selected to be the next iteration's validation population. The cycle continues until the search concludes when the user defined evaluation count (`config.dynas.num_evals`) is met. \n", + " \n", + "
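The bi-level loop described above can be summarized with a toy sketch. Everything below is illustrative: the function names are placeholders, the "validation" and "predictors" are random stand-ins, and the toy ranking uses a single predicted metric where the real flow runs a multi-objective evolutionary algorithm such as NSGA-II in the predictor space.

```python
import random

def sample_random_subnetwork():
    # Stand-in for sampling the super-network's configuration space.
    return {'depth': random.choice([2, 3, 4]), 'width': random.choice([1, 2, 3])}

def validate(subnet):
    # Stand-in for the expensive high-fidelity measurement (accuracy/BLEU, MACs).
    return {'acc': random.random(), 'macs': random.random()}

def train_predictors(history):
    # Stand-in for fitting cheap surrogate predictors on the measurements so far.
    return lambda subnet: {'acc': random.random(), 'macs': random.random()}

def predictor_space_search(predictor, n):
    # Stand-in for the evolutionary search in predictor space; this toy version
    # ranks by one predicted metric, whereas the real flow is multi-objective.
    candidates = [sample_random_subnetwork() for _ in range(10 * n)]
    return sorted(candidates, key=lambda s: predictor(s)['acc'], reverse=True)[:n]

population_size, num_evals = 5, 20
history = []                                               # (subnet, metrics) pairs
population = [sample_random_subnetwork() for _ in range(population_size)]

while len(history) < num_evals:
    history.extend((s, validate(s)) for s in population)   # costly outer loop
    predictor = train_predictors(history)                  # cheap inner loop
    population = predictor_space_search(predictor, population_size)

print('validated', len(history), 'sub-networks')
```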
\n", + "
\n", + "\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "\n", + "For released version of Neural Compressor:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -q neural_compressor autograd==1.4 fvcore==0.1.5.post20220119 numpy ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatievely, if you have a local copy of https://github.com/intel/neural-compressor, you can uncomment and run the code below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import sys\n", + "# sys.path.insert(0,'')\n", + "# !pip install -q autograd==1.4 fvcore==0.1.5.post20220119 numpy ofa==0.1.0.post202203231606 pandas==1.1.5 pymoo==0.5.0 pyyaml==5.4.1 scikit-learn==0.24.2 scipy==1.5.4 torch==1.10.1 torchvision==0.11.2 sacremoses==0.0.53 torchprofile==0.0.4 fairseq==0.12.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import Packages" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from neural_compressor.conf.config import NASConfig\n", + "from neural_compressor.experimental.nas import NAS\n", + "from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Configure NAS Algorithm\n", + "\n", + "The `NASConfig` class allows us to define the appropriate paramenters for determining how the neural architecture search is performed. Currently, the following multi-objective evolutionary algorithms are supported by the `dynas` approach: \n", + "* `'nsga2'`\n", + "* `'age'`" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "config = NASConfig(approach='dynas', search_algorithm='nsga2')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define Architecture\n", + "We currently support pre-trained super-networks:\n", + "\n", + "1. Once-for-All (OFA) super-networks [4] for the image classification task on ImageNet-ilsvrc2012. In the case where the super-network PyTorch model download fails, you can manually copy the pre-trained models from https://github.com/mit-han-lab/once-for-all and place them in the `.torch/ofa_nets` path.\n", + "2. Hardware-Aware-Transformers (HAT) supernetwork [6] for language translation task on WMT14 En-De. To run this supernetwork you have to manually download preprocessed dataset from https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/configs/wmt14.en-de/get_preprocessed.sh and pretrained model from https://www.dropbox.com/s/pkdddxvvpw9a4vq/HAT_wmt14ende_super_space0.pt?dl=0\n", + "\n", + "Super-network options (choose 1): \n", + "- `ofa_resnet50` - based on the ResNet50 architecture [4]. Search space of ~$10^{15}$ architectures.\n", + "- `ofa_mbv3_d234_e346_k357_w1.0` - based on the MobileNetV3 architecture [5], width multiplier 1.0. Search space of ~$10^{19}$ architectures.\n", + "- `ofa_mbv3_d234_e346_k357_w1.2` - based on the MobileNetV3 architecture [5], width multiplier 1.2. Search space of ~$10^{19}$ architectures. 
\n", + "- `transformer_lt_wmt_en_de` - based on the Transformer architecture [7]." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "config.dynas.supernet = 'transformer_lt_wmt_en_de'\n", + "config.seed = 42" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Select performance metrics\n", + "\n", + "Performance metric options are as follows. Currently, the `dynas` approach supports the use exactly 2 objectives.\n", + "* `['acc', 'macs'] `\n", + "* `['acc', 'lat']` \n", + "\n", + "Description:\n", + "* `'acc'` - ImageNet Top-1 Accuracy (%) (for OFA supetnetworks) and Bleu (for Transformer LT)\n", + "* `'macs'` - Multiply-and-accumulates as measured from FVCore. \n", + "* `'lat'` - Latency (inference time) measurement (ms)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "config.dynas.metrics = ['acc', 'macs']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Search parameters\n", + "\n", + "* `config.dynas.population` - Size of the population for evolutionary/genetic algorithm (50 recommended)\n", + "* `config.dynas.num_evals` - Validation measurement count, a higher count comes with greater computational cost but a higher chance of finding optimal sub-networks\n", + "* `config.dynas.results_csv_path` - Location of the search (validation measurement) results. This file is also used to provide training data to the metric predictors. \n", + "* `config.dynas.batch_size` - Batch size used during latency measurements.\n", + "* `config.dynas.dataset_path` - For OFA it's a path to the imagenet-ilsvrc2012 dataset. This can be obtained at: https://www.image-net.org/download.php; For Transformer LT it's a path to preprocessed WMT EnDe directory (`(...)/data/binary/wmt16_en_de`)\n", + "* `config.dynas.supernet_ckpt_path` - Transformer LT only. Path to downloaded pretrained super-network (`HAT_wmt14ende_super_space0.pt` file)." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "config.dynas.population = 50\n", + "config.dynas.num_evals = 250\n", + "config.dynas.results_csv_path = 'results_transformerlt_macs.csv'\n", + "config.dynas.batch_size = 64\n", + "config.dynas.dataset_path = '/datasets/hat_dataset/data/binary/wmt16_en_de' # example\n", + "config.dynas.supernet_ckpt_path ='/datasets/hat_dataset/HAT_wmt14ende_super_space0.pt' # example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Perform Search\n", + "\n", + "After the DyNAS configuration parameters are set, the search process can be started. Depending on how many evaluations `config.dynas.num_evals` were defined, the search time can vary from hours to days. \n", + "The search process will populate the `config.dynas.results_csv_path` file and will also return a list of the final iteration's best sub-network population recommondation. \n", + "\n", + "Note: example search results are provided for the plotting section if you wish to skip this step for now. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent = NAS(config)\n", + "results = agent.search()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot Search Results in the Multi-Objective Space" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcMAAAFOCAYAAAD6qHbYAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAACjpElEQVR4nOydd3gc1fWw37NFq94s925jg00zYNOLgdAh9JoQIAmE8Ev7QnolvRcSOqGDaQYbAwZccO+9d1myem/bd2fO98eMZclWtSVbtud9nnm0O7edmV3N2XvvKaKqODg4ODg4HM+4jrQADg4ODg4ORxpHGTo4ODg4HPc4ytDBwcHB4bjHUYYODg4ODsc9jjJ0cHBwcDjucZShg4ODg8Nxj6MMHY4oIvKyiPz+SMvRXYiIisgJR1oOBweHtnGUoUO7iEieiHyhg3XnisjXu2DMr4rIShGpFpEdIvJ7EUlup839tvL50X7nC0Vk4qHK1B2IyFUiMl9EGkSkQkTmicgXj7RcDg7HG44ydOhxiMh/gYnAl1Q1G7gAaAA+FZGkdppXAz8SkbTulRJExHOI7W8D3gVeBQYBfYFfATccunSHJJeIyBF9NoiI+0iO73D84ShDh05hz74WisjfRaRGRHaLyDV22R+Ai4AnRMQvIk/Y508SkZn2LG+biNzRRv9XAgL8ElgjIjXAJ8D7wL+Bn7Uj4hZgCfD9Vvp3ichPRGSXiFSJyDsikm2XTRSRwv3qN86KReQxEZksIq+LSD1wv4icLSJLRKRWREpE5AkRSWhHRkREgH8Cv1PV/6lqnaqaqjpPVR9sIusvRCRfRMpF5FURybDLhtmz4AdEpMD+LB4WkQkist6W54km490vIots+epEZKuIXN6kfK6I/EFEFgFBYERbn5uIXCsim+0ZbZGI/MA+nyMiH9njV4vIgr2KVUTG2OPUisimpjNge7n8aRGZLiIB4NL27qGDQ5eiqs7hHG0eQB7wBfv1/UAMeBBwA98EigGxy+cCX2/SNgUoAB4APMAZQCUw1i5/Gfh9k/rPA1nAMGCufW4S8Kj9+t025LwfWAiMA2qAbPt8ITDRfv1dYCnWTMwHPAu8aZdNBArbuPbH7Gu/CeuHZBJwFnCufW3DsJTx95q0V+CEFmQ9yS4b3sb1fBXYCYwAUrF+ELxmlw2z2z8DJAJXAmFgKtAHGAiUA5c0uTdx4P8BXuBOoK7JPZoL7AFOtq8lo53PrQS4yH6dBZxpv/6TLZPXPi7C+nHjta/lZ0ACcBnWbP/EJt+DOqxVABeQeKS/985xfB3OzNDhYMhX1edV1QBeAfpjLfG1xPVAnqq+pKpxVV0DvAfc3kp9r6rW2K8vFJEwMAR43T4XaE84VV0LzAR+3ELxw8DPVbVQVSNYCu62Tix5LlHVqWrN4kKqukpVl9rXloelXC/pQD+97L8lbdT5EvBPVc1VVT/wU+Cu/WT9naqGVXUG1r15U1XLVbUIWIClxPZSDvxbVWOq+jawDbiuSfnLqrpJVePA1bT9ucWAsSKSrqo1qrq6yfn+wFB7nAWqqlg/GFKBP6tqVFU/Bz4C7m4y/gequsi+t+H2bqCDQ1fiKEOHg6F07wtVDdovU1upOxQ4x14aqxWRWqyHfL9W6sdEJMt+vRAYA2RizU7Amml2hF8B3xSR/ZX0UGBKE1m2AAatK/P9KWj6RkRG28uCpfbS6R+BnA70U2X/7d9GnQFAfpP3+Vj3oamsZU1eh1p43/RzKbIVU9P+BjR53/Ta2vvcbgWuBfJto5/z7PN/w5oBzhCRXBH5SZNrKVBVc7/xB7YyvoPDYcVRhg5dzf5pUAqAeaqa2eRIVdVvttJ+MpZCiQDbVHU38HPgW7bBybYOCaG6FWtZ8ectyHPNfvIk2jOpANBosWobcfRu5/qeBrYCo1Q1HWsZUDog4jZbllvbqFOMpZT2MgRrqbOs5ertMtDeq2zaX3GT902vrc3PTVVXqOqNWEuyU4F37PMNqvqoqo4Avgh8396bLAYG72eYMwQoamV8B4fDiqMMHbqaMqw9rr18BIwWkXtFxGsfE0RkTEuNVfUzIAr8HfiX/fBcCtQD3wL+0AlZfoO155XZ5NwzwB9EZCiAiPQWkRvtsu1AoohcJyJe4BdY+4ptkWbL5heRk7D2UNvFnqF9H/ilbQSTbhvMXCgiz9nV3gT+n4gMF5FUrB8Jb9vLmAdDH+A79mdwO9ase3ordVv93EQkQUS+JCIZqhqzr98EEJHrReQEW+nWYc26TWAZlmHOj+y+JmJZzb51kNfi4NClOMrQoat5HGsPrkZE/qOqDVjGHXdhzQ5Kgb/QhpJR1e8Cs4A3sFwlFmItj16rqqGOCmLPKl+j+dLq48A0rGW8BixFe45dvw54BPgf1owlgGV80xY/AO7BMgZ5Hni7E/JNxjJk+SrWvSkDfg98YFd50ZZ/PrAby0Dm2x3tvwWWAaOwDGH+ANymqlUtVezA53YvkGcvDT+MtYSK3f8swI9l1fuUqs5R1SiW8rvGHv8p4Cv2DN7B4Yiz1wLQwcHhGEZE7sey8r3wSMvi4NATcWaGDg4ODg7HPY4ydHBwcHA47nGWSR0cHBwcjnucmaGDg4ODw3GPowwdjltE5BkR+eWRlqMj2C4gW6X9QOU9BhF5T+y4tQ4OPR1HGTp0KWIFtg7ZAZxrRWSxHUC6x33XVPVhVf1dZ9vZ11guIilNzn1dRObuV0/sKCybW+jjZBGZYQezrhWRVSJybRvD/gQrXFrIbj9XRML2fa632/9ERNrzi9w7fofTXTWpe2cL/fxMrGDtfrttU9eSv2C5ijg49Hh63APK4ZjgBlVNw4qe8mesGKEvHFmRuhw3VtDvtrgYy9F9hIhM2K/sQ6z4qf3sOt/Bcl4/AFvB3ce++Kx7+ZZ9n/sDj2L5BE7fL8pMW3Q03dV9dt2v7CfXfVj+hl9Q1VRgPDB7b7mqLgfSRWR8B+VxcDhiOMrQodtQKy3RNCzH8vtE5BQ7ikmZNMlXJyK3iMg6+/VjYqVVetWe9Wxq+jCVfemXGsRKIXRzk7K9aYr+Zc+2ckXkfPt8gT2bu69J/ZdF5PdN3t8oImvtmdYuEbm6jcv7G/ADEclso859WA700+3Xe8fJAYYD
z9tBq6N2gOqFrfRzDlCrqi0GAFDVgKrOxQp/dh5wnYj0E5GgiOwNCI6InClWAmGvfarNdFd2m6FYgccfAq4SkaYxZScAn6nqLluOUlV9br8u5tI8GLiDQ4/EUYYO3Y49QyjESvmzAitI9ZVNqtyLleB2L1/ECtOViRUt5okmZbuw0gJlYIVbe11Emga7PgdYj5UVYpLdzwTgBODLWLkWDwgqLiJn2zL80B73Yqz0Ta2xEutB/4OWCkUkGbgNK4rOG1jZJvbmOazCCmb9uojcJAcGE9+fU+lATFZV3WPLdZGqltryNc0deS/wlh1CbS+/BL4ndk7HFvgKsFJV38NSnl9qUrYU+IqI/FBExkvLCXm3AKe3J7uDw5HGUYYOh4tiYO8D9xUsxYT9EL4KS3HtZaGqTrdTRL1Gk4epqr6rqsV2mp+3gR3A2U3a7rbTDhlYodEGA79V1Yid5iiKpRj352vAi6o60+67qAOhwn4FfFtE9g/mDXALVrDxGcDHWPn8rrOvQbGS1+YB/wBKRGS+iIxqZZxMrHBvHaG1++zGSpf0WtPK7aS7AksZ7v1sJtFkqVRVX8cKD3cVMA8oF5H9+2mgeWxYB4ceiaMMHQ4XA7H2ncDa+7rBNkC5A1igqk3z+pU2eR3ECp7tARCRr9hLmXvTCp1C85RJ+6cwQlXbSmu0l8FYs84Oo6obsQJa/6SF4vuAd+xcgGGsXID3NWlbqKrfUtWRWHurAZrPjptSgxUQvCM0vc8fYOUcHA5cAdTZs/T9aTHdlYhcgLWcuzeY9iTgVBEZ1+Q63lDVL2ApvIeB34nIVU26SQNqOyi7g8MRw1GGDt2ObTwyECvgNna6pCVYs6d72W+20kY/Q7GCYX8L6KWqmcBGOpYyqT0KgJEH0e7XwIM0ycsnIoOwMrl/Waw8h6VYS6bX2vuFzVDVAuBJLMXeEuuB0e0JIiKDgbOwkvpiK+F3sGaHrd7nNtJd3Yd1b9fa17Csyfn9+4ip6ru2rE2vYwywrj3ZHRyONI4ydOg2xEpLdD3WzOJ1Vd3QpPhV4EdY+2Hvd7DLFKycdxV2/w/QugLpLC8AD4jI5WKlUhooVkqmNlHVnVjLsd9pcvperHRQJwLj7GM01r7p3SKSJSK/ESvVkctWkF/F2oNrieVApogMbKlQRJJF5BKsmeBymqdlehW4H2sftq0fHc3SXYlIItas/aEm1zAOa1n0HhHx2IZJ14lImn0d1wAns09pgmV880kb4zo49AgcZejQHXwoVnqkAqzZxj+xHrRNmYKddV5Vgx3pVFU3Y+2xLcFaDj0VWNQVAtvLhw8A/8LKwzeP5ol12+K3NE8TdR9W6qLSpgdWLsX7sPYth2GlOqrHmt1GsJRWS7JFgZex9/+a8IR9n8uAf2MtxV7dNJu8qi7Cyie4WlXzW7uAFtJd3YS1pPzqftfwIuABrrZl/xmwB2sp9K/AN/daxdorAv5WlmYdHHoUTmxShyOGiOwCvqGqs460LD0d20hnAXBGZ3I62m0/Byap6v+6RbjWx30PeEFVW0sg7ODQY3CUocMRQURuxYpQMrrpTMaha7FnZzOBwXbCXgcHhxbwHGkBHI4/xApbNha411GE3YeIvIK13PldRxE6OLSNMzN0cHBwcDjucQxoHBwcHByOexxl6ODQBBG5QER22FkYbjrS8nSU/eOsOjRHrEwjXzjScjj0XBxleBQhIheKlRKpTqzUP4vkwGwIPRqxUgG1FA5tb/lEEWkxIPVh4rfAE6qaqqpTj6AcB00PuIeO8nE46nAMaI4SRCQdK/TXN7GiiiRgBayOHEYZBGuf+YgavYiIR1Xj3dT9UGDTwTTsZrkQEbcdc9XBwaGLcWaGRw+jAVT1TVU1VDWkqjNUdb1YaY8ac92JyDB7BrY3nudcEfmTiCy30xN90DRLgYica884a0VknTRJ7mq3/YOILMKKEzrC7vthezmxVkSetBXl3jZfFZEtIlIjIp/ZYdQQkfl2lXX2MmSzZLF2rNJPgAF2uV9EBtjXN1lEXheReuB+ETlbRJbY45eIyBOyLysEbcloR36ZZ8+wK8VOSGv7PY7AChrgFxGfPf40eya+U0QebDJGS3LNFZHf2/fTLyIfikgvEXnDvvcrRGRYkz5OEpGZdv/bROSOJmUvi8jTIjJdRAJYwb3bpLV72ErdRLESMefY738uInH7hxci8jsR+XcTWZ4SkU/sPheJlSbq3/bnvFVEzrDrvgYMaXIff9TS+E3kaO/79zt7vAaxEiIfENKuhT7vFZF8EakSkZ/vV+aSfanAqsRKGdZa1g6H4wVVdY6j4ADSsVL/vAJcA2Q1KXsMK9zZ3vfDsMKWeez3c4EirNBlKViRSl63ywba/V6L9ePoCvt97yZt92CF2fJgZV9QrFlqJtZDrwIr8gnAjVjpicbY9X8BLG4imwIn7HdttcCF9uuJQOF+5Y8BMSw3AReQhBWD81x7jGFYqYK+t984rcn4JlZkHBeQuHdsuywPK1nt3vfzgafseuPsfi5rQ6659vWPxEoztRkrNNsXbFlfBV6y26dgRel5wC47A6gExtrlL2NFw7lgr6xtfD9eBn7f2j1so9184Fb79QysYOXXNCm7uUn/lfZ9TwQ+B3ZjZbFwY2W0n9PafWxj/I58/3Zh/Rjce3//3E6fYwE/VhouH1YEpPheebCSMi8FBtnlzwJvHun/cec4soczMzxKUNV64EKsh/zzQIU9Y2kvF95eXlPVjaoawMphd4dYaX2+DExXK2WSqaozsXLiXduk7cuqukmtDAx7c+H9WVVr1cqhNwdLUYCVueBPqrpFrSXDPwLj9s4OW7m2TG09se1elqjqVFvGkKquUtWltkx5WA+0S/Zr05qMMazl0AGqGm5tbLECX18A/Niutxb4H80zvjeTyz73kqruUtU6rFnaLlWdZd+Pd7GUHsD1QJ5aKafiqroG64fK7U36/0CtxL+mWoG3u5p5wCVirSKcBvzHfp+IlQdyfpO6U+z7HsYKpxdW1Vd1X7qsM+g8Hfn+vaSq2+37+w77PsfWuA34SFXnq2oE6/vedGn/YeDnamUOiWD9qLnNvgcOxymOMjyKsBXM/ao6CGuWNwArJmVHKGjyOh9rhpeDpRRut5eo9qZFuhDo30rbveyfZmlvWqShwONN+qrGynzQYpDpTtBMBhEZLSIfiZUVoh5L6e6/fNaajD+yZVouIptE5KutjDkAqNbmDuv5NL+Wlu7N/imjWkshNRQ4Z797/yWgaTb5lvrvSuZhzSTPBDZgRau5BGvWvVNVq5rU7eh1dYaOfP9a+xxbYwBN7pv9A7DpdQwFpjQZbwtgAB39YelwDOL8EjpKUdWtIvIy8A1gNZDcpLhfC00GN3k9BGt2VIn10HhNVR9soU3jcJ0QrQD4g6q+0Yk2HRlr//NPA2uAu1W1QUS+hzUjaH8AK+D0g2BZ6AKzRGS+WhkomlIMZItIWhOFOARrybk9eTtCATBPVa9oS9yD6LczbRZjZde42ZZls4gMwZqZzTuIsTsrQ0e+f52lBGuZHrCyegC99hv
zq2oFMXdwAJyZ4VGDbWjxqFi58vYu4d2NtfexFrhYRIaISAbw0xa6+LKIjLUfDL8FJtvLW3sT7V4lIm7bqGLi3nEOgmeAn4rIybacGSLSdNmvDMtIpTXKgF72dbRFGlbWBL9YqZa+2VEBReT2JtdXg/XgPsBCVq08g4uBP9n35TTga1j3rCv4CBhtG3t47WOCiIxpt2XbdPQeolbGkFXA/7FP+S3GWko8FGXY3ue8l67+/gFMBq4XyxUpAev73vRZ9wzwB9ln2NVbRG48hPEcjgEcZXj00ACcAyyzLQuXYqX+edTeZ3kbK7HqKqyH7P68hmUEUYplAPEdaHzg34iViqcC61fzDznI74aqTsEKwP2WvXy5EcvgZy+PAa/YS1R3ANgWhxfZ7bdiGbjk2nVatIQEfgDcg3Vfnse6/o4yAes++oFpWLE7c1upezeWgU4x1j7Zr7WLsmzYs80rgbvs/kux7p3vEPvt6D3cyzysZfPlTd6n0Xy/sLP8CfiFPf4P2pC1S79/dp+bsJT7JKxZYg1WLsm9PI71uc8QKwXWUqz/LYfjGCc26XGAWIGxX9fDnMLHwcHB4WjBmRk6ODg4OBwytk/oPPvv+51s2541+f717xcRl/36JyJyqAZ6jjJ0cDhasC1f/S0cX2qn3SettPvZYZL7S62Mf1CRfrqrT4cu4XJVnaiqt3TzOPdj6y9V/bOqFrVdvX2cZVIHBwcHh0PG3o75gu1Piz1b+4eq3mX7NM9W1Yki8g6WG0sEuE1V60VkoapeuPfv3v7s+j8FrsaydXgYa397BrAOy+/3cqygDxXAG1gBStaq6ndE5H7gBqyADQDXaStKz5kZOjg4ODh0FbPtZdKn7NlallgBHC5in0HW/ap6CVYAhTtb66gJj9v1vwT8QFWXY1nQX66qrzWp9xDwtqpeDCSLyF6jqEJVvRbLJeq01gZx/AwdHBwcHLqKy7V5sPrPsKzJLwOet2eIfxORU7FmcFNa6kRkX6xj4F57K8Ckbf/VkcB0+/VKYG92nI323yKs8IwtctQpw5ycHB02bFiX9BWPx/F4evYtcGTsGhwZuwZHxq6hp8m4atWqSlXt3Q1dT8Zys+mvVlKBs4AUVb1YrKD3+xu+iIj4sAJB7OURrFB/I7HcqMAKGuLGijm7l11YsXM3AeOxllBPorkCbapkm9FzPo0OMmzYMFauXNklfVVWVpKT024A/COKI2PX4MjYNTgydg09TUYRye+irmaLiAKGql6uqntEZDhWgHWAbcAJIvIplk/p/oYvLwMLsWL67mU51hJrU7/Xj4GpItLUXex5YJKtZNer6lI7IEeHOOqUoYODg4NDz0NVJ7Zy/vwmr/1YsWf3r3Oh/fd59s3+9pYdEKpPVf/NvrjM7zUpuna/ei83ef1YW/I7BjQODg4ODsc9jjJ0cHBwcDjucZShg4ODg0OXICKX264V80Vkioj0ar9Vm/3dJCLZXSVfWzh7hg4ODg4Oh4yI9AZ+BVxvp1UbDSQcYrc3YblGVB9iP+3izAwdHBwcHLqCa7FyUzYAqOp2VS0RkdNFZJGILBWRL0NjHFPP3tf235dF5BkRWSgivxYrr+bVwBsi8sPuFt5Rhg4Oh4mS3DJCgfCRFsPBobvoj5Uya39+hxU95iLg2yLibaOPz2zL0mtVdQ/wKfAlVf1bl0u7H44ydHA4DMSicf74pf8w+Z8tpZp0cDgmKAFayp2Zpap5qhoDdgN9aN0Rfm+0mFD3iNg6jjJ0cDgMLJyyjJqyOj6ftJCa8rojLY6DQ3cwHfiyiKQBiMgJItIfqBWRYfaMcARQDtQB/UUkB+jXpI/9w63tjTTT7TjK0MGhm4lF40z+x0eIWwgHI3z07MwjLZKDQ5ejqhVYS6Ifich84O9AFMuoZhJWZJkn7Rnic8CHwGNY2SZa4zPgKRF5uBtFBxxrUgeHbsM0TQq3lZCWnULvITmk56QBEAlGmtWrqWxg/dKdnHzu4CMhZrcy7fMN1PtDfPmLZx9pURwOA6o6C5i13+kq4Pz96k1nX1Dtvefub/J6ov13MlZ8027nsChDERkMvIqVw0qB51T1cRF5m30BWTOBWlUddzhkcnDobtbN28x/Hvkff/z4pzw2+dFmZdFIHI/XjcslTH1pAZ9/sJpfPHdPh+NV7syvYNvuMq6beEp3iN4lBIIR3py+CsMwuPbik8nOTDnSIjk4tMrhWiaNA4+q6ljgXOD/RGSsqt6pquNsBfge8P5hksfBoVsxTZO3/zqNQH2Q9//zSbMyVeVPv53Kh1NXUVlax8JP1+Nyu1j4yfoO9a2qPPfWIl58dwmVNf7uEL9L+Hj+JmKxOKapvD9r3ZEWx+EoRkTyRGSDiKwVkZX2uWwRmSkiO+y/WfZ5EZH/iMhOEVkvImd2ZIzDogxVtURVV9uvG4AtNEndYeeuugN483DI4+DQ3ayfv4WCbUWkZaex5MOVlOSWNZZt3ljEjm2lfPj+Kqa8vAB/fQg1TTYsz6WssH3f4o3bi8ktqEAV3p+xthuv4tD4ZP5myxpC4POl24nG4u01cXBoi0vtydN4+/1PgNmqOgqYbb8HK3/iKPt4CHi6I50f9j1DERmGlZtqWZPTFwFlqrqjlTYPYV0UgwYNorKysktkqavr+VZ9joxdQ3fKWFNWi8vtJsPeEwQoLytn7CV2blGBPXmFeNMto7iPpy0hMyGKOwFIhi99/1KrnitOKBqgstJsc7zpn68iJcHEMJS1G3aSf95wUpJ9ByW7qYpLWk3xdgCduY8/f/BiolEDAI/HTX1dbWfFOyiO9+/jccSNwET79StYaaJ+bJ9/VVUVWCoimSLSX1Vb8oFs5LAqQxFJxVoO/Z6q1jcpups2ZoWq+hyW9RHjx4/XrswD1pNyirWGI2PX0B0yqir/+uqLJKX4+Pmk77A3QfeVd17KFXdMRBVcrn3KpqYmwNZ1ZZTMWI87I4WM3n148DvXAx3PcXfJeaewZG0JsbjJZReOpn//viR4O/+vXOb388gH03jyizfQLy2t/QY2Hb2PR/Ircbx+H49hFJhh50p81tYJfZsouFIsmxSwVh0LmrQttM+1qQwPm2uF7WPyHvCGqr7f5LwHuAV4+3DJ4uDQWUL+MH99+AXqq5vv0W1YuJWiHSXsXJvHjtW7MeIGW1bsAmDS1BU89ercxrq1tUGyslK47vzh9OmTTq8kD1++YzydZU9hDR63i5RkH2s3F+JxH5wb1utr1pJXU8ura9YeVPvOEonEOly3pLSOvz/+KYbR9izZoeeSI/00XbLbPURko4isbHI81EJ3F6rqmVhLoP8nIhc3LbRngfv7KHaKw6IM7T3BF4AtqvrP/Yq/AGxV1cLDIYuDw8Hw+eRlrJy1kemv7Eu2raq8/bdpRMMxIqEob/9tGounr+EvDz7PlnV5fDR7AwuW76KkvI4du8r47o/eZPeuMj56bibRcIxAXfCAiDSvv7eUH//wTUxTG8fYn227y0j0eUnwuAmFY1TXBTp9PWV+Px9v206f1FQ+3b6D0oaGTvfRGQqKa3j4p5OoqOrYOO
9OWcHCxTtYtjIXsBTpY//4kMrqnmsw5NCcKFHO9VzZ7gGEVXV8k+O5/ftS1SL7bzkwBTgbKLOd+rH/ltvVi4CmfkqD7HNtcriWSS8A7gU2iMha+9zPbF+Tu3AMZxx6MCF/mA+fn0NGTiqz3lzMtfddTHp2KqZh0ndIDtGoicstZPZO5+1/f0IkHOPJX0/G6J8BwLsfraKuIkBtXZBpn67j9kdvwIhZe2lZ/TKZtWY7A3tlkOIymPTaYoz6CCuW72L8hBH85jdTuOuucxk7ttHejF9++9oW5ewMi/LyiZkGpt/ASIAFefncfmrbbhrRWJyN24qZeBDLe+98uJKKygben76Gb9y770d9NG6wp7qWE/rsy/RTUlrLshW7SE1LZNI7Szln/AhmLdzKynX5vP/xah669+KWhnDoaYiAHPp8S0RSAJedCSMFuBL4LTANuA/4s/33A7vJNOBbIvIWcA5Q195+IRw+a9KFqiqqetpeVwpbEaKq96vqM4dDDgeHg2HFzA3464KgEA5GWfDBSgDcHjcP//M+wqlpxDIyOf3yU8nPLSdoGuxeuRujPoQqLFq2i42bi8jOTmHpyt2cef14bnj4Sm54+EpOv/I0nvhgEf/9YBGffL6ReEMUdQnPPzuHZctz2bSlmNffWNziDPFQuOnksTxz5Q30ynPz9BXXc+spJ7fbZs6ibbw9bSU786yAIarKh7M3EAhF22xXUFzD8rX55GSnMXfJ9mazww/Xb+U773xEbXBfAPPPZm4kGjVwCZSV1bNy9W7e/XAVmelJzFm8rdXZYa0/RG3DYQ9p6dAKAohL2j06QF9goYisA5YDH6vqp1hK8AoR2YG1wvhnu/50IBfYCTwPPNKRQZwINA4O7XDW5afw6xF9Gt/3G7pvZrRoxkYC/jCCsGLpDuL9UlDgzLNHcte3r2HEKYOZPXcz0z5eiwIpyT5y8yro19eaNU5dsgnDVArKaghvLgVVQCgqqObpZ2dRb0TIz69k06YiTjllUJddk0uEqbPW4w9GmDprA6c82L/N+tFYnLenrcLngbemruAX37uWTdtLeP7tRURjcW69+oxW267dVIBpmPiDYUSEDVuLuOyCkwjH4ry6bDWBSJR3V2/gwQsnAHDDteM45+wRje2Ly+to8IdJ9HmJRg1mL9jCnTdOOGCcf74+B1OV791+zkHeFYcupwtmhqqaC5zewvkq4PIWzivwf50dx1GGDg7tkJKexKjThx5w3jBMpryykEgohqqydPUezCtHIiK4Rw3mtHMs14obrzuDG687UFmEIjHeX7iBmGEQj5ukj8jkGzecC0B9dYBJ7y4lapj4XVFmf765S5VhXnE1KzbtITsjmZWb95BbVMWIga0nJZ+7eDtVNQEG90ti1fp8du4u57Wpy/G63Uz5bB1XX3IyKUkt53G94YrTuOGK0w44//GGbdSGwiR6Pby7agO3n3kqmcmJ9OqVSq9eqY31hg/rzbDB+36A9Mk50PJ1+54KNu4qQYE9pTWOpWZPQATcR0/4a0cZOjh0kphh8N+PFvHVyydw61cvJhqOoQpvLFyLOzURgIr6AHHDxNPGw8DjdvHIDecTty0mk1xxJp5l7dutKizi6R1riQTiePolc98DFzZra5gm/miUjMTEg7qG3MJKMlITUYWEVDe7CyvbVIYZ6UlcNXEsCe44J5/kIa+omk07SkjwuqmpDzJjwWZuvnJcp2TwRyKM6dsbALfbRXUgSGbyvuuJmnESXB4SfV5GDuvdZl+vT19BLG7dxxnLtnHmKaM6JYtD9yAuRxk6OByzzNuQy7Rlm+mVlsx914ynvLgWl1u48pazWm3jD0V4+ePlfOOm8/F6LFcIr8fNFWeOxlRFgKqqqsb6H27YSihugE+o9ofYVlPFOWnJjeVvrFnHzJ07eeWO2zrlNL+Xy84ezWVnj+5w/XPOGM45Zwxv9IV8felqzr5kBKf2tbLvnDi8Tzs9HMh9553Jfee1HCkrbho8vOQN7j/hfC7u27ZiM00lZpgM7ptpvzcxTBP3UfQgPmY5iO/mkcJRhg4OrfDx64uYcOlY+gzMajwXMwxenr2StKQE3l+8kZvOOZn//vYDEpMT+Nk/7mp0uj+gr0Wb+WDBRk4c0ocrzj6xWdkvZ87ilH59uWLAvryovdxJZCclkZyQQDgWI6nJv2pDJMKkdesIxWIsyMvjkuHDu/jK2yYQjfKfjUsR4AeXXkayt63E5QfHvNLt5DZU8vz2BVzQZyTuNvaeXC7hT/93feP7yspKRxH2BETgKPocjh5JHRy6ib0+fQBzpq6irtpPwc4y3nx8Bu8993mzuku37qG0tgERF4FwlFfeXkT+rnJ2bilmx6aWXZn8oQjvzV1HWpKP1z5dSSxuNJZtr6xkfn4eL69aTSi+L3bnDWeM4Sc3TuTiM4bz21uvYHjv7MayKRs3E4rF8bjcPLt0BWYXW5o2pSzUcIAl6+sb1lAfCVMXCfPGhjUd6qcz1rBx0+B/OxaRJAmUh+pZWLazWfnWumIm7V7c4f4cjiBuV/tHD6HnSOLgcATYsHYPv/j+JOJxg8JdZbzwx2l8+MpC3ntuDp4ENyvmbKEkf18s3LFD+vKbu6/kx7dM5LG7ryB3bi4VVQ34G8K8++KCZg99Q02ipsHsFdtpCEQQESpr/SzekNdY538rVlIX81MZbmBhXn7j+WG9s9gdqeWDgm306pVCWtK+2KObystJSfDi87iJGHHK/d3jiF4YqOXeua+zsWafi5YCL61dhaql4F5Ys7JDyvhXH8/i0y07KKquwzDbjiqzq6GC6kiA/PwYtXUmc0u37RtflWd2zOb13YsoDtYc9LUdLDNL1jG9aFWX9vlu/lJKQrVd2mePQEDE1e7RU3CWSR2OW1SVSS8vJHdnGcsW7mDVp+twu13Menc5sUgMcbuIB6N89NpCvvz9a/n3X6fzyPeu5IKxwwAw4gavmwaamgApCaTvl6/viS3zqKht4P/GTiSnSdmJQ60QinXhMEuK9hA3DeKYLN6Tz93nWm4BNaEQ723ZhMclPL9mJf+6cp+j/V+uuapLrt9UZXbxNi4fcGKL+46vbF9ObTTEc1uX8J/zbkFECEQjkBFiQG9FEJK8cVRNkNZDwm0tq2Bhbj7rC0vRoMkjl5/H1ae3vl95YkY/Hh5yBX/Zs4D+Rio/OeXqxrL1tQXk+itwi4tJeUt4dMw1rS5NdzWheJSXd3+OqcolfU8mxXNwxktN2e0v54Vdc9kdqORHY69vv8FRhUDH/Ah7BD1HLTs4HGY2rNlDcWE1aWlJvP7cHJbN2oSqEvSHGTVuKPG0JM686jQuvHYcs2dsZOWyXUz/YHVj++r6IBVZHrInDMQYkcEFt57R+GAuDzXw0Yr1LPnXZsqCdVxw2ojGIyfDUozpPh8TThdOPjXKmFNDnDF6n4HM25s3UBMOETVMFhXsYWtlRZdf/9LyPH639jOWlO8+oGxrTTmfFW6lly+F9dXFjbPDeSW5GHj46tgz+M/F1/DOFd/A7Wo7NuqLS1bicbkoq26gvCHASwtWEm2yV
Lw/UcPg5TVrSEvwURMKMzcvr7HsrbwlRI04IsKc0k08svA9FpYeKH938FnJGiJGnJgZ5+Mumh2+tnshCS43iyu2URhsP33XUYdI+0cPwVGGDsctC+ZuwYgbxOMGDf4wl95+Nrd/83LuffQaMgZl40pLYlt+FTmDsvlg8goyslKY8ckG6mqDABSW1tEnO4205ER6Z6dRULpv2W5S7krMJRHwm/z7jRnNxlVVFpfk0xALYbriZCUl0SsphdrYvsgs4/sP5KpRozC9Jl8/8yyyEpM6fF25/hKe2fFRm/t0pirPbluEojy7ddEBS52/WjSLUFDwuT1k+ZLZUVdJfTTM7MKdJLo9PL95MTPLJpPkbtt4psIfYG1RKaapBANRYoZBlT/I55t2tdpmd00NoXiMuGkiInyWu4P/t/BDDNPkvhEX8avTbuYnJ9/AHUMuZF11MU9sWtju0uuhoqpMLVyGqSaKMq1oBXGzdYXeEQoCVSyp2IEiBOJR3s1f2kXS9hAEcLvbP3oIzjKpwzHD3od/R5fNHvr2FXzl65c0tklJ9SEi1NUGeffrL2IClTV+XnxmDn5/mKSkBCLhKAvmbOH6m8/ijLGDePo3dzXrc2NZGYZpsmDddrTEgFShcHkFZRV19O1tRZ1ZVlrADxd8wuOX3MC/z/pmY9umeTpP69uPXy6fSUiiBDRC39RUOsrrebNZV5PLxL6ncVL6kBbrLKvIY2d9BUluL7saKllSvpsL+lpRXzZUlFLqD5Asifz33NsYnGbJ/crWlfhjEaISoSEi7KxtYEv9ek7OGNeqLL1TU3jz/jvZVlLBUzOXoqq4XS6Ka1vP53diTg4z7r2/8f2Pl0xnVsFuPi/axRWDLTcLVeXFLWtIcnspD/mZX5rLpQNO6PA96iwiwk9PvpWwYWXeSHR727Rw7QiZCSn8vzHXsjfZwoCkrLYbHHX0rJlfezjK0OGY4X9zlrF52lr+8c+HcHXApNvrdePxJFJfGyQja9+eXjgU5bQzhzJz7XbCmAwc1otb79wX4quvHYB7fwzT5Pdz5xI3Te7xncG0NGtJ1ZUk5OVW0rd3BqrKU+uXEjUMnlm/jOe/cHOLynvGnh3sabBmSG9uX8eXx5xB76SUA+rtz/b6QrbWF+Bze3kzfy6PnXJvi/0nu718ccipje9TPPuixzyzfrk1A1J4eeMqfnneZQBM6DsYbzjE1JqPcEscryfCrLIP21SGAL1Skjn/hKGcf8KBUXzaY2ddFUtL95DpS+SZTUu5bOBI3C4XeQ3V7KqrxCVC3DSYlr+pW5UhwInpA9uv1AK1oWXEzGp6p1zT7HyaN5Er+5/aSqtjBEcZOjgcXmpDYd547lMS5hcx98o1XHbtPgd4VaW4tLbFEF2LZ23mrWc/52+vfYNEO5xY3/6ZnPnFsUyqyUXVzY7EEF8+sV+7MszPy2u07EyfkM0rtz58QJ1lpQVsra4kxZvAhqpSVpUXM77vgQ/ZPfU1GJh43W5MlPKgv0PK8KPipcTMOInuBLbWF5AfKGNY6oGyn95rEKf3OjC8WzgeozoUIsNnGYfk19c2lo3N6svWksVkp/u5ou9F9Evsg8916EYkbTEldyNR08DjclESbGBlRSHn9B3CsLRs3vnCVzDtWVVTZd6TMDVGXs3fMNRPVtJFeFwdn+F3lNJQNVvq87i0b8sBDI4ojjJ0cDg0guEodQ0h+vdueRa2P6/MX4Z7eSmmR3juj+8x8eozQASXCJu3l/D8Gwv57jfSOWHYvkgp8ZjB5BfmU13ewNyP1nLGNWOZsnkz35gwgX99vKDR/3D6qq08esNFpCe1/uBXVZ5dsYJAzFpGe3bFCi4fOfIAK01DlYsG7p0hSat7XYbLJC3JS7LHS8SIo23kLY3G4zz28Wy+d9kF3D30Uq7sZyUMFoGByZ2L0Zno8fLm9Xe2WFYXq2Nzw1ZSfankB8u4ZeCdh2TJGYpHSWpHiX197Nl8cdjYxvfD0y1/SxGhd1LXK5b2UKOSuP85POk/RtqwoN1LdXAOMdMyjCn3T2FA+r1dLtOk/BmsqtnOqZkjyPFldnn/B43Qo/wI28NRhg49ktc+WM7qzQU8+cs78Hjaf+hMe30erpCBuoXq3ZXM+HQVr4Z28/R1N/DG+8uIRGJ2toXrGtssm7uVmsoGUtISmfraYlZlBZi2axvnDRlM/37pxLyWAkr0egjF4qS3Y8Ny45gxBKJWOqPWorJcMGAoFwxof7nw8kEnMDw9m50N+ZRHqhiQkt5q3U8372DO9lz6pqfy3UsvoH9S6zFGO0pxsB6fy02vxH2z0QUVi4iaEURc5Af2kBvYzchUa5+xtP51IE6/9Ps71H/YiPLtVc/zjROuYkKv1pc3s3xJZPk6bjzU1fijm6gPr2RA+n0AGMHXMUPvY/rOxZ14Wbvti+pfwtQogouShjfpl3YHLvG1266j7AmUsa52J4IwtXABXx95Q5f1feh0TT7Dw4WjDB16HJU1fmYv3UbcMFm4OpeJZ7cfdDnjsmT82gv3kgjuG9KYHyphW1UFT81YzLZdZQzul8SajQXsyqtoDPq8c1MRScnWzETdwuxVW0nMSeCZFSt49ist7+W1hohw92kHZmY4WMZm9WVURjZz1n9AQIPECQH7XC9UlW0N6xiWNJaXl6wiMymJ6Ru3c/f40xHewy3JZKfe3uHxJu9az6jMHE7vNQBV5YfL3qdvUhr/PPfWxjrjMk8jK5JJnWmSW9JAH591H+NGHWUNLwNKr5Qb8brbNwSZWbKOklANr+yew1nZI3D1wIemqlJQ+28C0S1kJ38BnyRghqaBKx3D/ywu3yXtzg6HZn6HuGktnbskAaFrQ9dNKZpPwAiTIF7mlK/hlkGXkO1r/YfTYUVw/AwdHA6F92asJRyNI8AbH60gvp9P2prC4gOWF/9085fpVZyOJCQypKYPS/wV9E5JYcbmHQwamEV2ZgrDB/eiujbQ2Oa+713J09O+x9PTvseJPxxPIFMwyqOsyS9mc0Xbfn2RWLzN8q5gSdVagoaVA/Cj4rnNygpCu5hc+DzvbJxHddBKaBuJx/lw/VqqGl6ivP4ZDLNjkWkqwwH+u3ERf187D1OVJeW72VJbwILS7eyo23cfBiUP4sS00Sxc6WfagiJCQdun0v82SgxVg3L/W+2OFzaivLNnEZneZEpDNaysbt3N4kjSEFlNKLYTEQ8l9S9jhKaiGgRcqLEHjbbvCpGZdB45KVeQk3IF2cmXdHnElYtyTuPrI27gK8Ov5r7hV+Nz97C906PIz9CZGTr0OMLROCMGWUt9SYkJ+IMRMtOtWdHWsgq+983nuOWKcXz3Ozexo8hyRyhdV059eQCP103+qhJip/bH2FWHDkvjqi+fwdlZ2W3muLtm9Gj6uVN48dXFjDt9EIPSW/91XR0I8uBrU/jLLVdzQp9DX5JsjVllizHVxC0uVtVs5I74NaR6klFVPi//AFMNqhIX8t87Hm58yHpd72IacZA4NYH3yEm7r91xJu2w4osW+GtZUprHvzbNIK5KzIzx382f85/z9u0hFlbVsSav
GBHhzcXr+M7V51EZmGoVilLh/wDTcymDU05qdbxd/jIipuVHCLCyaidn9xqFmn7isc14fWcf5B3rWkoaXsHUCEIC1cEZDEj5D97MMY3l4j3lsMixoGwHJ2X0o3figXkcz8w+sYUWPYWjK1C3owwdehzfvXdiq2V/nTQT78pipm8s42sPXMVf3/4cEWFIYRyXx2W5VKhyflUGaz7bwqnnjuLqH42iukl6pKaoKiLCSTm9+eCjdSQnJJC7o4JoKA6t2Mu8vWI9JbUNvLRoFX+4+cpWZV1VVkRDNMLEwSNardMW3xn1FUJGGAC3uElxW3tnBaFd7AnswOvy0WCW4kkrZUz6GajG2F461VqeUqXK/xa9Ur/S5nJvfTTMlN0bMVWJmQb/27qckJaRnRhDUcrDBc3qz9ywnUAkSoLbzcdrt/Lli87gxD7PY6ol507/Tt4ueJz7h/+SHN+Alobk5IzBvHPhDw84Hw68QTj4Khk5U3C5O58SqqsZmP4gMfNmAAQP3oST2tzvi8c24vaM7dLZX1XEz+/XT+ey/ify4yZh6Y4KjrJlUkcZOhwVVNT4qY6G2DllI764iek3+dVjb1CeYj147r77Iu791hcAy9/vke8/jZqwbfVudq3fQ9bAfYYgexUgwJNvzGfYwF6ccfIgPl+xg0A4SrLPy3uz1/LwbRceIEd1IMi0dVvonZbMirxCdpZXkZWWRHZiUjOlY5gmf1w+j0Asyrn9B5Po6fxeUY6v5b03QTglY0Lje7fs/Tf2MCznuUbF5JKkdvc9E91efnnWFxpnaaleN6VxP1EzAkCGt/nM99xRQxk9fKjdv5Dk9eDzWK4hphosqXqRmBllYcU0bhp0oGtJa5hmLeHQm6jGCAVeJSX9Bx1u212k+jruA2jE8whWP0xSxm/xdsCwpqO8s3slqsrc0u18ecQ5DEw+yhzze9AyaHs4ytChx1NcWcdDf3mHS88YgW9nFRJXQNn03mr0ztMBeHPBWp757m24XMIHy9ZhrKtAVIkGYrz0+Md8/693NPb3+PZpnJw+hLGu4cxdtoNE327OPHkQqVlJ1JaFufCskYwf2zxyS3kwQHZiEmsLSogZZmNszZnbdvJ++Sb+eOGVTOi7z29vTkEuFaEAKEzbtZU7Tuw65+rBySMZnDzygPMigs/buVlogtvNZQP3t+Y8sO+9jBnYp9Xl5u0Na6iJluFzJbGtYRWVkeJWZ4f7Ewm8jZp1IElEQ++RlPKVHjE77CgR/wuoBgn7n8TTAcOajlAdCTClYC2GmoRiUd7IXcaPjrLZoTrK0MGhY6zctIcxI/qRktT6xv9TUxZRt2gXywrruPCOC9iwZBeC0P+MgZx0wckApCQmUBHyY6DsrqgldFY/XAKmCQMv3Pdw3xMoZ1HFZtbW5HLK5ioEiETjTP18A6V+P/EkWJVfzE/v+0Jjm4gR58FPp3D3mNO486RTmTh6n8L595pFVIWDPLNuGeOvGNg4E3tuwwpCsRgiwkubVnPzqLF42wlofbQTNcIMsfcKBSFkBNppsQ9xZ5GQeLXd1oNqrFtk7A6MeB6x8AxE0jDjBcQj87pkdugS4Y6h4zHVmrUPSc1up0UPQwC3owwdHNqlrKqePz73GbdfeQZ3Xze+WVk0EuO1373HZQ9dxrxFW/DtqqQ6v5rc04WUXumoqUiDwZcvPI3pL83ljv93LffMeZVAPMqvRl+D3mv9EwrCFRP2GXO8mT/fSswbjDJ39XYS8ILCR/M2EnHHcYmL0toG1ucWc9bowQBM37WN3LpKnl23nBtPGEOS7UNYEQowdddmeiUlsbWmslk0ma+fOh6/7XPoc3tw0fZDYVrRy5zqupAcOuckf7gx1GCXP4+RqcMOKDst60JOyzpwabkjJCbfAcl3tF+xB6JmDe6E08BWWqbZetzVzpCZkMzXRx/c/ewZ9Cxr0fZwlKHDEWPyZ2sxTOWDORu4fuIpSNzEXxuk39AcFry/nM9enkthJIJrcxkgEDdJynTz0Hcsx+Kk5AQ+fnEOHzw7G8+J6awPFQMQTIvw/266uNlYlZWV1EUDrKnJJVInuBJMht7hoqoimWHpWXhNL7P35JKAi5gohr3FGDHiPLFmCWGNUhaq54Odm7lrjLU0u73GCqtmqkmqN4GNlWWNyvDKoe37Ru5lU90KFlV+Qp2rjuH9v9usrCFWR8AI0C+xY8uNXYE/NBuvuz++hLEHlG2p387Uwk/5xdjvk+PrPkvaowlPwhmkZj9/pMXomRxFyvDosXt1OGrJy6vktVcXNjtXXtXAzCVb8bhcNATCfDRvI5P+8TF//84rhIMR3n98OimZyez6ZB0Z5QGSE70k+7zEcis489yRnHnuSAYMzGT2m0tISvbx0t+mYpiKofCXdTNbTF+UkZDCv8c9hHdLP06qPI0bh11KsfpZUlfI3eNP5yfjLqRPiY8XvngTZ/W3lFqp30+9EcTrdoHLZFnZvmz0FwwYyic3389nt3yVz255gPtPPrjYkB8VvwpAQXAX1ZHyZmUfFL3GK7v/haHd79cIYJp+qup+S2Xd7w+4h3EzzpKqFUTNKJ+Wfn5Y5DkeUDUbjZ6OKcQKZtHe0VNwZoYO3c4bry9i3do9nH/+KEaeYGV5N0zl0nNGE48auDwuvFGTZTM2YBrKm3/9kOqyWpJSE4n5w3z521cz4apxACSl7fN3mDVpMYH6IEkpicTyGsjMTyE+OomaWB3BeJQU74Fm8Gu2lREIxVm/q5Qd3hpbgRq8vX0dZUvqqKkLsnlrKSddbBtveOIMGBDG6/IQMWNk92q+l1UZ9rOroZxzeh+c+8SW+lVUR62Zr4nJtOKXuH/4jwEoC5ewpX4FpkZYV7OMM7MvOKgxOkN98D1MjRKN5xGKLiXZd15j2bLq1fjjQXwJPpZWreLqfpc5s8MuoLrhKSKxrQzo9cSRFqXrOYpmho4ydOhWduwoZeuWYnw+D2+/vYyf/fyLAPTvnc43b7+An9/zFPf+4FqWfrKWurogcdNkzaIdfPeJr+Gyg/wOGzuIPkMO3Esbf8Up9B1iPYxLQtUMGj+A1JwUElxeklsIAB2LG7wydxWGaRA2TKIFJiPGWEYJJUX15JdXEXCHmPT5Gq45ZwxJPi8Dk7L447i7G8NkZyU0zxzx4s6FzCvbxhsXPUhmQjKdJdmdxqg0a9nV60pmSPLwxrIZpW8RNSO4UD4rfZPTs85p4kbRtaiaFFZ/j1h0OWgEMKhpeKqZMjTVYHDyQCLJMVwiBI1Qt8jS1ZgaJ27WkeDueYrbMKqpC7yNaoxwdCOJCYfHkf+w4ShDh+OdJ345mbMvHcuitflEowa+RA8b1hdQUFDF4MHWQ2nBR2spyi3n7SdmMmBoNuEkD6ZhktY/g1MuPInUjJaVy4ZVu0lM8jFq7ECGnrQv/dGM+Zsp3FHB7de0HMEkEo8zblh/QnZmiUHZGTx4hZWn8IkpC9kYKyCuJnWRIDuLKjl1RH88LjcnZw5usb+iYA1zS7dhYjI5byVfH33xAXVKwyVkejNJdLccbHpoymi+PuLngLWv2dRtwWN
sYXBCA+DCJ0VE4vUke7vHojAQWYI/vACvuz85aV9GRHC7MpvVuaj3eYyRUW1G8umJFDVMpsQ/jfH9X8YlPStcWa3/DVTjCC6q659iQM5TR1qkLkMRx7XC4fhm95Ziln++mZ0bC/nuP+5h4kQrhJW4hD59rDBn0UiMKc/PIT0rhZK8SgZcMAKzfzYajOG+YFirijAWi/Ps3z8lMSmBv/7vgcYkvsFQlBcnL6QmXM0F5w5lQFbfA9qmJvr4yS2XttjvhIv68HmvKD6XFwgzckj7SmdS7jIC8QgJLg/v7VnN7cPGk9Fkdhg1o7yw+ylOzziT6wfc3G5/0Hxf8Oo+5xEzCgEQ8ZHYTXEnVU0q6p/CJWkYZh0J3jEk+87oUFvDDOF2tZ5VQtXs8nicnSFuBiion4Rh+qkIzKFv6lVt1jeMSoLBt0lNfeSQ0lN1lGBkEWCgQCS2FcNswO06MOza/qhGkC7MftEtHGWuFY4BjcNBU1fRwL+/8/IBhhaTn5+Dx+umviZA8fZS3AMTGTA6h3HjhuLzWW4JuZuKCDaECYeixOMGc6auwqgNY0YN1i7ZRZ2/5SW4hbM2428IU1Vez6rFOxvPz5i3mbpQPYZh8syHH3f6WtbV5uMRN4aaiAi7/eXtthmT2Z/bh57FjYNP58bB4w7IOLiqehnBuJ/l1Yupj7Vtbq8aIVz3O+KRZY3nstIeoE/mL+mT+Ut6Z/wIVzckhgUIxzYTie8CEZQ4NYF3OtQubtaxtex2ApH1LZbH4nmUVNyOaTZ0pbidosT/IYYZQCSBvLoXMNvxXwwEXiXgf45odPlhkW9wn7cYOWA5IwcsZ3j/zzukCM3oOiJVd6BH8L52FBVp9+gpHJaZoYgMBl4F+gIKPKeqj9tl3wb+DzCAj1X1R4dDJodD55mfvcXcyUsZfc5grv3S5QBUltayZXUeIoIRN/j03WWsrE7g9L79+PPl+36Vn3TmMJ6d87PG93/8xWQWzdtKYqKXQTHLcnQvYSNGXTREjjeF915dRI2/HhTeeWkhEy4cDcDkGSuJmTFcuFi2uIzq26vJTmp5dmeaJoH6MGmZ+2ZxD51wBQ+dcEWnrv/6QdZeXzAeZEXNcjK81gzJ1DhxNZld/ikgRM0I8ys+b3N2GAtNxzSKififwJ1w9mGZlewl0TuG4b3faEwg7HF1bCm2ouFtokYxJfXPcELvA5f36hqeJRrfQn3gbTJSvwrQJbPEzsw2A7GdeN0ZgCDiJRIvJ8k7sMW6hlFJKPg+Ikn4G54k4TB/Dh1BVYn5n0HjhcRD7+FNuf9Ii9Q2Pev2tcnhWiaNA4+q6moRSQNWichMLOV4I3C6qkZE5OiJv3ScU1tRz8IPViIifPbaQq6++1K2byul/4BM/vzGIxiG5YA8q3g3C2YvZuX7uewcdxYn9Nq33+RN8DB/3lbOOHMoJcEgsWQPcRecetZQakMhQpE4Q1KzeC13MQvLdvDcOfdx0W0nMS2/BBAuGLkvYv+NX+nN8jIrFZDbC6XRglaV4YKP1zHlpQX89c1HSPAd+r/AoqoFfFo6ncFJQ8j0xlhV8Q/O7P0b+ib2J25aM5G2MtWrRogGngdXL8x4HkZ0GR7fuZ2SQdVAjQJcnmGdaheK5ZLoGYLP23oItpaIm3VUBt7G4+5DMLaJQHQDKQn7Qs7F4nkEw3Nxu/rQEHgV06xANUavzF91apz9KQ7tYE7ZG9w15Oe4Xe3Hez2p1y/YWPEDEtwjGZ39zTbrBgNvodqASAqx2AZisTUkJBycu0x3obF1mPHN4O6NEXgDT9KtSAdmk0eMHvZjoi0OizJU1RKgxH7dICJbgIHAg8CfVTVil7W/NuXQI3juF28Tj8YRl9BQ4+fjl+Yxdf4OzrtgFF9/yNqXC8djTFqwEc/sYlx7Avxn8iz+8427GvsoKKjiqSdnceU1p7Glvp5Y70RQKElR/rd5KQv+t47//v5ePixYS9Q0WFy5k/LTCkkdCmBS06u0sa/bx93I7dzYTEZV44AYkbFonPf+N4/qigYWTF/H5TefhapSH55LemLn880F4wHmVczFIx4+Lf2Y01JKqY/mUx1eyoMjvtWhPozYZtSsB00FjREPz+q0MoyHZxDz/52k7LcRd8cMXGJGFVvLv8GgjO/QO7VzGdIDkXWoxlANA0J9eFEzZRgIfQrEUBVUw9QH3sYlSaSnPoDX07JBUnuoKgsrJlMa2sXW+mWcnNl+dJaGyGbyGpbhN5cxJP0OEj2tW5QmJl2Oxzus8b3bPfSg5OxOjMgiROOgfhAXZmw9bl/3u9wcFLaf4dHCYd8zFJFhwBnAMmA0cJGILBOReSIyoc3GDoedaCzOP56cQV198z28k88dxbnXjuOcq09n7LmjKCirJxyKsWDuViorrL2MmGlyZiyd1Mo4yck+gjPym+0vvjd5BSLCp5+sw4gZqIC6YNGaXcycvoHYrhCP/W8KYcMy5f/fzvkUBsvwujx4XV5y/YVtyL2bvPKbMYyaZucXf7aB+pogyck+prwwj2gkTiCygj3VP6UhsqjT92dx1SL8cT+qsN2/lVz/dhI92Wyve5eYGexQH56EM0jtM5vkrMdJ7TsPX/rP2m/UBNUY8cCzqFlPLPhGh9uVNbxJ3PRT3PA/TI12asyMpIs5beB8Th+4iNMHLqR/evMMFRmpX2Vgn08Z2GcaKUk3IVjGHvX+Fzo1TlOKQzsoC+8myZ3OkqopGGb78Utza5+i3jQx1GBd5T/arOv1jiEp6frGw90DXTE8qY/g67MAX5/5+HrP7bmK0Eal/aOjiIhbRNaIyEf2++G27tgpIm+LWKbCIuKz3++0y4d1pP/Dak0qIqnAe8D3VLVeRDxANnAuMAF4R0RG6H4WGSLyEPAQwKBBg6isrOwSeerquiaGYHdypGVcsTqPbdvzmT4jkSsm7gvPdc4Np3DODZZPVEVFFS88t4icPgnE4wYff7iU675oWSMOzgvjH55Ngs+LETPYvHY7fQf3orKygZ0799CnTwLRmEFSmpeGZBNB8IkHd0EMGZGC7g5y8lm9cCe5SFAvX+t9DXnB7fhcPk5KH9Pqd6Gq4S2CERMzMpmM5Jsb7+PuXQX0G2FZtHq8bnZsySWe+jaR+EByg+8wKOskOrPR0TfanxtSrRlpZWg9hOLEIy7AJLdkFREzncyELJLd7fsg1tUFgfaX/iLxQir8bzEw83sIHozIMuLBBJAToWEpvtD14Mposw/DrGdP9RLgBKIaJTf+KZlJ57c7dnlNPnWRHWT4Oh5urqImFzWtjB7+hp1ovJSDefRsqFqGN5iB2zZ02la8nj6JB87e9n7WcbOegvJqxBiIAPnBfIa5C3FLK4kqDyNH+v/68NDlsUm/C2wB9mbe/gvwL1V9S0SeAb4GPG3/rVHVE0TkLrvenS112JTDpgxFxIulCN9Q1fft04XA+7byWy4iJpADVDRtq6rPAc8BjB8/XrvSz+lo8Jk6UjJGY3Hem76JcET4eNZ2rr1yAhnpB5rR+/1hhg0bRMxOa5Sekdko881fv4q5701lbO++3HPKaQwbMx
CP142qjwsvPBUF/LEwbwZW4hrmIWLEydmaTLgwjCS4IKp8MTCG26+xlg39cT8vbZ2NW9xMGHo2PveB5uXR2G5qoh+SnpiE8hpZWbcBGeTk5HD/977YXPbwcnIr55HiSsY080lI3Up64kUdvkdWYG1redDUSzGsFX8AYib8ZevvOCXjNG4ffHfH+uvAZ51b9Sc0cS7upIvJTrmWaKAcI9GFtTXvISE9hLudPcBAtIJM9aHEAR/JSTXkpLc9tqqypfoZIuZ6Ls1+FberY0qlV69nUbW+G4gL10Eqoy9k30nE3Lecm+zOaNXAxbqPOQz1XEVNxLI6douP9KwkktpYKj2cHA3PnkNC6DIDGhEZBFwH/AH4vlgf/GXAPXaVV4DHsJThjfZrgMnAEyIi+0+y9udwWZMK8AKwRVX/2aRoKnApMEdERgMJQNdM+xwOmeWrdlNbGyQpMYFgOMrnC7Zy83UH+p+lpibyre+2nPF9m9lAcZZSbpbzyAk5uDxCeaiWPr0zuetuK7pJeaiecJ5i2kYmI8fkMObGfX6C/fpnNr5eWLEAwzQwxGBZ9VIu7n3JAWM2hD9HiWNqCDAIRBYD57RylW4yk/fliJMOzMxawyVuXLJvBrigYgZRM8ra2tVc1ucKevkO/eEXjG7FH1mJx5VFacOzZCZfQULKA5DyQKf6SUkYw9i+L3aqTW1kM/5YPi5fiAL/ZwxLv7H9Rlg+kl0xQfC4EvC4OudreWr2fYc+sMNB04V7hv8GfgTstRbqBdSqNgbtLcSyQ8H+WwCgqnERqbPrt6lbDtfM8ALgXmCDiKy1z/0MeBF4UUQ2AlHgvva0t0P3EInECAajZGXtCzd2+imD+fWP9v0SH9i/c1m2TVWeXrUcjZoEaiK8sXEd7oQIzy1byqQ7rueULGuZtU9SOt8e037+t7gZZ3HVQssyU5X5FXNbVIbZqfeTmbIvHZBLUqgKVrfYZ2riWaQmntWp6+oIISPE3IrPUZSIEebz8pkdnh22hGoc1RBVgWlWrj9xEzOq8EdWkJ64b3nTMMqAOG53y+4Dh8L22leJawi3RthZ9waDU6/G7erhjt8ORwylw8l9c0RkZZP3z9mrgQCIyPVAuaquEpGJXSpkEw6XNelCWp8wf/lwyODQNm+8uYQtW0v4yx/voLShgYKaOs4ZNphTxw4iFIvhdrlIcHcuOW1tOIRbBHeZYpbH2V5cxqKynYQCHv60YhqvX3Fyp/y43OLmwRHfIGYbTrS0RAog4sYtR9bc3FCD0zLGYdjLg719h+Y1VNfwDJHocvpnPUmftH1KNcHdPLWTv+4x0CDp2S93uY9cn+Rz8aTUkpwewy0JKGZjWdyMMavsDSb2uZ1Ed0obvTgcN3R8mbRSVce3UX4B8EURuRZIxNozfBzIFBGPPTscBBTZ9YuAwUChbZeSAVS1J4QTjs2Bqio/c+ZtxTBM1qzLZ/KebawtKuHdr92NV1187ZFnOPmWk/n59Z3L3p2dlMzfLr6KB1e8S3JKCjWFDQTjiicAa7a62ThhI6dmndpi21c3rmHikOEMSc9sPCciDE4eciiXethI9aRy2+C7mp2LmX48ktxp9w3DrMEffMtyu4hvIrEVt4tYdAPxmBUNJh5dgdfXcozWg2V4+s2kRSvJyT5wuXdz/VLW1Mwh3duL83M656bhcOzSGWvRVvtQ/SnwUwB7ZvgDVf2SiLwL3Aa8BdwHfGA3mWa/X2KXf96RFUcnHJsDH3y4mljMmsG88OoCls/cROyTfKau28K7k5dQtbGCuR+spbTB32ofqtpiDsG356ylrM5PQyjC9u2ViF9IKnAh5W6m7lzaYl+5tdU8uXoZT61e1mL50YiqycrS77On4f32K+9Hg/8N0Cgg1NU/0eJ9Bgj6nwINg0YI+luv19XEzRiLKj4gyZ3CyuoZhI2OuZTsT55/XWOQAodjA3VJu8ch8GMsY5qdWHuCe/12XgB62ee/D/ykI505ytCB8ooGsrNTyMxMpqi2Hvfnxbg31vD6tEW8++pCNNGFbKvnhXmtx2t8c/pKnn574QHn07OSSMjwYKrB+ecPJlsVr0dIbRDuG3UzYSNEw35xO/+3biUJLhdLigrYVdPyXl93Y5oNXapMKkKLCcTy2V03ibgZRFXJrX2dmB1f0jCKiMe2tNg2GluPSCIiCRhmNabZ8j1xuTLwJJyJJ+EMxN2H/QN/dxc7/GtoiNcAQtgIsrHuwO9Be1SEC3g9/zfMLz/QTzKvYQZrKp/sAkkdDjvSgaMTqOpcVb3efp2rqmer6gmqenuT4C1h+/0JdnluR/p2lkmPY7ZXVTI8M4uf/PC6xnO//et7LJ67A0xImVdBwADxCsRMVs/dhl536QF7UYFghHdmrqEs0MDlF4/mxAF9KayoZdmWPczdk4cETOK1capqQ7jLvHi8ghlXPlu6jd6n7KI8UsKDI36EiFBQX8uc/N14XS6CsSivblzDby66/LDeF9UoZZX3kJ76TVKSr+2C/kx21L6ASxIwNESRfzqJnv4sr3ySQLycU3O+T6D+n4QCZZjmGQcE5O7T67lWem5OWuafOyVXxKihIZpLTtKBBkRRI0RCK2mn9mdw8mhuGfTtxvc5vs4b73xW+gyGmiyvns7Ffe7GYxvmxM0wm2teJW4GGJVxE6mtxBV16IEIcGgzv8OKMzM8TqkKBrj9xTf598LmUVe2TN2IxuJoLEqoqI4f/vJGRlw5msf+8SX+88O7uO29N1lWVABYS6OmUcHCNblUhgLE48qPX54GwAufLOe5j5YSrotAg4Hb7aIhL8ItF4zjpnNO45YLTqdPRgKb6ldRHikiN7AVgKzEZH594aX85LyL+c1Fl3Pz6LEcbgLBj4nHC6lreNKy3DxEQkYZcdMPYgWLrg6vYW3lvzBU2V73AaHIGmLRJahGiYTeO6SxTI1SFVrQobo7a19lbcVviBq1zc5HjCCv5/2M/EDL2Sj2J9WTyQlp4xqPzITenZK5IlzAbv9m3AhRM8r88jcby/L9s4ibIUDYWvtWi+1NM8LWip8SNRyvrJ5GV0ag6W4cZXic8sfZ8/BtCPLmnNWNyW4Bhp49EsGEYIi0Uf0pdsfZHKhlVXUFn5fnsyG3jG9OnoaqEo8uxV91N0s27iIaMxBDKN5ez/IdeazaXojX48ZboaT7fPRLT4WAwe2Xns6j91zKo/dcijloM8GYganKrLIPWLAzD9M0uXrEaK4deSLXjjyRcX37HyC7qnnAua5CNUq9/xnElYZpVlBTfhNmvPWwbx0h2dOfSwa9y2WDp3HZ4GkMTL2BqmgRLoSIGWFzxWOoBkDjhAIvURbczJ4OKqL9qQjMZGvlL/FHt7ZYvrRyMqWhnYTiZZQEZmFqnLz65vuYG2vnUB+rYHHF5MOy71gW3kWCKxGPmCSIUBkpbizb3fCxbbUqlASXEDXqD2hfEfyU6tDnFDe8eUCZw5FFXe0fPQVnmfQ4o7K8Hk128fnCrWQWG4RjYf67aAk/mmhlac9MSUDCEdQNqRi8v2Qj2anJfLJ6G5WFEVx+CAYivL1xA9cPe
BI1q0g/s456NXE3uFGBP02dQygSw+f1UBEI8I1bzyMrNRkEstP3OaWvrahl1o6B3Hl6BH/Qxa/mzOLOM0/j4Qtbt4I0zToqq+4nK+ufeD3Du/z+xOK5qEbB9uuLxrcQCbxEUsYvu2yMQLwGrysd1MpmEZXeJCVfQDiaREJinJnlrxAy/Hx5+L865WRuapQ99S+iGOTXvcjJvf/arLw6UsTK6mnsCW7k5OQs4mYAl/jIb3ifYem3kuDOIGIEWVnzMUnuDKqjxewJbmBoymlddu0tcUrmRE5OTkbrfgJ4kOyHGssu7PsHYmoZ5LjwkOBOb9bWNCMU1L+Ix5VFuf9DBqTdg0MPQbo8HFu34ijD4wBVRURoqA/xi++8ztjrRpOyK4o0BPHhY9m6PLCVoVlWieEyUI9QvauQ4FlZJHoSaQhHaCgM421QcMHfP/+cq+/chcuVzfisrXwcPQF8ghqQlujj4TssR3ARuOCU4aQkNn+om6psLemDYVQzQC6koKwONdcxZd0m7jjzVLKTW96v8gffIhbbht//HFmZf+rye5XgPYmB/eagZohA5Y0oBvHwTMyUB3B5BnXJGMPSrmZI6r59UJd4EHERilZS5S6kLvYfALbVL+DkzI7vl1YEZhIxynBLIjXhJfijW0lNOKmxfHn1FNzioSpSQCi5P4PSLBcIF24Uy5q4IpKPqQamHdgj39/9ylBVUf+z9ppZFA2+haRZGT8SPdkk0np+xerwQuJGDS5JxtAwFYGP8XHo+7wOh47ldH+kpeg4jjI8xtm4rZh3P1nNr79zHZ9NW01dbZDN03eQ0yCEiutw5aRxmatfY/3qYQlELu+HW1xEExP4+wPXk5qeTDge50vPvYVhGIgJ1Ct74+XWaRLZJ0VwSwImysWjhnDV+BNbFshmQVEexYF6Mn2JPL92JbGSEKI11IeVd1ZvaDY7rPAHKKqr57T+yQQCb+By9yUUnkNqfHe3zA4B4pEFxM0GtoRTGeuLEgt9hC/tYQLRHbgliUTvwStGEcEtLc/4llW+S9QMAi5WVL3PSRkX45aOhYhzu1Lp0xharrmpXn2sklz/KlziJm5GKQrXcuOgHx/Qx6DkMXxz1LOdvKJDROssl5C9efmMDhn/AZCZeA5j+zzR+D7RM4j6aPctozt0EkcZOvQEVJWX31vC9t3lLFq2g+nvryYlNRF/Qxgj6Mf0gvoDnHT6vigmqVcMpf8EK4JIisdL/0FZ5CSlYJrKoIQ08l21uEW4oPcw0vt+QmGwkmtPgNvG74v4kuJpP0TXstICRISoaSnX8YMLyfKWI64MhmY3j3P61KJlLN9TyCt3ZKMaQvAAcUKhGXjTvtE1N2s/PIkTKQ79hCU175Kd+T1Gp0xE1WBn1a/wurMY1etfuF0eXNK5qDztcXLGZXiIEDP8nJR1PZ15muQkX0JO8oHh6VSVFE8G1w34f41RY1LcmV0k8aEjrkykV8dTTzXF40ol3bf/zNUxpOkRCIfqR3hY6bAytMPaZNI8OKpDD2bdliL2FNeQnJjA6+8sIT0zGdMwMYw40T211mM2bvDmpLlcfrEVJ/Sxc65osa+GSJgyM4CRrGjEwJvkYWX1Tv61bSq/GXY7fb2dCzf2o/EX86Px1tJsJLKIuuqnQJJAg2RkXQ9Yy3t7ampZkJuHaSozdw3mjnEzGvtwSWpLXXeIQv9s+iWfj8eVRCheg6FRUr37goObuFhSsxC3JLK4egGjMq6mOvAx0XgJUaOMD4t+T+/EsVzU5+DjjbbE0JRT2FLzPC5RBiaNwC2H9ntVVVlU9ntGpl/L0JSuj8Hq4NAWR9MyaZu2PCLSX0Qes4Nrh4EyICwi60TkNyJyoKmfQ49hxvzNREIxGqqDVEUiPPKL6/nPKw/y6N9uI3xqNvHeaUQHZCD928+1t7ykCH9vE5E42euD5EoNr+fNIRAPM7d8Q5ttVZXpJdOpjx1oCQjgkjSSku8iKelGkpLvRmRfLr6Xl68mGI1hqvLqyrVE4qm4XVm4XVlIB5YPW7I8rYvsYG3l38lv+BiAZRVPsrDsr80sJ3fUr6QmWgpYhic761eRV/0YcbOK2liU/MAGNtbNIRCvbVeGzrC5djKmxlE12VA96ZD7qwhvpCS4kvXVL3erFa6DQ4t0sdN9d9Lqz04R+S3wf8AnwL+ADUA91kbRKcAVWFkonlTVXx8GWR06SO62EuZ/toFHvnYJ9ZUB1m4s4PsPXMYJw/pQHw2TlJmIKzmD6KA4LhX2BKMYponb1fpvo4uHDGN4dibhTwuRKPjXVFAwLESaJ4m1NbspDdfQL7HlrBY7/DuYUTqDuMb54oAvHlDuTTgNb0LLRhojc7K5bqy1/5jgdhMzDDqSABdANYRR8zDu9F8A+2TbVvsa4GJn3TukJZxEWXgjACWhtQxItlJUZfn6clHvfZkvkikjKjESgF3mYFwSx1SD1dWfdOnssDaSh8dl/TgJGhXEzTCeDuYN3B9VZUONlXcwEC+jOLiCgSmtpbJycOh6epLrRHu0tQbjBUaqam0LZauBV0UkEyvHlMMR5sV/f8ZZF4zitPHDef3p2Wxev4cRpw5ixeY8AsEg02ds4LRxg7lvwevclTqOBFzExQoQ0TvmIxKLUxLbxYiUk1rMduDzePjNkAt43PgQX18PdTuD9NFsPIluMkmgIlzXojLcOytMcCWwuHIxE3tPJN2bfkC91vjSWeMO+p6YoY8gthkz8CLwKAB10V2UhZbhkgQiRg3LKv5D1KhHUdZVv07/pHGICH0Th9M3cXjjNVRXfxWvN4moqQTCcdyudBTID2zgIrpOGV456G9d1ld9bA+1kd24xIOhMXY1fNJMGaqaxEz/Ae4KDg5dghwjytCOFN4mtqL8WVcK5NB58neVM2f6erauLyThux42b9lN2BXihf98RjS3mKT8SpZ5Enhl2VJKQ/V84N7A0G/1Zlt9KTVRPz+acBrl8Xwm7XmKe4Y8wsjUMS2Pk1tOeqY1a+mf4uOhzKs46bRB1FRVk53ei3WFRWRmKUNT9lla5gZyyQvmkeBKIGSEWFi5kGv7d7/pu2oIDb4Crhw0ugzMQiAHt/gYmX5rY73C4HbcxFBMEt1pmBo7wNJTNQRq4HL3J9ENt+f0Jjv7qW6/hkMl3TuEqwY9AXbS5IT9wrztbviI3fUfcunAp3Ed4t6kg0OLHMt+hiIyCjgd2Kmqa7tcIodO894rC3G7hYqyOl55cibBcAjxmZSVVuHdXYHEDRqqSplSvYHYDoMtA4vxpfrQggbMHOGZHZ9z41AhZsaYXfZBq7PDW+45j1vuOa/xvakmf9r8AhclnoanNsD/mzyF/meW8Pblv23MNdjb15svDflSY5t+if0O6Lc1VM1OpztqbBtdDmYDiAc0jhldAYwj1TuIsU2cupM876PGNgD6Jw7C3YKTu8uVTK+cVw9KjiOJiJDqbfl+x80wO2rfImLWU+Sfz+C0zqXncnBoj6PNz7BTTxoR+RrwIXArMEVEftMtUjl0mIrSOtatyKV2cz7+4kpqYjUM+06QkQ/HCWX4IW6iQMqa
[base64-encoded PNG image data omitted: notebook output figure showing the DyNAS search-progress scatter plot of MACs vs. BLEU score, colored by evaluation count]\n",
+      "text/plain": [
+       "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib.cm import ScalarMappable\n", + "\n", + "fig, ax = plt.subplots(figsize=(7,5))\n", + "\n", + "number_of_evals = 500\n", + "df_dynas = pd.read_csv(config.dynas.results_csv_path)[:number_of_evals]\n", + "df_dynas.columns = ['config', 'date', 'lat', 'macs', 'top1']\n", + "\n", + "cm = plt.cm.get_cmap('viridis_r')\n", + "count = [x for x in range(len(df_dynas))]\n", + "\n", + "ax.scatter(df_dynas['macs'].values, df_dynas['top1'].values, marker='^', alpha=0.8, c=count, \n", + " cmap=cm, label='Discovered DNN Model', s=10)\n", + "ax.set_title(f'Intel® Neural Compressor\\nDynamic NAS (DyNAS)\\nSupernet:{config.dynas.supernet}')\n", + "ax.set_xlabel('MACs', fontsize=13)\n", + "ax.set_ylabel('BLEU Score (%)', fontsize=13)\n", + "ax.legend(fancybox=True, fontsize=10, framealpha=1, borderpad=0.2, loc='lower right')\n", + "ax.grid(True, alpha=0.3)\n", + "\n", + "# Eval Count bar\n", + "norm = plt.Normalize(0, len(df_dynas))\n", + "sm = ScalarMappable(norm=norm, cmap=cm)\n", + "cbar = fig.colorbar(sm, ax=ax, shrink=0.85)\n", + "cbar.ax.set_title(\" Evaluation\\n Count\", fontsize=8)\n", + "\n", + "fig.tight_layout(pad=2)\n", + "plt.show();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# References" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[1] Cai, H., Gan, C., & Han, S. (2020). Once for All: Train One Network and Specialize it for Efficient Deployment. ArXiv, abs/1908.09791. \n", + "[2] K. Deb, A. Pratap, S. Agarwal and T. Meyarivan, \"A fast and elitist multiobjective genetic algorithm: NSGA-II,\" in IEEE Transactions on Evolutionary Computation, vol. 6, no. 2, pp. 182-197, April 2002, doi: 10.1109/4235.996017. \n", + "[3] Cummings, D., Sarah, A., Sridhar, S.N., Szankin, M., Muñoz, J.P., & Sundaresan, S. (2022). A Hardware-Aware Framework for Accelerating Neural Architecture Search Across Modalities. ArXiv, abs/2205.10358. \n", + "[4] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 770-778. \n", + "[5] Howard, A.G., Sandler, M., Chu, G., Chen, L., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q.V., & Adam, H. (2019). Searching for MobileNetV3. 2019 IEEE/CVF International Conference on Computer Vision (ICCV), 1314-1324. \n", + "[6] Wang, H., Wu, Z., Liu, Z., Cai, H., Zhu, L., Gan, C. and Han, S., 2020. Hat: Hardware-aware transformers for efficient natural language processing. arXiv preprint arXiv:2005.14187. \n", + "[7] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, Ł. and Polosukhin, I., 2017. Attention is all you need. Advances in neural information processing systems, 30." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 8d8aaab752dfe3add98d0f07660bb28a99de49e3 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Thu, 8 Dec 2022 03:15:04 -0800 Subject: [PATCH 40/60] Update NAAS.md Signed-off-by: Maciej Szankin --- docs/NAS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/NAS.md b/docs/NAS.md index a2eb0eb456b..2255bc5ab6b 100644 --- a/docs/NAS.md +++ b/docs/NAS.md @@ -81,7 +81,7 @@ class NASBase(object): def search(self, res_save_path=None): # NAS search process. - ... + ... def estimate(self, model): # pragma: no cover # Estimate performance of the model. Depends on specific NAS algorithm. @@ -175,3 +175,5 @@ Following examples are supported in Intel® Neural Compressor: - DyNAS MobileNetV3 supernet Example: - [DyNAS MobileNetV3 supernet Example](../examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb): DyNAS with MobileNetV3 supernet on ImageNet dataset. +- DyNAS Transformer LT supernet Example: + - [DyNAS Transformer LT supernet Example](../examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb): DyNAS with Transformer LT supernet on WMT En-De dataset. From 9d95d770fdf32f4d53fa7cec6e1b73b3d40330e0 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 00:39:42 -0800 Subject: [PATCH 41/60] Fix UT Signed-off-by: Maciej Szankin --- .../nas/dynast/supernetwork/__init__.py | 13 +++++++++++++ .../supernetwork/machine_translation/__init__.py | 13 +++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 neural_compressor/experimental/nas/dynast/supernetwork/__init__.py create mode 100644 neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py b/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py new file mode 100644 index 00000000000..e833188cc78 --- /dev/null +++ b/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py new file mode 100644 index 00000000000..e833188cc78 --- /dev/null +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. From 0c5dea88e836c135ce1c81d716f35519897766a7 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 01:02:29 -0800 Subject: [PATCH 42/60] Change "TransformerLT" to "Transformer_LT" to fix pyspelling error Signed-off-by: Maciej Szankin --- docs/source/NAS.md | 2 +- ...rLT_Supernet_NAS.ipynb => Transformer_LT_Supernet_NAS.ipynb} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename examples/notebook/dynas/{TransformerLT_Supernet_NAS.ipynb => Transformer_LT_Supernet_NAS.ipynb} (100%) diff --git a/docs/source/NAS.md b/docs/source/NAS.md index 9e78bce7c93..8ad4a43554a 100644 --- a/docs/source/NAS.md +++ b/docs/source/NAS.md @@ -176,4 +176,4 @@ Following examples are supported in Intel® Neural Compressor: - DyNAS MobileNetV3 supernet Example: - [DyNAS MobileNetV3 supernet Example](../examples/notebook/dynas/MobileNetV3_Supernet_NAS.ipynb): DyNAS with MobileNetV3 supernet on ImageNet dataset. - DyNAS Transformer LT supernet Example: - - [DyNAS Transformer LT supernet Example](../examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb): DyNAS with Transformer LT supernet on WMT En-De dataset. + - [DyNAS Transformer LT supernet Example](../examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb): DyNAS with Transformer LT supernet on WMT En-De dataset. diff --git a/examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb b/examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb similarity index 100% rename from examples/notebook/dynas/TransformerLT_Supernet_NAS.ipynb rename to examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb From 51ee7562b9565b61be5893571f5f54b7fbff8b96 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 03:56:30 -0800 Subject: [PATCH 43/60] Update imports Signed-off-by: Maciej Szankin --- .../experimental/nas/dynast/dynas_manager.py | 6 ++-- .../nas/dynast/supernetwork/__init__.py | 2 ++ .../machine_translation/__init__.py | 2 ++ .../transformer_interface.py | 32 +++++++++++++------ .../transformer_supernetwork.py | 2 +- 5 files changed, 30 insertions(+), 14 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_manager.py b/neural_compressor/experimental/nas/dynast/dynas_manager.py index 0a0792fc222..f1b7fc15e4d 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_manager.py +++ b/neural_compressor/experimental/nas/dynast/dynas_manager.py @@ -390,9 +390,7 @@ def import_csv( column_names: List[str] = None, drop_duplicates: bool = True, ) -> pd.DataFrame: - ''' - Import a csv file generated from a supernetwork search for the purpose - of training a predictor. 
+ """Import a csv file generated from a supernetwork search for the purpose of training a predictor. filepath - path of the csv to be imported. config - the subnetwork configuration @@ -400,7 +398,7 @@ def import_csv( column_names - a list of column names for the dataframe df - the output dataframe that contains the original config dict, pymoo, and 1-hot equivalent vector for training. - ''' + """ if column_names == None: df = pd.read_csv(filepath) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py b/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py index e833188cc78..451e864f2c7 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""This module contains all code related to the supernets.""" \ No newline at end of file diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py index e833188cc78..9003687dcb2 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""This module contains all code related to the machine translation (Transformer LT) supernet.""" diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 7ca6532d17d..6ba50ddfcc8 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -14,27 +14,31 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -Translate pre-processed data with a trained model. 
-""" +"""Translate pre-processed data with a trained model.""" import time import warnings import numpy as np -import torch -import torchprofile -from fairseq import options, progress_bar, tasks, utils -from fairseq.data.encoders.moses_tokenizer import MosesTokenizer -from fairseq.meters import StopwatchMeter -from neural_compressor.utils import logger +from neural_compressor.utils.utility import logger, LazyImport from .transformer_supernetwork import TransformerSuperNetwork +torch = LazyImport('torch') +torchprofile = LazyImport('torchprofile') +fairseq = LazyImport('fairseq') + warnings.filterwarnings("ignore") def compute_bleu(config, dataset_path, checkpoint_path): + """Measure BLEU score of the Transformer-based model.""" + options = fairseq.options + utils = fairseq.utils + tasks = fairseq.tasks + MosesTokenizer = fairseq.data.encoders.moses_tokenizer.MosesTokenizer + StopwatchMeter = fairseq.meters.StopwatchMeter + progress_bar = fairseq.progress_bar parser = options.get_generation_parser() @@ -137,6 +141,11 @@ def compute_bleu(config, dataset_path, checkpoint_path): def compute_latency(config, dataset_path, batch_size, get_model_parameters=False): + """Measure latency of the Transformer-based model.""" + options = fairseq.options + utils = fairseq.utils + tasks = fairseq.tasks + parser = options.get_generation_parser() args = options.parse_args_and_arch(parser, [dataset_path]) @@ -277,6 +286,11 @@ def compute_latency(config, dataset_path, batch_size, get_model_parameters=False def compute_macs(config, dataset_path): + """Calculate MACs for Transformer-based models.""" + options = fairseq.options + utils = fairseq.utils + tasks = fairseq.tasks + parser = options.get_generation_parser() args = options.parse_args_and_arch(parser,[dataset_path]) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index 6ccc8b9c4a2..33dc3b125b5 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -220,7 +220,7 @@ def set_sample_config(self, config: dict): layer.set_sample_config(is_identity_layer=True) def forward(self, src_tokens, src_lengths): - """ + """Forward function. 
Args: src_tokens (LongTensor): tokens in the source language of shape `(batch, src_len)` From d6223a2f11f04b2b6306d6a54b064887e1cd6df0 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 06:26:14 -0800 Subject: [PATCH 44/60] Update requirements Signed-off-by: Maciej Szankin --- test/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/requirements.txt b/test/requirements.txt index 32535567cc6..fd9a37c7328 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -18,6 +18,8 @@ tensorflow-addons onnxruntime-extensions; python_version < '3.10' autograd==1.4 fvcore==0.1.5.post20220119 +fairseq==0.12.2 +torchprofile==0.0.4 ofa==0.1.0.post202203231606 pymoo==0.5.0 intel-extension-for-pytorch From 95b7d39ad524163ac955de7f7463b5885fa37fd3 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 07:18:59 -0800 Subject: [PATCH 45/60] Fix styling check Signed-off-by: Maciej Szankin --- .../experimental/nas/dynast/dynas_manager.py | 14 ++- .../nas/dynast/dynas_predictor.py | 1 + .../experimental/nas/dynast/dynas_utils.py | 26 ++--- .../modules_supernetwork.py | 80 +++++++------- .../transformer_supernetwork.py | 101 +++++++++++------- 5 files changed, 121 insertions(+), 101 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_manager.py b/neural_compressor/experimental/nas/dynast/dynas_manager.py index f1b7fc15e4d..0e64ccaaf2e 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_manager.py +++ b/neural_compressor/experimental/nas/dynast/dynas_manager.py @@ -284,11 +284,11 @@ def create_training_set( return features_train, features_test, labels_train, labels_test -class TransformerLTEncoding(ParameterManager): - def __init__(self, param_dict: dict, verbose: bool = False, seed: int = 0): +class TransformerLTEncoding(ParameterManager): #noqa: D101 + def __init__(self, param_dict: dict, verbose: bool = False, seed: int = 0): #noqa: D107 super().__init__(param_dict, verbose, seed) - def onehot_custom(self, subnet_cfg, provide_onehot=True): + def onehot_custom(self, subnet_cfg, provide_onehot=True): #noqa: D102 features = [] #import ipdb;ipdb.set_trace() @@ -399,7 +399,6 @@ def import_csv( df - the output dataframe that contains the original config dict, pymoo, and 1-hot equivalent vector for training. """ - if column_names == None: df = pd.read_csv(filepath) else: @@ -442,11 +441,10 @@ def create_training_set( split: float = 0.33, seed: bool = None, ) -> Tuple[list, list, list, list]: - ''' - Create a sklearn compatible test/train set from an imported results csv - after "import_csv" method is run. - ''' + """Create a sklearn compatible test/train. + The set is created from an imported results csv after "import_csv" method is run. + """ collect_rows = list() for i in range(len(dataframe)): collect_rows.append(np.asarray(dataframe['config_onehot'].iloc[i])) diff --git a/neural_compressor/experimental/nas/dynast/dynas_predictor.py b/neural_compressor/experimental/nas/dynast/dynas_predictor.py index 477e4fcf7ca..15b167bb86d 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_predictor.py +++ b/neural_compressor/experimental/nas/dynast/dynas_predictor.py @@ -70,6 +70,7 @@ def __init__(self, alphas=DEFAULT_ALPHAS, cost_factors=DEFAULT_COST_FACTORS, def train(self, examples, labels): """Train the predictor on the specified examples and labels using the underlying regressor. + Args: examples: Examples to be used for training. labels: Labels to be used for training. 
diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index e1902a73c93..8387fdd800a 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -336,7 +336,7 @@ def get_subnet( return self.subnet -class TransformerLTRunner(Runner): +class TransformerLTRunner(Runner): #noqa: D101 def __init__( self, @@ -348,7 +348,7 @@ def __init__( batch_size: int, checkpoint_path: str, **kwargs, - ) -> None: + ) -> None: #noqa: D107 self.supernet = supernet self.acc_predictor = acc_predictor self.macs_predictor = macs_predictor @@ -362,28 +362,28 @@ def __init__( def estimate_accuracy_bleu( self, subnet_cfg: dict, - ) -> float: + ) -> float: #noqa: D102 top1 = self.acc_predictor.predict(subnet_cfg) return top1 def estimate_macs( self, subnet_cfg: dict, - ) -> int: + ) -> int: #noqa: D102 macs = self.macs_predictor.predict(subnet_cfg) return macs def estimate_latency( self, subnet_cfg: dict, - ) -> float: + ) -> float: #noqa: D102 latency = self.latency_predictor.predict(subnet_cfg) return latency def validate_bleu( self, subnet_cfg: dict, - ) -> float: # pragma: no cover + ) -> float: #noqa: D102 bleu = compute_bleu(subnet_cfg, self.dataset_path, self.checkpoint_path) @@ -393,13 +393,13 @@ def validate_macs( self, subnet_cfg: dict, ) -> float: - """Measure Torch model's FLOPs/MACs as per FVCore calculation + """Measure Torch model's FLOPs/MACs as per FVCore calculation. + Args: subnet_cfg: sub-network Torch model Returns: `macs` """ - macs = compute_macs(subnet_cfg, self.dataset_path) logger.info('[DyNAS-T] Model\'s macs: {}'.format(macs)) @@ -411,12 +411,12 @@ def measure_latency( subnet_cfg: dict, ) -> Tuple[float, float]: """Measure model's latency. 
+ Args: subnet_cfg: sub-network Torch model Returns: mean latency; std latency """ - latency_mean, latency_std = compute_latency( subnet_cfg, self.dataset_path, self.batch_size) logger.info( @@ -610,7 +610,7 @@ def eval_subnet( return sample, macs, -top1 -class EvaluationInterfaceTransformerLT(EvaluationInterface): +class EvaluationInterfaceTransformerLT(EvaluationInterface): #noqa: D101 def __init__( self, evaluator: Runner, @@ -618,13 +618,13 @@ def __init__( metrics=['acc', 'macs'], predictor_mode=False, csv_path=None, - ) -> None: + ) -> None: #noqa: D107 super().__init__(evaluator, manager, metrics, predictor_mode, csv_path) def eval_subnet( self, x: list, - ) -> Tuple[dict, float, float]: + ) -> Tuple[dict, float, float]: #noqa: D102 # PyMoo vector to Elastic Parameter Mapping param_dict = self.manager.translate2param(x) @@ -678,7 +678,7 @@ def eval_subnet( else: return sample, macs, -bleu - def clear_csv(self) -> None: + def clear_csv(self) -> None: #noqa: D102 if self.csv_path: f = open(self.csv_path, "w") writer = csv.writer(f) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index 4c750e45c5c..41d0a41fef7 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -1,3 +1,4 @@ +#noqa: D100 # https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/LICENSE # # Copyright (c) 2022 Intel Corporation @@ -38,7 +39,7 @@ def _get_full_incremental_state_key(module_instance, key): return '{}.{}.{}'.format(module_name, module_instance._fairseq_instance_id, key) -def get_incremental_state(module, incremental_state, key): +def get_incremental_state(module, incremental_state, key): #noqa: D102 """Helper for getting incremental state for an nn.Module.""" full_key = _get_full_incremental_state_key(module, key) if incremental_state is None or full_key not in incremental_state: @@ -46,15 +47,15 @@ def get_incremental_state(module, incremental_state, key): return incremental_state[full_key] -def set_incremental_state(module, incremental_state, key, value): +def set_incremental_state(module, incremental_state, key, value): #noqa: D102 """Helper for setting incremental state for an nn.Module.""" if incremental_state is not None: full_key = _get_full_incremental_state_key(module, key) incremental_state[full_key] = value -class EmbeddingSuper(nn.Embedding): - def __init__(self, num_embeddings, super_embed_dim, padding_idx, *args, **kwargs): +class EmbeddingSuper(nn.Embedding): #noqa: D101 + def __init__(self, num_embeddings, super_embed_dim, padding_idx, *args, **kwargs): #noqa: D107 super().__init__(num_embeddings, super_embed_dim, padding_idx, *args, **kwargs) # the largest embed dim @@ -68,15 +69,15 @@ def __init__(self, num_embeddings, super_embed_dim, padding_idx, *args, **kwargs self.profiling = False self.reset_parameters() - def profile(self, mode=True): + def profile(self, mode=True): #noqa: D102 self.profiling = mode - def reset_parameters(self): + def reset_parameters(self): #noqa: D102 super().reset_parameters() nn.init.normal_(self.weight, mean=0, std=self.embedding_dim ** -0.5) nn.init.constant_(self.weight[self.padding_idx], 0) - def set_sample_config(self, sample_embed_dim, part): + def set_sample_config(self, sample_embed_dim, part): #noqa: D102 
self.sample_embed_dim[part] = sample_embed_dim self._sample_parameters(part) @@ -86,18 +87,18 @@ def _sample_parameters(self, part): return self.samples - def sample_parameters(self, part, resample=False): + def sample_parameters(self, part, resample=False): #noqa: D102 return self._sample_parameters(part) if self.profiling or resample else self.samples - def sampled_weight(self, part): + def sampled_weight(self, part): #noqa: D102 return self.sample_parameters(part)[part]['weight'] - def forward(self, input, part='encoder'): + def forward(self, input, part='encoder'): #noqa: D102 return F.embedding(input, self.sampled_weight(part), self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse) -class LinearSuper(nn.Linear): - def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear'): +class LinearSuper(nn.Linear): #noqa: D101 + def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_linear='linear'): #noqa: D107 super().__init__(super_in_dim, super_out_dim, bias=bias) # super_in_dim and super_out_dim indicate the largest network! @@ -113,10 +114,10 @@ def __init__(self, super_in_dim, super_out_dim, bias=True, uniform_=None, non_li self._reset_parameters(bias, uniform_, non_linear) self.profiling = False - def profile(self, mode=True): + def profile(self, mode=True): #noqa: D102 self.profiling = mode - def sample_parameters(self, resample=False): + def sample_parameters(self, resample=False): #noqa: D102 if self.profiling or resample: return self._sample_parameters() return self.samples @@ -127,7 +128,7 @@ def _reset_parameters(self, bias, uniform_, non_linear): if bias: nn.init.constant_(self.bias, 0.) - def set_sample_config(self, sample_in_dim, sample_out_dim): + def set_sample_config(self, sample_in_dim, sample_out_dim): #noqa: D102 self.sample_in_dim = sample_in_dim self.sample_out_dim = sample_out_dim @@ -141,11 +142,11 @@ def _sample_parameters(self): self.samples['bias'] = sample_bias(self.bias, self.sample_out_dim) return self.samples - def forward(self, x): + def forward(self, x): #noqa: D102 self.sample_parameters() return F.linear(x, self.samples['weight'], self.samples['bias']) - def calc_sampled_param_num(self): + def calc_sampled_param_num(self): #noqa: D102 assert 'weight' in self.samples.keys() weight_numel = self.samples['weight'].numel() @@ -157,20 +158,20 @@ def calc_sampled_param_num(self): return weight_numel + bias_numel -def sample_weight(weight, sample_in_dim, sample_out_dim): +def sample_weight(weight, sample_in_dim, sample_out_dim): #noqa: D103 sample_weight = weight[:, :sample_in_dim] sample_weight = sample_weight[:sample_out_dim, :] return sample_weight -def sample_bias(bias, sample_out_dim): +def sample_bias(bias, sample_out_dim): #noqa: D103 sample_bias = bias[:sample_out_dim] return sample_bias -def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False): +def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False): #noqa: D103 if not export and torch.cuda.is_available(): try: from apex.normalization import FusedLayerNorm @@ -180,8 +181,8 @@ def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, export=False) return torch.nn.LayerNorm(normalized_shape, eps, elementwise_affine) -class LayerNormSuper(torch.nn.LayerNorm): - def __init__(self, super_embed_dim): +class LayerNormSuper(torch.nn.LayerNorm): #noqa: D101 + def __init__(self, super_embed_dim): #noqa: D107 super().__init__(super_embed_dim) # the largest embed dim @@ 
-193,10 +194,10 @@ def __init__(self, super_embed_dim): self.samples = {} self.profiling = False - def profile(self, mode=True): + def profile(self, mode=True): #noqa: D102 self.profiling = mode - def sample_parameters(self, resample=False): + def sample_parameters(self, resample=False): #noqa: D102 if self.profiling or resample: return self._sample_parameters() return self.samples @@ -206,15 +207,15 @@ def _sample_parameters(self): self.samples['bias'] = self.bias[:self.sample_embed_dim] return self.samples - def set_sample_config(self, sample_embed_dim): + def set_sample_config(self, sample_embed_dim): # noqa: D102 self.sample_embed_dim = sample_embed_dim self._sample_parameters() - def forward(self, x): + def forward(self, x): # noqa: D102 self.sample_parameters() return F.layer_norm(x, (self.sample_embed_dim,), weight=self.samples['weight'], bias=self.samples['bias'], eps=self.eps) - def calc_sampled_param_num(self): + def calc_sampled_param_num(self): # noqa: D102 assert 'weight' in self.samples.keys() assert 'bias' in self.samples.keys() return self.samples['weight'].numel() + self.samples['bias'].numel() @@ -228,7 +229,7 @@ class MultiheadAttentionSuper(nn.Module): def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, super_vdim=None, dropout=0., bias=True, add_bias_kv=False, add_zero_attn=False, self_attention=False, - encoder_decoder_attention=False, out_dim=None, qkv_dim=None): + encoder_decoder_attention=False, out_dim=None, qkv_dim=None): # noqa: D107 super().__init__() # the configs of super arch @@ -308,7 +309,7 @@ def __init__(self, super_embed_dim, num_heads, is_encoder, super_kdim=None, supe self.enable_torch_version = False self.enable_torch_version = False - def calc_sampled_param_num(self): + def calc_sampled_param_num(self): # noqa: D102 assert self.in_proj_weight is not None and self.in_proj_bias is not None in_proj_q_weight_numel = self.sample_q_embed_dim * self.qkv_dim in_proj_v_weight_numel = in_proj_k_weight_numel = self.sample_kv_embed_dim * self.qkv_dim @@ -320,7 +321,7 @@ def calc_sampled_param_num(self): return in_proj_q_weight_numel + in_proj_k_weight_numel + in_proj_v_weight_numel + in_proj_bias_numel - def set_sample_config(self, sample_q_embed_dim, sample_attention_heads, sample_kv_embed_dim=None): + def set_sample_config(self, sample_q_embed_dim, sample_attention_heads, sample_kv_embed_dim=None): # noqa: D102 self.sample_q_embed_dim = sample_q_embed_dim if sample_kv_embed_dim is None: self.sample_kv_embed_dim = sample_q_embed_dim @@ -336,10 +337,10 @@ def set_sample_config(self, sample_q_embed_dim, sample_attention_heads, sample_k self.out_proj.set_sample_config( sample_in_dim=self.qkv_dim, sample_out_dim=self.sample_q_embed_dim) - def prepare_for_onnx_export_(self): + def prepare_for_onnx_export_(self): # noqa: D102 self.onnx_trace = True - def reset_parameters(self): + def reset_parameters(self): # noqa: D102 if self.qkv_same_dim: nn.init.xavier_uniform_(self.in_proj_weight) else: @@ -358,14 +359,13 @@ def reset_parameters(self): def forward(self, query, key, value, key_padding_mask=None, incremental_state=None, need_weights=True, static_kv=False, attn_mask=None): - """Input shape: Time x Batch x Channel + """Input shape: Time x Batch x Channel. Timesteps can be masked by supplying a T x T mask in the `attn_mask` argument. Padding elements can be excluded from the key by passing a binary ByteTensor (`key_padding_mask`) with shape: batch x src_len, where padding elements are indicated by 1s. 
""" - tgt_len, bsz, embed_dim = query.size() if incremental_state is not None: @@ -528,10 +528,10 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No return attn, attn_weights - def in_proj_qkv(self, query): + def in_proj_qkv(self, query): # noqa: D102 return self._in_proj(query, sample_dim=self.sample_q_embed_dim).chunk(3, dim=-1) - def in_proj_q(self, query): + def in_proj_q(self, query): # noqa: D102 if self.qkv_same_dim: return self._in_proj(query, end=self.qkv_dim, sample_dim=self.sample_q_embed_dim) else: @@ -540,7 +540,7 @@ def in_proj_q(self, query): bias = bias[:self.qkv_dim] return F.linear(query, self.q_proj_weight[..., :self.sample_q_embed_dim], bias) - def in_proj_k(self, key): + def in_proj_k(self, key): # noqa: D102 if self.qkv_same_dim: return self._in_proj(key, start=self.qkv_dim, end=2 * self.qkv_dim, sample_dim=self.sample_kv_embed_dim) else: @@ -550,7 +550,7 @@ def in_proj_k(self, key): bias = bias[self.qkv_dim:2 * self.qkv_dim] return F.linear(key, weight[..., :self.sample_kv_embed_dim], bias) - def in_proj_v(self, value): + def in_proj_v(self, value): # noqa: D102 if self.qkv_same_dim: return self._in_proj(value, start=2 * self.qkv_dim, sample_dim=self.sample_kv_embed_dim) else: @@ -591,10 +591,10 @@ def _set_input_buffer(self, incremental_state, buffer): buffer, ) - def apply_sparse_mask(self, attn_weights, tgt_len, src_len, bsz): + def apply_sparse_mask(self, attn_weights, tgt_len, src_len, bsz): # noqa: D102 return attn_weights - def __repr__(self): + def __repr__(self): # noqa: D105 # We treat the extra repr like the sub-module, one item per line extra_lines = [] extra_repr = self.extra_repr() diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index 33dc3b125b5..fef5c49ece3 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -1,3 +1,4 @@ +#noqa: D100 # https://github.com/mit-han-lab/hardware-aware-transformers/blob/master/LICENSE # # Copyright (c) 2022 Intel Corporation @@ -34,9 +35,9 @@ class TransformerSuperNetwork(BaseFairseqModel): - """ - Transformer model from `"Attention Is All You Need" (Vaswani, et al, 2017) - `_. + """Transformer model from `"Attention Is All You Need" (Vaswani, et al, 2017)`. 
+ + Args: encoder (TransformerEncoder): the encoder @@ -50,7 +51,7 @@ class TransformerSuperNetwork(BaseFairseqModel): :prog: """ - def __init__(self, task): + def __init__(self, task): #noqa: D107 super().__init__() src_dict, tgt_dict = task.source_dictionary, task.target_dictionary @@ -76,7 +77,7 @@ def __init__(self, task): self.decoder = TransformerDecoder( decoder_config, tgt_dict, decoder_embed_tokens) - def build_embedding(self, dictionary, embed_dim, path=None): + def build_embedding(self, dictionary, embed_dim, path=None): #noqa: D102 num_embeddings = len(dictionary) padding_idx = dictionary.pad() emb = Embedding(num_embeddings, embed_dim, padding_idx) @@ -86,12 +87,12 @@ def build_embedding(self, dictionary, embed_dim, path=None): utils.load_embedding(embed_dict, dictionary, emb) return emb - def profile(self, mode=True): + def profile(self, mode=True): #noqa: D102 for module in self.modules(): if hasattr(module, 'profile') and self != module: module.profile(mode) - def get_sampled_params_numel(self, config): + def get_sampled_params_numel(self, config): #noqa: D102 self.set_sample_config(config) numels = [] for name, module in self.named_modules(): @@ -105,21 +106,21 @@ def get_sampled_params_numel(self, config): numels.append(module.calc_sampled_param_num()) return sum(numels) - def set_sample_config(self, config): + def set_sample_config(self, config): #noqa: D102 logger.info('[DyNAS-T] Setting active configuration to {}'.format(config)) self.encoder.set_sample_config(config) self.decoder.set_sample_config(config) - def forward(self,src_tokens,src_lengths,prev_output_token): + def forward(self,src_tokens,src_lengths,prev_output_token): #noqa: D102 encoder_output = self.encoder.forward(src_tokens,src_lengths) output = self.decoder(prev_output_token,encoder_output) return output class TransformerEncoder(FairseqEncoder): - """ - Transformer encoder consisting of *args.encoder_layers* layers. Each layer - is a :class:`TransformerEncoderLayer`. + """Transformer encoder consisting of *args.encoder_layers* layers. + + Each layer is a :class:`TransformerEncoderLayer`. Args: args (argparse.Namespace): parsed command-line arguments @@ -127,7 +128,7 @@ class TransformerEncoder(FairseqEncoder): embed_tokens (torch.nn.Embedding): input embedding """ - def __init__(self, encoder_config, dictionary, embed_tokens): + def __init__(self, encoder_config, dictionary, embed_tokens): #noqa: D107 super().__init__(dictionary) # the configs of super arch self.super_embed_dim = encoder_config['encoder_embed_dim'] @@ -178,7 +179,7 @@ def __init__(self, encoder_config, dictionary, embed_tokens): self.vocab_original_scaling = False - def set_sample_config(self, config: dict): + def set_sample_config(self, config: dict): #noqa: D102 self.sample_embed_dim = config['encoder']['encoder_embed_dim'] @@ -221,6 +222,7 @@ def set_sample_config(self, config: dict): def forward(self, src_tokens, src_lengths): """Forward function. + Args: src_tokens (LongTensor): tokens in the source language of shape `(batch, src_len)` @@ -268,8 +270,7 @@ def forward(self, src_tokens, src_lengths): } def reorder_encoder_out(self, encoder_out, new_order): - """ - Reorder encoder output according to *new_order*. + """Reorder encoder output according to *new_order*. Args: encoder_out: output from the ``forward()`` method @@ -323,9 +324,9 @@ def upgrade_state_dict_named(self, state_dict, name): class TransformerDecoder(FairseqIncrementalDecoder): - """ - Transformer decoder consisting of *args.decoder_layers* layers. 
Each layer - is a :class:`TransformerDecoderLayer`. + """Transformer decoder consisting of *args.decoder_layers* layers. + + Each layer is a :class:`TransformerDecoderLayer`. Args: args (argparse.Namespace): parsed command-line arguments @@ -335,7 +336,7 @@ class TransformerDecoder(FairseqIncrementalDecoder): (default: False). """ - def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=False): + def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=False): #noqa: D107 super().__init__(dictionary) # the configs of super arch @@ -409,7 +410,7 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.vocab_original_scaling = False - def set_sample_config(self, config: dict): + def set_sample_config(self, config: dict): #noqa: D102 self.sample_embed_dim = config['decoder']['decoder_embed_dim'] self.sample_encoder_embed_dim = config['encoder']['encoder_embed_dim'] @@ -460,7 +461,8 @@ def set_sample_config(self, config: dict): layer.set_sample_config(is_identity_layer=True) def forward(self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused): - """ + """Forward pass. + Args: prev_output_tokens (LongTensor): previous decoder outputs of shape `(batch, tgt_len)`, for teacher forcing @@ -480,8 +482,7 @@ def forward(self, prev_output_tokens, encoder_out=None, incremental_state=None, return x, extra def extract_features(self, prev_output_tokens, encoder_out=None, incremental_state=None, **unused): - """ - Similar to *forward* but only return features. + """Similar to *forward* but only return features. Returns: tuple: @@ -594,7 +595,7 @@ def max_positions(self): ipdb.set_trace() return min(self.max_target_positions, self.embed_positions.max_positions()) - def buffered_future_mask(self, tensor): + def buffered_future_mask(self, tensor): #noqa: D102 dim = tensor.size(0) if not hasattr(self, '_future_mask') or self._future_mask is None or self._future_mask.device != tensor.device or self._future_mask.size(0) < dim: self._future_mask = torch.triu( @@ -651,7 +652,7 @@ class TransformerEncoderLayer(nn.Module): args (argparse.Namespace): parsed command-line arguments """ - def __init__(self, encoder_config, layer_idx): + def __init__(self, encoder_config, layer_idx): #noqa: D107 super().__init__() # the configs of super arch @@ -692,7 +693,15 @@ def __init__(self, encoder_config, layer_idx): super_out_dim=self.super_embed_dim, uniform_=None, non_linear='linear') self.final_layer_norm = LayerNormSuper(self.super_embed_dim) - def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_ffn_embed_dim_this_layer=None, sample_self_attention_heads_this_layer=None, sample_dropout=None, sample_activation_dropout=None): + def set_sample_config( + self, + is_identity_layer, + sample_embed_dim=None, + sample_ffn_embed_dim_this_layer=None, + sample_self_attention_heads_this_layer=None, + sample_dropout=None, + sample_activation_dropout=None, + ): #noqa: D102 if is_identity_layer: self.is_identity_layer = True @@ -722,7 +731,8 @@ def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_ffn sample_embed_dim=self.sample_embed_dim) def upgrade_state_dict_named(self, state_dict, name): - """ + """Renames keys in state dict. 
+ Rename layer norm states from `...layer_norms.0.weight` to `...self_attn_layer_norm.weight` and `...layer_norms.1.weight` to `...final_layer_norm.weight` @@ -741,7 +751,8 @@ def upgrade_state_dict_named(self, state_dict, name): del state_dict[k] def forward(self, x, encoder_padding_mask, attn_mask=None): - """ + """Forward pass. + Args: x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` encoder_padding_mask (ByteTensor): binary ByteTensor of shape @@ -786,7 +797,7 @@ def forward(self, x, encoder_padding_mask, attn_mask=None): x = self.maybe_layer_norm(self.final_layer_norm, x, after=True) return x - def maybe_layer_norm(self, layer_norm, x, before=False, after=False): + def maybe_layer_norm(self, layer_norm, x, before=False, after=False): #noqa: D102 assert before ^ after if after ^ self.normalize_before: return layer_norm(x) @@ -811,7 +822,7 @@ class TransformerDecoderLayer(nn.Module): (default: False). """ - def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False): + def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False): #noqa: D107 super().__init__() # the configs of super arch @@ -883,7 +894,16 @@ def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv self.onnx_trace = False - def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_encoder_embed_dim=None, sample_ffn_embed_dim_this_layer=None, sample_self_attention_heads_this_layer=None, sample_ende_attention_heads_this_layer=None, sample_dropout=None, sample_activation_dropout=None): + def set_sample_config(self, + is_identity_layer, + sample_embed_dim=None, + sample_encoder_embed_dim=None, + sample_ffn_embed_dim_this_layer=None, + sample_self_attention_heads_this_layer=None, + sample_ende_attention_heads_this_layer=None, + sample_dropout=None, + sample_activation_dropout=None, + ): #noqa: D102 if is_identity_layer: self.is_identity_layer = True @@ -918,7 +938,7 @@ def set_sample_config(self, is_identity_layer, sample_embed_dim=None, sample_enc self.final_layer_norm.set_sample_config( sample_embed_dim=self.sample_embed_dim) - def prepare_for_onnx_export_(self): + def prepare_for_onnx_export_(self): #noqa: D102 self.onnx_trace = True def forward( @@ -932,7 +952,8 @@ def forward( self_attn_mask=None, self_attn_padding_mask=None, ): - """ + """Forward pass. 
+ Args: x (Tensor): input to the layer of shape `(seq_len, batch, embed_dim)` encoder_padding_mask (ByteTensor): binary ByteTensor of shape @@ -1005,28 +1026,28 @@ def forward( return x, attn, self_attn_state return x, attn - def maybe_layer_norm(self, layer_norm, x, before=False, after=False): + def maybe_layer_norm(self, layer_norm, x, before=False, after=False): #noqa: D102 assert before ^ after if after ^ self.normalize_before: return layer_norm(x) else: return x - def make_generation_fast_(self, need_attn=False, **kwargs): + def make_generation_fast_(self, need_attn=False, **kwargs): #noqa: D102 self.need_attn = need_attn -def calc_dropout(dropout, sample_embed_dim, super_embed_dim): +def calc_dropout(dropout, sample_embed_dim, super_embed_dim): #noqa: D103 return dropout * 1.0 * sample_embed_dim / super_embed_dim -def Embedding(num_embeddings, embedding_dim, padding_idx): +def Embedding(num_embeddings, embedding_dim, padding_idx): #noqa: D103 return EmbeddingSuper(num_embeddings, embedding_dim, padding_idx=padding_idx) -def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): +def Linear(in_features, out_features, bias=True, uniform_=None, non_linear='linear'): #noqa: D103 m = nn.Linear(in_features, out_features, bias) - nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_( + nn.init.xavier_uniform_(m.weight) if uniform_ is None else uniform_( #noqa: D103 m.weight, non_linear=non_linear) if bias: nn.init.constant_(m.bias, 0.) From 6f1672207ca65310f6da4f63e4a2650afa4435a7 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 07:47:08 -0800 Subject: [PATCH 46/60] Ignore fairseq in pylint Signed-off-by: Maciej Szankin --- .azure-pipelines/scripts/codeScan/pylint/pylint.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/scripts/codeScan/pylint/pylint.sh b/.azure-pipelines/scripts/codeScan/pylint/pylint.sh index b15da8c91b3..938c5ecdc6c 100644 --- a/.azure-pipelines/scripts/codeScan/pylint/pylint.sh +++ b/.azure-pipelines/scripts/codeScan/pylint/pylint.sh @@ -10,13 +10,13 @@ pip install -r /neural-compressor/requirements.txt pip install torch==1.12.0 python -m pylint -f json --disable=R,C,W,E1129 --enable=line-too-long --max-line-length=120 --extension-pkg-whitelist=numpy --ignored-classes=TensorProto,NodeProto \ ---ignored-modules=tensorflow,torch,torch.quantization,torch.tensor,torchvision,mxnet,onnx,onnxruntime,intel_extension_for_pytorch /neural-compressor/neural_compressor \ +--ignored-modules=tensorflow,torch,torch.quantization,torch.tensor,torchvision,fairseq,mxnet,onnx,onnxruntime,intel_extension_for_pytorch /neural-compressor/neural_compressor \ > $log_dir/pylint.json exit_code=$? 
$BOLD_YELLOW && echo " ----------------- Current pylint cmd start --------------------------" && $RESET -echo "python -m pylint -f json --disable=R,C,W,E1129 --enable=line-too-long --max-line-length=120 --extension-pkg-whitelist=numpy --ignored-classes=TensorProto,NodeProto --ignored-modules=tensorflow,torch,torch.quantization,torch.tensor,torchvision,mxnet,onnx,onnxruntime,intel_extension_for_pytorch /neural-compressor/neural_compressor > $log_dir/pylint.json" +echo "python -m pylint -f json --disable=R,C,W,E1129 --enable=line-too-long --max-line-length=120 --extension-pkg-whitelist=numpy --ignored-classes=TensorProto,NodeProto --ignored-modules=tensorflow,torch,torch.quantization,torch.tensor,torchvision,fairseq,mxnet,onnx,onnxruntime,intel_extension_for_pytorch /neural-compressor/neural_compressor > $log_dir/pylint.json" $BOLD_YELLOW && echo " ----------------- Current pylint cmd end --------------------------" && $RESET $BOLD_YELLOW && echo " ----------------- Current log file output start --------------------------" && $RESET From 2299d9d14023a791d6c1df97a33b21950c4b7450 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 07:54:55 -0800 Subject: [PATCH 47/60] Address styling issues Signed-off-by: Maciej Szankin --- .../modules_supernetwork.py | 18 +++++- .../transformer_interface.py | 8 ++- .../transformer_supernetwork.py | 57 +++++++++++++------ 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index 41d0a41fef7..6de3afa7698 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -94,7 +94,15 @@ def sampled_weight(self, part): #noqa: D102 return self.sample_parameters(part)[part]['weight'] def forward(self, input, part='encoder'): #noqa: D102 - return F.embedding(input, self.sampled_weight(part), self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse) + return F.embedding( + input, + self.sampled_weight(part), + self.padding_idx, + self.max_norm, + self.norm_type, + self.scale_grad_by_freq, + self.sparse, + ) class LinearSuper(nn.Linear): #noqa: D101 @@ -213,7 +221,13 @@ def set_sample_config(self, sample_embed_dim): # noqa: D102 def forward(self, x): # noqa: D102 self.sample_parameters() - return F.layer_norm(x, (self.sample_embed_dim,), weight=self.samples['weight'], bias=self.samples['bias'], eps=self.eps) + return F.layer_norm( + x, + (self.sample_embed_dim,), + weight=self.samples['weight'], + bias=self.samples['bias'], + eps=self.eps, + ) def calc_sampled_param_num(self): # noqa: D102 assert 'weight' in self.samples.keys() diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py index 6ba50ddfcc8..0b76b052b21 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_interface.py @@ -238,7 +238,9 @@ def compute_latency(config, dataset_path, batch_size, get_model_parameters=False encoder_latencies = encoder_latencies[int( args.latiter * 0.1): -max(1, 
int(args.latiter * 0.1))] logger.info( - f'[DyNAS-T] Encoder latency for dataset generation: Mean: {np.mean(encoder_latencies)} ms; Std: {np.std(encoder_latencies)} ms') + f'[DyNAS-T] Encoder latency for dataset generation: Mean: ' + '{np.mean(encoder_latencies)} ms; Std: {np.std(encoder_latencies)} ms' + ) encoder_out_test_with_beam = model.encoder.reorder_encoder_out( encoder_out_test, new_order) @@ -278,7 +280,9 @@ def compute_latency(config, dataset_path, batch_size, get_model_parameters=False args.latiter * 0.1): -max(1, int(args.latiter * 0.1))] logger.info( - f'[DyNAS-T] Decoder latency for dataset generation: Mean: {np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms') + f'[DyNAS-T] Decoder latency for dataset generation: Mean: ' + '{np.mean(decoder_latencies)} ms; \t Std: {np.std(decoder_latencies)} ms' + ) lat_mean = np.mean(encoder_latencies)+np.mean(decoder_latencies) lat_std = np.std(encoder_latencies)+np.std(decoder_latencies) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index fef5c49ece3..cd2d3e0377e 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -98,9 +98,15 @@ def get_sampled_params_numel(self, config): #noqa: D102 for name, module in self.named_modules(): if hasattr(module, 'calc_sampled_param_num'): # a hacky way to skip the layers that exceed encoder-layer-num or decoder-layer-num - if name.split('.')[0] == 'encoder' and eval(name.split('.')[2]) >= config['encoder']['encoder_layer_num']: + if ( + name.split('.')[0] == 'encoder' + and eval(name.split('.')[2]) >= config['encoder']['encoder_layer_num'] + ): continue - if name.split('.')[0] == 'decoder' and eval(name.split('.')[2]) >= config['decoder']['decoder_layer_num']: + if ( + name.split('.')[0] == 'decoder' + and eval(name.split('.')[2]) >= config['decoder']['decoder_layer_num'] + ): continue numels.append(module.calc_sampled_param_num()) @@ -397,7 +403,7 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.adaptive_softmax = None self.project_out_dim = Linear(self.super_embed_dim, self.output_embed_dim, bias=False) \ - if self.super_embed_dim != self.output_embed_dim and not args.tie_adaptive_weights else None + if self.super_embed_dim != self.output_embed_dim else None if not self.share_input_output_embed: self.embed_out = nn.Parameter(torch.Tensor( @@ -533,7 +539,10 @@ def extract_features(self, prev_output_tokens, encoder_out=None, incremental_sta [encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2]], dim=0) elif self.sample_arbitrary_ende_attn[i] == 2: encoder_out_feed = torch.cat( - [encoder_out['encoder_out'], encoder_out['encoder_out_all'][-2], encoder_out['encoder_out_all'][-3]], dim=0) + [encoder_out['encoder_out'], + encoder_out['encoder_out_all'][-2], + encoder_out['encoder_out_all'][-3]], + dim=0) else: raise NotImplementedError( "arbitrary_ende_attn should in [-1, 1, 2]") @@ -548,7 +557,10 @@ def extract_features(self, prev_output_tokens, encoder_out=None, incremental_sta # concat two more elif self.sample_arbitrary_ende_attn[i] == 2: encoder_padding_mask_feed = torch.cat( - [encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask'], encoder_out['encoder_padding_mask']], 
dim=1) + [encoder_out['encoder_padding_mask'], + encoder_out['encoder_padding_mask'], + encoder_out['encoder_padding_mask']], + dim=1) else: raise NotImplementedError( "arbitrary_ende_attn should in [-1, 1, 2]") @@ -565,7 +577,7 @@ def extract_features(self, prev_output_tokens, encoder_out=None, incremental_sta attns.append(attn) if self.layer_norm: - x = self.layer_norm(x) + x = self.layer_norm(x) # pylint: disable=not-callable # T x B x C -> B x T x C x = x.transpose(0, 1) @@ -591,16 +603,19 @@ def max_positions(self): """Maximum output length supported by the decoder.""" if self.embed_positions is None: return self.max_target_positions - import ipdb - ipdb.set_trace() return min(self.max_target_positions, self.embed_positions.max_positions()) def buffered_future_mask(self, tensor): #noqa: D102 dim = tensor.size(0) - if not hasattr(self, '_future_mask') or self._future_mask is None or self._future_mask.device != tensor.device or self._future_mask.size(0) < dim: - self._future_mask = torch.triu( + if ( + not hasattr(self, '_future_mask') + or self._future_mask is None # pylint: disable=access-member-before-definition + or self._future_mask.device != tensor.device # pylint: disable=access-member-before-definition + or self._future_mask.size(0) < dim # pylint: disable=access-member-before-definition + ): + self._future_mask = torch.triu( # pylint: disable=access-member-before-definition utils.fill_with_neg_inf(tensor.new(dim, dim)), 1) - return self._future_mask[:dim, :dim] + return self._future_mask[:dim, :dim] # pylint: disable=access-member-before-definition def upgrade_state_dict_named(self, state_dict, name): """Upgrade a (possibly old) state dict for new versions of fairseq.""" @@ -676,8 +691,8 @@ def __init__(self, encoder_config, layer_idx): #noqa: D107 self.qkv_dim = 512 self.self_attn = MultiheadAttentionSuper( - super_embed_dim=self.super_embed_dim, num_heads=self.super_self_attention_heads_this_layer, is_encoder=True, - dropout=0.1, self_attention=True, qkv_dim=self.qkv_dim + super_embed_dim=self.super_embed_dim, num_heads=self.super_self_attention_heads_this_layer, + is_encoder=True, dropout=0.1, self_attention=True, qkv_dim=self.qkv_dim, ) self.self_attn_layer_norm = LayerNormSuper(self.super_embed_dim) @@ -822,7 +837,14 @@ class TransformerDecoderLayer(nn.Module): (default: False). 
""" - def __init__(self, decoder_config, layer_idx, no_encoder_attn=False, add_bias_kv=False, add_zero_attn=False): #noqa: D107 + def __init__( + self, + decoder_config, + layer_idx, + no_encoder_attn=False, + add_bias_kv=False, + add_zero_attn=False, + ): #noqa: D107 super().__init__() # the configs of super arch @@ -927,8 +949,11 @@ def set_sample_config(self, self.self_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_attention_heads=self.sample_self_attention_heads_this_layer) - self.encoder_attn.set_sample_config(sample_q_embed_dim=self.sample_embed_dim, sample_kv_embed_dim=self.sample_encoder_embed_dim, - sample_attention_heads=self.sample_ende_attention_heads_this_layer) + self.encoder_attn.set_sample_config( + sample_q_embed_dim=self.sample_embed_dim, + sample_kv_embed_dim=self.sample_encoder_embed_dim, + sample_attention_heads=self.sample_ende_attention_heads_this_layer, + ) self.fc1.set_sample_config( sample_in_dim=self.sample_embed_dim, sample_out_dim=self.sample_ffn_embed_dim_this_layer) From 7e46cadab43b262e43eed2f2a39b7af98974414f Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 11:45:37 -0800 Subject: [PATCH 48/60] Pin torch version Signed-off-by: Maciej Szankin --- test/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/requirements.txt b/test/requirements.txt index fd9a37c7328..a17c2623b5f 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -2,7 +2,7 @@ intel-tensorflow onnx onnxruntime --find-links https://download.pytorch.org/whl/torch_stable.html -torch +torch==1.12.1 torchvision mxnet-mkl numpy From d8226113a7db129d92fe3f14739bb40aa646354f Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 12:47:43 -0800 Subject: [PATCH 49/60] LazyImport fairseq Signed-off-by: Maciej Szankin --- .../modules_supernetwork.py | 7 +++-- .../transformer_supernetwork.py | 31 ++++++++++++------- test/requirements.txt | 4 +-- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py index 6de3afa7698..1a5c9739372 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py +++ b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/modules_supernetwork.py @@ -20,10 +20,13 @@ import torch import torch.nn as nn import torch.nn.functional as F -from fairseq import utils from torch.nn import Parameter from torch.nn.modules.module import _addindent +from neural_compressor.utils.utility import LazyImport + +fairseq = LazyImport("fairseq") + INCREMENTAL_STATE_INSTANCE_ID = defaultdict(lambda: 0) @@ -511,7 +514,7 @@ def forward(self, query, key, value, key_padding_mask=None, incremental_state=No attn_weights = attn_weights.view( bsz * self.num_heads, tgt_len, src_len) - attn_weights = utils.softmax( + attn_weights = fairseq.utils.softmax( attn_weights, dim=-1, onnx_trace=self.onnx_trace, ).type_as(attn_weights) attn_weights = F.dropout( diff --git a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py index cd2d3e0377e..1034c6519a8 100644 --- a/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py +++ 
b/neural_compressor/experimental/nas/dynast/supernetwork/machine_translation/transformer_supernetwork.py @@ -19,22 +19,21 @@ import torch import torch.nn.functional as F -from fairseq import utils -from fairseq.models import (BaseFairseqModel, FairseqEncoder, - FairseqIncrementalDecoder) -from fairseq.modules import PositionalEmbedding, SinusoidalPositionalEmbedding from torch import nn from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport from .modules_supernetwork import (EmbeddingSuper, LayerNormSuper, LinearSuper, MultiheadAttentionSuper) +fairseq = LazyImport("fairseq") + DEFAULT_MAX_SOURCE_POSITIONS = 1024 DEFAULT_MAX_TARGET_POSITIONS = 1024 -class TransformerSuperNetwork(BaseFairseqModel): +class TransformerSuperNetwork(fairseq.models.BaseFairseqModel): """Transformer model from `"Attention Is All You Need" (Vaswani, et al, 2017)`. @@ -78,6 +77,8 @@ def __init__(self, task): #noqa: D107 decoder_config, tgt_dict, decoder_embed_tokens) def build_embedding(self, dictionary, embed_dim, path=None): #noqa: D102 + utils = fairseq.utils + num_embeddings = len(dictionary) padding_idx = dictionary.pad() emb = Embedding(num_embeddings, embed_dim, padding_idx) @@ -123,7 +124,7 @@ def forward(self,src_tokens,src_lengths,prev_output_token): #noqa: D102 return output -class TransformerEncoder(FairseqEncoder): +class TransformerEncoder(fairseq.models.FairseqEncoder): """Transformer encoder consisting of *args.encoder_layers* layers. Each layer is a :class:`TransformerEncoderLayer`. @@ -167,7 +168,7 @@ def __init__(self, encoder_config, dictionary, embed_tokens): #noqa: D107 self.embed_tokens = embed_tokens - self.embed_positions = PositionalEmbedding( + self.embed_positions = fairseq.modules.PositionalEmbedding( self.max_source_positions, self.super_embed_dim, self.padding_idx, learned=False, ) @@ -309,7 +310,8 @@ def max_positions(self): def upgrade_state_dict_named(self, state_dict, name): """Upgrade a (possibly old) state dict for new versions of fairseq.""" - if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + utils = fairseq.utils + if isinstance(self.embed_positions, fairseq.modules.SinusoidalPositionalEmbedding): weights_key = '{}.embed_positions.weights'.format(name) if weights_key in state_dict: del state_dict[weights_key] @@ -329,7 +331,7 @@ def upgrade_state_dict_named(self, state_dict, name): return state_dict -class TransformerDecoder(FairseqIncrementalDecoder): +class TransformerDecoder(fairseq.models.FairseqIncrementalDecoder): """Transformer decoder consisting of *args.decoder_layers* layers. Each layer is a :class:`TransformerDecoderLayer`. 
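The hunks in this file swap direct `fairseq` imports for attribute access through a `LazyImport` handle. As a rough illustration of that idea only (not the project's actual `LazyImport` implementation), a lazy module proxy defers the `importlib` work until the first attribute lookup, so merely importing this file no longer requires `fairseq` to be installed:

import importlib


class _LazyModule:
    """Minimal, illustrative stand-in for a lazy import handle."""

    def __init__(self, module_name):
        self._module_name = module_name
        self._module = None

    def __getattr__(self, name):
        # The real import happens on first attribute access, not at definition time.
        if self._module is None:
            self._module = importlib.import_module(self._module_name)
        return getattr(self._module, name)


# Hypothetical usage mirroring the diff: nothing is imported until a member is touched.
fairseq = _LazyModule("fairseq")
# fairseq.utils.softmax(...)  # first access would trigger the actual import

Note that when such a handle is used in a base-class position, the import still fires as soon as the class statement executes, so the laziness mainly benefits callers that never import this module at all.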
@@ -388,7 +390,7 @@ def __init__(self, decoder_config, dictionary, embed_tokens, no_encoder_attn=Fal self.embed_tokens = embed_tokens - self.embed_positions = PositionalEmbedding( + self.embed_positions = fairseq.modules.PositionalEmbedding( self.max_target_positions, self.super_embed_dim, padding_idx, learned=False, ) if not False else None @@ -606,6 +608,8 @@ def max_positions(self): return min(self.max_target_positions, self.embed_positions.max_positions()) def buffered_future_mask(self, tensor): #noqa: D102 + utils = fairseq.utils + dim = tensor.size(0) if ( not hasattr(self, '_future_mask') @@ -619,7 +623,8 @@ def buffered_future_mask(self, tensor): #noqa: D102 def upgrade_state_dict_named(self, state_dict, name): """Upgrade a (possibly old) state dict for new versions of fairseq.""" - if isinstance(self.embed_positions, SinusoidalPositionalEmbedding): + utils = fairseq.utils + if isinstance(self.embed_positions, fairseq.modules.SinusoidalPositionalEmbedding): weights_key = '{}.embed_positions.weights'.format(name) if weights_key in state_dict: del state_dict[weights_key] @@ -670,6 +675,8 @@ class TransformerEncoderLayer(nn.Module): def __init__(self, encoder_config, layer_idx): #noqa: D107 super().__init__() + utils = fairseq.utils + # the configs of super arch self.super_embed_dim = encoder_config['encoder_embed_dim'] self.super_ffn_embed_dim_this_layer = encoder_config['encoder_ffn_embed_dim'] @@ -847,6 +854,8 @@ def __init__( ): #noqa: D107 super().__init__() + utils = fairseq.utils + # the configs of super arch self.super_embed_dim = decoder_config['decoder_embed_dim'] self.super_encoder_embed_dim = decoder_config['decoder_embed_dim'] diff --git a/test/requirements.txt b/test/requirements.txt index a17c2623b5f..32535567cc6 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -2,7 +2,7 @@ intel-tensorflow onnx onnxruntime --find-links https://download.pytorch.org/whl/torch_stable.html -torch==1.12.1 +torch torchvision mxnet-mkl numpy @@ -18,8 +18,6 @@ tensorflow-addons onnxruntime-extensions; python_version < '3.10' autograd==1.4 fvcore==0.1.5.post20220119 -fairseq==0.12.2 -torchprofile==0.0.4 ofa==0.1.0.post202203231606 pymoo==0.5.0 intel-extension-for-pytorch From 79a47586b46b136366329476048e2f09fcc1aa84 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 14:41:48 -0800 Subject: [PATCH 50/60] Remove reference to DyNAS from UTs Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 68 -------------------------------------------- 1 file changed, 68 deletions(-) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index 4d22673d578..47dfaf19c6b 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -9,7 +9,6 @@ from neural_compressor.data import DATASETS from neural_compressor.experimental import common, NAS from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.experimental.nas.dynas import DyNAS def build_fake_yaml(approach=None, search_algorithm=None, metrics=['acc']): fake_yaml = """ @@ -58,41 +57,6 @@ def build_fake_yaml(approach=None, search_algorithm=None, metrics=['acc']): with open('fake.yaml', 'w', encoding="utf-8") as f: f.write(fake_yaml) -def build_dynas_fake_yaml(): - fake_yaml = """ - model: - name: imagenet_nas - framework: pytorch - - nas: - approach: dynas - search: - search_algorithm: nsga2 - dynas: - supernet: ofa_resnet50 - metrics: ['acc', 'macs'] - results_csv_path: './search_results.csv' - """ - with open('dynas_fake.yaml', 'w', encoding="utf-8") as 
f: - f.write(fake_yaml) - -def build_dynas_results_csv(): - results_csv = """ -Sub-network,Date,Latency (ms), MACs,Top-1 Acc (%) -"{'wid': None, 'ks': [7, 7, 3, 3, 5, 7, 7, 3, 5, 5, 3, 3, 7, 3, 5, 5, 5, 7, 5, 7], 'e': [3, 4, 4, 4, 4, 6, 6, 4, 4, 3, 4, 4, 3, 6, 4, 3, 4, 6, 3, 3], 'd': [2, 4, 4, 2, 3], 'r': [224]}",2022-07-07 03:13:06.306540,39,391813792,77.416 -"{'wid': None, 'ks': [3, 5, 5, 7, 5, 5, 3, 3, 7, 7, 7, 5, 7, 3, 7, 5, 3, 5, 3, 3], 'e': [4, 6, 3, 4, 4, 4, 4, 6, 3, 6, 4, 3, 4, 3, 4, 3, 6, 4, 4, 6], 'd': [4, 3, 3, 2, 3], 'r': [224]}",2022-07-07 03:14:50.398553,41,412962768,77.234 -"{'wid': None, 'ks': [5, 5, 5, 3, 7, 5, 7, 5, 7, 3, 3, 7, 7, 5, 7, 3, 5, 5, 7, 3], 'e': [6, 4, 3, 3, 3, 3, 4, 4, 3, 4, 3, 6, 4, 4, 3, 6, 4, 3, 4, 6], 'd': [4, 4, 4, 2, 4], 'r': [224]}",2022-07-07 03:16:53.105436,44,444295456,77.632 -"{'wid': None, 'ks': [3, 5, 3, 7, 3, 5, 7, 5, 3, 3, 3, 7, 3, 5, 3, 5, 3, 3, 7, 3], 'e': [4, 6, 3, 3, 6, 3, 3, 6, 6, 4, 4, 6, 3, 4, 3, 6, 3, 6, 3, 4], 'd': [4, 4, 2, 2, 4], 'r': [224]}",2022-07-07 03:18:47.301137,41,410969240,76.79 -"{'wid': None, 'ks': [3, 3, 3, 3, 7, 5, 3, 5, 3, 5, 5, 7, 7, 7, 3, 5, 7, 5, 3, 7], 'e': [3, 6, 6, 4, 6, 3, 3, 4, 3, 6, 3, 4, 4, 6, 3, 6, 4, 3, 6, 3], 'd': [2, 3, 4, 4, 2], 'r': [224]}",2022-07-07 03:20:35.391443,40,405868672,77.338 -"{'wid': None, 'ks': [3, 3, 3, 7, 5, 7, 7, 3, 3, 3, 3, 5, 7, 3, 7, 5, 3, 7, 5, 5], 'e': [4, 6, 3, 6, 4, 3, 3, 6, 3, 6, 4, 6, 4, 4, 3, 6, 4, 3, 4, 4], 'd': [3, 4, 4, 2, 2], 'r': [224]}",2022-07-07 03:22:14.504855,37,370501152,76.448 -"{'wid': None, 'ks': [7, 5, 3, 5, 7, 5, 3, 3, 5, 3, 3, 7, 7, 3, 5, 3, 3, 5, 5, 7], 'e': [3, 3, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 4, 3, 6, 3, 3, 3, 4], 'd': [4, 4, 3, 4, 2], 'r': [224]}",2022-07-07 03:24:12.500905,48,482299704,77.7 -"{'wid': None, 'ks': [7, 3, 5, 7, 5, 5, 7, 5, 3, 3, 3, 5, 5, 3, 7, 5, 5, 7, 3, 7], 'e': [3, 6, 4, 6, 6, 3, 3, 3, 6, 3, 6, 4, 4, 6, 4, 4, 4, 4, 6, 6], 'd': [4, 4, 2, 2, 2], 'r': [224]}",2022-07-07 03:25:50.198665,42,423721952,76.506 -"{'wid': None, 'ks': [7, 7, 3, 7, 5, 7, 5, 5, 5, 3, 5, 3, 3, 7, 3, 5, 3, 7, 7, 3], 'e': [3, 3, 3, 4, 4, 3, 4, 4, 4, 4, 4, 6, 6, 4, 3, 3, 3, 6, 3, 4], 'd': [4, 2, 2, 3, 3], 'r': [224]}",2022-07-07 03:27:26.901886,37,373770104,77.258 -"{'wid': None, 'ks': [3, 7, 5, 5, 7, 3, 5, 3, 5, 5, 5, 3, 5, 5, 3, 5, 7, 3, 7, 5], 'e': [3, 4, 6, 6, 4, 3, 6, 6, 6, 3, 3, 3, 3, 6, 3, 6, 6, 3, 6, 3], 'd': [3, 2, 3, 2, 3], 'r': [224]}",2022-07-07 03:29:00.989578,36,369186480,77.096 -"{'wid': None, 'ks': [7, 7, 5, 5, 7, 5, 3, 3, 3, 5, 7, 3, 7, 7, 5, 5, 3, 7, 3, 7], 'e': [6, 3, 6, 3, 4, 3, 3, 3, 4, 3, 6, 4, 3, 3, 6, 4, 4, 3, 4, 3], 'd': [4, 4, 3, 4, 4], 'r': [224]}",2022-07-07 03:31:07.608402,51,518341312,78.104 - """ - with open('search_results.csv', 'w', encoding="utf-8") as f: - f.write(results_csv) def model_builder(model_arch_params): channels = model_arch_params['channels'] @@ -123,14 +87,10 @@ class TestNAS(unittest.TestCase): @classmethod def setUpClass(cls): build_fake_yaml() - build_dynas_fake_yaml() - build_dynas_results_csv() @classmethod def tearDownClass(cls): os.remove('fake.yaml') - os.remove('dynas_fake.yaml') - os.remove('search_results.csv') shutil.rmtree(os.path.join(os.getcwd(), 'NASResults'), ignore_errors=True) shutil.rmtree('runs', ignore_errors=True) @@ -184,34 +144,6 @@ def eval_func(model): best_model_archs = nas_agent() self.assertTrue(len(best_model_archs) > 0) - def test_dynas(self): - nas_agent = NAS('dynas_fake.yaml') - for search_algorithm, supernet in [('nsga2','ofa_mbv3_d234_e346_k357_w1.2'), ('age', 
'ofa_mbv3_d234_e346_k357_w1.2')]: - config = NASConfig(approach='dynas', search_algorithm=search_algorithm) - config.dynas.supernet = supernet - config.seed = 42 - config.dynas.metrics = ['acc', 'macs', 'lat'] - config.dynas.population = 10 - config.dynas.num_evals = 10 - config.dynas.results_csv_path = 'search_results.csv' - config.dynas.batch_size = 64 - nas_agent = NAS(config) - best_model_archs = nas_agent.search() - nas_agent.acc_predictor.get_parameters() - nas_agent.acc_predictor.save('tmp.pickle') - nas_agent.acc_predictor.load('tmp.pickle') - samples = nas_agent.supernet_manager.random_samples(10) - subnet_cfg = nas_agent.supernet_manager.translate2param(samples[0]) - nas_agent.runner_validate.validate_macs(subnet_cfg) - nas_agent.runner_validate.measure_latency(subnet_cfg) - nas_agent.validation_interface.clear_csv() - os.remove('tmp.pickle') - from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference - reference = TorchVisionReference('ofa_resnet50_ofa_mbv3', dataset_path=None, batch_size=1) - reference.validate_macs() - reference.measure_latency() - self.assertTrue(len(best_model_archs) > 0) - if __name__ == "__main__": unittest.main() From 07943de43b016aee945215803191e0250b317b71 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 15:46:39 -0800 Subject: [PATCH 51/60] Revert "Remove reference to DyNAS from UTs" This reverts commit 79a47586b46b136366329476048e2f09fcc1aa84. --- test/nas/test_nas.py | 68 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index 47dfaf19c6b..4d22673d578 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -9,6 +9,7 @@ from neural_compressor.data import DATASETS from neural_compressor.experimental import common, NAS from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader +from neural_compressor.experimental.nas.dynas import DyNAS def build_fake_yaml(approach=None, search_algorithm=None, metrics=['acc']): fake_yaml = """ @@ -57,6 +58,41 @@ def build_fake_yaml(approach=None, search_algorithm=None, metrics=['acc']): with open('fake.yaml', 'w', encoding="utf-8") as f: f.write(fake_yaml) +def build_dynas_fake_yaml(): + fake_yaml = """ + model: + name: imagenet_nas + framework: pytorch + + nas: + approach: dynas + search: + search_algorithm: nsga2 + dynas: + supernet: ofa_resnet50 + metrics: ['acc', 'macs'] + results_csv_path: './search_results.csv' + """ + with open('dynas_fake.yaml', 'w', encoding="utf-8") as f: + f.write(fake_yaml) + +def build_dynas_results_csv(): + results_csv = """ +Sub-network,Date,Latency (ms), MACs,Top-1 Acc (%) +"{'wid': None, 'ks': [7, 7, 3, 3, 5, 7, 7, 3, 5, 5, 3, 3, 7, 3, 5, 5, 5, 7, 5, 7], 'e': [3, 4, 4, 4, 4, 6, 6, 4, 4, 3, 4, 4, 3, 6, 4, 3, 4, 6, 3, 3], 'd': [2, 4, 4, 2, 3], 'r': [224]}",2022-07-07 03:13:06.306540,39,391813792,77.416 +"{'wid': None, 'ks': [3, 5, 5, 7, 5, 5, 3, 3, 7, 7, 7, 5, 7, 3, 7, 5, 3, 5, 3, 3], 'e': [4, 6, 3, 4, 4, 4, 4, 6, 3, 6, 4, 3, 4, 3, 4, 3, 6, 4, 4, 6], 'd': [4, 3, 3, 2, 3], 'r': [224]}",2022-07-07 03:14:50.398553,41,412962768,77.234 +"{'wid': None, 'ks': [5, 5, 5, 3, 7, 5, 7, 5, 7, 3, 3, 7, 7, 5, 7, 3, 5, 5, 7, 3], 'e': [6, 4, 3, 3, 3, 3, 4, 4, 3, 4, 3, 6, 4, 4, 3, 6, 4, 3, 4, 6], 'd': [4, 4, 4, 2, 4], 'r': [224]}",2022-07-07 03:16:53.105436,44,444295456,77.632 +"{'wid': None, 'ks': [3, 5, 3, 7, 3, 5, 7, 5, 3, 3, 3, 7, 3, 5, 3, 5, 3, 3, 7, 3], 'e': [4, 6, 3, 3, 6, 3, 3, 6, 6, 4, 4, 6, 3, 4, 3, 6, 3, 6, 3, 4], 'd': 
[4, 4, 2, 2, 4], 'r': [224]}",2022-07-07 03:18:47.301137,41,410969240,76.79 +"{'wid': None, 'ks': [3, 3, 3, 3, 7, 5, 3, 5, 3, 5, 5, 7, 7, 7, 3, 5, 7, 5, 3, 7], 'e': [3, 6, 6, 4, 6, 3, 3, 4, 3, 6, 3, 4, 4, 6, 3, 6, 4, 3, 6, 3], 'd': [2, 3, 4, 4, 2], 'r': [224]}",2022-07-07 03:20:35.391443,40,405868672,77.338 +"{'wid': None, 'ks': [3, 3, 3, 7, 5, 7, 7, 3, 3, 3, 3, 5, 7, 3, 7, 5, 3, 7, 5, 5], 'e': [4, 6, 3, 6, 4, 3, 3, 6, 3, 6, 4, 6, 4, 4, 3, 6, 4, 3, 4, 4], 'd': [3, 4, 4, 2, 2], 'r': [224]}",2022-07-07 03:22:14.504855,37,370501152,76.448 +"{'wid': None, 'ks': [7, 5, 3, 5, 7, 5, 3, 3, 5, 3, 3, 7, 7, 3, 5, 3, 3, 5, 5, 7], 'e': [3, 3, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 4, 3, 6, 3, 3, 3, 4], 'd': [4, 4, 3, 4, 2], 'r': [224]}",2022-07-07 03:24:12.500905,48,482299704,77.7 +"{'wid': None, 'ks': [7, 3, 5, 7, 5, 5, 7, 5, 3, 3, 3, 5, 5, 3, 7, 5, 5, 7, 3, 7], 'e': [3, 6, 4, 6, 6, 3, 3, 3, 6, 3, 6, 4, 4, 6, 4, 4, 4, 4, 6, 6], 'd': [4, 4, 2, 2, 2], 'r': [224]}",2022-07-07 03:25:50.198665,42,423721952,76.506 +"{'wid': None, 'ks': [7, 7, 3, 7, 5, 7, 5, 5, 5, 3, 5, 3, 3, 7, 3, 5, 3, 7, 7, 3], 'e': [3, 3, 3, 4, 4, 3, 4, 4, 4, 4, 4, 6, 6, 4, 3, 3, 3, 6, 3, 4], 'd': [4, 2, 2, 3, 3], 'r': [224]}",2022-07-07 03:27:26.901886,37,373770104,77.258 +"{'wid': None, 'ks': [3, 7, 5, 5, 7, 3, 5, 3, 5, 5, 5, 3, 5, 5, 3, 5, 7, 3, 7, 5], 'e': [3, 4, 6, 6, 4, 3, 6, 6, 6, 3, 3, 3, 3, 6, 3, 6, 6, 3, 6, 3], 'd': [3, 2, 3, 2, 3], 'r': [224]}",2022-07-07 03:29:00.989578,36,369186480,77.096 +"{'wid': None, 'ks': [7, 7, 5, 5, 7, 5, 3, 3, 3, 5, 7, 3, 7, 7, 5, 5, 3, 7, 3, 7], 'e': [6, 3, 6, 3, 4, 3, 3, 3, 4, 3, 6, 4, 3, 3, 6, 4, 4, 3, 4, 3], 'd': [4, 4, 3, 4, 4], 'r': [224]}",2022-07-07 03:31:07.608402,51,518341312,78.104 + """ + with open('search_results.csv', 'w', encoding="utf-8") as f: + f.write(results_csv) def model_builder(model_arch_params): channels = model_arch_params['channels'] @@ -87,10 +123,14 @@ class TestNAS(unittest.TestCase): @classmethod def setUpClass(cls): build_fake_yaml() + build_dynas_fake_yaml() + build_dynas_results_csv() @classmethod def tearDownClass(cls): os.remove('fake.yaml') + os.remove('dynas_fake.yaml') + os.remove('search_results.csv') shutil.rmtree(os.path.join(os.getcwd(), 'NASResults'), ignore_errors=True) shutil.rmtree('runs', ignore_errors=True) @@ -144,6 +184,34 @@ def eval_func(model): best_model_archs = nas_agent() self.assertTrue(len(best_model_archs) > 0) + def test_dynas(self): + nas_agent = NAS('dynas_fake.yaml') + for search_algorithm, supernet in [('nsga2','ofa_mbv3_d234_e346_k357_w1.2'), ('age', 'ofa_mbv3_d234_e346_k357_w1.2')]: + config = NASConfig(approach='dynas', search_algorithm=search_algorithm) + config.dynas.supernet = supernet + config.seed = 42 + config.dynas.metrics = ['acc', 'macs', 'lat'] + config.dynas.population = 10 + config.dynas.num_evals = 10 + config.dynas.results_csv_path = 'search_results.csv' + config.dynas.batch_size = 64 + nas_agent = NAS(config) + best_model_archs = nas_agent.search() + nas_agent.acc_predictor.get_parameters() + nas_agent.acc_predictor.save('tmp.pickle') + nas_agent.acc_predictor.load('tmp.pickle') + samples = nas_agent.supernet_manager.random_samples(10) + subnet_cfg = nas_agent.supernet_manager.translate2param(samples[0]) + nas_agent.runner_validate.validate_macs(subnet_cfg) + nas_agent.runner_validate.measure_latency(subnet_cfg) + nas_agent.validation_interface.clear_csv() + os.remove('tmp.pickle') + from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference + reference = 
TorchVisionReference('ofa_resnet50_ofa_mbv3', dataset_path=None, batch_size=1) + reference.validate_macs() + reference.measure_latency() + self.assertTrue(len(best_model_archs) > 0) + if __name__ == "__main__": unittest.main() From e3e67e28ca4b5a274b0b4758d647efd300f1fbdf Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 15:53:57 -0800 Subject: [PATCH 52/60] LazyLoad transformer_interface Signed-off-by: Maciej Szankin --- .../experimental/nas/dynast/dynas_utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 8387fdd800a..4a771b41357 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -30,8 +30,8 @@ from neural_compressor.experimental.nas.dynast.dynas_manager import \ ParameterManager from neural_compressor.experimental.nas.dynast.dynas_predictor import Predictor -from neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface import ( - compute_bleu, compute_latency, compute_macs) +# from neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface import ( +# compute_bleu, compute_latency, compute_macs) from neural_compressor.utils.utility import LazyImport, logger from ofa.imagenet_classification.data_providers.imagenet import \ ImagenetDataProvider @@ -41,6 +41,7 @@ torch = LazyImport('torch') torchvision = LazyImport('torchvision') +transformer_interface = LazyImport('neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface') def get_macs( @@ -385,7 +386,7 @@ def validate_bleu( subnet_cfg: dict, ) -> float: #noqa: D102 - bleu = compute_bleu(subnet_cfg, self.dataset_path, + bleu = transformer_interface.compute_bleu(subnet_cfg, self.dataset_path, self.checkpoint_path) return bleu @@ -400,7 +401,7 @@ def validate_macs( Returns: `macs` """ - macs = compute_macs(subnet_cfg, self.dataset_path) + macs = transformer_interface.compute_macs(subnet_cfg, self.dataset_path) logger.info('[DyNAS-T] Model\'s macs: {}'.format(macs)) return macs @@ -417,7 +418,7 @@ def measure_latency( Returns: mean latency; std latency """ - latency_mean, latency_std = compute_latency( + latency_mean, latency_std = transformer_interface.compute_latency( subnet_cfg, self.dataset_path, self.batch_size) logger.info( '[DyNAS-T] Model\'s latency: {} +/- {}'.format(latency_mean, latency_std)) From eb5b063c74de774cfdddbb60f94ee47e3bc208e9 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 16:05:57 -0800 Subject: [PATCH 53/60] Removed commented code Signed-off-by: Maciej Szankin --- .../experimental/nas/dynast/dynas_manager.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_manager.py b/neural_compressor/experimental/nas/dynast/dynas_manager.py index 0e64ccaaf2e..4ba28d087be 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_manager.py +++ b/neural_compressor/experimental/nas/dynast/dynas_manager.py @@ -291,12 +291,9 @@ def __init__(self, param_dict: dict, verbose: bool = False, seed: int = 0): #no def onehot_custom(self, subnet_cfg, provide_onehot=True): #noqa: D102 features = [] - #import ipdb;ipdb.set_trace() features.extend(subnet_cfg['encoder_embed_dim']) - #encoder_layer_num = subnet_cfg['encoder_layer_num'] - encode_layer_num_int = 6 # 
encoder_layer_num[0] - # features.extend(encoder_layer_num) + encode_layer_num_int = 6 # Encoder FFN Embed Dim encoder_ffn_embed_dim = subnet_cfg['encoder_ffn_embed_dim'] @@ -360,10 +357,6 @@ def onehot_custom(self, subnet_cfg, provide_onehot=True): #noqa: D102 one_hot_count = 0 unique_values = self.unique_values - # uncomment - # with open(self.onehot_unique,'rb') as f: - # load_unique_values = pickle.load(f) - # unique_values = load_unique_values.tolist() for unique in unique_values: one_hot_count += len(unique.tolist()) @@ -380,8 +373,6 @@ def onehot_custom(self, subnet_cfg, provide_onehot=True): #noqa: D102 else: return features - # return np.array(ks_onehot + ex_onehot) - def import_csv( self, filepath: str, @@ -426,7 +417,6 @@ def import_csv( config_as_onehot = self.onehot_custom( config_as_dict, provide_onehot=False) convert_to_onehot.append(config_as_onehot) - #import ipdb;ipdb.set_trace() df[config] = convert_to_dict df['config_pymoo'] = convert_to_pymoo df['config_onehot'] = convert_to_onehot From 8f1e03ef0317d3442f510ede3a00f698fede58a4 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Fri, 9 Dec 2022 16:07:10 -0800 Subject: [PATCH 54/60] Shorten import line Signed-off-by: Maciej Szankin --- neural_compressor/experimental/nas/dynast/dynas_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/neural_compressor/experimental/nas/dynast/dynas_utils.py b/neural_compressor/experimental/nas/dynast/dynas_utils.py index 4a771b41357..9c9e4b1f4ce 100644 --- a/neural_compressor/experimental/nas/dynast/dynas_utils.py +++ b/neural_compressor/experimental/nas/dynast/dynas_utils.py @@ -41,7 +41,9 @@ torch = LazyImport('torch') torchvision = LazyImport('torchvision') -transformer_interface = LazyImport('neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface') +transformer_interface = LazyImport( + 'neural_compressor.experimental.nas.dynast.supernetwork.machine_translation.transformer_interface' +) def get_macs( From 6951d7ad91c9a9c3be97a9d964d62388c85b58d3 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Sat, 10 Dec 2022 12:49:39 -0800 Subject: [PATCH 55/60] Remove cached torch files when running tests Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index 4d22673d578..4e1c87db185 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -126,6 +126,12 @@ def setUpClass(cls): build_dynas_fake_yaml() build_dynas_results_csv() + try: + shutil.rmtree('.torch/') + except: + pass + + @classmethod def tearDownClass(cls): os.remove('fake.yaml') From e9f872a132a16b2979f5bc9061dc639ba031f2cc Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Sat, 10 Dec 2022 13:30:46 -0800 Subject: [PATCH 56/60] Split NAS tests Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index 4e1c87db185..be6c6405ff0 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -203,6 +203,8 @@ def test_dynas(self): config.dynas.batch_size = 64 nas_agent = NAS(config) best_model_archs = nas_agent.search() + self.assertTrue(len(best_model_archs) > 0) + nas_agent.acc_predictor.get_parameters() nas_agent.acc_predictor.save('tmp.pickle') nas_agent.acc_predictor.load('tmp.pickle') @@ -212,11 +214,18 @@ def test_dynas(self): nas_agent.runner_validate.measure_latency(subnet_cfg) nas_agent.validation_interface.clear_csv() 
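            # Clean up the predictor checkpoint pickled earlier in this test.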
os.remove('tmp.pickle') + + def test_vision_reference(self): from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference - reference = TorchVisionReference('ofa_resnet50_ofa_mbv3', dataset_path=None, batch_size=1) - reference.validate_macs() - reference.measure_latency() - self.assertTrue(len(best_model_archs) > 0) + reference = TorchVisionReference('ofa_mbv3', dataset_path=None, batch_size=1) + macs = reference.validate_macs() + + self.assertEqual(macs, 217234208) + + reference.measure_latency( + warmup_steps=1, + measure_steps=1, + ) if __name__ == "__main__": From ce29fe42ee4bdf90f0d87c148c89a430861ad81e Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Sat, 10 Dec 2022 13:34:37 -0800 Subject: [PATCH 57/60] Catch network timeout in tests Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 49 +++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index be6c6405ff0..d8365af2827 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -191,29 +191,32 @@ def eval_func(model): self.assertTrue(len(best_model_archs) > 0) def test_dynas(self): - nas_agent = NAS('dynas_fake.yaml') - for search_algorithm, supernet in [('nsga2','ofa_mbv3_d234_e346_k357_w1.2'), ('age', 'ofa_mbv3_d234_e346_k357_w1.2')]: - config = NASConfig(approach='dynas', search_algorithm=search_algorithm) - config.dynas.supernet = supernet - config.seed = 42 - config.dynas.metrics = ['acc', 'macs', 'lat'] - config.dynas.population = 10 - config.dynas.num_evals = 10 - config.dynas.results_csv_path = 'search_results.csv' - config.dynas.batch_size = 64 - nas_agent = NAS(config) - best_model_archs = nas_agent.search() - self.assertTrue(len(best_model_archs) > 0) - - nas_agent.acc_predictor.get_parameters() - nas_agent.acc_predictor.save('tmp.pickle') - nas_agent.acc_predictor.load('tmp.pickle') - samples = nas_agent.supernet_manager.random_samples(10) - subnet_cfg = nas_agent.supernet_manager.translate2param(samples[0]) - nas_agent.runner_validate.validate_macs(subnet_cfg) - nas_agent.runner_validate.measure_latency(subnet_cfg) - nas_agent.validation_interface.clear_csv() - os.remove('tmp.pickle') + try: + nas_agent = NAS('dynas_fake.yaml') + for search_algorithm, supernet in [('nsga2','ofa_mbv3_d234_e346_k357_w1.2'), ('age', 'ofa_mbv3_d234_e346_k357_w1.2')]: + config = NASConfig(approach='dynas', search_algorithm=search_algorithm) + config.dynas.supernet = supernet + config.seed = 42 + config.dynas.metrics = ['acc', 'macs', 'lat'] + config.dynas.population = 10 + config.dynas.num_evals = 10 + config.dynas.results_csv_path = 'search_results.csv' + config.dynas.batch_size = 64 + nas_agent = NAS(config) + best_model_archs = nas_agent.search() + self.assertTrue(len(best_model_archs) > 0) + + nas_agent.acc_predictor.get_parameters() + nas_agent.acc_predictor.save('tmp.pickle') + nas_agent.acc_predictor.load('tmp.pickle') + samples = nas_agent.supernet_manager.random_samples(10) + subnet_cfg = nas_agent.supernet_manager.translate2param(samples[0]) + nas_agent.runner_validate.validate_macs(subnet_cfg) + nas_agent.runner_validate.measure_latency(subnet_cfg) + nas_agent.validation_interface.clear_csv() + os.remove('tmp.pickle') + except TimeoutError: + pass def test_vision_reference(self): from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference From 36365ed85c4b1de3563e8f2a3c51f176b7171416 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Sat, 10 Dec 2022 15:49:36 -0800 
Subject: [PATCH 58/60] Remove try catch from tests Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 52 +++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index d8365af2827..b6cc41ead22 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -191,35 +191,33 @@ def eval_func(model): self.assertTrue(len(best_model_archs) > 0) def test_dynas(self): - try: - nas_agent = NAS('dynas_fake.yaml') - for search_algorithm, supernet in [('nsga2','ofa_mbv3_d234_e346_k357_w1.2'), ('age', 'ofa_mbv3_d234_e346_k357_w1.2')]: - config = NASConfig(approach='dynas', search_algorithm=search_algorithm) - config.dynas.supernet = supernet - config.seed = 42 - config.dynas.metrics = ['acc', 'macs', 'lat'] - config.dynas.population = 10 - config.dynas.num_evals = 10 - config.dynas.results_csv_path = 'search_results.csv' - config.dynas.batch_size = 64 - nas_agent = NAS(config) - best_model_archs = nas_agent.search() - self.assertTrue(len(best_model_archs) > 0) - - nas_agent.acc_predictor.get_parameters() - nas_agent.acc_predictor.save('tmp.pickle') - nas_agent.acc_predictor.load('tmp.pickle') - samples = nas_agent.supernet_manager.random_samples(10) - subnet_cfg = nas_agent.supernet_manager.translate2param(samples[0]) - nas_agent.runner_validate.validate_macs(subnet_cfg) - nas_agent.runner_validate.measure_latency(subnet_cfg) - nas_agent.validation_interface.clear_csv() - os.remove('tmp.pickle') - except TimeoutError: - pass + nas_agent = NAS('dynas_fake.yaml') + for search_algorithm, supernet in [('nsga2','ofa_mbv3_d234_e346_k357_w1.2'), ('age', 'ofa_mbv3_d234_e346_k357_w1.2')]: + config = NASConfig(approach='dynas', search_algorithm=search_algorithm) + config.dynas.supernet = supernet + config.seed = 42 + config.dynas.metrics = ['acc', 'macs', 'lat'] + config.dynas.population = 10 + config.dynas.num_evals = 10 + config.dynas.results_csv_path = 'search_results.csv' + config.dynas.batch_size = 64 + nas_agent = NAS(config) + best_model_archs = nas_agent.search() + self.assertTrue(len(best_model_archs) > 0) + + nas_agent.acc_predictor.get_parameters() + nas_agent.acc_predictor.save('tmp.pickle') + nas_agent.acc_predictor.load('tmp.pickle') + samples = nas_agent.supernet_manager.random_samples(10) + subnet_cfg = nas_agent.supernet_manager.translate2param(samples[0]) + nas_agent.runner_validate.validate_macs(subnet_cfg) + nas_agent.runner_validate.measure_latency(subnet_cfg) + nas_agent.validation_interface.clear_csv() + os.remove('tmp.pickle') def test_vision_reference(self): - from neural_compressor.experimental.nas.dynast.dynas_utils import TorchVisionReference + from neural_compressor.experimental.nas.dynast.dynas_utils import \ + TorchVisionReference reference = TorchVisionReference('ofa_mbv3', dataset_path=None, batch_size=1) macs = reference.validate_macs() From 858d7b044a8092fe4d6ba572b3ec853c1660e7c3 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Sat, 10 Dec 2022 15:50:02 -0800 Subject: [PATCH 59/60] Move `.torch` cleanup to teardown Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index b6cc41ead22..018ade52bfe 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -1,15 +1,17 @@ -from multiprocessing.spawn import import_main_path import os import shutil import unittest +from pathlib import Path + import numpy as np import torch from 
neural_compressor.conf.config import NASConfig from neural_compressor.data import DATASETS -from neural_compressor.experimental import common, NAS -from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.experimental.nas.dynas import DyNAS +from neural_compressor.experimental import NAS, common +from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import \ + PyTorchDataLoader + def build_fake_yaml(approach=None, search_algorithm=None, metrics=['acc']): fake_yaml = """ @@ -126,12 +128,6 @@ def setUpClass(cls): build_dynas_fake_yaml() build_dynas_results_csv() - try: - shutil.rmtree('.torch/') - except: - pass - - @classmethod def tearDownClass(cls): os.remove('fake.yaml') @@ -139,6 +135,7 @@ def tearDownClass(cls): os.remove('search_results.csv') shutil.rmtree(os.path.join(os.getcwd(), 'NASResults'), ignore_errors=True) shutil.rmtree('runs', ignore_errors=True) + shutil.rmtree(os.path.join(os.getcwd(), '.torch'), ignore_errors=True) def test_basic_nas(self): # Built-in train, evaluation From 7dd6615dea460deef599b369c1247b9c50da6670 Mon Sep 17 00:00:00 2001 From: Maciej Szankin Date: Mon, 12 Dec 2022 01:09:20 -0800 Subject: [PATCH 60/60] Skip cleanup Signed-off-by: Maciej Szankin --- test/nas/test_nas.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/nas/test_nas.py b/test/nas/test_nas.py index bd2ef78559f..10673939388 100644 --- a/test/nas/test_nas.py +++ b/test/nas/test_nas.py @@ -135,7 +135,6 @@ def tearDownClass(cls): os.remove('search_results.csv') shutil.rmtree(os.path.join(os.getcwd(), 'NASResults'), ignore_errors=True) shutil.rmtree('runs', ignore_errors=True) - shutil.rmtree(os.path.join(os.getcwd(), '.torch'), ignore_errors=True) def test_basic_nas(self): # Built-in train, evaluation
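
Taken together, the test changes above exercise the DyNAS flow end to end. As a rough standalone sketch, assuming the same config fields the tests set and a results CSV pre-populated the way `build_dynas_results_csv` does it, the search can be driven directly:

from neural_compressor.conf.config import NASConfig
from neural_compressor.experimental import NAS

# Values mirror those used in test_dynas above; the CSV path is a placeholder
# and must already contain previously evaluated sub-networks.
config = NASConfig(approach='dynas', search_algorithm='nsga2')
config.dynas.supernet = 'ofa_mbv3_d234_e346_k357_w1.2'
config.seed = 42
config.dynas.metrics = ['acc', 'macs', 'lat']
config.dynas.population = 10
config.dynas.num_evals = 10
config.dynas.results_csv_path = 'search_results.csv'
config.dynas.batch_size = 64

nas_agent = NAS(config)
best_model_archs = nas_agent.search()  # best sub-network configurations found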