intel
diff --git a/examples/optimization/tensorflow/huggingface/text-classification/distillation/README.md b/examples/optimization/tensorflow/huggingface/text-classification/distillation/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/examples/optimization/tensorflow/huggingface/text-classification/distillation/run_benchmark.sh b/examples/optimization/tensorflow/huggingface/text-classification/distillation/run_benchmark.sh
Lines changed: 8 additions & 3 deletions
diff --git a/examples/optimization/tensorflow/huggingface/text-classification/distillation/run_glue.py b/examples/optimization/tensorflow/huggingface/text-classification/distillation/run_glue.py
Lines changed: 65 additions & 16 deletions
diff --git a/examples/optimization/tensorflow/huggingface/text-classification/distillation/run_tuning.sh b/examples/optimization/tensorflow/huggingface/text-classification/distillation/run_tuning.sh
Lines changed: 1 addition & 1 deletion
diff --git a/examples/optimization/tensorflow/huggingface/text-classification/distillation/run_tunning_multinode.sh b/examples/optimization/tensorflow/huggingface/text-classification/distillation/run_tuning_multinode.sh
rename from examples/optimization/tensorflow/huggingface/text-classification/distillation/run_tunning_multinode.sh
rename to examples/optimization/tensorflow/huggingface/text-classification/distillation/run_tuning_multinode.sh
diff --git a/examples/optimization/tensorflow/huggingface/text-classification/pruning/README.md b/examples/optimization/tensorflow/huggingface/text-classification/pruning/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/examples/optimization/tensorflow/huggingface/text-classification/pruning/run_benchmark.sh b/examples/optimization/tensorflow/huggingface/text-classification/pruning/run_benchmark.sh
Lines changed: 9 additions & 4 deletions
diff --git a/examples/optimization/tensorflow/huggingface/text-classification/pruning/run_glue.py b/examples/optimization/tensorflow/huggingface/text-classification/pruning/run_glue.py
Lines changed: 67 additions & 13 deletions
@@ -28,7 +28,7 @@ bash run_tuning.sh  --topology=topology
 ```
 
 ```
-bash run_benchmark.sh --topology=topology --mode=benchmark
+bash run_benchmark.sh --topology=topology --mode=benchmark --use_distillation_model=true
 ```
 topology is "distilbert-base-uncased"
 
 
@@ -14,6 +14,7 @@ function init_params {
   batch_size=16
   tuned_checkpoint=saved_results
   topology="distilbert-base-uncased"
+  mode="benchmark"
   for var in "$@"
   do
     case $var in
@@ -35,8 +36,8 @@ function init_params {
       --iters=*)
           iters=$(echo ${var} |cut -f2 -d=)
       ;;
-      --int8=*)
-          int8=$(echo ${var} |cut -f2 -d=)
+      --use_distillation_model=*)
+          use_distillation_model=$(echo ${var} |cut -f2 -d=)
       ;;
       --config=*)
           tuned_checkpoint=$(echo $var |cut -f2 -d=)
@@ -67,7 +68,11 @@ function run_benchmark {
 
     if [ "${topology}" = "distilbert-base-uncased" ]; then
         TASK_NAME='sst2'
-        model_name_or_path=${tuned_checkpoint}
+        model_name_or_path=distilbert-base-uncased
+    fi
+
+    if [[ ${use_distillation_model} == "true" ]]; then
+        extra_cmd=$extra_cmd" --use_distillation_model"
     fi
 
     python -u ./run_glue.py \
 
@@ -284,7 +284,10 @@ class OptimizationArguments:
     )
     add_origin_loss: bool = field(
         default=False, metadata={"help": "Whether add the origin loss or not"})
-    benchmark: bool = field(default=False, metadata={"help": "run benchmark."})
+    benchmark: bool = field(default=False, metadata={"help": "Run benchmark."})
+    use_distillation_model: bool = field(
+        default=False,
+        metadata={"help":"Whether to use pretrained distillation model."})
     accuracy_only: bool = field(
         default=False,
         metadata={
@@ -618,7 +621,7 @@ def compute_metrics(preds, label_ids):
                 drop_remainder=drop_remainder,
                 # `label_cols` is needed for user-defined losses, such as in this example
                 # datasets v2.3.x need "labels", not "label"
-                label_cols=["labels", "label"]
+                label_cols=["labels"]
                 if "label" in dataset.column_names else None,
             )
             tf_data[key] = data
@@ -682,10 +685,7 @@ def compute_metrics(preds, label_ids):
             distillation_config=distillation_conf,
             teacher_model=teacher_model,
         )
-        distilled_model.save_pretrained(training_args.output_dir,
-                                        saved_model=True)
-        distilled_model.config.save_pretrained(training_args.output_dir)
-        tokenizer.save_pretrained(training_args.output_dir)
+
         return
 
     # region Training and validation
@@ -731,17 +731,49 @@ def compute_metrics(preds, label_ids):
             raw_datasets = [datasets["validation"]]
 
         total_time = 0
+        num_examples = 0
+        if optim_args.use_distillation_model:
+            model = tf.saved_model.load(training_args.output_dir)
         for raw_dataset, tf_dataset, task in zip(raw_datasets, tf_datasets,
                                                     tasks):
-            num_examples = sum(1 for _ in tf_dataset.unbatch())
-            start = time.time()
-            eval_predictions = model.predict(tf_dataset)
-            total_time += time.time() - start
-            eval_metrics = compute_metrics(eval_predictions,
-                                            raw_dataset["label"])
-            print(f"Evaluation metrics ({task}) Accuracy: ", eval_metrics)
-        print("Throughput: ", num_examples / total_time)
+            num_examples += sum(
+                1 for _ in (tf_dataset.unbatch()
+                            if hasattr(tf_dataset, "unbatch") else tf_dataset
+                            )
+            )
 
+            if optim_args.use_distillation_model:
+                preds: np.ndarray = None
+                label_ids: np.ndarray = None
+                infer = model.signatures[list(model.signatures.keys())[0]]
+                for i, (inputs, labels) in enumerate(tf_dataset):
+                    for name in inputs:
+                        inputs[name] = tf.constant(inputs[name].numpy(), dtype=tf.int32)
+                    start = time.time()
+                    results = infer(**inputs)
+                    total_time += time.time() - start
+                    for val in results:
+                        if preds is None:
+                            preds = results[val].numpy()
+                        else:
+                            preds = np.append(preds, results[val].numpy(), axis=0)
+                    if label_ids is None:
+                        label_ids = labels.numpy()
+                    else:
+                        label_ids = np.append(label_ids, labels.numpy(), axis=0)
+                eval_metrics = compute_metrics({"logits": preds}, label_ids)
+            else:
+                start = time.time()
+                eval_predictions = model.predict(tf_dataset)
+                total_time += time.time() - start
+                eval_metrics = compute_metrics(eval_predictions, raw_dataset["label"])
+                print(f"Evaluation metrics ({task}):")
+                print(eval_metrics)
+            logger.info("metric ({}) Accuracy: {}".format(task, eval_metrics["accuracy"]))
+        logger.info(
+           "Throughput: {} samples/sec".format(
+                num_examples / total_time)
+        )
     # endregion
 
     # region Prediction
@@ -769,9 +801,26 @@ def compute_metrics(preds, label_ids):
             tf_datasets.append(tf_data["user_data"])
             raw_datasets.append(datasets["user_data"])
 
+        if optim_args.use_distillation_model:
+            model = tf.saved_model.load(training_args.output_dir)
+
         for raw_dataset, tf_dataset, task in zip(raw_datasets, tf_datasets,
                                                     tasks):
-            test_predictions = model.predict(tf_dataset)
+            if optim_args.use_distillation_model:
+                preds: np.ndarray = None
+                infer = model.signatures[list(model.signatures.keys())[0]]
+                for i, (inputs, labels) in enumerate(tf_dataset):
+                    for name in inputs:
+                        inputs[name] = tf.constant(inputs[name].numpy(), dtype=tf.int32)
+                    results = infer(**inputs)
+                    for val in results:
+                        if preds is None:
+                            preds = results[val].numpy()
+                        else:
+                            preds = np.append(preds, results[val].numpy(), axis=0)
+                test_predictions = {"logits": preds}
+            else:
+                test_predictions = model.predict(tf_dataset)
             if "label" in raw_dataset:
                 test_metrics = compute_metrics(test_predictions,
                                                 raw_dataset["label"])
@@ -795,7 +844,7 @@ def compute_metrics(preds, label_ids):
                     if is_regression:
                         writer.write(f"{index}\t{item:3.3f}\n")
                     else:
-                        item = model.config.id2label[item]
+                        item = config.id2label[item]
                         writer.write(f"{index}\t{item}\n")
     # endregion
 
 
@@ -40,7 +40,7 @@ function init_params {
 # run_tuning
 function run_tuning {
     extra_cmd=''
-    batch_size=16
+    batch_size=64
     if [ "${topology}" = "distilbert-base-uncased" ]; then
         TASK_NAME='sst2'
         model_name_or_path=distilbert-base-uncased
 
@@ -24,7 +24,7 @@ bash run_tuning.sh  --topology=topology
 ```
 
 ```
-bash run_benchmark.sh --topology=topology --mode=benchmark
+bash run_benchmark.sh --topology=topology --mode=benchmark --use_pruned_model=true
 ```
 topology is "distilbert_base_sst2"
 
 
@@ -11,9 +11,10 @@ function main {
 # init params
 function init_params {
   iters=100
-  batch_size=16
+  batch_size=64
   tuned_checkpoint=saved_results
   topology="distilbert_base_sst2"
+  mode="benchmark"
   for var in "$@"
   do
     case $var in
@@ -35,8 +36,8 @@ function init_params {
       --iters=*)
           iters=$(echo ${var} |cut -f2 -d=)
       ;;
-      --int8=*)
-          int8=$(echo ${var} |cut -f2 -d=)
+      --use_pruned_model=*)
+          use_pruned_model=$(echo ${var} |cut -f2 -d=)
       ;;
       --config=*)
           tuned_checkpoint=$(echo $var |cut -f2 -d=)
@@ -67,7 +68,11 @@ function run_benchmark {
 
     if [ "${topology}" = "distilbert_base_sst2" ]; then
         TASK_NAME='sst2'
-        model_name_or_path=${tuned_checkpoint}
+        model_name_or_path=distilbert-base-uncased-finetuned-sst-2-english
+    fi
+
+    if [[ ${use_pruned_model} == "true" ]]; then
+        extra_cmd=$extra_cmd" --use_pruned_model"
     fi
 
     python -u ./run_glue.py \
 
@@ -210,7 +210,10 @@ class OptimizationArguments:
     )
     benchmark: bool = field(
         default=False,
-        metadata={"help": "run benchmark."})
+        metadata={"help": "Run benchmark."})
+    use_pruned_model: bool = field(
+        default=False,
+        metadata={"help":"Whether to use pretrained pruned model."})
     accuracy_only: bool = field(
         default=False,
         metadata={"help":"Whether to only test accuracy for model tuned by Neural Compressor."})
@@ -503,7 +506,7 @@ def compute_metrics(preds, label_ids):
                 drop_remainder=drop_remainder,
                 # `label_cols` is needed for user-defined losses, such as in this example
                 # datasets v2.3.x need "labels", not "label"
-                label_cols=["labels", "label"] if "label" in dataset.column_names else None,
+                label_cols=["labels"] if "label" in dataset.column_names else None,
             )
             tf_data[key] = data
         # endregion
@@ -573,15 +576,49 @@ def compute_metrics(preds, label_ids):
             raw_datasets = [datasets["validation"]]
 
         total_time = 0
-        for raw_dataset, tf_dataset, task in zip(raw_datasets, tf_datasets, tasks):
-            num_examples = sum(1 for _ in tf_dataset.unbatch())
-            start = time.time()
-            eval_predictions = model.predict(tf_dataset)
-            total_time += time.time() - start
-            eval_metrics = compute_metrics(eval_predictions, raw_dataset["label"])
-            print(f"Evaluation metrics ({task}) Accuracy: ", eval_metrics)
-        print("Throughput: ", num_examples / total_time)
-
+        num_examples = 0
+        if optim_args.use_pruned_model:
+            model = tf.saved_model.load(training_args.output_dir)
+        for raw_dataset, tf_dataset, task in zip(raw_datasets, tf_datasets,
+                                                    tasks):
+            num_examples += sum(
+                1 for _ in (tf_dataset.unbatch()
+                            if hasattr(tf_dataset, "unbatch") else tf_dataset
+                            )
+            )
+            if optim_args.use_pruned_model:
+                preds: np.ndarray = None
+                label_ids: np.ndarray = None
+                infer = model.signatures[list(model.signatures.keys())[0]]
+                for i, (inputs, labels) in enumerate(tf_dataset):
+                    for name in inputs:
+                        inputs[name] = tf.constant(inputs[name].numpy(), dtype=tf.int32)
+                    start = time.time()
+                    results = infer(**inputs)
+                    total_time += time.time() - start
+                    for val in results:
+                        if preds is None:
+                            preds = results[val].numpy()
+                        else:
+                            preds = np.append(preds, results[val].numpy(), axis=0)
+                    if label_ids is None:
+                        label_ids = labels.numpy()
+                    else:
+                        label_ids = np.append(label_ids, labels.numpy(), axis=0)
+                eval_metrics = compute_metrics({"logits": preds}, label_ids)
+            else:
+                start = time.time()
+                eval_predictions = model.predict(tf_dataset)
+                total_time += time.time() - start
+                eval_metrics = compute_metrics(eval_predictions, raw_dataset["label"])
+                print(f"Evaluation metrics ({task}):")
+                print(eval_metrics)
+
+            logger.info("metric ({}) Accuracy: {}".format(task, eval_metrics["accuracy"]))
+        logger.info(
+            "Throughput: {} samples/sec".format(
+                num_examples / total_time)
+        )
     # endregion
 
     # region Prediction
@@ -606,8 +643,25 @@ def compute_metrics(preds, label_ids):
             tf_datasets.append(tf_data["user_data"])
             raw_datasets.append(datasets["user_data"])
 
+        if optim_args.use_pruned_model:
+            model = tf.saved_model.load(training_args.output_dir)
+
         for raw_dataset, tf_dataset, task in zip(raw_datasets, tf_datasets, tasks):
-            test_predictions = model.predict(tf_dataset)
+            if optim_args.use_pruned_model:
+                preds: np.ndarray = None
+                infer = model.signatures[list(model.signatures.keys())[0]]
+                for i, (inputs, labels) in enumerate(tf_dataset):
+                    for name in inputs:
+                        inputs[name] = tf.constant(inputs[name].numpy(), dtype=tf.int32)
+                    results = infer(**inputs)
+                    for val in results:
+                        if preds is None:
+                            preds = results[val].numpy()
+                        else:
+                            preds = np.append(preds, results[val].numpy(), axis=0)
+                test_predictions = {"logits": preds}
+            else:
+                test_predictions = model.predict(tf_dataset)
             if "label" in raw_dataset:
                 test_metrics = compute_metrics(test_predictions, raw_dataset["label"])
                 print(f"Test metrics ({task}):")
@@ -626,7 +680,7 @@ def compute_metrics(preds, label_ids):
                     if is_regression:
                         writer.write(f"{index}\t{item:3.3f}\n")
                     else:
-                        item = model.config.id2label[item]
+                        item = config.id2label[item]
                         writer.write(f"{index}\t{item}\n")
         # endregion