Commit 0d2067e

Dynamic Transformer (LAT) (#139)
1 parent 4743273 commit 0d2067e

File tree

18 files changed: +8849 -5 lines changed

docs/tutorials/pytorch/question-answering/Dynamic_MiniLM_SQuAD.ipynb

Lines changed: 2305 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
(308, 247, 198, 159, 128, 103) 2484739968 87.33049546363097 0 None
(269, 253, 252, 202, 104, 34) 2485456896 87.76366630183897 1 ((300, 263, 248, 202, 104, 38),)
(300, 243, 214, 197, 129, 36) 2492206848 87.94776585579947 1 ((284, 252, 237, 197, 129, 36),)
(284, 262, 248, 200, 95, 36) 2510150400 88.12499528628362 2 ((284, 252, 237, 197, 129, 36), (283, 272, 258, 203, 60, 36))
(315, 251, 242, 159, 142, 33) 2546934912 88.33566781744294 1 ((315, 278, 231, 169, 105, 33),)
(303, 268, 256, 182, 118, 29) 2581745280 88.51327545611849 2 ((322, 284, 275, 166, 107, 22), (284, 252, 237, 197, 129, 36))
(346, 284, 275, 166, 107, 24) 2695914240 88.75378439863312 1 ((346, 284, 275, 166, 107, 50),)
(365, 280, 273, 176, 112, 46) 2782709760 88.8968270345175 2 ((348, 282, 268, 186, 111, 42), (381, 278, 278, 166, 113, 50))
(375, 331, 283, 213, 127, 18) 3015100416 89.11164300013466 1 ((374, 331, 283, 184, 125, 48),)
(374, 331, 283, 230, 126, 51) 3085609344 89.15126282829618 1 ((374, 331, 283, 227, 126, 52),)
(377, 361, 345, 245, 132, 38) 3329854464 89.28344658586016 2 ((377, 355, 350, 242, 130, 33), (377, 367, 340, 247, 134, 43))
(383, 363, 360, 242, 134, 42) 3385542144 89.30703301646996 1 ((382, 379, 360, 242, 134, 42),)
(383, 380, 376, 358, 202, 123) 3955011456 89.31783846266845 1 ((383, 380, 378, 358, 210, 142),)
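
Each line above appears to record one candidate kept by the evolutionary search (Step 3 of the README added in this commit): a per-layer length configuration, its estimated MACs, its SQuAD F1, the number of parents, and the parent configurations it was derived from. That column reading is an assumption, not documented in the commit; a minimal parsing sketch under it:

```
# Hypothetical parser for the result lines above (column meaning assumed:
# per-layer length config, MACs, F1, number of parents, parent configs).
import ast

def parse_result_line(line):
    end = line.index(")") + 1                    # first field is a parenthesized tuple
    length_config = ast.literal_eval(line[:end])
    rest = line[end:].split(None, 3)
    macs, f1, num_parents = int(rest[0]), float(rest[1]), int(rest[2])
    parents = None if rest[3] == "None" else ast.literal_eval(rest[3])
    return length_config, macs, f1, num_parents, parents

example = "(308, 247, 198, 159, 128, 103) 2484739968 87.33049546363097 0 None"
print(parse_result_line(example))
```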
Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
# Dynamic-Length Transformer

This implementation is based on the [Length Adaptive Transformer](https://github.com/clovaai/length-adaptive-transformer) work.
Currently, it supports BERT- and RoBERTa-based transformers.

## Training

### Step 1: Finetuning Pretrained Transformer

```
python run_qa.py \
    --model_name_or_path bert-base-uncased \
    --dataset_name squad \
    --do_train \
    --do_eval \
    --learning_rate 3e-5 \
    --num_train_epochs 2 \
    --max_seq_length 384 \
    --doc_stride 128 \
    --per_device_train_batch_size 8 \
    --output_dir output/finetuning
```

### Step 2: Training with LengthDrop

```
python run_qa.py \
    --model_name_or_path output/finetuning \
    --dataset_name squad \
    --do_train \
    --do_eval \
    --learning_rate 3e-5 \
    --num_train_epochs 5 \
    --max_seq_length 384 \
    --doc_stride 128 \
    --per_device_train_batch_size 8 \
    --length_adaptive \
    --num_sandwich 2 \
    --length_drop_ratio_bound 0.2 \
    --layer_dropout_prob 0.2 \
    --output_dir output/dynamic
```
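
During LengthDrop training, each step samples a monotonically shrinking per-layer sequence-length configuration, so a single set of weights stays accurate across many inference lengths; `--num_sandwich` adds sandwich-rule sub-models on top of the full and smallest configurations, and `--length_drop_ratio_bound` caps how much of the sequence a layer may drop. The snippet below is only an illustrative sketch of that sampling idea under those assumptions, not the actual code behind `--length_adaptive`:

```
# Illustrative sketch of LengthDrop-style sampling (assumed behaviour,
# not the exact implementation used by run_qa.py --length_adaptive).
import random

def sample_length_config(max_seq_length, num_layers, length_drop_ratio_bound=0.2):
    """Sample a monotonically non-increasing sequence length per layer."""
    lengths, current = [], max_seq_length
    for _ in range(num_layers):
        # Each layer keeps between (1 - bound) and 100% of the previous length.
        keep_ratio = 1.0 - random.uniform(0.0, length_drop_ratio_bound)
        current = max(1, int(current * keep_ratio))
        lengths.append(current)
    return tuple(lengths)

print(sample_length_config(max_seq_length=384, num_layers=12))
```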

### Step 3: Evolutionary Search

Run the search to find optimized length configurations for any target computational budget.

```
python run_qa.py \
    --model_name_or_path output/dynamic \
    --dataset_name squad \
    --max_seq_length 384 \
    --doc_stride 128 \
    --do_eval \
    --per_device_eval_batch_size 32 \
    --do_search \
    --output_dir output/search
```
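
The search treats a length configuration as a gene: it mutates and crosses over configurations from the current population, evaluates each candidate's accuracy and cost, and keeps the Pareto-best ones for every compute budget. The loop below is a rough sketch of that idea, assuming a hypothetical `evaluate(config)` helper that returns the candidate's score; it is not the search implemented in run_qa.py:

```
# Rough sketch of an evolutionary search over length configurations.
# `evaluate` is a hypothetical callback (e.g. SQuAD F1 on the dev set).
import random

def mutate(config, max_seq_length=384, prob=0.5):
    new, upper = [], max_seq_length
    for length in config:
        if random.random() < prob:
            length = random.randint(1, upper)
        length = min(length, upper)      # keep lengths non-increasing
        new.append(length)
        upper = length
    return tuple(new)

def crossover(a, b):
    child, upper = [], max(a[0], b[0])
    for x, y in zip(a, b):
        length = min(random.choice((x, y)), upper)
        child.append(length)
        upper = length
    return tuple(child)

def evolve(seed_config, evaluate, iterations=100):
    population = {seed_config: evaluate(seed_config)}
    for _ in range(iterations):
        parents = random.sample(list(population), k=min(2, len(population)))
        child = crossover(*parents) if len(parents) == 2 else mutate(parents[0])
        if child not in population:
            population[child] = evaluate(child)
    return population   # prune to the accuracy/cost Pareto frontier afterwards
```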
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
transformers
datasets
torchprofiler
