Remove keep_in_memory and fix lintrunner errors

neuropilot-captain · neuropilot-captain · commit fc75330069a7 · 2024-11-11T16:08:43.000+08:00
diff --git a/examples/mediatek/aot_utils/llm_utils/sanity_checks.py b/examples/mediatek/aot_utils/llm_utils/sanity_checks.py
@@ -204,7 +204,9 @@ def check_weights_exist(weight_dir):
             f"No weight files found in {weight_dir}! Weight files should be either .bin or .safetensors file types."
         )
     safetensors_l = [f for f in os.listdir(weight_dir) if f.endswith(".safetensors")]
-    bin_l = [f for f in os.listdir(weight_dir) if f.endswith(".bin") and "embedding" not in f]
+    bin_l = [
+        f for f in os.listdir(weight_dir) if f.endswith(".bin") and "embedding" not in f
+    ]
     if len(safetensors_l) & len(bin_l):
         raise RuntimeError(
             "Weights should only be in either .bin or .safetensors format, not both."
diff --git a/examples/mediatek/model_export_scripts/llama.py b/examples/mediatek/model_export_scripts/llama.py
@@ -462,7 +462,6 @@ def main():
                 "eos_token_id_tensor": torch.tensor(tokenizer.eos_token_id),
                 "response_cap": args.response_cap,
             },
-            keep_in_memory=True
         )
 
     for chunk_idx, chunk in enumerate(models):
diff --git a/examples/mediatek/models/llm_models/modeling_common.py b/examples/mediatek/models/llm_models/modeling_common.py
@@ -751,6 +751,7 @@ def get_example_inputs(
                 for _ in range(2 * self.num_blocks)
             ],
         )
+        # Specify dims that would be dynamic during calibration
         # Note: Assume cache size fixed shape as torch dynamic shape cannot handle dim 3 being
         # combination of 2 dynamic dims
         if get_dym_shape:
diff --git a/examples/mediatek/shell_scripts/export_llama.sh b/examples/mediatek/shell_scripts/export_llama.sh
@@ -1,23 +1,28 @@
-model=${1:-'llama3'}
+model=${1:-'llama3.2-3b'}
 chunks=${2:-4}
 tok=${3:-128}
 cache=${4:-512}
 cal=${5:-None}
 pres=${6:-A16W4}
 
-if [ $model = "llama3" ]
+if [ $model = "llama3.2-3b" ]
+then
+	config_path=Llama-3.2-3B-Instruct/config.json
+	pref="--preformatter aot_utils/llm_utils/preformatter_templates/llama3.json"
+elif [ $model = "llama3.2-1b" ]
+then
+	config_path=Llama-3.2-1B-Instruct/config.json
+	pref="--preformatter aot_utils/llm_utils/preformatter_templates/llama3.json"
+elif [ $model = "llama3" ]
 then
 	config_path=llama3-8B-instruct/config.json
 	pref="--preformatter aot_utils/llm_utils/preformatter_templates/llama3.json"
 elif [ $model = "llama2" ]
 then
 	config_path=llama2-7B-chat/config.json
 	pref="--preformatter aot_utils/llm_utils/preformatter_templates/llama2_short.json"
-else
-	# will remove once stable
-	config_path=llama_1b_50k/config.json
-	pref=""
 fi
+
 if [ $cal = "None" ]
 then
 	data=""

Original file line number	Diff line number	Diff line change
`@@ -462,7 +462,6 @@ def main():`
`462`	`462`	`"eos_token_id_tensor": torch.tensor(tokenizer.eos_token_id),`
`463`	`463`	`"response_cap": args.response_cap,`
`464`	`464`	`},`
`465`		`- keep_in_memory=True`
`466`	`465`	`)`
`467`	`466`
`468`	`467`	`for chunk_idx, chunk in enumerate(models):`
Original file line number	Diff line number	Diff line change
`@@ -751,6 +751,7 @@ def get_example_inputs(`
`751`	`751`	`for _ in range(2 * self.num_blocks)`
`752`	`752`	`],`
`753`	`753`	`)`
	`754`	`+ # Specify dims that would be dynamic during calibration`
`754`	`755`	`# Note: Assume cache size fixed shape as torch dynamic shape cannot handle dim 3 being`
`755`	`756`	`# combination of 2 dynamic dims`
`756`	`757`	`if get_dym_shape:`