7
7
8
8
import hashlib
9
9
import inspect
10
- import json
11
10
import logging
12
11
import shutil
13
12
import subprocess
23
22
from QEfficient .base .pytorch_transforms import PytorchTransform
24
23
from QEfficient .compile .qnn_compiler import compile as qnn_compile
25
24
from QEfficient .generation .cloud_infer import QAICInferenceSession
26
- from QEfficient .utils import constants , dump_qconfig
25
+ from QEfficient .utils import constants , create_json , dump_qconfig , generate_mdp_partition_config , load_json
27
26
from QEfficient .utils .cache import QEFF_HOME , to_hashable
28
27
29
28
logger = logging .getLogger (__name__ )
@@ -269,17 +268,17 @@ def _compile(
269
268
specializations = specializations ,
270
269
custom_io = custom_io ,
271
270
device_group = list (range (mdp_ts_num_devices )),
272
- num_cores = compiler_options .get ("aic_num_cores" , 16 ),
273
- mxfp6 = compiler_options .get ("mxfp6_matmul" , False ),
271
+ num_cores = compiler_options .get ("aic_num_cores" , constants . DEFAULT_AIC_NUM_CORES ),
272
+ mxfp6 = compiler_options .get ("mxfp6_matmul" , constants . DEFAULT_AIC_MXPF6_MATMUL ),
274
273
mxint8 = mxint8_kv_cache ,
275
274
qnn_config = qnn_config ,
276
275
)
277
276
278
277
return self .qpc_path
279
278
280
279
command = constants .COMPILER + [f"-m={ onnx_path } " ]
281
- if mdp_ts_json_path := compiler_options . pop ( "mdp_ts_json_path" , None ):
282
- mdp_ts_num_devices = None
280
+
281
+ if mdp_ts_json_path := compiler_options . pop ( "mdp_load_partition_config" , None ):
283
282
command .append (f"-mdp-load-partition-config={ mdp_ts_json_path } " )
284
283
285
284
for key , value in compiler_options .items ():
@@ -289,6 +288,17 @@ def _compile(
289
288
command .append (option )
290
289
continue
291
290
command .append (f"{ option } ={ value } " )
291
+
292
+ # Load the MDP partition config if a path was provided; otherwise generate a default one when num_devices > 1
293
+ if mdp_ts_json_path is not None :
294
+ mdp_ts_json = load_json (str (mdp_ts_json_path ))
295
+ elif mdp_ts_num_devices > 1 :
296
+ mdp_ts_json = generate_mdp_partition_config (
297
+ mdp_ts_num_devices , compiler_options .get ("aic_num_cores" , constants .DEFAULT_AIC_NUM_CORES )
298
+ )
299
+ else :
300
+ mdp_ts_json = None
301
+
292
302
compile_hash = hashlib .sha256 (to_hashable (command ))
293
303
294
304
if specializations is not None :
@@ -299,30 +309,37 @@ def _compile(
299
309
300
310
if num_speculative_tokens :
301
311
compile_hash .update (to_hashable ({"num_speculative_tokens" : num_speculative_tokens }))
302
- # Hash num_devices too, since default value would always be 1.
303
- compile_hash .update (to_hashable (mdp_ts_num_devices ))
312
+
313
+ # Hash the MDP partition config and the number of devices.
314
+ compile_hash .update (to_hashable (mdp_ts_json ))
315
+ compile_hash .update (to_hashable ({"mdp_ts_num_devices" : mdp_ts_num_devices }))
304
316
305
317
# Check if already compiled
306
318
compile_hash = compile_hash .hexdigest ()[:16 ]
307
319
compile_dir = qpc_path .with_name (qpc_path .name + "-" + compile_hash )
308
320
qpc_path = compile_dir / "qpc"
309
321
qpc_path .mkdir (parents = True , exist_ok = True )
322
+
310
323
if qpc_path .is_dir ():
311
324
if (qpc_path / "programqpc.bin" ).is_file ():
312
325
self .qpc_path = qpc_path
313
326
return qpc_path
314
327
# Probably compilation failure last time, delete directory to start over
315
328
shutil .rmtree (qpc_path )
316
329
330
+ # Write the MDP partition config (loaded or generated) into the compile directory
331
+ if mdp_ts_json is not None :
332
+ mdp_ts_json_path = compile_dir / f"mdp_ts_{ mdp_ts_num_devices } .json"
333
+ create_json (str (mdp_ts_json_path ), mdp_ts_json )
334
+ command .append (f"-mdp-load-partition-config={ mdp_ts_json_path } " )
335
+
317
336
# Write specializations.json file
318
337
if specializations is not None :
319
338
specializations_json = compile_dir / "specializations.json"
320
- with open (specializations_json , "w" ) as fp :
321
- json .dump (
322
- {"specializations" : [{k : str (v ) for k , v in spec .items ()} for spec in specializations ]},
323
- fp ,
324
- indent = 4 ,
325
- )
339
+ specializations_data = {
340
+ "specializations" : [{k : str (v ) for k , v in spec .items ()} for spec in specializations ]
341
+ }
342
+ create_json (str (specializations_json ), specializations_data )
326
343
command .append (f"-network-specialization-config={ specializations_json } " )
327
344
328
345
# Write custom_io.yaml file
@@ -333,26 +350,6 @@ def _compile(
333
350
fp .write (f" - IOName: { io_name } \n Precision: { dtype } \n \n " )
334
351
command .append (f"-custom-IO-list-file={ custom_io_yaml } " )
335
352
336
- # Write mdp_config.json file
337
- if not mdp_ts_json_path and mdp_ts_num_devices > 1 :
338
- num_cores = compiler_options .get ("aic_num_cores" , 16 )
339
- mdp_ts_json = compile_dir / f"mdp_ts_{ mdp_ts_num_devices } .json"
340
- with open (mdp_ts_json , "w" ) as fp :
341
- json .dump (
342
- {
343
- "connections" : [{"devices" : list (range (mdp_ts_num_devices )), "type" : "p2p" }],
344
- "partitions" : [
345
- {
346
- "name" : "Partition0" ,
347
- "devices" : [{"deviceId" : d , "numCores" : num_cores } for d in range (mdp_ts_num_devices )],
348
- }
349
- ],
350
- },
351
- fp ,
352
- indent = 4 ,
353
- )
354
- command .append (f"-mdp-load-partition-config={ mdp_ts_json } " )
355
-
356
353
command .append (f"-aic-binary-dir={ qpc_path } " )
357
354
logger .info (f"Running compiler: { ' ' .join (command )} " )
358
355
try :
0 commit comments