Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 31 additions & 34 deletions QEfficient/base/modeling_qeff.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import hashlib
import inspect
import json
import logging
import shutil
import subprocess
Expand All @@ -23,7 +22,7 @@
from QEfficient.base.pytorch_transforms import PytorchTransform
from QEfficient.compile.qnn_compiler import compile as qnn_compile
from QEfficient.generation.cloud_infer import QAICInferenceSession
from QEfficient.utils import constants, dump_qconfig
from QEfficient.utils import constants, create_json, dump_qconfig, generate_mdp_partition_config, load_json
from QEfficient.utils.cache import QEFF_HOME, to_hashable

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -269,17 +268,17 @@ def _compile(
specializations=specializations,
custom_io=custom_io,
device_group=list(range(mdp_ts_num_devices)),
num_cores=compiler_options.get("aic_num_cores", 16),
mxfp6=compiler_options.get("mxfp6_matmul", False),
num_cores=compiler_options.get("aic_num_cores", constants.DEFAULT_AIC_NUM_CORES),
mxfp6=compiler_options.get("mxfp6_matmul", constants.DEFAULT_AIC_MXPF6_MATMUL),
mxint8=mxint8_kv_cache,
qnn_config=qnn_config,
)

return self.qpc_path

command = constants.COMPILER + [f"-m={onnx_path}"]
if mdp_ts_json_path := compiler_options.pop("mdp_ts_json_path", None):
mdp_ts_num_devices = None

if mdp_ts_json_path := compiler_options.pop("mdp_load_partition_config", None):
command.append(f"-mdp-load-partition-config={mdp_ts_json_path}")

for key, value in compiler_options.items():
Expand All @@ -289,6 +288,17 @@ def _compile(
command.append(option)
continue
command.append(f"{option}={value}")

# Create a dummy mdp_ts_json if mdp-load-partition-config not provided and num_devices > 1
if mdp_ts_json_path is not None:
mdp_ts_json = load_json(str(mdp_ts_json_path))
elif mdp_ts_num_devices > 1:
mdp_ts_json = generate_mdp_partition_config(
mdp_ts_num_devices, compiler_options.get("aic_num_cores", constants.DEFAULT_AIC_NUM_CORES)
)
else:
mdp_ts_json = None

compile_hash = hashlib.sha256(to_hashable(command))

if specializations is not None:
Expand All @@ -299,30 +309,37 @@ def _compile(

if num_speculative_tokens:
compile_hash.update(to_hashable({"num_speculative_tokens": num_speculative_tokens}))
# Hash num_devices too, since default value would always be 1.
compile_hash.update(to_hashable(mdp_ts_num_devices))

# Hash the MDP partition config and the number of devices.
compile_hash.update(to_hashable(mdp_ts_json))
compile_hash.update(to_hashable({"mdp_ts_num_devices": mdp_ts_num_devices}))

# Check if already compiled
compile_hash = compile_hash.hexdigest()[:16]
compile_dir = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
qpc_path = compile_dir / "qpc"
qpc_path.mkdir(parents=True, exist_ok=True)

if qpc_path.is_dir():
if (qpc_path / "programqpc.bin").is_file():
self.qpc_path = qpc_path
return qpc_path
# Probably compilation failure last time, delete directory to start over
shutil.rmtree(qpc_path)

# write the MDP partition config file if not provided
if mdp_ts_json is not None:
mdp_ts_json_path = compile_dir / f"mdp_ts_{mdp_ts_num_devices}.json"
create_json(str(mdp_ts_json_path), mdp_ts_json)
command.append(f"-mdp-load-partition-config={mdp_ts_json_path}")

# Write specializations.json file
if specializations is not None:
specializations_json = compile_dir / "specializations.json"
with open(specializations_json, "w") as fp:
json.dump(
{"specializations": [{k: str(v) for k, v in spec.items()} for spec in specializations]},
fp,
indent=4,
)
specializations_data = {
"specializations": [{k: str(v) for k, v in spec.items()} for spec in specializations]
}
create_json(str(specializations_json), specializations_data)
command.append(f"-network-specialization-config={specializations_json}")

# Write custom_io.yaml file
Expand All @@ -333,26 +350,6 @@ def _compile(
fp.write(f" - IOName: {io_name}\n Precision: {dtype}\n\n")
command.append(f"-custom-IO-list-file={custom_io_yaml}")

# Write mdp_config.json file
if not mdp_ts_json_path and mdp_ts_num_devices > 1:
num_cores = compiler_options.get("aic_num_cores", 16)
mdp_ts_json = compile_dir / f"mdp_ts_{mdp_ts_num_devices}.json"
with open(mdp_ts_json, "w") as fp:
json.dump(
{
"connections": [{"devices": list(range(mdp_ts_num_devices)), "type": "p2p"}],
"partitions": [
{
"name": "Partition0",
"devices": [{"deviceId": d, "numCores": num_cores} for d in range(mdp_ts_num_devices)],
}
],
},
fp,
indent=4,
)
command.append(f"-mdp-load-partition-config={mdp_ts_json}")

command.append(f"-aic-binary-dir={qpc_path}")
logger.info(f"Running compiler: {' '.join(command)}")
try:
Expand Down
3 changes: 3 additions & 0 deletions QEfficient/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
)
from QEfficient.utils._utils import ( # noqa: F401
check_and_assign_cache_dir,
create_json,
custom_format_warning,
dump_qconfig,
generate_mdp_partition_config,
get_num_layers_from_config,
get_num_layers_vlm,
get_onnx_dir_name,
Expand All @@ -24,6 +26,7 @@
hf_download,
load_hf_processor,
load_hf_tokenizer,
load_json,
login_and_download_hf_lm,
onnx_exists,
padding_check_and_fix,
Expand Down
26 changes: 26 additions & 0 deletions QEfficient/utils/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,32 @@ def create_json(file_path: str, json_data: object):
print(f"Failed to create JSON File {file_path}: {e}")


def generate_mdp_partition_config(num_devices: int, num_cores: int) -> dict:
    """
    Builds an in-memory MDP partition configuration.

    Nothing is written to disk here; the caller is responsible for
    serializing the returned dictionary (e.g. to JSON) if a file is needed.

    Args:
        num_devices (int): Number of devices. Device ids ``0 .. num_devices - 1``
            are placed in a single "p2p" connection and a single partition.
        num_cores (int): Number of cores assigned to every device.

    Returns:
        dict: Configuration with two keys:
            ``"connections"`` - one entry listing every device id with type "p2p".
            ``"partitions"`` - one entry named "Partition0" holding a
            ``{"deviceId", "numCores"}`` record per device.
    """
    device_ids = list(range(num_devices))

    mdp_config = {
        "connections": [{"devices": device_ids, "type": "p2p"}],
        "partitions": [
            {
                "name": "Partition0",
                "devices": [{"deviceId": d, "numCores": num_cores} for d in device_ids],
            }
        ],
    }

    return mdp_config


def model_swap(func):
def wrapper(*args, **kwargs):
if "model" in kwargs and kwargs["model"] is not None:
Expand Down
4 changes: 4 additions & 0 deletions QEfficient/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
ONNX_EXPORT_IMAGE_DEPTH = 3
ONNX_EXPORT_CTX_LEN = 1024

# Compiler defaults
# Fallback for the "aic_num_cores" compiler option when the caller does not set it.
DEFAULT_AIC_NUM_CORES = 16
# Fallback for the "mxfp6_matmul" compiler option when the caller does not set it.
# NOTE(review): the constant is spelled "MXPF6" while the option is "mxfp6" - looks
# like a transposition; renaming would require updating every usage together.
DEFAULT_AIC_MXPF6_MATMUL = False


# Store the qeff_models inside the ~/.cache directory or over-ride with an env variable.
def get_models_dir():
Expand Down
Loading