Skip to content
This repository was archived by the owner on Sep 9, 2025. It is now read-only.
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples_utils/benchmarks/requirements_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ def install_patched_requirements(requirements_file: Union[str, Path], listener:
err = (f"Installation of pip packages in file {requirements_file} failed with stderr: {err}.")
logger.error(err)
raise subprocess.CalledProcessError(exit_code, cmd, out, err)
cmd = [sys.executable, "-m", "pip", "freeze"]
run_and_monitor_progress(cmd, listener, monitor_ipus=False)
return original_requirements


Expand Down
21 changes: 19 additions & 2 deletions examples_utils/benchmarks/run_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,16 @@ def plot_ipu_usage(*args, **kwargs):
# BenchmarkDict is a plain alias of Dict; no key/value types are specified here.
BenchmarkDict = Dict


def should_reattempt_benchmark(
    variant,
    output,
    err,
    exitcode,
    pip_freeze_before: str = "",
    pip_freeze_after: str = "",
) -> Union[bool, str]:
    """Decide whether a failed benchmark run should be attempted again.

    Args:
        variant: Benchmark variant mapping; only ``variant["cmd"]`` is read.
        output: Captured stdout of the benchmark run.
        err: Captured stderr of the benchmark run.
        exitcode: Exit code of the benchmark process.
        pip_freeze_before: ``pip freeze`` output captured before the run.
        pip_freeze_after: ``pip freeze`` output captured after the run.
            Both snapshots default to ``""`` so callers that still pass only
            the first four arguments keep working; equal snapshots never
            trigger a retry.

    Returns:
        ``False`` when the benchmark should not be re-run, otherwise a
        non-empty (truthy) string describing why a re-run is needed.
    """
    # A timed-out run is never retried.
    if "Timeout" in err:
        return False

    is_a_notebook = "examples_utils.benchmarks.notebook_utils" in variant["cmd"]
    # Only failed notebook runs are candidates for a retry.
    if not is_a_notebook or exitcode == 0:
        return False

    # The notebook installed packages during the run but then failed to
    # import a module.
    if "ModuleNotFoundError" in err and "Successfully installed" in output:
        return "Notebook has installed some packages, need to restart kernel"

    # The set of installed packages changed while the notebook was running.
    if pip_freeze_after != pip_freeze_before:
        return "Output of pip-freeze changed need to restart kernel"

    return False

Expand Down Expand Up @@ -287,10 +290,18 @@ def run_benchmark_variant(
monitor_log = []
exitcode = 0
stdout = stderr = ""
pip_freeze_before = ""
pip_freeze_after = ""
while need_to_run:
if args.submit_on_slurm:
stdout, stderr, exitcode = run_and_monitor_progress_on_slurm(listener=listener, **slurm_config)
else:
pip_freeze_before, *_ = run_and_monitor_progress(
[sys.executable, "-m", "pip", "freeze"],
listener,
cwd=cwd,
env=env,
)
stdout, stderr, exitcode, monitor_log = run_and_monitor_progress(
cmd,
listener,
Expand All @@ -299,7 +310,13 @@ def run_benchmark_variant(
cwd=cwd,
env=env,
)
need_to_run = should_reattempt_benchmark(benchmark_dict, stdout, stderr, exitcode)
pip_freeze_after, *_ = run_and_monitor_progress(
[sys.executable, "-m", "pip", "freeze"],
listener,
cwd=cwd,
env=env,
)
need_to_run = should_reattempt_benchmark(benchmark_dict, stdout, stderr, exitcode, pip_freeze_before, pip_freeze_after)
if need_to_run:
logger.info(f"Re-running benchmark because: {need_to_run}")

Expand Down