From 860ca693e99e9ad8003958fe08f94dc7ef686122 Mon Sep 17 00:00:00 2001 From: Manuel Saelices Date: Thu, 2 Oct 2025 18:23:12 +0200 Subject: [PATCH] New `--all-reruns-need-to-pass` argument This addresses your requirement to verify that non-deterministic tests (that work ~90% of the time) pass consistently by requiring all reruns to pass after an initial failure. Signed-off-by: Manuel Saelices --- src/pytest_rerunfailures.py | 111 +++++++++++++-- tests/test_pytest_rerunfailures.py | 210 +++++++++++++++++++++++++++++ 2 files changed, 307 insertions(+), 14 deletions(-) diff --git a/src/pytest_rerunfailures.py b/src/pytest_rerunfailures.py index d17a1a7..0fd46d4 100644 --- a/src/pytest_rerunfailures.py +++ b/src/pytest_rerunfailures.py @@ -91,10 +91,18 @@ def pytest_addoption(parser): dest="fail_on_flaky", help="Fail the test run with exit code 7 if a flaky test passes on a rerun.", ) + group._addoption( + "--all-reruns-need-to-pass", + action="store_true", + dest="all_reruns_need_to_pass", + default=False, + help="If enabled, after an initial failure, all reruns must pass for the test to succeed.", + ) arg_type = "string" parser.addini("reruns", RERUNS_DESC, type=arg_type) parser.addini("reruns_delay", RERUNS_DELAY_DESC, type=arg_type) + parser.addini("all_reruns_need_to_pass", "If enabled, all reruns must pass after initial failure", type=arg_type) # making sure the options make sense @@ -162,6 +170,33 @@ def get_reruns_delay(item): return delay +def get_all_reruns_need_to_pass(item): + """Get whether all reruns need to pass from marker, config, or ini.""" + rerun_marker = _get_marker(item) + + # Check marker kwargs first + if rerun_marker is not None and "all_reruns_need_to_pass" in rerun_marker.kwargs: + return rerun_marker.kwargs["all_reruns_need_to_pass"] + + # Check command-line option + all_need_pass = item.session.config.getvalue("all_reruns_need_to_pass") + if all_need_pass is not None: + return all_need_pass + + # Check ini value + try: + all_need_pass = 
item.session.config.getini("all_reruns_need_to_pass") + if all_need_pass: + # Parse string values like "True", "true", "1", etc. + if isinstance(all_need_pass, str): + return all_need_pass.lower() in ("true", "1", "yes", "on") + return bool(all_need_pass) + except (TypeError, ValueError): + pass + + return False + + def get_reruns_condition(item): rerun_marker = _get_marker(item) @@ -324,9 +359,9 @@ def pytest_configure(config): # add flaky marker config.addinivalue_line( "markers", - "flaky(reruns=1, reruns_delay=0): mark test to re-run up " + "flaky(reruns=1, reruns_delay=0, all_reruns_need_to_pass=False): mark test to re-run up " "to 'reruns' times. Add a delay of 'reruns_delay' seconds " - "between re-runs.", + "between re-runs. If 'all_reruns_need_to_pass' is True, all reruns must pass.", ) if config.pluginmanager.hasplugin("xdist") and HAS_PYTEST_HANDLECRASHITEM: @@ -550,6 +585,7 @@ def pytest_runtest_protocol(item, nextitem): # first item if necessary check_options(item.session.config) delay = get_reruns_delay(item) + all_reruns_need_to_pass = get_all_reruns_need_to_pass(item) parallel = not is_master(item.config) db = item.session.config.failures_db item.execution_count = db.get_test_failures(item.nodeid) @@ -558,6 +594,10 @@ def pytest_runtest_protocol(item, nextitem): if item.execution_count > reruns: return True + # Track rerun results when all reruns need to pass + initial_failure_occurred = False + rerun_results = [] # Track result of each rerun (True=passed, False=failed) + need_to_run = True while need_to_run: item.execution_count += 1 @@ -566,23 +606,66 @@ def pytest_runtest_protocol(item, nextitem): for report in reports: # 3 reports: setup, call, teardown report.rerun = item.execution_count - 1 - if _should_not_rerun(item, report, reruns): - # last run or no failure detected, log normally - item.ihook.pytest_runtest_logreport(report=report) + + # Track initial failure for all_reruns_need_to_pass mode + if all_reruns_need_to_pass and report.when == 
"call" and report.failed and item.execution_count == 1: + initial_failure_occurred = True + + # Track rerun results (after initial failure) - only track call phase + if all_reruns_need_to_pass and initial_failure_occurred and item.execution_count > 1 and report.when == "call": + rerun_results.append(not report.failed) # True if passed, False if failed + + # In all_reruns_need_to_pass mode with initial failure, override normal behavior + if all_reruns_need_to_pass and initial_failure_occurred: + # execution_count starts at 1, so: + # - execution_count==1: initial run (failed) + # - execution_count==2..reruns+1: reruns (must run all of them) + is_last_rerun = item.execution_count > reruns + + if is_last_rerun: + # Last run, check if all reruns passed + if any(not r for r in rerun_results): + # At least one rerun failed, mark final outcome as failed + if report.when == "call": + report.outcome = "failed" + # Log the final report + item.ihook.pytest_runtest_logreport(report=report) + else: + # Not the last rerun yet + # Only trigger rerun after processing the call phase + if report.when == "call": + report.outcome = "rerun" + time.sleep(delay) + if not parallel or works_with_current_xdist(): + item.ihook.pytest_runtest_logreport(report=report) + + _remove_cached_results_from_failed_fixtures(item) + _remove_failed_setup_state_from_session(item) + break # trigger rerun + else: + # For setup/teardown, just log normally + item.ihook.pytest_runtest_logreport(report=report) else: - # failure detected and reruns not exhausted, since i < reruns - report.outcome = "rerun" - time.sleep(delay) + # Normal rerun behavior + should_not_rerun = _should_not_rerun(item, report, reruns) - if not parallel or works_with_current_xdist(): - # will rerun test, log intermediate result + if should_not_rerun: + # last run or no failure detected, log normally item.ihook.pytest_runtest_logreport(report=report) + else: + # failure detected and reruns not exhausted + report.outcome = "rerun" + 
time.sleep(delay) + + if not parallel or works_with_current_xdist(): + # will rerun test, log intermediate result + item.ihook.pytest_runtest_logreport(report=report) - # cleanin item's cashed results from any level of setups - _remove_cached_results_from_failed_fixtures(item) - _remove_failed_setup_state_from_session(item) + # cleaning item's cached results from any level of setups + _remove_cached_results_from_failed_fixtures(item) + _remove_failed_setup_state_from_session(item) - break # trigger rerun + break # trigger rerun else: need_to_run = False diff --git a/tests/test_pytest_rerunfailures.py b/tests/test_pytest_rerunfailures.py index afb706f..4122fdf 100644 --- a/tests/test_pytest_rerunfailures.py +++ b/tests/test_pytest_rerunfailures.py @@ -1357,3 +1357,213 @@ def test_1(session_fixture, function_fixture): result = testdir.runpytest() assert_outcomes(result, passed=0, failed=1, rerun=1) result.stdout.fnmatch_lines("session teardown") + + +def test_all_reruns_need_to_pass_disabled_by_default(testdir): + """Test that default behavior is unchanged (flag disabled by default).""" + testdir.makepyfile( + """ + import py + def test_default_behavior(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count == 0: + raise Exception('Fail on first attempt') + # Pass on second attempt + """ + ) + result = testdir.runpytest("--reruns", "3") + # Should pass because one successful rerun is enough (default behavior) + assert_outcomes(result, passed=1, rerun=1) + + +def test_all_reruns_need_to_pass_all_pass(testdir): + """Test that when all reruns pass, test passes.""" + testdir.makepyfile( + """ + import py + def test_all_pass(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count == 0: + raise Exception('Fail on first attempt') + # Pass on all subsequent attempts + """ + ) + result = testdir.runpytest("--reruns", "3", 
"--all-reruns-need-to-pass") + # Should pass because all 3 reruns pass + assert_outcomes(result, passed=1, rerun=3) + + +def test_all_reruns_need_to_pass_some_fail(testdir): + """Test that when some reruns fail, test fails.""" + testdir.makepyfile( + """ + import py + def test_some_fail(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + # Fail on attempt 0 (initial) and attempt 2 (second rerun) + if count == 0 or count == 2: + raise Exception(f'Fail on attempt {count}') + # Pass on attempts 1 and 3 + """ + ) + result = testdir.runpytest("--reruns", "3", "--all-reruns-need-to-pass") + # Should fail because rerun 2 fails + assert_outcomes(result, passed=0, failed=1, rerun=3) + + +def test_all_reruns_need_to_pass_all_fail(testdir): + """Test that when all reruns fail, test fails.""" + testdir.makepyfile( + """ + def test_all_fail(): + raise Exception('Always fail') + """ + ) + result = testdir.runpytest("--reruns", "3", "--all-reruns-need-to-pass") + # Should fail because all reruns fail + assert_outcomes(result, passed=0, failed=1, rerun=3) + + +def test_all_reruns_need_to_pass_marker_override(testdir): + """Test that marker can override command-line flag.""" + testdir.makepyfile( + """ + import pytest + import py + + @pytest.mark.flaky(reruns=3, all_reruns_need_to_pass=True) + def test_marker_override(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count == 0: + raise Exception('Fail on first attempt') + # Pass on all subsequent attempts + """ + ) + # Not passing --all-reruns-need-to-pass on command line, but marker enables it + result = testdir.runpytest("--verbose") + assert_outcomes(result, passed=1, rerun=3) + + +def test_all_reruns_need_to_pass_marker_can_disable(testdir): + """Test that marker can disable flag even when set on command line.""" + testdir.makepyfile( + """ + import pytest + import py + + 
@pytest.mark.flaky(reruns=3, all_reruns_need_to_pass=False) + def test_marker_disables(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count == 0: + raise Exception('Fail on first attempt') + # Pass on second attempt + """ + ) + # Passing --all-reruns-need-to-pass, but marker disables it + result = testdir.runpytest("--all-reruns-need-to-pass", "--verbose") + # Should pass with just one successful rerun (default behavior) + assert_outcomes(result, passed=1, rerun=1) + + +def test_all_reruns_need_to_pass_with_only_rerun(testdir): + """Test interaction with --only-rerun flag.""" + testdir.makepyfile( + """ + import py + def test_with_only_rerun(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count == 0: + raise ValueError('Fail on first attempt') + # Pass on subsequent attempts + """ + ) + result = testdir.runpytest( + "--reruns", "3", + "--all-reruns-need-to-pass", + "--only-rerun", "ValueError" + ) + # Should pass because all reruns pass + assert_outcomes(result, passed=1, rerun=3) + + +def test_all_reruns_need_to_pass_initial_pass(testdir): + """Test that flag has no effect if test passes on first attempt.""" + testdir.makepyfile( + """ + def test_initial_pass(): + pass # Always passes + """ + ) + result = testdir.runpytest("--reruns", "3", "--all-reruns-need-to-pass") + # Should pass without any reruns + assert_outcomes(result, passed=1, rerun=0) + + +def test_all_reruns_need_to_pass_zero_reruns(testdir): + """Test that flag has no effect with reruns=0.""" + testdir.makepyfile( + """ + def test_zero_reruns(): + raise Exception('Fail') + """ + ) + result = testdir.runpytest("--reruns", "0", "--all-reruns-need-to-pass") + # Should fail without any reruns + assert_outcomes(result, passed=0, failed=1, rerun=0) + + +def test_all_reruns_need_to_pass_setup_failure(testdir): + """Test behavior when setup fails - only 
call phase is tracked.""" + testdir.makepyfile( + """ + import pytest + import py + + @pytest.fixture + def failing_fixture(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count < 2: + raise Exception(f'Fixture fails on attempt {count}') + return "ok" + + def test_setup_failure(failing_fixture): + pass + """ + ) + result = testdir.runpytest("--reruns", "3", "--all-reruns-need-to-pass") + # Note: all_reruns_need_to_pass only tracks call phase failures, not setup/teardown + # Setup eventually passes, so test passes + assert_outcomes(result, passed=1, rerun=2) + + +def test_all_reruns_need_to_pass_command_line(testdir): + """Test that command line flag works as expected.""" + testdir.makepyfile( + """ + import py + def test_cli_flag(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count == 0: + raise Exception('Fail on first attempt') + # Pass on all subsequent attempts + """ + ) + result = testdir.runpytest("--reruns", "3", "--all-reruns-need-to-pass") + # Should pass because all reruns pass + assert_outcomes(result, passed=1, rerun=3)