From 860ca693e99e9ad8003958fe08f94dc7ef686122 Mon Sep 17 00:00:00 2001 From: Manuel Saelices Date: Thu, 2 Oct 2025 18:23:12 +0200 Subject: [PATCH] New `--all-reruns-need-to-pass` argument This addresses your requirement to verify that non-deterministic tests (that work ~90% of the time) pass consistently by requiring all reruns to pass after an initial failure. Signed-off-by: Manuel Saelices --- src/pytest_rerunfailures.py | 111 +++++++++++++-- tests/test_pytest_rerunfailures.py | 210 +++++++++++++++++++++++++++++ 2 files changed, 307 insertions(+), 14 deletions(-) diff --git a/src/pytest_rerunfailures.py b/src/pytest_rerunfailures.py index d17a1a7..0fd46d4 100644 --- a/src/pytest_rerunfailures.py +++ b/src/pytest_rerunfailures.py @@ -91,10 +91,18 @@ def pytest_addoption(parser): dest="fail_on_flaky", help="Fail the test run with exit code 7 if a flaky test passes on a rerun.", ) + group._addoption( + "--all-reruns-need-to-pass", + action="store_true", + dest="all_reruns_need_to_pass", + default=False, + help="If enabled, after an initial failure, all reruns must pass for the test to succeed.", + ) arg_type = "string" parser.addini("reruns", RERUNS_DESC, type=arg_type) parser.addini("reruns_delay", RERUNS_DELAY_DESC, type=arg_type) + parser.addini("all_reruns_need_to_pass", "If enabled, all reruns must pass after initial failure", type=arg_type) # making sure the options make sense @@ -162,6 +170,33 @@ def get_reruns_delay(item): return delay +def get_all_reruns_need_to_pass(item): + """Get whether all reruns need to pass from marker, config, or ini.""" + rerun_marker = _get_marker(item) + + # Check marker kwargs first + if rerun_marker is not None and "all_reruns_need_to_pass" in rerun_marker.kwargs: + return rerun_marker.kwargs["all_reruns_need_to_pass"] + + # Check command-line option + all_need_pass = item.session.config.getvalue("all_reruns_need_to_pass") + if all_need_pass is not None: + return all_need_pass + + # Check ini value + try: + all_need_pass = 
item.session.config.getini("all_reruns_need_to_pass") + if all_need_pass: + # Parse string values like "True", "true", "1", etc. + if isinstance(all_need_pass, str): + return all_need_pass.lower() in ("true", "1", "yes", "on") + return bool(all_need_pass) + except (TypeError, ValueError): + pass + + return False + + def get_reruns_condition(item): rerun_marker = _get_marker(item) @@ -324,9 +359,9 @@ def pytest_configure(config): # add flaky marker config.addinivalue_line( "markers", - "flaky(reruns=1, reruns_delay=0): mark test to re-run up " + "flaky(reruns=1, reruns_delay=0, all_reruns_need_to_pass=False): mark test to re-run up " "to 'reruns' times. Add a delay of 'reruns_delay' seconds " - "between re-runs.", + "between re-runs. If 'all_reruns_need_to_pass' is True, all reruns must pass.", ) if config.pluginmanager.hasplugin("xdist") and HAS_PYTEST_HANDLECRASHITEM: @@ -550,6 +585,7 @@ def pytest_runtest_protocol(item, nextitem): # first item if necessary check_options(item.session.config) delay = get_reruns_delay(item) + all_reruns_need_to_pass = get_all_reruns_need_to_pass(item) parallel = not is_master(item.config) db = item.session.config.failures_db item.execution_count = db.get_test_failures(item.nodeid) @@ -558,6 +594,10 @@ def pytest_runtest_protocol(item, nextitem): if item.execution_count > reruns: return True + # Track rerun results when all reruns need to pass + initial_failure_occurred = False + rerun_results = [] # Track result of each rerun (True=passed, False=failed) + need_to_run = True while need_to_run: item.execution_count += 1 @@ -566,23 +606,66 @@ def pytest_runtest_protocol(item, nextitem): for report in reports: # 3 reports: setup, call, teardown report.rerun = item.execution_count - 1 - if _should_not_rerun(item, report, reruns): - # last run or no failure detected, log normally - item.ihook.pytest_runtest_logreport(report=report) + + # Track initial failure for all_reruns_need_to_pass mode + if all_reruns_need_to_pass and report.when == 
"call" and report.failed and item.execution_count == 1: + initial_failure_occurred = True + + # Track rerun results (after initial failure) - only track call phase + if all_reruns_need_to_pass and initial_failure_occurred and item.execution_count > 1 and report.when == "call": + rerun_results.append(not report.failed) # True if passed, False if failed + + # In all_reruns_need_to_pass mode with initial failure, override normal behavior + if all_reruns_need_to_pass and initial_failure_occurred: + # execution_count starts at 1, so: + # - execution_count==1: initial run (failed) + # - execution_count==2..reruns+1: reruns (must run all of them) + is_last_rerun = item.execution_count > reruns + + if is_last_rerun: + # Last run, check if all reruns passed + if any(not r for r in rerun_results): + # At least one rerun failed, mark final outcome as failed + if report.when == "call": + report.outcome = "failed" + # Log the final report + item.ihook.pytest_runtest_logreport(report=report) + else: + # Not the last rerun yet + # Only trigger rerun after processing the call phase + if report.when == "call": + report.outcome = "rerun" + time.sleep(delay) + if not parallel or works_with_current_xdist(): + item.ihook.pytest_runtest_logreport(report=report) + + _remove_cached_results_from_failed_fixtures(item) + _remove_failed_setup_state_from_session(item) + break # trigger rerun + else: + # For setup/teardown, just log normally + item.ihook.pytest_runtest_logreport(report=report) else: - # failure detected and reruns not exhausted, since i < reruns - report.outcome = "rerun" - time.sleep(delay) + # Normal rerun behavior + should_not_rerun = _should_not_rerun(item, report, reruns) - if not parallel or works_with_current_xdist(): - # will rerun test, log intermediate result + if should_not_rerun: + # last run or no failure detected, log normally item.ihook.pytest_runtest_logreport(report=report) + else: + # failure detected and reruns not exhausted + report.outcome = "rerun" + 
time.sleep(delay) + + if not parallel or works_with_current_xdist(): + # will rerun test, log intermediate result + item.ihook.pytest_runtest_logreport(report=report) - # cleanin item's cashed results from any level of setups - _remove_cached_results_from_failed_fixtures(item) - _remove_failed_setup_state_from_session(item) + # cleaning item's cached results from any level of setups + _remove_cached_results_from_failed_fixtures(item) + _remove_failed_setup_state_from_session(item) - break # trigger rerun + break # trigger rerun else: need_to_run = False diff --git a/tests/test_pytest_rerunfailures.py b/tests/test_pytest_rerunfailures.py index afb706f..4122fdf 100644 --- a/tests/test_pytest_rerunfailures.py +++ b/tests/test_pytest_rerunfailures.py @@ -1357,3 +1357,213 @@ def test_1(session_fixture, function_fixture): result = testdir.runpytest() assert_outcomes(result, passed=0, failed=1, rerun=1) result.stdout.fnmatch_lines("session teardown") + + +def test_all_reruns_need_to_pass_disabled_by_default(testdir): + """Test that default behavior is unchanged (flag disabled by default).""" + testdir.makepyfile( + """ + import py + def test_default_behavior(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count == 0: + raise Exception('Fail on first attempt') + # Pass on second attempt + """ + ) + result = testdir.runpytest("--reruns", "3") + # Should pass because one successful rerun is enough (default behavior) + assert_outcomes(result, passed=1, rerun=1) + + +def test_all_reruns_need_to_pass_all_pass(testdir): + """Test that when all reruns pass, test passes.""" + testdir.makepyfile( + """ + import py + def test_all_pass(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count == 0: + raise Exception('Fail on first attempt') + # Pass on all subsequent attempts + """ + ) + result = testdir.runpytest("--reruns", "3", 
"--all-reruns-need-to-pass") + # Should pass because all 3 reruns pass + assert_outcomes(result, passed=1, rerun=3) + + +def test_all_reruns_need_to_pass_some_fail(testdir): + """Test that when some reruns fail, test fails.""" + testdir.makepyfile( + """ + import py + def test_some_fail(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + # Fail on attempt 0 (initial) and attempt 2 (second rerun) + if count == 0 or count == 2: + raise Exception(f'Fail on attempt {count}') + # Pass on attempts 1 and 3 + """ + ) + result = testdir.runpytest("--reruns", "3", "--all-reruns-need-to-pass") + # Should fail because rerun 2 fails + assert_outcomes(result, passed=0, failed=1, rerun=3) + + +def test_all_reruns_need_to_pass_all_fail(testdir): + """Test that when all reruns fail, test fails.""" + testdir.makepyfile( + """ + def test_all_fail(): + raise Exception('Always fail') + """ + ) + result = testdir.runpytest("--reruns", "3", "--all-reruns-need-to-pass") + # Should fail because all reruns fail + assert_outcomes(result, passed=0, failed=1, rerun=3) + + +def test_all_reruns_need_to_pass_marker_override(testdir): + """Test that marker can override command-line flag.""" + testdir.makepyfile( + """ + import pytest + import py + + @pytest.mark.flaky(reruns=3, all_reruns_need_to_pass=True) + def test_marker_override(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count == 0: + raise Exception('Fail on first attempt') + # Pass on all subsequent attempts + """ + ) + # Not passing --all-reruns-need-to-pass on command line, but marker enables it + result = testdir.runpytest("--verbose") + assert_outcomes(result, passed=1, rerun=3) + + +def test_all_reruns_need_to_pass_marker_can_disable(testdir): + """Test that marker can disable flag even when set on command line.""" + testdir.makepyfile( + """ + import pytest + import py + + 
@pytest.mark.flaky(reruns=3, all_reruns_need_to_pass=False) + def test_marker_disables(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count == 0: + raise Exception('Fail on first attempt') + # Pass on second attempt + """ + ) + # Passing --all-reruns-need-to-pass, but marker disables it + result = testdir.runpytest("--all-reruns-need-to-pass", "--verbose") + # Should pass with just one successful rerun (default behavior) + assert_outcomes(result, passed=1, rerun=1) + + +def test_all_reruns_need_to_pass_with_only_rerun(testdir): + """Test interaction with --only-rerun flag.""" + testdir.makepyfile( + """ + import py + def test_with_only_rerun(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count == 0: + raise ValueError('Fail on first attempt') + # Pass on subsequent attempts + """ + ) + result = testdir.runpytest( + "--reruns", "3", + "--all-reruns-need-to-pass", + "--only-rerun", "ValueError" + ) + # Should pass because all reruns pass + assert_outcomes(result, passed=1, rerun=3) + + +def test_all_reruns_need_to_pass_initial_pass(testdir): + """Test that flag has no effect if test passes on first attempt.""" + testdir.makepyfile( + """ + def test_initial_pass(): + pass # Always passes + """ + ) + result = testdir.runpytest("--reruns", "3", "--all-reruns-need-to-pass") + # Should pass without any reruns + assert_outcomes(result, passed=1, rerun=0) + + +def test_all_reruns_need_to_pass_zero_reruns(testdir): + """Test that flag has no effect with reruns=0.""" + testdir.makepyfile( + """ + def test_zero_reruns(): + raise Exception('Fail') + """ + ) + result = testdir.runpytest("--reruns", "0", "--all-reruns-need-to-pass") + # Should fail without any reruns + assert_outcomes(result, passed=0, failed=1, rerun=0) + + +def test_all_reruns_need_to_pass_setup_failure(testdir): + """Test behavior when setup fails - only 
call phase is tracked.""" + testdir.makepyfile( + """ + import pytest + import py + + @pytest.fixture + def failing_fixture(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count < 2: + raise Exception(f'Fixture fails on attempt {count}') + return "ok" + + def test_setup_failure(failing_fixture): + pass + """ + ) + result = testdir.runpytest("--reruns", "3", "--all-reruns-need-to-pass") + # Note: all_reruns_need_to_pass only tracks call phase failures, not setup/teardown + # Setup eventually passes, so test passes + assert_outcomes(result, passed=1, rerun=2) + + +def test_all_reruns_need_to_pass_command_line(testdir): + """Test that command line flag works as expected.""" + testdir.makepyfile( + """ + import py + def test_cli_flag(): + path = py.path.local(__file__).dirpath().ensure('test.res') + count = int(path.read() or 0) + path.write(count + 1) + if count == 0: + raise Exception('Fail on first attempt') + # Pass on all subsequent attempts + """ + ) + result = testdir.runpytest("--reruns", "3", "--all-reruns-need-to-pass") + # Should pass because all reruns pass + assert_outcomes(result, passed=1, rerun=3)