codeflash-ai · alvin-r · Mar 20, 2025 · Mar 20, 2025 · Mar 20, 2025 · Mar 20, 2025
diff --git a/codeflash/discovery/pytest_new_process_discovery.py b/codeflash/discovery/pytest_new_process_discovery.py
@@ -16,6 +16,11 @@ def pytest_collection_finish(self, session) -> None:
         collected_tests.extend(session.items)
         pytest_rootdir = session.config.rootdir
 
+    def pytest_collection_modifyitems(self, items) -> None:  # method hook: the first parameter is the plugin instance, not pytest's config
+        skip_benchmark = pytest.mark.skip(reason="Skipping benchmark tests")  # one skip marker, reused for every matching item
+        for item in items:
+            if "benchmark" in item.fixturenames:  # the collected test requests the `benchmark` fixture
+                item.add_marker(skip_benchmark)
 
 def parse_pytest_collection_results(pytest_tests: list[Any]) -> list[dict[str, str]]:
     test_results = []
@@ -34,7 +39,7 @@ def parse_pytest_collection_results(pytest_tests: list[Any]) -> list[dict[str, s
 
     try:
         exitcode = pytest.main(
-            [tests_root, "-pno:logging", "--collect-only", "-m", "not skip"], plugins=[PytestCollectionPlugin()]
+            [tests_root, "-p no:logging", "--collect-only", "-m", "not skip",], plugins=[PytestCollectionPlugin()]
         )
     except Exception as e:  # noqa: BLE001
         print(f"Failed to collect tests: {e!s}")  # noqa: T201

diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py
@@ -16,6 +16,8 @@
 if TYPE_CHECKING:
     from codeflash.models.models import TestFiles
 
+BEHAVIORAL_BLOCKLISTED_PLUGINS = ["benchmark"]  # each name is turned into a "-p no:<name>" pytest argument by run_behavioral_tests
+BENCHMARKING_BLOCKLISTED_PLUGINS = ["cov", "benchmark", "profiling"]  # each name is turned into a "-p no:<name>" pytest argument by run_benchmarking_tests
 
 def execute_test_subprocess(
     cmd_list: list[str], cwd: Path, env: dict[str, str] | None, timeout: int = 600
@@ -87,16 +89,18 @@ def run_behavioral_tests(
             else:
                 coverage_cmd.extend(shlex.split(pytest_cmd, posix=IS_POSIX)[1:])
 
+            blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS if plugin != "cov"]
             results = execute_test_subprocess(
-                coverage_cmd + common_pytest_args + result_args + test_files, cwd=cwd, env=pytest_test_env, timeout=600
+                coverage_cmd + common_pytest_args + blocklist_args + result_args + test_files, cwd=cwd, env=pytest_test_env, timeout=600
             )
             logger.debug(
                 f"Result return code: {results.returncode}, "
                 f"{'Result stderr:' + str(results.stderr) if results.stderr else ''}"
             )
         else:
+            blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS]
             results = execute_test_subprocess(
-                pytest_cmd_list + common_pytest_args + result_args + test_files,
+                pytest_cmd_list + common_pytest_args + blocklist_args + result_args + test_files,
                 cwd=cwd,
                 env=pytest_test_env,
                 timeout=600,  # TODO: Make this dynamic
@@ -170,8 +174,10 @@ def run_benchmarking_tests(
         result_args = [f"--junitxml={result_file_path.as_posix()}", "-o", "junit_logging=all"]
         pytest_test_env = test_env.copy()
         pytest_test_env["PYTEST_PLUGINS"] = "codeflash.verification.pytest_plugin"
+        blocklist_args = [f"-p no:{plugin}" for plugin in BENCHMARKING_BLOCKLISTED_PLUGINS]
+
         results = execute_test_subprocess(
-            pytest_cmd_list + pytest_args + result_args + test_files,
+            pytest_cmd_list + pytest_args + blocklist_args + result_args + test_files,
             cwd=cwd,
             env=pytest_test_env,
             timeout=600,  # TODO: Make this dynamic

diff --git a/tests/test_unit_test_discovery.py b/tests/test_unit_test_discovery.py
@@ -34,6 +34,48 @@ def test_unit_test_discovery_unittest():
     # assert len(tests) > 0
     # Unittest discovery within a pytest environment does not work
 
+def test_benchmark_unit_test_discovery_pytest():  # discovery must leave out tests that request the `benchmark` fixture
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # Write a test module holding one benchmark-fixture test and two plain tests
+        test_file_path = Path(tmpdirname) / "test_dummy.py"
+        test_file_content = """
+from bubble_sort import sorter
+
+def test_benchmark_sort(benchmark):
+     benchmark(sorter, [5, 4, 3, 2, 1, 0])
+
+def test_normal_test():
+    assert sorter(list(reversed(range(100)))) == list(range(100))
+
+def test_normal_test2():
+    assert sorter(list(reversed(range(100)))) == list(range(100))"""
+        test_file_path.write_text(test_file_content)
+        path_obj_tempdirname = Path(tmpdirname)
+
+        # Write the bubble_sort module that the test module imports `sorter` from
+        code_file_path = path_obj_tempdirname / "bubble_sort.py"
+        code_file_content = """
+def sorter(arr):
+    return sorted(arr)"""
+        code_file_path.write_text(code_file_content)
+
+        # Use the temp dir as tests_root and project_root_path, and its parent as tests_project_rootdir
+        test_config = TestConfig(
+            tests_root=path_obj_tempdirname,
+            project_root_path=path_obj_tempdirname,
+            test_framework="pytest",
+            tests_project_rootdir=path_obj_tempdirname.parent,
+        )
+
+        # Run discovery and check that only the two plain tests are reported for sorter
+        tests = discover_unit_tests(test_config)
+        assert len(tests) == 1  # a single key is expected in the discovery result
+        assert 'bubble_sort.sorter' in tests
+        assert len(tests['bubble_sort.sorter']) == 2  # two entries: the plain tests, without the benchmark test
+        functions = [test.tests_in_file.test_function for test in tests['bubble_sort.sorter']]
+        assert 'test_normal_test' in functions
+        assert 'test_normal_test2' in functions
+        assert 'test_benchmark_sort' not in functions
 
 def test_discover_tests_pytest_with_temp_dir_root():
     with tempfile.TemporaryDirectory() as tmpdirname: