Merge branch 'main' into cf-576

misrasaurabh1 · web-flow · commit 5d883a9be0bc · 2025-03-20T18:17:32.000-07:00
diff --git a/codeflash/discovery/pytest_new_process_discovery.py b/codeflash/discovery/pytest_new_process_discovery.py
@@ -16,6 +16,11 @@ def pytest_collection_finish(self, session) -> None:
         collected_tests.extend(session.items)
         pytest_rootdir = session.config.rootdir
 
+    def pytest_collection_modifyitems(self, config, items) -> None:
+        """Skip-mark items requesting the ``benchmark`` fixture; items without ``fixturenames`` are left alone."""
+        skip_benchmark = pytest.mark.skip(reason="Skipping benchmark tests")
+        for item in (i for i in items if "benchmark" in getattr(i, "fixturenames", ())):
+            item.add_marker(skip_benchmark)
 
 def parse_pytest_collection_results(pytest_tests: list[Any]) -> list[dict[str, str]]:
     test_results = []
@@ -34,7 +39,7 @@ def parse_pytest_collection_results(pytest_tests: list[Any]) -> list[dict[str, s
 
     try:
         exitcode = pytest.main(
-            [tests_root, "-pno:logging", "--collect-only", "-m", "not skip"], plugins=[PytestCollectionPlugin()]
+            [tests_root, "-p no:logging", "--collect-only", "-m", "not skip",], plugins=[PytestCollectionPlugin()]
         )
     except Exception as e:  # noqa: BLE001
         print(f"Failed to collect tests: {e!s}")  # noqa: T201
diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py
@@ -16,6 +16,8 @@
 if TYPE_CHECKING:
     from codeflash.models.models import TestFiles
 
+BEHAVIORAL_BLOCKLISTED_PLUGINS = ["benchmark"]  # plugin names turned into `-p no:<name>` args by run_behavioral_tests
+BENCHMARKING_BLOCKLISTED_PLUGINS = ["codspeed", "cov", "benchmark", "profiling"]  # same, for run_benchmarking_tests
 
 def execute_test_subprocess(
     cmd_list: list[str], cwd: Path, env: dict[str, str] | None, timeout: int = 600
@@ -87,16 +89,18 @@ def run_behavioral_tests(
             else:
                 coverage_cmd.extend(shlex.split(pytest_cmd, posix=IS_POSIX)[1:])
 
+            blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS if plugin != "cov"]
             results = execute_test_subprocess(
-                coverage_cmd + common_pytest_args + result_args + test_files, cwd=cwd, env=pytest_test_env, timeout=600
+                coverage_cmd + common_pytest_args + blocklist_args + result_args + test_files, cwd=cwd, env=pytest_test_env, timeout=600
             )
             logger.debug(
                 f"Result return code: {results.returncode}, "
                 f"{'Result stderr:' + str(results.stderr) if results.stderr else ''}"
             )
         else:
+            blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS]
             results = execute_test_subprocess(
-                pytest_cmd_list + common_pytest_args + result_args + test_files,
+                pytest_cmd_list + common_pytest_args + blocklist_args + result_args + test_files,
                 cwd=cwd,
                 env=pytest_test_env,
                 timeout=600,  # TODO: Make this dynamic
@@ -170,8 +174,10 @@ def run_benchmarking_tests(
         result_args = [f"--junitxml={result_file_path.as_posix()}", "-o", "junit_logging=all"]
         pytest_test_env = test_env.copy()
         pytest_test_env["PYTEST_PLUGINS"] = "codeflash.verification.pytest_plugin"
+        blocklist_args = [f"-p no:{plugin}" for plugin in BENCHMARKING_BLOCKLISTED_PLUGINS]
+
         results = execute_test_subprocess(
-            pytest_cmd_list + pytest_args + result_args + test_files,
+            pytest_cmd_list + pytest_args + blocklist_args + result_args + test_files,
             cwd=cwd,
             env=pytest_test_env,
             timeout=600,  # TODO: Make this dynamic
diff --git a/docs/docs/how-codeflash-works.md b/docs/docs/how-codeflash-works.md
@@ -25,7 +25,7 @@ To optimize code, Codeflash first gathers all necessary context from the codebas
 
 ## Verification of correctness
 
-![Verification](/img/verification.svg)
+![Verification](/img/CodeFlash_arch_diagram.gif)
 
 The goal of correctness verification is to ensure that when the original code is replaced by the new code, there are no behavioral changes in the code and the rest of the system. This means the replacement should be completely safe.
 
@@ -60,4 +60,4 @@ Codeflash implements several techniques to measure code performance accurately.
 
 ## Creating Pull Requests
 
-Once an optimization passes all checks, Codeflash creates a pull request through the Codeflash GitHub app directly in your repository. The pull request includes the new code, the speedup percentage, an explanation of the optimization, test statistics including coverage, and the test content itself. You can review and merge the new code if it meets your standards. Feel free to modify the code as needed—we welcome your improvements!
+Once an optimization passes all checks, Codeflash creates a pull request through the Codeflash GitHub app directly in your repository. The pull request includes the new code, the speedup percentage, an explanation of the optimization, test statistics including coverage, and the test content itself. You can review and merge the new code if it meets your standards. Feel free to modify the code as needed—we welcome your improvements!
diff --git a/docs/static/img/CodeFlash_arch_diagram.gif b/docs/static/img/CodeFlash_arch_diagram.gif
diff --git a/tests/test_unit_test_discovery.py b/tests/test_unit_test_discovery.py
@@ -34,6 +34,48 @@ def test_unit_test_discovery_unittest():
     # assert len(tests) > 0
     # Unittest discovery within a pytest environment does not work
 
+def test_benchmark_unit_test_discovery_pytest():
+    """Tests that request the ``benchmark`` fixture must be absent from discovery results."""
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        root = Path(tmpdirname)
+        # Test module: one benchmark-fixture test next to two ordinary tests
+        test_module = root / "test_dummy.py"
+        test_module.write_text("""
+from bubble_sort import sorter
+
+def test_benchmark_sort(benchmark):
+     benchmark(sorter, [5, 4, 3, 2, 1, 0])
+
+def test_normal_test():
+    assert sorter(list(reversed(range(100)))) == list(range(100))
+
+def test_normal_test2():
+    assert sorter(list(reversed(range(100)))) == list(range(100))""")
+        # Module under test, imported by the test module above
+        code_module = root / "bubble_sort.py"
+        code_module.write_text("""
+def sorter(arr):
+    return sorted(arr)""")
+
+        # Every configured root points at the temporary directory
+        cfg = TestConfig(
+            tests_root=root,
+            project_root_path=root,
+            test_framework="pytest",
+            tests_project_rootdir=root.parent,
+        )
+
+        discovered = discover_unit_tests(cfg)
+
+        # Exactly one function is covered, and only by the two ordinary tests
+        assert len(discovered) == 1
+        assert 'bubble_sort.sorter' in discovered
+        sorter_tests = discovered['bubble_sort.sorter']
+        assert len(sorter_tests) == 2
+        names = [entry.tests_in_file.test_function for entry in sorter_tests]
+        assert 'test_normal_test' in names
+        assert 'test_normal_test2' in names
+        assert 'test_benchmark_sort' not in names
 
 def test_discover_tests_pytest_with_temp_dir_root():
     with tempfile.TemporaryDirectory() as tmpdirname: