From 4c372784468c52aa966795031f08e0ef7a4720f8 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Thu, 8 Feb 2024 10:10:04 -0500 Subject: [PATCH 1/4] gh-115168: Add pystats counter for invalidated executors --- Include/cpython/pystats.h | 1 + Python/specialize.c | 1 + Tools/scripts/summarize_stats.py | 2 ++ 3 files changed, 4 insertions(+) diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index 0f50439b73848e..8ac8bd90f2dd60 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -115,6 +115,7 @@ typedef struct _optimization_stats { uint64_t inner_loop; uint64_t recursive_call; uint64_t low_confidence; + uint64_t executors_invalidated; UOpStats opcode[512]; uint64_t unsupported_opcode[256]; uint64_t trace_length_hist[_Py_UOP_HIST_SIZE]; diff --git a/Python/specialize.c b/Python/specialize.c index ea2638570f22d0..2c4dde14aaea3a 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -235,6 +235,7 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop); fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call); fprintf(out, "Optimization low confidence: %" PRIu64 "\n", stats->low_confidence); + fprintf(out, "Executors invalidated: %" PRIu64 "\n", stats->executors_invalidated); print_histogram(out, "Trace length", stats->trace_length_hist); print_histogram(out, "Trace run length", stats->trace_run_length_hist); diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 7891b9cf923d33..74cc33ae9d6d8b 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -387,6 +387,7 @@ def get_optimization_stats(self) -> dict[str, tuple[int, int | None]]: inner_loop = self._data["Optimization inner loop"] recursive_call = self._data["Optimization recursive call"] low_confidence = self._data["Optimization low confidence"] + executors_invalidated = self._data["Executors invalidated"] return { "Optimization attempts": (attempts, None), @@ -398,6 +399,7 @@ def get_optimization_stats(self) -> dict[str, tuple[int, int | None]]: "Inner loop found": (inner_loop, attempts), "Recursive call": (recursive_call, attempts), "Low confidence": (low_confidence, attempts), + "Executors invalidated": (executors_invalidated, created), "Traces executed": (executed, None), "Uops executed": (uops, executed), } From 3b5a58bbe2f8e31df8ef7404b672ff13f07f8e3d Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 9 Feb 2024 11:03:30 -0500 Subject: [PATCH 2/4] Be more precise about when executors are invalidated --- Include/cpython/optimizer.h | 3 ++- Include/internal/pycore_code.h | 2 ++ Python/optimizer.c | 21 ++++++++++++++++++++- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 3928eca583ba5b..5f0c83e3d40d59 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -87,8 +87,9 @@ void _Py_ExecutorClear(_PyExecutorObject *); void _Py_BloomFilter_Init(_PyBloomFilter *); void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); -PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj); +PyAPI_FUNC(int) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj); extern void _Py_Executors_InvalidateAll(PyInterpreterState *interp); +int _Py_Executors_Count(PyInterpreterState *interp); /* For testing */ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index fdd5918228455d..7b448889d3c6c9 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -285,6 +285,7 @@ extern int _PyStaticCode_Init(PyCodeObject *co); do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0) #define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0) #define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0) +#define OPT_STAT_ADD(name, value) do { if (_Py_stats) _Py_stats->optimization_stats.name += value; } while (0) #define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0) #define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0) #define OPT_HIST(length, name) \ @@ -311,6 +312,7 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0) #define GC_STAT_ADD(gen, name, n) ((void)0) #define OPT_STAT_INC(name) ((void)0) +#define OPT_STAT_ADD(name, value) ((void)0) #define UOP_STAT_INC(opname, name) ((void)0) #define OPT_UNSUPPORTED_OPCODE(opname) ((void)0) #define OPT_HIST(length, name) ((void)0) diff --git a/Python/optimizer.c b/Python/optimizer.c index ad9ac382d300ef..1dc86fa407015c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1135,10 +1135,12 @@ _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj) /* Invalidate all executors that depend on `obj` * May cause other executors to be invalidated as well + * Returns the number of executors that were invalidated. */ -void +int _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj) { + int count = 0; _PyBloomFilter obj_filter; _Py_BloomFilter_Init(&obj_filter); _Py_BloomFilter_Add(&obj_filter, obj); @@ -1149,9 +1151,11 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj) _PyExecutorObject *next = exec->vm_data.links.next; if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter)) { _Py_ExecutorClear(exec); + count++; } exec = next; } + return count; } /* Invalidate all executors */ @@ -1169,3 +1173,18 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp) } } } + +/* Return the number of executors */ +int +_Py_Executors_Count(PyInterpreterState *interp) +{ + int count = 0; + /* Walk the list of executors */ + for (_PyExecutorObject *exec = interp->executor_list_head; + exec != NULL; + exec = exec->vm_data.links.next) { + count++; + assert(exec->vm_data.valid); + } + return count; +} From cb800862620c4f85a58d224d192fd809b19fd24b Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 9 Feb 2024 12:59:54 -0500 Subject: [PATCH 3/4] Also increment in _PyInterpreterState_SetEvalFrameFunc --- Python/pystate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/pystate.c b/Python/pystate.c index 937c43033b068d..e81cc3633806f6 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2624,6 +2624,7 @@ _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState *interp, return; } if (eval_frame != NULL) { + OPT_STAT_ADD(executors_invalidated, _Py_Executors_Count(interp)); _Py_Executors_InvalidateAll(interp); } RARE_EVENT_INC(set_eval_frame_func); From db58a2f4c8e45901bad1f29caf05b802925f8c55 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 12 Feb 2024 11:33:29 -0500 Subject: [PATCH 4/4] Use a flag rather than counting externally --- Include/cpython/optimizer.h | 5 ++--- Include/internal/pycore_code.h | 2 -- Modules/_testinternalcapi.c | 2 +- Python/instrumentation.c | 6 +++--- Python/optimizer.c | 31 +++++++++---------------------- Python/optimizer_analysis.c | 2 +- Python/pylifecycle.c | 4 ++-- Python/pystate.c | 3 +-- Python/sysmodule.c | 2 +- 9 files changed, 20 insertions(+), 37 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 5f0c83e3d40d59..4641563dccd80d 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -87,9 +87,8 @@ void _Py_ExecutorClear(_PyExecutorObject *); void _Py_BloomFilter_Init(_PyBloomFilter *); void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); -PyAPI_FUNC(int) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj); -extern void _Py_Executors_InvalidateAll(PyInterpreterState *interp); -int _Py_Executors_Count(PyInterpreterState *interp); +PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); +extern void _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); /* For testing */ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 7b448889d3c6c9..fdd5918228455d 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -285,7 +285,6 @@ extern int _PyStaticCode_Init(PyCodeObject *co); do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0) #define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0) #define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0) -#define OPT_STAT_ADD(name, value) do { if (_Py_stats) _Py_stats->optimization_stats.name += value; } while (0) #define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0) #define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0) #define OPT_HIST(length, name) \ @@ -312,7 +311,6 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0) #define GC_STAT_ADD(gen, name, n) ((void)0) #define OPT_STAT_INC(name) ((void)0) -#define OPT_STAT_ADD(name, value) ((void)0) #define UOP_STAT_INC(opname, name) ((void)0) #define OPT_UNSUPPORTED_OPCODE(opname) ((void)0) #define OPT_HIST(length, name) ((void)0) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 3834f00009cea4..dd74cd1c45f2bd 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1033,7 +1033,7 @@ static PyObject * invalidate_executors(PyObject *self, PyObject *obj) { PyInterpreterState *interp = PyInterpreterState_Get(); - _Py_Executors_InvalidateDependency(interp, obj); + _Py_Executors_InvalidateDependency(interp, obj, 1); Py_RETURN_NONE; } diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 533aece210202b..d13915c1b887a0 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1574,7 +1574,7 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) if (code->co_executors != NULL) { _PyCode_Clear_Executors(code); } - _Py_Executors_InvalidateDependency(interp, code); + _Py_Executors_InvalidateDependency(interp, code, 1); int code_len = (int)Py_SIZE(code); /* Exit early to avoid creating instrumentation * data for potential statically allocated code @@ -1794,7 +1794,7 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) return -1; } set_global_version(interp, new_version); - _Py_Executors_InvalidateAll(interp); + _Py_Executors_InvalidateAll(interp, 1); return instrument_all_executing_code_objects(interp); } @@ -1824,7 +1824,7 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent /* Force instrumentation update */ code->_co_instrumentation_version -= MONITORING_VERSION_INCREMENT; } - _Py_Executors_InvalidateDependency(interp, code); + _Py_Executors_InvalidateDependency(interp, code, 1); if (_Py_Instrument(code, interp)) { return -1; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 1dc86fa407015c..28ff7b7a9591aa 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1135,12 +1135,10 @@ _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj) /* Invalidate all executors that depend on `obj` * May cause other executors to be invalidated as well - * Returns the number of executors that were invalidated. */ -int -_Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj) +void +_Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation) { - int count = 0; _PyBloomFilter obj_filter; _Py_BloomFilter_Init(&obj_filter); _Py_BloomFilter_Add(&obj_filter, obj); @@ -1151,16 +1149,17 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj) _PyExecutorObject *next = exec->vm_data.links.next; if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter)) { _Py_ExecutorClear(exec); - count++; + if (is_invalidation) { + OPT_STAT_INC(executors_invalidated); + } } exec = next; } - return count; } /* Invalidate all executors */ void -_Py_Executors_InvalidateAll(PyInterpreterState *interp) +_Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) { while (interp->executor_list_head) { _PyExecutorObject *executor = interp->executor_list_head; @@ -1171,20 +1170,8 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp) else { _Py_ExecutorClear(executor); } + if (is_invalidation) { + OPT_STAT_INC(executors_invalidated); + } } } - -/* Return the number of executors */ -int -_Py_Executors_Count(PyInterpreterState *interp) -{ - int count = 0; - /* Walk the list of executors */ - for (_PyExecutorObject *exec = interp->executor_list_head; - exec != NULL; - exec = exec->vm_data.links.next) { - count++; - assert(exec->vm_data.valid); - } - return count; -} diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index b14e6950b4a06b..6ee5509729f012 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -39,7 +39,7 @@ globals_watcher_callback(PyDict_WatchEvent event, PyObject* dict, { RARE_EVENT_STAT_INC(watched_globals_modification); assert(get_mutations(dict) < _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS); - _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), dict); + _Py_Executors_InvalidateDependency(_PyInterpreterState_GET(), dict, 1); increment_mutations(dict); PyDict_Unwatch(GLOBALS_WATCHER_ID, dict); return 0; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 230018068d751c..d94cd08b948a21 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -612,7 +612,7 @@ builtins_dict_watcher(PyDict_WatchEvent event, PyObject *dict, PyObject *key, Py { PyInterpreterState *interp = _PyInterpreterState_GET(); if (interp->rare_events.builtin_dict < _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) { - _Py_Executors_InvalidateAll(interp); + _Py_Executors_InvalidateAll(interp, 1); } RARE_EVENT_INTERP_INC(interp, builtin_dict); return 0; @@ -1626,7 +1626,7 @@ finalize_modules(PyThreadState *tstate) PyInterpreterState *interp = tstate->interp; // Invalidate all executors and turn off tier 2 optimizer - _Py_Executors_InvalidateAll(interp); + _Py_Executors_InvalidateAll(interp, 0); Py_XDECREF(interp->optimizer); interp->optimizer = &_PyOptimizer_Default; diff --git a/Python/pystate.c b/Python/pystate.c index e81cc3633806f6..d9bf548798a04c 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2624,8 +2624,7 @@ _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState *interp, return; } if (eval_frame != NULL) { - OPT_STAT_ADD(executors_invalidated, _Py_Executors_Count(interp)); - _Py_Executors_InvalidateAll(interp); + _Py_Executors_InvalidateAll(interp, 1); } RARE_EVENT_INC(set_eval_frame_func); interp->eval_frame = eval_frame; diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 69b6d886ccc3e9..1bfd031fdd26b2 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2138,7 +2138,7 @@ sys__clear_internal_caches_impl(PyObject *module) /*[clinic end generated code: output=0ee128670a4966d6 input=253e741ca744f6e8]*/ { PyInterpreterState *interp = _PyInterpreterState_GET(); - _Py_Executors_InvalidateAll(interp); + _Py_Executors_InvalidateAll(interp, 0); PyType_ClearCache(); Py_RETURN_NONE; }