|
10 | 10 | #include "pycore_initconfig.h" // _PyStatus_OK() |
11 | 11 | #include "pycore_interp.h" // PyInterpreterState.co_extra_freefuncs |
12 | 12 | #include "pycore_object.h" // _PyObject_SetDeferredRefcount |
| 13 | +#include "pycore_object_stack.h" |
13 | 14 | #include "pycore_opcode_metadata.h" // _PyOpcode_Deopt, _PyOpcode_Caches |
14 | 15 | #include "pycore_opcode_utils.h" // RESUME_AT_FUNC_START |
15 | 16 | #include "pycore_pymem.h" // _PyMem_FreeDelayed |
@@ -2824,4 +2825,138 @@ _PyCode_GetTLBC(PyCodeObject *co) |
2824 | 2825 | return result; |
2825 | 2826 | } |
2826 | 2827 |
|
| 2828 | +// My kingdom for a bitset |
| 2829 | +struct flag_set { |
| 2830 | + uint8_t *flags; |
| 2831 | + Py_ssize_t size; |
| 2832 | +}; |
| 2833 | + |
| 2834 | +static inline int |
| 2835 | +flag_is_set(struct flag_set *flags, Py_ssize_t idx) |
| 2836 | +{ |
| 2837 | + assert(idx >= 0); |
| 2838 | + return (idx < flags->size) && flags->flags[idx]; |
| 2839 | +} |
| 2840 | + |
| 2841 | +// Set the flag for each tlbc index in use |
| 2842 | +static int |
| 2843 | +get_indices_in_use(PyInterpreterState *interp, struct flag_set *in_use) |
| 2844 | +{ |
| 2845 | + assert(interp->stoptheworld.world_stopped); |
| 2846 | + assert(in_use->flags == NULL); |
| 2847 | + int32_t max_index = 0; |
| 2848 | + for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { |
| 2849 | + int32_t idx = ((_PyThreadStateImpl *) p)->tlbc_index; |
| 2850 | + if (idx > max_index) { |
| 2851 | + max_index = idx; |
| 2852 | + } |
| 2853 | + } |
| 2854 | + in_use->size = (size_t) max_index + 1; |
| 2855 | + in_use->flags = PyMem_Calloc(in_use->size, sizeof(*in_use->flags)); |
| 2856 | + if (in_use->flags == NULL) { |
| 2857 | + return -1; |
| 2858 | + } |
| 2859 | + for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { |
| 2860 | + in_use->flags[((_PyThreadStateImpl *) p)->tlbc_index] = 1; |
| 2861 | + } |
| 2862 | + return 0; |
| 2863 | +} |
| 2864 | + |
| 2865 | +struct get_code_args { |
| 2866 | + _PyObjectStack code_objs; |
| 2867 | + struct flag_set indices_in_use; |
| 2868 | + int err; |
| 2869 | +}; |
| 2870 | + |
| 2871 | +static void |
| 2872 | +clear_get_code_args(struct get_code_args *args) |
| 2873 | +{ |
| 2874 | + if (args->indices_in_use.flags != NULL) { |
| 2875 | + PyMem_Free(args->indices_in_use.flags); |
| 2876 | + args->indices_in_use.flags = NULL; |
| 2877 | + } |
| 2878 | + _PyObjectStack_Clear(&args->code_objs); |
| 2879 | +} |
| 2880 | + |
| 2881 | +static inline int |
| 2882 | +is_bytecode_unused(_PyCodeArray *tlbc, Py_ssize_t idx, |
| 2883 | + struct flag_set *indices_in_use) |
| 2884 | +{ |
| 2885 | + assert(idx > 0 && idx < tlbc->size); |
| 2886 | + return tlbc->entries[idx] != NULL && !flag_is_set(indices_in_use, idx); |
| 2887 | +} |
| 2888 | + |
| 2889 | +static int |
| 2890 | +get_code_with_unused_tlbc(PyObject *obj, struct get_code_args *args) |
| 2891 | +{ |
| 2892 | + if (!PyCode_Check(obj)) { |
| 2893 | + return 1; |
| 2894 | + } |
| 2895 | + PyCodeObject *co = (PyCodeObject *) obj; |
| 2896 | + _PyCodeArray *tlbc = co->co_tlbc; |
| 2897 | + // The first index always points at the main copy of the bytecode embedded |
| 2898 | + // in the code object. |
| 2899 | + for (Py_ssize_t i = 1; i < tlbc->size; i++) { |
| 2900 | + if (is_bytecode_unused(tlbc, i, &args->indices_in_use)) { |
| 2901 | + if (_PyObjectStack_Push(&args->code_objs, obj) < 0) { |
| 2902 | + args->err = -1; |
| 2903 | + return 0; |
| 2904 | + } |
| 2905 | + return 1; |
| 2906 | + } |
| 2907 | + } |
| 2908 | + return 1; |
| 2909 | +} |
| 2910 | + |
| 2911 | +static void |
| 2912 | +free_unused_bytecode(PyCodeObject *co, struct flag_set *indices_in_use) |
| 2913 | +{ |
| 2914 | + _PyCodeArray *tlbc = co->co_tlbc; |
| 2915 | + // The first index always points at the main copy of the bytecode embedded |
| 2916 | + // in the code object. |
| 2917 | + for (Py_ssize_t i = 1; i < tlbc->size; i++) { |
| 2918 | + if (is_bytecode_unused(tlbc, i, indices_in_use)) { |
| 2919 | + PyMem_Free(tlbc->entries[i]); |
| 2920 | + tlbc->entries[i] = NULL; |
| 2921 | + } |
| 2922 | + } |
| 2923 | +} |
| 2924 | + |
| 2925 | +int |
| 2926 | +_Py_ClearUnusedTLBC(PyInterpreterState *interp) |
| 2927 | +{ |
| 2928 | + struct get_code_args args = { |
| 2929 | + .code_objs = {NULL}, |
| 2930 | + .indices_in_use = {NULL, 0}, |
| 2931 | + .err = 0, |
| 2932 | + }; |
| 2933 | + _PyEval_StopTheWorld(interp); |
| 2934 | + // Collect in-use tlbc indices |
| 2935 | + if (get_indices_in_use(interp, &args.indices_in_use) < 0) { |
| 2936 | + goto err; |
| 2937 | + } |
| 2938 | + // Collect code objects that have bytecode not in use by any thread |
| 2939 | + _PyGC_VisitObjectsWorldStopped( |
| 2940 | + interp, (gcvisitobjects_t)get_code_with_unused_tlbc, &args); |
| 2941 | + if (args.err < 0) { |
| 2942 | + goto err; |
| 2943 | + } |
| 2944 | + // Free unused bytecode. This must happen outside of gc_visit_heaps; it is |
| 2945 | + // unsafe to allocate or free any mimalloc managed memory when it's |
| 2946 | + // running. |
| 2947 | + PyObject *obj; |
| 2948 | + while ((obj = _PyObjectStack_Pop(&args.code_objs)) != NULL) { |
| 2949 | + free_unused_bytecode((PyCodeObject*) obj, &args.indices_in_use); |
| 2950 | + } |
| 2951 | + _PyEval_StartTheWorld(interp); |
| 2952 | + clear_get_code_args(&args); |
| 2953 | + return 0; |
| 2954 | + |
| 2955 | +err: |
| 2956 | + _PyEval_StartTheWorld(interp); |
| 2957 | + clear_get_code_args(&args); |
| 2958 | + PyErr_NoMemory(); |
| 2959 | + return -1; |
| 2960 | +} |
| 2961 | + |
2827 | 2962 | #endif |
0 commit comments