From 465f0c01f26dc76c82501b89d08f687878d46b0a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 28 Feb 2022 14:33:51 +0000 Subject: [PATCH 1/9] Use inline cache for BINARY_SUBSCR. Work in progress. --- Include/cpython/code.h | 2 ++ Include/internal/pycore_code.h | 31 ++++++++++++++++++++++- Include/opcode.h | 1 + Lib/opcode.py | 2 +- Python/ceval.c | 28 ++++++++++++--------- Python/specialize.c | 46 +++++++++++++++++++++++++--------- 6 files changed, 84 insertions(+), 26 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 21f8fe7ddad4a7..7609c96aadb08c 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -58,6 +58,7 @@ struct PyCodeObject { _Py_CODEUNIT *co_firstinstr; /* Pointer to first instruction, used for quickening. Unlike the other "hot" fields, this one is actually derived from co_code. */ + PyObject **_co_obj_cache; /* Array of borrowed references to objects, for specialized code. */ PyObject *co_exceptiontable; /* Byte string encoding exception handling table */ int co_flags; /* CO_..., see below */ int co_warmup; /* Warmup counter for quickening */ @@ -90,6 +91,7 @@ struct PyCodeObject { int co_nplaincellvars; /* number of non-arg cell variables */ int co_ncellvars; /* total number of cell variables */ int co_nfreevars; /* number of free variables */ + int _co_obj_cache_len; /* number of entries in _co_obj_cache */ // lazily-computed values PyObject *co_varnames; /* tuple of strings (local variable names) */ PyObject *co_cellvars; /* tuple of strings (cell variable names) */ diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 0e401d61f76733..777496554fb7da 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -72,12 +72,32 @@ typedef struct { _Py_CODEUNIT counter; } _PyUnpackSequenceCache; + + + + + + + + + +typedef struct { + _Py_CODEUNIT counter; + _Py_CODEUNIT object; + _Py_CODEUNIT type_version; + _Py_CODEUNIT _t1; + _Py_CODEUNIT func_version; 
+} _PyBinarySubscrCache; + #define INLINE_CACHE_ENTRIES_BINARY_OP \ (sizeof(_PyBinaryOpCache) / sizeof(_Py_CODEUNIT)) #define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \ (sizeof(_PyUnpackSequenceCache) / sizeof(_Py_CODEUNIT)) +#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR \ + (sizeof(_PyBinarySubscrCache) / sizeof(_Py_CODEUNIT)) + /* Maximum size of code to quicken, in code units. */ #define MAX_SIZE_TO_QUICKEN 5000 @@ -98,6 +118,15 @@ _GetSpecializedCacheEntry(const _Py_CODEUNIT *first_instr, Py_ssize_t n) return &last_cache_plus_one[-1-n].entry; } +/* Returns a borrowed reference */ +static inline PyObject* +_PyQuickenedGetObject(const _Py_CODEUNIT *first_instr, uint16_t index) +{ + SpecializedCacheOrInstruction *last_cache_plus_one = (SpecializedCacheOrInstruction *)first_instr; + assert(&last_cache_plus_one->code[0] == first_instr); + return last_cache_plus_one[-1-index].entry.obj.obj; +} + /* Following two functions form a pair. * * oparg_from_offset_and_index() is used to compute the oparg @@ -309,7 +338,7 @@ extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObjec extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); -extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache); +extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, PyCodeObject *code); extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr); extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, PyObject *kwnames, SpecializedCacheEntry *cache); diff --git a/Include/opcode.h 
b/Include/opcode.h index ae21d92a865e59..b805f220aa23e8 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -241,6 +241,7 @@ static uint32_t _PyOpcode_Jump[8] = { #define NB_INPLACE_XOR 25 static const uint8_t _PyOpcode_InlineCacheEntries[256] = { + [BINARY_SUBSCR] = 5, [UNPACK_SEQUENCE] = 1, [BINARY_OP] = 1, }; diff --git a/Lib/opcode.py b/Lib/opcode.py index 8fa71bf4d180a7..678cfba011fc5f 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -68,7 +68,7 @@ def jabs_op(name, op, entries=0): def_op('UNARY_INVERT', 15) -def_op('BINARY_SUBSCR', 25) +def_op('BINARY_SUBSCR', 25, 5) def_op('GET_LEN', 30) def_op('MATCH_MAPPING', 31) diff --git a/Python/ceval.c b/Python/ceval.c index fe757829729925..c7ab62edbafcbb 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2102,25 +2102,24 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int SET_TOP(res); if (res == NULL) goto error; + JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); DISPATCH(); } TARGET(BINARY_SUBSCR_ADAPTIVE) { - SpecializedCacheEntry *cache = GET_CACHE(); - if (cache->adaptive.counter == 0) { + _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; + if (cache->counter == 0) { PyObject *sub = TOP(); PyObject *container = SECOND(); next_instr--; - if (_Py_Specialize_BinarySubscr(container, sub, next_instr, cache) < 0) { + if (_Py_Specialize_BinarySubscr(container, sub, next_instr, frame->f_code) < 0) { goto error; } DISPATCH(); } else { STAT_INC(BINARY_SUBSCR, deferred); - cache->adaptive.counter--; - assert(cache->adaptive.original_oparg == 0); - /* No need to set oparg here; it isn't used by BINARY_SUBSCR */ + cache->counter--; JUMP_TO_INSTRUCTION(BINARY_SUBSCR); } } @@ -2146,6 +2145,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int Py_DECREF(sub); SET_TOP(res); Py_DECREF(list); + JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); NOTRACE_DISPATCH(); } @@ -2170,6 +2170,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame 
*frame, int Py_DECREF(sub); SET_TOP(res); Py_DECREF(tuple); + JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); NOTRACE_DISPATCH(); } @@ -2188,18 +2189,20 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int Py_DECREF(sub); SET_TOP(res); Py_DECREF(dict); + JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); DISPATCH(); } TARGET(BINARY_SUBSCR_GETITEM) { PyObject *sub = TOP(); PyObject *container = SECOND(); - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - _PyObjectCache *cache1 = &caches[-1].obj; - PyFunctionObject *getitem = (PyFunctionObject *)cache1->obj; - DEOPT_IF(Py_TYPE(container)->tp_version_tag != cache0->version, BINARY_SUBSCR); - DEOPT_IF(getitem->func_version != cache0->index, BINARY_SUBSCR); + _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; + PyObject *cached = _PyQuickenedGetObject(first_instr, cache->object); + assert(PyFunction_Check(cached)); + PyFunctionObject *getitem = (PyFunctionObject *)cached; + uint32_t type_version = read32(&cache->type_version); + DEOPT_IF(Py_TYPE(container)->tp_version_tag != type_version, BINARY_SUBSCR); + DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR); PyCodeObject *code = (PyCodeObject *)getitem->func_code; size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE; assert(code->co_argcount == 2); @@ -2221,6 +2224,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int new_frame->previous = frame; frame = cframe.current_frame = new_frame; CALL_STAT_INC(inlined_py_calls); + JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); goto start_frame; } diff --git a/Python/specialize.c b/Python/specialize.c index b88c5d517bd8fd..40e2115b2113f4 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -61,7 +61,6 @@ static uint8_t cache_requirements[256] = { [LOAD_ATTR] = 1, // _PyAdaptiveEntry [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */ [LOAD_METHOD] = 3, /* 
_PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */ - [BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */ [STORE_SUBSCR] = 0, [CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ [PRECALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */ @@ -69,6 +68,11 @@ static uint8_t cache_requirements[256] = { [COMPARE_OP] = 1, /* _PyAdaptiveEntry */ }; +/* The number of object cache entries required for a "family" of instructions. */ +static const uint8_t object_cache_requirements[256] = { + [BINARY_SUBSCR] = 5, +}; + Py_ssize_t _Py_QuickenedCount = 0; #ifdef Py_STATS PyStats _py_stats = { 0 }; @@ -287,6 +291,14 @@ _Py_PrintSpecializationStats(int to_file) #define SPECIALIZATION_FAIL(opcode, kind) ((void)0) #endif +static void +_PyQuickenedSetObject(const _Py_CODEUNIT *first_instr, uint16_t index, PyObject *obj) +{ + SpecializedCacheOrInstruction *last_cache_plus_one = (SpecializedCacheOrInstruction *)first_instr; + assert(&last_cache_plus_one->code[0] == first_instr); + last_cache_plus_one[-1-index].entry.obj.obj = obj; +} + static SpecializedCacheOrInstruction * allocate(int cache_count, int instruction_count) { @@ -353,7 +365,10 @@ entries_needed(const _Py_CODEUNIT *code, int len) int previous_opcode = -1; for (int i = 0; i < len; i++) { uint8_t opcode = _Py_OPCODE(code[i]); - if (previous_opcode != EXTENDED_ARG) { + if (object_cache_requirements[opcode]) { + cache_offset += object_cache_requirements[opcode]; + } + else if (previous_opcode != EXTENDED_ARG) { oparg_from_instruction_and_update_offset(i, opcode, 0, &cache_offset); } previous_opcode = opcode; @@ -387,6 +402,11 @@ optimize(SpecializedCacheOrInstruction *quickened, int len) if (adaptive_opcode) { if (_PyOpcode_InlineCacheEntries[opcode]) { instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, oparg); + if (object_cache_requirements[opcode]) { + assert(_PyOpcode_InlineCacheEntries[opcode] >= 2); + instructions[i+2] = cache_offset; + cache_offset += 
object_cache_requirements[opcode]; + } } else if (previous_opcode != EXTENDED_ARG) { int new_oparg = oparg_from_instruction_and_update_offset( @@ -1332,9 +1352,11 @@ function_kind(PyCodeObject *code) { int _Py_Specialize_BinarySubscr( - PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache) + PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, PyCodeObject *code) { - _PyAdaptiveEntry *cache0 = &cache->adaptive; + assert(_PyOpcode_InlineCacheEntries[BINARY_SUBSCR] == + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1); PyTypeObject *container_type = Py_TYPE(container); if (container_type == &PyList_Type) { if (PyLong_CheckExact(sub)) { @@ -1362,25 +1384,25 @@ _Py_Specialize_BinarySubscr( PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__)); if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) { PyFunctionObject *func = (PyFunctionObject *)descriptor; - PyCodeObject *code = (PyCodeObject *)func->func_code; - int kind = function_kind(code); + PyCodeObject *fcode = (PyCodeObject *)func->func_code; + int kind = function_kind(fcode); if (kind != SIMPLE_FUNCTION) { SPECIALIZATION_FAIL(BINARY_SUBSCR, kind); goto fail; } - if (code->co_argcount != 2) { + if (fcode->co_argcount != 2) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); goto fail; } assert(cls->tp_version_tag != 0); - cache0->version = cls->tp_version_tag; + cache->type_version = cls->tp_version_tag; int version = _PyFunction_GetVersionForCurrentState(func); if (version == 0 || version != (uint16_t)version) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; } - cache0->index = version; - cache[-1].obj.obj = descriptor; + cache->func_version = version; + _PyQuickenedSetObject(code->co_firstinstr, cache->object, descriptor); *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr)); goto success; } @@ -1389,12 +1411,12 @@ _Py_Specialize_BinarySubscr( 
fail: STAT_INC(BINARY_SUBSCR, failure); assert(!PyErr_Occurred()); - cache_backoff(cache0); + cache->counter = ADAPTIVE_CACHE_BACKOFF; return 0; success: STAT_INC(BINARY_SUBSCR, success); assert(!PyErr_Occurred()); - cache0->counter = initial_counter_value(); + cache->counter = initial_counter_value(); return 0; } From a9f7043956bad266c74f8a8725ada087e5d06fcc Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 28 Feb 2022 15:44:30 +0000 Subject: [PATCH 2/9] Finish inlining cache for BINARY_SUBSCR. --- Lib/importlib/_bootstrap_external.py | 3 +- Lib/test/test_capi.py | 2 +- Objects/codeobject.c | 4 ++ Programs/test_frozenmain.h | 63 +++++++++++++++------------- Python/ceval.c | 8 ++-- Python/specialize.c | 2 +- 6 files changed, 45 insertions(+), 37 deletions(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index f051dfe9492f58..350ed048743e4a 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -389,6 +389,7 @@ def _write_atomic(path, data, mode=0o666): # Python 3.11a5 3480 (New CALL opcodes, second iteration) # Python 3.11a5 3481 (Use inline cache for BINARY_OP) # Python 3.11a5 3482 (Use inline caching for UNPACK_SEQUENCE and LOAD_GLOBAL) +# Python 3.11a5 3483 (Use inline caching for BINARY_SUBSCR) # Python 3.12 will start with magic number 3500 @@ -403,7 +404,7 @@ def _write_atomic(path, data, mode=0o666): # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. 
-MAGIC_NUMBER = (3482).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3483).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c _PYCACHE = '__pycache__' diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py index 8832292a9991aa..d9615430327a40 100644 --- a/Lib/test/test_capi.py +++ b/Lib/test/test_capi.py @@ -335,7 +335,7 @@ class C(): pass *_, count = line.split(b' ') count = int(count) self.assertLessEqual(count, i*5) - self.assertGreaterEqual(count, i*5-1) + self.assertGreaterEqual(count, i*5-2) def test_mapping_keys_values_items(self): class Mapping1(dict): diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 18846d20ffd833..108dbbaa322564 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1925,6 +1925,10 @@ _PyStaticCode_Dealloc(PyCodeObject *co) co->co_quickened = NULL; _Py_QuickenedCount--; } + if (co->_co_obj_cache) { + PyMem_Free(co->_co_obj_cache); + co->_co_obj_cache = NULL; + } co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE; PyMem_Free(co->co_extra); co->co_extra = NULL; diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 11593a9ba3d68f..67118e1d4157ef 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,37 +1,40 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, - 0,0,0,0,0,115,104,0,0,0,151,0,100,0,100,1, + 0,0,0,0,0,115,124,0,0,0,151,0,100,0,100,1, 108,0,90,0,100,0,100,1,108,1,90,1,2,0,101,2, 100,2,166,1,171,1,1,0,2,0,101,2,100,3,101,0, 106,3,166,2,171,2,1,0,2,0,101,1,106,4,166,0, - 171,0,100,4,25,0,90,5,100,5,68,0,93,16,90,6, - 2,0,101,2,100,6,101,6,155,0,100,7,101,5,101,6, - 25,0,155,0,157,4,166,1,171,1,1,0,113,33,100,1, - 83,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122, - 101,110,32,72,101,108,108,111,32,87,111,114,108,100,122,8, - 115,121,115,46,97,114,103,118,218,6,99,111,110,102,105,103, - 
41,5,90,12,112,114,111,103,114,97,109,95,110,97,109,101, - 218,10,101,120,101,99,117,116,97,98,108,101,90,15,117,115, - 101,95,101,110,118,105,114,111,110,109,101,110,116,90,17,99, - 111,110,102,105,103,117,114,101,95,99,95,115,116,100,105,111, - 90,14,98,117,102,102,101,114,101,100,95,115,116,100,105,111, - 122,7,99,111,110,102,105,103,32,122,2,58,32,41,7,218, - 3,115,121,115,90,17,95,116,101,115,116,105,110,116,101,114, - 110,97,108,99,97,112,105,218,5,112,114,105,110,116,218,4, - 97,114,103,118,90,11,103,101,116,95,99,111,110,102,105,103, - 115,114,2,0,0,0,218,3,107,101,121,169,0,243,0,0, - 0,0,250,18,116,101,115,116,95,102,114,111,122,101,110,109, - 97,105,110,46,112,121,250,8,60,109,111,100,117,108,101,62, - 114,11,0,0,0,1,0,0,0,115,18,0,0,0,2,128, - 8,3,8,1,12,2,16,1,16,1,8,1,30,7,4,249, - 115,20,0,0,0,2,128,8,3,8,1,12,2,16,1,16, - 1,2,7,4,1,2,249,34,7,115,104,0,0,0,0,0, - 1,11,1,11,1,11,1,11,1,25,1,25,1,25,1,25, - 1,6,1,6,7,27,1,28,1,28,1,28,1,6,1,6, - 7,17,19,22,19,27,1,28,1,28,1,28,10,39,10,27, - 10,39,10,41,10,41,42,50,10,51,1,7,12,2,1,42, - 1,42,5,8,5,10,5,10,11,41,21,24,11,41,11,41, - 28,34,35,38,28,39,11,41,11,41,5,42,5,42,5,42, - 5,42,1,42,1,42,114,9,0,0,0, + 171,0,100,4,25,0,3,0,3,0,3,0,3,0,3,0, + 90,5,100,5,68,0,93,21,90,6,2,0,101,2,100,6, + 101,6,155,0,100,7,101,5,101,6,25,0,3,0,3,0, + 3,0,3,0,3,0,155,0,157,4,166,1,171,1,1,0, + 113,38,100,1,83,0,41,8,233,0,0,0,0,78,122,18, + 70,114,111,122,101,110,32,72,101,108,108,111,32,87,111,114, + 108,100,122,8,115,121,115,46,97,114,103,118,218,6,99,111, + 110,102,105,103,41,5,90,12,112,114,111,103,114,97,109,95, + 110,97,109,101,218,10,101,120,101,99,117,116,97,98,108,101, + 90,15,117,115,101,95,101,110,118,105,114,111,110,109,101,110, + 116,90,17,99,111,110,102,105,103,117,114,101,95,99,95,115, + 116,100,105,111,90,14,98,117,102,102,101,114,101,100,95,115, + 116,100,105,111,122,7,99,111,110,102,105,103,32,122,2,58, + 32,41,7,218,3,115,121,115,90,17,95,116,101,115,116,105, + 
110,116,101,114,110,97,108,99,97,112,105,218,5,112,114,105, + 110,116,218,4,97,114,103,118,90,11,103,101,116,95,99,111, + 110,102,105,103,115,114,2,0,0,0,218,3,107,101,121,169, + 0,243,0,0,0,0,250,18,116,101,115,116,95,102,114,111, + 122,101,110,109,97,105,110,46,112,121,250,8,60,109,111,100, + 117,108,101,62,114,11,0,0,0,1,0,0,0,115,18,0, + 0,0,2,128,8,3,8,1,12,2,16,1,26,1,8,1, + 40,7,4,249,115,20,0,0,0,2,128,8,3,8,1,12, + 2,16,1,26,1,2,7,4,1,2,249,44,7,115,124,0, + 0,0,0,0,1,11,1,11,1,11,1,11,1,25,1,25, + 1,25,1,25,1,6,1,6,7,27,1,28,1,28,1,28, + 1,6,1,6,7,17,19,22,19,27,1,28,1,28,1,28, + 10,39,10,27,10,39,10,41,10,41,42,50,10,51,10,51, + 10,51,10,51,10,51,10,51,1,7,12,2,1,42,1,42, + 5,8,5,10,5,10,11,41,21,24,11,41,11,41,28,34, + 35,38,28,39,28,39,28,39,28,39,28,39,28,39,11,41, + 11,41,5,42,5,42,5,42,5,42,1,42,1,42,114,9, + 0,0,0, }; diff --git a/Python/ceval.c b/Python/ceval.c index 65348903732d23..2df8ba7b933f60 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2198,10 +2198,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int PyObject *container = SECOND(); _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; PyObject *cached = frame->f_code->_co_obj_cache[cache->object]; - assert(PyFunction_Check(cached)); - PyFunctionObject *getitem = (PyFunctionObject *)cached; uint32_t type_version = read32(&cache->type_version); DEOPT_IF(Py_TYPE(container)->tp_version_tag != type_version, BINARY_SUBSCR); + assert(PyFunction_Check(cached)); + PyFunctionObject *getitem = (PyFunctionObject *)cached; DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR); PyCodeObject *code = (PyCodeObject *)getitem->func_code; size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE; @@ -2221,10 +2221,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int new_frame->localsplus[i] = NULL; } _PyFrame_SetStackPointer(frame, stack_pointer); + frame->f_lasti += 
INLINE_CACHE_ENTRIES_BINARY_SUBSCR; new_frame->previous = frame; frame = cframe.current_frame = new_frame; CALL_STAT_INC(inlined_py_calls); - JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); goto start_frame; } @@ -5606,7 +5606,7 @@ MISS_WITH_CACHE(PRECALL) MISS_WITH_CACHE(CALL) MISS_WITH_INLINE_CACHE(BINARY_OP) MISS_WITH_CACHE(COMPARE_OP) -MISS_WITH_CACHE(BINARY_SUBSCR) +MISS_WITH_INLINE_CACHE(BINARY_SUBSCR) MISS_WITH_INLINE_CACHE(UNPACK_SEQUENCE) MISS_WITH_OPARG_COUNTER(STORE_SUBSCR) diff --git a/Python/specialize.c b/Python/specialize.c index d807991c85a9c7..6065e69c1f3ded 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1409,7 +1409,7 @@ _Py_Specialize_BinarySubscr( goto fail; } assert(cls->tp_version_tag != 0); - cache->type_version = cls->tp_version_tag; + write32(&cache->type_version, cls->tp_version_tag); int version = _PyFunction_GetVersionForCurrentState(func); if (version == 0 || version != (uint16_t)version) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS); From a3305741b27de31d45da363046dbfd2bf7b7e691 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 28 Feb 2022 15:52:21 +0000 Subject: [PATCH 3/9] Tidy up --- Include/internal/pycore_code.h | 18 ++++-------------- .../2022-02-28-15-46-36.bpo-46841.MDQoty.rst | 1 + 2 files changed, 5 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-02-28-15-46-36.bpo-46841.MDQoty.rst diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index ef65ad9c97657c..a5b83327e50271 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -77,18 +77,13 @@ typedef struct { } _PyBinaryOpCache; #define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache) + typedef struct { _Py_CODEUNIT counter; } _PyUnpackSequenceCache; - - - - - - - - +#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \ + CACHE_ENTRIES(_PyUnpackSequenceCache) typedef struct { _Py_CODEUNIT counter; @@ -98,12 +93,7 @@ typedef struct { 
_Py_CODEUNIT func_version; } _PyBinarySubscrCache; - -#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \ - (sizeof(_PyUnpackSequenceCache) / sizeof(_Py_CODEUNIT)) - -#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR \ - (sizeof(_PyBinarySubscrCache) / sizeof(_Py_CODEUNIT)) +#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache) /* Maximum size of code to quicken, in code units. */ #define MAX_SIZE_TO_QUICKEN 5000 diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-02-28-15-46-36.bpo-46841.MDQoty.rst b/Misc/NEWS.d/next/Core and Builtins/2022-02-28-15-46-36.bpo-46841.MDQoty.rst new file mode 100644 index 00000000000000..97b03debcf092d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-02-28-15-46-36.bpo-46841.MDQoty.rst @@ -0,0 +1 @@ +Use inline cache for :opcode:`BINARY_SUBSCR`. From e4b0ba006f9df5c7277a6a9ac1932c06de551d56 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 1 Mar 2022 11:10:28 +0000 Subject: [PATCH 4/9] Address review comments. --- Include/cpython/code.h | 1 - Python/specialize.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 7609c96aadb08c..b53c524572d58e 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -91,7 +91,6 @@ struct PyCodeObject { int co_nplaincellvars; /* number of non-arg cell variables */ int co_ncellvars; /* total number of cell variables */ int co_nfreevars; /* number of free variables */ - int _co_obj_cache_len; /* number of entries in _co_obj_cache */ // lazily-computed values PyObject *co_varnames; /* tuple of strings (local variable names) */ PyObject *co_cellvars; /* tuple of strings (cell variable names) */ diff --git a/Python/specialize.c b/Python/specialize.c index 6065e69c1f3ded..134757b1d4ced2 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -489,7 +489,6 @@ _Py_Quicken(PyCodeObject *code) { } int obj_count = object_slots_needed(code->co_firstinstr, instr_count); code->_co_obj_cache = 
PyMem_Malloc(obj_count*sizeof(PyObject *)); - code->_co_obj_cache_len = obj_count; if (code->_co_obj_cache == NULL) { return -1; } @@ -1417,8 +1416,7 @@ _Py_Specialize_BinarySubscr( } cache->func_version = version; assert(code->_co_obj_cache != NULL); - assert(cache->object >= 0 && cache->object < code->_co_obj_cache_len); - code->_co_obj_cache[cache->object] = descriptor; + code->_co_obj_cache[cache->object] = descriptor; *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr)); goto success; } From 05787b3cd7311c13440057e656d04ee6da44ac79 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 1 Mar 2022 12:31:19 +0000 Subject: [PATCH 5/9] Remove per-code-object-cache and add small per-heap-type cache. --- Include/cpython/code.h | 1 - Include/cpython/object.h | 8 +++++ Include/internal/pycore_code.h | 1 - Include/opcode.h | 2 +- Lib/opcode.py | 2 +- Lib/test/test_sys.py | 4 ++- Objects/codeobject.c | 8 ----- Programs/test_frozenmain.h | 65 +++++++++++++++++----------------- Python/ceval.c | 6 ++-- Python/specialize.c | 25 ++++--------- 10 files changed, 55 insertions(+), 67 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index b53c524572d58e..21f8fe7ddad4a7 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -58,7 +58,6 @@ struct PyCodeObject { _Py_CODEUNIT *co_firstinstr; /* Pointer to first instruction, used for quickening. Unlike the other "hot" fields, this one is actually derived from co_code. */ - PyObject **_co_obj_cache; /* Array of borrowed references to objects, for specialized code. 
*/ PyObject *co_exceptiontable; /* Byte string encoding exception handling table */ int co_flags; /* CO_..., see below */ int co_warmup; /* Warmup counter for quickening */ diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 5f978eec465809..736c2043a5d54b 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -229,6 +229,13 @@ struct _typeobject { vectorcallfunc tp_vectorcall; }; +/* This struct is used by the specializer + * It should be treated as an opaque blob + * by any other code */ +struct _specialization_cache { + PyObject *getitem; +}; + /* The *real* layout of a type object when allocated on the heap */ typedef struct _heaptypeobject { /* Note: there's a dependency on the order of these members @@ -247,6 +254,7 @@ typedef struct _heaptypeobject { struct _dictkeysobject *ht_cached_keys; PyObject *ht_module; char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec + struct _specialization_cache _spec_cache; // Internal -- DO NOT USE. /* here are optional user slots, followed by the members. 
*/ } PyHeapTypeObject; diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index a5b83327e50271..9811a70cb2c3c0 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -87,7 +87,6 @@ typedef struct { typedef struct { _Py_CODEUNIT counter; - _Py_CODEUNIT object; _Py_CODEUNIT type_version; _Py_CODEUNIT _t1; _Py_CODEUNIT func_version; diff --git a/Include/opcode.h b/Include/opcode.h index 004a528f21f0fd..75b7fd2019fab9 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -211,7 +211,7 @@ static const uint32_t _PyOpcode_Jump[8] = { }; const uint8_t _PyOpcode_InlineCacheEntries[256] = { - [BINARY_SUBSCR] = 5, + [BINARY_SUBSCR] = 4, [UNPACK_SEQUENCE] = 1, [LOAD_GLOBAL] = 5, [BINARY_OP] = 1, diff --git a/Lib/opcode.py b/Lib/opcode.py index 4ad97c54f78705..3cc1db9366cafe 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -68,7 +68,7 @@ def jabs_op(name, op, entries=0): def_op('UNARY_INVERT', 15) -def_op('BINARY_SUBSCR', 25, 5) +def_op('BINARY_SUBSCR', 25, 4) def_op('GET_LEN', 30) def_op('MATCH_MAPPING', 31) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index f828d1b15d2868..70768f56fa9f11 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1501,7 +1501,9 @@ def delx(self): del self.__x '3P' # PyMappingMethods '10P' # PySequenceMethods '2P' # PyBufferProcs - '6P') + '6P' + '1P' # Specializer cache + ) class newstyleclass(object): pass # Separate block for PyDictKeysObject with 8 keys and 5 entries check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P")) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 108dbbaa322564..5a87e6c4ff8777 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -348,7 +348,6 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) /* not set */ co->co_weakreflist = NULL; co->co_extra = NULL; - co->_co_obj_cache = NULL; co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE; co->co_quickened = NULL; @@ -1370,9 +1369,6 @@ 
code_dealloc(PyCodeObject *co) PyMem_Free(co->co_quickened); _Py_QuickenedCount--; } - if (co->_co_obj_cache) { - PyMem_Free(co->_co_obj_cache); - } PyObject_Free(co); } @@ -1925,10 +1921,6 @@ _PyStaticCode_Dealloc(PyCodeObject *co) co->co_quickened = NULL; _Py_QuickenedCount--; } - if (co->_co_obj_cache) { - PyMem_Free(co->_co_obj_cache); - co->_co_obj_cache = NULL; - } co->co_warmup = QUICKENING_INITIAL_WARMUP_VALUE; PyMem_Free(co->co_extra); co->co_extra = NULL; diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 67118e1d4157ef..3fef981e42ff96 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,40 +1,39 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, - 0,0,0,0,0,115,124,0,0,0,151,0,100,0,100,1, + 0,0,0,0,0,115,120,0,0,0,151,0,100,0,100,1, 108,0,90,0,100,0,100,1,108,1,90,1,2,0,101,2, 100,2,166,1,171,1,1,0,2,0,101,2,100,3,101,0, 106,3,166,2,171,2,1,0,2,0,101,1,106,4,166,0, - 171,0,100,4,25,0,3,0,3,0,3,0,3,0,3,0, - 90,5,100,5,68,0,93,21,90,6,2,0,101,2,100,6, - 101,6,155,0,100,7,101,5,101,6,25,0,3,0,3,0, - 3,0,3,0,3,0,155,0,157,4,166,1,171,1,1,0, - 113,38,100,1,83,0,41,8,233,0,0,0,0,78,122,18, - 70,114,111,122,101,110,32,72,101,108,108,111,32,87,111,114, - 108,100,122,8,115,121,115,46,97,114,103,118,218,6,99,111, - 110,102,105,103,41,5,90,12,112,114,111,103,114,97,109,95, - 110,97,109,101,218,10,101,120,101,99,117,116,97,98,108,101, - 90,15,117,115,101,95,101,110,118,105,114,111,110,109,101,110, - 116,90,17,99,111,110,102,105,103,117,114,101,95,99,95,115, - 116,100,105,111,90,14,98,117,102,102,101,114,101,100,95,115, - 116,100,105,111,122,7,99,111,110,102,105,103,32,122,2,58, - 32,41,7,218,3,115,121,115,90,17,95,116,101,115,116,105, - 110,116,101,114,110,97,108,99,97,112,105,218,5,112,114,105, - 110,116,218,4,97,114,103,118,90,11,103,101,116,95,99,111, - 110,102,105,103,115,114,2,0,0,0,218,3,107,101,121,169, - 
0,243,0,0,0,0,250,18,116,101,115,116,95,102,114,111, - 122,101,110,109,97,105,110,46,112,121,250,8,60,109,111,100, - 117,108,101,62,114,11,0,0,0,1,0,0,0,115,18,0, - 0,0,2,128,8,3,8,1,12,2,16,1,26,1,8,1, - 40,7,4,249,115,20,0,0,0,2,128,8,3,8,1,12, - 2,16,1,26,1,2,7,4,1,2,249,44,7,115,124,0, - 0,0,0,0,1,11,1,11,1,11,1,11,1,25,1,25, - 1,25,1,25,1,6,1,6,7,27,1,28,1,28,1,28, - 1,6,1,6,7,17,19,22,19,27,1,28,1,28,1,28, - 10,39,10,27,10,39,10,41,10,41,42,50,10,51,10,51, - 10,51,10,51,10,51,10,51,1,7,12,2,1,42,1,42, - 5,8,5,10,5,10,11,41,21,24,11,41,11,41,28,34, - 35,38,28,39,28,39,28,39,28,39,28,39,28,39,11,41, - 11,41,5,42,5,42,5,42,5,42,1,42,1,42,114,9, - 0,0,0, + 171,0,100,4,25,0,3,0,3,0,3,0,3,0,90,5, + 100,5,68,0,93,20,90,6,2,0,101,2,100,6,101,6, + 155,0,100,7,101,5,101,6,25,0,3,0,3,0,3,0, + 3,0,155,0,157,4,166,1,171,1,1,0,113,37,100,1, + 83,0,41,8,233,0,0,0,0,78,122,18,70,114,111,122, + 101,110,32,72,101,108,108,111,32,87,111,114,108,100,122,8, + 115,121,115,46,97,114,103,118,218,6,99,111,110,102,105,103, + 41,5,90,12,112,114,111,103,114,97,109,95,110,97,109,101, + 218,10,101,120,101,99,117,116,97,98,108,101,90,15,117,115, + 101,95,101,110,118,105,114,111,110,109,101,110,116,90,17,99, + 111,110,102,105,103,117,114,101,95,99,95,115,116,100,105,111, + 90,14,98,117,102,102,101,114,101,100,95,115,116,100,105,111, + 122,7,99,111,110,102,105,103,32,122,2,58,32,41,7,218, + 3,115,121,115,90,17,95,116,101,115,116,105,110,116,101,114, + 110,97,108,99,97,112,105,218,5,112,114,105,110,116,218,4, + 97,114,103,118,90,11,103,101,116,95,99,111,110,102,105,103, + 115,114,2,0,0,0,218,3,107,101,121,169,0,243,0,0, + 0,0,250,18,116,101,115,116,95,102,114,111,122,101,110,109, + 97,105,110,46,112,121,250,8,60,109,111,100,117,108,101,62, + 114,11,0,0,0,1,0,0,0,115,18,0,0,0,2,128, + 8,3,8,1,12,2,16,1,24,1,8,1,38,7,4,249, + 115,20,0,0,0,2,128,8,3,8,1,12,2,16,1,24, + 1,2,7,4,1,2,249,42,7,115,120,0,0,0,0,0, + 1,11,1,11,1,11,1,11,1,25,1,25,1,25,1,25, + 1,6,1,6,7,27,1,28,1,28,1,28,1,6,1,6, + 
7,17,19,22,19,27,1,28,1,28,1,28,10,39,10,27, + 10,39,10,41,10,41,42,50,10,51,10,51,10,51,10,51, + 10,51,1,7,12,2,1,42,1,42,5,8,5,10,5,10, + 11,41,21,24,11,41,11,41,28,34,35,38,28,39,28,39, + 28,39,28,39,28,39,11,41,11,41,5,42,5,42,5,42, + 5,42,1,42,1,42,114,9,0,0,0, }; diff --git a/Python/ceval.c b/Python/ceval.c index 2df8ba7b933f60..84e2ff568dd274 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2197,9 +2197,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int PyObject *sub = TOP(); PyObject *container = SECOND(); _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; - PyObject *cached = frame->f_code->_co_obj_cache[cache->object]; uint32_t type_version = read32(&cache->type_version); - DEOPT_IF(Py_TYPE(container)->tp_version_tag != type_version, BINARY_SUBSCR); + PyTypeObject *tp = Py_TYPE(container); + DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR); + assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE); + PyObject *cached = ((PyHeapTypeObject *)tp)->_spec_cache.getitem; assert(PyFunction_Check(cached)); PyFunctionObject *getitem = (PyFunctionObject *)cached; DEOPT_IF(getitem->func_version != cache->func_version, BINARY_SUBSCR); diff --git a/Python/specialize.c b/Python/specialize.c index 134757b1d4ced2..7aaf9fba5a2cdb 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -364,17 +364,6 @@ entries_needed(const _Py_CODEUNIT *code, int len) return cache_offset + 1; // One extra for the count entry } -static int -object_slots_needed(const _Py_CODEUNIT *code, int len) -{ - int count = 0; - for (int i = 0; i < len; i++) { - uint8_t opcode = _Py_OPCODE(code[i]); - count += object_cache_requirements[opcode]; - } - return count; -} - static inline _Py_CODEUNIT * first_instruction(SpecializedCacheOrInstruction *quickened) { @@ -487,15 +476,9 @@ _Py_Quicken(PyCodeObject *code) { code->co_warmup = QUICKENING_WARMUP_COLDEST; return 0; } - int obj_count = object_slots_needed(code->co_firstinstr, instr_count); - 
code->_co_obj_cache = PyMem_Malloc(obj_count*sizeof(PyObject *)); - if (code->_co_obj_cache == NULL) { - return -1; - } int entry_count = entries_needed(code->co_firstinstr, instr_count); SpecializedCacheOrInstruction *quickened = allocate(entry_count, instr_count); if (quickened == NULL) { - PyMem_Free(code->_co_obj_cache); return -1; } _Py_CODEUNIT *new_instructions = first_instruction(quickened); @@ -583,6 +566,7 @@ initial_counter_value(void) { #define SPEC_FAIL_SUBSCR_PY_SIMPLE 20 #define SPEC_FAIL_SUBSCR_PY_OTHER 21 #define SPEC_FAIL_SUBSCR_DICT_SUBCLASS_NO_OVERRIDE 22 +#define SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE 23 /* Binary op */ @@ -1396,6 +1380,10 @@ _Py_Specialize_BinarySubscr( PyTypeObject *cls = Py_TYPE(container); PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__)); if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) { + if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { + SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE); + goto fail; + } PyFunctionObject *func = (PyFunctionObject *)descriptor; PyCodeObject *fcode = (PyCodeObject *)func->func_code; int kind = function_kind(fcode); @@ -1415,8 +1403,7 @@ _Py_Specialize_BinarySubscr( goto fail; } cache->func_version = version; - assert(code->_co_obj_cache != NULL); - code->_co_obj_cache[cache->object] = descriptor; + ((PyHeapTypeObject *)container_type)->_spec_cache.getitem = descriptor; *instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr)); goto success; } From 956ed1e03e8827d41f42ae4c7433ce343660bd19 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 1 Mar 2022 12:48:04 +0000 Subject: [PATCH 6/9] Remove unused parameter. 
--- Include/internal/pycore_code.h | 2 +- Python/ceval.c | 2 +- Python/specialize.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 9811a70cb2c3c0..97f0a52092323a 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -325,7 +325,7 @@ extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObjec extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name); extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); -extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, PyCodeObject *code); +extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr); extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs, PyObject *kwnames, SpecializedCacheEntry *cache); diff --git a/Python/ceval.c b/Python/ceval.c index 84e2ff568dd274..50ad35cfa8b99d 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2112,7 +2112,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int PyObject *sub = TOP(); PyObject *container = SECOND(); next_instr--; - if (_Py_Specialize_BinarySubscr(container, sub, next_instr, frame->f_code) < 0) { + if (_Py_Specialize_BinarySubscr(container, sub, next_instr) < 0) { goto error; } DISPATCH(); diff --git a/Python/specialize.c b/Python/specialize.c index 7aaf9fba5a2cdb..b8ea832946e476 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1349,7 +1349,7 @@ function_kind(PyCodeObject *code) { int _Py_Specialize_BinarySubscr( - PyObject *container, PyObject *sub, 
_Py_CODEUNIT *instr, PyCodeObject *code) + PyObject *container, PyObject *sub, _Py_CODEUNIT *instr) { assert(_PyOpcode_InlineCacheEntries[BINARY_SUBSCR] == INLINE_CACHE_ENTRIES_BINARY_SUBSCR); From 0e282f387d542170d5fe1488737b17339c7d7a49 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 1 Mar 2022 12:53:28 +0000 Subject: [PATCH 7/9] Remove unused code. --- Python/specialize.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index b8ea832946e476..bece89cf8f74f6 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -67,11 +67,6 @@ static uint8_t cache_requirements[256] = { [COMPARE_OP] = 1, /* _PyAdaptiveEntry */ }; -/* The number of object cache entries required for a "family" of instructions. */ -static const uint8_t object_cache_requirements[256] = { - [BINARY_SUBSCR] = 1, -}; - Py_ssize_t _Py_QuickenedCount = 0; #ifdef Py_STATS PyStats _py_stats = { 0 }; @@ -381,7 +376,6 @@ optimize(SpecializedCacheOrInstruction *quickened, int len) { _Py_CODEUNIT *instructions = first_instruction(quickened); int cache_offset = 0; - int object_offset = 0; int previous_opcode = -1; int previous_oparg = 0; for(int i = 0; i < len; i++) { @@ -391,11 +385,6 @@ optimize(SpecializedCacheOrInstruction *quickened, int len) if (adaptive_opcode) { if (_PyOpcode_InlineCacheEntries[opcode]) { instructions[i] = _Py_MAKECODEUNIT(adaptive_opcode, oparg); - if (object_cache_requirements[opcode]) { - assert(_PyOpcode_InlineCacheEntries[opcode] >= 2); - instructions[i+2] = object_offset; - object_offset += object_cache_requirements[opcode]; - } previous_opcode = -1; i += _PyOpcode_InlineCacheEntries[opcode]; } From f667ddb9729598a9dae9f1cdf7a5ce0cc8b8edbb Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 1 Mar 2022 14:57:16 +0000 Subject: [PATCH 8/9] Clarify comments. 
--- Include/cpython/object.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 736c2043a5d54b..fb9b798fc10895 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -230,8 +230,8 @@ struct _typeobject { }; /* This struct is used by the specializer - * It should should be treated as opaque blob - * by any other code */ + * It should be treated as an opaque blob + * by code than the specializer and interpreter. */ struct _specialization_cache { PyObject *getitem; }; @@ -254,7 +254,7 @@ typedef struct _heaptypeobject { struct _dictkeysobject *ht_cached_keys; PyObject *ht_module; char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec - struct _specialization_cache _spec_cache; // Internal -- DO NOT USE. + struct _specialization_cache _spec_cache; // For use by the specializer. /* here are optional user slots, followed by the members. */ } PyHeapTypeObject; From 22713370859fce01b4e4d3a78a816a202407b706 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 1 Mar 2022 15:00:33 +0000 Subject: [PATCH 9/9] Fix typo. --- Include/cpython/object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index fb9b798fc10895..b018dabf9d862f 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -231,7 +231,7 @@ struct _typeobject { /* This struct is used by the specializer * It should be treated as an opaque blob - * by code than the specializer and interpreter. */ + * by code other than the specializer and interpreter. */ struct _specialization_cache { PyObject *getitem; };