@@ -8,50 +8,10 @@ extern "C" {
  * Specialization and quickening structs and helper functions
  */
 
-typedef struct {
-    int32_t cache_count;
-    int32_t _; /* Force 8 byte size */
-} _PyEntryZero;
-
-typedef struct {
-    uint8_t original_oparg;
-    uint8_t counter;
-    uint16_t index;
-    uint32_t version;
-} _PyAdaptiveEntry;
 
-typedef struct {
-    /* Borrowed ref */
-    PyObject *obj;
-} _PyObjectCache;
-
-typedef struct {
-    uint32_t func_version;
-    uint16_t min_args;
-    uint16_t defaults_len;
-} _PyCallCache;
-
-
-/* Add specialized versions of entries to this union.
- *
- * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
- * Preserving this invariant is necessary because:
- *   - If any one form uses more space, then all must, and on 64-bit machines
- *     this is likely to double the memory consumption of caches
- *   - The function for calculating the offset of caches assumes a 4:1
- *     cache:instruction size ratio. Changing that would need careful
- *     analysis to choose a new function.
- */
-typedef union {
-    _PyEntryZero zero;
-    _PyAdaptiveEntry adaptive;
-    _PyObjectCache obj;
-    _PyCallCache call;
-} SpecializedCacheEntry;
-
-#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
-
-/* Inline caches */
+// Inline caches. If you change the number of cache entries for an instruction,
+// you must *also* update the number of cache entries in Lib/opcode.py and bump
+// the magic number in Lib/importlib/_bootstrap_external.py!
 
 #define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))
 
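The `CACHE_ENTRIES` macro measures a cache struct in 16-bit code units, the same unit the bytecode stream itself uses. Below is a minimal standalone sketch of what it computes; `codeunit_t` and `example_attr_cache` are simplified stand-ins, not the definitions from the real header.

```c
/* Standalone sketch: CACHE_ENTRIES as a size in 16-bit code units.
 * codeunit_t and example_attr_cache are stand-ins, not CPython's types. */
#include <stdint.h>
#include <stdio.h>

typedef uint16_t codeunit_t;          /* stand-in for _Py_CODEUNIT */

typedef struct {
    codeunit_t counter;               /* adaptive counter */
    codeunit_t version[2];            /* a 32-bit value split into two units */
    codeunit_t index;
} example_attr_cache;                 /* hypothetical cache layout */

#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(codeunit_t))

int main(void)
{
    /* Prints 4: one counter, two units for the version, one index. */
    printf("entries = %zu\n", CACHE_ENTRIES(example_attr_cache));
    return 0;
}
```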
@@ -112,73 +72,22 @@ typedef struct {
 
 #define INLINE_CACHE_ENTRIES_LOAD_METHOD CACHE_ENTRIES(_PyLoadMethodCache)
 
-/* Maximum size of code to quicken, in code units. */
-#define MAX_SIZE_TO_QUICKEN 5000
-
-typedef union _cache_or_instruction {
-    _Py_CODEUNIT code[1];
-    SpecializedCacheEntry entry;
-} SpecializedCacheOrInstruction;
+typedef struct {
+    _Py_CODEUNIT counter;
+    _Py_CODEUNIT func_version[2];
+    _Py_CODEUNIT min_args;
+} _PyCallCache;
 
-/* Get a pointer to the nth cache entry, from the first instruction and n.
- * Cache entries are indexed backwards, with [count-1] first in memory, and [0] last.
- * The zeroth entry immediately precedes the instructions.
- */
-static inline SpecializedCacheEntry *
-_GetSpecializedCacheEntry(const _Py_CODEUNIT *first_instr, Py_ssize_t n)
-{
-    SpecializedCacheOrInstruction *last_cache_plus_one = (SpecializedCacheOrInstruction *)first_instr;
-    assert(&last_cache_plus_one->code[0] == first_instr);
-    return &last_cache_plus_one[-1-n].entry;
-}
+#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)
 
-/* The following two functions form a pair.
- *
- * oparg_from_offset_and_index() is used to compute the oparg
- * when quickening, so that offset_from_oparg_and_nexti()
- * can be used at runtime to compute the offset.
- *
- * The relationship between the three values is currently
- *     offset == (index>>1) + oparg
- * This relation is chosen based on the following observations:
- * 1. Typically, 1 in 4 instructions needs a cache.
- * 2. Instructions that need a cache typically use 2 entries.
- * These observations imply: offset ≈ index/2
- * We use the oparg to fine-tune the relation, to avoid wasting space
- * and to allow consecutive instructions to use caches.
- *
- * If the number of cache entries < number of instructions/2, we will waste
- * a small amount of space.
- * If the number of cache entries > (number of instructions/2) + 255, then
- * some instructions will not be able to use a cache.
- * In practice, we expect a small amount of wasted space in shorter functions,
- * and only functions exceeding roughly 1000 lines will not have enough cache space.
- *
- */
-static inline int
-oparg_from_offset_and_nexti(int offset, int nexti)
-{
-    return offset - (nexti>>1);
-}
+typedef struct {
+    _Py_CODEUNIT counter;
+} _PyPrecallCache;
 
-static inline int
-offset_from_oparg_and_nexti(int oparg, int nexti)
-{
-    return (nexti>>1) + oparg;
-}
+#define INLINE_CACHE_ENTRIES_PRECALL CACHE_ENTRIES(_PyPrecallCache)
 
-/* Get a pointer to the cache entry associated with an instruction.
- * nexti is the index of the instruction plus one;
- * nexti is used as it corresponds to the instruction pointer in the interpreter.
- * This doesn't check that an entry has been allocated for that instruction. */
-static inline SpecializedCacheEntry *
-_GetSpecializedCacheEntryForInstruction(const _Py_CODEUNIT *first_instr, int nexti, int oparg)
-{
-    return _GetSpecializedCacheEntry(
-        first_instr,
-        offset_from_oparg_and_nexti(oparg, nexti)
-    );
-}
+/* Maximum size of code to quicken, in code units. */
+#define MAX_SIZE_TO_QUICKEN 10000
 
 #define QUICKENING_WARMUP_DELAY 8
 
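The new `_PyCallCache` stores a 32-bit function version across two 16-bit code units (`func_version[2]`). Here is a hedged sketch of how such a value can be written and read back; the helper names are illustrative, not necessarily the ones the header provides.

```c
/* Sketch: storing a 32-bit value across two 16-bit cache code units,
 * as the _PyCallCache layout above implies (func_version[2]).
 * write_u32_example/read_u32_example are hypothetical helpers. */
#include <assert.h>
#include <stdint.h>
#include <string.h>

typedef uint16_t codeunit_t;   /* stand-in for _Py_CODEUNIT */

static inline void
write_u32_example(codeunit_t *p, uint32_t val)
{
    memcpy(p, &val, sizeof(val));    /* spans p[0] and p[1] */
}

static inline uint32_t
read_u32_example(const codeunit_t *p)
{
    uint32_t val;
    memcpy(&val, p, sizeof(val));
    return val;
}

int main(void)
{
    codeunit_t cache[4] = {0};       /* counter, func_version[2], min_args */
    write_u32_example(&cache[1], 0xDEADBEEF);
    assert(read_u32_example(&cache[1]) == 0xDEADBEEF);
    return 0;
}
```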
@@ -205,6 +114,13 @@ _Py_IncrementCountAndMaybeQuicken(PyCodeObject *code)
 
 extern Py_ssize_t _Py_QuickenedCount;
 
+// Borrowed references to common callables:
+struct callable_cache {
+    PyObject *isinstance;
+    PyObject *len;
+    PyObject *list_append;
+};
+
 /* "Locals plus" for a code object is the set of locals + cell vars +
  * free vars. This relates to variable names as well as offsets into
  * the "fast locals" storage array of execution frames. The compiler
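The `callable_cache` struct added above keeps borrowed references to a few common builtins so a call target can be recognized by pointer identity rather than by name lookup. A hedged sketch of that check follows, with illustrative names and without the interpreter-state plumbing that owns the real cache.

```c
/* Sketch only: identity check against cached borrowed references.
 * The struct mirrors callable_cache above; how it gets populated
 * (from the builtins module at startup) is out of scope here. */
#include <Python.h>

struct callable_cache_example {
    PyObject *isinstance;    /* borrowed */
    PyObject *len;           /* borrowed */
    PyObject *list_append;   /* borrowed */
};

/* Returns nonzero if `callable` is the cached builtin len().
 * Borrowed references make this a single pointer comparison; the
 * cache's owner must guarantee the builtins outlive the cache. */
int
example_is_cached_len(const struct callable_cache_example *cache,
                      PyObject *callable)
{
    return callable == cache->len;
}
```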
@@ -332,11 +248,6 @@ extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
 
 #define ADAPTIVE_CACHE_BACKOFF 64
 
-static inline void
-cache_backoff(_PyAdaptiveEntry *entry) {
-    entry->counter = ADAPTIVE_CACHE_BACKOFF;
-}
-
 /* Specialization functions */
 
 extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
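`ADAPTIVE_CACHE_BACKOFF` serves the purpose the removed `cache_backoff()` helper did: after a failed specialization attempt, the per-instruction counter is reset so the specializer waits a number of executions before trying again. A standalone sketch of that counting scheme, with illustrative names:

```c
/* Sketch of the counter/backoff idea: on a miss, count down; once the
 * counter reaches zero, reset it to ADAPTIVE_CACHE_BACKOFF and signal
 * that a new specialization attempt may be made.  The function name
 * is hypothetical. */
#include <stdbool.h>
#include <stdint.h>

#define ADAPTIVE_CACHE_BACKOFF 64

typedef struct {
    uint16_t counter;
} example_cache;

static inline bool
miss_and_maybe_respecialize(example_cache *cache)
{
    if (cache->counter > 0) {
        cache->counter--;                        /* still backing off */
        return false;
    }
    cache->counter = ADAPTIVE_CACHE_BACKOFF;     /* try specializing again */
    return true;
}
```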
@@ -348,10 +259,10 @@ extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr,
                                      PyObject *name);
 extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr);
 extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
-extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
-                               PyObject *kwnames, SpecializedCacheEntry *cache);
-extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
-                                  PyObject *kwnames, SpecializedCacheEntry *cache, PyObject *builtins);
+extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr,
+                               int nargs, PyObject *kwnames);
+extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr,
+                                  int nargs, PyObject *kwnames, int oparg);
 extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
                                     int oparg);
 extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
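The `SpecializedCacheEntry *` parameter drops out of `_Py_Specialize_Call()` and `_Py_Specialize_Precall()` because, with inline caches, the cache entries sit in the bytecode stream right after the adaptive instruction, so a specializer can reach them from `instr` alone. A hedged sketch of that addressing pattern, using stand-in types rather than the real ones:

```c
/* Sketch of why a separate cache pointer is no longer needed: the cache
 * entries immediately follow the instruction.  codeunit_t and
 * example_call_cache are stand-ins mirroring the _PyCallCache layout
 * above; cache_for_instruction() is illustrative only. */
#include <stdint.h>

typedef uint16_t codeunit_t;          /* stand-in for _Py_CODEUNIT */

typedef struct {
    codeunit_t counter;
    codeunit_t func_version[2];
    codeunit_t min_args;
} example_call_cache;

static inline example_call_cache *
cache_for_instruction(codeunit_t *instr)
{
    /* The inline cache entries start one code unit past the instruction. */
    return (example_call_cache *)(instr + 1);
}
```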