@@ -124,6 +124,33 @@ _PyMem_mi_page_is_safe_to_free(mi_page_t *page)
 }
 
+#ifdef Py_GIL_DISABLED
+
+// If we are deferring collection of more than this amount of memory for
+// mimalloc pages, advance the write sequence. Advancing allows these
+// pages to be re-used in a different thread or for a different size class.
+#define QSBR_PAGE_MEM_LIMIT 4096*20
+
+// Return true if the global write sequence should be advanced for a mimalloc
+// page that is deferred from collection.
+static bool
+should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
+{
+    size_t bsize = mi_page_block_size(page);
+    size_t page_size = page->capacity * bsize;
+    if (page_size > QSBR_PAGE_MEM_LIMIT) {
+        qsbr->deferred_page_memory = 0;
+        return true;
+    }
+    qsbr->deferred_page_memory += page_size;
+    if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
+        qsbr->deferred_page_memory = 0;
+        return true;
+    }
+    return false;
+}
+#endif
+
 static bool
 _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
 {
@@ -139,7 +166,14 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
 
         _PyMem_mi_page_clear_qsbr(page);
         page->retire_expire = 0;
-        page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr);
+
+        if (should_advance_qsbr_for_page(tstate->qsbr, page)) {
+            page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared);
+        }
+        else {
+            page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared);
+        }
+
         llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
         return false;
     }
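
The two hunks above replace the single `_Py_qsbr_deferred_advance()` call with an explicit choice between `_Py_qsbr_advance()` (actually bump the shared write sequence) and `_Py_qsbr_shared_next()` (reuse the next pending value), driven by how much page memory is already deferred. The batching effect can be seen in a minimal standalone sketch; the counter, accumulator, and driver below are illustrative stand-ins, not CPython code:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define QSBR_PAGE_MEM_LIMIT (4096 * 20)

static unsigned long long write_seq = 1;   // stand-in for the shared write sequence
static size_t deferred_page_memory = 0;    // per-thread accumulator, as in the diff

// Model of should_advance_qsbr_for_page(): advance only when the deferred
// total crosses the limit, or when a single page is already over it.
static bool
defer_page(size_t page_size)
{
    if (page_size > QSBR_PAGE_MEM_LIMIT) {
        deferred_page_memory = 0;
        write_seq++;
        return true;
    }
    deferred_page_memory += page_size;
    if (deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
        deferred_page_memory = 0;
        write_seq++;
        return true;
    }
    return false;
}

int
main(void)
{
    int advances = 0;
    for (int i = 0; i < 100; i++) {
        advances += defer_page(4096);   // one hundred 4 KiB pages
    }
    // The total first exceeds 80 KiB at the 21st page, so this prints 4
    // advances for 100 deferred pages; the other 96 share pending values.
    printf("advances=%d write_seq=%llu\n", advances, write_seq);
    return 0;
}
```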
@@ -1141,8 +1175,44 @@ free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
     }
 }
 
+
+#ifdef Py_GIL_DISABLED
+
+// For deferred advance on free: the number of deferred items before advancing
+// the write sequence. This is based on WORK_ITEMS_PER_CHUNK. We ideally
+// want to process a chunk before it overflows.
+#define QSBR_DEFERRED_LIMIT 127
+
+// If the deferred memory exceeds 1 MiB, advance the write sequence. This
+// helps limit memory usage due to QSBR delaying frees too long.
+#define QSBR_FREE_MEM_LIMIT 1024*1024
+
+// Return true if the global write sequence should be advanced for a deferred
+// memory free.
+static bool
+should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size)
+{
+    if (size > QSBR_FREE_MEM_LIMIT) {
+        qsbr->deferred_count = 0;
+        qsbr->deferred_memory = 0;
+        qsbr->should_process = true;
+        return true;
+    }
+    qsbr->deferred_count++;
+    qsbr->deferred_memory += size;
+    if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT ||
+        qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
+        qsbr->deferred_count = 0;
+        qsbr->deferred_memory = 0;
+        qsbr->should_process = true;
+        return true;
+    }
+    return false;
+}
+#endif
+
 static void
-free_delayed(uintptr_t ptr)
+free_delayed(uintptr_t ptr, size_t size)
 {
 #ifndef Py_GIL_DISABLED
     free_work_item(ptr, NULL, NULL);
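
Per the comment above, the 127-item limit is tied to WORK_ITEMS_PER_CHUNK so that the write sequence advances (and processing is flagged) before a work-item chunk can fill. If that relationship matters, a compile-time guard could pin it down; this is a hypothetical addition, and the WORK_ITEMS_PER_CHUNK value below is assumed for illustration:

```c
#include <assert.h>

#define WORK_ITEMS_PER_CHUNK 254   // assumed value, for illustration only
#define QSBR_DEFERRED_LIMIT  127

// Hypothetical guard: keep the deferred limit well below the chunk
// capacity so a chunk is flushed before it can overflow.
static_assert(QSBR_DEFERRED_LIMIT < WORK_ITEMS_PER_CHUNK,
              "QSBR_DEFERRED_LIMIT must stay below WORK_ITEMS_PER_CHUNK");
```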
@@ -1200,23 +1270,32 @@ free_delayed(uintptr_t ptr)
     }
 
     assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
-    uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr);
+    uint64_t seq;
+    if (should_advance_qsbr_for_free(tstate->qsbr, size)) {
+        seq = _Py_qsbr_advance(tstate->qsbr->shared);
+    }
+    else {
+        seq = _Py_qsbr_shared_next(tstate->qsbr->shared);
+    }
     buf->array[buf->wr_idx].ptr = ptr;
     buf->array[buf->wr_idx].qsbr_goal = seq;
     buf->wr_idx++;
 
     if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
+        // Normally the processing of delayed items is done from the eval
+        // breaker. Processing here is a safety measure to ensure too much
+        // work does not accumulate.
         _PyMem_ProcessDelayed((PyThreadState *)tstate);
     }
 #endif
 }
 
 void
-_PyMem_FreeDelayed(void *ptr)
+_PyMem_FreeDelayed(void *ptr, size_t size)
 {
     assert(!((uintptr_t)ptr & 0x01));
     if (ptr != NULL) {
-        free_delayed((uintptr_t)ptr);
+        free_delayed((uintptr_t)ptr, size);
    }
 }
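
Since `_PyMem_FreeDelayed()` now takes the block size, every caller has to be updated to pass it. A hypothetical call site (the function and its names are illustrative, not from the patch):

```c
// Hypothetical caller: frees a buffer that concurrent readers may still
// hold, passing its size so the QSBR heuristic can account for it.
static void
release_shared_buffer(void *buf, size_t buf_size)
{
    _PyMem_FreeDelayed(buf, buf_size);   // previously: _PyMem_FreeDelayed(buf)
}
```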
@@ -1226,7 +1305,10 @@ _PyObject_XDecRefDelayed(PyObject *ptr)
 {
     assert(!((uintptr_t)ptr & 0x01));
     if (ptr != NULL) {
-        free_delayed(((uintptr_t)ptr)|0x01);
+        // We use 0 as the size since we don't have an easy way to know the
+        // actual size. If we are freeing many objects, the write sequence
+        // will be advanced due to QSBR_DEFERRED_LIMIT.
+        free_delayed(((uintptr_t)ptr)|0x01, 0);
     }
 }
 #endif
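
The `|0x01` here and the `assert(!((uintptr_t)ptr & 0x01))` checks above rely on a low-bit tag: pointers queued for a delayed decref carry bit 0 set, plain memory blocks leave it clear, which works because allocations are at least 2-byte aligned. A small sketch of that convention (helper names are illustrative, not CPython's):

```c
#include <stdbool.h>
#include <stdint.h>

// Bit 0 distinguishes "Py_DECREF this object" from "free this memory"
// in the delayed-work queue; pointer alignment guarantees it is unused.
static inline uintptr_t tag_decref(void *obj)  { return (uintptr_t)obj | 0x01; }
static inline bool is_decref(uintptr_t item)   { return (item & 0x01) != 0; }
static inline void *untag(uintptr_t item)      { return (void *)(item & ~(uintptr_t)0x01); }
```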
@@ -1302,6 +1384,8 @@ _PyMem_ProcessDelayed(PyThreadState *tstate)
     PyInterpreterState *interp = tstate->interp;
     _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
 
+    tstate_impl->qsbr->should_process = false;
+
     // Process thread-local work
     process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);
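
`should_process` is set by the free heuristics above and cleared here at the top of `_PyMem_ProcessDelayed()`. The flag's consumer is outside this diff; a sketch of how an eval-breaker-style poll might use it (the function below is illustrative, not part of the patch):

```c
// Illustrative poll: drain the delayed-free queues only when the free
// heuristics have flagged a backlog. _PyMem_ProcessDelayed() clears
// should_process on entry, so repeated polls stay cheap.
static inline void
maybe_process_delayed(_PyThreadStateImpl *tstate)
{
    if (tstate->qsbr->should_process) {
        _PyMem_ProcessDelayed((PyThreadState *)tstate);
    }
}
```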