@@ -1224,48 +1224,72 @@ void l_cl_mem_blocking_free(cl_context context, void *ptr) {
   for (int i = 0; i < num_command_queues; i++) {
     cl_command_queue current_cq = context->command_queue[i];
-    // check events in commands
-    for (const auto &event : current_cq->commands) {
-      if (event->execution_status != CL_COMPLETE) {
-        // check if ptr is used by kernels when we submit to queue
-        if (event->ptr_hashtable.find(ptr) != event->ptr_hashtable.end()) {
-          clWaitForEvents(1, &event);
-        }
-        // check if ptr is used in queues
-        if ((event->cmd.type == CL_COMMAND_MEMCPY_INTEL) ||
-            (event->cmd.type == CL_COMMAND_MEMFILL_INTEL)) {
-          src_usm_alloc = acl_get_usm_alloc_from_ptr(
-              context, event->cmd.info.usm_xfer.src_ptr);
-          dst_usm_alloc = acl_get_usm_alloc_from_ptr(
-              context, event->cmd.info.usm_xfer.dst_ptr);
-          if ((src_usm_alloc && (src_usm_alloc->range.begin == ptr)) ||
-              (dst_usm_alloc && (dst_usm_alloc->range.begin == ptr))) {
+    // Set a flag to indicate the command set of this command queue is being
+    // traversed, and any event deletion should be deferred
+    current_cq->waiting_for_events = true;
+
+    if (current_cq->properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
+      // Current queue is ooo queue, check events in commands
+      for (auto it = current_cq->commands.begin();
+           it != current_cq->commands.end();) {
+        auto event = *it;
+        if (event->execution_status != CL_COMPLETE) {
+          // check if ptr is used by kernels when we submit to queue
+          if (event->ptr_hashtable.find(ptr) != event->ptr_hashtable.end()) {
             clWaitForEvents(1, &event);
           }
+          // check if ptr is used in queues
+          if ((event->cmd.type == CL_COMMAND_MEMCPY_INTEL) ||
+              (event->cmd.type == CL_COMMAND_MEMFILL_INTEL)) {
+            src_usm_alloc = acl_get_usm_alloc_from_ptr(
+                context, event->cmd.info.usm_xfer.src_ptr);
+            dst_usm_alloc = acl_get_usm_alloc_from_ptr(
+                context, event->cmd.info.usm_xfer.dst_ptr);
+            if ((src_usm_alloc && (src_usm_alloc->range.begin == ptr)) ||
+                (dst_usm_alloc && (dst_usm_alloc->range.begin == ptr))) {
+              clWaitForEvents(1, &event);
+            }
+          }
         }
-      }
-    }
-    // check events in inorder commands
-    for (const auto &event : current_cq->inorder_commands) {
-      if (event->execution_status != CL_COMPLETE) {
-        // check if ptr is used by kernels when we submit to queue
-        if (event->ptr_hashtable.find(ptr) != event->ptr_hashtable.end()) {
-          clWaitForEvents(1, &event);
+        if (event->defer_removal) {
+          it = current_cq->commands.erase(it);
+          event->defer_removal = false; // Reset as this event might get reused
+        } else {
+          ++it;
         }
-        // check if ptr is used in queues
-        if ((event->cmd.type == CL_COMMAND_MEMCPY_INTEL) ||
-            (event->cmd.type == CL_COMMAND_MEMFILL_INTEL)) {
-          src_usm_alloc = acl_get_usm_alloc_from_ptr(
-              context, event->cmd.info.usm_xfer.src_ptr);
-          dst_usm_alloc = acl_get_usm_alloc_from_ptr(
-              context, event->cmd.info.usm_xfer.dst_ptr);
-          if ((src_usm_alloc && (src_usm_alloc->range.begin == ptr)) ||
-              (dst_usm_alloc && (dst_usm_alloc->range.begin == ptr))) {
+      }
+    } else {
+      // Current queue is inorder queue, check events in inorder commands
+      for (auto it = current_cq->inorder_commands.begin();
+           it != current_cq->inorder_commands.end();) {
+        auto event = *it;
+        if (event->execution_status != CL_COMPLETE) {
+          // check if ptr is used by kernels when we submit to queue
+          if (event->ptr_hashtable.find(ptr) != event->ptr_hashtable.end()) {
             clWaitForEvents(1, &event);
           }
+          // check if ptr is used in queues
+          if ((event->cmd.type == CL_COMMAND_MEMCPY_INTEL) ||
+              (event->cmd.type == CL_COMMAND_MEMFILL_INTEL)) {
+            src_usm_alloc = acl_get_usm_alloc_from_ptr(
+                context, event->cmd.info.usm_xfer.src_ptr);
+            dst_usm_alloc = acl_get_usm_alloc_from_ptr(
+                context, event->cmd.info.usm_xfer.dst_ptr);
+            if ((src_usm_alloc && (src_usm_alloc->range.begin == ptr)) ||
+                (dst_usm_alloc && (dst_usm_alloc->range.begin == ptr))) {
+              clWaitForEvents(1, &event);
+            }
+          }
+        }
+        if (event->defer_removal) {
+          it = current_cq->inorder_commands.erase(it);
+          event->defer_removal = false; // Reset as this event might get reused
+        } else {
+          ++it;
         }
       }
     }
+    current_cq->waiting_for_events = false;
   }
 }
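
The two flags introduced by this change work together: an event that completes while l_cl_mem_blocking_free is still walking the queue's command list cannot simply be erased from that list without invalidating the walker's iterator. Setting waiting_for_events before the traversal signals that such removals should instead be recorded via defer_removal, and the traversal itself erases the marked entries once it reaches them. The following is a minimal, self-contained sketch of that deferred-removal pattern; the Event, Queue, on_event_complete, and wait_and_prune names are illustrative stand-ins, not the runtime's actual types or functions.

#include <iostream>
#include <list>
#include <memory>

struct Event {
  bool complete = false;
  bool defer_removal = false;
};

struct Queue {
  bool waiting_for_events = false;
  std::list<std::shared_ptr<Event>> commands;

  // Stand-in for event completion happening elsewhere in the runtime. If the
  // command list is currently being traversed, erasing here would invalidate
  // the traverser's iterator, so the event is only marked for later removal.
  void on_event_complete(const std::shared_ptr<Event> &event) {
    event->complete = true;
    if (waiting_for_events)
      event->defer_removal = true;
    else
      commands.remove(event);
  }

  // Mirrors the loop in the patch: guard the traversal with the flag, wait
  // for unfinished events, and erase any entries whose removal was deferred
  // while the list was being walked.
  void wait_and_prune() {
    waiting_for_events = true;
    for (auto it = commands.begin(); it != commands.end();) {
      auto event = *it;
      if (!event->complete)
        on_event_complete(event); // stand-in for clWaitForEvents()
      if (event->defer_removal) {
        it = commands.erase(it);
        event->defer_removal = false; // reset in case the event is reused
      } else {
        ++it;
      }
    }
    waiting_for_events = false;
  }
};

int main() {
  Queue q;
  q.commands.push_back(std::make_shared<Event>());
  q.commands.push_back(std::make_shared<Event>());
  q.wait_and_prune();
  std::cout << "commands left after prune: " << q.commands.size() << "\n"; // 0
  return 0;
}

Erasing through the iterator returned by erase() keeps the traversal valid, which is also why the patch switches from range-based for loops to explicit iterators.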