@@ -205,18 +205,21 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
205
205
206
206
if (dev -> kfd -> shared_resources .enable_mes ) {
207
207
amdgpu_amdkfd_free_gtt_mem (dev -> adev , & pqn -> q -> gang_ctx_bo );
208
- if (pqn -> q -> wptr_bo )
209
- amdgpu_amdkfd_free_gtt_mem (dev -> adev , (void * * )& pqn -> q -> wptr_bo );
208
+ amdgpu_amdkfd_free_gtt_mem (dev -> adev , (void * * )& pqn -> q -> wptr_bo_gart );
210
209
}
211
210
}
212
211
213
212
void pqm_uninit (struct process_queue_manager * pqm )
214
213
{
215
214
struct process_queue_node * pqn , * next ;
215
+ struct kfd_process_device * pdd ;
216
216
217
217
list_for_each_entry_safe (pqn , next , & pqm -> queues , process_queue_list ) {
218
- if (pqn -> q )
218
+ if (pqn -> q ) {
219
+ pdd = kfd_get_process_device_data (pqn -> q -> device , pqm -> process );
220
+ kfd_queue_release_buffers (pdd , & pqn -> q -> properties );
219
221
pqm_clean_queue_resource (pqm , pqn );
222
+ }
220
223
221
224
kfd_procfs_del_queue (pqn -> q );
222
225
uninit_queue (pqn -> q );
@@ -231,8 +234,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
231
234
static int init_user_queue (struct process_queue_manager * pqm ,
232
235
struct kfd_node * dev , struct queue * * q ,
233
236
struct queue_properties * q_properties ,
234
- struct file * f , struct amdgpu_bo * wptr_bo ,
235
- unsigned int qid )
237
+ struct file * f , unsigned int qid )
236
238
{
237
239
int retval ;
238
240
@@ -263,12 +265,32 @@ static int init_user_queue(struct process_queue_manager *pqm,
263
265
goto cleanup ;
264
266
}
265
267
memset ((* q )-> gang_ctx_cpu_ptr , 0 , AMDGPU_MES_GANG_CTX_SIZE );
266
- (* q )-> wptr_bo = wptr_bo ;
268
+
269
+ /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
270
+ * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
271
+ */
272
+ if (((dev -> adev -> mes .sched_version & AMDGPU_MES_API_VERSION_MASK )
273
+ >> AMDGPU_MES_API_VERSION_SHIFT ) >= 2 ) {
274
+ if (dev -> adev != amdgpu_ttm_adev (q_properties -> wptr_bo -> tbo .bdev )) {
275
+ pr_err ("Queue memory allocated to wrong device\n" );
276
+ retval = - EINVAL ;
277
+ goto free_gang_ctx_bo ;
278
+ }
279
+
280
+ retval = amdgpu_amdkfd_map_gtt_bo_to_gart (q_properties -> wptr_bo ,
281
+ & (* q )-> wptr_bo_gart );
282
+ if (retval ) {
283
+ pr_err ("Failed to map wptr bo to GART\n" );
284
+ goto free_gang_ctx_bo ;
285
+ }
286
+ }
267
287
}
268
288
269
289
pr_debug ("PQM After init queue" );
270
290
return 0 ;
271
291
292
+ free_gang_ctx_bo :
293
+ amdgpu_amdkfd_free_gtt_mem (dev -> adev , & (* q )-> gang_ctx_bo ); /* pass the address of the BO pointer, as the other amdgpu_amdkfd_free_gtt_mem() call sites do */
272
294
cleanup :
273
295
uninit_queue (* q );
274
296
* q = NULL ;
@@ -280,7 +302,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
280
302
struct file * f ,
281
303
struct queue_properties * properties ,
282
304
unsigned int * qid ,
283
- struct amdgpu_bo * wptr_bo ,
284
305
const struct kfd_criu_queue_priv_data * q_data ,
285
306
const void * restore_mqd ,
286
307
const void * restore_ctl_stack ,
@@ -351,7 +372,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
351
372
* allocate_sdma_queue() in create_queue() has the
352
373
* corresponding check logic.
353
374
*/
354
- retval = init_user_queue (pqm , dev , & q , properties , f , wptr_bo , * qid );
375
+ retval = init_user_queue (pqm , dev , & q , properties , f , * qid );
355
376
if (retval != 0 )
356
377
goto err_create_queue ;
357
378
pqn -> q = q ;
@@ -372,7 +393,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
372
393
goto err_create_queue ;
373
394
}
374
395
375
- retval = init_user_queue (pqm , dev , & q , properties , f , wptr_bo , * qid );
396
+ retval = init_user_queue (pqm , dev , & q , properties , f , * qid );
376
397
if (retval != 0 )
377
398
goto err_create_queue ;
378
399
pqn -> q = q ;
@@ -490,6 +511,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
490
511
}
491
512
492
513
if (pqn -> q ) {
514
+ retval = kfd_queue_release_buffers (pdd , & pqn -> q -> properties );
515
+ if (retval )
516
+ goto err_destroy_queue ;
517
+
493
518
kfd_procfs_del_queue (pqn -> q );
494
519
dqm = pqn -> q -> device -> dqm ;
495
520
retval = dqm -> ops .destroy_queue (dqm , & pdd -> qpd , pqn -> q );
@@ -971,7 +996,7 @@ int kfd_criu_restore_queue(struct kfd_process *p,
971
996
972
997
print_queue_properties (& qp );
973
998
974
- ret = pqm_create_queue (& p -> pqm , pdd -> dev , NULL , & qp , & queue_id , NULL , q_data , mqd , ctl_stack ,
999
+ ret = pqm_create_queue (& p -> pqm , pdd -> dev , NULL , & qp , & queue_id , q_data , mqd , ctl_stack ,
975
1000
NULL );
976
1001
if (ret ) {
977
1002
pr_err ("Failed to create new queue err:%d\n" , ret );
0 commit comments