22
22
23
23
/* number of iaa instances probed */
static unsigned int nr_iaa;
/* online cpu / node counts, captured once at module init */
static unsigned int nr_cpus;
static unsigned int nr_nodes;
/* nr_cpus / nr_nodes — assumes cpus are spread evenly across nodes */
static unsigned int nr_cpus_per_node;

/* Number of physical cpus sharing each iaa instance */
static unsigned int cpus_per_iaa;

/* Per-cpu lookup table for balanced wqs */
static struct wq_table_entry __percpu *wq_table;
34
+
35
+ static void wq_table_add (int cpu , struct idxd_wq * wq )
36
+ {
37
+ struct wq_table_entry * entry = per_cpu_ptr (wq_table , cpu );
38
+
39
+ if (WARN_ON (entry -> n_wqs == entry -> max_wqs ))
40
+ return ;
41
+
42
+ entry -> wqs [entry -> n_wqs ++ ] = wq ;
43
+
44
+ pr_debug ("%s: added iaa wq %d.%d to idx %d of cpu %d\n" , __func__ ,
45
+ entry -> wqs [entry -> n_wqs - 1 ]-> idxd -> id ,
46
+ entry -> wqs [entry -> n_wqs - 1 ]-> id , entry -> n_wqs - 1 , cpu );
47
+ }
48
+
49
+ static void wq_table_free_entry (int cpu )
50
+ {
51
+ struct wq_table_entry * entry = per_cpu_ptr (wq_table , cpu );
52
+
53
+ kfree (entry -> wqs );
54
+ memset (entry , 0 , sizeof (* entry ));
55
+ }
56
+
57
+ static void wq_table_clear_entry (int cpu )
58
+ {
59
+ struct wq_table_entry * entry = per_cpu_ptr (wq_table , cpu );
60
+
61
+ entry -> n_wqs = 0 ;
62
+ entry -> cur_wq = 0 ;
63
+ memset (entry -> wqs , 0 , entry -> max_wqs * sizeof (struct idxd_wq * ));
64
+ }
25
65
26
66
/* All probed iaa devices; list and counters guarded by iaa_devices_lock */
static LIST_HEAD(iaa_devices);
static DEFINE_MUTEX(iaa_devices_lock);
@@ -141,6 +181,53 @@ static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
141
181
}
142
182
}
143
183
184
+ static void clear_wq_table (void )
185
+ {
186
+ int cpu ;
187
+
188
+ for (cpu = 0 ; cpu < nr_cpus ; cpu ++ )
189
+ wq_table_clear_entry (cpu );
190
+
191
+ pr_debug ("cleared wq table\n" );
192
+ }
193
+
194
+ static void free_wq_table (void )
195
+ {
196
+ int cpu ;
197
+
198
+ for (cpu = 0 ; cpu < nr_cpus ; cpu ++ )
199
+ wq_table_free_entry (cpu );
200
+
201
+ free_percpu (wq_table );
202
+
203
+ pr_debug ("freed wq table\n" );
204
+ }
205
+
206
+ static int alloc_wq_table (int max_wqs )
207
+ {
208
+ struct wq_table_entry * entry ;
209
+ int cpu ;
210
+
211
+ wq_table = alloc_percpu (struct wq_table_entry );
212
+ if (!wq_table )
213
+ return - ENOMEM ;
214
+
215
+ for (cpu = 0 ; cpu < nr_cpus ; cpu ++ ) {
216
+ entry = per_cpu_ptr (wq_table , cpu );
217
+ entry -> wqs = kcalloc (max_wqs , sizeof (struct wq * ), GFP_KERNEL );
218
+ if (!entry -> wqs ) {
219
+ free_wq_table ();
220
+ return - ENOMEM ;
221
+ }
222
+
223
+ entry -> max_wqs = max_wqs ;
224
+ }
225
+
226
+ pr_debug ("initialized wq table\n" );
227
+
228
+ return 0 ;
229
+ }
230
+
144
231
static int save_iaa_wq (struct idxd_wq * wq )
145
232
{
146
233
struct iaa_device * iaa_device , * found = NULL ;
@@ -193,6 +280,8 @@ static int save_iaa_wq(struct idxd_wq *wq)
193
280
194
281
if (WARN_ON (nr_iaa == 0 ))
195
282
return - EINVAL ;
283
+
284
+ cpus_per_iaa = (nr_nodes * nr_cpus_per_node ) / nr_iaa ;
196
285
out :
197
286
return 0 ;
198
287
}
@@ -207,6 +296,116 @@ static void remove_iaa_wq(struct idxd_wq *wq)
207
296
break ;
208
297
}
209
298
}
299
+
300
+ if (nr_iaa )
301
+ cpus_per_iaa = (nr_nodes * nr_cpus_per_node ) / nr_iaa ;
302
+ else
303
+ cpus_per_iaa = 0 ;
304
+ }
305
+
306
+ static int wq_table_add_wqs (int iaa , int cpu )
307
+ {
308
+ struct iaa_device * iaa_device , * found_device = NULL ;
309
+ int ret = 0 , cur_iaa = 0 , n_wqs_added = 0 ;
310
+ struct idxd_device * idxd ;
311
+ struct iaa_wq * iaa_wq ;
312
+ struct pci_dev * pdev ;
313
+ struct device * dev ;
314
+
315
+ list_for_each_entry (iaa_device , & iaa_devices , list ) {
316
+ idxd = iaa_device -> idxd ;
317
+ pdev = idxd -> pdev ;
318
+ dev = & pdev -> dev ;
319
+
320
+ if (cur_iaa != iaa ) {
321
+ cur_iaa ++ ;
322
+ continue ;
323
+ }
324
+
325
+ found_device = iaa_device ;
326
+ dev_dbg (dev , "getting wq from iaa_device %d, cur_iaa %d\n" ,
327
+ found_device -> idxd -> id , cur_iaa );
328
+ break ;
329
+ }
330
+
331
+ if (!found_device ) {
332
+ found_device = list_first_entry_or_null (& iaa_devices ,
333
+ struct iaa_device , list );
334
+ if (!found_device ) {
335
+ pr_debug ("couldn't find any iaa devices with wqs!\n" );
336
+ ret = - EINVAL ;
337
+ goto out ;
338
+ }
339
+ cur_iaa = 0 ;
340
+
341
+ idxd = found_device -> idxd ;
342
+ pdev = idxd -> pdev ;
343
+ dev = & pdev -> dev ;
344
+ dev_dbg (dev , "getting wq from only iaa_device %d, cur_iaa %d\n" ,
345
+ found_device -> idxd -> id , cur_iaa );
346
+ }
347
+
348
+ list_for_each_entry (iaa_wq , & found_device -> wqs , list ) {
349
+ wq_table_add (cpu , iaa_wq -> wq );
350
+ pr_debug ("rebalance: added wq for cpu=%d: iaa wq %d.%d\n" ,
351
+ cpu , iaa_wq -> wq -> idxd -> id , iaa_wq -> wq -> id );
352
+ n_wqs_added ++ ;
353
+ };
354
+
355
+ if (!n_wqs_added ) {
356
+ pr_debug ("couldn't find any iaa wqs!\n" );
357
+ ret = - EINVAL ;
358
+ goto out ;
359
+ }
360
+ out :
361
+ return ret ;
362
+ }
363
+
364
+ /*
365
+ * Rebalance the wq table so that given a cpu, it's easy to find the
366
+ * closest IAA instance. The idea is to try to choose the most
367
+ * appropriate IAA instance for a caller and spread available
368
+ * workqueues around to clients.
369
+ */
370
+ static void rebalance_wq_table (void )
371
+ {
372
+ const struct cpumask * node_cpus ;
373
+ int node , cpu , iaa = -1 ;
374
+
375
+ if (nr_iaa == 0 )
376
+ return ;
377
+
378
+ pr_debug ("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n" ,
379
+ nr_nodes , nr_cpus , nr_iaa , cpus_per_iaa );
380
+
381
+ clear_wq_table ();
382
+
383
+ if (nr_iaa == 1 ) {
384
+ for (cpu = 0 ; cpu < nr_cpus ; cpu ++ ) {
385
+ if (WARN_ON (wq_table_add_wqs (0 , cpu ))) {
386
+ pr_debug ("could not add any wqs for iaa 0 to cpu %d!\n" , cpu );
387
+ return ;
388
+ }
389
+ }
390
+
391
+ return ;
392
+ }
393
+
394
+ for_each_online_node (node ) {
395
+ node_cpus = cpumask_of_node (node );
396
+
397
+ for (cpu = 0 ; cpu < nr_cpus_per_node ; cpu ++ ) {
398
+ int node_cpu = cpumask_nth (cpu , node_cpus );
399
+
400
+ if ((cpu % cpus_per_iaa ) == 0 )
401
+ iaa ++ ;
402
+
403
+ if (WARN_ON (wq_table_add_wqs (iaa , node_cpu ))) {
404
+ pr_debug ("could not add any wqs for iaa %d to cpu %d!\n" , iaa , cpu );
405
+ return ;
406
+ }
407
+ }
408
+ }
210
409
}
211
410
212
411
static int iaa_crypto_probe (struct idxd_dev * idxd_dev )
@@ -215,6 +414,7 @@ static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
215
414
struct idxd_device * idxd = wq -> idxd ;
216
415
struct idxd_driver_data * data = idxd -> data ;
217
416
struct device * dev = & idxd_dev -> conf_dev ;
417
+ bool first_wq = false;
218
418
int ret = 0 ;
219
419
220
420
if (idxd -> state != IDXD_DEV_ENABLED )
@@ -245,17 +445,30 @@ static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
245
445
246
446
mutex_lock (& iaa_devices_lock );
247
447
448
+ if (list_empty (& iaa_devices )) {
449
+ ret = alloc_wq_table (wq -> idxd -> max_wqs );
450
+ if (ret )
451
+ goto err_alloc ;
452
+ first_wq = true;
453
+ }
454
+
248
455
ret = save_iaa_wq (wq );
249
456
if (ret )
250
457
goto err_save ;
251
458
459
+ rebalance_wq_table ();
460
+
252
461
mutex_unlock (& iaa_devices_lock );
253
462
out :
254
463
mutex_unlock (& wq -> wq_lock );
255
464
256
465
return ret ;
257
466
258
467
err_save :
468
+ if (first_wq )
469
+ free_wq_table ();
470
+ err_alloc :
471
+ mutex_unlock (& iaa_devices_lock );
259
472
idxd_drv_disable_wq (wq );
260
473
err :
261
474
wq -> type = IDXD_WQT_NONE ;
@@ -273,7 +486,12 @@ static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
273
486
mutex_lock (& iaa_devices_lock );
274
487
275
488
remove_iaa_wq (wq );
489
+
276
490
idxd_drv_disable_wq (wq );
491
+ rebalance_wq_table ();
492
+
493
+ if (nr_iaa == 0 )
494
+ free_wq_table ();
277
495
278
496
mutex_unlock (& iaa_devices_lock );
279
497
mutex_unlock (& wq -> wq_lock );
@@ -295,6 +513,10 @@ static int __init iaa_crypto_init_module(void)
295
513
{
296
514
int ret = 0 ;
297
515
516
+ nr_cpus = num_online_cpus ();
517
+ nr_nodes = num_online_nodes ();
518
+ nr_cpus_per_node = nr_cpus / nr_nodes ;
519
+
298
520
ret = idxd_driver_register (& iaa_crypto_driver );
299
521
if (ret ) {
300
522
pr_debug ("IAA wq sub-driver registration failed\n" );
0 commit comments