
Commit c36326d

shayshyi authored and Saeed Mahameed committed
net/mlx5: Round-Robin EQs over IRQs
Whenever a user provides an affinity for an EQ creation request, map the EQ to a matching IRQ. A matching IRQ is an IRQ with the same affinity and type (completion/control) as the EQ being created.

This mapping is done with an aggressive dedicated IRQ allocation scheme, described below.

First, check whether there is a matching IRQ whose min threshold is not yet exhausted:
- min_eqs_threshold = 3 for control EQs.
- min_eqs_threshold = 1 for completion EQs.

If no such IRQ is found, try to request a new IRQ. If a new IRQ cannot be requested, reuse the least-used matching IRQ.

Signed-off-by: Shay Drory <[email protected]>
Reviewed-by: Leon Romanovsky <[email protected]>
Reviewed-by: Tariq Toukan <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
1 parent c8ea212 commit c36326d
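
The selection policy described in the commit message can be summarized with a small, self-contained C sketch. This is an illustration only, not the driver code: the names sketch_irq, sketch_pool and pick_irq are hypothetical, affinity is reduced to a single CPU id, and the real implementation in the pci_irq.c hunk below uses struct mlx5_irq, kref reference counts, an xarray and a per-pool mutex.

#include <stddef.h>

struct sketch_irq {
	int refs; /* number of EQs already served by this IRQ */
	int cpu;  /* simplified "affinity": a single CPU id */
};

struct sketch_pool {
	struct sketch_irq *irqs; /* backing array, capacity max_irqs */
	int nr_irqs;             /* IRQs already requested from the system */
	int max_irqs;            /* hard cap on IRQs in this pool */
	int min_threshold;       /* below this, an IRQ is "not exhausted" */
};

/* Pick an IRQ for an EQ pinned to @cpu:
 * 1) a matching IRQ whose min threshold is not exhausted, else
 * 2) a newly requested IRQ, else
 * 3) the least-used matching IRQ (NULL if nothing matches at all).
 */
static struct sketch_irq *pick_irq(struct sketch_pool *pool, int cpu)
{
	struct sketch_irq *least = NULL;
	int i;

	for (i = 0; i < pool->nr_irqs; i++) {
		struct sketch_irq *irq = &pool->irqs[i];

		if (irq->cpu != cpu)
			continue;
		if (irq->refs < pool->min_threshold)
			return irq; /* matching and not exhausted */
		if (!least || irq->refs < least->refs)
			least = irq; /* remember least-used match */
	}

	if (pool->nr_irqs < pool->max_irqs) { /* try a brand new IRQ */
		struct sketch_irq *irq = &pool->irqs[pool->nr_irqs++];

		irq->cpu = cpu;
		irq->refs = 0;
		return irq;
	}

	return least; /* reuse least-used matching IRQ */
}

A caller would then bump irq->refs for the chosen IRQ, the way the real code takes a kref; in the driver, min_threshold corresponds to the per-pool min_eqs_threshold from the message, and "request a new IRQ" maps to irq_pool_create_irq().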

4 files changed: +189 −29 lines

drivers/infiniband/hw/mlx5/odp.c

Lines changed: 1 addition & 2 deletions

@@ -1559,8 +1559,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
 	}
 
 	eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
-	param = (struct mlx5_eq_param){
-		.irq_index = 0,
+	param = (struct mlx5_eq_param) {
 		.nent = MLX5_IB_NUM_PF_EQE,
 	};
 	param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;

drivers/net/ethernet/mellanox/mlx5/core/eq.c

Lines changed: 8 additions & 6 deletions

@@ -263,7 +263,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 	u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
 	u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
 	struct mlx5_priv *priv = &dev->priv;
-	u8 vecidx = param->irq_index;
+	u16 vecidx = param->irq_index;
 	__be64 *pas;
 	void *eqc;
 	int inlen;
@@ -292,6 +292,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 		goto err_buf;
 	}
 
+	vecidx = mlx5_irq_get_index(eq->irq);
 	inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
 		MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
 
@@ -629,7 +630,6 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
 	mlx5_eq_notifier_register(dev, &table->cq_err_nb);
 
 	param = (struct mlx5_eq_param) {
-		.irq_index = 0,
 		.nent = MLX5_NUM_CMD_EQE,
 		.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
 	};
@@ -642,7 +642,6 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
 	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
 
 	param = (struct mlx5_eq_param) {
-		.irq_index = 0,
 		.nent = MLX5_NUM_ASYNC_EQE,
 	};
 
@@ -652,7 +651,6 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
 		goto err2;
 
 	param = (struct mlx5_eq_param) {
-		.irq_index = 0,
 		.nent = /* TODO: sriov max_vf + */ 1,
 		.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
 	};
@@ -985,15 +983,19 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
 	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
 		      MLX5_CAP_GEN(dev, max_num_eqs) :
 		      1 << MLX5_CAP_GEN(dev, log_max_eq);
+	int max_eqs_sf;
 	int err;
 
 	eq_table->num_comp_eqs =
 		min_t(int,
 		      mlx5_irq_table_get_num_comp(eq_table->irq_table),
 		      num_eqs - MLX5_MAX_ASYNC_EQS);
-	if (mlx5_core_is_sf(dev))
+	if (mlx5_core_is_sf(dev)) {
+		max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF,
+				   mlx5_irq_table_get_sfs_vec(eq_table->irq_table));
 		eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs,
-					       MLX5_COMP_EQS_PER_SF);
+					       max_eqs_sf);
+	}
 
 	err = create_async_eqs(dev);
 	if (err) {

drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h

Lines changed: 3 additions & 1 deletion

@@ -17,17 +17,19 @@ void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
 int mlx5_irq_table_create(struct mlx5_core_dev *dev);
 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
+int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table);
 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
 
 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
 			    int msix_vec_count);
 int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
 
-struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
 				  struct cpumask *affinity);
 void mlx5_irq_release(struct mlx5_irq *irq);
 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
 struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq);
+int mlx5_irq_get_index(struct mlx5_irq *irq);
 
 #endif /* __MLX5_IRQ_H__ */

drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c

Lines changed: 177 additions & 20 deletions

@@ -7,7 +7,7 @@
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
 #include "mlx5_irq.h"
-#include "sf/sf.h"
+#include "lib/sf.h"
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
@@ -21,6 +21,12 @@
 /* min num of vectores for SFs to be enabled */
 #define MLX5_IRQ_VEC_COMP_BASE_SF 2
 
+#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
+#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
+#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
+#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
+#define MLX5_EQ_REFS_PER_IRQ (2)
+
 struct mlx5_irq {
 	u32 index;
 	struct atomic_notifier_head nh;
@@ -34,7 +40,10 @@ struct mlx5_irq {
 struct mlx5_irq_pool {
 	char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
 	struct xa_limit xa_num_irqs;
+	struct mutex lock; /* sync IRQs creations */
 	struct xarray irqs;
+	u32 max_threshold;
+	u32 min_threshold;
 	struct mlx5_core_dev *dev;
 };
 
@@ -147,7 +156,11 @@ static void irq_release(struct kref *kref)
 
 static void irq_put(struct mlx5_irq *irq)
 {
+	struct mlx5_irq_pool *pool = irq->pool;
+
+	mutex_lock(&pool->lock);
 	kref_put(&irq->kref, irq_release);
+	mutex_unlock(&pool->lock);
 }
 
 static irqreturn_t irq_int_handler(int irq, void *nh)
@@ -201,15 +214,15 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
 		err = -ENOMEM;
 		goto err_cpumask;
 	}
-	err = xa_alloc(&pool->irqs, &irq->index, irq, pool->xa_num_irqs,
-		       GFP_KERNEL);
+	kref_init(&irq->kref);
+	irq->index = i;
+	err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
 	if (err) {
 		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
 			      irq->index, err);
 		goto err_xa;
 	}
 	irq->pool = pool;
-	kref_init(&irq->kref);
 	return irq;
 err_xa:
 	free_cpumask_var(irq->mask);
@@ -247,6 +260,124 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
 	return irq->mask;
 }
 
+int mlx5_irq_get_index(struct mlx5_irq *irq)
+{
+	return irq->index;
+}
+
+/* irq_pool API */
+
+/* creating an irq from irq_pool */
+static struct mlx5_irq *irq_pool_create_irq(struct mlx5_irq_pool *pool,
+					    struct cpumask *affinity)
+{
+	struct mlx5_irq *irq;
+	u32 irq_index;
+	int err;
+
+	err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs,
+		       GFP_KERNEL);
+	if (err)
+		return ERR_PTR(err);
+	irq = irq_request(pool, irq_index);
+	if (IS_ERR(irq))
+		return irq;
+	cpumask_copy(irq->mask, affinity);
+	irq_set_affinity_hint(irq->irqn, irq->mask);
+	return irq;
+}
+
+/* looking for the irq with the smallest refcount and the same affinity */
+static struct mlx5_irq *irq_pool_find_least_loaded(struct mlx5_irq_pool *pool,
+						   struct cpumask *affinity)
+{
+	int start = pool->xa_num_irqs.min;
+	int end = pool->xa_num_irqs.max;
+	struct mlx5_irq *irq = NULL;
+	struct mlx5_irq *iter;
+	unsigned long index;
+
+	lockdep_assert_held(&pool->lock);
+	xa_for_each_range(&pool->irqs, index, iter, start, end) {
+		if (!cpumask_equal(iter->mask, affinity))
+			continue;
+		if (kref_read(&iter->kref) < pool->min_threshold)
+			return iter;
+		if (!irq || kref_read(&iter->kref) <
+		    kref_read(&irq->kref))
+			irq = iter;
+	}
+	return irq;
+}
+
+/* requesting an irq from a given pool according to given affinity */
+static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool,
+						  struct cpumask *affinity)
+{
+	struct mlx5_irq *least_loaded_irq, *new_irq;
+
+	mutex_lock(&pool->lock);
+	least_loaded_irq = irq_pool_find_least_loaded(pool, affinity);
+	if (least_loaded_irq &&
+	    kref_read(&least_loaded_irq->kref) < pool->min_threshold)
+		goto out;
+	new_irq = irq_pool_create_irq(pool, affinity);
+	if (IS_ERR(new_irq)) {
+		if (!least_loaded_irq) {
+			mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n",
+				      cpumask_first(affinity));
+			mutex_unlock(&pool->lock);
+			return new_irq;
+		}
+		/* We failed to create a new IRQ for the requested affinity,
+		 * sharing existing IRQ.
+		 */
+		goto out;
+	}
+	least_loaded_irq = new_irq;
+	goto unlock;
+out:
+	kref_get(&least_loaded_irq->kref);
+	if (kref_read(&least_loaded_irq->kref) > pool->max_threshold)
+		mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
+			      least_loaded_irq->irqn, pool->name,
+			      kref_read(&least_loaded_irq->kref) / MLX5_EQ_REFS_PER_IRQ);
unlock:
+	mutex_unlock(&pool->lock);
+	return least_loaded_irq;
+}
+
+/* requesting an irq from a given pool according to given index */
+static struct mlx5_irq *
+irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
+			struct cpumask *affinity)
+{
+	struct mlx5_irq *irq;
+
+	mutex_lock(&pool->lock);
+	irq = xa_load(&pool->irqs, vecidx);
+	if (irq) {
+		kref_get(&irq->kref);
+		goto unlock;
+	}
+	irq = irq_request(pool, vecidx);
+	if (IS_ERR(irq) || !affinity)
+		goto unlock;
+	cpumask_copy(irq->mask, affinity);
+	irq_set_affinity_hint(irq->irqn, irq->mask);
+unlock:
+	mutex_unlock(&pool->lock);
+	return irq;
+}
+
+static struct mlx5_irq_pool *find_sf_irq_pool(struct mlx5_irq_table *irq_table,
+					      int i, struct cpumask *affinity)
+{
+	if (cpumask_empty(affinity) && i == MLX5_IRQ_EQ_CTRL)
+		return irq_table->sf_ctrl_pool;
+	return irq_table->sf_comp_pool;
+}
+
 /**
  * mlx5_irq_release - release an IRQ back to the system.
  * @irq: irq to be released.
@@ -266,32 +397,40 @@ void mlx5_irq_release(struct mlx5_irq *irq)
  *
  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
  */
-struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
 				  struct cpumask *affinity)
 {
 	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
 	struct mlx5_irq_pool *pool;
 	struct mlx5_irq *irq;
 
-	pool = irq_table->pf_pool;
-
-	irq = xa_load(&pool->irqs, vecidx);
-	if (irq) {
-		kref_get(&irq->kref);
-		return irq;
+	if (mlx5_core_is_sf(dev)) {
+		pool = find_sf_irq_pool(irq_table, vecidx, affinity);
+		if (!pool)
+			/* we don't have IRQs for SFs, using the PF IRQs */
+			goto pf_irq;
+		if (cpumask_empty(affinity) && !strcmp(pool->name, "mlx5_sf_comp"))
+			/* In case an SF user request IRQ with vecidx */
+			irq = irq_pool_request_vector(pool, vecidx, NULL);
+		else
+			irq = irq_pool_request_affinity(pool, affinity);
+		goto out;
 	}
-	irq = irq_request(pool, vecidx);
+pf_irq:
+	pool = irq_table->pf_pool;
+	irq = irq_pool_request_vector(pool, vecidx, affinity);
+out:
 	if (IS_ERR(irq))
 		return irq;
-	cpumask_copy(irq->mask, affinity);
-	irq_set_affinity_hint(irq->irqn, irq->mask);
+	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
+		      irq->irqn, cpumask_pr_args(affinity),
+		      kref_read(&irq->kref) / MLX5_EQ_REFS_PER_IRQ);
 	return irq;
 }
 
-/* irq_pool API */
-
 static struct mlx5_irq_pool *
-irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name)
+irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
+	       u32 min_threshold, u32 max_threshold)
 {
 	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
 
@@ -304,6 +443,9 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name)
 	if (name)
 		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
 			 name);
+	pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
+	pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
+	mutex_init(&pool->lock);
 	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
 		      name, size, start);
 	return pool;
@@ -329,7 +471,9 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
 	int err;
 
 	/* init pf_pool */
-	table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL);
+	table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL,
+					MLX5_EQ_SHARE_IRQ_MIN_COMP,
+					MLX5_EQ_SHARE_IRQ_MAX_COMP);
 	if (IS_ERR(table->pf_pool))
 		return PTR_ERR(table->pf_pool);
 	if (!mlx5_sf_max_functions(dev))
@@ -346,14 +490,18 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
 	num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
 	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
 	table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
-					     "mlx5_sf_ctrl");
+					     "mlx5_sf_ctrl",
+					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
+					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
 	if (IS_ERR(table->sf_ctrl_pool)) {
 		err = PTR_ERR(table->sf_ctrl_pool);
 		goto err_pf;
 	}
 	/* init sf_comp_pool */
 	table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
-					     sf_vec - num_sf_ctrl, "mlx5_sf_comp");
+					     sf_vec - num_sf_ctrl, "mlx5_sf_comp",
+					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
+					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
 	if (IS_ERR(table->sf_comp_pool)) {
 		err = PTR_ERR(table->sf_comp_pool);
 		goto err_sf_ctrl;
@@ -455,6 +603,15 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 	pci_free_irq_vectors(dev->pdev);
 }
 
+int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
+{
+	if (table->sf_comp_pool)
+		return table->sf_comp_pool->xa_num_irqs.max -
+			table->sf_comp_pool->xa_num_irqs.min + 1;
+	else
+		return mlx5_irq_table_get_num_comp(table);
+}
+
 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
 {
 #ifdef CONFIG_MLX5_SF
