
Commit e52e150

IB/mad: Check available slots before posting receive WRs
JIRA: https://issues.redhat.com/browse/RHEL-75591

commit 37826f0
Author: Maher Sanalla <[email protected]>
Date: Thu Mar 13 16:20:17 2025 +0200

IB/mad: Check available slots before posting receive WRs

The ib_mad_post_receive_mads() function handles posting receive work
requests (WRs) to MAD QPs and is called in two cases:
1) When a MAD port is opened.
2) When a receive WQE is consumed upon receiving a new MAD.

However, if MADs arrive during the port open phase, a race condition
might cause an extra WR to be posted, exceeding the QP’s capacity.
This leads to failures such as:

infiniband mlx5_0: ib_post_recv failed: -12
infiniband mlx5_0: Couldn't post receive WRs
infiniband mlx5_0: Couldn't start port
infiniband mlx5_0: Couldn't open port 1

Fix this by checking the current receive count before posting a new WR.
If the QP’s receive queue is full, do not post additional WRs.

Fixes: 1da177e ("Linux-2.6.12-rc2")
Signed-off-by: Maher Sanalla <[email protected]>
Link: https://patch.msgid.link/c4984ba3c3a98a5711a558bccefcad789587ecf1.1741875592.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <[email protected]>
Signed-off-by: Kamal Heib <[email protected]>
1 parent 24e83b7 commit e52e150
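
To make the pattern behind the fix easier to see, here is a minimal, self-contained user-space C sketch (illustration only, not kernel code): a lock-protected queue whose refill loop checks count against max_active before posting another receive, the same ordering the patch introduces in ib_mad_post_receive_mads(). The struct recv_queue, post_one_wr(), and refill_receive_queue() names are invented for this example; only the capacity check and the rollback on a failed post mirror the real change.

/*
 * Illustrative sketch of the bounded-post pattern; these names are not
 * kernel APIs. The real change lives in ib_mad_post_receive_mads() in
 * drivers/infiniband/core/mad.c.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct recv_queue {
        pthread_mutex_t lock;
        int count;              /* WRs currently posted */
        int max_active;         /* QP receive-queue capacity */
};

/* Stand-in for ib_post_recv(): pretend posting always succeeds. */
static int post_one_wr(void)
{
        return 0;
}

/* Keep posting until the receive queue is fully populated. */
static int refill_receive_queue(struct recv_queue *q)
{
        int ret = 0;

        while (true) {
                pthread_mutex_lock(&q->lock);
                if (q->count >= q->max_active) {
                        /* Queue already full: stop, do not overpost. */
                        pthread_mutex_unlock(&q->lock);
                        break;
                }
                q->count++;
                pthread_mutex_unlock(&q->lock);

                ret = post_one_wr();
                if (ret) {
                        /* Posting failed: undo the optimistic increment. */
                        pthread_mutex_lock(&q->lock);
                        q->count--;
                        pthread_mutex_unlock(&q->lock);
                        break;
                }
        }
        return ret;
}

int main(void)
{
        struct recv_queue q = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .count = 0,
                .max_active = 4,
        };

        refill_receive_queue(&q);
        printf("posted %d of %d WRs\n", q.count, q.max_active);
        return 0;
}

Because the capacity check happens under the lock before the count is bumped, two contexts refilling concurrently (port open racing with receive completions) can never push the posted count past the QP's receive-queue size.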

File tree

1 file changed (+20 -18 lines)

  • drivers/infiniband/core/mad.c


drivers/infiniband/core/mad.c

Lines changed: 20 additions & 18 deletions
@@ -2671,11 +2671,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
                                     struct ib_mad_private *mad)
 {
         unsigned long flags;
-        int post, ret;
         struct ib_mad_private *mad_priv;
         struct ib_sge sg_list;
         struct ib_recv_wr recv_wr;
         struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
+        int ret = 0;
 
         /* Initialize common scatter list fields */
         sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey;
@@ -2685,18 +2685,16 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
         recv_wr.sg_list = &sg_list;
         recv_wr.num_sge = 1;
 
-        do {
+        while (true) {
                 /* Allocate and map receive buffer */
                 if (mad) {
                         mad_priv = mad;
                         mad = NULL;
                 } else {
                         mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
                                                      GFP_ATOMIC);
-                        if (!mad_priv) {
-                                ret = -ENOMEM;
-                                break;
-                        }
+                        if (!mad_priv)
+                                return -ENOMEM;
                 }
                 sg_list.length = mad_priv_dma_size(mad_priv);
                 sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
@@ -2705,37 +2703,41 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
                                                  DMA_FROM_DEVICE);
                 if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
                                                   sg_list.addr))) {
-                        kfree(mad_priv);
                         ret = -ENOMEM;
-                        break;
+                        goto free_mad_priv;
                 }
                 mad_priv->header.mapping = sg_list.addr;
                 mad_priv->header.mad_list.mad_queue = recv_queue;
                 mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
                 recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
-
-                /* Post receive WR */
                 spin_lock_irqsave(&recv_queue->lock, flags);
-                post = (++recv_queue->count < recv_queue->max_active);
-                list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
+                if (recv_queue->count >= recv_queue->max_active) {
+                        /* Fully populated the receive queue */
+                        spin_unlock_irqrestore(&recv_queue->lock, flags);
+                        break;
+                }
+                recv_queue->count++;
+                list_add_tail(&mad_priv->header.mad_list.list,
+                              &recv_queue->list);
                 spin_unlock_irqrestore(&recv_queue->lock, flags);
+
                 ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
                 if (ret) {
                         spin_lock_irqsave(&recv_queue->lock, flags);
                         list_del(&mad_priv->header.mad_list.list);
                         recv_queue->count--;
                         spin_unlock_irqrestore(&recv_queue->lock, flags);
-                        ib_dma_unmap_single(qp_info->port_priv->device,
-                                            mad_priv->header.mapping,
-                                            mad_priv_dma_size(mad_priv),
-                                            DMA_FROM_DEVICE);
-                        kfree(mad_priv);
                         dev_err(&qp_info->port_priv->device->dev,
                                 "ib_post_recv failed: %d\n", ret);
                         break;
                 }
-        } while (post);
+        }
 
+        ib_dma_unmap_single(qp_info->port_priv->device,
+                            mad_priv->header.mapping,
+                            mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE);
+free_mad_priv:
+        kfree(mad_priv);
         return ret;
 }