Skip to content

Commit fd10ed8

Browse files
Jack Morgenstein authored and dledford committed
IB/mlx4: Fix possible vl/sl field mismatch in LRH header in QP1 packets
In MLX qp packets, the LRH (built by the driver) has both a VL field and an SL field. When building a QP1 packet, the VL field should reflect the SLtoVL mapping and not arbitrarily contain zero (as is done now). This bug causes credit problems in IB switches at high rates of QP1 packets. The fix is to cache the SL to VL mapping in the driver, and look up the VL mapped to the SL provided in the send request when sending QP1 packets. For FW versions which support generating a port_management_config_change event with subtype sl-to-vl-table-change, the driver uses that event to update its sl-to-vl mapping cache. Otherwise, the driver snoops incoming SMP mads to update the cache. There remains the case where the FW is running in secure-host mode (so no QP0 packets are delivered to the driver), and the FW does not generate the sl2vl mapping change event. To support this case, the driver updates (via querying the FW) its sl2vl mapping cache when running in secure-host mode when it receives either a Port Up event or a client-reregister event (where the port is still up, but there may have been an opensm failover). OpenSM modifies the sl2vl mapping before Port Up and Client-reregister events occur, so if there is a mapping change the driver's cache will be properly updated. Fixes: 225c7b1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Signed-off-by: Jack Morgenstein <[email protected]> Signed-off-by: Leon Romanovsky <[email protected]> Signed-off-by: Doug Ledford <[email protected]>
1 parent 486f609 commit fd10ed8

File tree

6 files changed

+220
-10
lines changed

6 files changed

+220
-10
lines changed

drivers/infiniband/hw/mlx4/mad.c

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,8 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad
230230
mad->mad_hdr.method == IB_MGMT_METHOD_SET)
231231
switch (mad->mad_hdr.attr_id) {
232232
case IB_SMP_ATTR_PORT_INFO:
233+
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
234+
return;
233235
pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data;
234236
lid = be16_to_cpu(pinfo->lid);
235237

@@ -245,6 +247,8 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad
245247
break;
246248

247249
case IB_SMP_ATTR_PKEY_TABLE:
250+
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
251+
return;
248252
if (!mlx4_is_mfunc(dev->dev)) {
249253
mlx4_ib_dispatch_event(dev, port_num,
250254
IB_EVENT_PKEY_CHANGE);
@@ -281,6 +285,8 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad
281285
break;
282286

283287
case IB_SMP_ATTR_GUID_INFO:
288+
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
289+
return;
284290
/* paravirtualized master's guid is guid 0 -- does not change */
285291
if (!mlx4_is_master(dev->dev))
286292
mlx4_ib_dispatch_event(dev, port_num,
@@ -296,6 +302,26 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad
296302
}
297303
break;
298304

305+
case IB_SMP_ATTR_SL_TO_VL_TABLE:
306+
/* cache sl to vl mapping changes for use in
307+
* filling QP1 LRH VL field when sending packets
308+
*/
309+
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV &&
310+
dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)
311+
return;
312+
if (!mlx4_is_slave(dev->dev)) {
313+
union sl2vl_tbl_to_u64 sl2vl64;
314+
int jj;
315+
316+
for (jj = 0; jj < 8; jj++) {
317+
sl2vl64.sl8[jj] = ((struct ib_smp *)mad)->data[jj];
318+
pr_debug("port %u, sl2vl[%d] = %02x\n",
319+
port_num, jj, sl2vl64.sl8[jj]);
320+
}
321+
atomic64_set(&dev->sl2vl[port_num - 1], sl2vl64.sl64);
322+
}
323+
break;
324+
299325
default:
300326
break;
301327
}
@@ -806,8 +832,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
806832
return IB_MAD_RESULT_FAILURE;
807833

808834
if (!out_mad->mad_hdr.status) {
809-
if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV))
810-
smp_snoop(ibdev, port_num, in_mad, prev_lid);
835+
smp_snoop(ibdev, port_num, in_mad, prev_lid);
811836
/* slaves get node desc from FW */
812837
if (!mlx4_is_slave(to_mdev(ibdev)->dev))
813838
node_desc_override(ibdev, out_mad);
@@ -1038,6 +1063,23 @@ static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
10381063
MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK);
10391064
}
10401065
}
1066+
1067+
/* Update the sl to vl table from inside client rereg
1068+
* only if in secure-host mode (snooping is not possible)
1069+
* and the sl-to-vl change event is not generated by FW.
1070+
*/
1071+
if (!mlx4_is_slave(dev->dev) &&
1072+
dev->dev->flags & MLX4_FLAG_SECURE_HOST &&
1073+
!(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)) {
1074+
if (mlx4_is_master(dev->dev))
1075+
/* already in work queue from mlx4_ib_event queueing
1076+
* mlx4_handle_port_mgmt_change_event, which calls
1077+
* this procedure. Therefore, call sl2vl_update directly.
1078+
*/
1079+
mlx4_ib_sl2vl_update(dev, port_num);
1080+
else
1081+
mlx4_sched_ib_sl2vl_update_work(dev, port_num);
1082+
}
10411083
mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
10421084
}
10431085

@@ -1156,6 +1198,24 @@ void handle_port_mgmt_change_event(struct work_struct *work)
11561198
handle_slaves_guid_change(dev, port, tbl_block, change_bitmap);
11571199
}
11581200
break;
1201+
1202+
case MLX4_DEV_PMC_SUBTYPE_SL_TO_VL_MAP:
1203+
/* cache sl to vl mapping changes for use in
1204+
* filling QP1 LRH VL field when sending packets
1205+
*/
1206+
if (!mlx4_is_slave(dev->dev)) {
1207+
union sl2vl_tbl_to_u64 sl2vl64;
1208+
int jj;
1209+
1210+
for (jj = 0; jj < 8; jj++) {
1211+
sl2vl64.sl8[jj] =
1212+
eqe->event.port_mgmt_change.params.sl2vl_tbl_change_info.sl2vl_table[jj];
1213+
pr_debug("port %u, sl2vl[%d] = %02x\n",
1214+
port, jj, sl2vl64.sl8[jj]);
1215+
}
1216+
atomic64_set(&dev->sl2vl[port - 1], sl2vl64.sl64);
1217+
}
1218+
break;
11591219
default:
11601220
pr_warn("Unsupported subtype 0x%x for "
11611221
"Port Management Change event\n", eqe->subtype);

drivers/infiniband/hw/mlx4/main.c

Lines changed: 108 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,66 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
832832
return ret;
833833
}
834834

835+
static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
836+
{
837+
union sl2vl_tbl_to_u64 sl2vl64;
838+
struct ib_smp *in_mad = NULL;
839+
struct ib_smp *out_mad = NULL;
840+
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
841+
int err = -ENOMEM;
842+
int jj;
843+
844+
if (mlx4_is_slave(to_mdev(ibdev)->dev)) {
845+
*sl2vl_tbl = 0;
846+
return 0;
847+
}
848+
849+
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
850+
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
851+
if (!in_mad || !out_mad)
852+
goto out;
853+
854+
init_query_mad(in_mad);
855+
in_mad->attr_id = IB_SMP_ATTR_SL_TO_VL_TABLE;
856+
in_mad->attr_mod = 0;
857+
858+
if (mlx4_is_mfunc(to_mdev(ibdev)->dev))
859+
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
860+
861+
err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
862+
in_mad, out_mad);
863+
if (err)
864+
goto out;
865+
866+
for (jj = 0; jj < 8; jj++)
867+
sl2vl64.sl8[jj] = ((struct ib_smp *)out_mad)->data[jj];
868+
*sl2vl_tbl = sl2vl64.sl64;
869+
870+
out:
871+
kfree(in_mad);
872+
kfree(out_mad);
873+
return err;
874+
}
875+
876+
static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
877+
{
878+
u64 sl2vl;
879+
int i;
880+
int err;
881+
882+
for (i = 1; i <= mdev->dev->caps.num_ports; i++) {
883+
if (mdev->dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
884+
continue;
885+
err = mlx4_ib_query_sl2vl(&mdev->ib_dev, i, &sl2vl);
886+
if (err) {
887+
pr_err("Unable to get default sl to vl mapping for port %d. Using all zeroes (%d)\n",
888+
i, err);
889+
sl2vl = 0;
890+
}
891+
atomic64_set(&mdev->sl2vl[i - 1], sl2vl);
892+
}
893+
}
894+
835895
int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
836896
u16 *pkey, int netw_view)
837897
{
@@ -2675,6 +2735,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
26752735

26762736
if (init_node_data(ibdev))
26772737
goto err_map;
2738+
mlx4_init_sl2vl_tbl(ibdev);
26782739

26792740
for (i = 0; i < ibdev->num_ports; ++i) {
26802741
mutex_init(&ibdev->counters_table[i].mutex);
@@ -3123,6 +3184,47 @@ static void handle_bonded_port_state_event(struct work_struct *work)
31233184
ib_dispatch_event(&ibev);
31243185
}
31253186

3187+
void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port)
3188+
{
3189+
u64 sl2vl;
3190+
int err;
3191+
3192+
err = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &sl2vl);
3193+
if (err) {
3194+
pr_err("Unable to get current sl to vl mapping for port %d. Using all zeroes (%d)\n",
3195+
port, err);
3196+
sl2vl = 0;
3197+
}
3198+
atomic64_set(&mdev->sl2vl[port - 1], sl2vl);
3199+
}
3200+
3201+
static void ib_sl2vl_update_work(struct work_struct *work)
3202+
{
3203+
struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
3204+
struct mlx4_ib_dev *mdev = ew->ib_dev;
3205+
int port = ew->port;
3206+
3207+
mlx4_ib_sl2vl_update(mdev, port);
3208+
3209+
kfree(ew);
3210+
}
3211+
3212+
void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
3213+
int port)
3214+
{
3215+
struct ib_event_work *ew;
3216+
3217+
ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
3218+
if (ew) {
3219+
INIT_WORK(&ew->work, ib_sl2vl_update_work);
3220+
ew->port = port;
3221+
ew->ib_dev = ibdev;
3222+
queue_work(wq, &ew->work);
3223+
} else {
3224+
pr_err("failed to allocate memory for sl2vl update work\n");
3225+
}
3226+
}
3227+
31263228
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
31273229
enum mlx4_dev_event event, unsigned long param)
31283230
{
@@ -3153,10 +3255,14 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
31533255
case MLX4_DEV_EVENT_PORT_UP:
31543256
if (p > ibdev->num_ports)
31553257
return;
3156-
if (mlx4_is_master(dev) &&
3258+
if (!mlx4_is_slave(dev) &&
31573259
rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
31583260
IB_LINK_LAYER_INFINIBAND) {
3159-
mlx4_ib_invalidate_all_guid_record(ibdev, p);
3261+
if (mlx4_is_master(dev))
3262+
mlx4_ib_invalidate_all_guid_record(ibdev, p);
3263+
if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST &&
3264+
!(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT))
3265+
mlx4_sched_ib_sl2vl_update_work(ibdev, p);
31603266
}
31613267
ibev.event = IB_EVENT_PORT_ACTIVE;
31623268
break;

drivers/infiniband/hw/mlx4/mlx4_ib.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,7 @@ struct mlx4_ib_dev {
570570
struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
571571
struct ib_ah *sm_ah[MLX4_MAX_PORTS];
572572
spinlock_t sm_lock;
573+
atomic64_t sl2vl[MLX4_MAX_PORTS];
573574
struct mlx4_ib_sriov sriov;
574575

575576
struct mutex cap_mask_mutex;
@@ -600,6 +601,7 @@ struct ib_event_work {
600601
struct work_struct work;
601602
struct mlx4_ib_dev *ib_dev;
602603
struct mlx4_eqe ib_eqe;
604+
int port;
603605
};
604606

605607
struct mlx4_ib_qp_tunnel_init_attr {
@@ -883,4 +885,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
883885
int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
884886
u8 port_num, int index);
885887

888+
void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
889+
int port);
890+
891+
void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port);
892+
886893
#endif /* MLX4_IB_H */

drivers/infiniband/hw/mlx4/qp.c

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2405,6 +2405,22 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
24052405
return 0;
24062406
}
24072407

2408+
static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num)
2409+
{
2410+
union sl2vl_tbl_to_u64 tmp_vltab;
2411+
u8 vl;
2412+
2413+
if (sl > 15)
2414+
return 0xf;
2415+
tmp_vltab.sl64 = atomic64_read(&dev->sl2vl[port_num - 1]);
2416+
vl = tmp_vltab.sl8[sl >> 1];
2417+
if (sl & 1)
2418+
vl &= 0x0f;
2419+
else
2420+
vl >>= 4;
2421+
return vl;
2422+
}
2423+
24082424
#define MLX4_ROCEV2_QP1_SPORT 0xC000
24092425
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
24102426
void *wqe, unsigned *mlx_seg_len)
@@ -2587,7 +2603,12 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
25872603
sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
25882604
}
25892605
} else {
2590-
sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
2606+
sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 :
2607+
sl_to_vl(to_mdev(ib_dev),
2608+
sqp->ud_header.lrh.service_level,
2609+
sqp->qp.port);
2610+
if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15)
2611+
return -EINVAL;
25912612
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
25922613
sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
25932614
}

drivers/net/ethernet/mellanox/mlx4/fw.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
158158
[31] = "Modifying loopback source checks using UPDATE_QP support",
159159
[32] = "Loopback source checks support",
160160
[33] = "RoCEv2 support",
161-
[34] = "DMFS Sniffer support (UC & MC)"
161+
[34] = "DMFS Sniffer support (UC & MC)",
162+
[36] = "sl to vl mapping table change event support"
162163
};
163164
int i;
164165

@@ -703,6 +704,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
703704
#define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74
704705
#define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76
705706
#define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77
707+
#define QUERY_DEV_CAP_SL2VL_EVENT_OFFSET 0x78
706708
#define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE 0x7a
707709
#define QUERY_DEV_CAP_ECN_QCN_VER_OFFSET 0x7b
708710
#define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80
@@ -822,6 +824,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
822824
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB;
823825
MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET);
824826
dev_cap->fs_max_num_qp_per_entry = field;
827+
MLX4_GET(field, outbox, QUERY_DEV_CAP_SL2VL_EVENT_OFFSET);
828+
if (field & (1 << 5))
829+
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT;
825830
MLX4_GET(field, outbox, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET);
826831
if (field & 0x1)
827832
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QCN;
@@ -2698,7 +2703,6 @@ static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev,
26982703
int mlx4_config_mad_demux(struct mlx4_dev *dev)
26992704
{
27002705
struct mlx4_cmd_mailbox *mailbox;
2701-
int secure_host_active;
27022706
int err;
27032707

27042708
/* Check if mad_demux is supported */
@@ -2721,7 +2725,8 @@ int mlx4_config_mad_demux(struct mlx4_dev *dev)
27212725
goto out;
27222726
}
27232727

2724-
secure_host_active = mlx4_check_smp_firewall_active(dev, mailbox);
2728+
if (mlx4_check_smp_firewall_active(dev, mailbox))
2729+
dev->flags |= MLX4_FLAG_SECURE_HOST;
27252730

27262731
/* Config mad_demux to handle all MADs returned by the query above */
27272732
err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */,
@@ -2732,7 +2737,7 @@ int mlx4_config_mad_demux(struct mlx4_dev *dev)
27322737
goto out;
27332738
}
27342739

2735-
if (secure_host_active)
2740+
if (dev->flags & MLX4_FLAG_SECURE_HOST)
27362741
mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n");
27372742
out:
27382743
mlx4_free_cmd_mailbox(dev, mailbox);

0 commit comments

Comments (0)