Skip to content

Commit fd261ce

Browse files
rds: rdma: update rdma transport for tos
For RDMA transports, RDS TOS is an extension of IB QoS (Annex A13) that gives clients the ability to segregate traffic flows for different types of data. RDMA CM abstracts it for ULPs via rdma_set_service_type(). Internally, each traffic flow is represented by a connection that has its own independent resources, just like a normal connection, and is differentiated by its service type. In other words, there can be multiple QP connections between an IP pair, each supporting a unique service type. The feature is available from RDSv4.1 onwards and supports rolling upgrades. RDMA connection metadata also carries the TOS information so that the SL can be set up in the end-to-end context. The original code was developed by Bang Nguyen in the downstream kernel back in the 2.6.32 kernel days, and it has evolved over a period of time. Reviewed-by: Sowmini Varadhan <[email protected]> Signed-off-by: Santosh Shilimkar <[email protected]> [[email protected]: Adapted original patch with ipv6 changes] Signed-off-by: Zhu Yanjun <[email protected]>
1 parent 56dc8bc commit fd261ce

File tree

6 files changed

+29
-22
lines changed

6 files changed

+29
-22
lines changed

net/rds/ib.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,9 @@ struct rds_ib_conn_priv_cmn {
6767
u8 ricpc_protocol_major;
6868
u8 ricpc_protocol_minor;
6969
__be16 ricpc_protocol_minor_mask; /* bitmask */
70-
__be32 ricpc_reserved1;
70+
u8 ricpc_dp_toss;
71+
u8 ripc_reserved1;
72+
__be16 ripc_reserved2;
7173
__be64 ricpc_ack_seq;
7274
__be32 ricpc_credit; /* non-zero enables flow ctl */
7375
};

net/rds/ib_cm.c

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,9 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
144144
}
145145
}
146146

147-
pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n",
147+
pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c,%d> version %u.%u%s\n",
148148
ic->i_active_side ? "Active" : "Passive",
149-
&conn->c_laddr, &conn->c_faddr,
149+
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
150150
RDS_PROTOCOL_MAJOR(conn->c_version),
151151
RDS_PROTOCOL_MINOR(conn->c_version),
152152
ic->i_flowctl ? ", flow control" : "");
@@ -222,6 +222,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
222222
cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
223223
dp->ricp_v6.dp_ack_seq =
224224
cpu_to_be64(rds_ib_piggyb_ack(ic));
225+
dp->ricp_v6.dp_cmn.ricpc_dp_toss = conn->c_tos;
225226

226227
conn_param->private_data = &dp->ricp_v6;
227228
conn_param->private_data_len = sizeof(dp->ricp_v6);
@@ -236,6 +237,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
236237
cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
237238
dp->ricp_v4.dp_ack_seq =
238239
cpu_to_be64(rds_ib_piggyb_ack(ic));
240+
dp->ricp_v4.dp_cmn.ricpc_dp_toss = conn->c_tos;
239241

240242
conn_param->private_data = &dp->ricp_v4;
241243
conn_param->private_data_len = sizeof(dp->ricp_v4);
@@ -391,10 +393,9 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
391393
rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
392394
break;
393395
default:
394-
rdsdebug("Fatal QP Event %u (%s) "
395-
"- connection %pI6c->%pI6c, reconnecting\n",
396-
event->event, ib_event_msg(event->event),
397-
&conn->c_laddr, &conn->c_faddr);
396+
rdsdebug("Fatal QP Event %u (%s) - connection %pI6c->%pI6c, reconnecting\n",
397+
event->event, ib_event_msg(event->event),
398+
&conn->c_laddr, &conn->c_faddr);
398399
rds_conn_drop(conn);
399400
break;
400401
}
@@ -662,11 +663,11 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6)
662663

663664
/* Even if len is crap *now* I still want to check it. -ASG */
664665
if (event->param.conn.private_data_len < data_len || major == 0)
665-
return RDS_PROTOCOL_3_0;
666+
return RDS_PROTOCOL_4_0;
666667

667668
common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS;
668-
if (major == 3 && common) {
669-
version = RDS_PROTOCOL_3_0;
669+
if (major == 4 && common) {
670+
version = RDS_PROTOCOL_4_0;
670671
while ((common >>= 1) != 0)
671672
version++;
672673
} else if (RDS_PROTOCOL_COMPAT_VERSION ==
@@ -778,15 +779,16 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
778779
daddr6 = &d_mapped_addr;
779780
}
780781

781-
rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid "
782-
"0x%llx\n", saddr6, daddr6,
783-
RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version),
782+
rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx, tos:%d\n",
783+
saddr6, daddr6, RDS_PROTOCOL_MAJOR(version),
784+
RDS_PROTOCOL_MINOR(version),
784785
(unsigned long long)be64_to_cpu(lguid),
785-
(unsigned long long)be64_to_cpu(fguid));
786+
(unsigned long long)be64_to_cpu(fguid), dp_cmn->ricpc_dp_toss);
786787

787788
/* RDS/IB is not currently netns aware, thus init_net */
788789
conn = rds_conn_create(&init_net, daddr6, saddr6,
789-
&rds_ib_transport, 0, GFP_KERNEL, ifindex);
790+
&rds_ib_transport, dp_cmn->ricpc_dp_toss,
791+
GFP_KERNEL, ifindex);
790792
if (IS_ERR(conn)) {
791793
rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
792794
conn = NULL;
@@ -868,7 +870,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6)
868870

869871
/* If the peer doesn't do protocol negotiation, we must
870872
* default to RDSv3.0 */
871-
rds_ib_set_protocol(conn, RDS_PROTOCOL_VERSION);
873+
rds_ib_set_protocol(conn, RDS_PROTOCOL_4_1);
872874
ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */
873875

874876
ret = rds_ib_setup_qp(conn);

net/rds/ib_recv.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -986,9 +986,9 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
986986
} else {
987987
/* We expect errors as the qp is drained during shutdown */
988988
if (rds_conn_up(conn) || rds_conn_connecting(conn))
989-
rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
989+
rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n",
990990
&conn->c_laddr, &conn->c_faddr,
991-
wc->status,
991+
conn->c_tos, wc->status,
992992
ib_wc_status_msg(wc->status));
993993
}
994994

net/rds/ib_send.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -305,8 +305,9 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
305305

306306
/* We expect errors as the qp is drained during shutdown */
307307
if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
308-
rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
309-
&conn->c_laddr, &conn->c_faddr, wc->status,
308+
rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n",
309+
&conn->c_laddr, &conn->c_faddr,
310+
conn->c_tos, wc->status,
310311
ib_wc_status_msg(wc->status));
311312
}
312313
}

net/rds/rdma_transport.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
8383
break;
8484

8585
case RDMA_CM_EVENT_ADDR_RESOLVED:
86+
rdma_set_service_type(cm_id, conn->c_tos);
8687
/* XXX do we need to clean up if this fails? */
8788
ret = rdma_resolve_route(cm_id,
8889
RDS_RDMA_RESOLVE_TIMEOUT_MS);

net/rds/send.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1277,12 +1277,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
12771277

12781278
/* rds_conn_create has a spinlock that runs with IRQ off.
12791279
* Caching the conn in the socket helps a lot. */
1280-
if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr)) {
1280+
if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr) &&
1281+
rs->rs_tos == rs->rs_conn->c_tos) {
12811282
conn = rs->rs_conn;
12821283
} else {
12831284
conn = rds_conn_create_outgoing(sock_net(sock->sk),
12841285
&rs->rs_bound_addr, &daddr,
1285-
rs->rs_transport, 0,
1286+
rs->rs_transport, rs->rs_tos,
12861287
sock->sk->sk_allocation,
12871288
scope_id);
12881289
if (IS_ERR(conn)) {

0 commit comments

Comments
 (0)