Skip to content

Commit 7e2832f

Browse files
committed
Merge branch 'rds-perf'
Sowmini Varadhan says: ==================== RDS: RDS-TCP perf enhancements A 3-part patchset that (a) improves current RDS-TCP perf by 2X-3X and (b) refactors earlier robustness code for better observability/scaling. Patch 1 is an enhancement of earlier robustness fixes that had used separate sockets for client and server endpoints to resolve race conditions. It is possible to have an equivalent solution that does not use 2 sockets. The benefit of a single socket solution is that it results in more predictable and observable behavior for the underlying TCP pipe of an RDS connection. Patches 2 and 3 are simple, straightforward perf bug fixes that align the RDS TCP socket with other parts of the kernel stack. v2: fix kbuild-test-robot warnings, comments from Sergei Shtylyov and Santosh Shilimkar. ==================== Acked-by: Santosh Shilimkar <[email protected]> Signed-off-by: David S. Miller <[email protected]>
2 parents 393159e + 76b29ef commit 7e2832f

File tree

5 files changed

+29
-43
lines changed

5 files changed

+29
-43
lines changed

net/rds/connection.c

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
128128
struct rds_transport *loop_trans;
129129
unsigned long flags;
130130
int ret;
131-
struct rds_transport *otrans = trans;
132131

133-
if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
134-
goto new_conn;
135132
rcu_read_lock();
136133
conn = rds_conn_lookup(net, head, laddr, faddr, trans);
137134
if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
@@ -147,7 +144,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
147144
if (conn)
148145
goto out;
149146

150-
new_conn:
151147
conn = kmem_cache_zalloc(rds_conn_slab, gfp);
152148
if (!conn) {
153149
conn = ERR_PTR(-ENOMEM);
@@ -207,6 +203,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
207203

208204
atomic_set(&conn->c_state, RDS_CONN_DOWN);
209205
conn->c_send_gen = 0;
206+
conn->c_outgoing = (is_outgoing ? 1 : 0);
210207
conn->c_reconnect_jiffies = 0;
211208
INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
212209
INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
@@ -243,22 +240,13 @@ static struct rds_connection *__rds_conn_create(struct net *net,
243240
/* Creating normal conn */
244241
struct rds_connection *found;
245242

246-
if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
247-
found = NULL;
248-
else
249-
found = rds_conn_lookup(net, head, laddr, faddr, trans);
243+
found = rds_conn_lookup(net, head, laddr, faddr, trans);
250244
if (found) {
251245
trans->conn_free(conn->c_transport_data);
252246
kmem_cache_free(rds_conn_slab, conn);
253247
conn = found;
254248
} else {
255-
if ((is_outgoing && otrans->t_type == RDS_TRANS_TCP) ||
256-
(otrans->t_type != RDS_TRANS_TCP)) {
257-
/* Only the active side should be added to
258-
* reconnect list for TCP.
259-
*/
260-
hlist_add_head_rcu(&conn->c_hash_node, head);
261-
}
249+
hlist_add_head_rcu(&conn->c_hash_node, head);
262250
rds_cong_add_conn(conn);
263251
rds_conn_count++;
264252
}
@@ -337,7 +325,9 @@ void rds_conn_shutdown(struct rds_connection *conn)
337325
rcu_read_lock();
338326
if (!hlist_unhashed(&conn->c_hash_node)) {
339327
rcu_read_unlock();
340-
rds_queue_reconnect(conn);
328+
if (conn->c_trans->t_type != RDS_TRANS_TCP ||
329+
conn->c_outgoing == 1)
330+
rds_queue_reconnect(conn);
341331
} else {
342332
rcu_read_unlock();
343333
}

net/rds/rds.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,9 @@ struct rds_connection {
8686
struct hlist_node c_hash_node;
8787
__be32 c_laddr;
8888
__be32 c_faddr;
89-
unsigned int c_loopback:1;
89+
unsigned int c_loopback:1,
90+
c_outgoing:1,
91+
c_pad_to_32:30;
9092
struct rds_connection *c_passive;
9193

9294
struct rds_cong_map *c_lcong;

net/rds/tcp.c

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -67,21 +67,13 @@ void rds_tcp_nonagle(struct socket *sock)
6767
set_fs(oldfs);
6868
}
6969

70+
/* All module specific customizations to the RDS-TCP socket should be done in
71+
* rds_tcp_tune() and applied after socket creation. In general these
72+
* customizations should be tunable via module_param()
73+
*/
7074
void rds_tcp_tune(struct socket *sock)
7175
{
72-
struct sock *sk = sock->sk;
73-
7476
rds_tcp_nonagle(sock);
75-
76-
/*
77-
* We're trying to saturate gigabit with the default,
78-
* see svc_sock_setbufsize().
79-
*/
80-
lock_sock(sk);
81-
sk->sk_sndbuf = RDS_TCP_DEFAULT_BUFSIZE;
82-
sk->sk_rcvbuf = RDS_TCP_DEFAULT_BUFSIZE;
83-
sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
84-
release_sock(sk);
8577
}
8678

8779
u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)

net/rds/tcp_listen.c

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -110,28 +110,24 @@ int rds_tcp_accept_one(struct socket *sock)
110110
goto out;
111111
}
112112
/* An incoming SYN request came in, and TCP just accepted it.
113-
* We always create a new conn for listen side of TCP, and do not
114-
* add it to the c_hash_list.
115113
*
116114
* If the client reboots, this conn will need to be cleaned up.
117115
* rds_tcp_state_change() will do that cleanup
118116
*/
119117
rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
120-
WARN_ON(!rs_tcp || rs_tcp->t_sock);
121-
122-
/*
123-
* see the comment above rds_queue_delayed_reconnect()
124-
*/
125-
if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
126-
if (rds_conn_state(conn) == RDS_CONN_UP)
127-
rds_tcp_stats_inc(s_tcp_listen_closed_stale);
128-
else
129-
rds_tcp_stats_inc(s_tcp_connect_raced);
130-
rds_conn_drop(conn);
118+
if (rs_tcp->t_sock &&
119+
ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
120+
struct sock *nsk = new_sock->sk;
121+
122+
nsk->sk_user_data = NULL;
123+
nsk->sk_prot->disconnect(nsk, 0);
124+
tcp_done(nsk);
125+
new_sock = NULL;
131126
ret = 0;
132127
goto out;
133128
}
134129

130+
rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
135131
rds_tcp_set_callbacks(new_sock, conn);
136132
rds_connect_complete(conn);
137133
new_sock = NULL;

net/rds/tcp_send.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
8383
struct rds_tcp_connection *tc = conn->c_transport_data;
8484
int done = 0;
8585
int ret = 0;
86+
int more;
8687

8788
if (hdr_off == 0) {
8889
/*
@@ -116,12 +117,15 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
116117
goto out;
117118
}
118119

120+
more = rm->data.op_nents > 1 ? (MSG_MORE | MSG_SENDPAGE_NOTLAST) : 0;
119121
while (sg < rm->data.op_nents) {
122+
int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more;
123+
120124
ret = tc->t_sock->ops->sendpage(tc->t_sock,
121125
sg_page(&rm->data.op_sg[sg]),
122126
rm->data.op_sg[sg].offset + off,
123127
rm->data.op_sg[sg].length - off,
124-
MSG_DONTWAIT|MSG_NOSIGNAL);
128+
flags);
125129
rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]),
126130
rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off,
127131
ret);
@@ -134,6 +138,8 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
134138
off = 0;
135139
sg++;
136140
}
141+
if (sg == rm->data.op_nents - 1)
142+
more = 0;
137143
}
138144

139145
out:

0 commit comments

Comments
 (0)