Skip to content

Commit 7b56543

Browse files
SantoshShilimkar authored and davem330 committed
RDS: convert bind hash table to re-sizable hashtable
To further improve RDS connection scalability on massive systems where the number of sockets grows into the tens of thousands, a larger bind hash table is needed. A pre-allocated 8K or 16K table is not very flexible in terms of memory utilisation. The rhashtable infrastructure gives us the flexibility to grow the hash table based on use, and it also comes with built-in, efficient bucket (chain) handling. Reviewed-by: David Miller <[email protected]> Signed-off-by: Santosh Shilimkar <[email protected]> Signed-off-by: Santosh Shilimkar <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent d3ffaef commit 7b56543

File tree

3 files changed

+56
-86
lines changed

3 files changed

+56
-86
lines changed

net/rds/af_rds.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,7 @@ static void rds_exit(void)
573573
rds_threads_exit();
574574
rds_stats_exit();
575575
rds_page_exit();
576+
rds_bind_lock_destroy();
576577
rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info);
577578
rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
578579
}
@@ -582,11 +583,14 @@ static int rds_init(void)
582583
{
583584
int ret;
584585

585-
rds_bind_lock_init();
586+
ret = rds_bind_lock_init();
587+
if (ret)
588+
goto out;
586589

587590
ret = rds_conn_init();
588591
if (ret)
589-
goto out;
592+
goto out_bind;
593+
590594
ret = rds_threads_init();
591595
if (ret)
592596
goto out_conn;
@@ -620,6 +624,8 @@ static int rds_init(void)
620624
rds_conn_exit();
621625
rds_cong_exit();
622626
rds_page_exit();
627+
out_bind:
628+
rds_bind_lock_destroy();
623629
out:
624630
return ret;
625631
}

net/rds/bind.c

Lines changed: 43 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -38,54 +38,17 @@
3838
#include <linux/ratelimit.h>
3939
#include "rds.h"
4040

41-
struct bind_bucket {
42-
rwlock_t lock;
43-
struct hlist_head head;
41+
static struct rhashtable bind_hash_table;
42+
43+
static struct rhashtable_params ht_parms = {
44+
.nelem_hint = 768,
45+
.key_len = sizeof(u64),
46+
.key_offset = offsetof(struct rds_sock, rs_bound_key),
47+
.head_offset = offsetof(struct rds_sock, rs_bound_node),
48+
.max_size = 16384,
49+
.min_size = 1024,
4450
};
4551

46-
#define BIND_HASH_SIZE 1024
47-
static struct bind_bucket bind_hash_table[BIND_HASH_SIZE];
48-
49-
static struct bind_bucket *hash_to_bucket(__be32 addr, __be16 port)
50-
{
51-
return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) &
52-
(BIND_HASH_SIZE - 1));
53-
}
54-
55-
/* must hold either read or write lock (write lock for insert != NULL) */
56-
static struct rds_sock *rds_bind_lookup(struct bind_bucket *bucket,
57-
__be32 addr, __be16 port,
58-
struct rds_sock *insert)
59-
{
60-
struct rds_sock *rs;
61-
struct hlist_head *head = &bucket->head;
62-
u64 cmp;
63-
u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
64-
65-
hlist_for_each_entry(rs, head, rs_bound_node) {
66-
cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
67-
be16_to_cpu(rs->rs_bound_port);
68-
69-
if (cmp == needle) {
70-
rds_sock_addref(rs);
71-
return rs;
72-
}
73-
}
74-
75-
if (insert) {
76-
/*
77-
* make sure our addr and port are set before
78-
* we are added to the list.
79-
*/
80-
insert->rs_bound_addr = addr;
81-
insert->rs_bound_port = port;
82-
rds_sock_addref(insert);
83-
84-
hlist_add_head(&insert->rs_bound_node, head);
85-
}
86-
return NULL;
87-
}
88-
8952
/*
9053
* Return the rds_sock bound at the given local address.
9154
*
@@ -94,18 +57,14 @@ static struct rds_sock *rds_bind_lookup(struct bind_bucket *bucket,
9457
*/
9558
struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
9659
{
60+
u64 key = ((u64)addr << 32) | port;
9761
struct rds_sock *rs;
98-
unsigned long flags;
99-
struct bind_bucket *bucket = hash_to_bucket(addr, port);
100-
101-
read_lock_irqsave(&bucket->lock, flags);
102-
rs = rds_bind_lookup(bucket, addr, port, NULL);
103-
read_unlock_irqrestore(&bucket->lock, flags);
10462

105-
if (rs && sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) {
106-
rds_sock_put(rs);
63+
rs = rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms);
64+
if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
65+
rds_sock_addref(rs);
66+
else
10767
rs = NULL;
108-
}
10968

11069
rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
11170
ntohs(port));
@@ -116,10 +75,9 @@ struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
11675
/* returns -ve errno or +ve port */
11776
static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
11877
{
119-
unsigned long flags;
12078
int ret = -EADDRINUSE;
12179
u16 rover, last;
122-
struct bind_bucket *bucket;
80+
u64 key;
12381

12482
if (*port != 0) {
12583
rover = be16_to_cpu(*port);
@@ -130,22 +88,29 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
13088
}
13189

13290
do {
133-
struct rds_sock *rrs;
13491
if (rover == 0)
13592
rover++;
13693

137-
bucket = hash_to_bucket(addr, cpu_to_be16(rover));
138-
write_lock_irqsave(&bucket->lock, flags);
139-
rrs = rds_bind_lookup(bucket, addr, cpu_to_be16(rover), rs);
140-
write_unlock_irqrestore(&bucket->lock, flags);
141-
if (!rrs) {
94+
key = ((u64)addr << 32) | cpu_to_be16(rover);
95+
if (rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms))
96+
continue;
97+
98+
rs->rs_bound_key = key;
99+
rs->rs_bound_addr = addr;
100+
rs->rs_bound_port = cpu_to_be16(rover);
101+
rs->rs_bound_node.next = NULL;
102+
rds_sock_addref(rs);
103+
if (!rhashtable_insert_fast(&bind_hash_table,
104+
&rs->rs_bound_node, ht_parms)) {
142105
*port = rs->rs_bound_port;
143106
ret = 0;
144107
rdsdebug("rs %p binding to %pI4:%d\n",
145108
rs, &addr, (int)ntohs(*port));
146109
break;
147110
} else {
148-
rds_sock_put(rrs);
111+
rds_sock_put(rs);
112+
ret = -ENOMEM;
113+
break;
149114
}
150115
} while (rover++ != last);
151116

@@ -154,23 +119,17 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
154119

155120
void rds_remove_bound(struct rds_sock *rs)
156121
{
157-
unsigned long flags;
158-
struct bind_bucket *bucket =
159-
hash_to_bucket(rs->rs_bound_addr, rs->rs_bound_port);
160-
161-
write_lock_irqsave(&bucket->lock, flags);
162122

163-
if (rs->rs_bound_addr) {
164-
rdsdebug("rs %p unbinding from %pI4:%d\n",
165-
rs, &rs->rs_bound_addr,
166-
ntohs(rs->rs_bound_port));
123+
if (!rs->rs_bound_addr)
124+
return;
167125

168-
hlist_del_init(&rs->rs_bound_node);
169-
rds_sock_put(rs);
170-
rs->rs_bound_addr = 0;
171-
}
126+
rdsdebug("rs %p unbinding from %pI4:%d\n",
127+
rs, &rs->rs_bound_addr,
128+
ntohs(rs->rs_bound_port));
172129

173-
write_unlock_irqrestore(&bucket->lock, flags);
130+
rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms);
131+
rds_sock_put(rs);
132+
rs->rs_bound_addr = 0;
174133
}
175134

176135
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
@@ -224,10 +183,12 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
224183
return ret;
225184
}
226185

227-
void rds_bind_lock_init(void)
186+
void rds_bind_lock_destroy(void)
228187
{
229-
int i;
188+
rhashtable_destroy(&bind_hash_table);
189+
}
230190

231-
for (i = 0; i < BIND_HASH_SIZE; i++)
232-
rwlock_init(&bind_hash_table[i].lock);
191+
int rds_bind_lock_init(void)
192+
{
193+
return rhashtable_init(&bind_hash_table, &ht_parms);
233194
}

net/rds/rds.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <rdma/rdma_cm.h>
88
#include <linux/mutex.h>
99
#include <linux/rds.h>
10+
#include <linux/rhashtable.h>
1011

1112
#include "info.h"
1213

@@ -474,7 +475,8 @@ struct rds_sock {
474475
* bound_addr used for both incoming and outgoing, no INADDR_ANY
475476
* support.
476477
*/
477-
struct hlist_node rs_bound_node;
478+
struct rhash_head rs_bound_node;
479+
u64 rs_bound_key;
478480
__be32 rs_bound_addr;
479481
__be32 rs_conn_addr;
480482
__be16 rs_bound_port;
@@ -605,7 +607,8 @@ extern wait_queue_head_t rds_poll_waitq;
605607
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
606608
void rds_remove_bound(struct rds_sock *rs);
607609
struct rds_sock *rds_find_bound(__be32 addr, __be16 port);
608-
void rds_bind_lock_init(void);
610+
int rds_bind_lock_init(void);
611+
void rds_bind_lock_destroy(void);
609612

610613
/* cong.c */
611614
int rds_cong_get_maps(struct rds_connection *conn);

0 commit comments

Comments
 (0)