Skip to content

Commit 52e3ad9

Browse files
committed
Merge branch 'rhashtable-next'
Ying Xue says: ==================== remove nl_sk_hash_lock from netlink socket After tipc socket successfully avoids the involvement of an extra lock with rhashtable_lookup_insert(), it's possible for netlink socket to remove its hash socket lock now. But as netlink socket needs a compare function to look for an object, we first introduce a new function called rhashtable_lookup_compare_insert() in commit #1 which is implemented based on original rhashtable_lookup_insert(). We subsequently remove nl_sk_hash_lock from netlink socket with the new introduced function in commit #2. Lastly, as Thomas requested, we add commit #3 to indicate the implementation of what the grow and shrink decision function must enforce min/max shift. v2: As Thomas pointed out, there was a race between checking portid and then setting it in commit #2. Now use socket lock to make the process of both checking and setting portid atomic, and then eliminate the race. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents d2c60b1 + 6f73d3b commit 52e3ad9

File tree

5 files changed

+74
-21
lines changed

5 files changed

+74
-21
lines changed

include/linux/rhashtable.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ struct rhashtable;
7979
* @obj_hashfn: Function to hash object
8080
* @grow_decision: If defined, may return true if table should expand
8181
* @shrink_decision: If defined, may return true if table should shrink
82+
*
83+
* Note: when implementing the grow and shrink decision function, min/max
84+
* shift must be enforced, otherwise, resizing watermarks they set may be
85+
* useless.
8286
*/
8387
struct rhashtable_params {
8488
size_t nelem_hint;
@@ -168,7 +172,12 @@ int rhashtable_shrink(struct rhashtable *ht);
168172
void *rhashtable_lookup(struct rhashtable *ht, const void *key);
169173
void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
170174
bool (*compare)(void *, void *), void *arg);
175+
171176
bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj);
177+
bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
178+
struct rhash_head *obj,
179+
bool (*compare)(void *, void *),
180+
void *arg);
172181

173182
void rhashtable_destroy(struct rhashtable *ht);
174183

lib/rhashtable.c

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,43 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
726726
* to rhashtable_init().
727727
*/
728728
bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
729+
{
730+
struct rhashtable_compare_arg arg = {
731+
.ht = ht,
732+
.key = rht_obj(ht, obj) + ht->p.key_offset,
733+
};
734+
735+
BUG_ON(!ht->p.key_len);
736+
737+
return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare,
738+
&arg);
739+
}
740+
EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);
741+
742+
/**
743+
* rhashtable_lookup_compare_insert - search and insert object to hash table
744+
* with compare function
745+
* @ht: hash table
746+
* @obj: pointer to hash head inside object
747+
* @compare: compare function, must return true on match
748+
* @arg: argument passed on to compare function
749+
*
750+
* Locks down the bucket chain in both the old and new table if a resize
751+
* is in progress to ensure that writers can't remove from the old table
752+
* and can't insert to the new table during the atomic operation of search
753+
* and insertion. Searches for duplicates in both the old and new table if
754+
* a resize is in progress.
755+
*
756+
* Lookups may occur in parallel with hashtable mutations and resizing.
757+
*
758+
* Will trigger an automatic deferred table resizing if the size grows
759+
* beyond the watermark indicated by grow_decision() which can be passed
760+
* to rhashtable_init().
761+
*/
762+
bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
763+
struct rhash_head *obj,
764+
bool (*compare)(void *, void *),
765+
void *arg)
729766
{
730767
struct bucket_table *new_tbl, *old_tbl;
731768
spinlock_t *new_bucket_lock, *old_bucket_lock;
@@ -747,7 +784,8 @@ bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
747784
if (unlikely(old_tbl != new_tbl))
748785
spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
749786

750-
if (rhashtable_lookup(ht, rht_obj(ht, obj) + ht->p.key_offset)) {
787+
if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
788+
compare, arg)) {
751789
success = false;
752790
goto exit;
753791
}
@@ -763,7 +801,7 @@ bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
763801

764802
return success;
765803
}
766-
EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);
804+
EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert);
767805

768806
static size_t rounded_hashtable_size(struct rhashtable_params *params)
769807
{

net/netlink/af_netlink.c

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ static void netlink_skb_destructor(struct sk_buff *skb);
9898

9999
/* nl_table locking explained:
100100
* Lookup and traversal are protected with an RCU read-side lock. Insertion
101-
* and removal are protected with nl_sk_hash_lock while using RCU list
101+
* and removal are protected with per bucket lock while using RCU list
102102
* modification primitives and may run in parallel to RCU protected lookups.
103103
* Destruction of the Netlink socket may only occur *after* nl_table_lock has
104104
* been acquired * either during or after the socket has been removed from
@@ -110,10 +110,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
110110

111111
#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
112112

113-
/* Protects netlink socket hash table mutations */
114-
DEFINE_MUTEX(nl_sk_hash_lock);
115-
EXPORT_SYMBOL_GPL(nl_sk_hash_lock);
116-
117113
static ATOMIC_NOTIFIER_HEAD(netlink_chain);
118114

119115
static DEFINE_SPINLOCK(netlink_tap_lock);
@@ -998,6 +994,19 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
998994
&netlink_compare, &arg);
999995
}
1000996

997+
static bool __netlink_insert(struct netlink_table *table, struct sock *sk,
998+
struct net *net)
999+
{
1000+
struct netlink_compare_arg arg = {
1001+
.net = net,
1002+
.portid = nlk_sk(sk)->portid,
1003+
};
1004+
1005+
return rhashtable_lookup_compare_insert(&table->hash,
1006+
&nlk_sk(sk)->node,
1007+
&netlink_compare, &arg);
1008+
}
1009+
10011010
static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
10021011
{
10031012
struct netlink_table *table = &nl_table[protocol];
@@ -1043,9 +1052,7 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
10431052
struct netlink_table *table = &nl_table[sk->sk_protocol];
10441053
int err = -EADDRINUSE;
10451054

1046-
mutex_lock(&nl_sk_hash_lock);
1047-
if (__netlink_lookup(table, portid, net))
1048-
goto err;
1055+
lock_sock(sk);
10491056

10501057
err = -EBUSY;
10511058
if (nlk_sk(sk)->portid)
@@ -1058,24 +1065,24 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
10581065

10591066
nlk_sk(sk)->portid = portid;
10601067
sock_hold(sk);
1061-
rhashtable_insert(&table->hash, &nlk_sk(sk)->node);
1062-
err = 0;
1068+
if (__netlink_insert(table, sk, net))
1069+
err = 0;
1070+
else
1071+
sock_put(sk);
10631072
err:
1064-
mutex_unlock(&nl_sk_hash_lock);
1073+
release_sock(sk);
10651074
return err;
10661075
}
10671076

10681077
static void netlink_remove(struct sock *sk)
10691078
{
10701079
struct netlink_table *table;
10711080

1072-
mutex_lock(&nl_sk_hash_lock);
10731081
table = &nl_table[sk->sk_protocol];
10741082
if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) {
10751083
WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
10761084
__sock_put(sk);
10771085
}
1078-
mutex_unlock(&nl_sk_hash_lock);
10791086

10801087
netlink_table_grab();
10811088
if (nlk_sk(sk)->subscriptions) {

net/netlink/af_netlink.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,5 @@ struct netlink_table {
7474

7575
extern struct netlink_table *nl_table;
7676
extern rwlock_t nl_table_lock;
77-
extern struct mutex nl_sk_hash_lock;
7877

7978
#endif

net/netlink/diag.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
103103
{
104104
struct netlink_table *tbl = &nl_table[protocol];
105105
struct rhashtable *ht = &tbl->hash;
106-
const struct bucket_table *htbl = rht_dereference(ht->tbl, ht);
106+
const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht);
107107
struct net *net = sock_net(skb->sk);
108108
struct netlink_diag_req *req;
109109
struct netlink_sock *nlsk;
@@ -115,7 +115,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
115115
for (i = 0; i < htbl->size; i++) {
116116
struct rhash_head *pos;
117117

118-
rht_for_each_entry(nlsk, pos, htbl, i, node) {
118+
rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) {
119119
sk = (struct sock *)nlsk;
120120

121121
if (!net_eq(sock_net(sk), net))
@@ -172,7 +172,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
172172

173173
req = nlmsg_data(cb->nlh);
174174

175-
mutex_lock(&nl_sk_hash_lock);
175+
rcu_read_lock();
176176
read_lock(&nl_table_lock);
177177

178178
if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
@@ -186,15 +186,15 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
186186
} else {
187187
if (req->sdiag_protocol >= MAX_LINKS) {
188188
read_unlock(&nl_table_lock);
189-
mutex_unlock(&nl_sk_hash_lock);
189+
rcu_read_unlock();
190190
return -ENOENT;
191191
}
192192

193193
__netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
194194
}
195195

196196
read_unlock(&nl_table_lock);
197-
mutex_unlock(&nl_sk_hash_lock);
197+
rcu_read_unlock();
198198

199199
return skb->len;
200200
}

0 commit comments

Comments
 (0)