Skip to content

Commit 97f7cf1

Browse files
Jozsef Kadlecsikummakynes
authored andcommitted
netfilter: ipset: fix performance regression in swap operation
The patch "netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test", commit 28628fa fixes a race condition. But the synchronize_rcu() added to the swap function unnecessarily slows it down: it can safely be moved to destroy and use call_rcu() instead. Eric Dumazet pointed out that simply calling the destroy functions as rcu callback does not work: sets with timeout use garbage collectors which need cancelling at destroy which can wait. Therefore the destroy functions are split into two: cancelling garbage collectors safely at executing the command received by netlink and moving the remaining part only into the rcu callback. Link: https://lore.kernel.org/lkml/[email protected]/ Fixes: 28628fa ("netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test") Reported-by: Ale Crismani <[email protected]> Reported-by: David Wang <[email protected]> Tested-by: David Wang <[email protected]> Signed-off-by: Jozsef Kadlecsik <[email protected]> Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent 6e34806 commit 97f7cf1

File tree

5 files changed

+65
-18
lines changed

5 files changed

+65
-18
lines changed

include/linux/netfilter/ipset/ip_set.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,8 @@ struct ip_set_type_variant {
186186
/* Return true if "b" set is the same as "a"
187187
* according to the create set parameters */
188188
bool (*same_set)(const struct ip_set *a, const struct ip_set *b);
189+
/* Cancel ongoing garbage collectors before destroying the set*/
190+
void (*cancel_gc)(struct ip_set *set);
189191
/* Region-locking is used */
190192
bool region_lock;
191193
};
@@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type);
242244

243245
/* A generic IP set */
244246
struct ip_set {
247+
/* For call_cru in destroy */
248+
struct rcu_head rcu;
245249
/* The name of the set */
246250
char name[IPSET_MAXNAMELEN];
247251
/* Lock protecting the set data */

net/netfilter/ipset/ip_set_bitmap_gen.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#define mtype_del IPSET_TOKEN(MTYPE, _del)
3131
#define mtype_list IPSET_TOKEN(MTYPE, _list)
3232
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
33+
#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
3334
#define mtype MTYPE
3435

3536
#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
@@ -59,9 +60,6 @@ mtype_destroy(struct ip_set *set)
5960
{
6061
struct mtype *map = set->data;
6162

62-
if (SET_WITH_TIMEOUT(set))
63-
del_timer_sync(&map->gc);
64-
6563
if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
6664
mtype_ext_cleanup(set);
6765
ip_set_free(map->members);
@@ -290,6 +288,15 @@ mtype_gc(struct timer_list *t)
290288
add_timer(&map->gc);
291289
}
292290

291+
static void
292+
mtype_cancel_gc(struct ip_set *set)
293+
{
294+
struct mtype *map = set->data;
295+
296+
if (SET_WITH_TIMEOUT(set))
297+
del_timer_sync(&map->gc);
298+
}
299+
293300
static const struct ip_set_type_variant mtype = {
294301
.kadt = mtype_kadt,
295302
.uadt = mtype_uadt,
@@ -303,6 +310,7 @@ static const struct ip_set_type_variant mtype = {
303310
.head = mtype_head,
304311
.list = mtype_list,
305312
.same_set = mtype_same_set,
313+
.cancel_gc = mtype_cancel_gc,
306314
};
307315

308316
#endif /* __IP_SET_BITMAP_IP_GEN_H */

net/netfilter/ipset/ip_set_core.c

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,6 +1182,14 @@ ip_set_destroy_set(struct ip_set *set)
11821182
kfree(set);
11831183
}
11841184

1185+
static void
1186+
ip_set_destroy_set_rcu(struct rcu_head *head)
1187+
{
1188+
struct ip_set *set = container_of(head, struct ip_set, rcu);
1189+
1190+
ip_set_destroy_set(set);
1191+
}
1192+
11851193
static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
11861194
const struct nlattr * const attr[])
11871195
{
@@ -1193,8 +1201,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
11931201
if (unlikely(protocol_min_failed(attr)))
11941202
return -IPSET_ERR_PROTOCOL;
11951203

1196-
/* Must wait for flush to be really finished in list:set */
1197-
rcu_barrier();
11981204

11991205
/* Commands are serialized and references are
12001206
* protected by the ip_set_ref_lock.
@@ -1206,8 +1212,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
12061212
* counter, so if it's already zero, we can proceed
12071213
* without holding the lock.
12081214
*/
1209-
read_lock_bh(&ip_set_ref_lock);
12101215
if (!attr[IPSET_ATTR_SETNAME]) {
1216+
/* Must wait for flush to be really finished in list:set */
1217+
rcu_barrier();
1218+
read_lock_bh(&ip_set_ref_lock);
12111219
for (i = 0; i < inst->ip_set_max; i++) {
12121220
s = ip_set(inst, i);
12131221
if (s && (s->ref || s->ref_netlink)) {
@@ -1221,13 +1229,18 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
12211229
s = ip_set(inst, i);
12221230
if (s) {
12231231
ip_set(inst, i) = NULL;
1232+
/* Must cancel garbage collectors */
1233+
s->variant->cancel_gc(s);
12241234
ip_set_destroy_set(s);
12251235
}
12261236
}
12271237
/* Modified by ip_set_destroy() only, which is serialized */
12281238
inst->is_destroyed = false;
12291239
} else {
12301240
u32 flags = flag_exist(info->nlh);
1241+
u16 features = 0;
1242+
1243+
read_lock_bh(&ip_set_ref_lock);
12311244
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
12321245
&i);
12331246
if (!s) {
@@ -1238,10 +1251,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
12381251
ret = -IPSET_ERR_BUSY;
12391252
goto out;
12401253
}
1254+
features = s->type->features;
12411255
ip_set(inst, i) = NULL;
12421256
read_unlock_bh(&ip_set_ref_lock);
1243-
1244-
ip_set_destroy_set(s);
1257+
if (features & IPSET_TYPE_NAME) {
1258+
/* Must wait for flush to be really finished */
1259+
rcu_barrier();
1260+
}
1261+
/* Must cancel garbage collectors */
1262+
s->variant->cancel_gc(s);
1263+
call_rcu(&s->rcu, ip_set_destroy_set_rcu);
12451264
}
12461265
return 0;
12471266
out:
@@ -1394,9 +1413,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
13941413
ip_set(inst, to_id) = from;
13951414
write_unlock_bh(&ip_set_ref_lock);
13961415

1397-
/* Make sure all readers of the old set pointers are completed. */
1398-
synchronize_rcu();
1399-
14001416
return 0;
14011417
}
14021418

@@ -2409,8 +2425,11 @@ ip_set_fini(void)
24092425
{
24102426
nf_unregister_sockopt(&so_set);
24112427
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
2412-
24132428
unregister_pernet_subsys(&ip_set_net_ops);
2429+
2430+
/* Wait for call_rcu() in destroy */
2431+
rcu_barrier();
2432+
24142433
pr_debug("these are the famous last words\n");
24152434
}
24162435

net/netfilter/ipset/ip_set_hash_gen.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ static const union nf_inet_addr zeromask = {};
222222
#undef mtype_gc_do
223223
#undef mtype_gc
224224
#undef mtype_gc_init
225+
#undef mtype_cancel_gc
225226
#undef mtype_variant
226227
#undef mtype_data_match
227228

@@ -266,6 +267,7 @@ static const union nf_inet_addr zeromask = {};
266267
#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
267268
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
268269
#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
270+
#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
269271
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
270272
#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
271273

@@ -450,9 +452,6 @@ mtype_destroy(struct ip_set *set)
450452
struct htype *h = set->data;
451453
struct list_head *l, *lt;
452454

453-
if (SET_WITH_TIMEOUT(set))
454-
cancel_delayed_work_sync(&h->gc.dwork);
455-
456455
mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
457456
list_for_each_safe(l, lt, &h->ad) {
458457
list_del(l);
@@ -599,6 +598,15 @@ mtype_gc_init(struct htable_gc *gc)
599598
queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
600599
}
601600

601+
static void
602+
mtype_cancel_gc(struct ip_set *set)
603+
{
604+
struct htype *h = set->data;
605+
606+
if (SET_WITH_TIMEOUT(set))
607+
cancel_delayed_work_sync(&h->gc.dwork);
608+
}
609+
602610
static int
603611
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
604612
struct ip_set_ext *mext, u32 flags);
@@ -1441,6 +1449,7 @@ static const struct ip_set_type_variant mtype_variant = {
14411449
.uref = mtype_uref,
14421450
.resize = mtype_resize,
14431451
.same_set = mtype_same_set,
1452+
.cancel_gc = mtype_cancel_gc,
14441453
.region_lock = true,
14451454
};
14461455

net/netfilter/ipset/ip_set_list_set.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set)
426426
struct list_set *map = set->data;
427427
struct set_elem *e, *n;
428428

429-
if (SET_WITH_TIMEOUT(set))
430-
timer_shutdown_sync(&map->gc);
431-
432429
list_for_each_entry_safe(e, n, &map->members, list) {
433430
list_del(&e->list);
434431
ip_set_put_byindex(map->net, e->id);
@@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
545542
a->extensions == b->extensions;
546543
}
547544

545+
static void
546+
list_set_cancel_gc(struct ip_set *set)
547+
{
548+
struct list_set *map = set->data;
549+
550+
if (SET_WITH_TIMEOUT(set))
551+
timer_shutdown_sync(&map->gc);
552+
}
553+
548554
static const struct ip_set_type_variant set_variant = {
549555
.kadt = list_set_kadt,
550556
.uadt = list_set_uadt,
@@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = {
558564
.head = list_set_head,
559565
.list = list_set_list,
560566
.same_set = list_set_same_set,
567+
.cancel_gc = list_set_cancel_gc,
561568
};
562569

563570
static void

0 commit comments

Comments
 (0)