Skip to content

Commit 4e7aaa6

Browse files
Jozsef Kadlecsikummakynes
authored andcommitted
netfilter: ipset: Fix race between namespace cleanup and gc in the list:set type
Lion Ackermann reported that there is a race condition between namespace cleanup in ipset and the garbage collection of the list:set type. The namespace cleanup can destroy the list:set type of sets while the gc of the set type is waiting to run in rcu cleanup. The latter uses data from the destroyed set which thus leads use after free. The patch contains the following parts: - When destroying all sets, first remove the garbage collectors, then wait if needed and then destroy the sets. - Fix the badly ordered "wait then remove gc" for the destroy a single set case. - Fix the missing rcu locking in the list:set type in the userspace test case. - Use proper RCU list handlings in the list:set type. The patch depends on c1193d9 (netfilter: ipset: Add list flush to cancel_gc). Fixes: 97f7cf1 (netfilter: ipset: fix performance regression in swap operation) Reported-by: Lion Ackermann <[email protected]> Tested-by: Lion Ackermann <[email protected]> Signed-off-by: Jozsef Kadlecsik <[email protected]> Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent c4ab9da commit 4e7aaa6

File tree

2 files changed

+60
-51
lines changed

2 files changed

+60
-51
lines changed

net/netfilter/ipset/ip_set_core.c

Lines changed: 46 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,23 +1172,50 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
11721172
.len = IPSET_MAXNAMELEN - 1 },
11731173
};
11741174

1175+
/* In order to return quickly when destroying a single set, it is split
1176+
* into two stages:
1177+
* - Cancel garbage collector
1178+
* - Destroy the set itself via call_rcu()
1179+
*/
1180+
11751181
static void
1176-
ip_set_destroy_set(struct ip_set *set)
1182+
ip_set_destroy_set_rcu(struct rcu_head *head)
11771183
{
1178-
pr_debug("set: %s\n", set->name);
1184+
struct ip_set *set = container_of(head, struct ip_set, rcu);
11791185

1180-
/* Must call it without holding any lock */
11811186
set->variant->destroy(set);
11821187
module_put(set->type->me);
11831188
kfree(set);
11841189
}
11851190

11861191
static void
1187-
ip_set_destroy_set_rcu(struct rcu_head *head)
1192+
_destroy_all_sets(struct ip_set_net *inst)
11881193
{
1189-
struct ip_set *set = container_of(head, struct ip_set, rcu);
1194+
struct ip_set *set;
1195+
ip_set_id_t i;
1196+
bool need_wait = false;
11901197

1191-
ip_set_destroy_set(set);
1198+
/* First cancel gc's: set:list sets are flushed as well */
1199+
for (i = 0; i < inst->ip_set_max; i++) {
1200+
set = ip_set(inst, i);
1201+
if (set) {
1202+
set->variant->cancel_gc(set);
1203+
if (set->type->features & IPSET_TYPE_NAME)
1204+
need_wait = true;
1205+
}
1206+
}
1207+
/* Must wait for flush to be really finished */
1208+
if (need_wait)
1209+
rcu_barrier();
1210+
for (i = 0; i < inst->ip_set_max; i++) {
1211+
set = ip_set(inst, i);
1212+
if (set) {
1213+
ip_set(inst, i) = NULL;
1214+
set->variant->destroy(set);
1215+
module_put(set->type->me);
1216+
kfree(set);
1217+
}
1218+
}
11921219
}
11931220

11941221
static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
@@ -1202,20 +1229,17 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
12021229
if (unlikely(protocol_min_failed(attr)))
12031230
return -IPSET_ERR_PROTOCOL;
12041231

1205-
12061232
/* Commands are serialized and references are
12071233
* protected by the ip_set_ref_lock.
12081234
* External systems (i.e. xt_set) must call
1209-
* ip_set_put|get_nfnl_* functions, that way we
1235+
* ip_set_nfnl_get_* functions, that way we
12101236
* can safely check references here.
12111237
*
12121238
* list:set timer can only decrement the reference
12131239
* counter, so if it's already zero, we can proceed
12141240
* without holding the lock.
12151241
*/
12161242
if (!attr[IPSET_ATTR_SETNAME]) {
1217-
/* Must wait for flush to be really finished in list:set */
1218-
rcu_barrier();
12191243
read_lock_bh(&ip_set_ref_lock);
12201244
for (i = 0; i < inst->ip_set_max; i++) {
12211245
s = ip_set(inst, i);
@@ -1226,15 +1250,7 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
12261250
}
12271251
inst->is_destroyed = true;
12281252
read_unlock_bh(&ip_set_ref_lock);
1229-
for (i = 0; i < inst->ip_set_max; i++) {
1230-
s = ip_set(inst, i);
1231-
if (s) {
1232-
ip_set(inst, i) = NULL;
1233-
/* Must cancel garbage collectors */
1234-
s->variant->cancel_gc(s);
1235-
ip_set_destroy_set(s);
1236-
}
1237-
}
1253+
_destroy_all_sets(inst);
12381254
/* Modified by ip_set_destroy() only, which is serialized */
12391255
inst->is_destroyed = false;
12401256
} else {
@@ -1255,12 +1271,12 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
12551271
features = s->type->features;
12561272
ip_set(inst, i) = NULL;
12571273
read_unlock_bh(&ip_set_ref_lock);
1274+
/* Must cancel garbage collectors */
1275+
s->variant->cancel_gc(s);
12581276
if (features & IPSET_TYPE_NAME) {
12591277
/* Must wait for flush to be really finished */
12601278
rcu_barrier();
12611279
}
1262-
/* Must cancel garbage collectors */
1263-
s->variant->cancel_gc(s);
12641280
call_rcu(&s->rcu, ip_set_destroy_set_rcu);
12651281
}
12661282
return 0;
@@ -2365,30 +2381,25 @@ ip_set_net_init(struct net *net)
23652381
}
23662382

23672383
static void __net_exit
2368-
ip_set_net_exit(struct net *net)
2384+
ip_set_net_pre_exit(struct net *net)
23692385
{
23702386
struct ip_set_net *inst = ip_set_pernet(net);
23712387

2372-
struct ip_set *set = NULL;
2373-
ip_set_id_t i;
2374-
23752388
inst->is_deleted = true; /* flag for ip_set_nfnl_put */
2389+
}
23762390

2377-
nfnl_lock(NFNL_SUBSYS_IPSET);
2378-
for (i = 0; i < inst->ip_set_max; i++) {
2379-
set = ip_set(inst, i);
2380-
if (set) {
2381-
ip_set(inst, i) = NULL;
2382-
set->variant->cancel_gc(set);
2383-
ip_set_destroy_set(set);
2384-
}
2385-
}
2386-
nfnl_unlock(NFNL_SUBSYS_IPSET);
2391+
static void __net_exit
2392+
ip_set_net_exit(struct net *net)
2393+
{
2394+
struct ip_set_net *inst = ip_set_pernet(net);
2395+
2396+
_destroy_all_sets(inst);
23872397
kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
23882398
}
23892399

23902400
static struct pernet_operations ip_set_net_ops = {
23912401
.init = ip_set_net_init,
2402+
.pre_exit = ip_set_net_pre_exit,
23922403
.exit = ip_set_net_exit,
23932404
.id = &ip_set_net_id,
23942405
.size = sizeof(struct ip_set_net),

net/netfilter/ipset/ip_set_list_set.c

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
7979
struct set_elem *e;
8080
int ret;
8181

82-
list_for_each_entry(e, &map->members, list) {
82+
list_for_each_entry_rcu(e, &map->members, list) {
8383
if (SET_WITH_TIMEOUT(set) &&
8484
ip_set_timeout_expired(ext_timeout(e, set)))
8585
continue;
@@ -99,7 +99,7 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
9999
struct set_elem *e;
100100
int ret;
101101

102-
list_for_each_entry(e, &map->members, list) {
102+
list_for_each_entry_rcu(e, &map->members, list) {
103103
if (SET_WITH_TIMEOUT(set) &&
104104
ip_set_timeout_expired(ext_timeout(e, set)))
105105
continue;
@@ -188,9 +188,10 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
188188
struct list_set *map = set->data;
189189
struct set_adt_elem *d = value;
190190
struct set_elem *e, *next, *prev = NULL;
191-
int ret;
191+
int ret = 0;
192192

193-
list_for_each_entry(e, &map->members, list) {
193+
rcu_read_lock();
194+
list_for_each_entry_rcu(e, &map->members, list) {
194195
if (SET_WITH_TIMEOUT(set) &&
195196
ip_set_timeout_expired(ext_timeout(e, set)))
196197
continue;
@@ -201,16 +202,19 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
201202

202203
if (d->before == 0) {
203204
ret = 1;
205+
goto out;
204206
} else if (d->before > 0) {
205207
next = list_next_entry(e, list);
206208
ret = !list_is_last(&e->list, &map->members) &&
207209
next->id == d->refid;
208210
} else {
209211
ret = prev && prev->id == d->refid;
210212
}
211-
return ret;
213+
goto out;
212214
}
213-
return 0;
215+
out:
216+
rcu_read_unlock();
217+
return ret;
214218
}
215219

216220
static void
@@ -239,7 +243,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
239243

240244
/* Find where to add the new entry */
241245
n = prev = next = NULL;
242-
list_for_each_entry(e, &map->members, list) {
246+
list_for_each_entry_rcu(e, &map->members, list) {
243247
if (SET_WITH_TIMEOUT(set) &&
244248
ip_set_timeout_expired(ext_timeout(e, set)))
245249
continue;
@@ -316,9 +320,9 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
316320
{
317321
struct list_set *map = set->data;
318322
struct set_adt_elem *d = value;
319-
struct set_elem *e, *next, *prev = NULL;
323+
struct set_elem *e, *n, *next, *prev = NULL;
320324

321-
list_for_each_entry(e, &map->members, list) {
325+
list_for_each_entry_safe(e, n, &map->members, list) {
322326
if (SET_WITH_TIMEOUT(set) &&
323327
ip_set_timeout_expired(ext_timeout(e, set)))
324328
continue;
@@ -424,14 +428,8 @@ static void
424428
list_set_destroy(struct ip_set *set)
425429
{
426430
struct list_set *map = set->data;
427-
struct set_elem *e, *n;
428431

429-
list_for_each_entry_safe(e, n, &map->members, list) {
430-
list_del(&e->list);
431-
ip_set_put_byindex(map->net, e->id);
432-
ip_set_ext_destroy(set, e);
433-
kfree(e);
434-
}
432+
WARN_ON_ONCE(!list_empty(&map->members));
435433
kfree(map);
436434

437435
set->data = NULL;

0 commit comments

Comments
 (0)