Skip to content

Commit 64b892a

Browse files
committed
Merge branch 'inet-frag-fixes'
Florian Westphal says: ==================== inet: ip defrag bug fixes Johan Schuijt and Frank Schreuder reported crash and softlockup after the inet workqueue eviction change: general protection fault: 0000 [#1] SMP CPU: 0 PID: 4 Comm: kworker/0:0 Not tainted 3.18.18-transip-1.5 #1 Workqueue: events inet_frag_worker task: ffff880224935130 ti: ffff880224938000 task.ti: ffff880224938000 RIP: 0010:[<ffffffff8149288c>] [<ffffffff8149288c>] inet_evict_bucket+0xfc/0x160 RSP: 0018:ffff88022493bd58 EFLAGS: 00010286 RAX: ffff88021f4f3e80 RBX: dead000000100100 RCX: 000000000000006b RDX: 000000000000006c RSI: ffff88021f4f3e80 RDI: dead0000001000a8 RBP: 0000000000000002 R08: ffff880222273900 R09: ffff880036e49200 R10: ffff8800c6e86500 R11: ffff880036f45500 R12: ffffffff81a87100 R13: ffff88022493bd70 R14: 0000000000000000 R15: ffff8800c9b26280 [..] Call Trace: [<ffffffff814929e0>] ? inet_frag_worker+0x60/0x210 [<ffffffff8107e3a2>] ? process_one_work+0x142/0x3b0 [<ffffffff8107eb94>] ? worker_thread+0x114/0x440 [..] A second issue results in softlockup since the evictor may restart the eviction loop for a (potentially) unlimited number of times while local softirqs are disabled. Frank reports that the test system remained stable for 14 hours of testing (before, crash occurred within half an hour in their setup). ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 81296fc + caaecdd commit 64b892a

File tree

6 files changed

+42
-47
lines changed

6 files changed

+42
-47
lines changed

include/net/inet_frag.h

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,11 @@ struct netns_frags {
2121
* @INET_FRAG_FIRST_IN: first fragment has arrived
2222
* @INET_FRAG_LAST_IN: final fragment has arrived
2323
* @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction
24-
* @INET_FRAG_EVICTED: frag queue is being evicted
2524
*/
2625
enum {
2726
INET_FRAG_FIRST_IN = BIT(0),
2827
INET_FRAG_LAST_IN = BIT(1),
2928
INET_FRAG_COMPLETE = BIT(2),
30-
INET_FRAG_EVICTED = BIT(3)
3129
};
3230

3331
/**
@@ -45,6 +43,7 @@ enum {
4543
* @flags: fragment queue flags
4644
* @max_size: maximum received fragment size
4745
* @net: namespace that this frag belongs to
46+
* @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
4847
*/
4948
struct inet_frag_queue {
5049
spinlock_t lock;
@@ -59,6 +58,7 @@ struct inet_frag_queue {
5958
__u8 flags;
6059
u16 max_size;
6160
struct netns_frags *net;
61+
struct hlist_node list_evictor;
6262
};
6363

6464
#define INETFRAGS_HASHSZ 1024
@@ -125,6 +125,11 @@ static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f
125125
inet_frag_destroy(q, f);
126126
}
127127

128+
static inline bool inet_frag_evicting(struct inet_frag_queue *q)
129+
{
130+
return !hlist_unhashed(&q->list_evictor);
131+
}
132+
128133
/* Memory Tracking Functions. */
129134

130135
/* The default percpu_counter batch size is not big enough to scale to
@@ -139,14 +144,14 @@ static inline int frag_mem_limit(struct netns_frags *nf)
139144
return percpu_counter_read(&nf->mem);
140145
}
141146

142-
static inline void sub_frag_mem_limit(struct inet_frag_queue *q, int i)
147+
static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
143148
{
144-
__percpu_counter_add(&q->net->mem, -i, frag_percpu_counter_batch);
149+
__percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch);
145150
}
146151

147-
static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i)
152+
static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
148153
{
149-
__percpu_counter_add(&q->net->mem, i, frag_percpu_counter_batch);
154+
__percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch);
150155
}
151156

152157
static inline void init_frag_mem_limit(struct netns_frags *nf)

net/ieee802154/6lowpan/reassembly.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
207207
} else {
208208
fq->q.meat += skb->len;
209209
}
210-
add_frag_mem_limit(&fq->q, skb->truesize);
210+
add_frag_mem_limit(fq->q.net, skb->truesize);
211211

212212
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
213213
fq->q.meat == fq->q.len) {
@@ -287,7 +287,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
287287
clone->data_len = clone->len;
288288
head->data_len -= clone->len;
289289
head->len -= clone->len;
290-
add_frag_mem_limit(&fq->q, clone->truesize);
290+
add_frag_mem_limit(fq->q.net, clone->truesize);
291291
}
292292

293293
WARN_ON(head == NULL);
@@ -310,7 +310,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
310310
}
311311
fp = next;
312312
}
313-
sub_frag_mem_limit(&fq->q, sum_truesize);
313+
sub_frag_mem_limit(fq->q.net, sum_truesize);
314314

315315
head->next = NULL;
316316
head->dev = dev;

net/ipv4/inet_fragment.c

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -131,34 +131,22 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
131131
unsigned int evicted = 0;
132132
HLIST_HEAD(expired);
133133

134-
evict_again:
135134
spin_lock(&hb->chain_lock);
136135

137136
hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
138137
if (!inet_fragq_should_evict(fq))
139138
continue;
140139

141-
if (!del_timer(&fq->timer)) {
142-
/* q expiring right now thus increment its refcount so
143-
* it won't be freed under us and wait until the timer
144-
* has finished executing then destroy it
145-
*/
146-
atomic_inc(&fq->refcnt);
147-
spin_unlock(&hb->chain_lock);
148-
del_timer_sync(&fq->timer);
149-
inet_frag_put(fq, f);
150-
goto evict_again;
151-
}
140+
if (!del_timer(&fq->timer))
141+
continue;
152142

153-
fq->flags |= INET_FRAG_EVICTED;
154-
hlist_del(&fq->list);
155-
hlist_add_head(&fq->list, &expired);
143+
hlist_add_head(&fq->list_evictor, &expired);
156144
++evicted;
157145
}
158146

159147
spin_unlock(&hb->chain_lock);
160148

161-
hlist_for_each_entry_safe(fq, n, &expired, list)
149+
hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
162150
f->frag_expire((unsigned long) fq);
163151

164152
return evicted;
@@ -240,18 +228,20 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
240228
int i;
241229

242230
nf->low_thresh = 0;
243-
local_bh_disable();
244231

245232
evict_again:
233+
local_bh_disable();
246234
seq = read_seqbegin(&f->rnd_seqlock);
247235

248236
for (i = 0; i < INETFRAGS_HASHSZ ; i++)
249237
inet_evict_bucket(f, &f->hash[i]);
250238

251-
if (read_seqretry(&f->rnd_seqlock, seq))
252-
goto evict_again;
253-
254239
local_bh_enable();
240+
cond_resched();
241+
242+
if (read_seqretry(&f->rnd_seqlock, seq) ||
243+
percpu_counter_sum(&nf->mem))
244+
goto evict_again;
255245

256246
percpu_counter_destroy(&nf->mem);
257247
}
@@ -284,8 +274,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
284274
struct inet_frag_bucket *hb;
285275

286276
hb = get_frag_bucket_locked(fq, f);
287-
if (!(fq->flags & INET_FRAG_EVICTED))
288-
hlist_del(&fq->list);
277+
hlist_del(&fq->list);
278+
fq->flags |= INET_FRAG_COMPLETE;
289279
spin_unlock(&hb->chain_lock);
290280
}
291281

@@ -297,7 +287,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
297287
if (!(fq->flags & INET_FRAG_COMPLETE)) {
298288
fq_unlink(fq, f);
299289
atomic_dec(&fq->refcnt);
300-
fq->flags |= INET_FRAG_COMPLETE;
301290
}
302291
}
303292
EXPORT_SYMBOL(inet_frag_kill);
@@ -330,11 +319,12 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
330319
fp = xp;
331320
}
332321
sum = sum_truesize + f->qsize;
333-
sub_frag_mem_limit(q, sum);
334322

335323
if (f->destructor)
336324
f->destructor(q);
337325
kmem_cache_free(f->frags_cachep, q);
326+
327+
sub_frag_mem_limit(nf, sum);
338328
}
339329
EXPORT_SYMBOL(inet_frag_destroy);
340330

@@ -390,7 +380,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
390380

391381
q->net = nf;
392382
f->constructor(q, arg);
393-
add_frag_mem_limit(q, f->qsize);
383+
add_frag_mem_limit(nf, f->qsize);
394384

395385
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
396386
spin_lock_init(&q->lock);

net/ipv4/ip_fragment.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ static void ip_expire(unsigned long arg)
202202
ipq_kill(qp);
203203
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
204204

205-
if (!(qp->q.flags & INET_FRAG_EVICTED)) {
205+
if (!inet_frag_evicting(&qp->q)) {
206206
struct sk_buff *head = qp->q.fragments;
207207
const struct iphdr *iph;
208208
int err;
@@ -309,7 +309,7 @@ static int ip_frag_reinit(struct ipq *qp)
309309
kfree_skb(fp);
310310
fp = xp;
311311
} while (fp);
312-
sub_frag_mem_limit(&qp->q, sum_truesize);
312+
sub_frag_mem_limit(qp->q.net, sum_truesize);
313313

314314
qp->q.flags = 0;
315315
qp->q.len = 0;
@@ -455,7 +455,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
455455
qp->q.fragments = next;
456456

457457
qp->q.meat -= free_it->len;
458-
sub_frag_mem_limit(&qp->q, free_it->truesize);
458+
sub_frag_mem_limit(qp->q.net, free_it->truesize);
459459
kfree_skb(free_it);
460460
}
461461
}
@@ -479,7 +479,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
479479
qp->q.stamp = skb->tstamp;
480480
qp->q.meat += skb->len;
481481
qp->ecn |= ecn;
482-
add_frag_mem_limit(&qp->q, skb->truesize);
482+
add_frag_mem_limit(qp->q.net, skb->truesize);
483483
if (offset == 0)
484484
qp->q.flags |= INET_FRAG_FIRST_IN;
485485

@@ -587,7 +587,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
587587
head->len -= clone->len;
588588
clone->csum = 0;
589589
clone->ip_summed = head->ip_summed;
590-
add_frag_mem_limit(&qp->q, clone->truesize);
590+
add_frag_mem_limit(qp->q.net, clone->truesize);
591591
}
592592

593593
skb_push(head, head->data - skb_network_header(head));
@@ -615,7 +615,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
615615
}
616616
fp = next;
617617
}
618-
sub_frag_mem_limit(&qp->q, sum_truesize);
618+
sub_frag_mem_limit(qp->q.net, sum_truesize);
619619

620620
head->next = NULL;
621621
head->dev = dev;

net/ipv6/netfilter/nf_conntrack_reasm.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
348348
fq->ecn |= ecn;
349349
if (payload_len > fq->q.max_size)
350350
fq->q.max_size = payload_len;
351-
add_frag_mem_limit(&fq->q, skb->truesize);
351+
add_frag_mem_limit(fq->q.net, skb->truesize);
352352

353353
/* The first fragment.
354354
* nhoffset is obtained from the first fragment, of course.
@@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
430430
clone->ip_summed = head->ip_summed;
431431

432432
NFCT_FRAG6_CB(clone)->orig = NULL;
433-
add_frag_mem_limit(&fq->q, clone->truesize);
433+
add_frag_mem_limit(fq->q.net, clone->truesize);
434434
}
435435

436436
/* We have to remove fragment header from datagram and to relocate
@@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
454454
head->csum = csum_add(head->csum, fp->csum);
455455
head->truesize += fp->truesize;
456456
}
457-
sub_frag_mem_limit(&fq->q, head->truesize);
457+
sub_frag_mem_limit(fq->q.net, head->truesize);
458458

459459
head->ignore_df = 1;
460460
head->next = NULL;

net/ipv6/reassembly.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
144144

145145
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
146146

147-
if (fq->q.flags & INET_FRAG_EVICTED)
147+
if (inet_frag_evicting(&fq->q))
148148
goto out_rcu_unlock;
149149

150150
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
@@ -330,7 +330,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
330330
fq->q.stamp = skb->tstamp;
331331
fq->q.meat += skb->len;
332332
fq->ecn |= ecn;
333-
add_frag_mem_limit(&fq->q, skb->truesize);
333+
add_frag_mem_limit(fq->q.net, skb->truesize);
334334

335335
/* The first fragment.
336336
* nhoffset is obtained from the first fragment, of course.
@@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
443443
head->len -= clone->len;
444444
clone->csum = 0;
445445
clone->ip_summed = head->ip_summed;
446-
add_frag_mem_limit(&fq->q, clone->truesize);
446+
add_frag_mem_limit(fq->q.net, clone->truesize);
447447
}
448448

449449
/* We have to remove fragment header from datagram and to relocate
@@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
481481
}
482482
fp = next;
483483
}
484-
sub_frag_mem_limit(&fq->q, sum_truesize);
484+
sub_frag_mem_limit(fq->q.net, sum_truesize);
485485

486486
head->next = NULL;
487487
head->dev = dev;

0 commit comments

Comments
 (0)