Skip to content

Commit 71ae0df

Browse files
Florian Westphal authored and ummakynes committed
netfilter: xtables: use percpu rule counters
The binary arp/ip/ip6tables ruleset is stored per cpu. The only reason left as to why we need percpu duplication are the rule counters embedded into ipt_entry et al -- since each cpu has its own copy of the rules, all counters can be lockless. The downside is that the more cpus are supported, the more memory is required. Rules are not just duplicated per online cpu but for each possible cpu, i.e. if maxcpu is 144, then rule is duplicated 144 times, not for the e.g. 64 cores present. To save some memory and also improve utilization of shared caches it would be preferable to only store the rule blob once. So we first need to separate counters and the rule blob. Instead of using entry->counters, allocate this percpu and store the percpu address in entry->counters.pcnt on CONFIG_SMP. This change makes no sense as-is; it is merely an intermediate step to remove the percpu duplication of the rule set in a followup patch. Suggested-by: Eric Dumazet <[email protected]> Acked-by: Jesper Dangaard Brouer <[email protected]> Reported-by: Marcelo Ricardo Leitner <[email protected]> Signed-off-by: Florian Westphal <[email protected]> Acked-by: Eric Dumazet <[email protected]> Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent d7b5974 commit 71ae0df

File tree

4 files changed

+129
-14
lines changed

4 files changed

+129
-14
lines changed

include/linux/netfilter/x_tables.h

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,55 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
353353
return ret;
354354
}
355355

356+
357+
/* On SMP, ip(6)t_entry->counters.pcnt holds address of the
358+
* real (percpu) counter. On !SMP, its just the packet count,
359+
* so nothing needs to be done there.
360+
*
361+
* xt_percpu_counter_alloc returns the address of the percpu
362+
* counter, or 0 on !SMP.
363+
*
364+
* Hence caller must use IS_ERR_VALUE to check for error, this
365+
* allows us to return 0 for single core systems without forcing
366+
* callers to deal with SMP vs. NONSMP issues.
367+
*/
368+
static inline u64 xt_percpu_counter_alloc(void)
369+
{
370+
if (nr_cpu_ids > 1) {
371+
void __percpu *res = alloc_percpu(struct xt_counters);
372+
373+
if (res == NULL)
374+
return (u64) -ENOMEM;
375+
376+
return (__force u64) res;
377+
}
378+
379+
return 0;
380+
}
381+
/* Release the percpu counter allocated by xt_percpu_counter_alloc().
 * On !SMP nothing was allocated, so this is a no-op there.
 */
static inline void xt_percpu_counter_free(u64 pcnt)
{
	if (nr_cpu_ids <= 1)
		return;

	free_percpu((void __percpu *) pcnt);
}
386+
387+
static inline struct xt_counters *
388+
xt_get_this_cpu_counter(struct xt_counters *cnt)
389+
{
390+
if (nr_cpu_ids > 1)
391+
return this_cpu_ptr((void __percpu *) cnt->pcnt);
392+
393+
return cnt;
394+
}
395+
396+
static inline struct xt_counters *
397+
xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
398+
{
399+
if (nr_cpu_ids > 1)
400+
return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu);
401+
402+
return cnt;
403+
}
404+
356405
struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
357406
void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
358407

net/ipv4/netfilter/arp_tables.c

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -289,13 +289,15 @@ unsigned int arpt_do_table(struct sk_buff *skb,
289289
arp = arp_hdr(skb);
290290
do {
291291
const struct xt_entry_target *t;
292+
struct xt_counters *counter;
292293

293294
if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
294295
e = arpt_next_entry(e);
295296
continue;
296297
}
297298

298-
ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1);
299+
counter = xt_get_this_cpu_counter(&e->counters);
300+
ADD_COUNTER(*counter, arp_hdr_len(skb->dev), 1);
299301

300302
t = arpt_get_target_c(e);
301303

@@ -521,6 +523,10 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
521523
if (ret)
522524
return ret;
523525

526+
e->counters.pcnt = xt_percpu_counter_alloc();
527+
if (IS_ERR_VALUE(e->counters.pcnt))
528+
return -ENOMEM;
529+
524530
t = arpt_get_target(e);
525531
target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
526532
t->u.user.revision);
@@ -538,6 +544,8 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
538544
err:
539545
module_put(t->u.kernel.target->me);
540546
out:
547+
xt_percpu_counter_free(e->counters.pcnt);
548+
541549
return ret;
542550
}
543551

@@ -614,6 +622,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
614622
if (par.target->destroy != NULL)
615623
par.target->destroy(&par);
616624
module_put(par.target->me);
625+
xt_percpu_counter_free(e->counters.pcnt);
617626
}
618627

619628
/* Checks and translates the user-supplied table segment (held in
@@ -723,13 +732,15 @@ static void get_counters(const struct xt_table_info *t,
723732

724733
i = 0;
725734
xt_entry_foreach(iter, t->entries[cpu], t->size) {
735+
struct xt_counters *tmp;
726736
u64 bcnt, pcnt;
727737
unsigned int start;
728738

739+
tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
729740
do {
730741
start = read_seqcount_begin(s);
731-
bcnt = iter->counters.bcnt;
732-
pcnt = iter->counters.pcnt;
742+
bcnt = tmp->bcnt;
743+
pcnt = tmp->pcnt;
733744
} while (read_seqcount_retry(s, start));
734745

735746
ADD_COUNTER(counters[i], bcnt, pcnt);
@@ -1186,7 +1197,10 @@ static int do_add_counters(struct net *net, const void __user *user,
11861197
loc_cpu_entry = private->entries[curcpu];
11871198
addend = xt_write_recseq_begin();
11881199
xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1189-
ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1200+
struct xt_counters *tmp;
1201+
1202+
tmp = xt_get_this_cpu_counter(&iter->counters);
1203+
ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
11901204
++i;
11911205
}
11921206
xt_write_recseq_end(addend);
@@ -1416,9 +1430,17 @@ static int translate_compat_table(const char *name,
14161430

14171431
i = 0;
14181432
xt_entry_foreach(iter1, entry1, newinfo->size) {
1433+
iter1->counters.pcnt = xt_percpu_counter_alloc();
1434+
if (IS_ERR_VALUE(iter1->counters.pcnt)) {
1435+
ret = -ENOMEM;
1436+
break;
1437+
}
1438+
14191439
ret = check_target(iter1, name);
1420-
if (ret != 0)
1440+
if (ret != 0) {
1441+
xt_percpu_counter_free(iter1->counters.pcnt);
14211442
break;
1443+
}
14221444
++i;
14231445
if (strcmp(arpt_get_target(iter1)->u.user.name,
14241446
XT_ERROR_TARGET) == 0)

net/ipv4/netfilter/ip_tables.c

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ ipt_do_table(struct sk_buff *skb,
345345
do {
346346
const struct xt_entry_target *t;
347347
const struct xt_entry_match *ematch;
348+
struct xt_counters *counter;
348349

349350
IP_NF_ASSERT(e);
350351
if (!ip_packet_match(ip, indev, outdev,
@@ -361,7 +362,8 @@ ipt_do_table(struct sk_buff *skb,
361362
goto no_match;
362363
}
363364

364-
ADD_COUNTER(e->counters, skb->len, 1);
365+
counter = xt_get_this_cpu_counter(&e->counters);
366+
ADD_COUNTER(*counter, skb->len, 1);
365367

366368
t = ipt_get_target(e);
367369
IP_NF_ASSERT(t->u.kernel.target);
@@ -665,6 +667,10 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
665667
if (ret)
666668
return ret;
667669

670+
e->counters.pcnt = xt_percpu_counter_alloc();
671+
if (IS_ERR_VALUE(e->counters.pcnt))
672+
return -ENOMEM;
673+
668674
j = 0;
669675
mtpar.net = net;
670676
mtpar.table = name;
@@ -691,6 +697,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
691697
ret = check_target(e, net, name);
692698
if (ret)
693699
goto err;
700+
694701
return 0;
695702
err:
696703
module_put(t->u.kernel.target->me);
@@ -700,6 +707,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
700707
break;
701708
cleanup_match(ematch, net);
702709
}
710+
711+
xt_percpu_counter_free(e->counters.pcnt);
712+
703713
return ret;
704714
}
705715

@@ -784,6 +794,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
784794
if (par.target->destroy != NULL)
785795
par.target->destroy(&par);
786796
module_put(par.target->me);
797+
xt_percpu_counter_free(e->counters.pcnt);
787798
}
788799

789800
/* Checks and translates the user-supplied table segment (held in
@@ -888,13 +899,15 @@ get_counters(const struct xt_table_info *t,
888899

889900
i = 0;
890901
xt_entry_foreach(iter, t->entries[cpu], t->size) {
902+
struct xt_counters *tmp;
891903
u64 bcnt, pcnt;
892904
unsigned int start;
893905

906+
tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
894907
do {
895908
start = read_seqcount_begin(s);
896-
bcnt = iter->counters.bcnt;
897-
pcnt = iter->counters.pcnt;
909+
bcnt = tmp->bcnt;
910+
pcnt = tmp->pcnt;
898911
} while (read_seqcount_retry(s, start));
899912

900913
ADD_COUNTER(counters[i], bcnt, pcnt);
@@ -1374,7 +1387,10 @@ do_add_counters(struct net *net, const void __user *user,
13741387
loc_cpu_entry = private->entries[curcpu];
13751388
addend = xt_write_recseq_begin();
13761389
xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1377-
ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1390+
struct xt_counters *tmp;
1391+
1392+
tmp = xt_get_this_cpu_counter(&iter->counters);
1393+
ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
13781394
++i;
13791395
}
13801396
xt_write_recseq_end(addend);
@@ -1608,6 +1624,10 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
16081624
unsigned int j;
16091625
int ret = 0;
16101626

1627+
e->counters.pcnt = xt_percpu_counter_alloc();
1628+
if (IS_ERR_VALUE(e->counters.pcnt))
1629+
return -ENOMEM;
1630+
16111631
j = 0;
16121632
mtpar.net = net;
16131633
mtpar.table = name;
@@ -1632,6 +1652,9 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
16321652
break;
16331653
cleanup_match(ematch, net);
16341654
}
1655+
1656+
xt_percpu_counter_free(e->counters.pcnt);
1657+
16351658
return ret;
16361659
}
16371660

net/ipv6/netfilter/ip6_tables.c

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ ip6t_do_table(struct sk_buff *skb,
367367
do {
368368
const struct xt_entry_target *t;
369369
const struct xt_entry_match *ematch;
370+
struct xt_counters *counter;
370371

371372
IP_NF_ASSERT(e);
372373
acpar.thoff = 0;
@@ -384,7 +385,8 @@ ip6t_do_table(struct sk_buff *skb,
384385
goto no_match;
385386
}
386387

387-
ADD_COUNTER(e->counters, skb->len, 1);
388+
counter = xt_get_this_cpu_counter(&e->counters);
389+
ADD_COUNTER(*counter, skb->len, 1);
388390

389391
t = ip6t_get_target_c(e);
390392
IP_NF_ASSERT(t->u.kernel.target);
@@ -679,6 +681,10 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
679681
if (ret)
680682
return ret;
681683

684+
e->counters.pcnt = xt_percpu_counter_alloc();
685+
if (IS_ERR_VALUE(e->counters.pcnt))
686+
return -ENOMEM;
687+
682688
j = 0;
683689
mtpar.net = net;
684690
mtpar.table = name;
@@ -714,6 +720,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
714720
break;
715721
cleanup_match(ematch, net);
716722
}
723+
724+
xt_percpu_counter_free(e->counters.pcnt);
725+
717726
return ret;
718727
}
719728

@@ -797,6 +806,8 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
797806
if (par.target->destroy != NULL)
798807
par.target->destroy(&par);
799808
module_put(par.target->me);
809+
810+
xt_percpu_counter_free(e->counters.pcnt);
800811
}
801812

802813
/* Checks and translates the user-supplied table segment (held in
@@ -901,13 +912,15 @@ get_counters(const struct xt_table_info *t,
901912

902913
i = 0;
903914
xt_entry_foreach(iter, t->entries[cpu], t->size) {
915+
struct xt_counters *tmp;
904916
u64 bcnt, pcnt;
905917
unsigned int start;
906918

919+
tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
907920
do {
908921
start = read_seqcount_begin(s);
909-
bcnt = iter->counters.bcnt;
910-
pcnt = iter->counters.pcnt;
922+
bcnt = tmp->bcnt;
923+
pcnt = tmp->pcnt;
911924
} while (read_seqcount_retry(s, start));
912925

913926
ADD_COUNTER(counters[i], bcnt, pcnt);
@@ -1374,7 +1387,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
13741387
goto free;
13751388
}
13761389

1377-
13781390
local_bh_disable();
13791391
private = t->private;
13801392
if (private->number != num_counters) {
@@ -1388,7 +1400,10 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
13881400
addend = xt_write_recseq_begin();
13891401
loc_cpu_entry = private->entries[curcpu];
13901402
xt_entry_foreach(iter, loc_cpu_entry, private->size) {
1391-
ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
1403+
struct xt_counters *tmp;
1404+
1405+
tmp = xt_get_this_cpu_counter(&iter->counters);
1406+
ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
13921407
++i;
13931408
}
13941409
xt_write_recseq_end(addend);
@@ -1621,6 +1636,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
16211636
struct xt_mtchk_param mtpar;
16221637
struct xt_entry_match *ematch;
16231638

1639+
e->counters.pcnt = xt_percpu_counter_alloc();
1640+
if (IS_ERR_VALUE(e->counters.pcnt))
1641+
return -ENOMEM;
16241642
j = 0;
16251643
mtpar.net = net;
16261644
mtpar.table = name;
@@ -1645,6 +1663,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
16451663
break;
16461664
cleanup_match(ematch, net);
16471665
}
1666+
1667+
xt_percpu_counter_free(e->counters.pcnt);
1668+
16481669
return ret;
16491670
}
16501671

0 commit comments

Comments
 (0)