Skip to content

Commit de5df63

Browse files
jrfastabdavem330
authored andcommitted
net: sched: cls_u32 changes to knode must appear atomic to readers
Changes to the cls_u32 classifier must appear atomic to the readers. Before this patch if a change is requested for both the exts and ifindex, first the ifindex is updated then the exts with tcf_exts_change(). This opens a small window where a reader can have a exts chain with an incorrect ifindex. This violates the the RCU semantics. Here we resolve this by always passing u32_set_parms() a copy of the tc_u_knode to work on and then inserting it into the hash table after the updates have been successfully applied. Tested with the following short script: #tc filter add dev p3p2 parent 8001:0 protocol ip prio 99 handle 1: \ u32 divisor 256 #tc filter add dev p3p2 parent 8001:0 protocol ip prio 99 \ u32 link 1: hashkey mask ffffff00 at 12 \ match ip src 192.168.8.0/2 #tc filter add dev p3p2 parent 8001:0 protocol ip prio 102 \ handle 1::10 u32 classid 1:2 ht 1: \ match ip src 192.168.8.0/8 match ip tos 0x0a 1e #tc filter change dev p3p2 parent 8001:0 protocol ip prio 102 \ handle 1::10 u32 classid 1:2 ht 1: \ match ip src 1.1.0.0/8 match ip tos 0x0b 1e CC: Eric Dumazet <[email protected]> CC: Jamal Hadi Salim <[email protected]> Signed-off-by: John Fastabend <[email protected]> Acked-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent a1ddcfe commit de5df63

File tree

1 file changed

+126
-9
lines changed

1 file changed

+126
-9
lines changed

net/sched/cls_u32.c

Lines changed: 126 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -354,27 +354,53 @@ static int u32_init(struct tcf_proto *tp)
354354
return 0;
355355
}
356356

357-
static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
357+
static int u32_destroy_key(struct tcf_proto *tp,
358+
struct tc_u_knode *n,
359+
bool free_pf)
358360
{
359361
tcf_unbind_filter(tp, &n->res);
360362
tcf_exts_destroy(tp, &n->exts);
361363
if (n->ht_down)
362364
n->ht_down->refcnt--;
363365
#ifdef CONFIG_CLS_U32_PERF
364-
free_percpu(n->pf);
366+
if (free_pf)
367+
free_percpu(n->pf);
365368
#endif
366369
#ifdef CONFIG_CLS_U32_MARK
367-
free_percpu(n->pcpu_success);
370+
if (free_pf)
371+
free_percpu(n->pcpu_success);
368372
#endif
369373
kfree(n);
370374
return 0;
371375
}
372376

377+
/* u32_delete_key_rcu should be called when free'ing a copied
378+
* version of a tc_u_knode obtained from u32_init_knode(). When
379+
* copies are obtained from u32_init_knode() the statistics are
380+
* shared between the old and new copies to allow readers to
381+
* continue to update the statistics during the copy. To support
382+
* this the u32_delete_key_rcu variant does not free the percpu
383+
* statistics.
384+
*/
373385
static void u32_delete_key_rcu(struct rcu_head *rcu)
374386
{
375387
struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
376388

377-
u32_destroy_key(key->tp, key);
389+
u32_destroy_key(key->tp, key, false);
390+
}
391+
392+
/* u32_delete_key_freepf_rcu is the rcu callback variant
393+
* that free's the entire structure including the statistics
394+
* percpu variables. Only use this if the key is not a copy
395+
* returned by u32_init_knode(). See u32_delete_key_rcu()
396+
* for the variant that should be used with keys return from
397+
* u32_init_knode()
398+
*/
399+
static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
400+
{
401+
struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
402+
403+
u32_destroy_key(key->tp, key, true);
378404
}
379405

380406
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
@@ -390,7 +416,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
390416
if (pkp == key) {
391417
RCU_INIT_POINTER(*kp, key->next);
392418

393-
call_rcu(&key->rcu, u32_delete_key_rcu);
419+
call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
394420
return 0;
395421
}
396422
}
@@ -408,7 +434,7 @@ static void u32_clear_hnode(struct tc_u_hnode *ht)
408434
while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
409435
RCU_INIT_POINTER(ht->ht[h],
410436
rtnl_dereference(n->next));
411-
call_rcu(&n->rcu, u32_delete_key_rcu);
437+
call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
412438
}
413439
}
414440
}
@@ -584,6 +610,82 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
584610
return err;
585611
}
586612

613+
static void u32_replace_knode(struct tcf_proto *tp,
614+
struct tc_u_common *tp_c,
615+
struct tc_u_knode *n)
616+
{
617+
struct tc_u_knode __rcu **ins;
618+
struct tc_u_knode *pins;
619+
struct tc_u_hnode *ht;
620+
621+
if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
622+
ht = rtnl_dereference(tp->root);
623+
else
624+
ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));
625+
626+
ins = &ht->ht[TC_U32_HASH(n->handle)];
627+
628+
/* The node must always exist for it to be replaced if this is not the
629+
* case then something went very wrong elsewhere.
630+
*/
631+
for (pins = rtnl_dereference(*ins); ;
632+
ins = &pins->next, pins = rtnl_dereference(*ins))
633+
if (pins->handle == n->handle)
634+
break;
635+
636+
RCU_INIT_POINTER(n->next, pins->next);
637+
rcu_assign_pointer(*ins, n);
638+
}
639+
640+
static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
641+
struct tc_u_knode *n)
642+
{
643+
struct tc_u_knode *new;
644+
struct tc_u32_sel *s = &n->sel;
645+
646+
new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
647+
GFP_KERNEL);
648+
649+
if (!new)
650+
return NULL;
651+
652+
RCU_INIT_POINTER(new->next, n->next);
653+
new->handle = n->handle;
654+
RCU_INIT_POINTER(new->ht_up, n->ht_up);
655+
656+
#ifdef CONFIG_NET_CLS_IND
657+
new->ifindex = n->ifindex;
658+
#endif
659+
new->fshift = n->fshift;
660+
new->res = n->res;
661+
RCU_INIT_POINTER(new->ht_down, n->ht_down);
662+
663+
/* bump reference count as long as we hold pointer to structure */
664+
if (new->ht_down)
665+
new->ht_down->refcnt++;
666+
667+
#ifdef CONFIG_CLS_U32_PERF
668+
/* Statistics may be incremented by readers during update
669+
* so we must keep them in tact. When the node is later destroyed
670+
* a special destroy call must be made to not free the pf memory.
671+
*/
672+
new->pf = n->pf;
673+
#endif
674+
675+
#ifdef CONFIG_CLS_U32_MARK
676+
new->val = n->val;
677+
new->mask = n->mask;
678+
/* Similarly success statistics must be moved as pointers */
679+
new->pcpu_success = n->pcpu_success;
680+
#endif
681+
new->tp = tp;
682+
memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
683+
684+
tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE);
685+
686+
return new;
687+
}
688+
587689
static int u32_change(struct net *net, struct sk_buff *in_skb,
588690
struct tcf_proto *tp, unsigned long base, u32 handle,
589691
struct nlattr **tca,
@@ -610,12 +712,27 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
610712

611713
n = (struct tc_u_knode *)*arg;
612714
if (n) {
715+
struct tc_u_knode *new;
716+
613717
if (TC_U32_KEY(n->handle) == 0)
614718
return -EINVAL;
615719

616-
return u32_set_parms(net, tp, base,
617-
rtnl_dereference(n->ht_up), n, tb,
618-
tca[TCA_RATE], ovr);
720+
new = u32_init_knode(tp, n);
721+
if (!new)
722+
return -ENOMEM;
723+
724+
err = u32_set_parms(net, tp, base,
725+
rtnl_dereference(n->ht_up), new, tb,
726+
tca[TCA_RATE], ovr);
727+
728+
if (err) {
729+
u32_destroy_key(tp, new, false);
730+
return err;
731+
}
732+
733+
u32_replace_knode(tp, tp_c, new);
734+
call_rcu(&n->rcu, u32_delete_key_rcu);
735+
return 0;
619736
}
620737

621738
if (tb[TCA_U32_DIVISOR]) {

0 commit comments

Comments
 (0)