From 23cbcb483a2386dd353ce509078419c43b36f295 Mon Sep 17 00:00:00 2001 From: Brett Mastbergen Date: Mon, 10 Mar 2025 15:10:09 -0400 Subject: [PATCH] net: fix __dst_negative_advice() race jira VULN-1303 cve CVE-2024-36971 commit-author Eric Dumazet commit 92f1655aa2b2294d0b49925f3b875a634bd3b59e upstream-diff This change breaks the kabi. Use the RH_KABI_REPLACE macro to define the negative_advice function such that check-kabi will still pass. From rh_kabi.h: "The RH_KABI_REPLACE* macros attempt to add the ability to use the '_new' element while preserving size alignment and kabi agreement with the '_orig' element." Also, this version of the kernel does not have rcu locking in ip6_negative_advice like the upstream does so the rcu_read_lock/rcu_read_unlock calls are not in this changeset. __dst_negative_advice() does not enforce proper RCU rules when sk->dst_cache must be cleared, leading to possible UAF. RCU rules are that we must first clear sk->sk_dst_cache, then call dst_release(old_dst). Note that sk_dst_reset(sk) is implementing this protocol correctly, while __dst_negative_advice() uses the wrong order. Given that ip6_negative_advice() has special logic against RTF_CACHE, this means each of the three ->negative_advice() existing methods must perform the sk_dst_reset() themselves. Note the check against NULL dst is centralized in __dst_negative_advice(), there is no need to duplicate it in various callbacks. Many thanks to Clement Lecigne for tracking this issue. This old bug became visible after the blamed commit, using UDP sockets. Fixes: a87cb3e48ee8 ("net: Facility to report route quality of connected sockets") Reported-by: Clement Lecigne Diagnosed-by: Clement Lecigne Signed-off-by: Eric Dumazet Cc: Tom Herbert Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240528114353.1794151-1-edumazet@google.com Signed-off-by: Jakub Kicinski (cherry picked from commit 92f1655aa2b2294d0b49925f3b875a634bd3b59e) Signed-off-by: Brett Mastbergen --- include/net/dst_ops.h | 3 ++- include/net/sock.h | 13 +++---------- net/ipv4/route.c | 22 ++++++++-------------- net/ipv6/route.c | 25 +++++++++++++------------ net/xfrm/xfrm_policy.c | 11 +++-------- 5 files changed, 29 insertions(+), 45 deletions(-) diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index e03ecf7493965..4bc938b94cd84 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -23,7 +23,8 @@ struct dst_ops { void (*destroy)(struct dst_entry *); void (*ifdown)(struct dst_entry *, struct net_device *dev, int how); - struct dst_entry * (*negative_advice)(struct dst_entry *); + RH_KABI_REPLACE(struct dst_entry * (*negative_advice)(struct dst_entry *), + void (*negative_advice)(struct sock *sk, struct dst_entry *)) void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); diff --git a/include/net/sock.h b/include/net/sock.h index 33e5bc942c538..11411de5474e3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1841,17 +1841,10 @@ extern void sk_reset_txq(struct sock *sk); static inline void dst_negative_advice(struct sock *sk) { - struct dst_entry *ndst, *dst = __sk_dst_get(sk); + struct dst_entry *dst = __sk_dst_get(sk); - if (dst && dst->ops->negative_advice) { - ndst = dst->ops->negative_advice(dst); - - if (ndst != dst) { - rcu_assign_pointer(sk->sk_dst_cache, ndst); - sk_reset_txq(sk); - sk->sk_dst_pending_confirm = 0; - } - } + if (dst && dst->ops->negative_advice) + dst->ops->negative_advice(sk, dst); } static inline void diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 26f19384f3997..23f72d659303f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -137,7 +137,8 @@ static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); static unsigned int ipv4_mtu(const struct dst_entry *dst); -static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); +static void ipv4_negative_advice(struct sock *sk, + struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -838,22 +839,15 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf __ip_do_redirect(rt, skb, &fl4, true); } -static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) +static void ipv4_negative_advice(struct sock *sk, + struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; - struct dst_entry *ret = dst; - if (rt) { - if (dst->obsolete > 0) { - ip_rt_put(rt); - ret = NULL; - } else if ((rt->rt_flags & RTCF_REDIRECTED) || - rt->dst.expires) { - ip_rt_put(rt); - ret = NULL; - } - } - return ret; + if ((dst->obsolete > 0) || + (rt->rt_flags & RTCF_REDIRECTED) || + rt->dst.expires) + sk_dst_reset(sk); } /* diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f41bf9a5e5402..fca954e73a757 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -81,7 +81,8 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); static unsigned int ip6_mtu(const struct dst_entry *dst); -static struct dst_entry *ip6_negative_advice(struct dst_entry *); +static void ip6_negative_advice(struct sock *sk, + struct dst_entry *dst); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); @@ -1297,22 +1298,22 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) return rt6_check(rt, cookie); } -static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) +static void ip6_negative_advice(struct sock *sk, + struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *) dst; - if (rt) { - if (rt->rt6i_flags & RTF_CACHE) { - if (rt6_check_expired(rt)) { - ip6_del_rt(rt); - dst = NULL; - } - } else { - dst_release(dst); - dst = NULL; + if (rt->rt6i_flags & RTF_CACHE) { + if (rt6_check_expired(rt)) { + /* counteract the dst_release() in sk_dst_reset() */ + dst_hold(dst); + sk_dst_reset(sk); + + ip6_del_rt(rt); } + return; } - return dst; + sk_dst_reset(sk); } static void ip6_link_failure(struct sk_buff *skb) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ae671f178c34a..261196a4a4bba 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2632,15 +2632,10 @@ static void xfrm_link_failure(struct sk_buff *skb) /* Impossible. Such dst must be popped before reaches point of failure. */ } -static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) +static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst) { - if (dst) { - if (dst->obsolete) { - dst_release(dst); - dst = NULL; - } - } - return dst; + if (dst->obsolete) + sk_dst_reset(sk); } void xfrm_garbage_collect(struct net *net)