Skip to content

Commit 41ec374

Browse files
committed
Merge branch 'vxlan-fix-npds-when-using-nexthop-objects'
Ido Schimmel says: ==================== vxlan: Fix NPDs when using nexthop objects With FDB nexthop groups, VXLAN FDB entries do not necessarily point to a remote destination but rather to an FDB nexthop group. This means that first_remote_{rcu,rtnl}() can return NULL and a few places in the driver were not ready for that, resulting in NULL pointer dereferences. Patches #1-#2 fix these NPDs. Note that vxlan_fdb_find_uc() still dereferences the remote returned by first_remote_rcu() without checking that it is not NULL, but this function is only invoked by a single driver which vetoes the creation of FDB nexthop groups. I will patch this in net-next to make the code less fragile. Patch #3 adds a selftests which exercises these code paths and tests basic Tx functionality with FDB nexthop groups. I verified that the test crashes the kernel without the first two patches. ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
2 parents a7195a3 + 2c9fb92 commit 41ec374

File tree

4 files changed

+237
-9
lines changed

4 files changed

+237
-9
lines changed

drivers/net/vxlan/vxlan_core.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1445,6 +1445,10 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
14451445
if (READ_ONCE(f->updated) != now)
14461446
WRITE_ONCE(f->updated, now);
14471447

1448+
/* Don't override an fdb with nexthop with a learnt entry */
1449+
if (rcu_access_pointer(f->nh))
1450+
return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;
1451+
14481452
if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
14491453
rdst->remote_ifindex == ifindex))
14501454
return SKB_NOT_DROPPED_YET;
@@ -1453,10 +1457,6 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
14531457
if (f->state & (NUD_PERMANENT | NUD_NOARP))
14541458
return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;
14551459

1456-
/* Don't override an fdb with nexthop with a learnt entry */
1457-
if (rcu_access_pointer(f->nh))
1458-
return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;
1459-
14601460
if (net_ratelimit())
14611461
netdev_info(dev,
14621462
"%pM migrated from %pIS to %pIS\n",
@@ -1877,6 +1877,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
18771877
n = neigh_lookup(&arp_tbl, &tip, dev);
18781878

18791879
if (n) {
1880+
struct vxlan_rdst *rdst = NULL;
18801881
struct vxlan_fdb *f;
18811882
struct sk_buff *reply;
18821883

@@ -1887,7 +1888,9 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
18871888

18881889
rcu_read_lock();
18891890
f = vxlan_find_mac_tx(vxlan, n->ha, vni);
1890-
if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
1891+
if (f)
1892+
rdst = first_remote_rcu(f);
1893+
if (rdst && vxlan_addr_any(&rdst->remote_ip)) {
18911894
/* bridge-local neighbor */
18921895
neigh_release(n);
18931896
rcu_read_unlock();
@@ -2044,6 +2047,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
20442047
n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev);
20452048

20462049
if (n) {
2050+
struct vxlan_rdst *rdst = NULL;
20472051
struct vxlan_fdb *f;
20482052
struct sk_buff *reply;
20492053

@@ -2053,7 +2057,9 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
20532057
}
20542058

20552059
f = vxlan_find_mac_tx(vxlan, n->ha, vni);
2056-
if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
2060+
if (f)
2061+
rdst = first_remote_rcu(f);
2062+
if (rdst && vxlan_addr_any(&rdst->remote_ip)) {
20572063
/* bridge-local neighbor */
20582064
neigh_release(n);
20592065
goto out;

drivers/net/vxlan/vxlan_private.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,7 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port)
6161
return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
6262
}
6363

64-
/* First remote destination for a forwarding entry.
65-
* Guaranteed to be non-NULL because remotes are never deleted.
66-
*/
64+
/* First remote destination for a forwarding entry. */
6765
static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
6866
{
6967
if (rcu_access_pointer(fdb->nh))

tools/testing/selftests/net/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ TEST_GEN_PROGS += bind_wildcard
9999
TEST_GEN_PROGS += bind_timewait
100100
TEST_PROGS += test_vxlan_mdb.sh
101101
TEST_PROGS += test_bridge_neigh_suppress.sh
102+
TEST_PROGS += test_vxlan_nh.sh
102103
TEST_PROGS += test_vxlan_nolocalbypass.sh
103104
TEST_PROGS += test_bridge_backup_port.sh
104105
TEST_PROGS += test_neigh.sh
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
#!/bin/bash
2+
# SPDX-License-Identifier: GPL-2.0
3+
4+
source lib.sh
5+
TESTS="
6+
basic_tx_ipv4
7+
basic_tx_ipv6
8+
learning
9+
proxy_ipv4
10+
proxy_ipv6
11+
"
12+
VERBOSE=0
13+
14+
################################################################################
15+
# Utilities
16+
17+
run_cmd()
18+
{
19+
local cmd="$1"
20+
local out
21+
local stderr="2>/dev/null"
22+
23+
if [ "$VERBOSE" = "1" ]; then
24+
echo "COMMAND: $cmd"
25+
stderr=
26+
fi
27+
28+
out=$(eval "$cmd" "$stderr")
29+
rc=$?
30+
if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then
31+
echo " $out"
32+
fi
33+
34+
return $rc
35+
}
36+
37+
################################################################################
38+
# Cleanup
39+
40+
exit_cleanup_all()
41+
{
42+
cleanup_all_ns
43+
exit "${EXIT_STATUS}"
44+
}
45+
46+
################################################################################
47+
# Tests
48+
49+
nh_stats_get()
50+
{
51+
ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]"
52+
}
53+
54+
tc_stats_get()
55+
{
56+
tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1"
57+
}
58+
59+
basic_tx_common()
60+
{
61+
local af_str=$1; shift
62+
local proto=$1; shift
63+
local local_addr=$1; shift
64+
local plen=$1; shift
65+
local remote_addr=$1; shift
66+
67+
RET=0
68+
69+
# Test basic Tx functionality. Check that stats are incremented on
70+
# both the FDB nexthop group and the egress device.
71+
72+
run_cmd "ip -n $ns1 link add name dummy1 up type dummy"
73+
run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1"
74+
run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact"
75+
run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass"
76+
77+
run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
78+
79+
run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
80+
run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
81+
82+
run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789"
83+
run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
84+
85+
run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q"
86+
87+
busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null
88+
check_err $? "FDB nexthop group stats did not increase"
89+
90+
busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null
91+
check_err $? "tc filter stats did not increase"
92+
93+
log_test "VXLAN FDB nexthop: $af_str basic Tx"
94+
}
95+
96+
basic_tx_ipv4()
97+
{
98+
basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2
99+
}
100+
101+
basic_tx_ipv6()
102+
{
103+
basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2
104+
}
105+
106+
learning()
107+
{
108+
RET=0
109+
110+
# When learning is enabled on the VXLAN device, an incoming packet
111+
# might try to refresh an FDB entry that points to an FDB nexthop group
112+
# instead of an ordinary remote destination. Check that the kernel does
113+
# not crash in this situation.
114+
115+
run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo"
116+
run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo"
117+
118+
run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb"
119+
run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
120+
121+
run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass"
122+
run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning"
123+
124+
run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020"
125+
run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10"
126+
127+
run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q"
128+
129+
log_test "VXLAN FDB nexthop: learning"
130+
}
131+
132+
proxy_common()
133+
{
134+
local af_str=$1; shift
135+
local local_addr=$1; shift
136+
local plen=$1; shift
137+
local remote_addr=$1; shift
138+
local neigh_addr=$1; shift
139+
local ping_cmd=$1; shift
140+
141+
RET=0
142+
143+
# When the "proxy" option is enabled on the VXLAN device, the device
144+
# will suppress ARP requests and IPv6 Neighbor Solicitation messages if
145+
# it is able to reply on behalf of the remote host. That is, if a
146+
# matching and valid neighbor entry is configured on the VXLAN device
147+
# whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The
148+
# FDB entry for the neighbor's MAC address might point to an FDB
149+
# nexthop group instead of an ordinary remote destination. Check that
150+
# the kernel does not crash in this situation.
151+
152+
run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
153+
154+
run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
155+
run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
156+
157+
run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy"
158+
159+
run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0"
160+
161+
run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
162+
163+
run_cmd "ip netns exec $ns1 $ping_cmd"
164+
165+
log_test "VXLAN FDB nexthop: $af_str proxy"
166+
}
167+
168+
proxy_ipv4()
169+
{
170+
proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \
171+
"arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3"
172+
}
173+
174+
proxy_ipv6()
175+
{
176+
proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \
177+
"ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0"
178+
}
179+
180+
################################################################################
181+
# Usage
182+
183+
usage()
184+
{
185+
cat <<EOF
186+
usage: ${0##*/} OPTS
187+
188+
-t <test> Test(s) to run (default: all)
189+
(options: $TESTS)
190+
-p Pause on fail
191+
-v Verbose mode (show commands and output)
192+
EOF
193+
}
194+
195+
################################################################################
196+
# Main
197+
198+
while getopts ":t:pvh" opt; do
199+
case $opt in
200+
t) TESTS=$OPTARG;;
201+
p) PAUSE_ON_FAIL=yes;;
202+
v) VERBOSE=$((VERBOSE + 1));;
203+
h) usage; exit 0;;
204+
*) usage; exit 1;;
205+
esac
206+
done
207+
208+
require_command mausezahn
209+
require_command arping
210+
require_command ndisc6
211+
require_command jq
212+
213+
if ! ip nexthop help 2>&1 | grep -q "stats"; then
214+
echo "SKIP: iproute2 ip too old, missing nexthop stats support"
215+
exit "$ksft_skip"
216+
fi
217+
218+
trap exit_cleanup_all EXIT
219+
220+
for t in $TESTS
221+
do
222+
setup_ns ns1; $t; cleanup_all_ns;
223+
done

0 commit comments

Comments
 (0)