Skip to content

Commit 46d6c5a

Browse files
zx2c4 and ummakynes
authored and committed
netfilter: use actual socket sk rather than skb sk when routing harder
If netfilter changes the packet mark when mangling, the packet is rerouted using the route_me_harder set of functions. Prior to this commit, there's one big difference between route_me_harder and the ordinary initial routing functions, described in the comment above __ip_queue_xmit():

    /* Note: skb->sk can be different from sk, in case of tunnels */
    int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,

That function goes on to correctly make use of sk->sk_bound_dev_if, rather than skb->sk->sk_bound_dev_if. And indeed the comment is true: a tunnel will receive a packet in ndo_start_xmit with an initial skb->sk. It will make some transformations to that packet, and then it will send the encapsulated packet out of a *new* socket. That new socket will basically always have a different sk_bound_dev_if (otherwise there'd be a routing loop). So for the purposes of routing the encapsulated packet, the routing information as it pertains to the socket should come from that socket's sk, rather than the packet's original skb->sk. For that reason __ip_queue_xmit() and related functions all do the right thing.

One might argue that all tunnels should just call skb_orphan(skb) before transmitting the encapsulated packet into the new socket. But tunnels do *not* do this -- and this is wisely avoided in skb_scrub_packet() too -- because features like TSQ rely on skb->destructor() being called when that buffer space is truly available again. Calling skb_orphan(skb) too early would result in buffers filling up unnecessarily and accounting info being all wrong. Instead, additional routing must take into account the new sk, just as __ip_queue_xmit() notes.

So, this commit addresses the problem by fishing the correct sk out of state->sk -- it's already set properly in the call to nf_hook() in __ip_local_out(), which receives the sk as part of its normal functionality.
So we make sure to plumb state->sk through the various route_me_harder functions, and then make correct use of it following the example of __ip_queue_xmit().

Fixes: 1da177e ("Linux-2.6.12-rc2")
Signed-off-by: Jason A. Donenfeld <[email protected]>
Reviewed-by: Florian Westphal <[email protected]>
Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent af8afcf commit 46d6c5a

File tree

12 files changed

+26
-24
lines changed

12 files changed

+26
-24
lines changed

include/linux/netfilter_ipv4.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ struct ip_rt_info {
1616
u_int32_t mark;
1717
};
1818

19-
int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
19+
int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned addr_type);
2020

2121
struct nf_queue_entry;
2222

include/linux/netfilter_ipv6.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ struct nf_ipv6_ops {
4242
#if IS_MODULE(CONFIG_IPV6)
4343
int (*chk_addr)(struct net *net, const struct in6_addr *addr,
4444
const struct net_device *dev, int strict);
45-
int (*route_me_harder)(struct net *net, struct sk_buff *skb);
45+
int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb);
4646
int (*dev_get_saddr)(struct net *net, const struct net_device *dev,
4747
const struct in6_addr *daddr, unsigned int srcprefs,
4848
struct in6_addr *saddr);
@@ -143,19 +143,19 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk,
143143
#endif
144144
}
145145

146-
int ip6_route_me_harder(struct net *net, struct sk_buff *skb);
146+
int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb);
147147

148-
static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
148+
static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb)
149149
{
150150
#if IS_MODULE(CONFIG_IPV6)
151151
const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
152152

153153
if (!v6_ops)
154154
return -EHOSTUNREACH;
155155

156-
return v6_ops->route_me_harder(net, skb);
156+
return v6_ops->route_me_harder(net, sk, skb);
157157
#elif IS_BUILTIN(CONFIG_IPV6)
158-
return ip6_route_me_harder(net, skb);
158+
return ip6_route_me_harder(net, sk, skb);
159159
#else
160160
return -EHOSTUNREACH;
161161
#endif

net/ipv4/netfilter.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,19 @@
1717
#include <net/netfilter/nf_queue.h>
1818

1919
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
20-
int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type)
20+
int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type)
2121
{
2222
const struct iphdr *iph = ip_hdr(skb);
2323
struct rtable *rt;
2424
struct flowi4 fl4 = {};
2525
__be32 saddr = iph->saddr;
26-
const struct sock *sk = skb_to_full_sk(skb);
27-
__u8 flags = sk ? inet_sk_flowi_flags(sk) : 0;
26+
__u8 flags;
2827
struct net_device *dev = skb_dst(skb)->dev;
2928
unsigned int hh_len;
3029

30+
sk = sk_to_full_sk(sk);
31+
flags = sk ? inet_sk_flowi_flags(sk) : 0;
32+
3133
if (addr_type == RTN_UNSPEC)
3234
addr_type = inet_addr_type_dev_table(net, dev, saddr);
3335
if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)

net/ipv4/netfilter/iptable_mangle.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
6262
iph->daddr != daddr ||
6363
skb->mark != mark ||
6464
iph->tos != tos) {
65-
err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
65+
err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
6666
if (err < 0)
6767
ret = NF_DROP_ERR(err);
6868
}

net/ipv4/netfilter/nf_reject_ipv4.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
145145
ip4_dst_hoplimit(skb_dst(nskb)));
146146
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
147147

148-
if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
148+
if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
149149
goto free_nskb;
150150

151151
niph = ip_hdr(nskb);

net/ipv6/netfilter.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@
2020
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
2121
#include "../bridge/br_private.h"
2222

23-
int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
23+
int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb)
2424
{
2525
const struct ipv6hdr *iph = ipv6_hdr(skb);
26-
struct sock *sk = sk_to_full_sk(skb->sk);
26+
struct sock *sk = sk_to_full_sk(sk_partial);
2727
unsigned int hh_len;
2828
struct dst_entry *dst;
2929
int strict = (ipv6_addr_type(&iph->daddr) &
@@ -84,7 +84,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
8484
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
8585
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
8686
skb->mark != rt_info->mark)
87-
return ip6_route_me_harder(entry->state.net, skb);
87+
return ip6_route_me_harder(entry->state.net, entry->state.sk, skb);
8888
}
8989
return 0;
9090
}

net/ipv6/netfilter/ip6table_mangle.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
5757
skb->mark != mark ||
5858
ipv6_hdr(skb)->hop_limit != hop_limit ||
5959
flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
60-
err = ip6_route_me_harder(state->net, skb);
60+
err = ip6_route_me_harder(state->net, state->sk, skb);
6161
if (err < 0)
6262
ret = NF_DROP_ERR(err);
6363
}

net/netfilter/ipvs/ip_vs_core.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -742,12 +742,12 @@ static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af,
742742
struct dst_entry *dst = skb_dst(skb);
743743

744744
if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) &&
745-
ip6_route_me_harder(ipvs->net, skb) != 0)
745+
ip6_route_me_harder(ipvs->net, skb->sk, skb) != 0)
746746
return 1;
747747
} else
748748
#endif
749749
if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
750-
ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0)
750+
ip_route_me_harder(ipvs->net, skb->sk, skb, RTN_LOCAL) != 0)
751751
return 1;
752752

753753
return 0;

net/netfilter/nf_nat_proto.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
715715

716716
if (ct->tuplehash[dir].tuple.dst.u3.ip !=
717717
ct->tuplehash[!dir].tuple.src.u3.ip) {
718-
err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
718+
err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
719719
if (err < 0)
720720
ret = NF_DROP_ERR(err);
721721
}
@@ -953,7 +953,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
953953

954954
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
955955
&ct->tuplehash[!dir].tuple.src.u3)) {
956-
err = nf_ip6_route_me_harder(state->net, skb);
956+
err = nf_ip6_route_me_harder(state->net, state->sk, skb);
957957
if (err < 0)
958958
ret = NF_DROP_ERR(err);
959959
}

net/netfilter/nf_synproxy_core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ synproxy_send_tcp(struct net *net,
446446

447447
skb_dst_set_noref(nskb, skb_dst(skb));
448448
nskb->protocol = htons(ETH_P_IP);
449-
if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
449+
if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
450450
goto free_nskb;
451451

452452
if (nfct) {

0 commit comments

Comments
 (0)