Skip to content

Commit 2d7192d

Browse files
committed
ipv4: Sanitize and simplify ip_route_{connect,newports}()
These functions are used together as a unit for route resolution during connect(). They address the chicken-and-egg problem that exists when ports need to be allocated during connect() processing, yet such port allocations require addressing information from the routing code. It's currently more heavy-handed than it needs to be, and in particular we allocate and initialize a flow object twice. Let the callers provide the on-stack flow object. That way we only need to initialize it once in the ip_route_connect() call. Later, if ip_route_newports() needs to do anything, it re-uses that flow object as-is except for the ports which it updates before the route re-lookup. Also, describe why this set of facilities is needed and how it works in a big comment. Signed-off-by: David S. Miller <[email protected]> Reviewed-by: Eric Dumazet <[email protected]>
1 parent 15ecd03 commit 2d7192d

File tree

6 files changed

+74
-48
lines changed

6 files changed

+74
-48
lines changed

include/net/route.h

Lines changed: 56 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -217,59 +217,83 @@ static inline char rt_tos2priority(u8 tos)
217217
return ip_tos2prio[IPTOS_TOS(tos)>>1];
218218
}
219219

220-
static inline struct rtable *ip_route_connect(__be32 dst, __be32 src, u32 tos,
221-
int oif, u8 protocol,
222-
__be16 sport, __be16 dport,
223-
struct sock *sk, bool can_sleep)
220+
/* ip_route_connect() and ip_route_newports() work in tandem whilst
221+
* binding a socket for a new outgoing connection.
222+
*
223+
* In order to use IPSEC properly, we must, in the end, have a
224+
* route that was looked up using all available keys including source
225+
* and destination ports.
226+
*
227+
* However, if a source port needs to be allocated (the user specified
228+
* a wildcard source port) we need to obtain addressing information
229+
* in order to perform that allocation.
230+
*
231+
* So ip_route_connect() looks up a route using wildcarded source and
232+
* destination ports in the key, simply so that we can get a pair of
233+
* addresses to use for port allocation.
234+
*
235+
* Later, once the ports are allocated, ip_route_newports() will make
236+
* another route lookup if needed to make sure we catch any IPSEC
237+
* rules keyed on the port information.
238+
*
239+
* The callers allocate the flow key on their stack, and must pass in
240+
* the same flowi4 object to both the ip_route_connect() and the
241+
* ip_route_newports() calls.
242+
*/
243+
244+
static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src,
245+
u32 tos, int oif, u8 protocol,
246+
__be16 sport, __be16 dport,
247+
struct sock *sk, bool can_sleep)
224248
{
225-
struct net *net = sock_net(sk);
226-
struct rtable *rt;
227-
struct flowi4 fl4;
228-
__u8 flow_flags;
249+
__u8 flow_flags = 0;
229250

230-
flow_flags = 0;
231251
if (inet_sk(sk)->transparent)
232252
flow_flags |= FLOWI_FLAG_ANYSRC;
233253
if (protocol == IPPROTO_TCP)
234254
flow_flags |= FLOWI_FLAG_PRECOW_METRICS;
235255
if (can_sleep)
236256
flow_flags |= FLOWI_FLAG_CAN_SLEEP;
237257

238-
flowi4_init_output(&fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
258+
flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
239259
protocol, flow_flags, dst, src, dport, sport);
260+
}
261+
262+
static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
263+
__be32 dst, __be32 src, u32 tos,
264+
int oif, u8 protocol,
265+
__be16 sport, __be16 dport,
266+
struct sock *sk, bool can_sleep)
267+
{
268+
struct net *net = sock_net(sk);
269+
struct rtable *rt;
270+
271+
ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
272+
sport, dport, sk, can_sleep);
240273

241274
if (!dst || !src) {
242-
rt = __ip_route_output_key(net, &fl4);
275+
rt = __ip_route_output_key(net, fl4);
243276
if (IS_ERR(rt))
244277
return rt;
245-
fl4.daddr = rt->rt_dst;
246-
fl4.saddr = rt->rt_src;
278+
fl4->daddr = rt->rt_dst;
279+
fl4->saddr = rt->rt_src;
247280
ip_rt_put(rt);
248281
}
249-
security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
250-
return ip_route_output_flow(net, &fl4, sk);
282+
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
283+
return ip_route_output_flow(net, fl4, sk);
251284
}
252285

253-
static inline struct rtable *ip_route_newports(struct rtable *rt,
254-
u8 protocol, __be16 orig_sport,
255-
__be16 orig_dport, __be16 sport,
256-
__be16 dport, struct sock *sk)
286+
static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable *rt,
287+
__be16 orig_sport, __be16 orig_dport,
288+
__be16 sport, __be16 dport,
289+
struct sock *sk)
257290
{
258291
if (sport != orig_sport || dport != orig_dport) {
259-
struct flowi4 fl4;
260-
__u8 flow_flags;
261-
262-
flow_flags = 0;
263-
if (inet_sk(sk)->transparent)
264-
flow_flags |= FLOWI_FLAG_ANYSRC;
265-
if (protocol == IPPROTO_TCP)
266-
flow_flags |= FLOWI_FLAG_PRECOW_METRICS;
267-
flowi4_init_output(&fl4, rt->rt_oif, rt->rt_mark, rt->rt_tos,
268-
RT_SCOPE_UNIVERSE, protocol, flow_flags,
269-
rt->rt_dst, rt->rt_src, dport, sport);
292+
fl4->fl4_dport = dport;
293+
fl4->fl4_sport = sport;
270294
ip_rt_put(rt);
271-
security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
272-
return ip_route_output_flow(sock_net(sk), &fl4, sk);
295+
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
296+
return ip_route_output_flow(sock_net(sk), fl4, sk);
273297
}
274298
return rt;
275299
}

net/dccp/ipv4.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,13 @@
4040

4141
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
4242
{
43+
const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
4344
struct inet_sock *inet = inet_sk(sk);
4445
struct dccp_sock *dp = dccp_sk(sk);
45-
const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
4646
__be16 orig_sport, orig_dport;
47-
struct rtable *rt;
4847
__be32 daddr, nexthop;
48+
struct flowi4 fl4;
49+
struct rtable *rt;
4950
int err;
5051

5152
dp->dccps_role = DCCP_ROLE_CLIENT;
@@ -65,7 +66,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
6566

6667
orig_sport = inet->inet_sport;
6768
orig_dport = usin->sin_port;
68-
rt = ip_route_connect(nexthop, inet->inet_saddr,
69+
rt = ip_route_connect(&fl4, nexthop, inet->inet_saddr,
6970
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
7071
IPPROTO_DCCP,
7172
orig_sport, orig_dport, sk, true);
@@ -101,8 +102,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
101102
if (err != 0)
102103
goto failure;
103104

104-
rt = ip_route_newports(rt, IPPROTO_DCCP,
105-
orig_sport, orig_dport,
105+
rt = ip_route_newports(&fl4, rt, orig_sport, orig_dport,
106106
inet->inet_sport, inet->inet_dport, sk);
107107
if (IS_ERR(rt)) {
108108
rt = NULL;

net/ipv4/af_inet.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1103,14 +1103,15 @@ static int inet_sk_reselect_saddr(struct sock *sk)
11031103
struct inet_sock *inet = inet_sk(sk);
11041104
__be32 old_saddr = inet->inet_saddr;
11051105
__be32 daddr = inet->inet_daddr;
1106+
struct flowi4 fl4;
11061107
struct rtable *rt;
11071108
__be32 new_saddr;
11081109

11091110
if (inet->opt && inet->opt->srr)
11101111
daddr = inet->opt->faddr;
11111112

11121113
/* Query new route. */
1113-
rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk),
1114+
rt = ip_route_connect(&fl4, daddr, 0, RT_CONN_FLAGS(sk),
11141115
sk->sk_bound_dev_if, sk->sk_protocol,
11151116
inet->inet_sport, inet->inet_dport, sk, false);
11161117
if (IS_ERR(rt))

net/ipv4/datagram.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
2424
{
2525
struct inet_sock *inet = inet_sk(sk);
2626
struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
27+
struct flowi4 fl4;
2728
struct rtable *rt;
2829
__be32 saddr;
2930
int oif;
@@ -46,7 +47,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
4647
if (!saddr)
4748
saddr = inet->mc_addr;
4849
}
49-
rt = ip_route_connect(usin->sin_addr.s_addr, saddr,
50+
rt = ip_route_connect(&fl4, usin->sin_addr.s_addr, saddr,
5051
RT_CONN_FLAGS(sk), oif,
5152
sk->sk_protocol,
5253
inet->inet_sport, usin->sin_port, sk, true);

net/ipv4/tcp_ipv4.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -146,12 +146,13 @@ EXPORT_SYMBOL_GPL(tcp_twsk_unique);
146146
/* This will initiate an outgoing connection. */
147147
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
148148
{
149+
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
149150
struct inet_sock *inet = inet_sk(sk);
150151
struct tcp_sock *tp = tcp_sk(sk);
151-
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
152152
__be16 orig_sport, orig_dport;
153-
struct rtable *rt;
154153
__be32 daddr, nexthop;
154+
struct flowi4 fl4;
155+
struct rtable *rt;
155156
int err;
156157

157158
if (addr_len < sizeof(struct sockaddr_in))
@@ -169,7 +170,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
169170

170171
orig_sport = inet->inet_sport;
171172
orig_dport = usin->sin_port;
172-
rt = ip_route_connect(nexthop, inet->inet_saddr,
173+
rt = ip_route_connect(&fl4, nexthop, inet->inet_saddr,
173174
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
174175
IPPROTO_TCP,
175176
orig_sport, orig_dport, sk, true);
@@ -236,8 +237,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
236237
if (err)
237238
goto failure;
238239

239-
rt = ip_route_newports(rt, IPPROTO_TCP,
240-
orig_sport, orig_dport,
240+
rt = ip_route_newports(&fl4, rt, orig_sport, orig_dport,
241241
inet->inet_sport, inet->inet_dport, sk);
242242
if (IS_ERR(rt)) {
243243
err = PTR_ERR(rt);

net/l2tp/l2tp_ip.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -296,12 +296,12 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
296296

297297
static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
298298
{
299-
int rc;
300-
struct inet_sock *inet = inet_sk(sk);
301299
struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
300+
struct inet_sock *inet = inet_sk(sk);
301+
struct flowi4 fl4;
302302
struct rtable *rt;
303303
__be32 saddr;
304-
int oif;
304+
int oif, rc;
305305

306306
rc = -EINVAL;
307307
if (addr_len < sizeof(*lsa))
@@ -320,7 +320,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
320320
if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
321321
goto out;
322322

323-
rt = ip_route_connect(lsa->l2tp_addr.s_addr, saddr,
323+
rt = ip_route_connect(&fl4, lsa->l2tp_addr.s_addr, saddr,
324324
RT_CONN_FLAGS(sk), oif,
325325
IPPROTO_L2TP,
326326
0, 0, sk, true);

0 commit comments

Comments
 (0)