Skip to content

Commit 90c337d

Browse files
edumazet authored and davem330 committed
inet: add IP_BIND_ADDRESS_NO_PORT to overcome bind(0) limitations
When an application needs to force a source IP on an active TCP socket, it has to use bind(IP, port=x). As most applications do not want to deal with already used ports, x is often set to 0, meaning the kernel is in charge of finding an available port. But the kernel does not yet know if this socket is going to be a listener or be connected. It has very limited choices (no full knowledge of the final 4-tuple for a connect()). With a limited ephemeral port range (about 32K ports), it is very easy to fill the space. This patch adds a new SOL_IP socket option, asking the kernel to ignore the 0 port provided by the application in bind(IP, port=0) and only remember the given IP address. The port will be automatically chosen at connect() time, in a way that allows sharing a source port as long as the 4-tuples are unique. This new feature is available for both IPv4 and IPv6 (Thanks Neal). Tested: Wrote a test program and checked its behavior on IPv4 and IPv6. strace(1) shows sequences of bind(IP=127.0.0.2, port=0) followed by connect(). Also, getsockname() shows that the port is still 0 right after bind() but properly allocated after connect().
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5
setsockopt(5, SOL_IP, IP_BIND_ADDRESS_NO_PORT, [1], 4) = 0
bind(5, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("127.0.0.2")}, 16) = 0
getsockname(5, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("127.0.0.2")}, [16]) = 0
connect(5, {sa_family=AF_INET, sin_port=htons(53174), sin_addr=inet_addr("127.0.0.3")}, 16) = 0
getsockname(5, {sa_family=AF_INET, sin_port=htons(38050), sin_addr=inet_addr("127.0.0.2")}, [16]) = 0

IPv6 test:

socket(PF_INET6, SOCK_STREAM, IPPROTO_IP) = 7
setsockopt(7, SOL_IP, IP_BIND_ADDRESS_NO_PORT, [1], 4) = 0
bind(7, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 0
getsockname(7, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, [28]) = 0
connect(7, {sa_family=AF_INET6, sin6_port=htons(57300), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 0
getsockname(7, {sa_family=AF_INET6, sin6_port=htons(60964), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, [28]) = 0

I was able to bind()/connect() a million concurrent IPv4 sockets, instead of ~32000 before the patch.
lpaa23:~# ulimit -n 1000010
lpaa23:~# ./bind --connect --num-flows=1000000 &
1000000 sockets

lpaa23:~# grep TCP /proc/net/sockstat
TCP: inuse 2000063 orphan 0 tw 47 alloc 2000157 mem 66

Check that a given source port is indeed used by many different connections:

lpaa23:~# ss -t src :40000 | head -10
State Recv-Q Send-Q Local Address:Port Peer Address:Port
ESTAB 0 0 127.0.0.2:40000 127.0.202.33:44983
ESTAB 0 0 127.0.0.2:40000 127.2.27.240:44983
ESTAB 0 0 127.0.0.2:40000 127.2.98.5:44983
ESTAB 0 0 127.0.0.2:40000 127.0.124.196:44983
ESTAB 0 0 127.0.0.2:40000 127.2.139.38:44983
ESTAB 0 0 127.0.0.2:40000 127.1.59.80:44983
ESTAB 0 0 127.0.0.2:40000 127.3.6.228:44983
ESTAB 0 0 127.0.0.2:40000 127.0.38.53:44983
ESTAB 0 0 127.0.0.2:40000 127.1.197.10:44983

Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 513d1a1 commit 90c337d

File tree

5 files changed

+13
-2
lines changed

5 files changed

+13
-2
lines changed

include/net/inet_sock.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ struct inet_sock {
187187
transparent:1,
188188
mc_all:1,
189189
nodefrag:1;
190+
__u8 bind_address_no_port:1;
190191
__u8 rcv_tos;
191192
__u8 convert_csum;
192193
int uc_index;

include/uapi/linux/in.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ struct in_addr {
112112
#define IP_MINTTL 21
113113
#define IP_NODEFRAG 22
114114
#define IP_CHECKSUM 23
115+
#define IP_BIND_ADDRESS_NO_PORT 24
115116

116117
/* IP_MTU_DISCOVER values */
117118
#define IP_PMTUDISC_DONT 0 /* Never send DF frames */

net/ipv4/af_inet.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
488488
inet->inet_saddr = 0; /* Use device */
489489

490490
/* Make sure we are allowed to bind here. */
491-
if (sk->sk_prot->get_port(sk, snum)) {
491+
if ((snum || !inet->bind_address_no_port) &&
492+
sk->sk_prot->get_port(sk, snum)) {
492493
inet->inet_saddr = inet->inet_rcv_saddr = 0;
493494
err = -EADDRINUSE;
494495
goto out_release_sock;

net/ipv4/ip_sockglue.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
582582
case IP_TRANSPARENT:
583583
case IP_MINTTL:
584584
case IP_NODEFRAG:
585+
case IP_BIND_ADDRESS_NO_PORT:
585586
case IP_UNICAST_IF:
586587
case IP_MULTICAST_TTL:
587588
case IP_MULTICAST_ALL:
@@ -732,6 +733,9 @@ static int do_ip_setsockopt(struct sock *sk, int level,
732733
}
733734
inet->nodefrag = val ? 1 : 0;
734735
break;
736+
case IP_BIND_ADDRESS_NO_PORT:
737+
inet->bind_address_no_port = val ? 1 : 0;
738+
break;
735739
case IP_MTU_DISCOVER:
736740
if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
737741
goto e_inval;
@@ -1324,6 +1328,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
13241328
case IP_NODEFRAG:
13251329
val = inet->nodefrag;
13261330
break;
1331+
case IP_BIND_ADDRESS_NO_PORT:
1332+
val = inet->bind_address_no_port;
1333+
break;
13271334
case IP_MTU_DISCOVER:
13281335
val = inet->pmtudisc;
13291336
break;

net/ipv6/af_inet6.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
362362
np->saddr = addr->sin6_addr;
363363

364364
/* Make sure we are allowed to bind here. */
365-
if (sk->sk_prot->get_port(sk, snum)) {
365+
if ((snum || !inet->bind_address_no_port) &&
366+
sk->sk_prot->get_port(sk, snum)) {
366367
inet_reset_saddr(sk);
367368
err = -EADDRINUSE;
368369
goto out;

0 commit comments

Comments
 (0)