Skip to content

Commit 358105a

Browse files
committed
Merge branch 'tcp-dccp-refine-source-port-selection'
Eric Dumazet says:

====================
tcp/dccp: refine source port selection

This patch series leverages the IP_LOCAL_PORT_RANGE option to no longer favor even source port selection at connect() time.

This should lower the time taken by connect() for hosts having many active connections to the same destination.
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 758a8d5 + 2071848 commit 358105a

File tree

3 files changed

+33
-17
lines changed

3 files changed

+33
-17
lines changed

include/net/ip.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -356,7 +356,7 @@ static inline void inet_get_local_port_range(const struct net *net, int *low, in
356356
*low = range & 0xffff;
357357
*high = range >> 16;
358358
}
359-
void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
359+
bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
360360

361361
#ifdef CONFIG_SYSCTL
362362
static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port)

net/ipv4/inet_connection_sock.c

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -117,16 +117,25 @@ bool inet_rcv_saddr_any(const struct sock *sk)
117117
return !sk->sk_rcv_saddr;
118118
}
119119

120-
void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
120+
/**
121+
* inet_sk_get_local_port_range - fetch ephemeral ports range
122+
* @sk: socket
123+
* @low: pointer to low port
124+
* @high: pointer to high port
125+
*
126+
* Fetch netns port range (/proc/sys/net/ipv4/ip_local_port_range)
127+
* Range can be overridden if socket got IP_LOCAL_PORT_RANGE option.
128+
* Returns true if IP_LOCAL_PORT_RANGE was set on this socket.
129+
*/
130+
bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
121131
{
122-
const struct inet_sock *inet = inet_sk(sk);
123-
const struct net *net = sock_net(sk);
124132
int lo, hi, sk_lo, sk_hi;
133+
bool local_range = false;
125134
u32 sk_range;
126135

127-
inet_get_local_port_range(net, &lo, &hi);
136+
inet_get_local_port_range(sock_net(sk), &lo, &hi);
128137

129-
sk_range = READ_ONCE(inet->local_port_range);
138+
sk_range = READ_ONCE(inet_sk(sk)->local_port_range);
130139
if (unlikely(sk_range)) {
131140
sk_lo = sk_range & 0xffff;
132141
sk_hi = sk_range >> 16;
@@ -135,10 +144,12 @@ void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
135144
lo = sk_lo;
136145
if (lo <= sk_hi && sk_hi <= hi)
137146
hi = sk_hi;
147+
local_range = true;
138148
}
139149

140150
*low = lo;
141151
*high = hi;
152+
return local_range;
142153
}
143154
EXPORT_SYMBOL(inet_sk_get_local_port_range);
144155

net/ipv4/inet_hashtables.c

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1012,7 +1012,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
10121012
bool tb_created = false;
10131013
u32 remaining, offset;
10141014
int ret, i, low, high;
1015-
int l3mdev;
1015+
bool local_ports;
1016+
int step, l3mdev;
10161017
u32 index;
10171018

10181019
if (port) {
@@ -1024,10 +1025,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
10241025

10251026
l3mdev = inet_sk_bound_l3mdev(sk);
10261027

1027-
inet_sk_get_local_port_range(sk, &low, &high);
1028+
local_ports = inet_sk_get_local_port_range(sk, &low, &high);
1029+
step = local_ports ? 1 : 2;
1030+
10281031
high++; /* [32768, 60999] -> [32768, 61000[ */
10291032
remaining = high - low;
1030-
if (likely(remaining > 1))
1033+
if (!local_ports && remaining > 1)
10311034
remaining &= ~1U;
10321035

10331036
get_random_sleepable_once(table_perturb,
@@ -1040,10 +1043,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
10401043
/* In first pass we try ports of @low parity.
10411044
* inet_csk_get_port() does the opposite choice.
10421045
*/
1043-
offset &= ~1U;
1046+
if (!local_ports)
1047+
offset &= ~1U;
10441048
other_parity_scan:
10451049
port = low + offset;
1046-
for (i = 0; i < remaining; i += 2, port += 2) {
1050+
for (i = 0; i < remaining; i += step, port += step) {
10471051
if (unlikely(port >= high))
10481052
port -= remaining;
10491053
if (inet_is_local_reserved_port(net, port))
@@ -1083,10 +1087,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
10831087
cond_resched();
10841088
}
10851089

1086-
offset++;
1087-
if ((offset & 1) && remaining > 1)
1088-
goto other_parity_scan;
1089-
1090+
if (!local_ports) {
1091+
offset++;
1092+
if ((offset & 1) && remaining > 1)
1093+
goto other_parity_scan;
1094+
}
10901095
return -EADDRNOTAVAIL;
10911096

10921097
ok:
@@ -1109,8 +1114,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
11091114
* on low contention the randomness is maximal and on high contention
11101115
* it may be inexistent.
11111116
*/
1112-
i = max_t(int, i, get_random_u32_below(8) * 2);
1113-
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
1117+
i = max_t(int, i, get_random_u32_below(8) * step);
1118+
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + step);
11141119

11151120
/* Head lock still held and bh's disabled */
11161121
inet_bind_hash(sk, tb, tb2, port);

0 commit comments

Comments
 (0)