Skip to content

Commit 2071848

Browse files
edumazet authored and kuba-moo committed
tcp/dccp: change source port selection at connect() time
In commit 1580ab6 ("tcp/dccp: better use of ephemeral ports in connect()") we added a heuristic to select even ports for connect() and odd ports for bind(). This was nice because no application changes were needed. But it added extra cost when all even ports are in use, when there are few listeners and many active connections. Since then, IP_LOCAL_PORT_RANGE has been added to permit an application to partition the ephemeral port range at will. This patch extends the idea so that if IP_LOCAL_PORT_RANGE is set on a socket before connect(), port selection no longer favors even ports. This means that connect() can find a suitable source port faster, and applications can use a different split between connect() and bind() users. This should give more entropy to the Toeplitz hash used in RSS: using even ports was wasting one bit from the 16-bit sport. A similar change can be done in inet_csk_find_open_port() if needed. Signed-off-by: Eric Dumazet <[email protected]> Cc: Jakub Sitnicki <[email protected]> Reviewed-by: Kuniyuki Iwashima <[email protected]> Reviewed-by: Jason Xing <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 41db762 commit 2071848

File tree

1 file changed

+16
-11
lines changed

1 file changed

+16
-11
lines changed

net/ipv4/inet_hashtables.c

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,7 +1012,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
10121012
bool tb_created = false;
10131013
u32 remaining, offset;
10141014
int ret, i, low, high;
1015-
int l3mdev;
1015+
bool local_ports;
1016+
int step, l3mdev;
10161017
u32 index;
10171018

10181019
if (port) {
@@ -1024,10 +1025,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
10241025

10251026
l3mdev = inet_sk_bound_l3mdev(sk);
10261027

1027-
inet_sk_get_local_port_range(sk, &low, &high);
1028+
local_ports = inet_sk_get_local_port_range(sk, &low, &high);
1029+
step = local_ports ? 1 : 2;
1030+
10281031
high++; /* [32768, 60999] -> [32768, 61000[ */
10291032
remaining = high - low;
1030-
if (likely(remaining > 1))
1033+
if (!local_ports && remaining > 1)
10311034
remaining &= ~1U;
10321035

10331036
get_random_sleepable_once(table_perturb,
@@ -1040,10 +1043,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
10401043
/* In first pass we try ports of @low parity.
10411044
* inet_csk_get_port() does the opposite choice.
10421045
*/
1043-
offset &= ~1U;
1046+
if (!local_ports)
1047+
offset &= ~1U;
10441048
other_parity_scan:
10451049
port = low + offset;
1046-
for (i = 0; i < remaining; i += 2, port += 2) {
1050+
for (i = 0; i < remaining; i += step, port += step) {
10471051
if (unlikely(port >= high))
10481052
port -= remaining;
10491053
if (inet_is_local_reserved_port(net, port))
@@ -1083,10 +1087,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
10831087
cond_resched();
10841088
}
10851089

1086-
offset++;
1087-
if ((offset & 1) && remaining > 1)
1088-
goto other_parity_scan;
1089-
1090+
if (!local_ports) {
1091+
offset++;
1092+
if ((offset & 1) && remaining > 1)
1093+
goto other_parity_scan;
1094+
}
10901095
return -EADDRNOTAVAIL;
10911096

10921097
ok:
@@ -1109,8 +1114,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
11091114
* on low contention the randomness is maximal and on high contention
11101115
* it may be inexistent.
11111116
*/
1112-
i = max_t(int, i, get_random_u32_below(8) * 2);
1113-
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
1117+
i = max_t(int, i, get_random_u32_below(8) * step);
1118+
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + step);
11141119

11151120
/* Head lock still held and bh's disabled */
11161121
inet_bind_hash(sk, tb, tb2, port);

0 commit comments

Comments
 (0)