Skip to content

Commit 05f43db

Browse files
committed
Merge branch 'allow-configuration-of-multipath-hash-seed'
Petr Machata says: ==================== Allow configuration of multipath hash seed Let me just quote the commit message of patch #2 here to inform the motivation and some of the implementation: When calculating hashes for the purpose of multipath forwarding, both IPv4 and IPv6 code currently fall back on flow_hash_from_keys(). That uses a randomly-generated seed. That's a fine choice by default, but unfortunately some deployments may need a tighter control over the seed used. In this patchset, make the seed configurable by adding a new sysctl key, net.ipv4.fib_multipath_hash_seed to control the seed. This seed is used specifically for multipath forwarding and not for the other concerns that flow_hash_from_keys() is used for, such as queue selection. Expose the knob as sysctl because other such settings, such as headers to hash, are also handled that way. Despite being placed in the net.ipv4 namespace, the multipath seed sysctl is used for both IPv4 and IPv6, similarly to e.g. a number of TCP variables. Like those, the multipath hash seed is a per-netns variable. The seed used by flow_hash_from_keys() is a 128-bit quantity. However it seems that usually the seed is a much more modest value. 32 bits seem typical (Cisco, Cumulus), some systems go even lower. For that reason, and to decouple the user interface from implementation details, go with a 32-bit quantity, which is then quadruplicated to form the siphash key. One example of use of this interface is avoiding hash polarization, where two ECMP routers, one behind the other, happen to make consistent hashing decisions, and as a result, part of the ECMP space of the latter router is never used. Another is a load balancer where several machines forward traffic to one of a number of leaves, and the forwarding decisions need to be made consistently. (This is a case of a desired hash polarization, mentioned e.g. in chapter 6.3 of [0].) 
There has already been a proposal to include a hash seed control interface in the past[1]. - Patches #1-#2 contain the substance of the work - Patch #3 is an mlxsw offload - Patches #4 and #5 are a selftest [0] https://www.usenix.org/system/files/conference/nsdi18/nsdi18-araujo.pdf [1] https://lore.kernel.org/netdev/YIlVpYMCn%2F8WfE1P@rnd/ ==================== Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 32b0660 + 5f90d93 commit 05f43db

File tree

12 files changed

+484
-14
lines changed

12 files changed

+484
-14
lines changed

Documentation/networking/ip-sysctl.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,20 @@ fib_multipath_hash_fields - UNSIGNED INTEGER
131131

132132
Default: 0x0007 (source IP, destination IP and IP protocol)
133133

134+
fib_multipath_hash_seed - UNSIGNED INTEGER
135+
The seed value used when calculating hash for multipath routes. Applies
136+
to both the IPv4 and IPv6 datapaths. Only present for kernels built with
137+
CONFIG_IP_ROUTE_MULTIPATH enabled.
138+
139+
When set to 0, the seed value used for multipath routing defaults to an
140+
internally generated random one.
141+
142+
The actual hashing algorithm is not specified -- there is no guarantee
143+
that the next-hop distribution produced by a given seed will remain stable
144+
across kernel versions.
145+
146+
Default: 0 (random)
147+
134148
fib_sync_mem - UNSIGNED INTEGER
135149
Amount of dirty memory from fib entries that can be backlogged before
136150
synchronize_rcu is forced.

drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11450,12 +11450,16 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
1145011450
{
1145111451
bool old_inc_parsing_depth, new_inc_parsing_depth;
1145211452
struct mlxsw_sp_mp_hash_config config = {};
11453+
struct net *net = mlxsw_sp_net(mlxsw_sp);
1145311454
char recr2_pl[MLXSW_REG_RECR2_LEN];
1145411455
unsigned long bit;
1145511456
u32 seed;
1145611457
int err;
1145711458

11458-
seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
11459+
seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).user_seed;
11460+
if (!seed)
11461+
seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
11462+
1145911463
mlxsw_reg_recr2_pack(recr2_pl, seed);
1146011464
mlxsw_sp_mp4_hash_init(mlxsw_sp, &config);
1146111465
mlxsw_sp_mp6_hash_init(mlxsw_sp, &config);

include/net/flow_dissector.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,8 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
442442
}
443443

444444
u32 flow_hash_from_keys(struct flow_keys *keys);
445+
u32 flow_hash_from_keys_seed(struct flow_keys *keys,
446+
const siphash_key_t *keyval);
445447
void skb_flow_get_icmp_tci(const struct sk_buff *skb,
446448
struct flow_dissector_key_icmp *key_icmp,
447449
const void *data, int thoff, int hlen);

include/net/ip_fib.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,35 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig);
520520
#ifdef CONFIG_IP_ROUTE_MULTIPATH
521521
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
522522
const struct sk_buff *skb, struct flow_keys *flkeys);
523+
524+
static void
525+
fib_multipath_hash_construct_key(siphash_key_t *key, u32 mp_seed)
526+
{
527+
u64 mp_seed_64 = mp_seed;
528+
529+
key->key[0] = (mp_seed_64 << 32) | mp_seed_64;
530+
key->key[1] = key->key[0];
531+
}
532+
533+
static inline u32 fib_multipath_hash_from_keys(const struct net *net,
534+
struct flow_keys *keys)
535+
{
536+
siphash_aligned_key_t hash_key;
537+
u32 mp_seed;
538+
539+
mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed;
540+
fib_multipath_hash_construct_key(&hash_key, mp_seed);
541+
542+
return flow_hash_from_keys_seed(keys, &hash_key);
543+
}
544+
#else
545+
/*
 * Variant used when CONFIG_IP_ROUTE_MULTIPATH is not set: the per-netns
 * multipath seed does not exist in that configuration, so @net is unused
 * and the keys are hashed with plain flow_hash_from_keys().
 */
static inline u32 fib_multipath_hash_from_keys(const struct net *net,
546+
struct flow_keys *keys)
547+
{
548+
return flow_hash_from_keys(keys);
549+
}
523550
#endif
551+
524552
int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
525553
struct netlink_ext_ack *extack);
526554
void fib_select_multipath(struct fib_result *res, int hash);

include/net/netns/ipv4.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,13 @@ struct inet_timewait_death_row {
4040

4141
struct tcp_fastopen_context;
4242

43+
#ifdef CONFIG_IP_ROUTE_MULTIPATH
44+
/* Multipath hash seed state kept in struct netns_ipv4. */
struct sysctl_fib_multipath_hash_seed {
45+
/* Seed as written through the fib_multipath_hash_seed sysctl; 0 selects the random seed. */
u32 user_seed;
46+
/* Seed actually used for hashing: user_seed, or the random seed when user_seed is 0. */
u32 mp_seed;
47+
};
48+
#endif
49+
4350
struct netns_ipv4 {
4451
/* Cacheline organization can be found documented in
4552
* Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
@@ -246,6 +253,7 @@ struct netns_ipv4 {
246253
#endif
247254
#endif
248255
#ifdef CONFIG_IP_ROUTE_MULTIPATH
256+
struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed;
249257
u32 sysctl_fib_multipath_hash_fields;
250258
u8 sysctl_fib_multipath_use_neigh;
251259
u8 sysctl_fib_multipath_hash_policy;

net/core/flow_dissector.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1806,6 +1806,13 @@ u32 flow_hash_from_keys(struct flow_keys *keys)
18061806
}
18071807
EXPORT_SYMBOL(flow_hash_from_keys);
18081808

1809+
/*
 * Hash @keys using the caller-supplied siphash key @keyval.
 * Thin exported wrapper around __flow_hash_from_keys().
 */
u32 flow_hash_from_keys_seed(struct flow_keys *keys,
1810+
const siphash_key_t *keyval)
1811+
{
1812+
return __flow_hash_from_keys(keys, keyval);
1813+
}
1814+
EXPORT_SYMBOL(flow_hash_from_keys_seed);
1815+
18091816
static inline u32 ___skb_get_hash(const struct sk_buff *skb,
18101817
struct flow_keys *keys,
18111818
const siphash_key_t *keyval)

net/ipv4/route.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1923,7 +1923,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net,
19231923
hash_keys.ports.dst = keys.ports.dst;
19241924

19251925
*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
1926-
return flow_hash_from_keys(&hash_keys);
1926+
return fib_multipath_hash_from_keys(net, &hash_keys);
19271927
}
19281928

19291929
static u32 fib_multipath_custom_hash_inner(const struct net *net,
@@ -1972,7 +1972,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net,
19721972
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
19731973
hash_keys.ports.dst = keys.ports.dst;
19741974

1975-
return flow_hash_from_keys(&hash_keys);
1975+
return fib_multipath_hash_from_keys(net, &hash_keys);
19761976
}
19771977

19781978
static u32 fib_multipath_custom_hash_skb(const struct net *net,
@@ -2009,7 +2009,7 @@ static u32 fib_multipath_custom_hash_fl4(const struct net *net,
20092009
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
20102010
hash_keys.ports.dst = fl4->fl4_dport;
20112011

2012-
return flow_hash_from_keys(&hash_keys);
2012+
return fib_multipath_hash_from_keys(net, &hash_keys);
20132013
}
20142014

20152015
/* if skb is set it will be used and fl4 can be NULL */
@@ -2030,7 +2030,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
20302030
hash_keys.addrs.v4addrs.src = fl4->saddr;
20312031
hash_keys.addrs.v4addrs.dst = fl4->daddr;
20322032
}
2033-
mhash = flow_hash_from_keys(&hash_keys);
2033+
mhash = fib_multipath_hash_from_keys(net, &hash_keys);
20342034
break;
20352035
case 1:
20362036
/* skb is currently provided only when forwarding */
@@ -2064,7 +2064,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
20642064
hash_keys.ports.dst = fl4->fl4_dport;
20652065
hash_keys.basic.ip_proto = fl4->flowi4_proto;
20662066
}
2067-
mhash = flow_hash_from_keys(&hash_keys);
2067+
mhash = fib_multipath_hash_from_keys(net, &hash_keys);
20682068
break;
20692069
case 2:
20702070
memset(&hash_keys, 0, sizeof(hash_keys));
@@ -2095,7 +2095,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
20952095
hash_keys.addrs.v4addrs.src = fl4->saddr;
20962096
hash_keys.addrs.v4addrs.dst = fl4->daddr;
20972097
}
2098-
mhash = flow_hash_from_keys(&hash_keys);
2098+
mhash = fib_multipath_hash_from_keys(net, &hash_keys);
20992099
break;
21002100
case 3:
21012101
if (skb)

net/ipv4/sysctl_net_ipv4.c

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,61 @@ static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write,
464464

465465
return ret;
466466
}
467+
468+
/* Random seed used as the effective multipath seed whenever the user seed is 0. */
static u32 proc_fib_multipath_hash_rand_seed __ro_after_init;
469+
470+
/* Fill proc_fib_multipath_hash_rand_seed with random bytes; called from sysctl_ipv4_init(). */
static void proc_fib_multipath_hash_init_rand_seed(void)
471+
{
472+
get_random_bytes(&proc_fib_multipath_hash_rand_seed,
473+
sizeof(proc_fib_multipath_hash_rand_seed));
474+
}
475+
476+
/*
 * Install a new multipath hash seed for @net.
 *
 * @user_seed is stored unchanged; the effective seed (mp_seed) is
 * @user_seed, or proc_fib_multipath_hash_rand_seed when @user_seed is 0.
 *
 * Each 32-bit member is published with its own WRITE_ONCE(). Readers only
 * ever consume one member at a time, and a WRITE_ONCE() of the whole
 * 8-byte structure could be split into two stores on 32-bit architectures.
 */
static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
{
	struct sysctl_fib_multipath_hash_seed new = {
		.user_seed = user_seed,
		.mp_seed = (user_seed ? user_seed :
			    proc_fib_multipath_hash_rand_seed),
	};

	WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.user_seed,
		   new.user_seed);
	WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed,
		   new.mp_seed);
}
486+
487+
static int proc_fib_multipath_hash_seed(struct ctl_table *table, int write,
488+
void *buffer, size_t *lenp,
489+
loff_t *ppos)
490+
{
491+
struct sysctl_fib_multipath_hash_seed *mphs;
492+
struct net *net = table->data;
493+
struct ctl_table tmp;
494+
u32 user_seed;
495+
int ret;
496+
497+
mphs = &net->ipv4.sysctl_fib_multipath_hash_seed;
498+
user_seed = mphs->user_seed;
499+
500+
tmp = *table;
501+
tmp.data = &user_seed;
502+
503+
ret = proc_douintvec_minmax(&tmp, write, buffer, lenp, ppos);
504+
505+
if (write && ret == 0) {
506+
proc_fib_multipath_hash_set_seed(net, user_seed);
507+
call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
508+
}
509+
510+
return ret;
511+
}
512+
#else
513+
514+
/*
 * No-op stubs for the #else branch, so that ipv4_sysctl_init_net() and
 * sysctl_ipv4_init() can call these helpers unconditionally.
 * NOTE(review): the matching #ifdef is outside this hunk; presumably
 * CONFIG_IP_ROUTE_MULTIPATH -- confirm.
 */
static void proc_fib_multipath_hash_init_rand_seed(void)
515+
{
516+
}
517+
518+
static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
519+
{
520+
}
521+
467522
#endif
468523

469524
static struct ctl_table ipv4_table[] = {
@@ -1072,6 +1127,13 @@ static struct ctl_table ipv4_net_table[] = {
10721127
.extra1 = SYSCTL_ONE,
10731128
.extra2 = &fib_multipath_hash_fields_all_mask,
10741129
},
1130+
{
1131+
.procname = "fib_multipath_hash_seed",
1132+
.data = &init_net,
1133+
.maxlen = sizeof(u32),
1134+
.mode = 0644,
1135+
.proc_handler = proc_fib_multipath_hash_seed,
1136+
},
10751137
#endif
10761138
{
10771139
.procname = "ip_unprivileged_port_start",
@@ -1550,6 +1612,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
15501612
if (!net->ipv4.sysctl_local_reserved_ports)
15511613
goto err_ports;
15521614

1615+
proc_fib_multipath_hash_set_seed(net, 0);
1616+
15531617
return 0;
15541618

15551619
err_ports:
@@ -1584,6 +1648,8 @@ static __init int sysctl_ipv4_init(void)
15841648
if (!hdr)
15851649
return -ENOMEM;
15861650

1651+
proc_fib_multipath_hash_init_rand_seed();
1652+
15871653
if (register_pernet_subsys(&ipv4_sysctl_ops)) {
15881654
unregister_net_sysctl_table(hdr);
15891655
return -ENOMEM;

net/ipv6/route.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2372,7 +2372,7 @@ static u32 rt6_multipath_custom_hash_outer(const struct net *net,
23722372
hash_keys.ports.dst = keys.ports.dst;
23732373

23742374
*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
2375-
return flow_hash_from_keys(&hash_keys);
2375+
return fib_multipath_hash_from_keys(net, &hash_keys);
23762376
}
23772377

23782378
static u32 rt6_multipath_custom_hash_inner(const struct net *net,
@@ -2421,7 +2421,7 @@ static u32 rt6_multipath_custom_hash_inner(const struct net *net,
24212421
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
24222422
hash_keys.ports.dst = keys.ports.dst;
24232423

2424-
return flow_hash_from_keys(&hash_keys);
2424+
return fib_multipath_hash_from_keys(net, &hash_keys);
24252425
}
24262426

24272427
static u32 rt6_multipath_custom_hash_skb(const struct net *net,
@@ -2460,7 +2460,7 @@ static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
24602460
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
24612461
hash_keys.ports.dst = fl6->fl6_dport;
24622462

2463-
return flow_hash_from_keys(&hash_keys);
2463+
return fib_multipath_hash_from_keys(net, &hash_keys);
24642464
}
24652465

24662466
/* if skb is set it will be used and fl6 can be NULL */
@@ -2482,7 +2482,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
24822482
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
24832483
hash_keys.basic.ip_proto = fl6->flowi6_proto;
24842484
}
2485-
mhash = flow_hash_from_keys(&hash_keys);
2485+
mhash = fib_multipath_hash_from_keys(net, &hash_keys);
24862486
break;
24872487
case 1:
24882488
if (skb) {
@@ -2514,7 +2514,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
25142514
hash_keys.ports.dst = fl6->fl6_dport;
25152515
hash_keys.basic.ip_proto = fl6->flowi6_proto;
25162516
}
2517-
mhash = flow_hash_from_keys(&hash_keys);
2517+
mhash = fib_multipath_hash_from_keys(net, &hash_keys);
25182518
break;
25192519
case 2:
25202520
memset(&hash_keys, 0, sizeof(hash_keys));
@@ -2551,7 +2551,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
25512551
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
25522552
hash_keys.basic.ip_proto = fl6->flowi6_proto;
25532553
}
2554-
mhash = flow_hash_from_keys(&hash_keys);
2554+
mhash = fib_multipath_hash_from_keys(net, &hash_keys);
25552555
break;
25562556
case 3:
25572557
if (skb)

tools/testing/selftests/net/forwarding/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
7070
router_broadcast.sh \
7171
router_mpath_nh_res.sh \
7272
router_mpath_nh.sh \
73+
router_mpath_seed.sh \
7374
router_multicast.sh \
7475
router_multipath.sh \
7576
router_nh.sh \

0 commit comments

Comments
 (0)