Skip to content

Commit 6a0d7a0

Browse files
committed
Merge branch 'flow_key_hashing'
Tom Herbert says: ==================== net: Increase inputs to flow_keys hashing This patch set adds new fields to the flow_keys structure and hashes over these fields to get a better flow hash. In particular, these patches now include hashing over the full IPv6 addresses in order to defend against address spoofing that always results in the same hash. The new input also includes the Ethertype, L4 protocol, VLAN, flow label, GRE keyid, and MPLS entropy label. In order to increase hash inputs, we switch to using jhash2, which operates on an array of u32's. jhash2 operates on multiples of three words. The data in the hash is constructed for that, and there are two variants for IPv4 and IPv6 addressing. For IPv4 addresses, jhash2 is performed over six u32's and for IPv6 it is done over twelve. flow_keys can store either IPv4 or IPv6 addresses (the addr_type field is the selector). ipv6_addr_hash is no longer used to convert addresses for setting in the flow table. For legacy uses of flow keys outside of flow_dissector, the flow_get_u32_src and flow_get_u32_dst functions have been added to get u32 representations of addresses in flow_keys. For flow labels we also eliminate the short circuit in flow_dissector for non-zero flow label. The flow label is now considered additional input to ports. Testing: Ran netperf TCP_RR for 200 flows using IPv4 and IPv6 comparing before the patches and with the patches. Did not detect any performance degradation. v2: - Took out MPLS entropy label. Will add this later. v3: - Ensure hash start offset is on a four-byte boundary. Add BUILD_BUG_ON to check for this. - Fixes sparse error in GRE to get entropy from keyid. 
v4: - Rebase onto Jiri's changes to generalize flow dissection - Support TIPC as its own address - Bring back MPLS entropy label dissection - Remove FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS v5: - Minor fixes from feedback v6: - Cleanup and sparse issue with flow label - Change keyid returned by flow_dissector to be __be32 ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents ffaa31d + b3baa0f commit 6a0d7a0

File tree

12 files changed

+387
-135
lines changed

12 files changed

+387
-135
lines changed

drivers/net/bonding/bond_main.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3059,17 +3059,15 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
30593059
if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
30603060
return false;
30613061
iph = ip_hdr(skb);
3062-
fk->addrs.src = iph->saddr;
3063-
fk->addrs.dst = iph->daddr;
3062+
iph_to_flow_copy_v4addrs(fk, iph);
30643063
noff += iph->ihl << 2;
30653064
if (!ip_is_fragment(iph))
30663065
proto = iph->protocol;
30673066
} else if (skb->protocol == htons(ETH_P_IPV6)) {
30683067
if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6))))
30693068
return false;
30703069
iph6 = ipv6_hdr(skb);
3071-
fk->addrs.src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
3072-
fk->addrs.dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
3070+
iph_to_flow_copy_v6addrs(fk, iph6);
30733071
noff += sizeof(*iph6);
30743072
proto = iph6->nexthdr;
30753073
} else {
@@ -3103,7 +3101,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
31033101
hash = bond_eth_hash(skb);
31043102
else
31053103
hash = (__force u32)flow.ports.ports;
3106-
hash ^= (__force u32)flow.addrs.dst ^ (__force u32)flow.addrs.src;
3104+
hash ^= (__force u32)flow_get_u32_dst(&flow) ^
3105+
(__force u32)flow_get_u32_src(&flow);
31073106
hash ^= (hash >> 16);
31083107
hash ^= (hash >> 8);
31093108

drivers/net/ethernet/cisco/enic/enic_clsf.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq)
3333
return -EPROTONOSUPPORT;
3434
};
3535
data.type = FILTER_IPV4_5TUPLE;
36-
data.u.ipv4.src_addr = ntohl(keys->addrs.src);
37-
data.u.ipv4.dst_addr = ntohl(keys->addrs.dst);
36+
data.u.ipv4.src_addr = ntohl(keys->addrs.v4addrs.src);
37+
data.u.ipv4.dst_addr = ntohl(keys->addrs.v4addrs.dst);
3838
data.u.ipv4.src_port = ntohs(keys->ports.src);
3939
data.u.ipv4.dst_port = ntohs(keys->ports.dst);
4040
data.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE;
@@ -158,8 +158,8 @@ static struct enic_rfs_fltr_node *htbl_key_search(struct hlist_head *h,
158158
struct enic_rfs_fltr_node *tpos;
159159

160160
hlist_for_each_entry(tpos, h, node)
161-
if (tpos->keys.addrs.src == k->addrs.src &&
162-
tpos->keys.addrs.dst == k->addrs.dst &&
161+
if (tpos->keys.addrs.v4addrs.src == k->addrs.v4addrs.src &&
162+
tpos->keys.addrs.v4addrs.dst == k->addrs.v4addrs.dst &&
163163
tpos->keys.ports.ports == k->ports.ports &&
164164
tpos->keys.basic.ip_proto == k->basic.ip_proto &&
165165
tpos->keys.basic.n_proto == k->basic.n_proto)

drivers/net/ethernet/cisco/enic/enic_ethtool.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -346,10 +346,10 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd)
346346
break;
347347
}
348348

349-
fsp->h_u.tcp_ip4_spec.ip4src = n->keys.addrs.src;
349+
fsp->h_u.tcp_ip4_spec.ip4src = flow_get_u32_src(&n->keys);
350350
fsp->m_u.tcp_ip4_spec.ip4src = (__u32)~0;
351351

352-
fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.addrs.dst;
352+
fsp->h_u.tcp_ip4_spec.ip4dst = flow_get_u32_dst(&n->keys);
353353
fsp->m_u.tcp_ip4_spec.ip4dst = (__u32)~0;
354354

355355
fsp->h_u.tcp_ip4_spec.psrc = n->keys.ports.src;

include/linux/skbuff.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1943,7 +1943,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb,
19431943
if (skb_transport_header_was_set(skb))
19441944
return;
19451945
else if (skb_flow_dissect_flow_keys(skb, &keys))
1946-
skb_set_transport_header(skb, keys.basic.thoff);
1946+
skb_set_transport_header(skb, keys.control.thoff);
19471947
else
19481948
skb_set_transport_header(skb, offset_hint);
19491949
}

include/net/flow_dissector.h

Lines changed: 75 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,31 +6,79 @@
66
#include <linux/in6.h>
77
#include <uapi/linux/if_ether.h>
88

9+
/**
10+
* struct flow_dissector_key_control:
11+
* @thoff: Transport header offset
12+
*/
13+
struct flow_dissector_key_control {
14+
u16 thoff;
15+
u16 addr_type;
16+
};
17+
918
/**
1019
* struct flow_dissector_key_basic:
1120
* @thoff: Transport header offset
1221
* @n_proto: Network header protocol (eg. IPv4/IPv6)
1322
* @ip_proto: Transport header protocol (eg. TCP/UDP)
1423
*/
1524
struct flow_dissector_key_basic {
16-
u16 thoff;
1725
__be16 n_proto;
1826
u8 ip_proto;
27+
u8 padding;
28+
};
29+
30+
struct flow_dissector_key_tags {
31+
u32 vlan_id:12,
32+
flow_label:20;
33+
};
34+
35+
struct flow_dissector_key_keyid {
36+
__be32 keyid;
1937
};
2038

2139
/**
22-
* struct flow_dissector_key_addrs:
23-
* @src: source ip address in case of IPv4
24-
* For IPv6 it contains 32bit hash of src address
25-
* @dst: destination ip address in case of IPv4
26-
* For IPv6 it contains 32bit hash of dst address
40+
* struct flow_dissector_key_ipv4_addrs:
41+
* @src: source ip address
42+
* @dst: destination ip address
2743
*/
28-
struct flow_dissector_key_addrs {
44+
struct flow_dissector_key_ipv4_addrs {
2945
/* (src,dst) must be grouped, in the same way than in IP header */
3046
__be32 src;
3147
__be32 dst;
3248
};
3349

50+
/**
51+
* struct flow_dissector_key_ipv6_addrs:
52+
* @src: source ip address
53+
* @dst: destination ip address
54+
*/
55+
struct flow_dissector_key_ipv6_addrs {
56+
/* (src,dst) must be grouped, in the same way than in IP header */
57+
struct in6_addr src;
58+
struct in6_addr dst;
59+
};
60+
61+
/**
62+
* struct flow_dissector_key_tipc_addrs:
63+
* @srcnode: source node address
64+
*/
65+
struct flow_dissector_key_tipc_addrs {
66+
__be32 srcnode;
67+
};
68+
69+
/**
70+
* struct flow_dissector_key_addrs:
71+
* @v4addrs: IPv4 addresses
72+
* @v6addrs: IPv6 addresses
73+
*/
74+
struct flow_dissector_key_addrs {
75+
union {
76+
struct flow_dissector_key_ipv4_addrs v4addrs;
77+
struct flow_dissector_key_ipv6_addrs v6addrs;
78+
struct flow_dissector_key_tipc_addrs tipcaddrs;
79+
};
80+
};
81+
3482
/**
3583
* flow_dissector_key_tp_ports:
3684
* @ports: port numbers of Transport header
@@ -47,16 +95,6 @@ struct flow_dissector_key_ports {
4795
};
4896
};
4997

50-
/**
51-
* struct flow_dissector_key_ipv6_addrs:
52-
* @src: source ip address
53-
* @dst: destination ip address
54-
*/
55-
struct flow_dissector_key_ipv6_addrs {
56-
/* (src,dst) must be grouped, in the same way than in IP header */
57-
struct in6_addr src;
58-
struct in6_addr dst;
59-
};
6098

6199
/**
62100
* struct flow_dissector_key_eth_addrs:
@@ -70,12 +108,17 @@ struct flow_dissector_key_eth_addrs {
70108
};
71109

72110
enum flow_dissector_key_id {
111+
FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
73112
FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
74-
FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */
75-
FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */
76-
FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
113+
FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */
77114
FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */
115+
FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
78116
FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
117+
FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */
118+
FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */
119+
FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */
120+
FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */
121+
FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */
79122

80123
FLOW_DISSECTOR_KEY_MAX,
81124
};
@@ -109,11 +152,21 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb,
109152
}
110153

111154
struct flow_keys {
112-
struct flow_dissector_key_addrs addrs;
113-
struct flow_dissector_key_ports ports;
155+
struct flow_dissector_key_control control;
156+
#define FLOW_KEYS_HASH_START_FIELD basic
114157
struct flow_dissector_key_basic basic;
158+
struct flow_dissector_key_tags tags;
159+
struct flow_dissector_key_keyid keyid;
160+
struct flow_dissector_key_ports ports;
161+
struct flow_dissector_key_addrs addrs;
115162
};
116163

164+
#define FLOW_KEYS_HASH_OFFSET \
165+
offsetof(struct flow_keys, FLOW_KEYS_HASH_START_FIELD)
166+
167+
__be32 flow_get_u32_src(const struct flow_keys *flow);
168+
__be32 flow_get_u32_dst(const struct flow_keys *flow);
169+
117170
extern struct flow_dissector flow_keys_dissector;
118171
extern struct flow_dissector flow_keys_buf_dissector;
119172

include/net/ip.h

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,13 +355,30 @@ static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
355355
skb->len, proto, 0);
356356
}
357357

358+
/* copy IPv4 saddr & daddr to flow_keys, possibly using 64bit load/store
359+
* Equivalent to : flow->v4addrs.src = iph->saddr;
360+
* flow->v4addrs.dst = iph->daddr;
361+
*/
362+
static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow,
363+
const struct iphdr *iph)
364+
{
365+
BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) !=
366+
offsetof(typeof(flow->addrs), v4addrs.src) +
367+
sizeof(flow->addrs.v4addrs.src));
368+
memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs));
369+
flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
370+
}
371+
358372
static inline void inet_set_txhash(struct sock *sk)
359373
{
360374
struct inet_sock *inet = inet_sk(sk);
361375
struct flow_keys keys;
362376

363-
keys.addrs.src = inet->inet_saddr;
364-
keys.addrs.dst = inet->inet_daddr;
377+
memset(&keys, 0, sizeof(keys));
378+
379+
keys.addrs.v4addrs.src = inet->inet_saddr;
380+
keys.addrs.v4addrs.dst = inet->inet_daddr;
381+
keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
365382
keys.ports.src = inet->inet_sport;
366383
keys.ports.dst = inet->inet_dport;
367384

include/net/ipv6.h

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -692,15 +692,34 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
692692
return hlimit;
693693
}
694694

695+
/* copy IPv6 saddr & daddr to flow_keys, possibly using 64bit load/store
696+
* Equivalent to : flow->v6addrs.src = iph->saddr;
697+
* flow->v6addrs.dst = iph->daddr;
698+
*/
699+
static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
700+
const struct ipv6hdr *iph)
701+
{
702+
BUILD_BUG_ON(offsetof(typeof(flow->addrs), v6addrs.dst) !=
703+
offsetof(typeof(flow->addrs), v6addrs.src) +
704+
sizeof(flow->addrs.v6addrs.src));
705+
memcpy(&flow->addrs.v6addrs, &iph->saddr, sizeof(flow->addrs.v6addrs));
706+
flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
707+
}
708+
695709
#if IS_ENABLED(CONFIG_IPV6)
696710
static inline void ip6_set_txhash(struct sock *sk)
697711
{
698712
struct inet_sock *inet = inet_sk(sk);
699713
struct ipv6_pinfo *np = inet6_sk(sk);
700714
struct flow_keys keys;
701715

702-
keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr);
703-
keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr);
716+
memset(&keys, 0, sizeof(keys));
717+
718+
memcpy(&keys.addrs.v6addrs.src, &np->saddr,
719+
sizeof(keys.addrs.v6addrs.src));
720+
memcpy(&keys.addrs.v6addrs.dst, &sk->sk_v6_daddr,
721+
sizeof(keys.addrs.v6addrs.dst));
722+
keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
704723
keys.ports.src = inet->inet_sport;
705724
keys.ports.dst = inet->inet_dport;
706725

include/uapi/linux/in.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ enum {
6969
#define IPPROTO_SCTP IPPROTO_SCTP
7070
IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */
7171
#define IPPROTO_UDPLITE IPPROTO_UDPLITE
72+
IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */
73+
#define IPPROTO_MPLS IPPROTO_MPLS
7274
IPPROTO_RAW = 255, /* Raw IP packets */
7375
#define IPPROTO_RAW IPPROTO_RAW
7476
IPPROTO_MAX

0 commit comments

Comments
 (0)