Skip to content

Commit c50b960

Browse files
kaberPablo Neira Ayuso
authored and committed
netfilter: nf_tables: implement proper set selection
The current set selection simply chooses the first set type that provides the requested features, which always results in the rbtree being chosen by virtue of being the first set in the list. What we actually want to do is choose the implementation that can provide the requested features and is optimal from either a performance or memory perspective depending on the characteristics of the elements and the preferences specified by the user. The elements are not known when creating a set. Even if we would provide them for anonymous (literal) sets, we'd still have standalone sets where the elements are not known in advance. We therefore need an abstract description of the data characteristics. The kernel already knows the size of the key; this patch starts by introducing a nested set description which so far contains only the maximum number of elements. Based on this the set implementations are changed to provide an estimate of the required amount of memory and the lookup complexity class. The set ops have a new callback ->estimate() that is invoked during set selection. It receives a structure containing the attributes known to the kernel and is supposed to populate a struct nft_set_estimate with the complexity class and, in case the size is known, the complete amount of memory required, or the amount of memory required per element otherwise. Based on the policy specified by the user (performance/memory, defaulting to performance) the kernel will then select the best suited implementation. Even if the set implementation would allow adding more than the specified maximum number of elements, the limit is enforced, since new implementations might not be able to add more than the maximum based on which they were selected. Signed-off-by: Patrick McHardy <[email protected]> Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent fe92ca4 commit c50b960

File tree

5 files changed

+242
-18
lines changed

5 files changed

+242
-18
lines changed

include/net/netfilter/nf_tables.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,44 @@ struct nft_set_iter {
145145
const struct nft_set_elem *elem);
146146
};
147147

148+
/**
149+
* struct nft_set_desc - description of set elements
150+
*
151+
* @klen: key length
152+
* @dlen: data length
153+
* @size: number of set elements
154+
*/
155+
struct nft_set_desc {
156+
unsigned int klen;
157+
unsigned int dlen;
158+
unsigned int size;
159+
};
160+
161+
/**
162+
* enum nft_set_class - performance class
163+
*
164+
* @NFT_SET_CLASS_O_1: constant, O(1)
165+
* @NFT_SET_CLASS_O_LOG_N: logarithmic, O(log N)
166+
* @NFT_SET_CLASS_O_N: linear, O(N)
167+
*/
168+
enum nft_set_class {
169+
NFT_SET_CLASS_O_1,
170+
NFT_SET_CLASS_O_LOG_N,
171+
NFT_SET_CLASS_O_N,
172+
};
173+
174+
/**
175+
* struct nft_set_estimate - estimation of memory and performance
176+
* characteristics
177+
*
178+
* @size: required memory
179+
* @class: lookup performance class
180+
*/
181+
struct nft_set_estimate {
182+
unsigned int size;
183+
enum nft_set_class class;
184+
};
185+
148186
/**
149187
* struct nft_set_ops - nf_tables set operations
150188
*
@@ -174,7 +212,11 @@ struct nft_set_ops {
174212
struct nft_set_iter *iter);
175213

176214
unsigned int (*privsize)(const struct nlattr * const nla[]);
215+
bool (*estimate)(const struct nft_set_desc *desc,
216+
u32 features,
217+
struct nft_set_estimate *est);
177218
int (*init)(const struct nft_set *set,
219+
const struct nft_set_desc *desc,
178220
const struct nlattr * const nla[]);
179221
void (*destroy)(const struct nft_set *set);
180222

@@ -194,6 +236,8 @@ void nft_unregister_set(struct nft_set_ops *ops);
194236
* @name: name of the set
195237
* @ktype: key type (numeric type defined by userspace, not used in the kernel)
196238
* @dtype: data type (verdict or numeric type defined by userspace)
239+
* @size: maximum set size
240+
* @nelems: number of elements
197241
* @ops: set ops
198242
* @flags: set flags
199243
* @klen: key length
@@ -206,6 +250,8 @@ struct nft_set {
206250
char name[IFNAMSIZ];
207251
u32 ktype;
208252
u32 dtype;
253+
u32 size;
254+
u32 nelems;
209255
/* runtime data below here */
210256
const struct nft_set_ops *ops ____cacheline_aligned;
211257
u16 flags;

include/uapi/linux/netfilter/nf_tables.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,29 @@ enum nft_set_flags {
211211
NFT_SET_MAP = 0x8,
212212
};
213213

214+
/**
215+
* enum nft_set_policies - set selection policy
216+
*
217+
* @NFT_SET_POL_PERFORMANCE: prefer high performance over low memory use
218+
* @NFT_SET_POL_MEMORY: prefer low memory use over high performance
219+
*/
220+
enum nft_set_policies {
221+
NFT_SET_POL_PERFORMANCE,
222+
NFT_SET_POL_MEMORY,
223+
};
224+
225+
/**
226+
* enum nft_set_desc_attributes - set element description
227+
*
228+
* @NFTA_SET_DESC_SIZE: number of elements in set (NLA_U32)
229+
*/
230+
enum nft_set_desc_attributes {
231+
NFTA_SET_DESC_UNSPEC,
232+
NFTA_SET_DESC_SIZE,
233+
__NFTA_SET_DESC_MAX
234+
};
235+
#define NFTA_SET_DESC_MAX (__NFTA_SET_DESC_MAX - 1)
236+
214237
/**
215238
* enum nft_set_attributes - nf_tables set netlink attributes
216239
*
@@ -221,6 +244,8 @@ enum nft_set_flags {
221244
* @NFTA_SET_KEY_LEN: key data length (NLA_U32)
222245
* @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32)
223246
* @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
247+
* @NFTA_SET_POLICY: selection policy (NLA_U32)
248+
* @NFTA_SET_DESC: set description (NLA_NESTED)
224249
*/
225250
enum nft_set_attributes {
226251
NFTA_SET_UNSPEC,
@@ -231,6 +256,8 @@ enum nft_set_attributes {
231256
NFTA_SET_KEY_LEN,
232257
NFTA_SET_DATA_TYPE,
233258
NFTA_SET_DATA_LEN,
259+
NFTA_SET_POLICY,
260+
NFTA_SET_DESC,
234261
__NFTA_SET_MAX
235262
};
236263
#define NFTA_SET_MAX (__NFTA_SET_MAX - 1)

net/netfilter/nf_tables_api.c

Lines changed: 105 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1912,9 +1912,18 @@ void nft_unregister_set(struct nft_set_ops *ops)
19121912
}
19131913
EXPORT_SYMBOL_GPL(nft_unregister_set);
19141914

1915-
static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[])
1915+
/*
1916+
* Select a set implementation based on the data characteristics and the
1917+
* given policy. The total memory use might not be known if no size is
1918+
* given, in that case the amount of memory per element is used.
1919+
*/
1920+
static const struct nft_set_ops *
1921+
nft_select_set_ops(const struct nlattr * const nla[],
1922+
const struct nft_set_desc *desc,
1923+
enum nft_set_policies policy)
19161924
{
1917-
const struct nft_set_ops *ops;
1925+
const struct nft_set_ops *ops, *bops;
1926+
struct nft_set_estimate est, best;
19181927
u32 features;
19191928

19201929
#ifdef CONFIG_MODULES
@@ -1932,15 +1941,45 @@ static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const
19321941
features &= NFT_SET_INTERVAL | NFT_SET_MAP;
19331942
}
19341943

1935-
// FIXME: implement selection properly
1944+
bops = NULL;
1945+
best.size = ~0;
1946+
best.class = ~0;
1947+
19361948
list_for_each_entry(ops, &nf_tables_set_ops, list) {
19371949
if ((ops->features & features) != features)
19381950
continue;
1951+
if (!ops->estimate(desc, features, &est))
1952+
continue;
1953+
1954+
switch (policy) {
1955+
case NFT_SET_POL_PERFORMANCE:
1956+
if (est.class < best.class)
1957+
break;
1958+
if (est.class == best.class && est.size < best.size)
1959+
break;
1960+
continue;
1961+
case NFT_SET_POL_MEMORY:
1962+
if (est.size < best.size)
1963+
break;
1964+
if (est.size == best.size && est.class < best.class)
1965+
break;
1966+
continue;
1967+
default:
1968+
break;
1969+
}
1970+
19391971
if (!try_module_get(ops->owner))
19401972
continue;
1941-
return ops;
1973+
if (bops != NULL)
1974+
module_put(bops->owner);
1975+
1976+
bops = ops;
1977+
best = est;
19421978
}
19431979

1980+
if (bops != NULL)
1981+
return bops;
1982+
19441983
return ERR_PTR(-EOPNOTSUPP);
19451984
}
19461985

@@ -1952,6 +1991,12 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
19521991
[NFTA_SET_KEY_LEN] = { .type = NLA_U32 },
19531992
[NFTA_SET_DATA_TYPE] = { .type = NLA_U32 },
19541993
[NFTA_SET_DATA_LEN] = { .type = NLA_U32 },
1994+
[NFTA_SET_POLICY] = { .type = NLA_U32 },
1995+
[NFTA_SET_DESC] = { .type = NLA_NESTED },
1996+
};
1997+
1998+
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
1999+
[NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
19552000
};
19562001

19572002
static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
@@ -2043,6 +2088,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
20432088
{
20442089
struct nfgenmsg *nfmsg;
20452090
struct nlmsghdr *nlh;
2091+
struct nlattr *desc;
20462092
u32 portid = NETLINK_CB(ctx->skb).portid;
20472093
u32 seq = ctx->nlh->nlmsg_seq;
20482094

@@ -2076,6 +2122,14 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
20762122
goto nla_put_failure;
20772123
}
20782124

2125+
desc = nla_nest_start(skb, NFTA_SET_DESC);
2126+
if (desc == NULL)
2127+
goto nla_put_failure;
2128+
if (set->size &&
2129+
nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size)))
2130+
goto nla_put_failure;
2131+
nla_nest_end(skb, desc);
2132+
20792133
return nlmsg_end(skb, nlh);
20802134

20812135
nla_put_failure:
@@ -2304,6 +2358,23 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
23042358
return err;
23052359
}
23062360

2361+
/*
 * Parse a nested set description attribute into a struct nft_set_desc.
 *
 * @ctx:  nf_tables context (not referenced in this function body)
 * @desc: description to update; only ->size is written here, and only when
 *        NFTA_SET_DESC_SIZE is present -- all other fields are left untouched
 * @nla:  the nested attribute to parse against nft_set_desc_policy
 *
 * Returns 0 on success, or the negative error code reported by
 * nla_parse_nested().
 */
static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
2362+
struct nft_set_desc *desc,
2363+
const struct nlattr *nla)
2364+
{
2365+
struct nlattr *da[NFTA_SET_DESC_MAX + 1];
2366+
int err;
2367+
2368+
err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy);
2369+
if (err < 0)
2370+
return err;
2371+
2372+
/* The size attribute is optional; it is read as a big-endian 32-bit value. */
if (da[NFTA_SET_DESC_SIZE] != NULL)
2373+
desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE]));
2374+
2375+
return 0;
2376+
}
2377+
23072378
static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
23082379
const struct nlmsghdr *nlh,
23092380
const struct nlattr * const nla[])
@@ -2318,23 +2389,26 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
23182389
char name[IFNAMSIZ];
23192390
unsigned int size;
23202391
bool create;
2321-
u32 ktype, klen, dlen, dtype, flags;
2392+
u32 ktype, dtype, flags, policy;
2393+
struct nft_set_desc desc;
23222394
int err;
23232395

23242396
if (nla[NFTA_SET_TABLE] == NULL ||
23252397
nla[NFTA_SET_NAME] == NULL ||
23262398
nla[NFTA_SET_KEY_LEN] == NULL)
23272399
return -EINVAL;
23282400

2401+
memset(&desc, 0, sizeof(desc));
2402+
23292403
ktype = NFT_DATA_VALUE;
23302404
if (nla[NFTA_SET_KEY_TYPE] != NULL) {
23312405
ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
23322406
if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
23332407
return -EINVAL;
23342408
}
23352409

2336-
klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
2337-
if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data))
2410+
desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
2411+
if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data))
23382412
return -EINVAL;
23392413

23402414
flags = 0;
@@ -2346,7 +2420,6 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
23462420
}
23472421

23482422
dtype = 0;
2349-
dlen = 0;
23502423
if (nla[NFTA_SET_DATA_TYPE] != NULL) {
23512424
if (!(flags & NFT_SET_MAP))
23522425
return -EINVAL;
@@ -2359,15 +2432,25 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
23592432
if (dtype != NFT_DATA_VERDICT) {
23602433
if (nla[NFTA_SET_DATA_LEN] == NULL)
23612434
return -EINVAL;
2362-
dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
2363-
if (dlen == 0 ||
2364-
dlen > FIELD_SIZEOF(struct nft_data, data))
2435+
desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
2436+
if (desc.dlen == 0 ||
2437+
desc.dlen > FIELD_SIZEOF(struct nft_data, data))
23652438
return -EINVAL;
23662439
} else
2367-
dlen = sizeof(struct nft_data);
2440+
desc.dlen = sizeof(struct nft_data);
23682441
} else if (flags & NFT_SET_MAP)
23692442
return -EINVAL;
23702443

2444+
policy = NFT_SET_POL_PERFORMANCE;
2445+
if (nla[NFTA_SET_POLICY] != NULL)
2446+
policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
2447+
2448+
if (nla[NFTA_SET_DESC] != NULL) {
2449+
err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]);
2450+
if (err < 0)
2451+
return err;
2452+
}
2453+
23712454
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
23722455

23732456
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
@@ -2398,7 +2481,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
23982481
if (!(nlh->nlmsg_flags & NLM_F_CREATE))
23992482
return -ENOENT;
24002483

2401-
ops = nft_select_set_ops(nla);
2484+
ops = nft_select_set_ops(nla, &desc, policy);
24022485
if (IS_ERR(ops))
24032486
return PTR_ERR(ops);
24042487

@@ -2419,12 +2502,13 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
24192502
INIT_LIST_HEAD(&set->bindings);
24202503
set->ops = ops;
24212504
set->ktype = ktype;
2422-
set->klen = klen;
2505+
set->klen = desc.klen;
24232506
set->dtype = dtype;
2424-
set->dlen = dlen;
2507+
set->dlen = desc.dlen;
24252508
set->flags = flags;
2509+
set->size = desc.size;
24262510

2427-
err = ops->init(set, nla);
2511+
err = ops->init(set, &desc, nla);
24282512
if (err < 0)
24292513
goto err2;
24302514

@@ -2733,6 +2817,9 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
27332817
enum nft_registers dreg;
27342818
int err;
27352819

2820+
if (set->size && set->nelems == set->size)
2821+
return -ENFILE;
2822+
27362823
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
27372824
nft_set_elem_policy);
27382825
if (err < 0)
@@ -2798,6 +2885,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
27982885
err = set->ops->insert(set, &elem);
27992886
if (err < 0)
28002887
goto err3;
2888+
set->nelems++;
28012889

28022890
return 0;
28032891

@@ -2867,6 +2955,7 @@ static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
28672955
goto err2;
28682956

28692957
set->ops->remove(set, &elem);
2958+
set->nelems--;
28702959

28712960
nft_data_uninit(&elem.key, NFT_DATA_VALUE);
28722961
if (set->flags & NFT_SET_MAP)

0 commit comments

Comments
 (0)