Skip to content

Commit 9096401

Browse files
committed
netfilter: nf_conntrack: add IPS_OFFLOAD status bit
This new bit tells us that the conntrack entry is owned by the flow table offload infrastructure. # cat /proc/net/nf_conntrack ipv4 2 tcp 6 src=10.141.10.2 dst=147.75.205.195 sport=36392 dport=443 src=147.75.205.195 dst=192.168.2.195 sport=443 dport=36392 [OFFLOAD] mark=0 zone=0 use=2 Note the [OFFLOAD] tag in the listing. From userspace, the timer of such conntrack entries appears to be stopped. In practice, to make sure the conntrack entry does not go away, the conntrack timer is periodically set to an arbitrarily large value that gets refreshed on every iteration of the garbage collector, so it never expires; such entries also display no internal state in the case of TCP flows. Keeping the timeout large allows us to save a bit check in the packet path via nf_ct_is_expired(). Conntrack entries that have been offloaded to the flow table infrastructure cannot be deleted/flushed via ctnetlink. The flow table infrastructure is also responsible for releasing this conntrack entry. Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent 0befd06 commit 9096401

File tree

5 files changed

+50
-6
lines changed

5 files changed

+50
-6
lines changed

include/uapi/linux/netfilter/nf_conntrack_common.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,16 @@ enum ip_conntrack_status {
101101
IPS_HELPER_BIT = 13,
102102
IPS_HELPER = (1 << IPS_HELPER_BIT),
103103

104+
/* Conntrack has been offloaded to flow table. */
105+
IPS_OFFLOAD_BIT = 14,
106+
IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
107+
104108
/* Be careful here, modifying these bits can make things messy,
105109
* so don't let users modify them directly.
106110
*/
107111
IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
108112
IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
109-
IPS_SEQ_ADJUST | IPS_TEMPLATE),
113+
IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
110114

111115
__IPS_MAX_BIT = 14,
112116
};

net/netfilter/nf_conntrack_core.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -901,6 +901,9 @@ static unsigned int early_drop_list(struct net *net,
901901
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
902902
tmp = nf_ct_tuplehash_to_ctrack(h);
903903

904+
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
905+
continue;
906+
904907
if (nf_ct_is_expired(tmp)) {
905908
nf_ct_gc_expired(tmp);
906909
continue;
@@ -975,6 +978,18 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
975978
return false;
976979
}
977980

981+
#define DAY (86400 * HZ)
982+
983+
/* Set an arbitrary timeout large enough not to ever expire; this saves
984+
* us a check for the IPS_OFFLOAD_BIT from the packet path via
985+
* nf_ct_is_expired().
986+
*/
987+
static void nf_ct_offload_timeout(struct nf_conn *ct)
988+
{
989+
if (nf_ct_expires(ct) < DAY / 2)
990+
ct->timeout = nfct_time_stamp + DAY;
991+
}
992+
978993
static void gc_worker(struct work_struct *work)
979994
{
980995
unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
@@ -1011,6 +1026,11 @@ static void gc_worker(struct work_struct *work)
10111026
tmp = nf_ct_tuplehash_to_ctrack(h);
10121027

10131028
scanned++;
1029+
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
1030+
nf_ct_offload_timeout(tmp);
1031+
continue;
1032+
}
1033+
10141034
if (nf_ct_is_expired(tmp)) {
10151035
nf_ct_gc_expired(tmp);
10161036
expired_count++;

net/netfilter/nf_conntrack_netlink.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1110,6 +1110,14 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
11101110
.len = NF_CT_LABELS_MAX_SIZE },
11111111
};
11121112

1113+
static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
1114+
{
1115+
if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
1116+
return 0;
1117+
1118+
return ctnetlink_filter_match(ct, data);
1119+
}
1120+
11131121
static int ctnetlink_flush_conntrack(struct net *net,
11141122
const struct nlattr * const cda[],
11151123
u32 portid, int report)
@@ -1122,7 +1130,7 @@ static int ctnetlink_flush_conntrack(struct net *net,
11221130
return PTR_ERR(filter);
11231131
}
11241132

1125-
nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter,
1133+
nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter,
11261134
portid, report);
11271135
kfree(filter);
11281136

@@ -1168,6 +1176,11 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
11681176

11691177
ct = nf_ct_tuplehash_to_ctrack(h);
11701178

1179+
if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
1180+
nf_ct_put(ct);
1181+
return -EBUSY;
1182+
}
1183+
11711184
if (cda[CTA_ID]) {
11721185
u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
11731186
if (id != (u32)(unsigned long)ct) {

net/netfilter/nf_conntrack_proto_tcp.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,9 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
305305
/* Print out the private part of the conntrack. */
306306
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
307307
{
308+
if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
309+
return;
310+
308311
seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
309312
}
310313
#endif

net/netfilter/nf_conntrack_standalone.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -309,10 +309,12 @@ static int ct_seq_show(struct seq_file *s, void *v)
309309
WARN_ON(!l4proto);
310310

311311
ret = -ENOSPC;
312-
seq_printf(s, "%-8s %u %-8s %u %ld ",
312+
seq_printf(s, "%-8s %u %-8s %u ",
313313
l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
314-
l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
315-
nf_ct_expires(ct) / HZ);
314+
l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
315+
316+
if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
317+
seq_printf(s, "%ld ", nf_ct_expires(ct) / HZ);
316318

317319
if (l4proto->print_conntrack)
318320
l4proto->print_conntrack(s, ct);
@@ -339,7 +341,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
339341
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
340342
goto release;
341343

342-
if (test_bit(IPS_ASSURED_BIT, &ct->status))
344+
if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
345+
seq_puts(s, "[OFFLOAD] ");
346+
else if (test_bit(IPS_ASSURED_BIT, &ct->status))
343347
seq_puts(s, "[ASSURED] ");
344348

345349
if (seq_has_overflowed(s))

0 commit comments

Comments
 (0)