|
| 1 | +#include <linux/kernel.h> |
| 2 | +#include <linux/module.h> |
| 3 | +#include <linux/init.h> |
| 4 | +#include <linux/netlink.h> |
| 5 | +#include <linux/netfilter.h> |
| 6 | +#include <linux/workqueue.h> |
| 7 | +#include <linux/spinlock.h> |
| 8 | +#include <linux/netfilter/nf_tables.h> |
| 9 | +#include <net/ip.h> /* for ipv4 options. */ |
| 10 | +#include <net/netfilter/nf_tables.h> |
| 11 | +#include <net/netfilter/nf_tables_core.h> |
| 12 | +#include <net/netfilter/nf_conntrack_core.h> |
| 13 | +#include <linux/netfilter/nf_conntrack_common.h> |
| 14 | +#include <net/netfilter/nf_flow_table.h> |
| 15 | + |
/*
 * Private data of a flow_offload expression.
 *
 * @flowtable: flowtable this expression adds flows to; resolved by name
 *             when the expression is initialised.
 */
struct nft_flow_offload {
	struct nft_flowtable	*flowtable;
};
| 19 | + |
| 20 | +static int nft_flow_route(const struct nft_pktinfo *pkt, |
| 21 | + const struct nf_conn *ct, |
| 22 | + struct nf_flow_route *route, |
| 23 | + enum ip_conntrack_dir dir) |
| 24 | +{ |
| 25 | + struct dst_entry *this_dst = skb_dst(pkt->skb); |
| 26 | + struct dst_entry *other_dst = NULL; |
| 27 | + struct flowi fl; |
| 28 | + |
| 29 | + memset(&fl, 0, sizeof(fl)); |
| 30 | + switch (nft_pf(pkt)) { |
| 31 | + case NFPROTO_IPV4: |
| 32 | + fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip; |
| 33 | + break; |
| 34 | + case NFPROTO_IPV6: |
| 35 | + fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6; |
| 36 | + break; |
| 37 | + } |
| 38 | + |
| 39 | + nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); |
| 40 | + if (!other_dst) |
| 41 | + return -ENOENT; |
| 42 | + |
| 43 | + route->tuple[dir].dst = this_dst; |
| 44 | + route->tuple[dir].ifindex = nft_in(pkt)->ifindex; |
| 45 | + route->tuple[!dir].dst = other_dst; |
| 46 | + route->tuple[!dir].ifindex = nft_out(pkt)->ifindex; |
| 47 | + |
| 48 | + return 0; |
| 49 | +} |
| 50 | + |
| 51 | +static bool nft_flow_offload_skip(struct sk_buff *skb) |
| 52 | +{ |
| 53 | + struct ip_options *opt = &(IPCB(skb)->opt); |
| 54 | + |
| 55 | + if (unlikely(opt->optlen)) |
| 56 | + return true; |
| 57 | + if (skb_sec_path(skb)) |
| 58 | + return true; |
| 59 | + |
| 60 | + return false; |
| 61 | +} |
| 62 | + |
| 63 | +static void nft_flow_offload_eval(const struct nft_expr *expr, |
| 64 | + struct nft_regs *regs, |
| 65 | + const struct nft_pktinfo *pkt) |
| 66 | +{ |
| 67 | + struct nft_flow_offload *priv = nft_expr_priv(expr); |
| 68 | + struct nf_flowtable *flowtable = &priv->flowtable->data; |
| 69 | + enum ip_conntrack_info ctinfo; |
| 70 | + struct nf_flow_route route; |
| 71 | + struct flow_offload *flow; |
| 72 | + enum ip_conntrack_dir dir; |
| 73 | + struct nf_conn *ct; |
| 74 | + int ret; |
| 75 | + |
| 76 | + if (nft_flow_offload_skip(pkt->skb)) |
| 77 | + goto out; |
| 78 | + |
| 79 | + ct = nf_ct_get(pkt->skb, &ctinfo); |
| 80 | + if (!ct) |
| 81 | + goto out; |
| 82 | + |
| 83 | + switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { |
| 84 | + case IPPROTO_TCP: |
| 85 | + case IPPROTO_UDP: |
| 86 | + break; |
| 87 | + default: |
| 88 | + goto out; |
| 89 | + } |
| 90 | + |
| 91 | + if (test_bit(IPS_HELPER_BIT, &ct->status)) |
| 92 | + goto out; |
| 93 | + |
| 94 | + if (ctinfo == IP_CT_NEW || |
| 95 | + ctinfo == IP_CT_RELATED) |
| 96 | + goto out; |
| 97 | + |
| 98 | + if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) |
| 99 | + goto out; |
| 100 | + |
| 101 | + dir = CTINFO2DIR(ctinfo); |
| 102 | + if (nft_flow_route(pkt, ct, &route, dir) < 0) |
| 103 | + goto err_flow_route; |
| 104 | + |
| 105 | + flow = flow_offload_alloc(ct, &route); |
| 106 | + if (!flow) |
| 107 | + goto err_flow_alloc; |
| 108 | + |
| 109 | + ret = flow_offload_add(flowtable, flow); |
| 110 | + if (ret < 0) |
| 111 | + goto err_flow_add; |
| 112 | + |
| 113 | + return; |
| 114 | + |
| 115 | +err_flow_add: |
| 116 | + flow_offload_free(flow); |
| 117 | +err_flow_alloc: |
| 118 | + dst_release(route.tuple[!dir].dst); |
| 119 | +err_flow_route: |
| 120 | + clear_bit(IPS_OFFLOAD_BIT, &ct->status); |
| 121 | +out: |
| 122 | + regs->verdict.code = NFT_BREAK; |
| 123 | +} |
| 124 | + |
| 125 | +static int nft_flow_offload_validate(const struct nft_ctx *ctx, |
| 126 | + const struct nft_expr *expr, |
| 127 | + const struct nft_data **data) |
| 128 | +{ |
| 129 | + unsigned int hook_mask = (1 << NF_INET_FORWARD); |
| 130 | + |
| 131 | + return nft_chain_validate_hooks(ctx->chain, hook_mask); |
| 132 | +} |
| 133 | + |
| 134 | +static int nft_flow_offload_init(const struct nft_ctx *ctx, |
| 135 | + const struct nft_expr *expr, |
| 136 | + const struct nlattr * const tb[]) |
| 137 | +{ |
| 138 | + struct nft_flow_offload *priv = nft_expr_priv(expr); |
| 139 | + u8 genmask = nft_genmask_next(ctx->net); |
| 140 | + struct nft_flowtable *flowtable; |
| 141 | + |
| 142 | + if (!tb[NFTA_FLOW_TABLE_NAME]) |
| 143 | + return -EINVAL; |
| 144 | + |
| 145 | + flowtable = nf_tables_flowtable_lookup(ctx->table, |
| 146 | + tb[NFTA_FLOW_TABLE_NAME], |
| 147 | + genmask); |
| 148 | + if (IS_ERR(flowtable)) |
| 149 | + return PTR_ERR(flowtable); |
| 150 | + |
| 151 | + priv->flowtable = flowtable; |
| 152 | + flowtable->use++; |
| 153 | + |
| 154 | + return nf_ct_netns_get(ctx->net, ctx->afi->family); |
| 155 | +} |
| 156 | + |
| 157 | +static void nft_flow_offload_destroy(const struct nft_ctx *ctx, |
| 158 | + const struct nft_expr *expr) |
| 159 | +{ |
| 160 | + struct nft_flow_offload *priv = nft_expr_priv(expr); |
| 161 | + |
| 162 | + priv->flowtable->use--; |
| 163 | + nf_ct_netns_put(ctx->net, ctx->afi->family); |
| 164 | +} |
| 165 | + |
| 166 | +static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr) |
| 167 | +{ |
| 168 | + struct nft_flow_offload *priv = nft_expr_priv(expr); |
| 169 | + |
| 170 | + if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name)) |
| 171 | + goto nla_put_failure; |
| 172 | + |
| 173 | + return 0; |
| 174 | + |
| 175 | +nla_put_failure: |
| 176 | + return -1; |
| 177 | +} |
| 178 | + |
| 179 | +static struct nft_expr_type nft_flow_offload_type; |
| 180 | +static const struct nft_expr_ops nft_flow_offload_ops = { |
| 181 | + .type = &nft_flow_offload_type, |
| 182 | + .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)), |
| 183 | + .eval = nft_flow_offload_eval, |
| 184 | + .init = nft_flow_offload_init, |
| 185 | + .destroy = nft_flow_offload_destroy, |
| 186 | + .validate = nft_flow_offload_validate, |
| 187 | + .dump = nft_flow_offload_dump, |
| 188 | +}; |
| 189 | + |
| 190 | +static struct nft_expr_type nft_flow_offload_type __read_mostly = { |
| 191 | + .name = "flow_offload", |
| 192 | + .ops = &nft_flow_offload_ops, |
| 193 | + .maxattr = NFTA_FLOW_MAX, |
| 194 | + .owner = THIS_MODULE, |
| 195 | +}; |
| 196 | + |
| 197 | +static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data) |
| 198 | +{ |
| 199 | + struct net_device *dev = data; |
| 200 | + |
| 201 | + if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex) |
| 202 | + return; |
| 203 | + |
| 204 | + flow_offload_dead(flow); |
| 205 | +} |
| 206 | + |
| 207 | +static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable, |
| 208 | + void *data) |
| 209 | +{ |
| 210 | + nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data); |
| 211 | +} |
| 212 | + |
| 213 | +static int flow_offload_netdev_event(struct notifier_block *this, |
| 214 | + unsigned long event, void *ptr) |
| 215 | +{ |
| 216 | + struct net_device *dev = netdev_notifier_info_to_dev(ptr); |
| 217 | + |
| 218 | + if (event != NETDEV_DOWN) |
| 219 | + return NOTIFY_DONE; |
| 220 | + |
| 221 | + nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev); |
| 222 | + |
| 223 | + return NOTIFY_DONE; |
| 224 | +} |
| 225 | + |
| 226 | +static struct notifier_block flow_offload_netdev_notifier = { |
| 227 | + .notifier_call = flow_offload_netdev_event, |
| 228 | +}; |
| 229 | + |
| 230 | +static int __init nft_flow_offload_module_init(void) |
| 231 | +{ |
| 232 | + int err; |
| 233 | + |
| 234 | + register_netdevice_notifier(&flow_offload_netdev_notifier); |
| 235 | + |
| 236 | + err = nft_register_expr(&nft_flow_offload_type); |
| 237 | + if (err < 0) |
| 238 | + goto register_expr; |
| 239 | + |
| 240 | + return 0; |
| 241 | + |
| 242 | +register_expr: |
| 243 | + unregister_netdevice_notifier(&flow_offload_netdev_notifier); |
| 244 | + return err; |
| 245 | +} |
| 246 | + |
| 247 | +static void __exit nft_flow_offload_module_exit(void) |
| 248 | +{ |
| 249 | + struct net *net; |
| 250 | + |
| 251 | + nft_unregister_expr(&nft_flow_offload_type); |
| 252 | + unregister_netdevice_notifier(&flow_offload_netdev_notifier); |
| 253 | + rtnl_lock(); |
| 254 | + for_each_net(net) |
| 255 | + nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL); |
| 256 | + rtnl_unlock(); |
| 257 | +} |
| 258 | + |
| 259 | +module_init(nft_flow_offload_module_init); |
| 260 | +module_exit(nft_flow_offload_module_exit); |
| 261 | + |
| 262 | +MODULE_LICENSE("GPL"); |
| 263 | +MODULE_AUTHOR( "Pablo Neira Ayuso <[email protected]>"); |
| 264 | +MODULE_ALIAS_NFT_EXPR("flow_offload"); |
0 commit comments