1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Unstable Conntrack Helpers for XDP and TC-BPF hook
4 * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
5 * allowed to break compatibility for these functions since the interface they
6 * are exposed through to BPF programs is explicitly unstable.
9 #include <linux/bpf_verifier.h>
10 #include <linux/bpf.h>
11 #include <linux/btf.h>
12 #include <linux/filter.h>
13 #include <linux/mutex.h>
14 #include <linux/types.h>
15 #include <linux/btf_ids.h>
16 #include <linux/net_namespace.h>
18 #include <net/netfilter/nf_conntrack_bpf.h>
19 #include <net/netfilter/nf_conntrack_core.h>
21 /* bpf_ct_opts - Options for CT lookup helpers
24 * @netns_id - Specify the network namespace for lookup
26 * BPF_F_CURRENT_NETNS (-1)
27 * Use namespace associated with ctx (xdp_md, __sk_buff)
29 * Network Namespace ID
30 * @error - Out parameter, set for any errors encountered
32 * -EINVAL - Passed NULL for bpf_tuple pointer
33 * -EINVAL - opts->reserved is not 0
34 * -EINVAL - netns_id is less than -1
35 * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
36 * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
37 * -ENONET - No network namespace found for netns_id
38 * -ENOENT - Conntrack lookup could not find entry for tuple
39 * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
40 * or sizeof(tuple->ipv6)
41 * @l4proto - Layer 4 protocol
43 * IPPROTO_TCP, IPPROTO_UDP
44 * @dir: - connection tracking tuple direction.
45 * @reserved - Reserved member, will be reused for more options in future
58 NF_BPF_CT_OPTS_SZ = 12,
/* Convert a bpf_sock_tuple supplied by a BPF program into a kernel
 * nf_conntrack_tuple.
 *
 * @bpf_tuple: input tuple from the BPF program
 * @tuple_len: size of @bpf_tuple; selects the IPv4 vs IPv6 branch below
 * @protonum:  L4 protocol — anything other than TCP/UDP is rejected
 * @dir:       tuple direction; non-zero (reply direction) swaps which
 *             src/dst slots of @tuple are written
 * @tuple:     output tuple, zeroed before being filled
 */
61 static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
62 u32 tuple_len, u8 protonum, u8 dir,
63 struct nf_conntrack_tuple *tuple)
/* For the reply direction, source and destination swap roles. */
65 union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3;
66 union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3;
67 union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u
69 union nf_conntrack_man_proto *dport = dir ? &tuple->src.u
70 : (void *)&tuple->dst.u;
/* Only TCP and UDP tuples are supported here. */
72 if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
75 memset(tuple, 0, sizeof(*tuple));
/* tuple_len distinguishes the IPv4 and IPv6 layouts of bpf_sock_tuple. */
78 case sizeof(bpf_tuple->ipv4):
79 tuple->src.l3num = AF_INET;
80 src->ip = bpf_tuple->ipv4.saddr;
/* ->tcp.port is written for UDP too; assumes the tcp/udp members of
 * union nf_conntrack_man_proto overlay the same port field — confirm
 * against the nf_conntrack_man_proto definition.
 */
81 sport->tcp.port = bpf_tuple->ipv4.sport;
82 dst->ip = bpf_tuple->ipv4.daddr;
83 dport->tcp.port = bpf_tuple->ipv4.dport;
85 case sizeof(bpf_tuple->ipv6):
86 tuple->src.l3num = AF_INET6;
87 memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
88 sport->tcp.port = bpf_tuple->ipv6.sport;
89 memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
90 dport->tcp.port = bpf_tuple->ipv6.dport;
95 tuple->dst.protonum = protonum;
/* Common backend for the bpf_{xdp,skb}_ct_alloc kfuncs: validate the
 * caller-supplied options, build both tuple directions, optionally switch
 * to the netns named by opts->netns_id, and allocate an unconfirmed
 * nf_conn with the given default timeout.
 */
101 static struct nf_conn *
102 __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
103 u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len,
106 struct nf_conntrack_tuple otuple, rtuple;
/* Reject NULL args, non-zero reserved bytes and a mis-sized opts struct
 * (see the -EINVAL cases documented on bpf_ct_opts above).
 */
110 if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
111 opts_len != NF_BPF_CT_OPTS_SZ)
112 return ERR_PTR(-EINVAL);
/* netns_id below BPF_F_CURRENT_NETNS (-1) is meaningless. */
114 if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
115 return ERR_PTR(-EINVAL);
/* Parse the same bpf tuple twice: once per conntrack direction. */
117 err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
118 IP_CT_DIR_ORIGINAL, &otuple);
122 err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
123 IP_CT_DIR_REPLY, &rtuple);
/* Non-negative netns_id: look up (and take a ref on) that namespace;
 * it is released again after allocation, below.
 */
127 if (opts->netns_id >= 0) {
128 net = get_net_ns_by_id(net, opts->netns_id);
130 return ERR_PTR(-ENONET);
133 ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
/* Clear per-protocol state and arm the default timeout (seconds -> jiffies). */
138 memset(&ct->proto, 0, sizeof(ct->proto));
139 __nf_ct_set_timeout(ct, timeout * HZ);
/* Drop the namespace reference taken above, if any. */
142 if (opts->netns_id >= 0)
/* Common backend for the bpf_{xdp,skb}_ct_lookup kfuncs: validate options,
 * parse the tuple, optionally switch to the netns named by opts->netns_id,
 * and look up an existing conntrack entry. On success opts->dir is set to
 * the direction of the matching tuple hash.
 */
148 static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
149 struct bpf_sock_tuple *bpf_tuple,
150 u32 tuple_len, struct bpf_ct_opts *opts,
153 struct nf_conntrack_tuple_hash *hash;
154 struct nf_conntrack_tuple tuple;
/* Same option validation as the alloc path (see bpf_ct_opts doc above). */
158 if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
159 opts_len != NF_BPF_CT_OPTS_SZ)
160 return ERR_PTR(-EINVAL);
161 if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
162 return ERR_PTR(-EPROTO);
163 if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
164 return ERR_PTR(-EINVAL);
/* Lookup only needs the original-direction tuple. */
166 err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
167 IP_CT_DIR_ORIGINAL, &tuple);
/* Non-negative netns_id: resolve that namespace for the lookup;
 * the reference is dropped again right after the find, below.
 */
171 if (opts->netns_id >= 0) {
172 net = get_net_ns_by_id(net, opts->netns_id);
174 return ERR_PTR(-ENONET);
177 hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
178 if (opts->netns_id >= 0)
181 return ERR_PTR(-ENOENT);
/* Report which direction matched back to the BPF caller. */
183 ct = nf_ct_tuplehash_to_ctrack(hash);
184 opts->dir = NF_CT_DIRECTION(hash);
/* BTF type IDs for nf_conn ([0]) and nf_conn___init ([1]); consumed by
 * _nf_conntrack_btf_struct_access() to recognize writes into these structs.
 */
189 BTF_ID_LIST(btf_nf_conn_ids)
190 BTF_ID(struct, nf_conn)
191 BTF_ID(struct, nf_conn___init)
193 /* Check writes into `struct nf_conn` */
/* Verifier callback deciding whether a BPF program may write at a given
 * offset into nf_conn / nf_conn___init. Writes to any other type, or
 * outside the whitelisted member range, are rejected with a log message.
 */
194 static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
195 const struct bpf_reg_state *reg,
198 const struct btf_type *ncit, *nct, *t;
/* Resolve the two whitelisted types from the BTF ID list above. */
201 ncit = btf_type_by_id(reg->btf, btf_nf_conn_ids[1]);
202 nct = btf_type_by_id(reg->btf, btf_nf_conn_ids[0]);
203 t = btf_type_by_id(reg->btf, reg->btf_id);
204 if (t != nct && t != ncit) {
205 bpf_log(log, "only read is supported\n");
209 /* `struct nf_conn` and `struct nf_conn___init` have the same layout
210 * so we are safe to simply merge offset checks here
/* Only ->mark is writable, and only when conntrack mark support is built in. */
213 #if defined(CONFIG_NF_CONNTRACK_MARK)
214 case offsetof(struct nf_conn, mark):
215 end = offsetofend(struct nf_conn, mark);
219 bpf_log(log, "no write support to nf_conn at off %d\n", off);
/* The access must also not run past the end of the permitted member. */
223 if (off + size > end) {
225 "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
233 __bpf_kfunc_start_defs();
235 /* bpf_xdp_ct_alloc - Allocate a new CT entry
238 * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
240 * @bpf_tuple - Pointer to memory representing the tuple to look up
242 * @tuple__sz - Length of the tuple structure
243 * Must be one of sizeof(bpf_tuple->ipv4) or
244 * sizeof(bpf_tuple->ipv6)
245 * @opts - Additional options for allocation (documented above)
247 * @opts__sz - Length of the bpf_ct_opts structure
248 * Must be NF_BPF_CT_OPTS_SZ (12)
250 __bpf_kfunc struct nf_conn___init *
251 bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
252 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
/* The UAPI xdp_md ctx is backed by the kernel's xdp_buff. */
254 struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
255 struct nf_conn *nfct;
/* Allocate in the netns of the receiving device (opts may redirect). */
257 nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
/* On failure, hand the error code back to the BPF program. */
261 opts->error = PTR_ERR(nfct);
/* Returned as nf_conn___init: an unconfirmed entry distinct (to the
 * verifier) from a confirmed nf_conn until bpf_ct_insert_entry().
 */
265 return (struct nf_conn___init *)nfct;
268 /* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
272 * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
274 * @bpf_tuple - Pointer to memory representing the tuple to look up
276 * @tuple__sz - Length of the tuple structure
277 * Must be one of sizeof(bpf_tuple->ipv4) or
278 * sizeof(bpf_tuple->ipv6)
279 * @opts - Additional options for lookup (documented above)
281 * @opts__sz - Length of the bpf_ct_opts structure
282 * Must be NF_BPF_CT_OPTS_SZ (12)
284 __bpf_kfunc struct nf_conn *
285 bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
286 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
/* The UAPI xdp_md ctx is backed by the kernel's xdp_buff. */
288 struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
289 struct net *caller_net;
290 struct nf_conn *nfct;
/* Default lookup namespace is that of the receiving device. */
292 caller_net = dev_net(ctx->rxq->dev);
293 nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
/* On failure, hand the error code back to the BPF program. */
296 opts->error = PTR_ERR(nfct);
302 /* bpf_skb_ct_alloc - Allocate a new CT entry
305 * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
307 * @bpf_tuple - Pointer to memory representing the tuple to look up
309 * @tuple__sz - Length of the tuple structure
310 * Must be one of sizeof(bpf_tuple->ipv4) or
311 * sizeof(bpf_tuple->ipv6)
312 * @opts - Additional options for allocation (documented above)
314 * @opts__sz - Length of the bpf_ct_opts structure
315 * Must be NF_BPF_CT_OPTS_SZ (12)
317 __bpf_kfunc struct nf_conn___init *
318 bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
319 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
/* The UAPI __sk_buff ctx is backed by the kernel's sk_buff. */
321 struct sk_buff *skb = (struct sk_buff *)skb_ctx;
322 struct nf_conn *nfct;
/* Prefer the device netns; fall back to the socket's netns. */
325 net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
/* Trailing 10 is the default timeout (in seconds, scaled by HZ in the
 * backend) for the new entry.
 */
326 nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
/* On failure, hand the error code back to the BPF program. */
329 opts->error = PTR_ERR(nfct);
/* Unconfirmed entry; becomes a plain nf_conn via bpf_ct_insert_entry(). */
333 return (struct nf_conn___init *)nfct;
336 /* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
340 * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
342 * @bpf_tuple - Pointer to memory representing the tuple to look up
344 * @tuple__sz - Length of the tuple structure
345 * Must be one of sizeof(bpf_tuple->ipv4) or
346 * sizeof(bpf_tuple->ipv6)
347 * @opts - Additional options for lookup (documented above)
349 * @opts__sz - Length of the bpf_ct_opts structure
350 * Must be NF_BPF_CT_OPTS_SZ (12)
352 __bpf_kfunc struct nf_conn *
353 bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
354 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
/* The UAPI __sk_buff ctx is backed by the kernel's sk_buff. */
356 struct sk_buff *skb = (struct sk_buff *)skb_ctx;
357 struct net *caller_net;
358 struct nf_conn *nfct;
/* Prefer the device netns; fall back to the socket's netns. */
360 caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
361 nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
/* On failure, hand the error code back to the BPF program. */
364 opts->error = PTR_ERR(nfct);
370 /* bpf_ct_insert_entry - Add the provided entry into a CT map
372 * This must be invoked for referenced PTR_TO_BTF_ID.
374 * @nfct - Pointer to referenced nf_conn___init object, obtained
375 * using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
377 __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
/* The ___init suffix only exists for the verifier; the layout is nf_conn. */
379 struct nf_conn *nfct = (struct nf_conn *)nfct_i;
/* Unconfirmed entries carry a relative timeout; convert it to an
 * absolute deadline before marking the entry confirmed.
 */
382 if (!nf_ct_is_confirmed(nfct))
383 nfct->timeout += nfct_time_stamp;
384 nfct->status |= IPS_CONFIRMED;
385 err = nf_conntrack_hash_check_insert(nfct);
/* On insert failure the entry is freed — the caller's reference is gone. */
387 nf_conntrack_free(nfct);
393 /* bpf_ct_release - Release acquired nf_conn object
395 * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
396 * the program if any references remain in the program in all of the explored
400 * @nf_conn - Pointer to referenced nf_conn object, obtained using
401 * bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
/* Drops the reference taken by the lookup kfuncs (KF_RELEASE pairing). */
403 __bpf_kfunc void bpf_ct_release(struct nf_conn *nfct)
408 /* bpf_ct_set_timeout - Set timeout of allocated nf_conn
410 * Sets the default timeout of newly allocated nf_conn before insertion.
411 * This helper must be invoked for refcounted pointer to nf_conn___init.
414 * @nfct - Pointer to referenced nf_conn object, obtained using
415 * bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
416 * @timeout - Timeout in msecs.
418 __bpf_kfunc void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
/* Caller supplies milliseconds; conntrack timeouts are kept in jiffies. */
420 __nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
423 /* bpf_ct_change_timeout - Change timeout of inserted nf_conn
425 * Change timeout associated of the inserted or looked up nf_conn.
426 * This helper must be invoked for refcounted pointer to nf_conn.
429 * @nfct - Pointer to referenced nf_conn object, obtained using
430 * bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
431 * @timeout - New timeout in msecs.
433 __bpf_kfunc int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
/* Milliseconds -> jiffies; __nf_ct_change_timeout's return is propagated. */
435 return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
438 /* bpf_ct_set_status - Set status field of allocated nf_conn
440 * Set the status field of the newly allocated nf_conn before insertion.
441 * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn___init.
444 * @nfct - Pointer to referenced nf_conn object, obtained using
445 * bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
446 * @status - New status value.
448 __bpf_kfunc int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
/* The ___init wrapper is cast back to nf_conn for the common helper. */
450 return nf_ct_change_status_common((struct nf_conn *)nfct, status);
453 /* bpf_ct_change_status - Change status of inserted nf_conn
455 * Change the status field of the provided connection tracking entry.
456 * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn.
459 * @nfct - Pointer to referenced nf_conn object, obtained using
460 * bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
461 * @status - New status value.
463 __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
/* Same backend as bpf_ct_set_status, but for already-inserted entries. */
465 return nf_ct_change_status_common(nfct, status);
468 __bpf_kfunc_end_defs();
/* kfunc registration table. KF_ACQUIRE|KF_RET_NULL: returns a refcounted,
 * possibly-NULL pointer; KF_RELEASE: consumes a reference;
 * KF_TRUSTED_ARGS: only trusted (refcounted) pointers may be passed.
 */
470 BTF_SET8_START(nf_ct_kfunc_set)
471 BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
472 BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
473 BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
474 BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
475 BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
476 BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
477 BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
478 BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
479 BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
480 BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
481 BTF_SET8_END(nf_ct_kfunc_set)
/* Wraps the set above for register_btf_kfunc_id_set(). */
483 static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
484 .owner = THIS_MODULE,
485 .set = &nf_ct_kfunc_set,
/* Module init hook: expose the CT kfuncs to XDP and TC (SCHED_CLS) programs
 * and install the nf_conn write-access checker for the verifier.
 */
488 int register_nf_conntrack_bpf(void)
492 ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
/* Only attempt the second registration if the first succeeded. */
493 ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
/* Publish the struct_access callback under the shared mutex so readers
 * never observe a half-installed pointer.
 */
495 mutex_lock(&nf_conn_btf_access_lock);
496 nfct_btf_struct_access = _nf_conntrack_btf_struct_access;
497 mutex_unlock(&nf_conn_btf_access_lock);
/* Module exit hook: withdraw the struct_access callback installed by
 * register_nf_conntrack_bpf(), under the same lock.
 */
503 void cleanup_nf_conntrack_bpf(void)
505 mutex_lock(&nf_conn_btf_access_lock);
506 nfct_btf_struct_access = NULL;
507 mutex_unlock(&nf_conn_btf_access_lock);