GNU Linux-libre 5.19.9-gnu
[releases.git] / net / netfilter / nf_flow_table_core.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/kernel.h>
3 #include <linux/init.h>
4 #include <linux/module.h>
5 #include <linux/netfilter.h>
6 #include <linux/rhashtable.h>
7 #include <linux/netdevice.h>
8 #include <net/ip.h>
9 #include <net/ip6_route.h>
10 #include <net/netfilter/nf_tables.h>
11 #include <net/netfilter/nf_flow_table.h>
12 #include <net/netfilter/nf_conntrack.h>
13 #include <net/netfilter/nf_conntrack_core.h>
14 #include <net/netfilter/nf_conntrack_l4proto.h>
15 #include <net/netfilter/nf_conntrack_tuple.h>
16
/*
 * Global list of flowtables. Entries are added in nf_flow_table_init(),
 * removed in nf_flow_table_free() and walked in nf_flow_table_cleanup();
 * each of those accesses is done with flowtable_lock held.
 */
17 static DEFINE_MUTEX(flowtable_lock);
18 static LIST_HEAD(flowtables);
19
20 static void
21 flow_offload_fill_dir(struct flow_offload *flow,
22                       enum flow_offload_tuple_dir dir)
23 {
24         struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
25         struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
26
27         ft->dir = dir;
28
29         switch (ctt->src.l3num) {
30         case NFPROTO_IPV4:
31                 ft->src_v4 = ctt->src.u3.in;
32                 ft->dst_v4 = ctt->dst.u3.in;
33                 break;
34         case NFPROTO_IPV6:
35                 ft->src_v6 = ctt->src.u3.in6;
36                 ft->dst_v6 = ctt->dst.u3.in6;
37                 break;
38         }
39
40         ft->l3proto = ctt->src.l3num;
41         ft->l4proto = ctt->dst.protonum;
42
43         switch (ctt->dst.protonum) {
44         case IPPROTO_TCP:
45         case IPPROTO_UDP:
46                 ft->src_port = ctt->src.u.tcp.port;
47                 ft->dst_port = ctt->dst.u.tcp.port;
48                 break;
49         }
50 }
51
52 struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
53 {
54         struct flow_offload *flow;
55
56         if (unlikely(nf_ct_is_dying(ct) ||
57             !refcount_inc_not_zero(&ct->ct_general.use)))
58                 return NULL;
59
60         flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
61         if (!flow)
62                 goto err_ct_refcnt;
63
64         flow->ct = ct;
65
66         flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
67         flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
68
69         if (ct->status & IPS_SRC_NAT)
70                 __set_bit(NF_FLOW_SNAT, &flow->flags);
71         if (ct->status & IPS_DST_NAT)
72                 __set_bit(NF_FLOW_DNAT, &flow->flags);
73
74         return flow;
75
76 err_ct_refcnt:
77         nf_ct_put(ct);
78
79         return NULL;
80 }
81 EXPORT_SYMBOL_GPL(flow_offload_alloc);
82
83 static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
84 {
85         const struct rt6_info *rt;
86
87         if (flow_tuple->l3proto == NFPROTO_IPV6) {
88                 rt = (const struct rt6_info *)flow_tuple->dst_cache;
89                 return rt6_get_cookie(rt);
90         }
91
92         return 0;
93 }
94
95 static int flow_offload_fill_route(struct flow_offload *flow,
96                                    const struct nf_flow_route *route,
97                                    enum flow_offload_tuple_dir dir)
98 {
99         struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
100         struct dst_entry *dst = route->tuple[dir].dst;
101         int i, j = 0;
102
103         switch (flow_tuple->l3proto) {
104         case NFPROTO_IPV4:
105                 flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
106                 break;
107         case NFPROTO_IPV6:
108                 flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
109                 break;
110         }
111
112         flow_tuple->iifidx = route->tuple[dir].in.ifindex;
113         for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
114                 flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
115                 flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
116                 if (route->tuple[dir].in.ingress_vlans & BIT(i))
117                         flow_tuple->in_vlan_ingress |= BIT(j);
118                 j++;
119         }
120         flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
121
122         switch (route->tuple[dir].xmit_type) {
123         case FLOW_OFFLOAD_XMIT_DIRECT:
124                 memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
125                        ETH_ALEN);
126                 memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
127                        ETH_ALEN);
128                 flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
129                 flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
130                 break;
131         case FLOW_OFFLOAD_XMIT_XFRM:
132         case FLOW_OFFLOAD_XMIT_NEIGH:
133                 if (!dst_hold_safe(route->tuple[dir].dst))
134                         return -1;
135
136                 flow_tuple->dst_cache = dst;
137                 flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
138                 break;
139         default:
140                 WARN_ON_ONCE(1);
141                 break;
142         }
143         flow_tuple->xmit_type = route->tuple[dir].xmit_type;
144
145         return 0;
146 }
147
148 static void nft_flow_dst_release(struct flow_offload *flow,
149                                  enum flow_offload_tuple_dir dir)
150 {
151         if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
152             flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
153                 dst_release(flow->tuplehash[dir].tuple.dst_cache);
154 }
155
156 int flow_offload_route_init(struct flow_offload *flow,
157                             const struct nf_flow_route *route)
158 {
159         int err;
160
161         err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
162         if (err < 0)
163                 return err;
164
165         err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
166         if (err < 0)
167                 goto err_route_reply;
168
169         flow->type = NF_FLOW_OFFLOAD_ROUTE;
170
171         return 0;
172
173 err_route_reply:
174         nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
175
176         return err;
177 }
178 EXPORT_SYMBOL_GPL(flow_offload_route_init);
179
180 static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
181 {
182         tcp->seen[0].td_maxwin = 0;
183         tcp->seen[1].td_maxwin = 0;
184 }
185
186 static void flow_offload_fixup_ct(struct nf_conn *ct)
187 {
188         struct net *net = nf_ct_net(ct);
189         int l4num = nf_ct_protonum(ct);
190         s32 timeout;
191
192         if (l4num == IPPROTO_TCP) {
193                 struct nf_tcp_net *tn = nf_tcp_pernet(net);
194
195                 flow_offload_fixup_tcp(&ct->proto.tcp);
196
197                 timeout = tn->timeouts[ct->proto.tcp.state];
198                 timeout -= tn->offload_timeout;
199         } else if (l4num == IPPROTO_UDP) {
200                 struct nf_udp_net *tn = nf_udp_pernet(net);
201
202                 timeout = tn->timeouts[UDP_CT_REPLIED];
203                 timeout -= tn->offload_timeout;
204         } else {
205                 return;
206         }
207
208         if (timeout < 0)
209                 timeout = 0;
210
211         if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
212                 WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
213 }
214
215 static void flow_offload_route_release(struct flow_offload *flow)
216 {
217         nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
218         nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
219 }
220
221 void flow_offload_free(struct flow_offload *flow)
222 {
223         switch (flow->type) {
224         case NF_FLOW_OFFLOAD_ROUTE:
225                 flow_offload_route_release(flow);
226                 break;
227         default:
228                 break;
229         }
230         nf_ct_put(flow->ct);
231         kfree_rcu(flow, rcu_head);
232 }
233 EXPORT_SYMBOL_GPL(flow_offload_free);
234
235 static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
236 {
237         const struct flow_offload_tuple *tuple = data;
238
239         return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
240 }
241
242 static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
243 {
244         const struct flow_offload_tuple_rhash *tuplehash = data;
245
246         return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
247 }
248
249 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
250                                         const void *ptr)
251 {
252         const struct flow_offload_tuple *tuple = arg->key;
253         const struct flow_offload_tuple_rhash *x = ptr;
254
255         if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
256                 return 1;
257
258         return 0;
259 }
260
/*
 * rhashtable parameters shared by every flowtable in this file. Entries
 * are struct flow_offload_tuple_rhash linked through their 'node' member;
 * hashing and comparison are done by the flow_offload_hash*() callbacks,
 * and automatic shrinking is enabled.
 */
261 static const struct rhashtable_params nf_flow_offload_rhash_params = {
262         .head_offset            = offsetof(struct flow_offload_tuple_rhash, node),
263         .hashfn                 = flow_offload_hash,
264         .obj_hashfn             = flow_offload_hash_obj,
265         .obj_cmpfn              = flow_offload_hash_cmp,
266         .automatic_shrinking    = true,
267 };
268
269 unsigned long flow_offload_get_timeout(struct flow_offload *flow)
270 {
271         unsigned long timeout = NF_FLOW_TIMEOUT;
272         struct net *net = nf_ct_net(flow->ct);
273         int l4num = nf_ct_protonum(flow->ct);
274
275         if (l4num == IPPROTO_TCP) {
276                 struct nf_tcp_net *tn = nf_tcp_pernet(net);
277
278                 timeout = tn->offload_timeout;
279         } else if (l4num == IPPROTO_UDP) {
280                 struct nf_udp_net *tn = nf_udp_pernet(net);
281
282                 timeout = tn->offload_timeout;
283         }
284
285         return timeout;
286 }
287
288 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
289 {
290         int err;
291
292         flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
293
294         err = rhashtable_insert_fast(&flow_table->rhashtable,
295                                      &flow->tuplehash[0].node,
296                                      nf_flow_offload_rhash_params);
297         if (err < 0)
298                 return err;
299
300         err = rhashtable_insert_fast(&flow_table->rhashtable,
301                                      &flow->tuplehash[1].node,
302                                      nf_flow_offload_rhash_params);
303         if (err < 0) {
304                 rhashtable_remove_fast(&flow_table->rhashtable,
305                                        &flow->tuplehash[0].node,
306                                        nf_flow_offload_rhash_params);
307                 return err;
308         }
309
310         nf_ct_offload_timeout(flow->ct);
311
312         if (nf_flowtable_hw_offload(flow_table)) {
313                 __set_bit(NF_FLOW_HW, &flow->flags);
314                 nf_flow_offload_add(flow_table, flow);
315         }
316
317         return 0;
318 }
319 EXPORT_SYMBOL_GPL(flow_offload_add);
320
321 void flow_offload_refresh(struct nf_flowtable *flow_table,
322                           struct flow_offload *flow)
323 {
324         u32 timeout;
325
326         timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
327         if (timeout - READ_ONCE(flow->timeout) > HZ)
328                 WRITE_ONCE(flow->timeout, timeout);
329         else
330                 return;
331
332         if (likely(!nf_flowtable_hw_offload(flow_table)))
333                 return;
334
335         nf_flow_offload_add(flow_table, flow);
336 }
337 EXPORT_SYMBOL_GPL(flow_offload_refresh);
338
339 static inline bool nf_flow_has_expired(const struct flow_offload *flow)
340 {
341         return nf_flow_timeout_delta(flow->timeout) <= 0;
342 }
343
344 static void flow_offload_del(struct nf_flowtable *flow_table,
345                              struct flow_offload *flow)
346 {
347         rhashtable_remove_fast(&flow_table->rhashtable,
348                                &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
349                                nf_flow_offload_rhash_params);
350         rhashtable_remove_fast(&flow_table->rhashtable,
351                                &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
352                                nf_flow_offload_rhash_params);
353         flow_offload_free(flow);
354 }
355
356 void flow_offload_teardown(struct flow_offload *flow)
357 {
358         clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
359         set_bit(NF_FLOW_TEARDOWN, &flow->flags);
360         flow_offload_fixup_ct(flow->ct);
361 }
362 EXPORT_SYMBOL_GPL(flow_offload_teardown);
363
364 struct flow_offload_tuple_rhash *
365 flow_offload_lookup(struct nf_flowtable *flow_table,
366                     struct flow_offload_tuple *tuple)
367 {
368         struct flow_offload_tuple_rhash *tuplehash;
369         struct flow_offload *flow;
370         int dir;
371
372         tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
373                                       nf_flow_offload_rhash_params);
374         if (!tuplehash)
375                 return NULL;
376
377         dir = tuplehash->tuple.dir;
378         flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
379         if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
380                 return NULL;
381
382         if (unlikely(nf_ct_is_dying(flow->ct)))
383                 return NULL;
384
385         return tuplehash;
386 }
387 EXPORT_SYMBOL_GPL(flow_offload_lookup);
388
389 static int
390 nf_flow_table_iterate(struct nf_flowtable *flow_table,
391                       void (*iter)(struct nf_flowtable *flowtable,
392                                    struct flow_offload *flow, void *data),
393                       void *data)
394 {
395         struct flow_offload_tuple_rhash *tuplehash;
396         struct rhashtable_iter hti;
397         struct flow_offload *flow;
398         int err = 0;
399
400         rhashtable_walk_enter(&flow_table->rhashtable, &hti);
401         rhashtable_walk_start(&hti);
402
403         while ((tuplehash = rhashtable_walk_next(&hti))) {
404                 if (IS_ERR(tuplehash)) {
405                         if (PTR_ERR(tuplehash) != -EAGAIN) {
406                                 err = PTR_ERR(tuplehash);
407                                 break;
408                         }
409                         continue;
410                 }
411                 if (tuplehash->tuple.dir)
412                         continue;
413
414                 flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
415
416                 iter(flow_table, flow, data);
417         }
418         rhashtable_walk_stop(&hti);
419         rhashtable_walk_exit(&hti);
420
421         return err;
422 }
423
424 static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
425                                     struct flow_offload *flow, void *data)
426 {
427         if (nf_flow_has_expired(flow) ||
428             nf_ct_is_dying(flow->ct))
429                 flow_offload_teardown(flow);
430
431         if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
432                 if (test_bit(NF_FLOW_HW, &flow->flags)) {
433                         if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
434                                 nf_flow_offload_del(flow_table, flow);
435                         else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
436                                 flow_offload_del(flow_table, flow);
437                 } else {
438                         flow_offload_del(flow_table, flow);
439                 }
440         } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
441                 nf_flow_offload_stats(flow_table, flow);
442         }
443 }
444
445 void nf_flow_table_gc_run(struct nf_flowtable *flow_table)
446 {
447         nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
448 }
449
450 static void nf_flow_offload_work_gc(struct work_struct *work)
451 {
452         struct nf_flowtable *flow_table;
453
454         flow_table = container_of(work, struct nf_flowtable, gc_work.work);
455         nf_flow_table_gc_run(flow_table);
456         queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
457 }
458
459 static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
460                                  __be16 port, __be16 new_port)
461 {
462         struct tcphdr *tcph;
463
464         tcph = (void *)(skb_network_header(skb) + thoff);
465         inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
466 }
467
468 static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
469                                  __be16 port, __be16 new_port)
470 {
471         struct udphdr *udph;
472
473         udph = (void *)(skb_network_header(skb) + thoff);
474         if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
475                 inet_proto_csum_replace2(&udph->check, skb, port,
476                                          new_port, false);
477                 if (!udph->check)
478                         udph->check = CSUM_MANGLED_0;
479         }
480 }
481
482 static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
483                              u8 protocol, __be16 port, __be16 new_port)
484 {
485         switch (protocol) {
486         case IPPROTO_TCP:
487                 nf_flow_nat_port_tcp(skb, thoff, port, new_port);
488                 break;
489         case IPPROTO_UDP:
490                 nf_flow_nat_port_udp(skb, thoff, port, new_port);
491                 break;
492         }
493 }
494
495 void nf_flow_snat_port(const struct flow_offload *flow,
496                        struct sk_buff *skb, unsigned int thoff,
497                        u8 protocol, enum flow_offload_tuple_dir dir)
498 {
499         struct flow_ports *hdr;
500         __be16 port, new_port;
501
502         hdr = (void *)(skb_network_header(skb) + thoff);
503
504         switch (dir) {
505         case FLOW_OFFLOAD_DIR_ORIGINAL:
506                 port = hdr->source;
507                 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
508                 hdr->source = new_port;
509                 break;
510         case FLOW_OFFLOAD_DIR_REPLY:
511                 port = hdr->dest;
512                 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
513                 hdr->dest = new_port;
514                 break;
515         }
516
517         nf_flow_nat_port(skb, thoff, protocol, port, new_port);
518 }
519 EXPORT_SYMBOL_GPL(nf_flow_snat_port);
520
521 void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
522                        unsigned int thoff, u8 protocol,
523                        enum flow_offload_tuple_dir dir)
524 {
525         struct flow_ports *hdr;
526         __be16 port, new_port;
527
528         hdr = (void *)(skb_network_header(skb) + thoff);
529
530         switch (dir) {
531         case FLOW_OFFLOAD_DIR_ORIGINAL:
532                 port = hdr->dest;
533                 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
534                 hdr->dest = new_port;
535                 break;
536         case FLOW_OFFLOAD_DIR_REPLY:
537                 port = hdr->source;
538                 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
539                 hdr->source = new_port;
540                 break;
541         }
542
543         nf_flow_nat_port(skb, thoff, protocol, port, new_port);
544 }
545 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
546
547 int nf_flow_table_init(struct nf_flowtable *flowtable)
548 {
549         int err;
550
551         INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
552         flow_block_init(&flowtable->flow_block);
553         init_rwsem(&flowtable->flow_block_lock);
554
555         err = rhashtable_init(&flowtable->rhashtable,
556                               &nf_flow_offload_rhash_params);
557         if (err < 0)
558                 return err;
559
560         queue_delayed_work(system_power_efficient_wq,
561                            &flowtable->gc_work, HZ);
562
563         mutex_lock(&flowtable_lock);
564         list_add(&flowtable->list, &flowtables);
565         mutex_unlock(&flowtable_lock);
566
567         return 0;
568 }
569 EXPORT_SYMBOL_GPL(nf_flow_table_init);
570
571 static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table,
572                                      struct flow_offload *flow, void *data)
573 {
574         struct net_device *dev = data;
575
576         if (!dev) {
577                 flow_offload_teardown(flow);
578                 return;
579         }
580
581         if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
582             (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
583              flow->tuplehash[1].tuple.iifidx == dev->ifindex))
584                 flow_offload_teardown(flow);
585 }
586
587 void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
588                               struct net_device *dev)
589 {
590         nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
591         flush_delayed_work(&flowtable->gc_work);
592         nf_flow_table_offload_flush(flowtable);
593 }
594
595 void nf_flow_table_cleanup(struct net_device *dev)
596 {
597         struct nf_flowtable *flowtable;
598
599         mutex_lock(&flowtable_lock);
600         list_for_each_entry(flowtable, &flowtables, list)
601                 nf_flow_table_gc_cleanup(flowtable, dev);
602         mutex_unlock(&flowtable_lock);
603 }
604 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
605
606 void nf_flow_table_free(struct nf_flowtable *flow_table)
607 {
608         mutex_lock(&flowtable_lock);
609         list_del(&flow_table->list);
610         mutex_unlock(&flowtable_lock);
611
612         cancel_delayed_work_sync(&flow_table->gc_work);
613         nf_flow_table_offload_flush(flow_table);
614         /* ... no more pending work after this stage ... */
615         nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
616         nf_flow_table_gc_run(flow_table);
617         nf_flow_table_offload_flush_cleanup(flow_table);
618         rhashtable_destroy(&flow_table->rhashtable);
619 }
620 EXPORT_SYMBOL_GPL(nf_flow_table_free);
621
622 static int __init nf_flow_table_module_init(void)
623 {
624         return nf_flow_table_offload_init();
625 }
626
627 static void __exit nf_flow_table_module_exit(void)
628 {
629         nf_flow_table_offload_exit();
630 }
631
/* Register the module init/exit handlers defined above and its metadata. */
632 module_init(nf_flow_table_module_init);
633 module_exit(nf_flow_table_module_exit);
634
635 MODULE_LICENSE("GPL");
636 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
637 MODULE_DESCRIPTION("Netfilter flow table module");