GNU Linux-libre 6.8.9-gnu
[releases.git] / net / sched / act_nat.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Stateless NAT actions
4  *
5  * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
6  */
7
8 #include <linux/errno.h>
9 #include <linux/init.h>
10 #include <linux/kernel.h>
11 #include <linux/module.h>
12 #include <linux/netfilter.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/skbuff.h>
15 #include <linux/slab.h>
16 #include <linux/spinlock.h>
17 #include <linux/string.h>
18 #include <linux/tc_act/tc_nat.h>
19 #include <net/act_api.h>
20 #include <net/pkt_cls.h>
21 #include <net/icmp.h>
22 #include <net/ip.h>
23 #include <net/netlink.h>
24 #include <net/tc_act/tc_nat.h>
25 #include <net/tcp.h>
26 #include <net/udp.h>
27 #include <net/tc_wrapper.h>
28
/*
 * Forward declaration: tcf_nat_init() and nat_init_net() read
 * act_nat_ops.net_id before the ops table itself is defined further down.
 */
static struct tc_action_ops act_nat_ops;
30
31 static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
32         [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) },
33 };
34
35 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
36                         struct tc_action **a, struct tcf_proto *tp,
37                         u32 flags, struct netlink_ext_ack *extack)
38 {
39         struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id);
40         bool bind = flags & TCA_ACT_FLAGS_BIND;
41         struct tcf_nat_parms *nparm, *oparm;
42         struct nlattr *tb[TCA_NAT_MAX + 1];
43         struct tcf_chain *goto_ch = NULL;
44         struct tc_nat *parm;
45         int ret = 0, err;
46         struct tcf_nat *p;
47         u32 index;
48
49         if (nla == NULL)
50                 return -EINVAL;
51
52         err = nla_parse_nested_deprecated(tb, TCA_NAT_MAX, nla, nat_policy,
53                                           NULL);
54         if (err < 0)
55                 return err;
56
57         if (tb[TCA_NAT_PARMS] == NULL)
58                 return -EINVAL;
59         parm = nla_data(tb[TCA_NAT_PARMS]);
60         index = parm->index;
61         err = tcf_idr_check_alloc(tn, &index, a, bind);
62         if (!err) {
63                 ret = tcf_idr_create_from_flags(tn, index, est, a, &act_nat_ops,
64                                                 bind, flags);
65                 if (ret) {
66                         tcf_idr_cleanup(tn, index);
67                         return ret;
68                 }
69                 ret = ACT_P_CREATED;
70         } else if (err > 0) {
71                 if (bind)
72                         return ACT_P_BOUND;
73                 if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
74                         tcf_idr_release(*a, bind);
75                         return -EEXIST;
76                 }
77         } else {
78                 return err;
79         }
80         err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
81         if (err < 0)
82                 goto release_idr;
83
84         nparm = kzalloc(sizeof(*nparm), GFP_KERNEL);
85         if (!nparm) {
86                 err = -ENOMEM;
87                 goto release_idr;
88         }
89
90         nparm->old_addr = parm->old_addr;
91         nparm->new_addr = parm->new_addr;
92         nparm->mask = parm->mask;
93         nparm->flags = parm->flags;
94
95         p = to_tcf_nat(*a);
96
97         spin_lock_bh(&p->tcf_lock);
98         goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
99         oparm = rcu_replace_pointer(p->parms, nparm, lockdep_is_held(&p->tcf_lock));
100         spin_unlock_bh(&p->tcf_lock);
101
102         if (goto_ch)
103                 tcf_chain_put_by_act(goto_ch);
104
105         if (oparm)
106                 kfree_rcu(oparm, rcu);
107
108         return ret;
109 release_idr:
110         tcf_idr_release(*a, bind);
111         return err;
112 }
113
114 TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb,
115                                   const struct tc_action *a,
116                                   struct tcf_result *res)
117 {
118         struct tcf_nat *p = to_tcf_nat(a);
119         struct tcf_nat_parms *parms;
120         struct iphdr *iph;
121         __be32 old_addr;
122         __be32 new_addr;
123         __be32 mask;
124         __be32 addr;
125         int egress;
126         int action;
127         int ihl;
128         int noff;
129
130         tcf_lastuse_update(&p->tcf_tm);
131         tcf_action_update_bstats(&p->common, skb);
132
133         action = READ_ONCE(p->tcf_action);
134
135         parms = rcu_dereference_bh(p->parms);
136         old_addr = parms->old_addr;
137         new_addr = parms->new_addr;
138         mask = parms->mask;
139         egress = parms->flags & TCA_NAT_FLAG_EGRESS;
140
141         if (unlikely(action == TC_ACT_SHOT))
142                 goto drop;
143
144         noff = skb_network_offset(skb);
145         if (!pskb_may_pull(skb, sizeof(*iph) + noff))
146                 goto drop;
147
148         iph = ip_hdr(skb);
149
150         if (egress)
151                 addr = iph->saddr;
152         else
153                 addr = iph->daddr;
154
155         if (!((old_addr ^ addr) & mask)) {
156                 if (skb_try_make_writable(skb, sizeof(*iph) + noff))
157                         goto drop;
158
159                 new_addr &= mask;
160                 new_addr |= addr & ~mask;
161
162                 /* Rewrite IP header */
163                 iph = ip_hdr(skb);
164                 if (egress)
165                         iph->saddr = new_addr;
166                 else
167                         iph->daddr = new_addr;
168
169                 csum_replace4(&iph->check, addr, new_addr);
170         } else if ((iph->frag_off & htons(IP_OFFSET)) ||
171                    iph->protocol != IPPROTO_ICMP) {
172                 goto out;
173         }
174
175         ihl = iph->ihl * 4;
176
177         /* It would be nice to share code with stateful NAT. */
178         switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
179         case IPPROTO_TCP:
180         {
181                 struct tcphdr *tcph;
182
183                 if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) ||
184                     skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff))
185                         goto drop;
186
187                 tcph = (void *)(skb_network_header(skb) + ihl);
188                 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr,
189                                          true);
190                 break;
191         }
192         case IPPROTO_UDP:
193         {
194                 struct udphdr *udph;
195
196                 if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) ||
197                     skb_try_make_writable(skb, ihl + sizeof(*udph) + noff))
198                         goto drop;
199
200                 udph = (void *)(skb_network_header(skb) + ihl);
201                 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
202                         inet_proto_csum_replace4(&udph->check, skb, addr,
203                                                  new_addr, true);
204                         if (!udph->check)
205                                 udph->check = CSUM_MANGLED_0;
206                 }
207                 break;
208         }
209         case IPPROTO_ICMP:
210         {
211                 struct icmphdr *icmph;
212
213                 if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + noff))
214                         goto drop;
215
216                 icmph = (void *)(skb_network_header(skb) + ihl);
217
218                 if (!icmp_is_err(icmph->type))
219                         break;
220
221                 if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) +
222                                         noff))
223                         goto drop;
224
225                 icmph = (void *)(skb_network_header(skb) + ihl);
226                 iph = (void *)(icmph + 1);
227                 if (egress)
228                         addr = iph->daddr;
229                 else
230                         addr = iph->saddr;
231
232                 if ((old_addr ^ addr) & mask)
233                         break;
234
235                 if (skb_try_make_writable(skb, ihl + sizeof(*icmph) +
236                                           sizeof(*iph) + noff))
237                         goto drop;
238
239                 icmph = (void *)(skb_network_header(skb) + ihl);
240                 iph = (void *)(icmph + 1);
241
242                 new_addr &= mask;
243                 new_addr |= addr & ~mask;
244
245                 /* XXX Fix up the inner checksums. */
246                 if (egress)
247                         iph->daddr = new_addr;
248                 else
249                         iph->saddr = new_addr;
250
251                 inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
252                                          false);
253                 break;
254         }
255         default:
256                 break;
257         }
258
259 out:
260         return action;
261
262 drop:
263         tcf_action_inc_drop_qstats(&p->common);
264         return TC_ACT_SHOT;
265 }
266
267 static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
268                         int bind, int ref)
269 {
270         unsigned char *b = skb_tail_pointer(skb);
271         struct tcf_nat *p = to_tcf_nat(a);
272         struct tc_nat opt = {
273                 .index    = p->tcf_index,
274                 .refcnt   = refcount_read(&p->tcf_refcnt) - ref,
275                 .bindcnt  = atomic_read(&p->tcf_bindcnt) - bind,
276         };
277         struct tcf_nat_parms *parms;
278         struct tcf_t t;
279
280         spin_lock_bh(&p->tcf_lock);
281
282         opt.action = p->tcf_action;
283
284         parms = rcu_dereference_protected(p->parms, lockdep_is_held(&p->tcf_lock));
285
286         opt.old_addr = parms->old_addr;
287         opt.new_addr = parms->new_addr;
288         opt.mask = parms->mask;
289         opt.flags = parms->flags;
290
291         if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
292                 goto nla_put_failure;
293
294         tcf_tm_dump(&t, &p->tcf_tm);
295         if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD))
296                 goto nla_put_failure;
297         spin_unlock_bh(&p->tcf_lock);
298
299         return skb->len;
300
301 nla_put_failure:
302         spin_unlock_bh(&p->tcf_lock);
303         nlmsg_trim(skb, b);
304         return -1;
305 }
306
307 static void tcf_nat_cleanup(struct tc_action *a)
308 {
309         struct tcf_nat *p = to_tcf_nat(a);
310         struct tcf_nat_parms *parms;
311
312         parms = rcu_dereference_protected(p->parms, 1);
313         if (parms)
314                 kfree_rcu(parms, rcu);
315 }
316
317 static struct tc_action_ops act_nat_ops = {
318         .kind           =       "nat",
319         .id             =       TCA_ID_NAT,
320         .owner          =       THIS_MODULE,
321         .act            =       tcf_nat_act,
322         .dump           =       tcf_nat_dump,
323         .init           =       tcf_nat_init,
324         .cleanup        =       tcf_nat_cleanup,
325         .size           =       sizeof(struct tcf_nat),
326 };
327
328 static __net_init int nat_init_net(struct net *net)
329 {
330         struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id);
331
332         return tc_action_net_init(net, tn, &act_nat_ops);
333 }
334
335 static void __net_exit nat_exit_net(struct list_head *net_list)
336 {
337         tc_action_net_exit(net_list, act_nat_ops.net_id);
338 }
339
340 static struct pernet_operations nat_net_ops = {
341         .init = nat_init_net,
342         .exit_batch = nat_exit_net,
343         .id   = &act_nat_ops.net_id,
344         .size = sizeof(struct tc_action_net),
345 };
346
/* Module metadata. */
MODULE_DESCRIPTION("Stateless NAT actions");
MODULE_LICENSE("GPL");
349
350 static int __init nat_init_module(void)
351 {
352         return tcf_register_action(&act_nat_ops, &nat_net_ops);
353 }
354
/*
 * Module exit point: unregister the NAT action ops and their per-namespace
 * operations, mirroring nat_init_module().
 */
static void __exit nat_cleanup_module(void)
{
        tcf_unregister_action(&act_nat_ops, &nat_net_ops);
}
359
/* Hook the load/unload handlers defined above into the module machinery. */
module_init(nat_init_module);
module_exit(nat_cleanup_module);