GNU Linux-libre 6.9.2-gnu
[releases.git] / net / netfilter / nfnetlink_queue.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * This is a module which is used for queueing packets and communicating with
4  * userspace via nfnetlink.
5  *
6  * (C) 2005 by Harald Welte <laforge@netfilter.org>
7  * (C) 2007 by Patrick McHardy <kaber@trash.net>
8  *
9  * Based on the old ipv4-only ip_queue.c:
10  * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
11  * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
12  */
13
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16 #include <linux/module.h>
17 #include <linux/skbuff.h>
18 #include <linux/init.h>
19 #include <linux/spinlock.h>
20 #include <linux/slab.h>
21 #include <linux/notifier.h>
22 #include <linux/netdevice.h>
23 #include <linux/netfilter.h>
24 #include <linux/proc_fs.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h>
27 #include <linux/netfilter_bridge.h>
28 #include <linux/netfilter/nfnetlink.h>
29 #include <linux/netfilter/nfnetlink_queue.h>
30 #include <linux/netfilter/nf_conntrack_common.h>
31 #include <linux/list.h>
32 #include <linux/cgroup-defs.h>
33 #include <net/gso.h>
34 #include <net/sock.h>
35 #include <net/tcp_states.h>
36 #include <net/netfilter/nf_queue.h>
37 #include <net/netns/generic.h>
38
39 #include <linux/atomic.h>
40
41 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
42 #include "../bridge/br_private.h"
43 #endif
44
45 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
46 #include <net/netfilter/nf_conntrack.h>
47 #endif
48
49 #define NFQNL_QMAX_DEFAULT 1024
50
51 /* We're using struct nlattr which has 16bit nla_len. Note that nla_len
52  * includes the header length. Thus, the maximum packet length that we
53  * support is 65531 bytes. We send truncated packets if the specified length
54  * is larger than that.  Userspace can check for presence of NFQA_CAP_LEN
55  * attribute to detect truncation.
56  */
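/* NLA_HDRLEN is 4, so the define below works out to 65535 - 4 = 65531 bytes,
 * the figure quoted in the comment above.
 */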
57 #define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
58
59 struct nfqnl_instance {
60         struct hlist_node hlist;                /* global list of queues */
61         struct rcu_head rcu;
62
63         u32 peer_portid;
64         unsigned int queue_maxlen;
65         unsigned int copy_range;
66         unsigned int queue_dropped;
67         unsigned int queue_user_dropped;
68
69
70         u_int16_t queue_num;                    /* number of this queue */
71         u_int8_t copy_mode;
72         u_int32_t flags;                        /* Set using NFQA_CFG_FLAGS */
73 /*
74  * The following fields are dirtied for each queued packet;
75  * keep them in the same cache line if possible.
76  */
77         spinlock_t      lock    ____cacheline_aligned_in_smp;
78         unsigned int    queue_total;
79         unsigned int    id_sequence;            /* 'sequence' of pkt ids */
80         struct list_head queue_list;            /* packets in queue */
81 };
82
83 typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
84
85 static unsigned int nfnl_queue_net_id __read_mostly;
86
87 #define INSTANCE_BUCKETS        16
88 struct nfnl_queue_net {
89         spinlock_t instances_lock;
90         struct hlist_head instance_table[INSTANCE_BUCKETS];
91 };
92
93 static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
94 {
95         return net_generic(net, nfnl_queue_net_id);
96 }
97
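/* Fold both bytes of the 16-bit queue number together and spread the result
 * across the INSTANCE_BUCKETS (16) hash buckets.
 */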
98 static inline u_int8_t instance_hashfn(u_int16_t queue_num)
99 {
100         return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
101 }
102
103 static struct nfqnl_instance *
104 instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
105 {
106         struct hlist_head *head;
107         struct nfqnl_instance *inst;
108
109         head = &q->instance_table[instance_hashfn(queue_num)];
110         hlist_for_each_entry_rcu(inst, head, hlist) {
111                 if (inst->queue_num == queue_num)
112                         return inst;
113         }
114         return NULL;
115 }
116
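/* Allocate and register a new queue instance under instances_lock.  The
 * module reference taken here is dropped in instance_destroy_rcu(), so the
 * module cannot be unloaded while an instance is still bound.
 */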
117 static struct nfqnl_instance *
118 instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
119 {
120         struct nfqnl_instance *inst;
121         unsigned int h;
122         int err;
123
124         spin_lock(&q->instances_lock);
125         if (instance_lookup(q, queue_num)) {
126                 err = -EEXIST;
127                 goto out_unlock;
128         }
129
130         inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
131         if (!inst) {
132                 err = -ENOMEM;
133                 goto out_unlock;
134         }
135
136         inst->queue_num = queue_num;
137         inst->peer_portid = portid;
138         inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
139         inst->copy_range = NFQNL_MAX_COPY_RANGE;
140         inst->copy_mode = NFQNL_COPY_NONE;
141         spin_lock_init(&inst->lock);
142         INIT_LIST_HEAD(&inst->queue_list);
143
144         if (!try_module_get(THIS_MODULE)) {
145                 err = -EAGAIN;
146                 goto out_free;
147         }
148
149         h = instance_hashfn(queue_num);
150         hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]);
151
152         spin_unlock(&q->instances_lock);
153
154         return inst;
155
156 out_free:
157         kfree(inst);
158 out_unlock:
159         spin_unlock(&q->instances_lock);
160         return ERR_PTR(err);
161 }
162
163 static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
164                         unsigned long data);
165
166 static void
167 instance_destroy_rcu(struct rcu_head *head)
168 {
169         struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
170                                                    rcu);
171
172         nfqnl_flush(inst, NULL, 0);
173         kfree(inst);
174         module_put(THIS_MODULE);
175 }
176
177 static void
178 __instance_destroy(struct nfqnl_instance *inst)
179 {
180         hlist_del_rcu(&inst->hlist);
181         call_rcu(&inst->rcu, instance_destroy_rcu);
182 }
183
184 static void
185 instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
186 {
187         spin_lock(&q->instances_lock);
188         __instance_destroy(inst);
189         spin_unlock(&q->instances_lock);
190 }
191
192 static inline void
193 __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
194 {
195         list_add_tail(&entry->list, &queue->queue_list);
196         queue->queue_total++;
197 }
198
199 static void
200 __dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
201 {
202         list_del(&entry->list);
203         queue->queue_total--;
204 }
205
206 static struct nf_queue_entry *
207 find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
208 {
209         struct nf_queue_entry *entry = NULL, *i;
210
211         spin_lock_bh(&queue->lock);
212
213         list_for_each_entry(i, &queue->queue_list, list) {
214                 if (i->id == id) {
215                         entry = i;
216                         break;
217                 }
218         }
219
220         if (entry)
221                 __dequeue_entry(queue, entry);
222
223         spin_unlock_bh(&queue->lock);
224
225         return entry;
226 }
227
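/* Resume hook traversal at *index.  NF_REPEAT re-runs the same hook; any
 * other non-accept verdict stops the walk and is returned with *index
 * pointing at the hook entry that issued it.
 */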
228 static unsigned int nf_iterate(struct sk_buff *skb,
229                                struct nf_hook_state *state,
230                                const struct nf_hook_entries *hooks,
231                                unsigned int *index)
232 {
233         const struct nf_hook_entry *hook;
234         unsigned int verdict, i = *index;
235
236         while (i < hooks->num_hook_entries) {
237                 hook = &hooks->hooks[i];
238 repeat:
239                 verdict = nf_hook_entry_hookfn(hook, skb, state);
240                 if (verdict != NF_ACCEPT) {
241                         *index = i;
242                         if (verdict != NF_REPEAT)
243                                 return verdict;
244                         goto repeat;
245                 }
246                 i++;
247         }
248
249         *index = i;
250         return NF_ACCEPT;
251 }
252
253 static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum)
254 {
255         switch (pf) {
256 #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
257         case NFPROTO_BRIDGE:
258                 return rcu_dereference(net->nf.hooks_bridge[hooknum]);
259 #endif
260         case NFPROTO_IPV4:
261                 return rcu_dereference(net->nf.hooks_ipv4[hooknum]);
262         case NFPROTO_IPV6:
263                 return rcu_dereference(net->nf.hooks_ipv6[hooknum]);
264         default:
265                 WARN_ON_ONCE(1);
266                 return NULL;
267         }
268
269         return NULL;
270 }
271
272 static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
273 {
274 #ifdef CONFIG_INET
275         const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
276
277         if (entry->state.hook == NF_INET_LOCAL_OUT) {
278                 const struct iphdr *iph = ip_hdr(skb);
279
280                 if (!(iph->tos == rt_info->tos &&
281                       skb->mark == rt_info->mark &&
282                       iph->daddr == rt_info->daddr &&
283                       iph->saddr == rt_info->saddr))
284                         return ip_route_me_harder(entry->state.net, entry->state.sk,
285                                                   skb, RTN_UNSPEC);
286         }
287 #endif
288         return 0;
289 }
290
291 static int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
292 {
293         const struct nf_ipv6_ops *v6ops;
294         int ret = 0;
295
296         switch (entry->state.pf) {
297         case AF_INET:
298                 ret = nf_ip_reroute(skb, entry);
299                 break;
300         case AF_INET6:
301                 v6ops = rcu_dereference(nf_ipv6_ops);
302                 if (v6ops)
303                         ret = v6ops->reroute(skb, entry);
304                 break;
305         }
306         return ret;
307 }
308
309 /* caller must hold rcu read-side lock */
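/* Resume traversal after a userspace verdict: NF_REPEAT re-runs the queueing
 * hook itself, NF_ACCEPT re-routes the packet and continues with the
 * remaining hooks, and the queue entry is released before returning.
 */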
310 static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
311 {
312         const struct nf_hook_entry *hook_entry;
313         const struct nf_hook_entries *hooks;
314         struct sk_buff *skb = entry->skb;
315         const struct net *net;
316         unsigned int i;
317         int err;
318         u8 pf;
319
320         net = entry->state.net;
321         pf = entry->state.pf;
322
323         hooks = nf_hook_entries_head(net, pf, entry->state.hook);
324
325         i = entry->hook_index;
326         if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
327                 kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP);
328                 nf_queue_entry_free(entry);
329                 return;
330         }
331
332         hook_entry = &hooks->hooks[i];
333
334         /* Continue traversal iff userspace said ok... */
335         if (verdict == NF_REPEAT)
336                 verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
337
338         if (verdict == NF_ACCEPT) {
339                 if (nf_reroute(skb, entry) < 0)
340                         verdict = NF_DROP;
341         }
342
343         if (verdict == NF_ACCEPT) {
344 next_hook:
345                 ++i;
346                 verdict = nf_iterate(skb, &entry->state, hooks, &i);
347         }
348
349         switch (verdict & NF_VERDICT_MASK) {
350         case NF_ACCEPT:
351         case NF_STOP:
352                 local_bh_disable();
353                 entry->state.okfn(entry->state.net, entry->state.sk, skb);
354                 local_bh_enable();
355                 break;
356         case NF_QUEUE:
357                 err = nf_queue(skb, &entry->state, i, verdict);
358                 if (err == 1)
359                         goto next_hook;
360                 break;
361         case NF_STOLEN:
362                 break;
363         default:
364                 kfree_skb(skb);
365         }
366
367         nf_queue_entry_free(entry);
368 }
369
370 static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
371 {
372         const struct nf_ct_hook *ct_hook;
373
374         if (verdict == NF_ACCEPT ||
375             verdict == NF_REPEAT ||
376             verdict == NF_STOP) {
377                 unsigned int ct_verdict = verdict;
378
379                 rcu_read_lock();
380                 ct_hook = rcu_dereference(nf_ct_hook);
381                 if (ct_hook)
382                         ct_verdict = ct_hook->update(entry->state.net, entry->skb);
383                 rcu_read_unlock();
384
385                 switch (ct_verdict & NF_VERDICT_MASK) {
386                 case NF_ACCEPT:
387                         /* follow userspace verdict, could be REPEAT */
388                         break;
389                 case NF_STOLEN:
390                         nf_queue_entry_free(entry);
391                         return;
392                 default:
393                         verdict = ct_verdict & NF_VERDICT_MASK;
394                         break;
395                 }
396         }
397         nf_reinject(entry, verdict);
398 }
399
400 static void
401 nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
402 {
403         struct nf_queue_entry *entry, *next;
404
405         spin_lock_bh(&queue->lock);
406         list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
407                 if (!cmpfn || cmpfn(entry, data)) {
408                         list_del(&entry->list);
409                         queue->queue_total--;
410                         nfqnl_reinject(entry, NF_DROP);
411                 }
412         }
413         spin_unlock_bh(&queue->lock);
414 }
415
416 static int
417 nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,
418                       bool csum_verify)
419 {
420         __u32 flags = 0;
421
422         if (packet->ip_summed == CHECKSUM_PARTIAL)
423                 flags = NFQA_SKB_CSUMNOTREADY;
424         else if (csum_verify)
425                 flags = NFQA_SKB_CSUM_NOTVERIFIED;
426
427         if (skb_is_gso(packet))
428                 flags |= NFQA_SKB_GSO;
429
430         return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0;
431 }
432
433 static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
434 {
435         const struct cred *cred;
436
437         if (!sk_fullsock(sk))
438                 return 0;
439
440         read_lock_bh(&sk->sk_callback_lock);
441         if (sk->sk_socket && sk->sk_socket->file) {
442                 cred = sk->sk_socket->file->f_cred;
443                 if (nla_put_be32(skb, NFQA_UID,
444                     htonl(from_kuid_munged(&init_user_ns, cred->fsuid))))
445                         goto nla_put_failure;
446                 if (nla_put_be32(skb, NFQA_GID,
447                     htonl(from_kgid_munged(&init_user_ns, cred->fsgid))))
448                         goto nla_put_failure;
449         }
450         read_unlock_bh(&sk->sk_callback_lock);
451         return 0;
452
453 nla_put_failure:
454         read_unlock_bh(&sk->sk_callback_lock);
455         return -1;
456 }
457
458 static int nfqnl_put_sk_classid(struct sk_buff *skb, struct sock *sk)
459 {
460 #if IS_ENABLED(CONFIG_CGROUP_NET_CLASSID)
461         if (sk && sk_fullsock(sk)) {
462                 u32 classid = sock_cgroup_classid(&sk->sk_cgrp_data);
463
464                 if (classid && nla_put_be32(skb, NFQA_CGROUP_CLASSID, htonl(classid)))
465                         return -1;
466         }
467 #endif
468         return 0;
469 }
470
471 static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata)
472 {
473         u32 seclen = 0;
474 #if IS_ENABLED(CONFIG_NETWORK_SECMARK)
475         if (!skb || !sk_fullsock(skb->sk))
476                 return 0;
477
478         read_lock_bh(&skb->sk->sk_callback_lock);
479
480         if (skb->secmark)
481                 security_secid_to_secctx(skb->secmark, secdata, &seclen);
482
483         read_unlock_bh(&skb->sk->sk_callback_lock);
484 #endif
485         return seclen;
486 }
487
488 static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry)
489 {
490         struct sk_buff *entskb = entry->skb;
491         u32 nlalen = 0;
492
493         if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb))
494                 return 0;
495
496         if (skb_vlan_tag_present(entskb))
497                 nlalen += nla_total_size(nla_total_size(sizeof(__be16)) +
498                                          nla_total_size(sizeof(__be16)));
499
500         if (entskb->network_header > entskb->mac_header)
501                 nlalen += nla_total_size((entskb->network_header -
502                                           entskb->mac_header));
503
504         return nlalen;
505 }
506
507 static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb)
508 {
509         struct sk_buff *entskb = entry->skb;
510
511         if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb))
512                 return 0;
513
514         if (skb_vlan_tag_present(entskb)) {
515                 struct nlattr *nest;
516
517                 nest = nla_nest_start(skb, NFQA_VLAN);
518                 if (!nest)
519                         goto nla_put_failure;
520
521                 if (nla_put_be16(skb, NFQA_VLAN_TCI, htons(entskb->vlan_tci)) ||
522                     nla_put_be16(skb, NFQA_VLAN_PROTO, entskb->vlan_proto))
523                         goto nla_put_failure;
524
525                 nla_nest_end(skb, nest);
526         }
527
528         if (entskb->mac_header < entskb->network_header) {
529                 int len = (int)(entskb->network_header - entskb->mac_header);
530
531                 if (nla_put(skb, NFQA_L2HDR, len, skb_mac_header(entskb)))
532                         goto nla_put_failure;
533         }
534
535         return 0;
536
537 nla_put_failure:
538         return -1;
539 }
540
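/* Build the NFQNL_MSG_PACKET netlink message for a queued skb.  The packet
 * id field is filled in later by the caller, which is why a pointer to it
 * is handed back through packet_id_ptr.
 */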
541 static struct sk_buff *
542 nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
543                            struct nf_queue_entry *entry,
544                            __be32 **packet_id_ptr)
545 {
546         size_t size;
547         size_t data_len = 0, cap_len = 0;
548         unsigned int hlen = 0;
549         struct sk_buff *skb;
550         struct nlattr *nla;
551         struct nfqnl_msg_packet_hdr *pmsg;
552         struct nlmsghdr *nlh;
553         struct sk_buff *entskb = entry->skb;
554         struct net_device *indev;
555         struct net_device *outdev;
556         struct nf_conn *ct = NULL;
557         enum ip_conntrack_info ctinfo = 0;
558         const struct nfnl_ct_hook *nfnl_ct;
559         bool csum_verify;
560         char *secdata = NULL;
561         u32 seclen = 0;
562         ktime_t tstamp;
563
564         size = nlmsg_total_size(sizeof(struct nfgenmsg))
565                 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
566                 + nla_total_size(sizeof(u_int32_t))     /* ifindex */
567                 + nla_total_size(sizeof(u_int32_t))     /* ifindex */
568 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
569                 + nla_total_size(sizeof(u_int32_t))     /* ifindex */
570                 + nla_total_size(sizeof(u_int32_t))     /* ifindex */
571 #endif
572                 + nla_total_size(sizeof(u_int32_t))     /* mark */
573                 + nla_total_size(sizeof(u_int32_t))     /* priority */
574                 + nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
575                 + nla_total_size(sizeof(u_int32_t))     /* skbinfo */
576 #if IS_ENABLED(CONFIG_CGROUP_NET_CLASSID)
577                 + nla_total_size(sizeof(u_int32_t))     /* classid */
578 #endif
579                 + nla_total_size(sizeof(u_int32_t));    /* cap_len */
580
581         tstamp = skb_tstamp_cond(entskb, false);
582         if (tstamp)
583                 size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
584
585         size += nfqnl_get_bridge_size(entry);
586
587         if (entry->state.hook <= NF_INET_FORWARD ||
588            (entry->state.hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
589                 csum_verify = !skb_csum_unnecessary(entskb);
590         else
591                 csum_verify = false;
592
593         outdev = entry->state.out;
594
595         switch ((enum nfqnl_config_mode)READ_ONCE(queue->copy_mode)) {
596         case NFQNL_COPY_META:
597         case NFQNL_COPY_NONE:
598                 break;
599
600         case NFQNL_COPY_PACKET:
601                 if (!(queue->flags & NFQA_CFG_F_GSO) &&
602                     entskb->ip_summed == CHECKSUM_PARTIAL &&
603                     skb_checksum_help(entskb))
604                         return NULL;
605
606                 data_len = READ_ONCE(queue->copy_range);
607                 if (data_len > entskb->len)
608                         data_len = entskb->len;
609
610                 hlen = skb_zerocopy_headlen(entskb);
611                 hlen = min_t(unsigned int, hlen, data_len);
612                 size += sizeof(struct nlattr) + hlen;
613                 cap_len = entskb->len;
614                 break;
615         }
616
617         nfnl_ct = rcu_dereference(nfnl_ct_hook);
618
619 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
620         if (queue->flags & NFQA_CFG_F_CONNTRACK) {
621                 if (nfnl_ct != NULL) {
622                         ct = nf_ct_get(entskb, &ctinfo);
623                         if (ct != NULL)
624                                 size += nfnl_ct->build_size(ct);
625                 }
626         }
627 #endif
628
629         if (queue->flags & NFQA_CFG_F_UID_GID) {
630                 size += (nla_total_size(sizeof(u_int32_t))      /* uid */
631                         + nla_total_size(sizeof(u_int32_t)));   /* gid */
632         }
633
634         if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) {
635                 seclen = nfqnl_get_sk_secctx(entskb, &secdata);
636                 if (seclen)
637                         size += nla_total_size(seclen);
638         }
639
640         skb = alloc_skb(size, GFP_ATOMIC);
641         if (!skb) {
642                 skb_tx_error(entskb);
643                 goto nlmsg_failure;
644         }
645
646         nlh = nfnl_msg_put(skb, 0, 0,
647                            nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET),
648                            0, entry->state.pf, NFNETLINK_V0,
649                            htons(queue->queue_num));
650         if (!nlh) {
651                 skb_tx_error(entskb);
652                 kfree_skb(skb);
653                 goto nlmsg_failure;
654         }
655
656         nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
657         pmsg = nla_data(nla);
658         pmsg->hw_protocol       = entskb->protocol;
659         pmsg->hook              = entry->state.hook;
660         *packet_id_ptr          = &pmsg->packet_id;
661
662         indev = entry->state.in;
663         if (indev) {
664 #if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
665                 if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)))
666                         goto nla_put_failure;
667 #else
668                 if (entry->state.pf == PF_BRIDGE) {
669                         /* Case 1: indev is physical input device, we need to
670                          * look for bridge group (when called from
671                          * netfilter_bridge) */
672                         if (nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
673                                          htonl(indev->ifindex)) ||
674                         /* this is the bridge group "brX" */
675                         /* rcu_read_lock()ed by __nf_queue */
676                             nla_put_be32(skb, NFQA_IFINDEX_INDEV,
677                                          htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
678                                 goto nla_put_failure;
679                 } else {
680                         int physinif;
681
682                         /* Case 2: indev is bridge group, we need to look for
683                          * physical device (when called from ipv4) */
684                         if (nla_put_be32(skb, NFQA_IFINDEX_INDEV,
685                                          htonl(indev->ifindex)))
686                                 goto nla_put_failure;
687
688                         physinif = nf_bridge_get_physinif(entskb);
689                         if (physinif &&
690                             nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
691                                          htonl(physinif)))
692                                 goto nla_put_failure;
693                 }
694 #endif
695         }
696
697         if (outdev) {
698 #if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
699                 if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)))
700                         goto nla_put_failure;
701 #else
702                 if (entry->state.pf == PF_BRIDGE) {
703                         /* Case 1: outdev is physical output device, we need to
704                          * look for bridge group (when called from
705                          * netfilter_bridge) */
706                         if (nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
707                                          htonl(outdev->ifindex)) ||
708                         /* this is the bridge group "brX" */
709                         /* rcu_read_lock()ed by __nf_queue */
710                             nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
711                                          htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
712                                 goto nla_put_failure;
713                 } else {
714                         int physoutif;
715
716                         /* Case 2: outdev is bridge group, we need to look for
717                          * physical output device (when called from ipv4) */
718                         if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
719                                          htonl(outdev->ifindex)))
720                                 goto nla_put_failure;
721
722                         physoutif = nf_bridge_get_physoutif(entskb);
723                         if (physoutif &&
724                             nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
725                                          htonl(physoutif)))
726                                 goto nla_put_failure;
727                 }
728 #endif
729         }
730
731         if (entskb->mark &&
732             nla_put_be32(skb, NFQA_MARK, htonl(entskb->mark)))
733                 goto nla_put_failure;
734
735         if (entskb->priority &&
736             nla_put_be32(skb, NFQA_PRIORITY, htonl(entskb->priority)))
737                 goto nla_put_failure;
738
739         if (indev && entskb->dev &&
740             skb_mac_header_was_set(entskb) &&
741             skb_mac_header_len(entskb) != 0) {
742                 struct nfqnl_msg_packet_hw phw;
743                 int len;
744
745                 memset(&phw, 0, sizeof(phw));
746                 len = dev_parse_header(entskb, phw.hw_addr);
747                 if (len) {
748                         phw.hw_addrlen = htons(len);
749                         if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw))
750                                 goto nla_put_failure;
751                 }
752         }
753
754         if (nfqnl_put_bridge(entry, skb) < 0)
755                 goto nla_put_failure;
756
757         if (entry->state.hook <= NF_INET_FORWARD && tstamp) {
758                 struct nfqnl_msg_packet_timestamp ts;
759                 struct timespec64 kts = ktime_to_timespec64(tstamp);
760
761                 ts.sec = cpu_to_be64(kts.tv_sec);
762                 ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
763
764                 if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts))
765                         goto nla_put_failure;
766         }
767
768         if ((queue->flags & NFQA_CFG_F_UID_GID) && entskb->sk &&
769             nfqnl_put_sk_uidgid(skb, entskb->sk) < 0)
770                 goto nla_put_failure;
771
772         if (nfqnl_put_sk_classid(skb, entskb->sk) < 0)
773                 goto nla_put_failure;
774
775         if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata))
776                 goto nla_put_failure;
777
778         if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0)
779                 goto nla_put_failure;
780
781         if (cap_len > data_len &&
782             nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
783                 goto nla_put_failure;
784
785         if (nfqnl_put_packet_info(skb, entskb, csum_verify))
786                 goto nla_put_failure;
787
788         if (data_len) {
789                 struct nlattr *nla;
790
791                 if (skb_tailroom(skb) < sizeof(*nla) + hlen)
792                         goto nla_put_failure;
793
794                 nla = skb_put(skb, sizeof(*nla));
795                 nla->nla_type = NFQA_PAYLOAD;
796                 nla->nla_len = nla_attr_size(data_len);
797
798                 if (skb_zerocopy(skb, entskb, data_len, hlen))
799                         goto nla_put_failure;
800         }
801
802         nlh->nlmsg_len = skb->len;
803         if (seclen)
804                 security_release_secctx(secdata, seclen);
805         return skb;
806
807 nla_put_failure:
808         skb_tx_error(entskb);
809         kfree_skb(skb);
810         net_err_ratelimited("nf_queue: error creating packet message\n");
811 nlmsg_failure:
812         if (seclen)
813                 security_release_secctx(secdata, seclen);
814         return NULL;
815 }
816
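/* True if the skb's conntrack entry is being torn down (IPS_DYING) without
 * ever having been confirmed: masking status with IPS_CONFIRMED | IPS_DYING
 * compares equal to IPS_DYING only for a dying, unconfirmed entry.
 */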
817 static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
818 {
819 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
820         static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
821         const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
822
823         if (ct && ((ct->status & flags) == IPS_DYING))
824                 return true;
825 #endif
826         return false;
827 }
828
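/* Queue a single packet: build the netlink message, then publish it to the
 * peer while holding the queue lock.  A full queue or a failed unicast drops
 * the packet unless NFQA_CFG_F_FAIL_OPEN is set, in which case it is
 * reinjected with NF_ACCEPT instead.
 */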
829 static int
830 __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
831                         struct nf_queue_entry *entry)
832 {
833         struct sk_buff *nskb;
834         int err = -ENOBUFS;
835         __be32 *packet_id_ptr;
836         int failopen = 0;
837
838         nskb = nfqnl_build_packet_message(net, queue, entry, &packet_id_ptr);
839         if (nskb == NULL) {
840                 err = -ENOMEM;
841                 goto err_out;
842         }
843         spin_lock_bh(&queue->lock);
844
845         if (nf_ct_drop_unconfirmed(entry))
846                 goto err_out_free_nskb;
847
848         if (queue->queue_total >= queue->queue_maxlen) {
849                 if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
850                         failopen = 1;
851                         err = 0;
852                 } else {
853                         queue->queue_dropped++;
854                         net_warn_ratelimited("nf_queue: full at %d entries, dropping packet(s)\n",
855                                              queue->queue_total);
856                 }
857                 goto err_out_free_nskb;
858         }
859         entry->id = ++queue->id_sequence;
860         *packet_id_ptr = htonl(entry->id);
861
862         /* nfnetlink_unicast will either free the nskb or add it to a socket */
863         err = nfnetlink_unicast(nskb, net, queue->peer_portid);
864         if (err < 0) {
865                 if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
866                         failopen = 1;
867                         err = 0;
868                 } else {
869                         queue->queue_user_dropped++;
870                 }
871                 goto err_out_unlock;
872         }
873
874         __enqueue_entry(queue, entry);
875
876         spin_unlock_bh(&queue->lock);
877         return 0;
878
879 err_out_free_nskb:
880         kfree_skb(nskb);
881 err_out_unlock:
882         spin_unlock_bh(&queue->lock);
883         if (failopen)
884                 nfqnl_reinject(entry, NF_ACCEPT);
885 err_out:
886         return err;
887 }
888
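/* Duplicate a queue entry for an additional GSO segment, taking the device
 * and state references the copy needs; returns NULL if that fails.
 */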
889 static struct nf_queue_entry *
890 nf_queue_entry_dup(struct nf_queue_entry *e)
891 {
892         struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
893
894         if (!entry)
895                 return NULL;
896
897         if (nf_queue_entry_get_refs(entry))
898                 return entry;
899
900         kfree(entry);
901         return NULL;
902 }
903
904 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
905 /* When called from bridge netfilter, skb->data must point to the MAC header
906  * before calling skb_gso_segment().  Otherwise the original MAC header is
907  * lost and the segmented skbs will be sent to the wrong destination.
908  */
909 static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
910 {
911         if (nf_bridge_info_get(skb))
912                 __skb_push(skb, skb->network_header - skb->mac_header);
913 }
914
915 static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
916 {
917         if (nf_bridge_info_get(skb))
918                 __skb_pull(skb, skb->network_header - skb->mac_header);
919 }
920 #else
921 #define nf_bridge_adjust_skb_data(s) do {} while (0)
922 #define nf_bridge_adjust_segmented_data(s) do {} while (0)
923 #endif
924
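/* Queue one GSO segment.  The last segment reuses the original queue entry;
 * every earlier segment gets its own duplicated entry so that each can be
 * verdicted independently.
 */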
925 static int
926 __nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
927                            struct sk_buff *skb, struct nf_queue_entry *entry)
928 {
929         int ret = -ENOMEM;
930         struct nf_queue_entry *entry_seg;
931
932         nf_bridge_adjust_segmented_data(skb);
933
934         if (skb->next == NULL) { /* last packet, no need to copy entry */
935                 struct sk_buff *gso_skb = entry->skb;
936                 entry->skb = skb;
937                 ret = __nfqnl_enqueue_packet(net, queue, entry);
938                 if (ret)
939                         entry->skb = gso_skb;
940                 return ret;
941         }
942
943         skb_mark_not_on_list(skb);
944
945         entry_seg = nf_queue_entry_dup(entry);
946         if (entry_seg) {
947                 entry_seg->skb = skb;
948                 ret = __nfqnl_enqueue_packet(net, queue, entry_seg);
949                 if (ret)
950                         nf_queue_entry_free(entry_seg);
951         }
952         return ret;
953 }
954
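/* nf_queue handler entry point.  Unless the peer asked for whole GSO packets
 * (NFQA_CFG_F_GSO), the skb is software-segmented and each segment is queued
 * separately.
 */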
955 static int
956 nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
957 {
958         unsigned int queued;
959         struct nfqnl_instance *queue;
960         struct sk_buff *skb, *segs, *nskb;
961         int err = -ENOBUFS;
962         struct net *net = entry->state.net;
963         struct nfnl_queue_net *q = nfnl_queue_pernet(net);
964
965         /* rcu_read_lock()ed by nf_hook_thresh */
966         queue = instance_lookup(q, queuenum);
967         if (!queue)
968                 return -ESRCH;
969
970         if (queue->copy_mode == NFQNL_COPY_NONE)
971                 return -EINVAL;
972
973         skb = entry->skb;
974
975         switch (entry->state.pf) {
976         case NFPROTO_IPV4:
977                 skb->protocol = htons(ETH_P_IP);
978                 break;
979         case NFPROTO_IPV6:
980                 skb->protocol = htons(ETH_P_IPV6);
981                 break;
982         }
983
984         if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(skb))
985                 return __nfqnl_enqueue_packet(net, queue, entry);
986
987         nf_bridge_adjust_skb_data(skb);
988         segs = skb_gso_segment(skb, 0);
989         /* Does not use PTR_ERR to limit the number of error codes that can be
990          * returned by nf_queue.  For instance, callers rely on -ESRCH to
991          * mean 'ignore this hook'.
992          */
993         if (IS_ERR_OR_NULL(segs))
994                 goto out_err;
995         queued = 0;
996         err = 0;
997         skb_list_walk_safe(segs, segs, nskb) {
998                 if (err == 0)
999                         err = __nfqnl_enqueue_packet_gso(net, queue,
1000                                                         segs, entry);
1001                 if (err == 0)
1002                         queued++;
1003                 else
1004                         kfree_skb(segs);
1005         }
1006
1007         if (queued) {
1008                 if (err) /* some segments are already queued */
1009                         nf_queue_entry_free(entry);
1010                 kfree_skb(skb);
1011                 return 0;
1012         }
1013  out_err:
1014         nf_bridge_adjust_segmented_data(skb);
1015         return err;
1016 }
1017
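/* Replace the queued packet's payload with data supplied by userspace,
 * trimming or growing the skb by 'diff' bytes first so the new length fits.
 */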
1018 static int
1019 nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
1020 {
1021         struct sk_buff *nskb;
1022
1023         if (diff < 0) {
1024                 unsigned int min_len = skb_transport_offset(e->skb);
1025
1026                 if (data_len < min_len)
1027                         return -EINVAL;
1028
1029                 if (pskb_trim(e->skb, data_len))
1030                         return -ENOMEM;
1031         } else if (diff > 0) {
1032                 if (data_len > 0xFFFF)
1033                         return -EINVAL;
1034                 if (diff > skb_tailroom(e->skb)) {
1035                         nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
1036                                                diff, GFP_ATOMIC);
1037                         if (!nskb)
1038                                 return -ENOMEM;
1039                         kfree_skb(e->skb);
1040                         e->skb = nskb;
1041                 }
1042                 skb_put(e->skb, diff);
1043         }
1044         if (skb_ensure_writable(e->skb, data_len))
1045                 return -ENOMEM;
1046         skb_copy_to_linear_data(e->skb, data, data_len);
1047         e->skb->ip_summed = CHECKSUM_NONE;
1048         return 0;
1049 }
1050
1051 static int
1052 nfqnl_set_mode(struct nfqnl_instance *queue,
1053                unsigned char mode, unsigned int range)
1054 {
1055         int status = 0;
1056
1057         spin_lock_bh(&queue->lock);
1058         switch (mode) {
1059         case NFQNL_COPY_NONE:
1060         case NFQNL_COPY_META:
1061                 queue->copy_mode = mode;
1062                 queue->copy_range = 0;
1063                 break;
1064
1065         case NFQNL_COPY_PACKET:
1066                 queue->copy_mode = mode;
1067                 if (range == 0 || range > NFQNL_MAX_COPY_RANGE)
1068                         queue->copy_range = NFQNL_MAX_COPY_RANGE;
1069                 else
1070                         queue->copy_range = range;
1071                 break;
1072
1073         default:
1074                 status = -EINVAL;
1075
1076         }
1077         spin_unlock_bh(&queue->lock);
1078
1079         return status;
1080 }
1081
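/* nfqnl_flush() match helper: true if the queued packet entered or will
 * leave through the device with the given ifindex, including bridge
 * physical ports.
 */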
1082 static int
1083 dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
1084 {
1085 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
1086         int physinif, physoutif;
1087
1088         physinif = nf_bridge_get_physinif(entry->skb);
1089         physoutif = nf_bridge_get_physoutif(entry->skb);
1090
1091         if (physinif == ifindex || physoutif == ifindex)
1092                 return 1;
1093 #endif
1094         if (entry->state.in)
1095                 if (entry->state.in->ifindex == ifindex)
1096                         return 1;
1097         if (entry->state.out)
1098                 if (entry->state.out->ifindex == ifindex)
1099                         return 1;
1100
1101         return 0;
1102 }
1103
1104 /* drop all packets with either indev or outdev == ifindex from all queue
1105  * instances */
1106 static void
1107 nfqnl_dev_drop(struct net *net, int ifindex)
1108 {
1109         int i;
1110         struct nfnl_queue_net *q = nfnl_queue_pernet(net);
1111
1112         rcu_read_lock();
1113
1114         for (i = 0; i < INSTANCE_BUCKETS; i++) {
1115                 struct nfqnl_instance *inst;
1116                 struct hlist_head *head = &q->instance_table[i];
1117
1118                 hlist_for_each_entry_rcu(inst, head, hlist)
1119                         nfqnl_flush(inst, dev_cmp, ifindex);
1120         }
1121
1122         rcu_read_unlock();
1123 }
1124
1125 static int
1126 nfqnl_rcv_dev_event(struct notifier_block *this,
1127                     unsigned long event, void *ptr)
1128 {
1129         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1130
1131         /* Drop any packets associated with the downed device */
1132         if (event == NETDEV_DOWN)
1133                 nfqnl_dev_drop(dev_net(dev), dev->ifindex);
1134         return NOTIFY_DONE;
1135 }
1136
1137 static struct notifier_block nfqnl_dev_notifier = {
1138         .notifier_call  = nfqnl_rcv_dev_event,
1139 };
1140
1141 static void nfqnl_nf_hook_drop(struct net *net)
1142 {
1143         struct nfnl_queue_net *q = nfnl_queue_pernet(net);
1144         int i;
1145
1146         /* This function is also called on net namespace error unwind,
1147          * when pernet_ops->init() failed and ->exit() functions of the
1148          * previous pernet_ops get called.
1149          *
1150          * This may result in a call to nfqnl_nf_hook_drop() before
1151          * struct nfnl_queue_net was allocated.
1152          */
1153         if (!q)
1154                 return;
1155
1156         for (i = 0; i < INSTANCE_BUCKETS; i++) {
1157                 struct nfqnl_instance *inst;
1158                 struct hlist_head *head = &q->instance_table[i];
1159
1160                 hlist_for_each_entry_rcu(inst, head, hlist)
1161                         nfqnl_flush(inst, NULL, 0);
1162         }
1163 }
1164
1165 static int
1166 nfqnl_rcv_nl_event(struct notifier_block *this,
1167                    unsigned long event, void *ptr)
1168 {
1169         struct netlink_notify *n = ptr;
1170         struct nfnl_queue_net *q = nfnl_queue_pernet(n->net);
1171
1172         if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
1173                 int i;
1174
1175                 /* destroy all instances for this portid */
1176                 spin_lock(&q->instances_lock);
1177                 for (i = 0; i < INSTANCE_BUCKETS; i++) {
1178                         struct hlist_node *t2;
1179                         struct nfqnl_instance *inst;
1180                         struct hlist_head *head = &q->instance_table[i];
1181
1182                         hlist_for_each_entry_safe(inst, t2, head, hlist) {
1183                                 if (n->portid == inst->peer_portid)
1184                                         __instance_destroy(inst);
1185                         }
1186                 }
1187                 spin_unlock(&q->instances_lock);
1188         }
1189         return NOTIFY_DONE;
1190 }
1191
1192 static struct notifier_block nfqnl_rtnl_notifier = {
1193         .notifier_call  = nfqnl_rcv_nl_event,
1194 };
1195
1196 static const struct nla_policy nfqa_vlan_policy[NFQA_VLAN_MAX + 1] = {
1197         [NFQA_VLAN_TCI]         = { .type = NLA_U16},
1198         [NFQA_VLAN_PROTO]       = { .type = NLA_U16},
1199 };
1200
1201 static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
1202         [NFQA_VERDICT_HDR]      = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
1203         [NFQA_MARK]             = { .type = NLA_U32 },
1204         [NFQA_PAYLOAD]          = { .type = NLA_UNSPEC },
1205         [NFQA_CT]               = { .type = NLA_UNSPEC },
1206         [NFQA_EXP]              = { .type = NLA_UNSPEC },
1207         [NFQA_VLAN]             = { .type = NLA_NESTED },
1208         [NFQA_PRIORITY]         = { .type = NLA_U32 },
1209 };
1210
1211 static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
1212         [NFQA_VERDICT_HDR]      = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
1213         [NFQA_MARK]             = { .type = NLA_U32 },
1214         [NFQA_PRIORITY]         = { .type = NLA_U32 },
1215 };
1216
1217 static struct nfqnl_instance *
1218 verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, u32 nlportid)
1219 {
1220         struct nfqnl_instance *queue;
1221
1222         queue = instance_lookup(q, queue_num);
1223         if (!queue)
1224                 return ERR_PTR(-ENODEV);
1225
1226         if (queue->peer_portid != nlportid)
1227                 return ERR_PTR(-EPERM);
1228
1229         return queue;
1230 }
1231
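/* Fetch and validate the verdict header from userspace; out-of-range
 * verdicts and NF_STOLEN are rejected by returning NULL.
 */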
1232 static struct nfqnl_msg_verdict_hdr*
1233 verdicthdr_get(const struct nlattr * const nfqa[])
1234 {
1235         struct nfqnl_msg_verdict_hdr *vhdr;
1236         unsigned int verdict;
1237
1238         if (!nfqa[NFQA_VERDICT_HDR])
1239                 return NULL;
1240
1241         vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
1242         verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK;
1243         if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN)
1244                 return NULL;
1245         return vhdr;
1246 }
1247
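/* Wraparound-safe "id is newer than max" comparison, in the same spirit as
 * time_after(): the unsigned difference is interpreted as signed.
 */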
1248 static int nfq_id_after(unsigned int id, unsigned int max)
1249 {
1250         return (int)(id - max) > 0;
1251 }
1252
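/* Apply one verdict to every queued packet whose id does not come after the
 * id in the verdict header, dequeuing them as a batch first.
 */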
1253 static int nfqnl_recv_verdict_batch(struct sk_buff *skb,
1254                                     const struct nfnl_info *info,
1255                                     const struct nlattr * const nfqa[])
1256 {
1257         struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
1258         u16 queue_num = ntohs(info->nfmsg->res_id);
1259         struct nf_queue_entry *entry, *tmp;
1260         struct nfqnl_msg_verdict_hdr *vhdr;
1261         struct nfqnl_instance *queue;
1262         unsigned int verdict, maxid;
1263         LIST_HEAD(batch_list);
1264
1265         queue = verdict_instance_lookup(q, queue_num,
1266                                         NETLINK_CB(skb).portid);
1267         if (IS_ERR(queue))
1268                 return PTR_ERR(queue);
1269
1270         vhdr = verdicthdr_get(nfqa);
1271         if (!vhdr)
1272                 return -EINVAL;
1273
1274         verdict = ntohl(vhdr->verdict);
1275         maxid = ntohl(vhdr->id);
1276
1277         spin_lock_bh(&queue->lock);
1278
1279         list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) {
1280                 if (nfq_id_after(entry->id, maxid))
1281                         break;
1282                 __dequeue_entry(queue, entry);
1283                 list_add_tail(&entry->list, &batch_list);
1284         }
1285
1286         spin_unlock_bh(&queue->lock);
1287
1288         if (list_empty(&batch_list))
1289                 return -ENOENT;
1290
1291         list_for_each_entry_safe(entry, tmp, &batch_list, list) {
1292                 if (nfqa[NFQA_MARK])
1293                         entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
1294
1295                 if (nfqa[NFQA_PRIORITY])
1296                         entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY]));
1297
1298                 nfqnl_reinject(entry, verdict);
1299         }
1300         return 0;
1301 }
1302
1303 static struct nf_conn *nfqnl_ct_parse(const struct nfnl_ct_hook *nfnl_ct,
1304                                       const struct nlmsghdr *nlh,
1305                                       const struct nlattr * const nfqa[],
1306                                       struct nf_queue_entry *entry,
1307                                       enum ip_conntrack_info *ctinfo)
1308 {
1309 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
1310         struct nf_conn *ct;
1311
1312         ct = nf_ct_get(entry->skb, ctinfo);
1313         if (ct == NULL)
1314                 return NULL;
1315
1316         if (nfnl_ct->parse(nfqa[NFQA_CT], ct) < 0)
1317                 return NULL;
1318
1319         if (nfqa[NFQA_EXP])
1320                 nfnl_ct->attach_expect(nfqa[NFQA_EXP], ct,
1321                                       NETLINK_CB(entry->skb).portid,
1322                                       nlmsg_report(nlh));
1323         return ct;
1324 #else
1325         return NULL;
1326 #endif
1327 }
1328
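/* Restore bridge-specific metadata supplied by userspace: a VLAN tag given
 * as NFQA_VLAN and/or a replacement L2 header given as NFQA_L2HDR.
 */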
1329 static int nfqa_parse_bridge(struct nf_queue_entry *entry,
1330                              const struct nlattr * const nfqa[])
1331 {
1332         if (nfqa[NFQA_VLAN]) {
1333                 struct nlattr *tb[NFQA_VLAN_MAX + 1];
1334                 int err;
1335
1336                 err = nla_parse_nested_deprecated(tb, NFQA_VLAN_MAX,
1337                                                   nfqa[NFQA_VLAN],
1338                                                   nfqa_vlan_policy, NULL);
1339                 if (err < 0)
1340                         return err;
1341
1342                 if (!tb[NFQA_VLAN_TCI] || !tb[NFQA_VLAN_PROTO])
1343                         return -EINVAL;
1344
1345                 __vlan_hwaccel_put_tag(entry->skb,
1346                         nla_get_be16(tb[NFQA_VLAN_PROTO]),
1347                         ntohs(nla_get_be16(tb[NFQA_VLAN_TCI])));
1348         }
1349
1350         if (nfqa[NFQA_L2HDR]) {
1351                 int mac_header_len = entry->skb->network_header -
1352                         entry->skb->mac_header;
1353
1354                 if (mac_header_len != nla_len(nfqa[NFQA_L2HDR]))
1355                         return -EINVAL;
1356                 else if (mac_header_len > 0)
1357                         memcpy(skb_mac_header(entry->skb),
1358                                nla_data(nfqa[NFQA_L2HDR]),
1359                                mac_header_len);
1360         }
1361
1362         return 0;
1363 }
1364
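/* NFQNL_MSG_VERDICT handler: look up the queued packet by id, apply any
 * requested mangling and metadata updates, then reinject it with the
 * verdict chosen by userspace.
 */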
1365 static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
1366                               const struct nlattr * const nfqa[])
1367 {
1368         struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
1369         u_int16_t queue_num = ntohs(info->nfmsg->res_id);
1370         const struct nfnl_ct_hook *nfnl_ct;
1371         struct nfqnl_msg_verdict_hdr *vhdr;
1372         enum ip_conntrack_info ctinfo;
1373         struct nfqnl_instance *queue;
1374         struct nf_queue_entry *entry;
1375         struct nf_conn *ct = NULL;
1376         unsigned int verdict;
1377         int err;
1378
1379         queue = verdict_instance_lookup(q, queue_num,
1380                                         NETLINK_CB(skb).portid);
1381         if (IS_ERR(queue))
1382                 return PTR_ERR(queue);
1383
1384         vhdr = verdicthdr_get(nfqa);
1385         if (!vhdr)
1386                 return -EINVAL;
1387
1388         verdict = ntohl(vhdr->verdict);
1389
1390         entry = find_dequeue_entry(queue, ntohl(vhdr->id));
1391         if (entry == NULL)
1392                 return -ENOENT;
1393
1394         /* rcu lock already held from nfnl->call_rcu. */
1395         nfnl_ct = rcu_dereference(nfnl_ct_hook);
1396
1397         if (nfqa[NFQA_CT]) {
1398                 if (nfnl_ct != NULL)
1399                         ct = nfqnl_ct_parse(nfnl_ct, info->nlh, nfqa, entry,
1400                                             &ctinfo);
1401         }
1402
1403         if (entry->state.pf == PF_BRIDGE) {
1404                 err = nfqa_parse_bridge(entry, nfqa);
1405                 if (err < 0)
1406                         return err;
1407         }
1408
1409         if (nfqa[NFQA_PAYLOAD]) {
1410                 u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]);
1411                 int diff = payload_len - entry->skb->len;
1412
1413                 if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
1414                                  payload_len, entry, diff) < 0)
1415                         verdict = NF_DROP;
1416
1417                 if (ct && diff)
1418                         nfnl_ct->seq_adjust(entry->skb, ct, ctinfo, diff);
1419         }
1420
1421         if (nfqa[NFQA_MARK])
1422                 entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
1423
1424         if (nfqa[NFQA_PRIORITY])
1425                 entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY]));
1426
1427         nfqnl_reinject(entry, verdict);
1428         return 0;
1429 }
1430
1431 static int nfqnl_recv_unsupp(struct sk_buff *skb, const struct nfnl_info *info,
1432                              const struct nlattr * const cda[])
1433 {
1434         return -ENOTSUPP;
1435 }
1436
1437 static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
1438         [NFQA_CFG_CMD]          = { .len = sizeof(struct nfqnl_msg_config_cmd) },
1439         [NFQA_CFG_PARAMS]       = { .len = sizeof(struct nfqnl_msg_config_params) },
1440         [NFQA_CFG_QUEUE_MAXLEN] = { .type = NLA_U32 },
1441         [NFQA_CFG_MASK]         = { .type = NLA_U32 },
1442         [NFQA_CFG_FLAGS]        = { .type = NLA_U32 },
1443 };
1444
1445 static const struct nf_queue_handler nfqh = {
1446         .outfn          = nfqnl_enqueue_packet,
1447         .nf_hook_drop   = nfqnl_nf_hook_drop,
1448 };
1449
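/* NFQNL_MSG_CONFIG handler: bind/unbind queue instances and update copy
 * mode, queue length and flags for an existing instance.
 */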
1450 static int nfqnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
1451                              const struct nlattr * const nfqa[])
1452 {
1453         struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
1454         u_int16_t queue_num = ntohs(info->nfmsg->res_id);
1455         struct nfqnl_msg_config_cmd *cmd = NULL;
1456         struct nfqnl_instance *queue;
1457         __u32 flags = 0, mask = 0;
1458         int ret = 0;
1459
1460         if (nfqa[NFQA_CFG_CMD]) {
1461                 cmd = nla_data(nfqa[NFQA_CFG_CMD]);
1462
1463                 /* Obsolete commands without queue context */
1464                 switch (cmd->command) {
1465                 case NFQNL_CFG_CMD_PF_BIND: return 0;
1466                 case NFQNL_CFG_CMD_PF_UNBIND: return 0;
1467                 }
1468         }
1469
1470         /* Check if we support these flags in the first place; the dependencies
1471          * have to be there too so that atomicity is not broken.
1472          */
1473         if (nfqa[NFQA_CFG_FLAGS]) {
1474                 if (!nfqa[NFQA_CFG_MASK]) {
1475                         /* A mask is needed to specify which flags are being
1476                          * changed.
1477                          */
1478                         return -EINVAL;
1479                 }
1480
1481                 flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS]));
1482                 mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK]));
1483
1484                 if (flags >= NFQA_CFG_F_MAX)
1485                         return -EOPNOTSUPP;
1486
1487 #if !IS_ENABLED(CONFIG_NETWORK_SECMARK)
1488                 if (flags & mask & NFQA_CFG_F_SECCTX)
1489                         return -EOPNOTSUPP;
1490 #endif
1491                 if ((flags & mask & NFQA_CFG_F_CONNTRACK) &&
1492                     !rcu_access_pointer(nfnl_ct_hook)) {
1493 #ifdef CONFIG_MODULES
1494                         nfnl_unlock(NFNL_SUBSYS_QUEUE);
1495                         request_module("ip_conntrack_netlink");
1496                         nfnl_lock(NFNL_SUBSYS_QUEUE);
1497                         if (rcu_access_pointer(nfnl_ct_hook))
1498                                 return -EAGAIN;
1499 #endif
1500                         return -EOPNOTSUPP;
1501                 }
1502         }
1503
1504         rcu_read_lock();
1505         queue = instance_lookup(q, queue_num);
1506         if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
1507                 ret = -EPERM;
1508                 goto err_out_unlock;
1509         }
1510
1511         if (cmd != NULL) {
1512                 switch (cmd->command) {
1513                 case NFQNL_CFG_CMD_BIND:
1514                         if (queue) {
1515                                 ret = -EBUSY;
1516                                 goto err_out_unlock;
1517                         }
1518                         queue = instance_create(q, queue_num,
1519                                                 NETLINK_CB(skb).portid);
1520                         if (IS_ERR(queue)) {
1521                                 ret = PTR_ERR(queue);
1522                                 goto err_out_unlock;
1523                         }
1524                         break;
1525                 case NFQNL_CFG_CMD_UNBIND:
1526                         if (!queue) {
1527                                 ret = -ENODEV;
1528                                 goto err_out_unlock;
1529                         }
1530                         instance_destroy(q, queue);
1531                         goto err_out_unlock;
1532                 case NFQNL_CFG_CMD_PF_BIND:
1533                 case NFQNL_CFG_CMD_PF_UNBIND:
1534                         break;
1535                 default:
1536                         ret = -ENOTSUPP;
1537                         goto err_out_unlock;
1538                 }
1539         }
1540
1541         if (!queue) {
1542                 ret = -ENODEV;
1543                 goto err_out_unlock;
1544         }
1545
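        /* NFQA_CFG_PARAMS selects the copy mode (e.g. NFQNL_COPY_PACKET) and
         * how many payload bytes are copied to userspace per queued packet.
         */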
1546         if (nfqa[NFQA_CFG_PARAMS]) {
1547                 struct nfqnl_msg_config_params *params =
1548                         nla_data(nfqa[NFQA_CFG_PARAMS]);
1549
1550                 nfqnl_set_mode(queue, params->copy_mode,
1551                                 ntohl(params->copy_range));
1552         }
1553
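        /* NFQA_CFG_QUEUE_MAXLEN bounds how many packets may wait for a
         * verdict; packets beyond that limit are normally dropped and
         * accounted in queue_dropped.
         */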
1554         if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) {
1555                 __be32 *queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
1556
1557                 spin_lock_bh(&queue->lock);
1558                 queue->queue_maxlen = ntohl(*queue_maxlen);
1559                 spin_unlock_bh(&queue->lock);
1560         }
1561
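        /* Apply the validated flag update: only bits set in the mask change,
         * taking the values supplied in NFQA_CFG_FLAGS.
         */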
1562         if (nfqa[NFQA_CFG_FLAGS]) {
1563                 spin_lock_bh(&queue->lock);
1564                 queue->flags &= ~mask;
1565                 queue->flags |= flags & mask;
1566                 spin_unlock_bh(&queue->lock);
1567         }
1568
1569 err_out_unlock:
1570         rcu_read_unlock();
1571         return ret;
1572 }
1573
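/* Dispatch table for requests sent to the nf_queue nfnetlink subsystem.
 * Verdicts are processed under RCU (NFNL_CB_RCU) so they scale with traffic,
 * while configuration takes the subsystem mutex (NFNL_CB_MUTEX).
 * NFQNL_MSG_PACKET only travels kernel->userspace, hence nfqnl_recv_unsupp.
 */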
1574 static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
1575         [NFQNL_MSG_PACKET]      = {
1576                 .call           = nfqnl_recv_unsupp,
1577                 .type           = NFNL_CB_RCU,
1578                 .attr_count     = NFQA_MAX,
1579         },
1580         [NFQNL_MSG_VERDICT]     = {
1581                 .call           = nfqnl_recv_verdict,
1582                 .type           = NFNL_CB_RCU,
1583                 .attr_count     = NFQA_MAX,
1584                 .policy         = nfqa_verdict_policy
1585         },
1586         [NFQNL_MSG_CONFIG]      = {
1587                 .call           = nfqnl_recv_config,
1588                 .type           = NFNL_CB_MUTEX,
1589                 .attr_count     = NFQA_CFG_MAX,
1590                 .policy         = nfqa_cfg_policy
1591         },
1592         [NFQNL_MSG_VERDICT_BATCH] = {
1593                 .call           = nfqnl_recv_verdict_batch,
1594                 .type           = NFNL_CB_RCU,
1595                 .attr_count     = NFQA_MAX,
1596                 .policy         = nfqa_verdict_batch_policy
1597         },
1598 };
1599
1600 static const struct nfnetlink_subsystem nfqnl_subsys = {
1601         .name           = "nf_queue",
1602         .subsys_id      = NFNL_SUBSYS_QUEUE,
1603         .cb_count       = NFQNL_MSG_MAX,
1604         .cb             = nfqnl_cb,
1605 };
1606
1607 #ifdef CONFIG_PROC_FS
1608 struct iter_state {
1609         struct seq_net_private p;
1610         unsigned int bucket;
1611 };
1612
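/* Iterator for /proc/net/netfilter/nfnetlink_queue: walk the per-netns
 * instance_table hash bucket by bucket while instances_lock is held.
 */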
1613 static struct hlist_node *get_first(struct seq_file *seq)
1614 {
1615         struct iter_state *st = seq->private;
1616         struct net *net;
1617         struct nfnl_queue_net *q;
1618
1619         if (!st)
1620                 return NULL;
1621
1622         net = seq_file_net(seq);
1623         q = nfnl_queue_pernet(net);
1624         for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
1625                 if (!hlist_empty(&q->instance_table[st->bucket]))
1626                         return q->instance_table[st->bucket].first;
1627         }
1628         return NULL;
1629 }
1630
1631 static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
1632 {
1633         struct iter_state *st = seq->private;
1634         struct net *net = seq_file_net(seq);
1635
1636         h = h->next;
1637         while (!h) {
1638                 struct nfnl_queue_net *q;
1639
1640                 if (++st->bucket >= INSTANCE_BUCKETS)
1641                         return NULL;
1642
1643                 q = nfnl_queue_pernet(net);
1644                 h = q->instance_table[st->bucket].first;
1645         }
1646         return h;
1647 }
1648
1649 static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
1650 {
1651         struct hlist_node *head;
1652         head = get_first(seq);
1653
1654         if (head)
1655                 while (pos && (head = get_next(seq, head)))
1656                         pos--;
1657         return pos ? NULL : head;
1658 }
1659
1660 static void *seq_start(struct seq_file *s, loff_t *pos)
1661         __acquires(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
1662 {
1663         spin_lock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
1664         return get_idx(s, *pos);
1665 }
1666
1667 static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
1668 {
1669         (*pos)++;
1670         return get_next(s, v);
1671 }
1672
1673 static void seq_stop(struct seq_file *s, void *v)
1674         __releases(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
1675 {
1676         spin_unlock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
1677 }
1678
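/* One line per queue instance: queue number, owning netlink portid, packets
 * currently queued, copy mode, copy range, kernel-side drops, userspace-side
 * drops, last packet id and a constant 1 (historically a use count).
 */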
1679 static int seq_show(struct seq_file *s, void *v)
1680 {
1681         const struct nfqnl_instance *inst = v;
1682
1683         seq_printf(s, "%5u %6u %5u %1u %5u %5u %5u %8u %2d\n",
1684                    inst->queue_num,
1685                    inst->peer_portid, inst->queue_total,
1686                    inst->copy_mode, inst->copy_range,
1687                    inst->queue_dropped, inst->queue_user_dropped,
1688                    inst->id_sequence, 1);
1689         return 0;
1690 }
1691
1692 static const struct seq_operations nfqnl_seq_ops = {
1693         .start  = seq_start,
1694         .next   = seq_next,
1695         .stop   = seq_stop,
1696         .show   = seq_show,
1697 };
1698 #endif /* CONFIG_PROC_FS */
1699
1700 static int __net_init nfnl_queue_net_init(struct net *net)
1701 {
1702         unsigned int i;
1703         struct nfnl_queue_net *q = nfnl_queue_pernet(net);
1704
1705         for (i = 0; i < INSTANCE_BUCKETS; i++)
1706                 INIT_HLIST_HEAD(&q->instance_table[i]);
1707
1708         spin_lock_init(&q->instances_lock);
1709
1710 #ifdef CONFIG_PROC_FS
1711         if (!proc_create_net("nfnetlink_queue", 0440, net->nf.proc_netfilter,
1712                         &nfqnl_seq_ops, sizeof(struct iter_state)))
1713                 return -ENOMEM;
1714 #endif
1715         return 0;
1716 }
1717
1718 static void __net_exit nfnl_queue_net_exit(struct net *net)
1719 {
1720         struct nfnl_queue_net *q = nfnl_queue_pernet(net);
1721         unsigned int i;
1722
1723 #ifdef CONFIG_PROC_FS
1724         remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
1725 #endif
1726         for (i = 0; i < INSTANCE_BUCKETS; i++)
1727                 WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
1728 }
1729
1730 static struct pernet_operations nfnl_queue_net_ops = {
1731         .init           = nfnl_queue_net_init,
1732         .exit           = nfnl_queue_net_exit,
1733         .id             = &nfnl_queue_net_id,
1734         .size           = sizeof(struct nfnl_queue_net),
1735 };
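/* With .id and .size set, the pernet core allocates one struct nfnl_queue_net
 * per network namespace; nfnl_queue_pernet() looks it up via net_generic().
 */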
1736
1737 static int __init nfnetlink_queue_init(void)
1738 {
1739         int status;
1740
1741         status = register_pernet_subsys(&nfnl_queue_net_ops);
1742         if (status < 0) {
1743                 pr_err("failed to register pernet ops\n");
1744                 goto out;
1745         }
1746
1747         netlink_register_notifier(&nfqnl_rtnl_notifier);
1748         status = nfnetlink_subsys_register(&nfqnl_subsys);
1749         if (status < 0) {
1750                 pr_err("failed to register nfnetlink subsystem\n");
1751                 goto cleanup_netlink_notifier;
1752         }
1753
1754         status = register_netdevice_notifier(&nfqnl_dev_notifier);
1755         if (status < 0) {
1756                 pr_err("failed to register netdevice notifier\n");
1757                 goto cleanup_netlink_subsys;
1758         }
1759
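        /* Register the queue handler last, once the notifiers and the
         * nfnetlink subsystem are in place and able to service packets.
         */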
1760         nf_register_queue_handler(&nfqh);
1761
1762         return status;
1763
1764 cleanup_netlink_subsys:
1765         nfnetlink_subsys_unregister(&nfqnl_subsys);
1766 cleanup_netlink_notifier:
1767         netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1768         unregister_pernet_subsys(&nfnl_queue_net_ops);
1769 out:
1770         return status;
1771 }
1772
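/* Module unload: undo nfnetlink_queue_init() in reverse order, then wait for
 * any outstanding call_rcu() callbacks before the module text goes away.
 */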
1773 static void __exit nfnetlink_queue_fini(void)
1774 {
1775         nf_unregister_queue_handler();
1776         unregister_netdevice_notifier(&nfqnl_dev_notifier);
1777         nfnetlink_subsys_unregister(&nfqnl_subsys);
1778         netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1779         unregister_pernet_subsys(&nfnl_queue_net_ops);
1780
1781         rcu_barrier(); /* Wait for completion of call_rcu()'s */
1782 }
1783
1784 MODULE_DESCRIPTION("netfilter packet queue handler");
1785 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
1786 MODULE_LICENSE("GPL");
1787 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE);
1788
1789 module_init(nfnetlink_queue_init);
1790 module_exit(nfnetlink_queue_fini);