1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <linux/types.h>
4 #include <linux/netfilter.h>
5 #include <linux/netfilter_ipv6.h>
6 #include <linux/netfilter_bridge.h>
7 #include <linux/module.h>
8 #include <linux/skbuff.h>
9 #include <linux/icmp.h>
10 #include <linux/sysctl.h>
11 #include <net/route.h>
14 #include <net/netfilter/nf_conntrack.h>
15 #include <net/netfilter/nf_conntrack_core.h>
16 #include <net/netfilter/nf_conntrack_helper.h>
17 #include <net/netfilter/nf_conntrack_bridge.h>
19 #include <linux/netfilter/nf_tables.h>
20 #include <net/netfilter/nf_tables.h>
22 #include "../br_private.h"
24 /* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
25 * has been linearized or cloned.
 *
 * Every fragment produced here is stamped with the timestamp and
 * mono_delivery_time flag read from the original skb and is then passed to
 * @output together with @net, @sk and @data.
 * The fragment size budget starts from BR_INPUT_SKB_CB(skb)->frag_max_size
 * and is reduced by hlen.
 * TODO confirm: the error and return paths are not shown in the lines below.
27 static int nf_br_ip_fragment(struct net *net, struct sock *sk,
29 struct nf_bridge_frag_data *data,
30 int (*output)(struct net *, struct sock *sk,
31 const struct nf_bridge_frag_data *data,
34 int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
35 bool mono_delivery_time = skb->mono_delivery_time;
36 unsigned int hlen, ll_rs, mtu;
37 ktime_t tstamp = skb->tstamp;
38 struct ip_frag_state state;
42 /* for offloaded checksums cleanup checksum before fragmentation */
43 if (skb->ip_summed == CHECKSUM_PARTIAL &&
44 (err = skb_checksum_help(skb)))
50 * Setup starting values
54 frag_max_size -= hlen;
55 ll_rs = LL_RESERVED_SPACE(skb->dev);
/* The skb carries a frag list: the head and each listed fragment are checked against mtu and the required headroom. */
58 if (skb_has_frag_list(skb)) {
59 unsigned int first_len = skb_pagelen(skb);
60 struct ip_fraglist_iter iter;
63 if (first_len - hlen > mtu ||
64 skb_headroom(skb) < ll_rs)
/* Listed fragments need room for both the IP header (hlen) and the link-layer reserve. */
70 skb_walk_frags(skb, frag) {
71 if (frag->len > mtu ||
72 skb_headroom(frag) < hlen + ll_rs)
79 ip_fraglist_init(skb, iph, hlen, &iter);
83 ip_fraglist_prepare(skb, &iter);
/* Re-apply the timestamp captured from the original skb before handing the fragment to output(). */
85 skb_set_delivery_time(skb, tstamp, mono_delivery_time);
86 err = output(net, sk, data, skb);
/* An output error, or an iterator with no fragment left, is tested here. */
87 if (err || !iter.frag)
90 skb = ip_fraglist_next(&iter);
/* Frees the fragments still referenced by the iterator. */
96 kfree_skb_list(iter.frag);
101 /* This is a linearized skbuff, the original geometry is lost for us.
102 * This may also be a clone skbuff, we could preserve the geometry for
103 * the copies but probably not worth the effort.
105 ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state);
107 while (state.left > 0) {
108 struct sk_buff *skb2;
110 skb2 = ip_frag_next(skb, &state);
116 skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
117 err = output(net, sk, data, skb2);
129 /* ip_defrag() expects IPCB() in place. */
/* Copy the bridge control block (sizeof(*cb) bytes of skb->cb) into @cb, then
 * zero the first @inet_skb_parm_size bytes of skb->cb.
 * The callers in this file pass sizeof(struct inet_skb_parm) or
 * sizeof(struct inet6_skb_parm) as @inet_skb_parm_size.
 */
130 static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
131 size_t inet_skb_parm_size)
/* Save first: the memset below overwrites the same bytes of skb->cb. */
133 memcpy(cb, skb->cb, sizeof(*cb));
134 memset(skb->cb, 0, inet_skb_parm_size);
/* Copy the saved bridge control block @cb back into skb->cb, then set the
 * frag_max_size field of the restored block to fragsz.
 * The callers in this file pass the frag_max_size read from IPCB(skb) or
 * IP6CB(skb) after reassembly.
 */
137 static void br_skb_cb_restore(struct sk_buff *skb,
138 const struct br_input_skb_cb *cb,
141 memcpy(skb->cb, cb, sizeof(*cb));
/* Must follow the memcpy, which rewrites the whole saved block including this field. */
142 BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
/* Run IPv4 reassembly for a bridged packet through ip_defrag().
 *
 * skb->cb is saved and cleared for sizeof(struct inet_skb_parm) bytes before
 * the call and restored afterwards, with frag_max_size taken from IPCB(skb).
 * The defrag user id is IP_DEFRAG_CONNTRACK_BRIDGE_IN plus the zone id.
 * TODO confirm: the verdicts returned by this function are not shown in the
 * lines below.
 */
145 static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
146 const struct nf_hook_state *state)
148 u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
149 enum ip_conntrack_info ctinfo;
150 struct br_input_skb_cb cb;
151 const struct nf_conn *ct;
/* Test whether the packet is an IPv4 fragment at all. */
154 if (!ip_is_fragment(ip_hdr(skb)))
157 ct = nf_ct_get(skb, &ctinfo);
/* Replace the default zone id with the zone of the conntrack entry attached to the skb. */
159 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
161 br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
163 err = ip_defrag(state->net, skb,
164 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
/* IPCB(skb)->frag_max_size is read before the restore overwrites skb->cb. */
167 br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
/* Run IPv6 reassembly for a bridged packet through nf_ct_frag6_gather().
 *
 * The body shown here is compiled only when CONFIG_NF_DEFRAG_IPV6 is enabled.
 * skb->cb is saved and cleared for sizeof(struct inet6_skb_parm) bytes before
 * the call and restored afterwards, with frag_max_size taken from IP6CB(skb).
 * The final visible return yields NF_ACCEPT when err is 0 and NF_DROP
 * otherwise; -EINPROGRESS is tested separately before the restore.
 */
175 static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
176 const struct nf_hook_state *state)
178 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
179 u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
180 enum ip_conntrack_info ctinfo;
181 struct br_input_skb_cb cb;
182 const struct nf_conn *ct;
185 ct = nf_ct_get(skb, &ctinfo);
/* Replace the default zone id with the zone of the conntrack entry attached to the skb. */
187 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
189 br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));
191 err = nf_ct_frag6_gather(state->net, skb,
192 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
/* -EINPROGRESS is handled before skb->cb is restored; TODO confirm the verdict used for it. */
194 if (err == -EINPROGRESS)
197 br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
198 return err == 0 ? NF_ACCEPT : NF_DROP;
/* Length sanity check for an IPv4 packet.
 *
 * The visible condition flags a packet when the skb is shorter than the
 * network offset plus the IP total length, or when the total length is
 * smaller than the header length (ihl * 4).
 * nf_ct_bridge_pre() tests the result for non-zero.
 */
204 static int nf_ct_br_ip_check(const struct sk_buff *skb)
206 const struct iphdr *iph;
209 nhoff = skb_network_offset(skb);
215 len = skb_ip_totlen(skb);
216 if (skb->len < nhoff + len ||
217 len < (iph->ihl * 4))
/* Sanity check for an IPv6 packet.
 *
 * The visible lines test the header version field against 6 and compute the
 * expected length as payload_len (converted from network byte order) plus
 * the fixed IPv6 header size plus the network offset.
 * nf_ct_bridge_pre() tests the result for non-zero.
 */
223 static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
225 const struct ipv6hdr *hdr;
228 nhoff = skb_network_offset(skb);
230 if (hdr->version != 6)
233 len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;
/* Bridge hook registered at NF_BR_PRE_ROUTING: validate and defragment
 * IPv4/IPv6 packets, then hand them to nf_conntrack_in().
 *
 * A private copy of the hook state is used so that its pf field can be set
 * to NFPROTO_IPV4 or NFPROTO_IPV6 before the defrag helpers and
 * nf_conntrack_in() are called.
 */
240 static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
241 const struct nf_hook_state *state)
243 struct nf_hook_state bridge_state = *state;
244 enum ip_conntrack_info ctinfo;
249 ct = nf_ct_get(skb, &ctinfo);
/* Detect an skb that already has a non-template conntrack entry or is marked untracked. */
250 if ((ct && !nf_ct_is_template(ct)) ||
251 ctinfo == IP_CT_UNTRACKED)
254 switch (skb->protocol) {
255 case htons(ETH_P_IP):
/* The fixed IPv4 header must be pullable before its length fields are read. */
256 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
/* Trim the skb to the total length announced by the IP header. */
259 len = skb_ip_totlen(skb);
260 if (pskb_trim_rcsum(skb, len))
263 if (nf_ct_br_ip_check(skb))
266 bridge_state.pf = NFPROTO_IPV4;
267 ret = nf_ct_br_defrag4(skb, &bridge_state);
269 case htons(ETH_P_IPV6):
/* The fixed IPv6 header must be pullable before payload_len is read. */
270 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
/* Trim the skb to the fixed header plus the announced payload length. */
273 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
274 if (pskb_trim_rcsum(skb, len))
277 if (nf_ct_br_ipv6_check(skb))
280 bridge_state.pf = NFPROTO_IPV6;
281 ret = nf_ct_br_defrag6(skb, &bridge_state);
/* Marks the skb as untracked; presumably the path for other protocols - TODO confirm. */
284 nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
/* Anything other than NF_ACCEPT from the defrag step is tested here, before conntrack runs. */
288 if (ret != NF_ACCEPT)
291 return nf_conntrack_in(skb, &bridge_state);
/* Bridge hook registered at NF_BR_LOCAL_IN.
 *
 * Reads the promisc flag from the bridge control block and the conntrack
 * reference attached to the skb, then examines the conntrack entry for
 * packets that are not addressed to this host.
 * TODO confirm: the verdicts returned by each branch are not shown in the
 * lines below.
 */
294 static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
295 const struct nf_hook_state *state)
297 bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
298 struct nf_conntrack *nfct = skb_nfct(skb);
/* No conntrack attached, or a packet of type PACKET_HOST. */
306 if (!nfct || skb->pkt_type == PACKET_HOST)
309 /* nf_conntrack_confirm() cannot handle concurrent clones,
310 * this happens for broad/multicast frames with e.g. macvlan on top
311 * of the bridge device.
313 ct = container_of(nfct, struct nf_conn, ct_general);
314 if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
317 /* let inet prerouting call conntrack again */
/* Record in @data the link-layer state of @skb that is needed after
 * refragmentation: the VLAN tag (if one is present) and the ETH_HLEN bytes
 * located immediately before the skb data pointer.
 */
324 static void nf_ct_bridge_frag_save(struct sk_buff *skb,
325 struct nf_bridge_frag_data *data)
327 if (skb_vlan_tag_present(skb)) {
328 data->vlan_present = true;
329 data->vlan_tci = skb->vlan_tci;
330 data->vlan_proto = skb->vlan_proto;
332 data->vlan_present = false;
/* Negative offset: the copy starts ETH_HLEN bytes before the current data pointer. */
334 skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
/* Refragment @skb, dispatching on skb->protocol to nf_br_ip_fragment() for
 * IPv4 or nf_br_ip6_fragment() for IPv6; each fragment is delivered through
 * @output.
 * The link-layer state is captured with nf_ct_bridge_frag_save() first and
 * passed along to the fragmenting helper.
 */
338 nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
339 int (*output)(struct net *, struct sock *sk,
340 const struct nf_bridge_frag_data *data,
343 struct nf_bridge_frag_data data;
/* A zero frag_max_size is tested first; presumably the packet was never defragmented - TODO confirm. */
345 if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
348 nf_ct_bridge_frag_save(skb, &data);
349 switch (skb->protocol) {
350 case htons(ETH_P_IP):
351 nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
353 case htons(ETH_P_IPV6):
354 nf_br_ip6_fragment(state->net, state->sk, skb, &data, output);
364 /* Actually only slow path refragmentation needs this. */
/* Re-apply to @skb the link-layer state stored in @data: make sure ETH_HLEN
 * bytes of headroom are writable, set or clear the VLAN tag so that it
 * matches @data, copy the saved ETH_HLEN bytes back in front of the data
 * pointer and reset the MAC header.
 */
365 static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
366 const struct nf_bridge_frag_data *data)
370 err = skb_cow_head(skb, ETH_HLEN);
375 if (data->vlan_present)
376 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
/* No tag was saved: drop any tag the skb currently carries. */
377 else if (skb_vlan_tag_present(skb))
378 __vlan_hwaccel_clear_tag(skb);
380 skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
381 skb_reset_mac_header(skb);
/* Output callback used for refragmentation: restore the saved link-layer
 * state on the fragment with nf_ct_bridge_frag_restore(), then pass it to
 * br_dev_queue_push_xmit() and return that function's result.
 */
386 static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
387 const struct nf_bridge_frag_data *data,
392 err = nf_ct_bridge_frag_restore(skb, data);
396 return br_dev_queue_push_xmit(net, sk, skb);
/* Bridge hook registered at NF_BR_POST_ROUTING: run nf_confirm() on the skb
 * and, after the verdict check below, refragment it through
 * nf_ct_bridge_refrag() with nf_ct_bridge_refrag_post() as output callback.
 */
399 static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
400 const struct nf_hook_state *state)
404 ret = nf_confirm(priv, skb, state);
/* Any verdict other than NF_ACCEPT is tested before refragmentation. */
405 if (ret != NF_ACCEPT)
408 return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);
/* Hook table for the NFPROTO_BRIDGE family: nf_ct_bridge_pre runs at
 * NF_BR_PRE_ROUTING with NF_IP_PRI_CONNTRACK priority; nf_ct_bridge_in at
 * NF_BR_LOCAL_IN and nf_ct_bridge_post at NF_BR_POST_ROUTING both run with
 * NF_IP_PRI_CONNTRACK_CONFIRM priority.
 */
411 static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
413 .hook = nf_ct_bridge_pre,
414 .pf = NFPROTO_BRIDGE,
415 .hooknum = NF_BR_PRE_ROUTING,
416 .priority = NF_IP_PRI_CONNTRACK,
419 .hook = nf_ct_bridge_in,
420 .pf = NFPROTO_BRIDGE,
421 .hooknum = NF_BR_LOCAL_IN,
422 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
425 .hook = nf_ct_bridge_post,
426 .pf = NFPROTO_BRIDGE,
427 .hooknum = NF_BR_POST_ROUTING,
428 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
/* Descriptor handed to nf_ct_bridge_register()/nf_ct_bridge_unregister():
 * points at the hook table above and records its element count.
 */
432 static struct nf_ct_bridge_info bridge_info = {
433 .ops = nf_ct_bridge_hook_ops,
434 .ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops),
/* Module init: register bridge_info with nf_ct_bridge_register(). */
438 static int __init nf_conntrack_l3proto_bridge_init(void)
440 nf_ct_bridge_register(&bridge_info);
/* Module exit: unregister bridge_info with nf_ct_bridge_unregister(). */
445 static void __exit nf_conntrack_l3proto_bridge_fini(void)
447 nf_ct_bridge_unregister(&bridge_info);
/* Module entry/exit points and metadata; the alias is built from the
 * "nf_conntrack-" prefix and the stringified AF_BRIDGE value.
 */
450 module_init(nf_conntrack_l3proto_bridge_init);
451 module_exit(nf_conntrack_l3proto_bridge_fini);
453 MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
454 MODULE_LICENSE("GPL");
455 MODULE_DESCRIPTION("Bridge IPv4 and IPv6 connection tracking");