/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

66 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
68 return hash_32((__force u32)key ^ (__force u32)remote,
72 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
73 __be16 flags, __be32 key)
75 if (p->i_flags & TUNNEL_KEY) {
76 if (flags & TUNNEL_KEY)
77 return key == p->i_key;
79 /* key expected, none present */
82 return !(flags & TUNNEL_KEY);
85 /* Fallback tunnel: no source, no destination, no key, no options
88 We require exact key match i.e. if a key is present in packet
89 it will match only tunnel with the same key; if it is not present,
90 it will match only keyless tunnel.
92 All keysless packets, if not matched configured keyless tunnels
93 will match fallback tunnel.
94 Given src, dst and key, find appropriate for input tunnel.
96 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
97 int link, __be16 flags,
98 __be32 remote, __be32 local,
101 struct ip_tunnel *t, *cand = NULL;
102 struct hlist_head *head;
103 struct net_device *ndev;
106 hash = ip_tunnel_hash(key, remote);
107 head = &itn->tunnels[hash];
109 hlist_for_each_entry_rcu(t, head, hash_node) {
110 if (local != t->parms.iph.saddr ||
111 remote != t->parms.iph.daddr ||
112 !(t->dev->flags & IFF_UP))
115 if (!ip_tunnel_key_match(&t->parms, flags, key))
118 if (t->parms.link == link)
124 hlist_for_each_entry_rcu(t, head, hash_node) {
125 if (remote != t->parms.iph.daddr ||
126 t->parms.iph.saddr != 0 ||
127 !(t->dev->flags & IFF_UP))
130 if (!ip_tunnel_key_match(&t->parms, flags, key))
133 if (t->parms.link == link)
139 hash = ip_tunnel_hash(key, 0);
140 head = &itn->tunnels[hash];
142 hlist_for_each_entry_rcu(t, head, hash_node) {
143 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
144 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
147 if (!(t->dev->flags & IFF_UP))
150 if (!ip_tunnel_key_match(&t->parms, flags, key))
153 if (t->parms.link == link)
159 hlist_for_each_entry_rcu(t, head, hash_node) {
160 if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
161 t->parms.iph.saddr != 0 ||
162 t->parms.iph.daddr != 0 ||
163 !(t->dev->flags & IFF_UP))
166 if (t->parms.link == link)
175 t = rcu_dereference(itn->collect_md_tun);
179 ndev = READ_ONCE(itn->fb_tunnel_dev);
180 if (ndev && ndev->flags & IFF_UP)
181 return netdev_priv(ndev);
185 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
187 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
188 struct ip_tunnel_parm *parms)
192 __be32 i_key = parms->i_key;
194 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
195 remote = parms->iph.daddr;
199 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
202 h = ip_tunnel_hash(i_key, remote);
203 return &itn->tunnels[h];
206 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
208 struct hlist_head *head = ip_bucket(itn, &t->parms);
211 rcu_assign_pointer(itn->collect_md_tun, t);
212 hlist_add_head_rcu(&t->hash_node, head);
215 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
218 rcu_assign_pointer(itn->collect_md_tun, NULL);
219 hlist_del_init_rcu(&t->hash_node);
222 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
223 struct ip_tunnel_parm *parms,
226 __be32 remote = parms->iph.daddr;
227 __be32 local = parms->iph.saddr;
228 __be32 key = parms->i_key;
229 __be16 flags = parms->i_flags;
230 int link = parms->link;
231 struct ip_tunnel *t = NULL;
232 struct hlist_head *head = ip_bucket(itn, parms);
234 hlist_for_each_entry_rcu(t, head, hash_node) {
235 if (local == t->parms.iph.saddr &&
236 remote == t->parms.iph.daddr &&
237 link == t->parms.link &&
238 type == t->dev->type &&
239 ip_tunnel_key_match(&t->parms, flags, key))
245 static struct net_device *__ip_tunnel_create(struct net *net,
246 const struct rtnl_link_ops *ops,
247 struct ip_tunnel_parm *parms)
250 struct ip_tunnel *tunnel;
251 struct net_device *dev;
255 if (parms->name[0]) {
256 if (!dev_valid_name(parms->name))
258 strlcpy(name, parms->name, IFNAMSIZ);
260 if (strlen(ops->kind) > (IFNAMSIZ - 3))
262 strcpy(name, ops->kind);
267 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
272 dev_net_set(dev, net);
274 dev->rtnl_link_ops = ops;
276 tunnel = netdev_priv(dev);
277 tunnel->parms = *parms;
280 err = register_netdevice(dev);
292 static inline void init_tunnel_flow(struct flowi4 *fl4,
294 __be32 daddr, __be32 saddr,
295 __be32 key, __u8 tos, int oif)
297 memset(fl4, 0, sizeof(*fl4));
298 fl4->flowi4_oif = oif;
301 fl4->flowi4_tos = tos;
302 fl4->flowi4_proto = proto;
303 fl4->fl4_gre_key = key;
306 static int ip_tunnel_bind_dev(struct net_device *dev)
308 struct net_device *tdev = NULL;
309 struct ip_tunnel *tunnel = netdev_priv(dev);
310 const struct iphdr *iph;
311 int hlen = LL_MAX_HEADER;
312 int mtu = ETH_DATA_LEN;
313 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
315 iph = &tunnel->parms.iph;
317 /* Guess output device to choose reasonable mtu and needed_headroom */
322 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
323 iph->saddr, tunnel->parms.o_key,
324 RT_TOS(iph->tos), tunnel->parms.link);
325 rt = ip_route_output_key(tunnel->net, &fl4);
331 if (dev->type != ARPHRD_ETHER)
332 dev->flags |= IFF_POINTOPOINT;
334 dst_cache_reset(&tunnel->dst_cache);
337 if (!tdev && tunnel->parms.link)
338 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
341 hlen = tdev->hard_header_len + tdev->needed_headroom;
345 dev->needed_headroom = t_hlen + hlen;
346 mtu -= (dev->hard_header_len + t_hlen);
348 if (mtu < IPV4_MIN_MTU)
354 static struct ip_tunnel *ip_tunnel_create(struct net *net,
355 struct ip_tunnel_net *itn,
356 struct ip_tunnel_parm *parms)
358 struct ip_tunnel *nt;
359 struct net_device *dev;
361 BUG_ON(!itn->fb_tunnel_dev);
362 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
364 return ERR_CAST(dev);
366 dev->mtu = ip_tunnel_bind_dev(dev);
368 nt = netdev_priv(dev);
369 ip_tunnel_add(itn, nt);
373 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
374 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
377 struct pcpu_sw_netstats *tstats;
378 const struct iphdr *iph = ip_hdr(skb);
381 #ifdef CONFIG_NET_IPGRE_BROADCAST
382 if (ipv4_is_multicast(iph->daddr)) {
383 tunnel->dev->stats.multicast++;
384 skb->pkt_type = PACKET_BROADCAST;
388 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
389 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
390 tunnel->dev->stats.rx_crc_errors++;
391 tunnel->dev->stats.rx_errors++;
395 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
396 if (!(tpi->flags&TUNNEL_SEQ) ||
397 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
398 tunnel->dev->stats.rx_fifo_errors++;
399 tunnel->dev->stats.rx_errors++;
402 tunnel->i_seqno = ntohl(tpi->seq) + 1;
405 skb_reset_network_header(skb);
407 err = IP_ECN_decapsulate(iph, skb);
410 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
411 &iph->saddr, iph->tos);
413 ++tunnel->dev->stats.rx_frame_errors;
414 ++tunnel->dev->stats.rx_errors;
419 tstats = this_cpu_ptr(tunnel->dev->tstats);
420 u64_stats_update_begin(&tstats->syncp);
421 tstats->rx_packets++;
422 tstats->rx_bytes += skb->len;
423 u64_stats_update_end(&tstats->syncp);
425 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
427 if (tunnel->dev->type == ARPHRD_ETHER) {
428 skb->protocol = eth_type_trans(skb, tunnel->dev);
429 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
431 skb->dev = tunnel->dev;
435 skb_dst_set(skb, (struct dst_entry *)tun_dst);
437 gro_cells_receive(&tunnel->gro_cells, skb);
444 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
446 static int ip_encap_hlen(struct ip_tunnel_encap *e)
448 const struct ip_tunnel_encap_ops *ops;
451 if (e->type == TUNNEL_ENCAP_NONE)
454 if (e->type >= MAX_IPTUN_ENCAP_OPS)
458 ops = rcu_dereference(iptun_encaps[e->type]);
459 if (likely(ops && ops->encap_hlen))
460 hlen = ops->encap_hlen(e);
466 const struct ip_tunnel_encap_ops __rcu *
467 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
469 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
472 if (num >= MAX_IPTUN_ENCAP_OPS)
475 return !cmpxchg((const struct ip_tunnel_encap_ops **)
479 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
481 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
486 if (num >= MAX_IPTUN_ENCAP_OPS)
489 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
491 ops, NULL) == ops) ? 0 : -1;
497 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
499 int ip_tunnel_encap_setup(struct ip_tunnel *t,
500 struct ip_tunnel_encap *ipencap)
504 memset(&t->encap, 0, sizeof(t->encap));
506 hlen = ip_encap_hlen(ipencap);
510 t->encap.type = ipencap->type;
511 t->encap.sport = ipencap->sport;
512 t->encap.dport = ipencap->dport;
513 t->encap.flags = ipencap->flags;
515 t->encap_hlen = hlen;
516 t->hlen = t->encap_hlen + t->tun_hlen;
520 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
522 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
523 u8 *protocol, struct flowi4 *fl4)
525 const struct ip_tunnel_encap_ops *ops;
528 if (t->encap.type == TUNNEL_ENCAP_NONE)
531 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
535 ops = rcu_dereference(iptun_encaps[t->encap.type]);
536 if (likely(ops && ops->build_header))
537 ret = ops->build_header(skb, &t->encap, protocol, fl4);
542 EXPORT_SYMBOL(ip_tunnel_encap);
544 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
545 struct rtable *rt, __be16 df,
546 const struct iphdr *inner_iph)
548 struct ip_tunnel *tunnel = netdev_priv(dev);
549 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
553 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
554 - sizeof(struct iphdr) - tunnel->hlen;
556 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
559 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
561 if (skb->protocol == htons(ETH_P_IP)) {
562 if (!skb_is_gso(skb) &&
563 (inner_iph->frag_off & htons(IP_DF)) &&
565 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
566 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
570 #if IS_ENABLED(CONFIG_IPV6)
571 else if (skb->protocol == htons(ETH_P_IPV6)) {
572 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
574 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
575 mtu >= IPV6_MIN_MTU) {
576 if ((tunnel->parms.iph.daddr &&
577 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
578 rt6->rt6i_dst.plen == 128) {
579 rt6->rt6i_flags |= RTF_MODIFIED;
580 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
584 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
586 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
594 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
595 const struct iphdr *tnl_params, u8 protocol)
597 struct ip_tunnel *tunnel = netdev_priv(dev);
598 unsigned int inner_nhdr_len = 0;
599 const struct iphdr *inner_iph;
603 struct rtable *rt; /* Route to the other host */
604 unsigned int max_headroom; /* The extra header space needed */
609 /* ensure we can access the inner net header, for several users below */
610 if (skb->protocol == htons(ETH_P_IP))
611 inner_nhdr_len = sizeof(struct iphdr);
612 else if (skb->protocol == htons(ETH_P_IPV6))
613 inner_nhdr_len = sizeof(struct ipv6hdr);
614 if (unlikely(!pskb_may_pull(skb, inner_nhdr_len)))
617 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
618 connected = (tunnel->parms.iph.daddr != 0);
620 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
622 dst = tnl_params->daddr;
627 dev->stats.tx_fifo_errors++;
631 if (skb->protocol == htons(ETH_P_IP)) {
632 rt = skb_rtable(skb);
633 dst = rt_nexthop(rt, inner_iph->daddr);
635 #if IS_ENABLED(CONFIG_IPV6)
636 else if (skb->protocol == htons(ETH_P_IPV6)) {
637 const struct in6_addr *addr6;
638 struct neighbour *neigh;
639 bool do_tx_error_icmp;
642 neigh = dst_neigh_lookup(skb_dst(skb),
643 &ipv6_hdr(skb)->daddr);
647 addr6 = (const struct in6_addr *)&neigh->primary_key;
648 addr_type = ipv6_addr_type(addr6);
650 if (addr_type == IPV6_ADDR_ANY) {
651 addr6 = &ipv6_hdr(skb)->daddr;
652 addr_type = ipv6_addr_type(addr6);
655 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
656 do_tx_error_icmp = true;
658 do_tx_error_icmp = false;
659 dst = addr6->s6_addr32[3];
661 neigh_release(neigh);
662 if (do_tx_error_icmp)
672 tos = tnl_params->tos;
675 if (skb->protocol == htons(ETH_P_IP)) {
676 tos = inner_iph->tos;
678 } else if (skb->protocol == htons(ETH_P_IPV6)) {
679 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
684 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
685 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
687 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
690 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
694 rt = ip_route_output_key(tunnel->net, &fl4);
697 dev->stats.tx_carrier_errors++;
701 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
705 if (rt->dst.dev == dev) {
707 dev->stats.collisions++;
711 df = tnl_params->frag_off;
712 if (skb->protocol == htons(ETH_P_IP))
713 df |= (inner_iph->frag_off & htons(IP_DF));
715 if (tnl_update_pmtu(dev, skb, rt, df, inner_iph)) {
720 if (tunnel->err_count > 0) {
721 if (time_before(jiffies,
722 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
725 dst_link_failure(skb);
727 tunnel->err_count = 0;
730 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
731 ttl = tnl_params->ttl;
733 if (skb->protocol == htons(ETH_P_IP))
734 ttl = inner_iph->ttl;
735 #if IS_ENABLED(CONFIG_IPV6)
736 else if (skb->protocol == htons(ETH_P_IPV6))
737 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
740 ttl = ip4_dst_hoplimit(&rt->dst);
743 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
744 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
745 if (max_headroom > dev->needed_headroom)
746 dev->needed_headroom = max_headroom;
748 if (skb_cow_head(skb, dev->needed_headroom)) {
750 dev->stats.tx_dropped++;
755 err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol,
756 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
757 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
761 #if IS_ENABLED(CONFIG_IPV6)
763 dst_link_failure(skb);
766 dev->stats.tx_errors++;
769 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
771 static void ip_tunnel_update(struct ip_tunnel_net *itn,
773 struct net_device *dev,
774 struct ip_tunnel_parm *p,
777 ip_tunnel_del(itn, t);
778 t->parms.iph.saddr = p->iph.saddr;
779 t->parms.iph.daddr = p->iph.daddr;
780 t->parms.i_key = p->i_key;
781 t->parms.o_key = p->o_key;
782 if (dev->type != ARPHRD_ETHER) {
783 memcpy(dev->dev_addr, &p->iph.saddr, 4);
784 memcpy(dev->broadcast, &p->iph.daddr, 4);
786 ip_tunnel_add(itn, t);
788 t->parms.iph.ttl = p->iph.ttl;
789 t->parms.iph.tos = p->iph.tos;
790 t->parms.iph.frag_off = p->iph.frag_off;
792 if (t->parms.link != p->link) {
795 t->parms.link = p->link;
796 mtu = ip_tunnel_bind_dev(dev);
800 dst_cache_reset(&t->dst_cache);
801 netdev_state_change(dev);
804 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
807 struct ip_tunnel *t = netdev_priv(dev);
808 struct net *net = t->net;
809 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
811 BUG_ON(!itn->fb_tunnel_dev);
814 if (dev == itn->fb_tunnel_dev) {
815 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
817 t = netdev_priv(dev);
819 memcpy(p, &t->parms, sizeof(*p));
825 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
828 p->iph.frag_off |= htons(IP_DF);
829 if (!(p->i_flags & VTI_ISVTI)) {
830 if (!(p->i_flags & TUNNEL_KEY))
832 if (!(p->o_flags & TUNNEL_KEY))
836 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
838 if (cmd == SIOCADDTUNNEL) {
840 t = ip_tunnel_create(net, itn, p);
841 err = PTR_ERR_OR_ZERO(t);
848 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
855 unsigned int nflags = 0;
857 if (ipv4_is_multicast(p->iph.daddr))
858 nflags = IFF_BROADCAST;
859 else if (p->iph.daddr)
860 nflags = IFF_POINTOPOINT;
862 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
867 t = netdev_priv(dev);
873 ip_tunnel_update(itn, t, dev, p, true);
881 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
884 if (dev == itn->fb_tunnel_dev) {
886 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
890 if (t == netdev_priv(itn->fb_tunnel_dev))
894 unregister_netdevice(dev);
905 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
907 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
909 struct ip_tunnel *tunnel = netdev_priv(dev);
910 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
911 int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
916 if (new_mtu > max_mtu) {
926 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
928 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
930 return __ip_tunnel_change_mtu(dev, new_mtu, true);
932 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
934 static void ip_tunnel_dev_free(struct net_device *dev)
936 struct ip_tunnel *tunnel = netdev_priv(dev);
938 gro_cells_destroy(&tunnel->gro_cells);
939 dst_cache_destroy(&tunnel->dst_cache);
940 free_percpu(dev->tstats);
944 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
946 struct ip_tunnel *tunnel = netdev_priv(dev);
947 struct ip_tunnel_net *itn;
949 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
951 if (itn->fb_tunnel_dev != dev) {
952 ip_tunnel_del(itn, netdev_priv(dev));
953 unregister_netdevice_queue(dev, head);
956 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
958 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
960 struct ip_tunnel *tunnel = netdev_priv(dev);
964 EXPORT_SYMBOL(ip_tunnel_get_link_net);
966 int ip_tunnel_get_iflink(const struct net_device *dev)
968 struct ip_tunnel *tunnel = netdev_priv(dev);
970 return tunnel->parms.link;
972 EXPORT_SYMBOL(ip_tunnel_get_iflink);
974 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
975 struct rtnl_link_ops *ops, char *devname)
977 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
978 struct ip_tunnel_parm parms;
981 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
982 INIT_HLIST_HEAD(&itn->tunnels[i]);
985 itn->fb_tunnel_dev = NULL;
989 memset(&parms, 0, sizeof(parms));
991 strlcpy(parms.name, devname, IFNAMSIZ);
994 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
995 /* FB netdevice is special: we have one, and only one per netns.
996 * Allowing to move it to another netns is clearly unsafe.
998 if (!IS_ERR(itn->fb_tunnel_dev)) {
999 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1000 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1001 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1005 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1007 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1009 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1010 struct rtnl_link_ops *ops)
1012 struct net *net = dev_net(itn->fb_tunnel_dev);
1013 struct net_device *dev, *aux;
1016 for_each_netdev_safe(net, dev, aux)
1017 if (dev->rtnl_link_ops == ops)
1018 unregister_netdevice_queue(dev, head);
1020 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1021 struct ip_tunnel *t;
1022 struct hlist_node *n;
1023 struct hlist_head *thead = &itn->tunnels[h];
1025 hlist_for_each_entry_safe(t, n, thead, hash_node)
1026 /* If dev is in the same netns, it has already
1027 * been added to the list by the previous loop.
1029 if (!net_eq(dev_net(t->dev), net))
1030 unregister_netdevice_queue(t->dev, head);
1034 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
1039 ip_tunnel_destroy(itn, &list, ops);
1040 unregister_netdevice_many(&list);
1043 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1045 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1046 struct ip_tunnel_parm *p)
1048 struct ip_tunnel *nt;
1049 struct net *net = dev_net(dev);
1050 struct ip_tunnel_net *itn;
1054 nt = netdev_priv(dev);
1055 itn = net_generic(net, nt->ip_tnl_net_id);
1057 if (nt->collect_md) {
1058 if (rtnl_dereference(itn->collect_md_tun))
1061 if (ip_tunnel_find(itn, p, dev->type))
1067 err = register_netdevice(dev);
1071 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1072 eth_hw_addr_random(dev);
1074 mtu = ip_tunnel_bind_dev(dev);
1078 ip_tunnel_add(itn, nt);
1082 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1084 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1085 struct ip_tunnel_parm *p)
1087 struct ip_tunnel *t;
1088 struct ip_tunnel *tunnel = netdev_priv(dev);
1089 struct net *net = tunnel->net;
1090 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1092 if (dev == itn->fb_tunnel_dev)
1095 t = ip_tunnel_find(itn, p, dev->type);
1103 if (dev->type != ARPHRD_ETHER) {
1104 unsigned int nflags = 0;
1106 if (ipv4_is_multicast(p->iph.daddr))
1107 nflags = IFF_BROADCAST;
1108 else if (p->iph.daddr)
1109 nflags = IFF_POINTOPOINT;
1111 if ((dev->flags ^ nflags) &
1112 (IFF_POINTOPOINT | IFF_BROADCAST))
1117 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1120 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1122 int ip_tunnel_init(struct net_device *dev)
1124 struct ip_tunnel *tunnel = netdev_priv(dev);
1125 struct iphdr *iph = &tunnel->parms.iph;
1128 dev->destructor = ip_tunnel_dev_free;
1129 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1133 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1135 free_percpu(dev->tstats);
1139 err = gro_cells_init(&tunnel->gro_cells, dev);
1141 dst_cache_destroy(&tunnel->dst_cache);
1142 free_percpu(dev->tstats);
1147 tunnel->net = dev_net(dev);
1148 strcpy(tunnel->parms.name, dev->name);
1152 if (tunnel->collect_md)
1153 netif_keep_dst(dev);
1156 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1158 void ip_tunnel_uninit(struct net_device *dev)
1160 struct ip_tunnel *tunnel = netdev_priv(dev);
1161 struct net *net = tunnel->net;
1162 struct ip_tunnel_net *itn;
1164 itn = net_generic(net, tunnel->ip_tnl_net_id);
1165 ip_tunnel_del(itn, netdev_priv(dev));
1166 if (itn->fb_tunnel_dev == dev)
1167 WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1169 dst_cache_reset(&tunnel->dst_cache);
1171 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1173 /* Do least required initialization, rest of init is done in tunnel_init call */
1174 void ip_tunnel_setup(struct net_device *dev, int net_id)
1176 struct ip_tunnel *tunnel = netdev_priv(dev);
1177 tunnel->ip_tnl_net_id = net_id;
1179 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1181 MODULE_LICENSE("GPL");