2 * Copyright (c) 2013 Nicira, Inc.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/init.h>
34 #include <linux/in6.h>
35 #include <linux/inetdevice.h>
36 #include <linux/igmp.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/etherdevice.h>
39 #include <linux/if_ether.h>
40 #include <linux/if_vlan.h>
41 #include <linux/rculist.h>
42 #include <linux/err.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
58 #include <net/dst_metadata.h>
60 #if IS_ENABLED(CONFIG_IPV6)
62 #include <net/ip6_fib.h>
63 #include <net/ip6_route.h>
/*
 * Map a tunnel key and a remote address to a hash value.
 *
 * The key and the remote address are XORed as raw u32 values and the
 * result is passed to hash_32(). The second hash_32() argument is not
 * visible in this view. Callers in this file use the returned value as
 * an index into itn->tunnels[].
 */
66 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
68 return hash_32((__force u32)key ^ (__force u32)remote,
/*
 * Decide whether a flags/key pair is compatible with tunnel parameters @p.
 *
 * @p:     tunnel parameters; p->i_flags and p->i_key are consulted
 * @flags: flags to test; TUNNEL_KEY indicates that @key is meaningful
 * @key:   key value to compare against p->i_key
 *
 * When the tunnel has TUNNEL_KEY set and @flags has TUNNEL_KEY set, the
 * result is whether @key equals p->i_key. The case "tunnel expects a key
 * but none was supplied" is marked by the comment below; its return
 * statement is not visible in this view. The final visible return
 * yields true only when @flags does not carry TUNNEL_KEY.
 */
72 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
73 __be16 flags, __be32 key)
75 if (p->i_flags & TUNNEL_KEY) {
76 if (flags & TUNNEL_KEY)
77 return key == p->i_key;
79 /* key expected, none present */
82 return !(flags & TUNNEL_KEY);
85 /* Fallback tunnel: no source, no destination, no key, no options
88 We require an exact key match, i.e. if a key is present in the packet
89 it will match only a tunnel with the same key; if it is not present,
90 it will match only a keyless tunnel.
92 All keyless packets, if not matched by configured keyless tunnels,
93 will match the fallback tunnel.
94 Given src, dst and key, find the appropriate tunnel for input.
*/
/*
 * Find the tunnel that matches the given addresses, key and link.
 *
 * @itn:    per-netns tunnel state: hash table, collect_md tunnel pointer
 *          and fallback device
 * @link:   link index compared against each entry's parms.link
 * @flags:  tunnel flags; TUNNEL_KEY (via ip_tunnel_key_match) and
 *          TUNNEL_NO_KEY are tested
 * @remote: remote address, compared against the entry's iph.daddr
 * @local:  local address, compared against the entry's iph.saddr
 *
 * A key value is also used for hashing and matching; its parameter
 * declaration is not visible in this view.
 *
 * The hash chains are walked with hlist_for_each_entry_rcu() in four
 * passes, then the collect_md tunnel is read with rcu_dereference(), and
 * finally the fallback device's private data is returned if that device
 * exists and is IFF_UP.
 *
 * NOTE(review): the statements controlled by the visible conditions
 * (skipping an entry, returning a match, recording the cand candidate,
 * the final return) are not visible in this view.
 */
96 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
97 int link, __be16 flags,
98 __be32 remote, __be32 local,
101 struct ip_tunnel *t, *cand = NULL;
102 struct hlist_head *head;
103 struct net_device *ndev;
/* Passes 1 and 2 use the bucket selected by (key, remote). */
106 hash = ip_tunnel_hash(key, remote);
107 head = &itn->tunnels[hash];
/* Pass 1: tests local and remote addresses, IFF_UP and the key. */
109 hlist_for_each_entry_rcu(t, head, hash_node) {
110 if (local != t->parms.iph.saddr ||
111 remote != t->parms.iph.daddr ||
112 !(t->dev->flags & IFF_UP))
115 if (!ip_tunnel_key_match(&t->parms, flags, key))
118 if (t->parms.link == link)
/* Pass 2: tests the remote address against entries whose saddr is 0. */
124 hlist_for_each_entry_rcu(t, head, hash_node) {
125 if (remote != t->parms.iph.daddr ||
126 t->parms.iph.saddr != 0 ||
127 !(t->dev->flags & IFF_UP))
130 if (!ip_tunnel_key_match(&t->parms, flags, key))
133 if (t->parms.link == link)
/* Passes 3 and 4 use the bucket selected by (key, 0). */
139 hash = ip_tunnel_hash(key, 0);
140 head = &itn->tunnels[hash];
/*
 * Pass 3: tests for local == saddr with daddr == 0, or for a multicast
 * local address equal to the entry's daddr.
 */
142 hlist_for_each_entry_rcu(t, head, hash_node) {
143 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
144 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
147 if (!(t->dev->flags & IFF_UP))
150 if (!ip_tunnel_key_match(&t->parms, flags, key))
153 if (t->parms.link == link)
/*
 * Pass 4: tests entries with saddr == 0 and daddr == 0; the key is
 * compared directly against i_key unless TUNNEL_NO_KEY is set in flags.
 */
159 hlist_for_each_entry_rcu(t, head, hash_node) {
160 if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
161 t->parms.iph.saddr != 0 ||
162 t->parms.iph.daddr != 0 ||
163 !(t->dev->flags & IFF_UP))
166 if (t->parms.link == link)
/* The tests applied to the collect_md tunnel are not visible here. */
175 t = rcu_dereference(itn->collect_md_tun);
/*
 * fb_tunnel_dev is cleared with WRITE_ONCE() in ip_tunnel_uninit(), so
 * it is sampled once here and checked for NULL before use.
 */
179 ndev = READ_ONCE(itn->fb_tunnel_dev);
180 if (ndev && ndev->flags & IFF_UP)
181 return netdev_priv(ndev);
185 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
/*
 * Select the hash chain in itn->tunnels[] for tunnel parameters @parms.
 *
 * The remote address used for hashing is parms->iph.daddr when that
 * address is non-zero and not multicast; the value used otherwise is not
 * visible in this view. The key used for hashing starts as parms->i_key;
 * the adjustment made for a VTI tunnel without TUNNEL_KEY (second
 * condition below) is likewise not visible.
 *
 * Returns a pointer to the selected bucket head.
 */
187 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
188 struct ip_tunnel_parm *parms)
192 __be32 i_key = parms->i_key;
194 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
195 remote = parms->iph.daddr;
199 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
202 h = ip_tunnel_hash(i_key, remote);
203 return &itn->tunnels[h];
/*
 * Insert tunnel @t into the per-netns hash table of @itn.
 *
 * The bucket is chosen by ip_bucket() from t->parms and the tunnel is
 * linked at the head of that chain with the RCU-aware list primitive.
 * The tunnel is also published as itn->collect_md_tun; the condition
 * guarding that assignment is not visible in this view.
 */
206 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
208 struct hlist_head *head = ip_bucket(itn, &t->parms);
211 rcu_assign_pointer(itn->collect_md_tun, t);
212 hlist_add_head_rcu(&t->hash_node, head);
/*
 * Remove tunnel @t from the hash table of @itn.
 *
 * Clears itn->collect_md_tun (the condition guarding that assignment is
 * not visible in this view) and unlinks t->hash_node with
 * hlist_del_init_rcu().
 */
215 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
218 rcu_assign_pointer(itn->collect_md_tun, NULL);
219 hlist_del_init_rcu(&t->hash_node);
/*
 * Search the hash table for a tunnel configured exactly like @parms.
 *
 * @itn:   per-netns tunnel state
 * @parms: parameters to look for (saddr, daddr, i_key, i_flags, link)
 *
 * A device type value (type) is also compared against t->dev->type; its
 * parameter declaration is not visible in this view.
 *
 * Only the chain returned by ip_bucket() for @parms is walked. An entry
 * satisfies the visible condition when local address, remote address,
 * link and device type are all equal and ip_tunnel_key_match() accepts
 * the flags/key pair. The statements executed on a match and the return
 * statement are not visible here.
 */
222 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
223 struct ip_tunnel_parm *parms,
226 __be32 remote = parms->iph.daddr;
227 __be32 local = parms->iph.saddr;
228 __be32 key = parms->i_key;
229 __be16 flags = parms->i_flags;
230 int link = parms->link;
231 struct ip_tunnel *t = NULL;
232 struct hlist_head *head = ip_bucket(itn, parms);
234 hlist_for_each_entry_rcu(t, head, hash_node) {
235 if (local == t->parms.iph.saddr &&
236 remote == t->parms.iph.daddr &&
237 link == t->parms.link &&
238 type == t->dev->type &&
239 ip_tunnel_key_match(&t->parms, flags, key))
/*
 * Allocate a tunnel net_device, fill in its parameters and register it.
 *
 * @net:   namespace assigned to the new device with dev_net_set()
 * @ops:   rtnl link ops supplying priv_size, the setup callback and the
 *         kind string used for the default device name
 * @parms: tunnel parameters, copied by value into the tunnel private data
 *
 * Callers in this file treat the result as an ERR_PTR-encoded pointer
 * (IS_ERR / ERR_CAST). Error handling and the return statements are not
 * visible in this view.
 */
245 static struct net_device *__ip_tunnel_create(struct net *net,
246 const struct rtnl_link_ops *ops,
247 struct ip_tunnel_parm *parms)
250 struct ip_tunnel *tunnel;
251 struct net_device *dev;
/* A caller-supplied name is validated before it is copied. */
255 if (parms->name[0]) {
256 if (!dev_valid_name(parms->name))
258 strlcpy(name, parms->name, IFNAMSIZ);
/*
 * Otherwise the name starts from ops->kind; the length check keeps
 * room in an IFNAMSIZ buffer for what is appended afterwards (the
 * appending statement is not visible here).
 */
260 if (strlen(ops->kind) > (IFNAMSIZ - 3))
262 strcpy(name, ops->kind);
267 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
272 dev_net_set(dev, net);
274 dev->rtnl_link_ops = ops;
276 tunnel = netdev_priv(dev);
277 tunnel->parms = *parms;
280 err = register_netdevice(dev);
/*
 * Initialise an IPv4 flow key for a tunnel route lookup.
 *
 * @fl4:   flow structure to fill; zeroed first
 * @daddr: destination address
 * @saddr: source address
 * @key:   stored in fl4->fl4_gre_key
 * @tos:   stored in fl4->flowi4_tos
 * @oif:   stored in fl4->flowi4_oif
 *
 * A protocol value (proto) is stored in fl4->flowi4_proto; its parameter
 * declaration is not visible in this view. The statements that store
 * @daddr and @saddr are not visible either.
 */
292 static inline void init_tunnel_flow(struct flowi4 *fl4,
294 __be32 daddr, __be32 saddr,
295 __be32 key, __u8 tos, int oif)
297 memset(fl4, 0, sizeof(*fl4));
298 fl4->flowi4_oif = oif;
301 fl4->flowi4_tos = tos;
302 fl4->flowi4_proto = proto;
303 fl4->fl4_gre_key = key;
/*
 * Work out headroom and MTU for tunnel device @dev from the device the
 * tunnel is expected to transmit through.
 *
 * The underlying device (tdev) is guessed from a route lookup built from
 * the tunnel's configured IP header, with a fallback to the device whose
 * index is tunnel->parms.link. dev->needed_headroom is set to the tunnel
 * header length plus the underlying link-layer headroom.
 *
 * Callers in this file store the return value in dev->mtu or in a local
 * named mtu. The return statement, the conditions guarding the route
 * lookup and the statement under the IPV4_MIN_MTU test are not visible
 * in this view.
 */
306 static int ip_tunnel_bind_dev(struct net_device *dev)
308 struct net_device *tdev = NULL;
309 struct ip_tunnel *tunnel = netdev_priv(dev);
310 const struct iphdr *iph;
/* Defaults used unless an underlying device is found. */
311 int hlen = LL_MAX_HEADER;
312 int mtu = ETH_DATA_LEN;
/* Tunnel overhead: tunnel-specific header plus the outer IPv4 header. */
313 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
315 iph = &tunnel->parms.iph;
317 /* Guess output device to choose reasonable mtu and needed_headroom */
322 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
323 iph->saddr, tunnel->parms.o_key,
324 RT_TOS(iph->tos), tunnel->parms.link);
325 rt = ip_route_output_key(tunnel->net, &fl4);
331 if (dev->type != ARPHRD_ETHER)
332 dev->flags |= IFF_POINTOPOINT;
/* Any cached route may no longer match the binding. */
334 dst_cache_reset(&tunnel->dst_cache);
/* No device from routing: fall back to the configured link index. */
337 if (!tdev && tunnel->parms.link)
338 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
341 hlen = tdev->hard_header_len + tdev->needed_headroom;
345 dev->needed_headroom = t_hlen + hlen;
346 mtu -= (dev->hard_header_len + t_hlen);
348 if (mtu < IPV4_MIN_MTU)
/*
 * Create a new tunnel device in @net and add it to the hash table.
 *
 * @net:   namespace for the new device
 * @itn:   per-netns tunnel state; its fallback device must exist because
 *         the new device reuses the fallback device's rtnl_link_ops
 * @parms: parameters for the new tunnel
 *
 * The device is created by __ip_tunnel_create(), its MTU is taken from
 * ip_tunnel_bind_dev() and its private tunnel data is inserted with
 * ip_tunnel_add(). A failure of __ip_tunnel_create() is forwarded with
 * ERR_CAST(); the test guarding that return and the final return are not
 * visible in this view.
 */
354 static struct ip_tunnel *ip_tunnel_create(struct net *net,
355 struct ip_tunnel_net *itn,
356 struct ip_tunnel_parm *parms)
358 struct ip_tunnel *nt;
359 struct net_device *dev;
361 BUG_ON(!itn->fb_tunnel_dev);
362 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
364 return ERR_CAST(dev);
366 dev->mtu = ip_tunnel_bind_dev(dev);
368 nt = netdev_priv(dev);
369 ip_tunnel_add(itn, nt);
/*
 * Common receive path for a packet that has been matched to @tunnel.
 *
 * @tunnel:  tunnel the packet belongs to
 * @skb:     the received packet
 * @tpi:     parsed tunnel header information; flags and seq are read
 * @tun_dst: metadata dst attached to the skb with skb_dst_set(); the
 *           condition guarding that call is not visible in this view
 *
 * One further parameter follows @tun_dst; its declaration is not visible.
 *
 * Visible processing, in order: multicast accounting (when
 * CONFIG_NET_IPGRE_BROADCAST is set), checksum-flag and sequence checks
 * against the tunnel configuration, ECN decapsulation, per-CPU rx
 * statistics, skb scrubbing, Ethernet or plain device assignment, and
 * hand-off to the tunnel's GRO cells.
 *
 * NOTE(review): the drop path, the handling of the IP_ECN_decapsulate()
 * result and the return statements are not visible in this view.
 */
373 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
374 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
377 struct pcpu_sw_netstats *tstats;
/* Header pointer taken before the network header is reset below. */
378 const struct iphdr *iph = ip_hdr(skb);
381 #ifdef CONFIG_NET_IPGRE_BROADCAST
382 if (ipv4_is_multicast(iph->daddr)) {
383 tunnel->dev->stats.multicast++;
384 skb->pkt_type = PACKET_BROADCAST;
/*
 * The packet and the tunnel must agree on TUNNEL_CSUM; a mismatch in
 * either direction is counted as a CRC error.
 */
388 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
389 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
390 tunnel->dev->stats.rx_crc_errors++;
391 tunnel->dev->stats.rx_errors++;
/*
 * With TUNNEL_SEQ configured, a packet without a sequence number, or
 * with one behind the expected i_seqno (signed 32-bit difference), is
 * counted as a FIFO error. The next expected number is seq + 1.
 */
395 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
396 if (!(tpi->flags&TUNNEL_SEQ) ||
397 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
398 tunnel->dev->stats.rx_fifo_errors++;
399 tunnel->dev->stats.rx_errors++;
402 tunnel->i_seqno = ntohl(tpi->seq) + 1;
405 skb_reset_network_header(skb);
407 err = IP_ECN_decapsulate(iph, skb);
410 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
411 &iph->saddr, iph->tos);
413 ++tunnel->dev->stats.rx_frame_errors;
414 ++tunnel->dev->stats.rx_errors;
/* Per-CPU counters, updated inside the u64_stats begin/end pair. */
419 tstats = this_cpu_ptr(tunnel->dev->tstats);
420 u64_stats_update_begin(&tstats->syncp);
421 tstats->rx_packets++;
422 tstats->rx_bytes += skb->len;
423 u64_stats_update_end(&tstats->syncp);
/* The second argument is true when tunnel and device namespaces differ. */
425 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
427 if (tunnel->dev->type == ARPHRD_ETHER) {
428 skb->protocol = eth_type_trans(skb, tunnel->dev);
429 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
431 skb->dev = tunnel->dev;
435 skb_dst_set(skb, (struct dst_entry *)tun_dst);
437 gro_cells_receive(&tunnel->gro_cells, skb);
444 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
/*
 * Register encapsulation operations @ops in a numbered slot.
 *
 * The slot number (num) is range-checked against MAX_IPTUN_ENCAP_OPS;
 * its parameter declaration, the statement under that check and the
 * cmpxchg() arguments are not visible in this view. The result is
 * derived from the negated cmpxchg() return value.
 */
446 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
449 if (num >= MAX_IPTUN_ENCAP_OPS)
452 return !cmpxchg((const struct ip_tunnel_encap_ops **)
456 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
/*
 * Unregister encapsulation operations @ops from a numbered slot.
 *
 * The slot number (num) is range-checked against MAX_IPTUN_ENCAP_OPS;
 * its parameter declaration and the statement under that check are not
 * visible in this view. The slot is swapped from @ops to NULL with
 * cmpxchg(): ret is 0 when the previous value was @ops and -1 otherwise.
 * What follows the cmpxchg() (including the return) is not visible.
 */
458 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
463 if (num >= MAX_IPTUN_ENCAP_OPS)
466 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
468 ops, NULL) == ops) ? 0 : -1;
474 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
/*
 * Apply encapsulation settings @ipencap to tunnel @t.
 *
 * @t:       tunnel to configure
 * @ipencap: requested encapsulation (type, sport, dport, flags)
 *
 * t->encap is cleared first, then the encapsulation header length is
 * obtained from ip_encap_hlen(). The settings are copied, and t->hlen is
 * recomputed as encapsulation header length plus t->tun_hlen. The check
 * of the ip_encap_hlen() result and the return statements are not
 * visible in this view.
 */
476 int ip_tunnel_encap_setup(struct ip_tunnel *t,
477 struct ip_tunnel_encap *ipencap)
481 memset(&t->encap, 0, sizeof(t->encap));
483 hlen = ip_encap_hlen(ipencap);
487 t->encap.type = ipencap->type;
488 t->encap.sport = ipencap->sport;
489 t->encap.dport = ipencap->dport;
490 t->encap.flags = ipencap->flags;
492 t->encap_hlen = hlen;
493 t->hlen = t->encap_hlen + t->tun_hlen;
497 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
/*
 * Update path-MTU state for a packet about to be tunnelled and emit an
 * ICMP / ICMPv6 "too big" error where the visible conditions call for it.
 *
 * @dev:       tunnel device
 * @skb:       packet being transmitted
 * @rt:        route chosen for the outer packet
 * @df:        DF setting for the outer header
 * @inner_iph: inner network header (read as IPv4 for the DF test)
 *
 * ip_tunnel_xmit() treats a non-zero return as a failure. The return
 * statements and several guarding conditions (which of the two mtu
 * computations is used, the size comparisons completing the ICMP tests)
 * are not visible in this view.
 */
499 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
500 struct rtable *rt, __be16 df,
501 const struct iphdr *inner_iph)
503 struct ip_tunnel *tunnel = netdev_priv(dev);
/* Payload size without the tunnel and link-layer headers. */
504 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
/* Route MTU minus link-layer, outer IPv4 and tunnel header overhead. */
508 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
509 - sizeof(struct iphdr) - tunnel->hlen;
/* Alternative: MTU of the skb's own dst, or the device MTU without one. */
511 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
514 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
516 if (skb->protocol == htons(ETH_P_IP)) {
517 if (!skb_is_gso(skb) &&
518 (inner_iph->frag_off & htons(IP_DF)) &&
/* The control block is cleared before the ICMP error is generated. */
520 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
521 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
525 #if IS_ENABLED(CONFIG_IPV6)
526 else if (skb->protocol == htons(ETH_P_IPV6)) {
527 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
/*
 * The IPv6 route's MTU metric is lowered only when the new value is
 * smaller than the current one and at least IPV6_MIN_MTU, and the
 * tunnel has a unicast remote address or the route is a /128.
 */
529 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
530 mtu >= IPV6_MIN_MTU) {
531 if ((tunnel->parms.iph.daddr &&
532 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
533 rt6->rt6i_dst.plen == 128) {
534 rt6->rt6i_flags |= RTF_MODIFIED;
535 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
539 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
541 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
/*
 * Transmit @skb through tunnel device @dev using the tunnel metadata
 * attached to the skb instead of the device's configured parameters.
 *
 * @skb:   packet to send
 * @dev:   tunnel device
 * @proto: protocol value for the outer IPv4 header and the route lookup
 *
 * The outer addresses come from the skb's ip_tunnel_info key. The
 * metadata must exist, have IP_TUNNEL_INFO_TX set and be AF_INET. The
 * outer TOS and TTL are taken from the inner IPv4 / IPv6 header in the
 * visible branches, with ip4_dst_hoplimit() as another TTL source.
 *
 * NOTE(review): the statements selecting between these sources, the
 * error labels and the jumps to them are not visible in this view; only
 * the counters updated on the error paths are.
 */
549 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
551 struct ip_tunnel *tunnel = netdev_priv(dev);
552 u32 headroom = sizeof(struct iphdr);
553 struct ip_tunnel_info *tun_info;
554 const struct ip_tunnel_key *key;
555 const struct iphdr *inner_iph;
561 tun_info = skb_tunnel_info(skb);
562 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
563 ip_tunnel_info_af(tun_info) != AF_INET))
565 key = &tun_info->key;
566 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
567 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
570 if (skb->protocol == htons(ETH_P_IP))
571 tos = inner_iph->tos;
572 else if (skb->protocol == htons(ETH_P_IPV6))
573 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
575 init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
576 RT_TOS(tos), tunnel->parms.link);
577 if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
579 rt = ip_route_output_key(tunnel->net, &fl4);
581 dev->stats.tx_carrier_errors++;
/* A route that leads back to this tunnel device counts as a collision. */
584 if (rt->dst.dev == dev) {
586 dev->stats.collisions++;
589 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
592 if (skb->protocol == htons(ETH_P_IP))
593 ttl = inner_iph->ttl;
594 else if (skb->protocol == htons(ETH_P_IPV6))
595 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
597 ttl = ip4_dst_hoplimit(&rt->dst);
599 if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
601 else if (skb->protocol == htons(ETH_P_IP))
602 df = inner_iph->frag_off & htons(IP_DF);
/* needed_headroom only ever grows to the largest value seen. */
603 headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
604 if (headroom > dev->needed_headroom)
605 dev->needed_headroom = headroom;
607 if (skb_cow_head(skb, dev->needed_headroom)) {
/* The last argument is true when tunnel and device namespaces differ. */
611 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
612 df, !net_eq(tunnel->net, dev_net(dev)));
615 dev->stats.tx_errors++;
618 dev->stats.tx_dropped++;
622 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
/*
 * Transmit @skb through tunnel device @dev using the outer-header
 * template @tnl_params.
 *
 * @skb:        packet to send
 * @dev:        tunnel device
 * @tnl_params: outer IPv4 header template (daddr, saddr, tos, ttl,
 *              frag_off are read)
 * @protocol:   outer protocol; ip_tunnel_encap() receives a pointer to it
 *
 * Visible stages: make sure the inner network header can be read;
 * choose the outer destination; choose the outer TOS; look up a route,
 * using the tunnel's dst cache when the tunnel has a fixed remote
 * address; run path-MTU handling; apply ECN encapsulation; choose the
 * TTL; ensure headroom; hand the packet to iptunnel_xmit().
 *
 * NOTE(review): many controlling statements (the tests deciding when
 * the destination, TOS or TTL is derived from the inner packet, all
 * error labels and gotos, the packet free) are not visible in this
 * view; only the counters updated on the error paths are.
 */
624 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
625 const struct iphdr *tnl_params, u8 protocol)
627 struct ip_tunnel *tunnel = netdev_priv(dev);
628 unsigned int inner_nhdr_len = 0;
629 const struct iphdr *inner_iph;
633 struct rtable *rt; /* Route to the other host */
634 unsigned int max_headroom; /* The extra header space needed */
638 /* ensure we can access the inner net header, for several users below */
639 if (skb->protocol == htons(ETH_P_IP))
640 inner_nhdr_len = sizeof(struct iphdr);
641 else if (skb->protocol == htons(ETH_P_IPV6))
642 inner_nhdr_len = sizeof(struct ipv6hdr);
643 if (unlikely(!pskb_may_pull(skb, inner_nhdr_len)))
646 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
/* connected: the tunnel has a fixed (non-zero) remote address. */
647 connected = (tunnel->parms.iph.daddr != 0);
649 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
651 dst = tnl_params->daddr;
/*
 * The lines below derive dst from other sources: tunnel metadata on
 * the skb, the next hop of the skb's IPv4 route, or an IPv4-compatible
 * IPv6 neighbour / destination address. The condition under which
 * this happens is not visible here.
 */
654 struct ip_tunnel_info *tun_info;
657 dev->stats.tx_fifo_errors++;
661 tun_info = skb_tunnel_info(skb);
662 if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
663 ip_tunnel_info_af(tun_info) == AF_INET &&
664 tun_info->key.u.ipv4.dst)
665 dst = tun_info->key.u.ipv4.dst;
666 else if (skb->protocol == htons(ETH_P_IP)) {
667 rt = skb_rtable(skb);
668 dst = rt_nexthop(rt, inner_iph->daddr);
670 #if IS_ENABLED(CONFIG_IPV6)
671 else if (skb->protocol == htons(ETH_P_IPV6)) {
672 const struct in6_addr *addr6;
673 struct neighbour *neigh;
674 bool do_tx_error_icmp;
677 neigh = dst_neigh_lookup(skb_dst(skb),
678 &ipv6_hdr(skb)->daddr);
682 addr6 = (const struct in6_addr *)&neigh->primary_key;
683 addr_type = ipv6_addr_type(addr6);
/* An unspecified neighbour key falls back to the packet's daddr. */
685 if (addr_type == IPV6_ADDR_ANY) {
686 addr6 = &ipv6_hdr(skb)->daddr;
687 addr_type = ipv6_addr_type(addr6);
/*
 * Only an IPv4-compatible address yields a destination: its last
 * 32 bits are used as the IPv4 address.
 */
690 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
691 do_tx_error_icmp = true;
693 do_tx_error_icmp = false;
694 dst = addr6->s6_addr32[3];
/* The neighbour reference is dropped before the error flag is acted on. */
696 neigh_release(neigh);
697 if (do_tx_error_icmp)
707 tos = tnl_params->tos;
/* TOS sources taken from the inner IPv4 / IPv6 header. */
710 if (skb->protocol == htons(ETH_P_IP)) {
711 tos = inner_iph->tos;
713 } else if (skb->protocol == htons(ETH_P_IPV6)) {
714 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
719 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
720 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
722 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
/* The dst cache is consulted only for connected tunnels. */
725 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
729 rt = ip_route_output_key(tunnel->net, &fl4);
732 dev->stats.tx_carrier_errors++;
736 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
/* A route that leads back to this tunnel device counts as a collision. */
740 if (rt->dst.dev == dev) {
742 dev->stats.collisions++;
/*
 * DF starts from the template; the inner IPv4 DF bit is OR-ed in
 * unless the tunnel is configured with ignore_df.
 */
746 df = tnl_params->frag_off;
747 if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
748 df |= (inner_iph->frag_off & htons(IP_DF));
750 if (tnl_update_pmtu(dev, skb, rt, df, inner_iph)) {
/*
 * While err_count is positive and the error is younger than
 * IPTUNNEL_ERR_TIMEO, link failure is reported for the packet;
 * otherwise the count is reset.
 */
755 if (tunnel->err_count > 0) {
756 if (time_before(jiffies,
757 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
760 dst_link_failure(skb);
762 tunnel->err_count = 0;
765 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
766 ttl = tnl_params->ttl;
/* TTL sources: inner IPv4 ttl, inner IPv6 hop limit, or the route. */
768 if (skb->protocol == htons(ETH_P_IP))
769 ttl = inner_iph->ttl;
770 #if IS_ENABLED(CONFIG_IPV6)
771 else if (skb->protocol == htons(ETH_P_IPV6))
772 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
775 ttl = ip4_dst_hoplimit(&rt->dst);
/* needed_headroom only ever grows to the largest value seen. */
778 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
779 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
780 if (max_headroom > dev->needed_headroom)
781 dev->needed_headroom = max_headroom;
783 if (skb_cow_head(skb, dev->needed_headroom)) {
785 dev->stats.tx_dropped++;
/* The last argument is true when tunnel and device namespaces differ. */
790 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
791 df, !net_eq(tunnel->net, dev_net(dev)));
794 #if IS_ENABLED(CONFIG_IPV6)
796 dst_link_failure(skb);
799 dev->stats.tx_errors++;
802 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
/*
 * Apply new parameters @p to an existing tunnel.
 *
 * @itn: per-netns tunnel state
 * @dev: net_device of the tunnel being changed
 * @p:   new parameters
 *
 * Callers in this file pass the tunnel (t) as the second argument and a
 * boolean as the fifth; those two parameter declarations are not visible
 * in this view.
 *
 * The tunnel is removed from the hash table while addresses and keys
 * change, because those fields determine its bucket, and re-added
 * afterwards. If the link index changed, the device is re-bound with
 * ip_tunnel_bind_dev(); how the returned mtu is used is not visible
 * here. Finally the dst cache is reset and a state change is signalled.
 */
804 static void ip_tunnel_update(struct ip_tunnel_net *itn,
806 struct net_device *dev,
807 struct ip_tunnel_parm *p,
810 ip_tunnel_del(itn, t);
811 t->parms.iph.saddr = p->iph.saddr;
812 t->parms.iph.daddr = p->iph.daddr;
813 t->parms.i_key = p->i_key;
814 t->parms.o_key = p->o_key;
/* Non-Ethernet tunnels expose the IPv4 endpoints as device addresses. */
815 if (dev->type != ARPHRD_ETHER) {
816 memcpy(dev->dev_addr, &p->iph.saddr, 4);
817 memcpy(dev->broadcast, &p->iph.daddr, 4);
819 ip_tunnel_add(itn, t);
821 t->parms.iph.ttl = p->iph.ttl;
822 t->parms.iph.tos = p->iph.tos;
823 t->parms.iph.frag_off = p->iph.frag_off;
825 if (t->parms.link != p->link) {
828 t->parms.link = p->link;
829 mtu = ip_tunnel_bind_dev(dev);
833 dst_cache_reset(&t->dst_cache);
834 netdev_state_change(dev);
/*
 * Handle the tunnel ioctl commands for @dev.
 *
 * @dev: device the ioctl was issued on
 * @p:   tunnel parameters; used as lookup key and input, and overwritten
 *       with the tunnel's parameters by the memcpy() below
 * @cmd: ioctl command; SIOCADDTUNNEL and SIOCCHGTUNNEL are tested
 *       explicitly in the visible code
 *
 * The fallback device must exist (BUG_ON). Operations that change or
 * delete tunnels require CAP_NET_ADMIN in the user namespace of the
 * tunnel's netns.
 *
 * NOTE(review): the switch/case labels, error codes, the statements
 * under most conditions and the return are not visible in this view;
 * the inline notes describe only what the visible lines show.
 */
837 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
840 struct ip_tunnel *t = netdev_priv(dev);
841 struct net *net = t->net;
842 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
844 BUG_ON(!itn->fb_tunnel_dev);
/* Query: on the fallback device, @p is used as a lookup key. */
847 if (dev == itn->fb_tunnel_dev) {
848 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
850 t = netdev_priv(dev);
852 memcpy(p, &t->parms, sizeof(*p));
/* Add / change: privileged. */
858 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
861 p->iph.frag_off |= htons(IP_DF);
862 if (!(p->i_flags & VTI_ISVTI)) {
863 if (!(p->i_flags & TUNNEL_KEY))
865 if (!(p->o_flags & TUNNEL_KEY))
869 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
871 if (cmd == SIOCADDTUNNEL) {
873 t = ip_tunnel_create(net, itn, p);
874 err = PTR_ERR_OR_ZERO(t);
881 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
888 unsigned int nflags = 0;
/*
 * Derive the device flags implied by the new remote address and
 * compare them with the current POINTOPOINT / BROADCAST flags.
 */
890 if (ipv4_is_multicast(p->iph.daddr))
891 nflags = IFF_BROADCAST;
892 else if (p->iph.daddr)
893 nflags = IFF_POINTOPOINT;
895 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
900 t = netdev_priv(dev);
906 ip_tunnel_update(itn, t, dev, p, true);
/* Delete: privileged. */
914 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
917 if (dev == itn->fb_tunnel_dev) {
919 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
/* The lookup result is compared with the fallback tunnel itself. */
923 if (t == netdev_priv(itn->fb_tunnel_dev))
927 unregister_netdevice(dev);
938 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
/*
 * Change the MTU of tunnel device @dev to @new_mtu.
 *
 * @dev:     tunnel device
 * @new_mtu: requested MTU
 * @strict:  NOTE(review): presumably selects rejecting versus clamping an
 *           oversized request; its use is not visible in this view
 *
 * The upper bound is 0xFFF8 minus the device's hard header length and
 * the tunnel overhead (tunnel header plus outer IPv4 header). The lower
 * bound check, the statements under the upper bound check, the MTU
 * assignment and the return are not visible here.
 */
940 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
942 struct ip_tunnel *tunnel = netdev_priv(dev);
943 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
944 int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
949 if (new_mtu > max_mtu) {
959 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
/*
 * Change the MTU of tunnel device @dev.
 *
 * Thin wrapper that calls __ip_tunnel_change_mtu() with strict set to
 * true and returns its result.
 */
961 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
963 return __ip_tunnel_change_mtu(dev, new_mtu, true);
965 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
/*
 * Release the per-tunnel resources of @dev.
 *
 * Installed as dev->destructor by ip_tunnel_init(). Destroys the GRO
 * cells and the dst cache and frees the per-CPU statistics, i.e. the
 * three resources ip_tunnel_init() sets up. Anything after the
 * free_percpu() call is not visible in this view.
 */
967 static void ip_tunnel_dev_free(struct net_device *dev)
969 struct ip_tunnel *tunnel = netdev_priv(dev);
971 gro_cells_destroy(&tunnel->gro_cells);
972 dst_cache_destroy(&tunnel->dst_cache);
973 free_percpu(dev->tstats);
/*
 * Delete tunnel device @dev.
 *
 * @dev:  tunnel device to remove
 * @head: list on which the device is queued for unregistration
 *
 * The fallback device is left alone: the tunnel is unhashed and queued
 * for unregistration only when @dev is not itn->fb_tunnel_dev.
 */
977 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
979 struct ip_tunnel *tunnel = netdev_priv(dev);
980 struct ip_tunnel_net *itn;
982 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
984 if (itn->fb_tunnel_dev != dev) {
985 ip_tunnel_del(itn, netdev_priv(dev));
986 unregister_netdevice_queue(dev, head);
989 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
/*
 * Return the network namespace associated with tunnel device @dev's link.
 *
 * NOTE(review): the return statement is not visible in this view;
 * presumably it returns a field of the tunnel private data, to be
 * confirmed against the full source.
 */
991 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
993 struct ip_tunnel *tunnel = netdev_priv(dev);
997 EXPORT_SYMBOL(ip_tunnel_get_link_net);
/*
 * Return the link index stored in the tunnel parameters of @dev
 * (tunnel->parms.link).
 */
999 int ip_tunnel_get_iflink(const struct net_device *dev)
1001 struct ip_tunnel *tunnel = netdev_priv(dev);
1003 return tunnel->parms.link;
1005 EXPORT_SYMBOL(ip_tunnel_get_iflink);
/*
 * Set up the per-namespace tunnel state for @net.
 *
 * @net:           namespace being initialised
 * @ip_tnl_net_id: id used with net_generic() to find the ip_tunnel_net
 * @ops:           rtnl link ops used to create the fallback device
 * @devname:       name copied into the fallback device's parameters
 *
 * Every hash bucket is initialised empty. A fallback device is created
 * with __ip_tunnel_create() from zeroed parameters carrying @devname.
 * On success it is flagged NETIF_F_NETNS_LOCAL, its MTU is taken from
 * ip_tunnel_bind_dev() and it is added to the hash table.
 *
 * Returns 0 on success, or the error encoded in the pointer returned by
 * __ip_tunnel_create() (PTR_ERR_OR_ZERO).
 *
 * NOTE(review): the condition under which fb_tunnel_dev is set to NULL
 * and the condition guarding the strlcpy() are not visible in this view.
 */
1007 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
1008 struct rtnl_link_ops *ops, char *devname)
1010 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1011 struct ip_tunnel_parm parms;
1014 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1015 INIT_HLIST_HEAD(&itn->tunnels[i]);
1018 itn->fb_tunnel_dev = NULL;
1022 memset(&parms, 0, sizeof(parms));
1024 strlcpy(parms.name, devname, IFNAMSIZ);
1027 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1028 /* FB netdevice is special: we have one, and only one per netns.
1029 * Allowing to move it to another netns is clearly unsafe.
1031 if (!IS_ERR(itn->fb_tunnel_dev)) {
1032 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1033 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1034 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1038 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1040 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
/*
 * Queue every tunnel device belonging to @itn for unregistration.
 *
 * @itn:  per-netns tunnel state being torn down
 * @head: list that collects the devices to unregister
 * @ops:  rtnl link ops identifying this tunnel type
 *
 * Two sweeps are made. The first walks all devices in the namespace of
 * the fallback device and queues those whose rtnl_link_ops equal @ops.
 * The second walks every hash bucket and queues tunnels whose device
 * lives in a different namespace, since the first sweep cannot have
 * seen those.
 */
1042 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1043 struct rtnl_link_ops *ops)
1045 struct net *net = dev_net(itn->fb_tunnel_dev);
1046 struct net_device *dev, *aux;
1049 for_each_netdev_safe(net, dev, aux)
1050 if (dev->rtnl_link_ops == ops)
1051 unregister_netdevice_queue(dev, head);
1053 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1054 struct ip_tunnel *t;
1055 struct hlist_node *n;
1056 struct hlist_head *thead = &itn->tunnels[h];
1058 hlist_for_each_entry_safe(t, n, thead, hash_node)
1059 /* If dev is in the same netns, it has already
1060 * been added to the list by the previous loop.
1062 if (!net_eq(dev_net(t->dev), net))
1063 unregister_netdevice_queue(t->dev, head);
/*
 * Tear down all tunnels of one type in a namespace.
 *
 * @itn: per-netns tunnel state
 * @ops: rtnl link ops identifying the tunnel type
 *
 * Collects the devices with ip_tunnel_destroy() on a local list and
 * unregisters them in one batch with unregister_netdevice_many(). The
 * list declaration and any locking around these calls are not visible
 * in this view.
 */
1067 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
1072 ip_tunnel_destroy(itn, &list, ops);
1073 unregister_netdevice_many(&list);
1076 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
/*
 * rtnetlink "new link" helper: register tunnel device @dev and add it to
 * the per-netns hash table.
 *
 * @dev: device being created; its private data is the tunnel
 * @tb:  netlink attributes; IFLA_ADDRESS is tested here
 * @p:   tunnel parameters, used for the duplicate lookup
 *
 * Before registration, a collect_md tunnel checks whether the namespace
 * already has a collect_md tunnel, and any other tunnel checks for an
 * existing tunnel with the same parameters and device type. After
 * registration, an Ethernet-type device without an IFLA_ADDRESS
 * attribute gets a random hardware address, the device is bound with
 * ip_tunnel_bind_dev() and the tunnel is hashed.
 *
 * NOTE(review): the results of the duplicate checks, the copying of @p
 * into the tunnel, the use of mtu and the return are not visible in
 * this view.
 */
1078 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1079 struct ip_tunnel_parm *p)
1081 struct ip_tunnel *nt;
1082 struct net *net = dev_net(dev);
1083 struct ip_tunnel_net *itn;
1087 nt = netdev_priv(dev);
1088 itn = net_generic(net, nt->ip_tnl_net_id);
1090 if (nt->collect_md) {
1091 if (rtnl_dereference(itn->collect_md_tun))
1094 if (ip_tunnel_find(itn, p, dev->type))
1100 err = register_netdevice(dev);
1104 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1105 eth_hw_addr_random(dev);
1107 mtu = ip_tunnel_bind_dev(dev);
1111 ip_tunnel_add(itn, nt);
1115 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
/*
 * rtnetlink "change link" helper: apply new parameters @p to tunnel @dev.
 *
 * @dev: tunnel device to modify
 * @tb:  netlink attributes; the presence of IFLA_MTU is tested
 * @p:   new tunnel parameters
 *
 * The fallback device is special-cased by the first test. A lookup for
 * an existing tunnel with the new parameters is made, and for
 * non-Ethernet devices the POINTOPOINT / BROADCAST flags implied by the
 * new remote address are compared with the device's current flags. The
 * update is done by ip_tunnel_update(), whose last argument is true
 * when no IFLA_MTU attribute was supplied.
 *
 * NOTE(review): the statements under the visible tests (error returns,
 * handling of the lookup result) and the return are not visible in
 * this view.
 */
1117 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1118 struct ip_tunnel_parm *p)
1120 struct ip_tunnel *t;
1121 struct ip_tunnel *tunnel = netdev_priv(dev);
1122 struct net *net = tunnel->net;
1123 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1125 if (dev == itn->fb_tunnel_dev)
1128 t = ip_tunnel_find(itn, p, dev->type);
1136 if (dev->type != ARPHRD_ETHER) {
1137 unsigned int nflags = 0;
1139 if (ipv4_is_multicast(p->iph.daddr))
1140 nflags = IFF_BROADCAST;
1141 else if (p->iph.daddr)
1142 nflags = IFF_POINTOPOINT;
1144 if ((dev->flags ^ nflags) &
1145 (IFF_POINTOPOINT | IFF_BROADCAST))
1150 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1153 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
/*
 * Initialise the per-device tunnel resources of @dev.
 *
 * Installs ip_tunnel_dev_free() as the device destructor, then allocates
 * per-CPU statistics, the dst cache and the GRO cells, in that order.
 * When a later step fails, the visible cleanup lines release the
 * resources acquired earlier. The tunnel records the device's namespace
 * and name, and a collect_md tunnel asks the stack to keep the dst on
 * transmitted packets.
 *
 * NOTE(review): the failure tests, the return statements and the use of
 * the iph local are not visible in this view.
 */
1155 int ip_tunnel_init(struct net_device *dev)
1157 struct ip_tunnel *tunnel = netdev_priv(dev);
1158 struct iphdr *iph = &tunnel->parms.iph;
1161 dev->destructor = ip_tunnel_dev_free;
1162 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1166 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
/* Unwind: only the statistics exist at this point. */
1168 free_percpu(dev->tstats);
1172 err = gro_cells_init(&tunnel->gro_cells, dev);
/* Unwind: dst cache and statistics exist at this point. */
1174 dst_cache_destroy(&tunnel->dst_cache);
1175 free_percpu(dev->tstats);
1180 tunnel->net = dev_net(dev);
1181 strcpy(tunnel->parms.name, dev->name);
1185 if (tunnel->collect_md)
1186 netif_keep_dst(dev);
1189 EXPORT_SYMBOL_GPL(ip_tunnel_init);
/*
 * Detach tunnel device @dev from the per-namespace tunnel state.
 *
 * The tunnel is removed from the hash table. If @dev is the fallback
 * device, the fallback pointer is cleared with WRITE_ONCE(), matching
 * the READ_ONCE() in ip_tunnel_lookup(). The tunnel's dst cache is
 * reset.
 */
1191 void ip_tunnel_uninit(struct net_device *dev)
1193 struct ip_tunnel *tunnel = netdev_priv(dev);
1194 struct net *net = tunnel->net;
1195 struct ip_tunnel_net *itn;
1197 itn = net_generic(net, tunnel->ip_tnl_net_id);
1198 ip_tunnel_del(itn, netdev_priv(dev));
1199 if (itn->fb_tunnel_dev == dev)
1200 WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1202 dst_cache_reset(&tunnel->dst_cache);
1204 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1206 /* Do least required initialization, rest of init is done in tunnel_init call */
/*
 * @dev:    tunnel device whose private data is initialised
 * @net_id: stored as tunnel->ip_tnl_net_id; other functions in this
 *          file pass that field to net_generic() to find the
 *          per-namespace ip_tunnel_net
 */
1207 void ip_tunnel_setup(struct net_device *dev, int net_id)
1209 struct ip_tunnel *tunnel = netdev_priv(dev);
1210 tunnel->ip_tnl_net_id = net_id;
1212 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1214 MODULE_LICENSE("GPL");