/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif
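
/* Tunnels are hashed into IP_TNL_HASH_SIZE buckets keyed on
 * (i_key, remote address), so RX lookups only walk one short chain.
 */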
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
        return hash_32((__force u32)key ^ (__force u32)remote,
                       IP_TNL_HASH_BITS);
}
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
                                __be16 flags, __be32 key)
{
        if (p->i_flags & TUNNEL_KEY) {
                if (flags & TUNNEL_KEY)
                        return key == p->i_key;
                else
                        /* key expected, none present */
                        return false;
        } else
                return !(flags & TUNNEL_KEY);
}
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if no key is present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched by a configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for the incoming
   packet.
*/
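/* Lookup precedence, from most to least specific:
 *   1. saddr and daddr both match (plus key and link)
 *   2. daddr matches, tunnel has no saddr
 *   3. saddr matches, or daddr is a matching multicast address
 *   4. key matches, tunnel has neither saddr nor daddr
 * A tunnel that matches on everything except the link is remembered as
 * a candidate and used when no exact match exists; failing that, the
 * collect_md tunnel and finally the fallback device are tried.
 */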
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
                                   int link, __be16 flags,
                                   __be32 remote, __be32 local,
                                   __be32 key)
{
        struct ip_tunnel *t, *cand = NULL;
        struct hlist_head *head;
        struct net_device *ndev;
        unsigned int hash;

        hash = ip_tunnel_hash(key, remote);
        head = &itn->tunnels[hash];

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (local != t->parms.iph.saddr ||
                    remote != t->parms.iph.daddr ||
                    !(t->dev->flags & IFF_UP))
                        continue;
                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;
                if (t->parms.link == link)
                        return t;
                else
                        cand = t;
        }

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (remote != t->parms.iph.daddr ||
                    t->parms.iph.saddr != 0 ||
                    !(t->dev->flags & IFF_UP))
                        continue;
                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;
                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        hash = ip_tunnel_hash(key, 0);
        head = &itn->tunnels[hash];

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
                    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
                        continue;
                if (!(t->dev->flags & IFF_UP))
                        continue;
                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;
                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
                    t->parms.iph.saddr != 0 ||
                    t->parms.iph.daddr != 0 ||
                    !(t->dev->flags & IFF_UP))
                        continue;
                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        if (cand)
                return cand;

        t = rcu_dereference(itn->collect_md_tun);
        if (t && t->dev->flags & IFF_UP)
                return t;

        ndev = READ_ONCE(itn->fb_tunnel_dev);
        if (ndev && ndev->flags & IFF_UP)
                return netdev_priv(ndev);

        return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
                                    struct ip_tunnel_parm *parms)
{
        unsigned int h;
        __be32 remote;
        __be32 i_key = parms->i_key;

        if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
                remote = parms->iph.daddr;
        else
                remote = 0;

        if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
                i_key = 0;

        h = ip_tunnel_hash(i_key, remote);
        return &itn->tunnels[h];
}
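
/* Add/del run under RTNL; lookups walk the buckets under RCU, so the
 * hash nodes and the collect_md pointer are published with
 * hlist_*_rcu()/rcu_assign_pointer().
 */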
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
        struct hlist_head *head = ip_bucket(itn, &t->parms);

        if (t->collect_md)
                rcu_assign_pointer(itn->collect_md_tun, t);
        hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
        if (t->collect_md)
                rcu_assign_pointer(itn->collect_md_tun, NULL);
        hlist_del_init_rcu(&t->hash_node);
}
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
                                        struct ip_tunnel_parm *parms,
                                        int type)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        __be32 key = parms->i_key;
        __be16 flags = parms->i_flags;
        int link = parms->link;
        struct ip_tunnel *t = NULL;
        struct hlist_head *head = ip_bucket(itn, parms);

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (local == t->parms.iph.saddr &&
                    remote == t->parms.iph.daddr &&
                    link == t->parms.link &&
                    type == t->dev->type &&
                    ip_tunnel_key_match(&t->parms, flags, key))
                        break;
        }
        return t;
}
static struct net_device *__ip_tunnel_create(struct net *net,
                                             const struct rtnl_link_ops *ops,
                                             struct ip_tunnel_parm *parms)
{
        struct ip_tunnel *tunnel;
        struct net_device *dev;
        char name[IFNAMSIZ];
        int err = -E2BIG;

        if (parms->name[0]) {
                if (!dev_valid_name(parms->name))
                        goto failed;
                strlcpy(name, parms->name, IFNAMSIZ);
        } else {
                if (strlen(ops->kind) > (IFNAMSIZ - 3))
                        goto failed;
                strcpy(name, ops->kind);
                strcat(name, "%d");
        }

        ASSERT_RTNL();
        dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
        if (!dev) {
                err = -ENOMEM;
                goto failed;
        }
        dev_net_set(dev, net);

        dev->rtnl_link_ops = ops;

        tunnel = netdev_priv(dev);
        tunnel->parms = *parms;
        tunnel->net = net;

        err = register_netdevice(dev);
        if (err)
                goto failed_free;

        return dev;

failed_free:
        free_netdev(dev);
failed:
        return ERR_PTR(err);
}
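
/* Guess the underlay device from the tunnel parameters and derive the
 * device MTU and needed_headroom from it. Returns the MTU to use.
 */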
static int ip_tunnel_bind_dev(struct net_device *dev)
{
        struct net_device *tdev = NULL;
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr *iph;
        int hlen = LL_MAX_HEADER;
        int mtu = ETH_DATA_LEN;
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);

        iph = &tunnel->parms.iph;

        /* Guess output device to choose reasonable mtu and needed_headroom */
        if (iph->daddr) {
                struct flowi4 fl4;
                struct rtable *rt;

                ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
                                    iph->saddr, tunnel->parms.o_key,
                                    RT_TOS(iph->tos), tunnel->parms.link,
                                    tunnel->fwmark);
                rt = ip_route_output_key(tunnel->net, &fl4);
                if (!IS_ERR(rt)) {
                        tdev = rt->dst.dev;
                        ip_rt_put(rt);
                }
                if (dev->type != ARPHRD_ETHER)
                        dev->flags |= IFF_POINTOPOINT;

                dst_cache_reset(&tunnel->dst_cache);
        }

        if (!tdev && tunnel->parms.link)
                tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

        if (tdev) {
                hlen = tdev->hard_header_len + tdev->needed_headroom;
                mtu = min(tdev->mtu, IP_MAX_MTU);
        }

        dev->needed_headroom = t_hlen + hlen;
        mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);

        if (mtu < IPV4_MIN_MTU)
                mtu = IPV4_MIN_MTU;

        return mtu;
}
static struct ip_tunnel *ip_tunnel_create(struct net *net,
                                          struct ip_tunnel_net *itn,
                                          struct ip_tunnel_parm *parms)
{
        struct ip_tunnel *nt;
        struct net_device *dev;
        int t_hlen;
        int mtu;
        int err;

        dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
        if (IS_ERR(dev))
                return ERR_CAST(dev);

        mtu = ip_tunnel_bind_dev(dev);
        err = dev_set_mtu(dev, mtu);
        if (err)
                goto err_dev_set_mtu;

        nt = netdev_priv(dev);
        t_hlen = nt->hlen + sizeof(struct iphdr);
        dev->min_mtu = ETH_MIN_MTU;
        dev->max_mtu = IP_MAX_MTU - t_hlen;
        if (dev->type == ARPHRD_ETHER)
                dev->max_mtu -= dev->hard_header_len;

        ip_tunnel_add(itn, nt);
        return nt;

err_dev_set_mtu:
        unregister_netdevice(dev);
        return ERR_PTR(err);
}
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
                  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
                  bool log_ecn_error)
{
        struct pcpu_sw_netstats *tstats;
        const struct iphdr *iph = ip_hdr(skb);
        int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
        if (ipv4_is_multicast(iph->daddr)) {
                tunnel->dev->stats.multicast++;
                skb->pkt_type = PACKET_BROADCAST;
        }
#endif

        if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
             ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
                tunnel->dev->stats.rx_crc_errors++;
                tunnel->dev->stats.rx_errors++;
                goto drop;
        }

        if (tunnel->parms.i_flags&TUNNEL_SEQ) {
                if (!(tpi->flags&TUNNEL_SEQ) ||
                    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
                        tunnel->dev->stats.rx_fifo_errors++;
                        tunnel->dev->stats.rx_errors++;
                        goto drop;
                }
                tunnel->i_seqno = ntohl(tpi->seq) + 1;
        }

        skb_reset_network_header(skb);

        err = IP_ECN_decapsulate(iph, skb);
        if (unlikely(err)) {
                if (log_ecn_error)
                        net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
                                             &iph->saddr, iph->tos);
                if (err > 1) {
                        ++tunnel->dev->stats.rx_frame_errors;
                        ++tunnel->dev->stats.rx_errors;
                        goto drop;
                }
        }

        tstats = this_cpu_ptr(tunnel->dev->tstats);
        u64_stats_update_begin(&tstats->syncp);
        tstats->rx_packets++;
        tstats->rx_bytes += skb->len;
        u64_stats_update_end(&tstats->syncp);

        skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

        if (tunnel->dev->type == ARPHRD_ETHER) {
                skb->protocol = eth_type_trans(skb, tunnel->dev);
                skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
        } else {
                skb->dev = tunnel->dev;
        }

        if (tun_dst)
                skb_dst_set(skb, (struct dst_entry *)tun_dst);

        gro_cells_receive(&tunnel->gro_cells, skb);
        return 0;

drop:
        if (tun_dst)
                dst_release((struct dst_entry *)tun_dst);
        kfree_skb(skb);
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
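
/* A protocol handler typically pairs ip_tunnel_lookup() with
 * ip_tunnel_rcv(); roughly, as in the ipip driver:
 *
 *	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 *				  iph->saddr, iph->daddr, 0);
 *	if (tunnel) {
 *		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 *			goto drop;
 *		return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
 *	}
 */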
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
                            unsigned int num)
{
        if (num >= MAX_IPTUN_ENCAP_OPS)
                return -ERANGE;

        return !cmpxchg((const struct ip_tunnel_encap_ops **)
                        &iptun_encaps[num],
                        NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
                            unsigned int num)
{
        int ret;

        if (num >= MAX_IPTUN_ENCAP_OPS)
                return -ERANGE;

        ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
                       &iptun_encaps[num],
                       ops, NULL) == ops) ? 0 : -1;

        synchronize_net();

        return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
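
/* Encap providers (e.g. FOU) plug into the encapsulation hooks by
 * registering an ops table for their TUNNEL_ENCAP_* type, roughly:
 *
 *	static const struct ip_tunnel_encap_ops fou_iptun_ops = {
 *		.encap_hlen	= fou_encap_hlen,
 *		.build_header	= fou_build_header,
 *	};
 *
 *	ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
 *	...
 *	ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
 */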
int ip_tunnel_encap_setup(struct ip_tunnel *t,
                          struct ip_tunnel_encap *ipencap)
{
        int hlen;

        memset(&t->encap, 0, sizeof(t->encap));

        hlen = ip_encap_hlen(ipencap);
        if (hlen < 0)
                return hlen;

        t->encap.type = ipencap->type;
        t->encap.sport = ipencap->sport;
        t->encap.dport = ipencap->dport;
        t->encap.flags = ipencap->flags;

        t->encap_hlen = hlen;
        t->hlen = t->encap_hlen + t->tun_hlen;

        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
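
/* Compare the inner packet against the PMTU of the route the
 * encapsulated packet will take, update the dst PMTU state, and send
 * ICMP_FRAG_NEEDED/ICMPV6_PKT_TOOBIG back when an unfragmentable inner
 * packet exceeds it.
 */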
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
                           struct rtable *rt, __be16 df,
                           const struct iphdr *inner_iph)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        int pkt_size;
        int mtu;

        pkt_size = skb->len - tunnel->hlen;
        pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;

        if (df) {
                mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel->hlen);
                mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
        } else {
                mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
        }

        skb_dst_update_pmtu_no_confirm(skb, mtu);

        if (skb->protocol == htons(ETH_P_IP)) {
                if (!skb_is_gso(skb) &&
                    (inner_iph->frag_off & htons(IP_DF)) &&
                    mtu < pkt_size) {
                        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                        return -E2BIG;
                }
        }
#if IS_ENABLED(CONFIG_IPV6)
        else if (skb->protocol == htons(ETH_P_IPV6)) {
                struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

                if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
                    mtu >= IPV6_MIN_MTU) {
                        if ((tunnel->parms.iph.daddr &&
                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
                            rt6->rt6i_dst.plen == 128) {
                                rt6->rt6i_flags |= RTF_MODIFIED;
                                dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
                        }
                }

                if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
                    mtu < pkt_size) {
                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                        return -E2BIG;
                }
        }
#endif
        return 0;
}
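
/* TX path for metadata-based (collect_md) tunnels: addressing, TOS, TTL
 * and the DF bit come from the per-skb struct ip_tunnel_info instead of
 * the device's tunnel->parms.
 */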
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        u32 headroom = sizeof(struct iphdr);
        struct ip_tunnel_info *tun_info;
        const struct ip_tunnel_key *key;
        const struct iphdr *inner_iph;
        struct rtable *rt;
        struct flowi4 fl4;
        __be16 df = 0;
        u8 tos, ttl;

        tun_info = skb_tunnel_info(skb);
        if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
                     ip_tunnel_info_af(tun_info) != AF_INET))
                goto tx_error;
        key = &tun_info->key;
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
        tos = key->tos;
        if (tos == 1) {
                if (skb->protocol == htons(ETH_P_IP))
                        tos = inner_iph->tos;
                else if (skb->protocol == htons(ETH_P_IPV6))
                        tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
        }
        ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
                            tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
                            0, skb->mark);
        if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
                goto tx_error;
        rt = ip_route_output_key(tunnel->net, &fl4);
        if (IS_ERR(rt)) {
                dev->stats.tx_carrier_errors++;
                goto tx_error;
        }
        if (rt->dst.dev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }
        tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
        ttl = key->ttl;
        if (ttl == 0) {
                if (skb->protocol == htons(ETH_P_IP))
                        ttl = inner_iph->ttl;
                else if (skb->protocol == htons(ETH_P_IPV6))
                        ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
                else
                        ttl = ip4_dst_hoplimit(&rt->dst);
        }
        if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
                df = htons(IP_DF);
        else if (skb->protocol == htons(ETH_P_IP))
                df = inner_iph->frag_off & htons(IP_DF);
        headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
        if (headroom > dev->needed_headroom)
                dev->needed_headroom = headroom;

        if (skb_cow_head(skb, dev->needed_headroom)) {
                ip_rt_put(rt);
                goto tx_dropped;
        }
        iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
                      df, !net_eq(tunnel->net, dev_net(dev)));
        return;
tx_error:
        dev->stats.tx_errors++;
        goto kfree;
tx_dropped:
        dev->stats.tx_dropped++;
kfree:
        kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
                    const struct iphdr *tnl_params, u8 protocol)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr *inner_iph;
        struct flowi4 fl4;
        u8 tos, ttl;
        __be16 df;
        struct rtable *rt;              /* Route to the other host */
        unsigned int max_headroom;      /* The extra header space needed */
        __be32 dst;
        bool connected;

        inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
        connected = (tunnel->parms.iph.daddr != 0);

        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

        dst = tnl_params->daddr;
        if (dst == 0) {
                /* NBMA tunnel */
                struct ip_tunnel_info *tun_info;

                if (!skb_dst(skb)) {
                        dev->stats.tx_fifo_errors++;
                        goto tx_error;
                }

                tun_info = skb_tunnel_info(skb);
                if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
                    ip_tunnel_info_af(tun_info) == AF_INET &&
                    tun_info->key.u.ipv4.dst)
                        dst = tun_info->key.u.ipv4.dst;
                else if (skb->protocol == htons(ETH_P_IP)) {
                        rt = skb_rtable(skb);
                        dst = rt_nexthop(rt, inner_iph->daddr);
                }
#if IS_ENABLED(CONFIG_IPV6)
                else if (skb->protocol == htons(ETH_P_IPV6)) {
                        const struct in6_addr *addr6;
                        struct neighbour *neigh;
                        bool do_tx_error_icmp;
                        int addr_type;

                        neigh = dst_neigh_lookup(skb_dst(skb),
                                                 &ipv6_hdr(skb)->daddr);
                        if (!neigh)
                                goto tx_error;

                        addr6 = (const struct in6_addr *)&neigh->primary_key;
                        addr_type = ipv6_addr_type(addr6);

                        if (addr_type == IPV6_ADDR_ANY) {
                                addr6 = &ipv6_hdr(skb)->daddr;
                                addr_type = ipv6_addr_type(addr6);
                        }

                        if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
                                do_tx_error_icmp = true;
                        else {
                                do_tx_error_icmp = false;
                                dst = addr6->s6_addr32[3];
                        }
                        neigh_release(neigh);
                        if (do_tx_error_icmp)
                                goto tx_error_icmp;
                }
#endif
                else
                        goto tx_error;

                connected = false;
        }

        tos = tnl_params->tos;
        if (tos & 0x1) {
                tos &= ~0x1;
                if (skb->protocol == htons(ETH_P_IP)) {
                        tos = inner_iph->tos;
                        connected = false;
                } else if (skb->protocol == htons(ETH_P_IPV6)) {
                        tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
                        connected = false;
                }
        }

        ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
                            tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
                            tunnel->fwmark);

        if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
                goto tx_error;

        rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
                         NULL;

        if (!rt) {
                rt = ip_route_output_key(tunnel->net, &fl4);
                if (IS_ERR(rt)) {
                        dev->stats.tx_carrier_errors++;
                        goto tx_error;
                }
                if (connected)
                        dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
                                          fl4.saddr);
        }

        if (rt->dst.dev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        df = tnl_params->frag_off;
        if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
                df |= (inner_iph->frag_off & htons(IP_DF));

        if (tnl_update_pmtu(dev, skb, rt, df, inner_iph)) {
                ip_rt_put(rt);
                goto tx_error;
        }

        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;

                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
        ttl = tnl_params->ttl;
        if (ttl == 0) {
                if (skb->protocol == htons(ETH_P_IP))
                        ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
                else if (skb->protocol == htons(ETH_P_IPV6))
                        ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
                else
                        ttl = ip4_dst_hoplimit(&rt->dst);
        }

        max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
                        + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
        if (max_headroom > dev->needed_headroom)
                dev->needed_headroom = max_headroom;

        if (skb_cow_head(skb, dev->needed_headroom)) {
                ip_rt_put(rt);
                dev->stats.tx_dropped++;
                kfree_skb(skb);
                return;
        }

        iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
                      df, !net_eq(tunnel->net, dev_net(dev)));
        return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
        dst_link_failure(skb);
#endif
tx_error:
        dev->stats.tx_errors++;
        kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
static void ip_tunnel_update(struct ip_tunnel_net *itn,
                             struct ip_tunnel *t,
                             struct net_device *dev,
                             struct ip_tunnel_parm *p,
                             bool set_mtu,
                             __u32 fwmark)
{
        ip_tunnel_del(itn, t);
        t->parms.iph.saddr = p->iph.saddr;
        t->parms.iph.daddr = p->iph.daddr;
        t->parms.i_key = p->i_key;
        t->parms.o_key = p->o_key;
        if (dev->type != ARPHRD_ETHER) {
                memcpy(dev->dev_addr, &p->iph.saddr, 4);
                memcpy(dev->broadcast, &p->iph.daddr, 4);
        }
        ip_tunnel_add(itn, t);

        t->parms.iph.ttl = p->iph.ttl;
        t->parms.iph.tos = p->iph.tos;
        t->parms.iph.frag_off = p->iph.frag_off;

        if (t->parms.link != p->link || t->fwmark != fwmark) {
                int mtu;

                t->parms.link = p->link;
                t->fwmark = fwmark;
                mtu = ip_tunnel_bind_dev(dev);
                if (set_mtu)
                        dev->mtu = mtu;
        }
        dst_cache_reset(&t->dst_cache);
        netdev_state_change(dev);
}
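
/* Handle the SIOCGETTUNNEL/SIOCADDTUNNEL/SIOCCHGTUNNEL/SIOCDELTUNNEL
 * ioctls. ADD, CHG and DEL require CAP_NET_ADMIN in the tunnel's netns;
 * when invoked on the fallback device, @p names the tunnel to operate
 * on rather than the fallback device itself.
 */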
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
        int err = 0;
        struct ip_tunnel *t = netdev_priv(dev);
        struct net *net = t->net;
        struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

        switch (cmd) {
        case SIOCGETTUNNEL:
                if (dev == itn->fb_tunnel_dev) {
                        t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
                        if (!t)
                                t = netdev_priv(dev);
                }
                memcpy(p, &t->parms, sizeof(*p));
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;
                if (p->iph.ttl)
                        p->iph.frag_off |= htons(IP_DF);
                if (!(p->i_flags & VTI_ISVTI)) {
                        if (!(p->i_flags & TUNNEL_KEY))
                                p->i_key = 0;
                        if (!(p->o_flags & TUNNEL_KEY))
                                p->o_key = 0;
                }

                t = ip_tunnel_find(itn, p, itn->type);

                if (cmd == SIOCADDTUNNEL) {
                        if (!t) {
                                t = ip_tunnel_create(net, itn, p);
                                err = PTR_ERR_OR_ZERO(t);
                                break;
                        }
                        err = -EEXIST;
                        break;
                }
                if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t) {
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                unsigned int nflags = 0;

                                if (ipv4_is_multicast(p->iph.daddr))
                                        nflags = IFF_BROADCAST;
                                else if (p->iph.daddr)
                                        nflags = IFF_POINTOPOINT;

                                if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
                                        err = -EINVAL;
                                        break;
                                }
                                t = netdev_priv(dev);
                        }
                }

                if (t) {
                        err = 0;
                        ip_tunnel_update(itn, t, dev, p, true, 0);
                } else {
                        err = -ENOENT;
                }
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;

                if (dev == itn->fb_tunnel_dev) {
                        err = -ENOENT;
                        t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
                        if (!t)
                                goto done;
                        err = -EPERM;
                        if (t == netdev_priv(itn->fb_tunnel_dev))
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);
        int max_mtu = IP_MAX_MTU - t_hlen;

        if (dev->type == ARPHRD_ETHER)
                max_mtu -= dev->hard_header_len;

        if (new_mtu < ETH_MIN_MTU)
                return -EINVAL;

        if (new_mtu > max_mtu) {
                if (strict)
                        return -EINVAL;

                new_mtu = max_mtu;
        }

        dev->mtu = new_mtu;
        return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
        return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
static void ip_tunnel_dev_free(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        gro_cells_destroy(&tunnel->gro_cells);
        dst_cache_destroy(&tunnel->dst_cache);
        free_percpu(dev->tstats);
}
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct ip_tunnel_net *itn;

        itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

        if (itn->fb_tunnel_dev != dev) {
                ip_tunnel_del(itn, netdev_priv(dev));
                unregister_netdevice_queue(dev, head);
        }
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);
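
/* Called from a tunnel driver's pernet init to set up the per-netns
 * hash table and fallback device; e.g. the ipip module does, roughly:
 *
 *	static int __net_init ipip_init_net(struct net *net)
 *	{
 *		return ip_tunnel_init_net(net, ipip_net_id,
 *					  &ipip_link_ops, "tunl0");
 *	}
 */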
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
                       struct rtnl_link_ops *ops, char *devname)
{
        struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
        struct ip_tunnel_parm parms;
        unsigned int i;

        itn->rtnl_link_ops = ops;
        for (i = 0; i < IP_TNL_HASH_SIZE; i++)
                INIT_HLIST_HEAD(&itn->tunnels[i]);

        if (!ops || !net_has_fallback_tunnels(net)) {
                struct ip_tunnel_net *it_init_net;

                it_init_net = net_generic(&init_net, ip_tnl_net_id);
                itn->type = it_init_net->type;
                itn->fb_tunnel_dev = NULL;
                return 0;
        }

        memset(&parms, 0, sizeof(parms));
        if (devname)
                strlcpy(parms.name, devname, IFNAMSIZ);

        rtnl_lock();
        itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
        /* FB netdevice is special: we have one, and only one per netns.
         * Allowing to move it to another netns is clearly unsafe.
         */
        if (!IS_ERR(itn->fb_tunnel_dev)) {
                itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
                itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
                ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
                itn->type = itn->fb_tunnel_dev->type;
        }
        rtnl_unlock();

        return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
                              struct list_head *head,
                              struct rtnl_link_ops *ops)
{
        struct net_device *dev, *aux;
        int h;

        for_each_netdev_safe(net, dev, aux)
                if (dev->rtnl_link_ops == ops)
                        unregister_netdevice_queue(dev, head);

        for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
                struct ip_tunnel *t;
                struct hlist_node *n;
                struct hlist_head *thead = &itn->tunnels[h];

                hlist_for_each_entry_safe(t, n, thead, hash_node)
                        /* If dev is in the same netns, it has already
                         * been added to the list by the previous loop.
                         */
                        if (!net_eq(dev_net(t->dev), net))
                                unregister_netdevice_queue(t->dev, head);
        }
}
void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
                           struct rtnl_link_ops *ops)
{
        struct ip_tunnel_net *itn;
        struct net *net;
        LIST_HEAD(list);

        rtnl_lock();
        list_for_each_entry(net, net_list, exit_list) {
                itn = net_generic(net, id);
                ip_tunnel_destroy(net, itn, &list, ops);
        }
        unregister_netdevice_many(&list);
        rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
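
/* Backends for rtnl_link_ops ->newlink()/->changelink(): the tunnel
 * driver parses its netlink attributes into @p and delegates device
 * registration and parameter updates here.
 */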
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
                      struct ip_tunnel_parm *p, __u32 fwmark)
{
        struct ip_tunnel *nt;
        struct net *net = dev_net(dev);
        struct ip_tunnel_net *itn;
        int mtu;
        int err;

        nt = netdev_priv(dev);
        itn = net_generic(net, nt->ip_tnl_net_id);

        if (nt->collect_md) {
                if (rtnl_dereference(itn->collect_md_tun))
                        return -EEXIST;
        } else {
                if (ip_tunnel_find(itn, p, dev->type))
                        return -EEXIST;
        }

        nt->net = net;
        nt->parms = *p;
        nt->fwmark = fwmark;
        err = register_netdevice(dev);
        if (err)
                goto err_register_netdevice;

        if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
                eth_hw_addr_random(dev);

        mtu = ip_tunnel_bind_dev(dev);
        if (tb[IFLA_MTU]) {
                unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

                if (dev->type == ARPHRD_ETHER)
                        max -= dev->hard_header_len;

                mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
        }

        err = dev_set_mtu(dev, mtu);
        if (err)
                goto err_dev_set_mtu;

        ip_tunnel_add(itn, nt);
        return 0;

err_dev_set_mtu:
        unregister_netdevice(dev);
err_register_netdevice:
        return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
                         struct ip_tunnel_parm *p, __u32 fwmark)
{
        struct ip_tunnel *t;
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net *net = tunnel->net;
        struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

        if (dev == itn->fb_tunnel_dev)
                return -EINVAL;

        t = ip_tunnel_find(itn, p, dev->type);

        if (t) {
                if (t->dev != dev)
                        return -EEXIST;
        } else {
                t = tunnel;

                if (dev->type != ARPHRD_ETHER) {
                        unsigned int nflags = 0;

                        if (ipv4_is_multicast(p->iph.daddr))
                                nflags = IFF_BROADCAST;
                        else if (p->iph.daddr)
                                nflags = IFF_POINTOPOINT;

                        if ((dev->flags ^ nflags) &
                            (IFF_POINTOPOINT | IFF_BROADCAST))
                                return -EINVAL;
                }
        }

        ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
int ip_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;
        int err;

        dev->needs_free_netdev = true;
        dev->priv_destructor = ip_tunnel_dev_free;
        dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
        if (!dev->tstats)
                return -ENOMEM;

        err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
        if (err) {
                free_percpu(dev->tstats);
                return err;
        }

        err = gro_cells_init(&tunnel->gro_cells, dev);
        if (err) {
                dst_cache_destroy(&tunnel->dst_cache);
                free_percpu(dev->tstats);
                return err;
        }

        tunnel->dev = dev;
        tunnel->net = dev_net(dev);
        strcpy(tunnel->parms.name, dev->name);
        iph->version = 4;
        iph->ihl = 5;

        if (tunnel->collect_md)
                netif_keep_dst(dev);
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
void ip_tunnel_uninit(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net *net = tunnel->net;
        struct ip_tunnel_net *itn;

        itn = net_generic(net, tunnel->ip_tnl_net_id);
        ip_tunnel_del(itn, netdev_priv(dev));
        if (itn->fb_tunnel_dev == dev)
                WRITE_ONCE(itn->fb_tunnel_dev, NULL);

        dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");