Mention branches and keyring.
[releases.git] / ipv4 / ip_tunnel.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Nicira, Inc.
4  */
5
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8 #include <linux/capability.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/slab.h>
13 #include <linux/uaccess.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <linux/if_arp.h>
20 #include <linux/init.h>
21 #include <linux/in6.h>
22 #include <linux/inetdevice.h>
23 #include <linux/igmp.h>
24 #include <linux/netfilter_ipv4.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rculist.h>
29 #include <linux/err.h>
30
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/udp.h>
45 #include <net/dst_metadata.h>
46
47 #if IS_ENABLED(CONFIG_IPV6)
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
52
53 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
54 {
55         return hash_32((__force u32)key ^ (__force u32)remote,
56                          IP_TNL_HASH_BITS);
57 }
58
59 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
60                                 __be16 flags, __be32 key)
61 {
62         if (p->i_flags & TUNNEL_KEY) {
63                 if (flags & TUNNEL_KEY)
64                         return key == p->i_key;
65                 else
66                         /* key expected, none present */
67                         return false;
68         } else
69                 return !(flags & TUNNEL_KEY);
70 }
71
72 /* Fallback tunnel: no source, no destination, no key, no options
73
74    Tunnel hash table:
75    We require an exact key match, i.e. if a key is present in the packet
76    it will match only a tunnel with the same key; if it is not present,
77    it will match only a keyless tunnel.
78
79    All keyless packets that do not match a configured keyless tunnel
80    will match the fallback tunnel.
81    Given src, dst and key, find the appropriate tunnel for input.
82 */
83 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
84                                    int link, __be16 flags,
85                                    __be32 remote, __be32 local,
86                                    __be32 key)
87 {
88         struct ip_tunnel *t, *cand = NULL;
89         struct hlist_head *head;
90         struct net_device *ndev;
91         unsigned int hash;
92
93         hash = ip_tunnel_hash(key, remote);
94         head = &itn->tunnels[hash];
95
96         hlist_for_each_entry_rcu(t, head, hash_node) {
97                 if (local != t->parms.iph.saddr ||
98                     remote != t->parms.iph.daddr ||
99                     !(t->dev->flags & IFF_UP))
100                         continue;
101
102                 if (!ip_tunnel_key_match(&t->parms, flags, key))
103                         continue;
104
105                 if (t->parms.link == link)
106                         return t;
107                 else
108                         cand = t;
109         }
110
111         hlist_for_each_entry_rcu(t, head, hash_node) {
112                 if (remote != t->parms.iph.daddr ||
113                     t->parms.iph.saddr != 0 ||
114                     !(t->dev->flags & IFF_UP))
115                         continue;
116
117                 if (!ip_tunnel_key_match(&t->parms, flags, key))
118                         continue;
119
120                 if (t->parms.link == link)
121                         return t;
122                 else if (!cand)
123                         cand = t;
124         }
125
126         hash = ip_tunnel_hash(key, 0);
127         head = &itn->tunnels[hash];
128
129         hlist_for_each_entry_rcu(t, head, hash_node) {
130                 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
131                     (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
132                         continue;
133
134                 if (!(t->dev->flags & IFF_UP))
135                         continue;
136
137                 if (!ip_tunnel_key_match(&t->parms, flags, key))
138                         continue;
139
140                 if (t->parms.link == link)
141                         return t;
142                 else if (!cand)
143                         cand = t;
144         }
145
146         hlist_for_each_entry_rcu(t, head, hash_node) {
147                 if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
148                     t->parms.iph.saddr != 0 ||
149                     t->parms.iph.daddr != 0 ||
150                     !(t->dev->flags & IFF_UP))
151                         continue;
152
153                 if (t->parms.link == link)
154                         return t;
155                 else if (!cand)
156                         cand = t;
157         }
158
159         if (cand)
160                 return cand;
161
162         t = rcu_dereference(itn->collect_md_tun);
163         if (t && t->dev->flags & IFF_UP)
164                 return t;
165
166         ndev = READ_ONCE(itn->fb_tunnel_dev);
167         if (ndev && ndev->flags & IFF_UP)
168                 return netdev_priv(ndev);
169
170         return NULL;
171 }
172 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
173
174 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
175                                     struct ip_tunnel_parm *parms)
176 {
177         unsigned int h;
178         __be32 remote;
179         __be32 i_key = parms->i_key;
180
181         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
182                 remote = parms->iph.daddr;
183         else
184                 remote = 0;
185
186         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
187                 i_key = 0;
188
189         h = ip_tunnel_hash(i_key, remote);
190         return &itn->tunnels[h];
191 }
192
193 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
194 {
195         struct hlist_head *head = ip_bucket(itn, &t->parms);
196
197         if (t->collect_md)
198                 rcu_assign_pointer(itn->collect_md_tun, t);
199         hlist_add_head_rcu(&t->hash_node, head);
200 }
201
202 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
203 {
204         if (t->collect_md)
205                 rcu_assign_pointer(itn->collect_md_tun, NULL);
206         hlist_del_init_rcu(&t->hash_node);
207 }
208
209 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
210                                         struct ip_tunnel_parm *parms,
211                                         int type)
212 {
213         __be32 remote = parms->iph.daddr;
214         __be32 local = parms->iph.saddr;
215         __be32 key = parms->i_key;
216         __be16 flags = parms->i_flags;
217         int link = parms->link;
218         struct ip_tunnel *t = NULL;
219         struct hlist_head *head = ip_bucket(itn, parms);
220
221         hlist_for_each_entry_rcu(t, head, hash_node) {
222                 if (local == t->parms.iph.saddr &&
223                     remote == t->parms.iph.daddr &&
224                     link == t->parms.link &&
225                     type == t->dev->type &&
226                     ip_tunnel_key_match(&t->parms, flags, key))
227                         break;
228         }
229         return t;
230 }
231
232 static struct net_device *__ip_tunnel_create(struct net *net,
233                                              const struct rtnl_link_ops *ops,
234                                              struct ip_tunnel_parm *parms)
235 {
236         int err;
237         struct ip_tunnel *tunnel;
238         struct net_device *dev;
239         char name[IFNAMSIZ];
240
241         err = -E2BIG;
242         if (parms->name[0]) {
243                 if (!dev_valid_name(parms->name))
244                         goto failed;
245                 strscpy(name, parms->name, IFNAMSIZ);
246         } else {
247                 if (strlen(ops->kind) > (IFNAMSIZ - 3))
248                         goto failed;
249                 strcpy(name, ops->kind);
250                 strcat(name, "%d");
251         }
252
253         ASSERT_RTNL();
254         dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
255         if (!dev) {
256                 err = -ENOMEM;
257                 goto failed;
258         }
259         dev_net_set(dev, net);
260
261         dev->rtnl_link_ops = ops;
262
263         tunnel = netdev_priv(dev);
264         tunnel->parms = *parms;
265         tunnel->net = net;
266
267         err = register_netdevice(dev);
268         if (err)
269                 goto failed_free;
270
271         return dev;
272
273 failed_free:
274         free_netdev(dev);
275 failed:
276         return ERR_PTR(err);
277 }
278
279 static int ip_tunnel_bind_dev(struct net_device *dev)
280 {
281         struct net_device *tdev = NULL;
282         struct ip_tunnel *tunnel = netdev_priv(dev);
283         const struct iphdr *iph;
284         int hlen = LL_MAX_HEADER;
285         int mtu = ETH_DATA_LEN;
286         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
287
288         iph = &tunnel->parms.iph;
289
290         /* Guess output device to choose reasonable mtu and needed_headroom */
291         if (iph->daddr) {
292                 struct flowi4 fl4;
293                 struct rtable *rt;
294
295                 ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
296                                     iph->saddr, tunnel->parms.o_key,
297                                     RT_TOS(iph->tos), dev_net(dev),
298                                     tunnel->parms.link, tunnel->fwmark, 0, 0);
299                 rt = ip_route_output_key(tunnel->net, &fl4);
300
301                 if (!IS_ERR(rt)) {
302                         tdev = rt->dst.dev;
303                         ip_rt_put(rt);
304                 }
305                 if (dev->type != ARPHRD_ETHER)
306                         dev->flags |= IFF_POINTOPOINT;
307
308                 dst_cache_reset(&tunnel->dst_cache);
309         }
310
311         if (!tdev && tunnel->parms.link)
312                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
313
314         if (tdev) {
315                 hlen = tdev->hard_header_len + tdev->needed_headroom;
316                 mtu = min(tdev->mtu, IP_MAX_MTU);
317         }
318
319         dev->needed_headroom = t_hlen + hlen;
320         mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
321
322         if (mtu < IPV4_MIN_MTU)
323                 mtu = IPV4_MIN_MTU;
324
325         return mtu;
326 }
327
328 static struct ip_tunnel *ip_tunnel_create(struct net *net,
329                                           struct ip_tunnel_net *itn,
330                                           struct ip_tunnel_parm *parms)
331 {
332         struct ip_tunnel *nt;
333         struct net_device *dev;
334         int t_hlen;
335         int mtu;
336         int err;
337
338         dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
339         if (IS_ERR(dev))
340                 return ERR_CAST(dev);
341
342         mtu = ip_tunnel_bind_dev(dev);
343         err = dev_set_mtu(dev, mtu);
344         if (err)
345                 goto err_dev_set_mtu;
346
347         nt = netdev_priv(dev);
348         t_hlen = nt->hlen + sizeof(struct iphdr);
349         dev->min_mtu = ETH_MIN_MTU;
350         dev->max_mtu = IP_MAX_MTU - t_hlen;
351         if (dev->type == ARPHRD_ETHER)
352                 dev->max_mtu -= dev->hard_header_len;
353
354         ip_tunnel_add(itn, nt);
355         return nt;
356
357 err_dev_set_mtu:
358         unregister_netdevice(dev);
359         return ERR_PTR(err);
360 }
361
362 void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
363 {
364         const struct iphdr *iph = ip_hdr(skb);
365         const struct udphdr *udph;
366
367         if (iph->protocol != IPPROTO_UDP)
368                 return;
369
370         udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
371         info->encap.sport = udph->source;
372         info->encap.dport = udph->dest;
373 }
374 EXPORT_SYMBOL(ip_tunnel_md_udp_encap);
375
376 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
377                   const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
378                   bool log_ecn_error)
379 {
380         const struct iphdr *iph = ip_hdr(skb);
381         int nh, err;
382
383 #ifdef CONFIG_NET_IPGRE_BROADCAST
384         if (ipv4_is_multicast(iph->daddr)) {
385                 DEV_STATS_INC(tunnel->dev, multicast);
386                 skb->pkt_type = PACKET_BROADCAST;
387         }
388 #endif
389
390         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
391              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
392                 DEV_STATS_INC(tunnel->dev, rx_crc_errors);
393                 DEV_STATS_INC(tunnel->dev, rx_errors);
394                 goto drop;
395         }
396
397         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
398                 if (!(tpi->flags&TUNNEL_SEQ) ||
399                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
400                         DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
401                         DEV_STATS_INC(tunnel->dev, rx_errors);
402                         goto drop;
403                 }
404                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
405         }
406
407         /* Save offset of outer header relative to skb->head,
408          * because we are going to reset the network header to the inner header
409          * and might change skb->head.
410          */
411         nh = skb_network_header(skb) - skb->head;
412
413         skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
414
415         if (!pskb_inet_may_pull(skb)) {
416                 DEV_STATS_INC(tunnel->dev, rx_length_errors);
417                 DEV_STATS_INC(tunnel->dev, rx_errors);
418                 goto drop;
419         }
420         iph = (struct iphdr *)(skb->head + nh);
421
422         err = IP_ECN_decapsulate(iph, skb);
423         if (unlikely(err)) {
424                 if (log_ecn_error)
425                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
426                                         &iph->saddr, iph->tos);
427                 if (err > 1) {
428                         DEV_STATS_INC(tunnel->dev, rx_frame_errors);
429                         DEV_STATS_INC(tunnel->dev, rx_errors);
430                         goto drop;
431                 }
432         }
433
434         dev_sw_netstats_rx_add(tunnel->dev, skb->len);
435         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
436
437         if (tunnel->dev->type == ARPHRD_ETHER) {
438                 skb->protocol = eth_type_trans(skb, tunnel->dev);
439                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
440         } else {
441                 skb->dev = tunnel->dev;
442         }
443
444         if (tun_dst)
445                 skb_dst_set(skb, (struct dst_entry *)tun_dst);
446
447         gro_cells_receive(&tunnel->gro_cells, skb);
448         return 0;
449
450 drop:
451         if (tun_dst)
452                 dst_release((struct dst_entry *)tun_dst);
453         kfree_skb(skb);
454         return 0;
455 }
456 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
457
458 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
459                             unsigned int num)
460 {
461         if (num >= MAX_IPTUN_ENCAP_OPS)
462                 return -ERANGE;
463
464         return !cmpxchg((const struct ip_tunnel_encap_ops **)
465                         &iptun_encaps[num],
466                         NULL, ops) ? 0 : -1;
467 }
468 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
469
470 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
471                             unsigned int num)
472 {
473         int ret;
474
475         if (num >= MAX_IPTUN_ENCAP_OPS)
476                 return -ERANGE;
477
478         ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
479                        &iptun_encaps[num],
480                        ops, NULL) == ops) ? 0 : -1;
481
482         synchronize_net();
483
484         return ret;
485 }
486 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
487
488 int ip_tunnel_encap_setup(struct ip_tunnel *t,
489                           struct ip_tunnel_encap *ipencap)
490 {
491         int hlen;
492
493         memset(&t->encap, 0, sizeof(t->encap));
494
495         hlen = ip_encap_hlen(ipencap);
496         if (hlen < 0)
497                 return hlen;
498
499         t->encap.type = ipencap->type;
500         t->encap.sport = ipencap->sport;
501         t->encap.dport = ipencap->dport;
502         t->encap.flags = ipencap->flags;
503
504         t->encap_hlen = hlen;
505         t->hlen = t->encap_hlen + t->tun_hlen;
506
507         return 0;
508 }
509 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
510
511 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
512                             struct rtable *rt, __be16 df,
513                             const struct iphdr *inner_iph,
514                             int tunnel_hlen, __be32 dst, bool md)
515 {
516         struct ip_tunnel *tunnel = netdev_priv(dev);
517         int pkt_size;
518         int mtu;
519
520         tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
521         pkt_size = skb->len - tunnel_hlen;
522         pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
523
524         if (df) {
525                 mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
526                 mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
527         } else {
528                 mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
529         }
530
531         if (skb_valid_dst(skb))
532                 skb_dst_update_pmtu_no_confirm(skb, mtu);
533
534         if (skb->protocol == htons(ETH_P_IP)) {
535                 if (!skb_is_gso(skb) &&
536                     (inner_iph->frag_off & htons(IP_DF)) &&
537                     mtu < pkt_size) {
538                         icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
539                         return -E2BIG;
540                 }
541         }
542 #if IS_ENABLED(CONFIG_IPV6)
543         else if (skb->protocol == htons(ETH_P_IPV6)) {
544                 struct rt6_info *rt6;
545                 __be32 daddr;
546
547                 rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
548                                            NULL;
549                 daddr = md ? dst : tunnel->parms.iph.daddr;
550
551                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
552                            mtu >= IPV6_MIN_MTU) {
553                         if ((daddr && !ipv4_is_multicast(daddr)) ||
554                             rt6->rt6i_dst.plen == 128) {
555                                 rt6->rt6i_flags |= RTF_MODIFIED;
556                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
557                         }
558                 }
559
560                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
561                                         mtu < pkt_size) {
562                         icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
563                         return -E2BIG;
564                 }
565         }
566 #endif
567         return 0;
568 }
569
570 static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
571 {
572         /* we must cap headroom to some upperlimit, else pskb_expand_head
573          * will overflow header offsets in skb_headers_offset_update().
574          */
575         static const unsigned int max_allowed = 512;
576
577         if (headroom > max_allowed)
578                 headroom = max_allowed;
579
580         if (headroom > READ_ONCE(dev->needed_headroom))
581                 WRITE_ONCE(dev->needed_headroom, headroom);
582 }
583
/*
 * ip_md_tunnel_xmit - transmit a packet using the tunnel metadata on @skb
 * @skb:         packet to encapsulate; it is freed here on every error path
 *               and handed to iptunnel_xmit() on success
 * @dev:         tunnel device the packet is sent through
 * @proto:       outer IP protocol; passed by address to ip_tunnel_encap(),
 *               which may change it
 * @tunnel_hlen: tunnel header length; when 0 it is taken from
 *               ip_encap_hlen() of the metadata's encap settings
 *
 * The packet must carry tunnel info in TX mode for AF_INET, otherwise it
 * is counted as a tx error and dropped.  Addresses, tunnel id, TOS, TTL
 * and the DF setting all come from the metadata key.
 */
584 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
585                        u8 proto, int tunnel_hlen)
586 {
587         struct ip_tunnel *tunnel = netdev_priv(dev);
588         u32 headroom = sizeof(struct iphdr);
589         struct ip_tunnel_info *tun_info;
590         const struct ip_tunnel_key *key;
591         const struct iphdr *inner_iph;
592         struct rtable *rt = NULL;
593         struct flowi4 fl4;
594         __be16 df = 0;
595         u8 tos, ttl;
596         bool use_cache;
597
598         tun_info = skb_tunnel_info(skb);
599         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
600                      ip_tunnel_info_af(tun_info) != AF_INET))
601                 goto tx_error;
602         key = &tun_info->key;
603         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
604         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
605         tos = key->tos;
            /* A key TOS of exactly 1 means: copy TOS/DS field from the inner
             * IPv4 or IPv6 header (left as 1 for other payload protocols).
             */
606         if (tos == 1) {
607                 if (skb->protocol == htons(ETH_P_IP))
608                         tos = inner_iph->tos;
609                 else if (skb->protocol == htons(ETH_P_IPV6))
610                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
611         }
612         ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
613                             tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
614                             dev_net(dev), 0, skb->mark, skb_get_hash(skb),
615                             key->flow_flags);
616
617         if (!tunnel_hlen)
618                 tunnel_hlen = ip_encap_hlen(&tun_info->encap);
619
620         if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
621                 goto tx_error;
622
            /* Try the per-metadata dst cache first; fall back to a route
             * lookup and store the result in the cache when it is usable.
             */
623         use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
624         if (use_cache)
625                 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
626         if (!rt) {
627                 rt = ip_route_output_key(tunnel->net, &fl4);
628                 if (IS_ERR(rt)) {
629                         DEV_STATS_INC(dev, tx_carrier_errors);
630                         goto tx_error;
631                 }
632                 if (use_cache)
633                         dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
634                                           fl4.saddr);
635         }
            /* The route leads back to this very device: count a collision
             * and drop the packet (presumably to avoid a transmit loop).
             */
636         if (rt->dst.dev == dev) {
637                 ip_rt_put(rt);
638                 DEV_STATS_INC(dev, collisions);
639                 goto tx_error;
640         }
641
642         if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
643                 df = htons(IP_DF);
            /* From here on every failure must drop the route reference. */
644         if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
645                             key->u.ipv4.dst, true)) {
646                 ip_rt_put(rt);
647                 goto tx_error;
648         }
649
650         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
651         ttl = key->ttl;
            /* TTL 0 in the key: inherit from the inner IPv4/IPv6 header, or
             * use the route's hop limit for any other payload.
             */
652         if (ttl == 0) {
653                 if (skb->protocol == htons(ETH_P_IP))
654                         ttl = inner_iph->ttl;
655                 else if (skb->protocol == htons(ETH_P_IPV6))
656                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
657                 else
658                         ttl = ip4_dst_hoplimit(&rt->dst);
659         }
660
661         headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
662         if (skb_cow_head(skb, headroom)) {
663                 ip_rt_put(rt);
664                 goto tx_dropped;
665         }
666
667         ip_tunnel_adj_headroom(dev, headroom);
668
669         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
670                       df, !net_eq(tunnel->net, dev_net(dev)));
671         return;
    /* Error exits: tx_error and tx_dropped bump different counters and then
     * share the code that frees the skb.
     */
672 tx_error:
673         DEV_STATS_INC(dev, tx_errors);
674         goto kfree;
675 tx_dropped:
676         DEV_STATS_INC(dev, tx_dropped);
677 kfree:
678         kfree_skb(skb);
679 }
680 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
681
/*
 * ip_tunnel_xmit - encapsulate and transmit a packet on a configured tunnel
 * @skb:        packet to send; freed here on every error path and handed
 *              to iptunnel_xmit() on success
 * @dev:        tunnel device
 * @tnl_params: outer IP header template; daddr, saddr, tos, ttl and
 *              frag_off are read from it
 * @protocol:   outer IP protocol; passed by address to ip_tunnel_encap(),
 *              which may change it
 *
 * When the template has no destination (NBMA tunnel) the outer destination
 * is chosen per packet: from TX tunnel metadata on the skb, from the next
 * hop of the skb's IPv4 route, or from an IPv4-compatible IPv6 address.
 * A route is then obtained (from a dst cache when the flow is "connected"),
 * PMTU is checked, TOS/TTL are resolved and the packet is transmitted.
 */
682 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
683                     const struct iphdr *tnl_params, u8 protocol)
684 {
685         struct ip_tunnel *tunnel = netdev_priv(dev);
686         struct ip_tunnel_info *tun_info = NULL;
687         const struct iphdr *inner_iph;
688         unsigned int max_headroom;      /* The extra header space needed */
689         struct rtable *rt = NULL;               /* Route to the other host */
690         __be16 payload_protocol;
691         bool use_cache = false;
692         struct flowi4 fl4;
693         bool md = false;
694         bool connected;
695         u8 tos, ttl;
696         __be32 dst;
697         __be16 df;
698
699         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
            /* "connected" starts out true when the tunnel has a fixed remote;
             * it is cleared below whenever the flow depends on the packet.
             */
700         connected = (tunnel->parms.iph.daddr != 0);
701         payload_protocol = skb_protocol(skb, true);
702
703         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
704
705         dst = tnl_params->daddr;
706         if (dst == 0) {
707                 /* NBMA tunnel */
708
709                 if (!skb_dst(skb)) {
710                         DEV_STATS_INC(dev, tx_fifo_errors);
711                         goto tx_error;
712                 }
713
                    /* Preferred source of the destination: TX tunnel metadata
                     * for AF_INET carrying a non-zero IPv4 destination.
                     */
714                 tun_info = skb_tunnel_info(skb);
715                 if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
716                     ip_tunnel_info_af(tun_info) == AF_INET &&
717                     tun_info->key.u.ipv4.dst) {
718                         dst = tun_info->key.u.ipv4.dst;
719                         md = true;
720                         connected = true;
721                 } else if (payload_protocol == htons(ETH_P_IP)) {
                            /* IPv4 payload: use the next hop of the skb's route.
                             * rt is only borrowed here; it is overwritten before
                             * the route lookup further down.
                             */
722                         rt = skb_rtable(skb);
723                         dst = rt_nexthop(rt, inner_iph->daddr);
724                 }
725 #if IS_ENABLED(CONFIG_IPV6)
726                 else if (payload_protocol == htons(ETH_P_IPV6)) {
727                         const struct in6_addr *addr6;
728                         struct neighbour *neigh;
729                         bool do_tx_error_icmp;
730                         int addr_type;
731
732                         neigh = dst_neigh_lookup(skb_dst(skb),
733                                                  &ipv6_hdr(skb)->daddr);
734                         if (!neigh)
735                                 goto tx_error;
736
                            /* Use the neighbour's key, or the packet's own
                             * destination if that key is the unspecified address.
                             */
737                         addr6 = (const struct in6_addr *)&neigh->primary_key;
738                         addr_type = ipv6_addr_type(addr6);
739
740                         if (addr_type == IPV6_ADDR_ANY) {
741                                 addr6 = &ipv6_hdr(skb)->daddr;
742                                 addr_type = ipv6_addr_type(addr6);
743                         }
744
                            /* Only an IPv4-compatible address yields an IPv4
                             * destination (its last 32 bits).  The outcome is
                             * latched in a flag so that the neighbour reference
                             * is released before jumping to the error path.
                             */
745                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
746                                 do_tx_error_icmp = true;
747                         else {
748                                 do_tx_error_icmp = false;
749                                 dst = addr6->s6_addr32[3];
750                         }
751                         neigh_release(neigh);
752                         if (do_tx_error_icmp)
753                                 goto tx_error_icmp;
754                 }
755 #endif
756                 else
757                         goto tx_error;
758
759                 if (!md)
760                         connected = false;
761         }
762
763         tos = tnl_params->tos;
            /* Low bit set in the template TOS: inherit TOS/DS field from the
             * inner header; the flow then depends on the packet, so it is no
             * longer treated as connected.
             */
764         if (tos & 0x1) {
765                 tos &= ~0x1;
766                 if (payload_protocol == htons(ETH_P_IP)) {
767                         tos = inner_iph->tos;
768                         connected = false;
769                 } else if (payload_protocol == htons(ETH_P_IPV6)) {
770                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
771                         connected = false;
772                 }
773         }
774
775         ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
776                             tunnel->parms.o_key, RT_TOS(tos),
777                             dev_net(dev), tunnel->parms.link,
778                             tunnel->fwmark, skb_get_hash(skb), 0);
779
780         if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
781                 goto tx_error;
782
            /* Cached route: metadata flows use the metadata's dst cache (if
             * usable), connected non-metadata flows use the tunnel's cache,
             * everything else starts with rt == NULL.
             */
783         if (connected && md) {
784                 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
785                 if (use_cache)
786                         rt = dst_cache_get_ip4(&tun_info->dst_cache,
787                                                &fl4.saddr);
788         } else {
789                 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
790                                                 &fl4.saddr) : NULL;
791         }
792
793         if (!rt) {
794                 rt = ip_route_output_key(tunnel->net, &fl4);
795
796                 if (IS_ERR(rt)) {
797                         DEV_STATS_INC(dev, tx_carrier_errors);
798                         goto tx_error;
799                 }
800                 if (use_cache)
801                         dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
802                                           fl4.saddr);
803                 else if (!md && connected)
804                         dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
805                                           fl4.saddr);
806         }
807
            /* The route leads back to this very device: count a collision
             * and drop the packet (presumably to avoid a transmit loop).
             */
808         if (rt->dst.dev == dev) {
809                 ip_rt_put(rt);
810                 DEV_STATS_INC(dev, collisions);
811                 goto tx_error;
812         }
813
            /* Outer DF: template value, plus the inner IPv4 DF bit unless the
             * tunnel is configured to ignore it.
             */
814         df = tnl_params->frag_off;
815         if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
816                 df |= (inner_iph->frag_off & htons(IP_DF));
817
818         if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
819                 ip_rt_put(rt);
820                 goto tx_error;
821         }
822
            /* While recent errors are pending (err_count, within
             * IPTUNNEL_ERR_TIMEO of err_time) report a link failure for the
             * packet; once the timeout has passed the counter is reset.
             */
823         if (tunnel->err_count > 0) {
824                 if (time_before(jiffies,
825                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
826                         tunnel->err_count--;
827
828                         dst_link_failure(skb);
829                 } else
830                         tunnel->err_count = 0;
831         }
832
833         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
834         ttl = tnl_params->ttl;
            /* TTL 0 in the template: inherit from the inner IPv4/IPv6 header,
             * or use the route's hop limit for any other payload.
             */
835         if (ttl == 0) {
836                 if (payload_protocol == htons(ETH_P_IP))
837                         ttl = inner_iph->ttl;
838 #if IS_ENABLED(CONFIG_IPV6)
839                 else if (payload_protocol == htons(ETH_P_IPV6))
840                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
841 #endif
842                 else
843                         ttl = ip4_dst_hoplimit(&rt->dst);
844         }
845
846         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
847                         + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
848
849         if (skb_cow_head(skb, max_headroom)) {
850                 ip_rt_put(rt);
851                 DEV_STATS_INC(dev, tx_dropped);
852                 kfree_skb(skb);
853                 return;
854         }
855
856         ip_tunnel_adj_headroom(dev, max_headroom);
857
858         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
859                       df, !net_eq(tunnel->net, dev_net(dev)));
860         return;
861
    /* tx_error_icmp additionally signals a link failure for the packet and
     * then falls through to the common tx_error accounting and free.
     */
862 #if IS_ENABLED(CONFIG_IPV6)
863 tx_error_icmp:
864         dst_link_failure(skb);
865 #endif
866 tx_error:
867         DEV_STATS_INC(dev, tx_errors);
868         kfree_skb(skb);
869 }
870 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
871
872 static void ip_tunnel_update(struct ip_tunnel_net *itn,
873                              struct ip_tunnel *t,
874                              struct net_device *dev,
875                              struct ip_tunnel_parm *p,
876                              bool set_mtu,
877                              __u32 fwmark)
878 {
879         ip_tunnel_del(itn, t);
880         t->parms.iph.saddr = p->iph.saddr;
881         t->parms.iph.daddr = p->iph.daddr;
882         t->parms.i_key = p->i_key;
883         t->parms.o_key = p->o_key;
884         if (dev->type != ARPHRD_ETHER) {
885                 __dev_addr_set(dev, &p->iph.saddr, 4);
886                 memcpy(dev->broadcast, &p->iph.daddr, 4);
887         }
888         ip_tunnel_add(itn, t);
889
890         t->parms.iph.ttl = p->iph.ttl;
891         t->parms.iph.tos = p->iph.tos;
892         t->parms.iph.frag_off = p->iph.frag_off;
893
894         if (t->parms.link != p->link || t->fwmark != fwmark) {
895                 int mtu;
896
897                 t->parms.link = p->link;
898                 t->fwmark = fwmark;
899                 mtu = ip_tunnel_bind_dev(dev);
900                 if (set_mtu)
901                         dev->mtu = mtu;
902         }
903         dst_cache_reset(&t->dst_cache);
904         netdev_state_change(dev);
905 }
906
907 int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
908 {
909         int err = 0;
910         struct ip_tunnel *t = netdev_priv(dev);
911         struct net *net = t->net;
912         struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
913
914         switch (cmd) {
915         case SIOCGETTUNNEL:
916                 if (dev == itn->fb_tunnel_dev) {
917                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
918                         if (!t)
919                                 t = netdev_priv(dev);
920                 }
921                 memcpy(p, &t->parms, sizeof(*p));
922                 break;
923
924         case SIOCADDTUNNEL:
925         case SIOCCHGTUNNEL:
926                 err = -EPERM;
927                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
928                         goto done;
929                 if (p->iph.ttl)
930                         p->iph.frag_off |= htons(IP_DF);
931                 if (!(p->i_flags & VTI_ISVTI)) {
932                         if (!(p->i_flags & TUNNEL_KEY))
933                                 p->i_key = 0;
934                         if (!(p->o_flags & TUNNEL_KEY))
935                                 p->o_key = 0;
936                 }
937
938                 t = ip_tunnel_find(itn, p, itn->type);
939
940                 if (cmd == SIOCADDTUNNEL) {
941                         if (!t) {
942                                 t = ip_tunnel_create(net, itn, p);
943                                 err = PTR_ERR_OR_ZERO(t);
944                                 break;
945                         }
946
947                         err = -EEXIST;
948                         break;
949                 }
950                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
951                         if (t) {
952                                 if (t->dev != dev) {
953                                         err = -EEXIST;
954                                         break;
955                                 }
956                         } else {
957                                 unsigned int nflags = 0;
958
959                                 if (ipv4_is_multicast(p->iph.daddr))
960                                         nflags = IFF_BROADCAST;
961                                 else if (p->iph.daddr)
962                                         nflags = IFF_POINTOPOINT;
963
964                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
965                                         err = -EINVAL;
966                                         break;
967                                 }
968
969                                 t = netdev_priv(dev);
970                         }
971                 }
972
973                 if (t) {
974                         err = 0;
975                         ip_tunnel_update(itn, t, dev, p, true, 0);
976                 } else {
977                         err = -ENOENT;
978                 }
979                 break;
980
981         case SIOCDELTUNNEL:
982                 err = -EPERM;
983                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
984                         goto done;
985
986                 if (dev == itn->fb_tunnel_dev) {
987                         err = -ENOENT;
988                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
989                         if (!t)
990                                 goto done;
991                         err = -EPERM;
992                         if (t == netdev_priv(itn->fb_tunnel_dev))
993                                 goto done;
994                         dev = t->dev;
995                 }
996                 unregister_netdevice(dev);
997                 err = 0;
998                 break;
999
1000         default:
1001                 err = -EINVAL;
1002         }
1003
1004 done:
1005         return err;
1006 }
1007 EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
1008
1009 int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
1010                              void __user *data, int cmd)
1011 {
1012         struct ip_tunnel_parm p;
1013         int err;
1014
1015         if (copy_from_user(&p, data, sizeof(p)))
1016                 return -EFAULT;
1017         err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
1018         if (!err && copy_to_user(data, &p, sizeof(p)))
1019                 return -EFAULT;
1020         return err;
1021 }
1022 EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
1023
1024 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
1025 {
1026         struct ip_tunnel *tunnel = netdev_priv(dev);
1027         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1028         int max_mtu = IP_MAX_MTU - t_hlen;
1029
1030         if (dev->type == ARPHRD_ETHER)
1031                 max_mtu -= dev->hard_header_len;
1032
1033         if (new_mtu < ETH_MIN_MTU)
1034                 return -EINVAL;
1035
1036         if (new_mtu > max_mtu) {
1037                 if (strict)
1038                         return -EINVAL;
1039
1040                 new_mtu = max_mtu;
1041         }
1042
1043         dev->mtu = new_mtu;
1044         return 0;
1045 }
1046 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
1047
1048 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1049 {
1050         return __ip_tunnel_change_mtu(dev, new_mtu, true);
1051 }
1052 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
1053
1054 static void ip_tunnel_dev_free(struct net_device *dev)
1055 {
1056         struct ip_tunnel *tunnel = netdev_priv(dev);
1057
1058         gro_cells_destroy(&tunnel->gro_cells);
1059         dst_cache_destroy(&tunnel->dst_cache);
1060         free_percpu(dev->tstats);
1061 }
1062
1063 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1064 {
1065         struct ip_tunnel *tunnel = netdev_priv(dev);
1066         struct ip_tunnel_net *itn;
1067
1068         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1069
1070         if (itn->fb_tunnel_dev != dev) {
1071                 ip_tunnel_del(itn, netdev_priv(dev));
1072                 unregister_netdevice_queue(dev, head);
1073         }
1074 }
1075 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1076
1077 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1078 {
1079         struct ip_tunnel *tunnel = netdev_priv(dev);
1080
1081         return tunnel->net;
1082 }
1083 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1084
1085 int ip_tunnel_get_iflink(const struct net_device *dev)
1086 {
1087         struct ip_tunnel *tunnel = netdev_priv(dev);
1088
1089         return tunnel->parms.link;
1090 }
1091 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1092
1093 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1094                                   struct rtnl_link_ops *ops, char *devname)
1095 {
1096         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1097         struct ip_tunnel_parm parms;
1098         unsigned int i;
1099
1100         itn->rtnl_link_ops = ops;
1101         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1102                 INIT_HLIST_HEAD(&itn->tunnels[i]);
1103
1104         if (!ops || !net_has_fallback_tunnels(net)) {
1105                 struct ip_tunnel_net *it_init_net;
1106
1107                 it_init_net = net_generic(&init_net, ip_tnl_net_id);
1108                 itn->type = it_init_net->type;
1109                 itn->fb_tunnel_dev = NULL;
1110                 return 0;
1111         }
1112
1113         memset(&parms, 0, sizeof(parms));
1114         if (devname)
1115                 strscpy(parms.name, devname, IFNAMSIZ);
1116
1117         rtnl_lock();
1118         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1119         /* FB netdevice is special: we have one, and only one per netns.
1120          * Allowing to move it to another netns is clearly unsafe.
1121          */
1122         if (!IS_ERR(itn->fb_tunnel_dev)) {
1123                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1124                 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1125                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1126                 itn->type = itn->fb_tunnel_dev->type;
1127         }
1128         rtnl_unlock();
1129
1130         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1131 }
1132 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1133
1134 static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1135                               struct list_head *head,
1136                               struct rtnl_link_ops *ops)
1137 {
1138         struct net_device *dev, *aux;
1139         int h;
1140
1141         for_each_netdev_safe(net, dev, aux)
1142                 if (dev->rtnl_link_ops == ops)
1143                         unregister_netdevice_queue(dev, head);
1144
1145         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1146                 struct ip_tunnel *t;
1147                 struct hlist_node *n;
1148                 struct hlist_head *thead = &itn->tunnels[h];
1149
1150                 hlist_for_each_entry_safe(t, n, thead, hash_node)
1151                         /* If dev is in the same netns, it has already
1152                          * been added to the list by the previous loop.
1153                          */
1154                         if (!net_eq(dev_net(t->dev), net))
1155                                 unregister_netdevice_queue(t->dev, head);
1156         }
1157 }
1158
1159 void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1160                            struct rtnl_link_ops *ops)
1161 {
1162         struct ip_tunnel_net *itn;
1163         struct net *net;
1164         LIST_HEAD(list);
1165
1166         rtnl_lock();
1167         list_for_each_entry(net, net_list, exit_list) {
1168                 itn = net_generic(net, id);
1169                 ip_tunnel_destroy(net, itn, &list, ops);
1170         }
1171         unregister_netdevice_many(&list);
1172         rtnl_unlock();
1173 }
1174 EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1175
1176 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1177                       struct ip_tunnel_parm *p, __u32 fwmark)
1178 {
1179         struct ip_tunnel *nt;
1180         struct net *net = dev_net(dev);
1181         struct ip_tunnel_net *itn;
1182         int mtu;
1183         int err;
1184
1185         nt = netdev_priv(dev);
1186         itn = net_generic(net, nt->ip_tnl_net_id);
1187
1188         if (nt->collect_md) {
1189                 if (rtnl_dereference(itn->collect_md_tun))
1190                         return -EEXIST;
1191         } else {
1192                 if (ip_tunnel_find(itn, p, dev->type))
1193                         return -EEXIST;
1194         }
1195
1196         nt->net = net;
1197         nt->parms = *p;
1198         nt->fwmark = fwmark;
1199         err = register_netdevice(dev);
1200         if (err)
1201                 goto err_register_netdevice;
1202
1203         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1204                 eth_hw_addr_random(dev);
1205
1206         mtu = ip_tunnel_bind_dev(dev);
1207         if (tb[IFLA_MTU]) {
1208                 unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
1209
1210                 if (dev->type == ARPHRD_ETHER)
1211                         max -= dev->hard_header_len;
1212
1213                 mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
1214         }
1215
1216         err = dev_set_mtu(dev, mtu);
1217         if (err)
1218                 goto err_dev_set_mtu;
1219
1220         ip_tunnel_add(itn, nt);
1221         return 0;
1222
1223 err_dev_set_mtu:
1224         unregister_netdevice(dev);
1225 err_register_netdevice:
1226         return err;
1227 }
1228 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1229
1230 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1231                          struct ip_tunnel_parm *p, __u32 fwmark)
1232 {
1233         struct ip_tunnel *t;
1234         struct ip_tunnel *tunnel = netdev_priv(dev);
1235         struct net *net = tunnel->net;
1236         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1237
1238         if (dev == itn->fb_tunnel_dev)
1239                 return -EINVAL;
1240
1241         t = ip_tunnel_find(itn, p, dev->type);
1242
1243         if (t) {
1244                 if (t->dev != dev)
1245                         return -EEXIST;
1246         } else {
1247                 t = tunnel;
1248
1249                 if (dev->type != ARPHRD_ETHER) {
1250                         unsigned int nflags = 0;
1251
1252                         if (ipv4_is_multicast(p->iph.daddr))
1253                                 nflags = IFF_BROADCAST;
1254                         else if (p->iph.daddr)
1255                                 nflags = IFF_POINTOPOINT;
1256
1257                         if ((dev->flags ^ nflags) &
1258                             (IFF_POINTOPOINT | IFF_BROADCAST))
1259                                 return -EINVAL;
1260                 }
1261         }
1262
1263         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1264         return 0;
1265 }
1266 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1267
1268 int ip_tunnel_init(struct net_device *dev)
1269 {
1270         struct ip_tunnel *tunnel = netdev_priv(dev);
1271         struct iphdr *iph = &tunnel->parms.iph;
1272         int err;
1273
1274         dev->needs_free_netdev = true;
1275         dev->priv_destructor = ip_tunnel_dev_free;
1276         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1277         if (!dev->tstats)
1278                 return -ENOMEM;
1279
1280         err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1281         if (err) {
1282                 free_percpu(dev->tstats);
1283                 return err;
1284         }
1285
1286         err = gro_cells_init(&tunnel->gro_cells, dev);
1287         if (err) {
1288                 dst_cache_destroy(&tunnel->dst_cache);
1289                 free_percpu(dev->tstats);
1290                 return err;
1291         }
1292
1293         tunnel->dev = dev;
1294         tunnel->net = dev_net(dev);
1295         strcpy(tunnel->parms.name, dev->name);
1296         iph->version            = 4;
1297         iph->ihl                = 5;
1298
1299         if (tunnel->collect_md)
1300                 netif_keep_dst(dev);
1301         netdev_lockdep_set_classes(dev);
1302         return 0;
1303 }
1304 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1305
1306 void ip_tunnel_uninit(struct net_device *dev)
1307 {
1308         struct ip_tunnel *tunnel = netdev_priv(dev);
1309         struct net *net = tunnel->net;
1310         struct ip_tunnel_net *itn;
1311
1312         itn = net_generic(net, tunnel->ip_tnl_net_id);
1313         ip_tunnel_del(itn, netdev_priv(dev));
1314         if (itn->fb_tunnel_dev == dev)
1315                 WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1316
1317         dst_cache_reset(&tunnel->dst_cache);
1318 }
1319 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1320
1321 /* Do least required initialization, rest of init is done in tunnel_init call */
1322 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1323 {
1324         struct ip_tunnel *tunnel = netdev_priv(dev);
1325         tunnel->ip_tnl_net_id = net_id;
1326 }
1327 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1328
1329 MODULE_DESCRIPTION("IPv4 tunnel implementation library");
1330 MODULE_LICENSE("GPL");