Linux kernel source: net/ipv4/ip_gre.c — GRE over IPv4 tunnel driver (gre, gretap, erspan).
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      Linux NET3:     GRE over IP protocol decoder.
4  *
5  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6  */
7
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/uaccess.h>
16 #include <linux/skbuff.h>
17 #include <linux/netdevice.h>
18 #include <linux/in.h>
19 #include <linux/tcp.h>
20 #include <linux/udp.h>
21 #include <linux/if_arp.h>
22 #include <linux/if_vlan.h>
23 #include <linux/init.h>
24 #include <linux/in6.h>
25 #include <linux/inetdevice.h>
26 #include <linux/igmp.h>
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/etherdevice.h>
29 #include <linux/if_ether.h>
30
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/gre.h>
45 #include <net/dst_metadata.h>
46 #include <net/erspan.h>
47
48 /*
49    Problems & solutions
50    --------------------
51
52    1. The most important issue is detecting local dead loops.
53    They would cause complete host lockup in transmit, which
54    would be "resolved" by stack overflow or, if queueing is enabled,
55    with infinite looping in net_bh.
56
57    We cannot track such dead loops during route installation,
58    it is infeasible task. The most general solutions would be
59    to keep skb->encapsulation counter (sort of local ttl),
60    and silently drop packet when it expires. It is a good
61    solution, but it supposes maintaining new variable in ALL
62    skb, even if no tunneling is used.
63
64    Current solution: xmit_recursion breaks dead loops. This is a percpu
65    counter, since when we enter the first ndo_xmit(), cpu migration is
66    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
67
68    2. Networking dead loops would not kill routers, but would really
69    kill network. IP hop limit plays role of "t->recursion" in this case,
70    if we copy it from packet being encapsulated to upper header.
71    It is very good solution, but it introduces two problems:
72
73    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
74      do not work over tunnels.
75    - traceroute does not work. I planned to relay ICMP from tunnel,
76      so that this problem would be solved and traceroute output
77      would even more informative. This idea appeared to be wrong:
78      only Linux complies to rfc1812 now (yes, guys, Linux is the only
79      true router now :-)), all routers (at least, in neighbourhood of mine)
80      return only 8 bytes of payload. It is the end.
81
82    Hence, if we want that OSPF worked or traceroute said something reasonable,
83    we should search for another solution.
84
85    One of them is to parse packet trying to detect inner encapsulation
86    made by our node. It is difficult or even impossible, especially,
87    taking into account fragmentation. TO be short, ttl is not solution at all.
88
89    Current solution: The solution was UNEXPECTEDLY SIMPLE.
90    We force DF flag on tunnels with preconfigured hop limit,
91    that is ALL. :-) Well, it does not remove the problem completely,
92    but exponential growth of network traffic is changed to linear
93    (branches, that exceed pmtu are pruned) and tunnel mtu
94    rapidly degrades to value <68, where looping stops.
95    Yes, it is not good if there exists a router in the loop,
96    which does not force DF, even when encapsulating packets have DF set.
97    But it is not our problem! Nobody could accuse us, we made
98    all that we could make. Even if it is your gated who injected
99    fatal route to network, even if it were you who configured
100    fatal static route: you are innocent. :-)
101
102    Alexey Kuznetsov.
103  */
104
/* Module parameter: when true, log received packets whose ECN bits were
 * corrupted in transit (consumed by ip_tunnel_rcv() via the calls below).
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static const struct header_ops ipgre_header_ops;

static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
                                u32 id, u32 index,
                                bool truncate, bool is_ipv4);

/* Per-netns table ids for the three tunnel flavours implemented here:
 * layer-3 GRE (gre), Ethernet-over-GRE (gretap), and ERSPAN.
 */
static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;
120
/* Process an ICMP error that quotes a GRE packet this host transmitted.
 *
 * @skb:  the ICMP error message; icmp_hdr(skb) is valid and the quoted
 *        (inner) IP header immediately follows it.
 * @info: ICMP info word passed down from gre_err() (unused here).
 * @tpi:  GRE header of the quoted packet, already parsed by gre_err().
 *
 * Returns 0 when the error was handled or deliberately ignored,
 * -ENOENT when no matching tunnel was found.
 */
static int ipgre_err(struct sk_buff *skb, u32 info,
                     const struct tnl_ptk_info *tpi)
{

        /* All the routers (except for Linux) return only
           8 bytes of packet payload. It means, that precise relaying of
           ICMP in the real Internet is absolutely infeasible.

           Moreover, Cisco "wise men" put GRE key to the third word
           in GRE header. It makes impossible maintaining even soft
           state for keyed GRE tunnels with enabled checksum. Tell
           them "thank you".

           Well, I wonder, rfc1812 was written by Cisco employee,
           what the hell these idiots break standards established
           by themselves???
           */
        struct net *net = dev_net(skb->dev);
        struct ip_tunnel_net *itn;
        const struct iphdr *iph;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        unsigned int data_len = 0;
        struct ip_tunnel *t;

        /* Select the per-netns tunnel table by GRE payload protocol. */
        if (tpi->proto == htons(ETH_P_TEB))
                itn = net_generic(net, gre_tap_net_id);
        else if (tpi->proto == htons(ETH_P_ERSPAN) ||
                 tpi->proto == htons(ETH_P_ERSPAN2))
                itn = net_generic(net, erspan_net_id);
        else
                itn = net_generic(net, ipgre_net_id);

        /* The quoted packet is one we sent, so its saddr is the tunnel's
         * local address and its daddr the remote: pass daddr first so it
         * matches the lookup's remote-address slot.
         */
        iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
        t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
                             iph->daddr, iph->saddr, tpi->key);

        if (!t)
                return -ENOENT;

        switch (type) {
        default:                /* unknown types: nothing useful to do */
        case ICMP_PARAMETERPROB:
                return 0;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return 0;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe they are just ether pollution. --ANK
                         */
                        break;
                }
                break;

        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return 0;
                data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
                break;

        case ICMP_REDIRECT:
                break;
        }

#if IS_ENABLED(CONFIG_IPV6)
        /* For IPv6-over-GRE, try to relay the error to the inner IPv6
         * flow as an ICMPv6 unreachable; if that succeeds we are done.
         */
        if (tpi->proto == htons(ETH_P_IPV6) &&
            !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
                                        type, data_len))
                return 0;
#endif

        /* Unbound (daddr == 0) and multicast tunnels have no single peer
         * the error can be attributed to -- ignore.
         */
        if (t->parms.iph.daddr == 0 ||
            ipv4_is_multicast(t->parms.iph.daddr))
                return 0;

        /* ttl == 0: outer TTL is not fixed by configuration, so
         * TTL-exceeded is presumably routine (e.g. traceroute through
         * the tunnel) rather than a tunnel fault -- don't count it.
         */
        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                return 0;

        /* Error bookkeeping: bump the count while errors keep arriving
         * within the timeout window, otherwise restart it.
         */
        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;

        return 0;
}
213
/* ICMP error handler for IPPROTO_GRE.
 *
 * @skb points at the quoted copy of our transmitted GRE packet (outer IP
 * header at skb->data); @info carries type-specific data such as the
 * next-hop MTU.  PMTU updates and redirects are applied to the route
 * cache directly; everything else is forwarded to ipgre_err().
 */
static void gre_err(struct sk_buff *skb, u32 info)
{
        /* All the routers (except for Linux) return only
         * 8 bytes of packet payload. It means, that precise relaying of
         * ICMP in the real Internet is absolutely infeasible.
         *
         * Moreover, Cisco "wise men" put GRE key to the third word
         * in GRE header. It makes impossible maintaining even soft
         * state for keyed
         * GRE tunnels with enabled checksum. Tell them "thank you".
         *
         * Well, I wonder, rfc1812 was written by Cisco employee,
         * what the hell these idiots break standards established
         * by themselves???
         */

        const struct iphdr *iph = (struct iphdr *)skb->data;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        struct tnl_ptk_info tpi;

        /* Parse the GRE header that follows the quoted IP header; bail
         * out on truncated or invalid headers.
         */
        if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
                             iph->ihl * 4) < 0)
                return;

        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
                                 skb->dev->ifindex, IPPROTO_GRE);
                return;
        }
        if (type == ICMP_REDIRECT) {
                ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
                              IPPROTO_GRE);
                return;
        }

        ipgre_err(skb, info, &tpi);
}
252
/* ERSPAN type I (version 0) and type II (version 1) share the same GRE
 * protocol number, so they can only be told apart by GRE header size:
 * type I uses the bare 4-byte base header, type II adds a key field for
 * 8 bytes in total.
 */
static bool is_erspan_type1(int gre_hdr_len)
{
        const int type1_gre_hdr_len = 4;

        return gre_hdr_len == type1_gre_hdr_len;
}
261
/* Receive one ERSPAN-in-GRE packet (ETH_P_ERSPAN / ETH_P_ERSPAN2).
 *
 * @gre_hdr_len: GRE header length reported by gre_parse_header();
 *               a bare 4-byte header identifies ERSPAN type I.
 *
 * Returns PACKET_RCVD when the skb was consumed (delivered or dropped),
 * PACKET_REJECT when no tunnel matched or the packet was too short
 * (the caller then answers with ICMP port-unreachable).
 */
static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
                      int gre_hdr_len)
{
        struct net *net = dev_net(skb->dev);
        struct metadata_dst *tun_dst = NULL;
        struct erspan_base_hdr *ershdr;
        struct ip_tunnel_net *itn;
        struct ip_tunnel *tunnel;
        const struct iphdr *iph;
        struct erspan_md2 *md2;
        int ver;
        int len;

        itn = net_generic(net, erspan_net_id);
        iph = ip_hdr(skb);
        if (is_erspan_type1(gre_hdr_len)) {
                /* Type I carries no ERSPAN header and no GRE key. */
                ver = 0;
                tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
                                          tpi->flags | TUNNEL_NO_KEY,
                                          iph->saddr, iph->daddr, 0);
        } else {
                /* The ERSPAN base header is needed to learn the version. */
                if (unlikely(!pskb_may_pull(skb,
                                            gre_hdr_len + sizeof(*ershdr))))
                        return PACKET_REJECT;

                ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
                ver = ershdr->ver;
                /* Re-read the IP header pointer: pskb_may_pull() may have
                 * reallocated the skb head.
                 */
                iph = ip_hdr(skb);
                tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
                                          tpi->flags | TUNNEL_KEY,
                                          iph->saddr, iph->daddr, tpi->key);
        }

        if (tunnel) {
                /* Strip GRE plus the version-dependent ERSPAN header
                 * (type I has none).
                 */
                if (is_erspan_type1(gre_hdr_len))
                        len = gre_hdr_len;
                else
                        len = gre_hdr_len + erspan_hdr_len(ver);

                if (unlikely(!pskb_may_pull(skb, len)))
                        return PACKET_REJECT;

                if (__iptunnel_pull_header(skb,
                                           len,
                                           htons(ETH_P_TEB),
                                           false, false) < 0)
                        goto drop;

                if (tunnel->collect_md) {
                        struct erspan_metadata *pkt_md, *md;
                        struct ip_tunnel_info *info;
                        unsigned char *gh;
                        __be64 tun_id;
                        __be16 flags;

                        /* Expose the GRE key as the tunnel id in the
                         * metadata handed to the collect_md consumer.
                         */
                        tpi->flags |= TUNNEL_KEY;
                        flags = tpi->flags;
                        tun_id = key32_to_tunnel_id(tpi->key);

                        tun_dst = ip_tun_rx_dst(skb, flags,
                                                tun_id, sizeof(*md));
                        if (!tun_dst)
                                return PACKET_REJECT;

                        /* skb can be uncloned in __iptunnel_pull_header, so
                         * old pkt_md is no longer valid and we need to reset
                         * it
                         */
                        gh = skb_network_header(skb) +
                             skb_network_header_len(skb);
                        pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
                                                            sizeof(*ershdr));
                        md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
                        md->version = ver;
                        md2 = &md->u.md2;
                        /* Copy the version-sized on-wire metadata. */
                        memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
                                                       ERSPAN_V2_MDSIZE);

                        info = &tun_dst->u.tun_info;
                        info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
                        info->options_len = sizeof(*md);
                }

                skb_reset_mac_header(skb);
                ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
                return PACKET_RCVD;
        }
        return PACKET_REJECT;

drop:
        kfree_skb(skb);
        return PACKET_RCVD;
}
355
/* Try to deliver one GRE packet to a tunnel from table @itn.
 *
 * @hdr_len:   total GRE header length to pull off before delivery.
 * @raw_proto: forwarded to __iptunnel_pull_header(); set by ipgre_rcv()
 *             on its second-chance lookup of ETH_P_TEB traffic against
 *             layer-3 gre tunnels.
 *
 * Returns PACKET_RCVD when the skb was consumed (delivered or dropped),
 * PACKET_REJECT when metadata allocation failed, and PACKET_NEXT when
 * no tunnel in @itn matched so the caller may try another table.
 */
static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
                       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
        struct metadata_dst *tun_dst = NULL;
        const struct iphdr *iph;
        struct ip_tunnel *tunnel;

        iph = ip_hdr(skb);
        tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
                                  iph->saddr, iph->daddr, tpi->key);

        if (tunnel) {
                const struct iphdr *tnl_params;

                if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
                                           raw_proto, false) < 0)
                        goto drop;

                /* Special case for ipgre_header_parse(), which expects the
                 * mac_header to point to the outer IP header.
                 */
                if (tunnel->dev->header_ops == &ipgre_header_ops)
                        skb_pop_mac_header(skb);
                else
                        skb_reset_mac_header(skb);

                /* collect_md tunnels and unbound (daddr == 0) tunnels
                 * carry per-packet metadata instead of fixed parameters.
                 */
                tnl_params = &tunnel->parms.iph;
                if (tunnel->collect_md || tnl_params->daddr == 0) {
                        __be16 flags;
                        __be64 tun_id;

                        flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
                        tun_id = key32_to_tunnel_id(tpi->key);
                        tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
                        if (!tun_dst)
                                return PACKET_REJECT;
                }

                ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
                return PACKET_RCVD;
        }
        return PACKET_NEXT;

drop:
        kfree_skb(skb);
        return PACKET_RCVD;
}
403
404 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
405                      int hdr_len)
406 {
407         struct net *net = dev_net(skb->dev);
408         struct ip_tunnel_net *itn;
409         int res;
410
411         if (tpi->proto == htons(ETH_P_TEB))
412                 itn = net_generic(net, gre_tap_net_id);
413         else
414                 itn = net_generic(net, ipgre_net_id);
415
416         res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
417         if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
418                 /* ipgre tunnels in collect metadata mode should receive
419                  * also ETH_P_TEB traffic.
420                  */
421                 itn = net_generic(net, ipgre_net_id);
422                 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
423         }
424         return res;
425 }
426
/* GRE receive entry point: parse the GRE header, hand ERSPAN payloads
 * to erspan_rcv() and everything else to ipgre_rcv().  Packets no
 * tunnel claims are answered with ICMP port-unreachable and dropped.
 * Always returns 0 (the skb is consumed in every path).
 */
static int gre_rcv(struct sk_buff *skb)
{
        struct tnl_ptk_info tpi;
        bool csum_err = false;
        int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
        if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
                /* Looped back packet, drop it! */
                if (rt_is_output_route(skb_rtable(skb)))
                        goto drop;
        }
#endif

        hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
        if (hdr_len < 0)
                goto drop;

        /* Both ERSPAN EtherTypes use the dedicated ERSPAN path. */
        if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
                     tpi.proto == htons(ETH_P_ERSPAN2))) {
                if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
                        return 0;
                goto out;
        }

        if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
                return 0;

out:
        /* No tunnel wanted it -- behave as if nothing listens on GRE. */
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
        kfree_skb(skb);
        return 0;
}
461
/* Push the GRE header onto @skb and hand it to the generic IP tunnel
 * transmit path.
 *
 * @tnl_params: template for the outer IPv4 header; its protocol field
 *              also selects the outer protocol for ip_tunnel_xmit().
 * @proto:      GRE payload protocol (EtherType).
 */
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
                       const struct iphdr *tnl_params,
                       __be16 proto)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        __be16 flags = tunnel->parms.o_flags;

        /* Push GRE header.  A sequence number is only consumed when
         * TUNNEL_SEQ is set; o_seqno is atomic so concurrent
         * transmitters never hand out the same value.
         */
        gre_build_header(skb, tunnel->tun_hlen,
                         flags, proto, tunnel->parms.o_key,
                         (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);

        ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
476
477 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
478 {
479         return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
480 }
481
/* Transmit path for collect_md ("external"/flow-based) gre and gretap
 * devices: every outer parameter (addresses, key, csum/seq flags) comes
 * from the skb's tunnel metadata rather than the device configuration.
 * On any failure the skb is freed and accounted as tx_dropped.
 */
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
                        __be16 proto)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct ip_tunnel_info *tun_info;
        const struct ip_tunnel_key *key;
        int tunnel_hlen;
        __be16 flags;

        /* Valid IPv4 TX metadata is mandatory in this mode. */
        tun_info = skb_tunnel_info(skb);
        if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
                     ip_tunnel_info_af(tun_info) != AF_INET))
                goto err_free_skb;

        key = &tun_info->key;
        tunnel_hlen = gre_calc_hlen(key->tun_flags);

        if (skb_cow_head(skb, dev->needed_headroom))
                goto err_free_skb;

        /* Push Tunnel header. */
        if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
                goto err_free_skb;

        /* Only these three flags are representable in the GRE header. */
        flags = tun_info->key.tun_flags &
                (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
        gre_build_header(skb, tunnel_hlen, flags, proto,
                         tunnel_id_to_key32(tun_info->key.tun_id),
                         (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);

        ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

        return;

err_free_skb:
        kfree_skb(skb);
        DEV_STATS_INC(dev, tx_dropped);
}
520
/* Transmit path for collect_md ERSPAN devices: the ERSPAN options in
 * the skb's tunnel metadata supply version, index, dir and hwid.
 * Oversized frames are truncated (and flagged as such in the ERSPAN
 * header) rather than dropped.  Failed skbs are freed and accounted as
 * tx_dropped.
 */
static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct ip_tunnel_info *tun_info;
        const struct ip_tunnel_key *key;
        struct erspan_metadata *md;
        bool truncate = false;
        __be16 proto;
        int tunnel_hlen;
        int version;
        int nhoff;

        /* Valid IPv4 TX metadata with ERSPAN options is mandatory. */
        tun_info = skb_tunnel_info(skb);
        if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
                     ip_tunnel_info_af(tun_info) != AF_INET))
                goto err_free_skb;

        key = &tun_info->key;
        if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
                goto err_free_skb;
        if (tun_info->options_len < sizeof(*md))
                goto err_free_skb;
        md = ip_tunnel_info_opts(tun_info);

        /* ERSPAN has fixed 8 byte GRE header */
        version = md->version;
        tunnel_hlen = 8 + erspan_hdr_len(version);

        if (skb_cow_head(skb, dev->needed_headroom))
                goto err_free_skb;

        if (gre_handle_offloads(skb, false))
                goto err_free_skb;

        /* Trim frames that exceed the device MTU and mark truncation. */
        if (skb->len > dev->mtu + dev->hard_header_len) {
                if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
                        goto err_free_skb;
                truncate = true;
        }

        /* Also flag truncation when the inner length field claims more
         * data than the skb actually carries.
         */
        nhoff = skb_network_offset(skb);
        if (skb->protocol == htons(ETH_P_IP) &&
            (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
                truncate = true;

        if (skb->protocol == htons(ETH_P_IPV6)) {
                int thoff;

                if (skb_transport_header_was_set(skb))
                        thoff = skb_transport_offset(skb);
                else
                        thoff = nhoff + sizeof(struct ipv6hdr);
                if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
                        truncate = true;
        }

        /* Build the version-specific ERSPAN header; unknown versions
         * are rejected.
         */
        if (version == 1) {
                erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
                                    ntohl(md->u.index), truncate, true);
                proto = htons(ETH_P_ERSPAN);
        } else if (version == 2) {
                erspan_build_header_v2(skb,
                                       ntohl(tunnel_id_to_key32(key->tun_id)),
                                       md->u.md2.dir,
                                       get_hwid(&md->u.md2),
                                       truncate, true);
                proto = htons(ETH_P_ERSPAN2);
        } else {
                goto err_free_skb;
        }

        /* ERSPAN always carries a GRE sequence number. */
        gre_build_header(skb, 8, TUNNEL_SEQ,
                         proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));

        ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

        return;

err_free_skb:
        kfree_skb(skb);
        DEV_STATS_INC(dev, tx_dropped);
}
603
/* ndo_fill_metadata_dst: resolve the route a collect_md transmit would
 * take and record the chosen outer source address back into the skb's
 * tunnel metadata.  Returns 0 on success or a negative errno.
 */
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
        struct ip_tunnel_info *info = skb_tunnel_info(skb);
        const struct ip_tunnel_key *key;
        struct rtable *rt;
        struct flowi4 fl4;

        if (ip_tunnel_info_af(info) != AF_INET)
                return -EINVAL;

        key = &info->key;
        ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
                            tunnel_id_to_key32(key->tun_id),
                            key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
                            skb->mark, skb_get_hash(skb), key->flow_flags);
        rt = ip_route_output_key(dev_net(dev), &fl4);
        if (IS_ERR(rt))
                return PTR_ERR(rt);

        /* Only the selected source address is needed, not the route. */
        ip_rt_put(rt);
        info->key.u.ipv4.src = fl4.saddr;
        return 0;
}
627
628 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
629                               struct net_device *dev)
630 {
631         struct ip_tunnel *tunnel = netdev_priv(dev);
632         const struct iphdr *tnl_params;
633
634         if (!pskb_inet_may_pull(skb))
635                 goto free_skb;
636
637         if (tunnel->collect_md) {
638                 gre_fb_xmit(skb, dev, skb->protocol);
639                 return NETDEV_TX_OK;
640         }
641
642         if (dev->header_ops) {
643                 int pull_len = tunnel->hlen + sizeof(struct iphdr);
644
645                 if (skb_cow_head(skb, 0))
646                         goto free_skb;
647
648                 tnl_params = (const struct iphdr *)skb->data;
649
650                 if (!pskb_network_may_pull(skb, pull_len))
651                         goto free_skb;
652
653                 /* ip_tunnel_xmit() needs skb->data pointing to gre header. */
654                 skb_pull(skb, pull_len);
655                 skb_reset_mac_header(skb);
656
657                 if (skb->ip_summed == CHECKSUM_PARTIAL &&
658                     skb_checksum_start(skb) < skb->data)
659                         goto free_skb;
660         } else {
661                 if (skb_cow_head(skb, dev->needed_headroom))
662                         goto free_skb;
663
664                 tnl_params = &tunnel->parms.iph;
665         }
666
667         if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
668                 goto free_skb;
669
670         __gre_xmit(skb, dev, tnl_params, skb->protocol);
671         return NETDEV_TX_OK;
672
673 free_skb:
674         kfree_skb(skb);
675         DEV_STATS_INC(dev, tx_dropped);
676         return NETDEV_TX_OK;
677 }
678
/* ndo_start_xmit for configured ERSPAN devices (collect_md devices are
 * served by erspan_fb_xmit() instead).  Trims oversized frames, builds
 * the version-specific ERSPAN header -- version 0 (type I) has none --
 * and transmits over GRE.  Always returns NETDEV_TX_OK.
 */
static netdev_tx_t erspan_xmit(struct sk_buff *skb,
                               struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        bool truncate = false;
        __be16 proto;

        if (!pskb_inet_may_pull(skb))
                goto free_skb;

        if (tunnel->collect_md) {
                erspan_fb_xmit(skb, dev);
                return NETDEV_TX_OK;
        }

        if (gre_handle_offloads(skb, false))
                goto free_skb;

        if (skb_cow_head(skb, dev->needed_headroom))
                goto free_skb;

        /* Trim frames that exceed the device MTU and mark truncation. */
        if (skb->len > dev->mtu + dev->hard_header_len) {
                if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
                        goto free_skb;
                truncate = true;
        }

        /* Push ERSPAN header */
        if (tunnel->erspan_ver == 0) {
                proto = htons(ETH_P_ERSPAN);
                /* Type I carries no sequence numbers. */
                tunnel->parms.o_flags &= ~TUNNEL_SEQ;
        } else if (tunnel->erspan_ver == 1) {
                erspan_build_header(skb, ntohl(tunnel->parms.o_key),
                                    tunnel->index,
                                    truncate, true);
                proto = htons(ETH_P_ERSPAN);
        } else if (tunnel->erspan_ver == 2) {
                erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
                                       tunnel->dir, tunnel->hwid,
                                       truncate, true);
                proto = htons(ETH_P_ERSPAN2);
        } else {
                goto free_skb;
        }

        /* o_key was already consumed above as the ERSPAN session id;
         * clear TUNNEL_KEY so __gre_xmit() does not also emit it as a
         * GRE key field.
         */
        tunnel->parms.o_flags &= ~TUNNEL_KEY;
        __gre_xmit(skb, dev, &tunnel->parms.iph, proto);
        return NETDEV_TX_OK;

free_skb:
        kfree_skb(skb);
        DEV_STATS_INC(dev, tx_dropped);
        return NETDEV_TX_OK;
}
733
/* ndo_start_xmit for gretap (Ethernet-over-GRE) devices: the whole L2
 * frame becomes the GRE payload, carried as ETH_P_TEB.  collect_md
 * devices delegate to gre_fb_xmit().  Always returns NETDEV_TX_OK;
 * failed skbs are freed and accounted as tx_dropped.
 */
static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
                                struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        if (!pskb_inet_may_pull(skb))
                goto free_skb;

        if (tunnel->collect_md) {
                gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
                return NETDEV_TX_OK;
        }

        if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
                goto free_skb;

        if (skb_cow_head(skb, dev->needed_headroom))
                goto free_skb;

        __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
        return NETDEV_TX_OK;

free_skb:
        kfree_skb(skb);
        DEV_STATS_INC(dev, tx_dropped);
        return NETDEV_TX_OK;
}
761
/* Re-derive size-dependent state after the tunnel's o_flags changed:
 * propagate the GRE header length delta into the cached lengths, the
 * headroom (hard_header_len for header_ops/NBMA devices) and, when
 * @set_mtu, the MTU; then refresh the software-GSO feature bits.
 */
static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        __be16 flags;
        int len;

        /* len ends up holding the change (new - old) in GRE hdr size. */
        len = tunnel->tun_hlen;
        tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
        len = tunnel->tun_hlen - len;
        tunnel->hlen = tunnel->hlen + len;

        if (dev->header_ops)
                dev->hard_header_len += len;
        else
                dev->needed_headroom += len;

        if (set_mtu)
                dev->mtu = max_t(int, dev->mtu - len, 68); /* 68: historic IPv4 minimum MTU */

        flags = tunnel->parms.o_flags;

        /* Software GSO is turned off when sequence numbers are in use,
         * or when GRE checksums are combined with an extra outer
         * encapsulation.
         */
        if (flags & TUNNEL_SEQ ||
            (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
                dev->features &= ~NETIF_F_GSO_SOFTWARE;
                dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
        } else {
                dev->features |= NETIF_F_GSO_SOFTWARE;
                dev->hw_features |= NETIF_F_GSO_SOFTWARE;
        }
}
792
/* ioctl backend for GRE tunnels (SIOCADDTUNNEL/SIOCCHGTUNNEL/etc.):
 * validates user parameters, converts between on-wire GRE_* flag format
 * and internal TUNNEL_* flags around the generic ip_tunnel_ctl(), and
 * re-syncs derived state after a change.  Returns 0 or negative errno;
 * @p is also used to copy results back to the caller.
 */
static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
                            int cmd)
{
        int err;

        /* For add/change, reject anything but a plain option-less IPv4
         * GRE header, frag_off bits other than DF, and the unsupported
         * GRE version/routing header bits.
         */
        if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
                if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
                    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
                    ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
                        return -EINVAL;
        }

        /* Convert wire-format flags to internal TUNNEL_* flags for the
         * generic helper; converted back before returning to the user.
         */
        p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
        p->o_flags = gre_flags_to_tnl_flags(p->o_flags);

        err = ip_tunnel_ctl(dev, p, cmd);
        if (err)
                return err;

        if (cmd == SIOCCHGTUNNEL) {
                struct ip_tunnel *t = netdev_priv(dev);

                t->parms.i_flags = p->i_flags;
                t->parms.o_flags = p->o_flags;

                /* Only non-erspan devices recompute header sizes here. */
                if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
                        ipgre_link_update(dev, true);
        }

        p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
        p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
        return 0;
}
826
827 /* Nice toy. Unfortunately, useless in real life :-)
828    It allows to construct virtual multiprotocol broadcast "LAN"
829    over the Internet, provided multicast routing is tuned.
830
831
832    I have no idea was this bicycle invented before me,
833    so that I had to set ARPHRD_IPGRE to a random value.
834    I have an impression, that Cisco could make something similar,
835    but this feature is apparently missing in IOS<=11.2(8).
836
837    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
838    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
839
840    ping -t 255 224.66.66.66
841
842    If nobody answers, mbone does not work.
843
844    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
845    ip addr add 10.66.66.<somewhat>/24 dev Universe
846    ifconfig Universe up
847    ifconfig Universe add fe80::<Your_real_addr>/10
848    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
849    ftp 10.66.66.66
850    ...
851    ftp fec0:6666:6666::193.233.7.65
852    ...
853  */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	/* header_ops->create: prepend the outer IPv4 + GRE headers.
	 * t->hlen covers the GRE (plus any extra encap) header length.
	 */
	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph+1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	/* Start from the template IP header kept in the tunnel parms;
	 * this does not touch the GRE header filled in above.
	 */
	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	/* No destination yet: return the negative header length, the
	 * dev_hard_header() convention for an incomplete header.
	 */
	return -(t->hlen + sizeof(*iph));
}
879
880 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
881 {
882         const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
883         memcpy(haddr, &iph->saddr, 4);
884         return 4;
885 }
886
/* Link-layer header ops used by NBMA/broadcast layer-3 GRE devices. */
static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
891
892 #ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	/* For multicast tunnels, resolve the route to the group and join
	 * it on the underlying output device.
	 */
	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev; /* switch to the underlying device */
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		/* Remember where we joined so ipgre_close() can leave. */
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}
918
919 static int ipgre_close(struct net_device *dev)
920 {
921         struct ip_tunnel *t = netdev_priv(dev);
922
923         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
924                 struct in_device *in_dev;
925                 in_dev = inetdev_by_index(t->net, t->mlink);
926                 if (in_dev)
927                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
928         }
929         return 0;
930 }
931 #endif
932
/* netdev ops for layer-3 "gre" devices. */
static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_siocdevprivate	= ip_tunnel_siocdevprivate,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_tunnel_ctl		= ipgre_tunnel_ctl,
};
947
/* Feature set shared by all GRE device flavours (gre, gretap, erspan). */
#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)
952
953 static void ipgre_tunnel_setup(struct net_device *dev)
954 {
955         dev->netdev_ops         = &ipgre_netdev_ops;
956         dev->type               = ARPHRD_IPGRE;
957         ip_tunnel_setup(dev, ipgre_net_id);
958 }
959
/* Init shared by gre and gretap devices: header sizing and offloads. */
static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	__be16 flags;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	/* Total outer overhead: GRE header plus any extra encap header. */
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
	dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);

	dev->features		|= GRE_FEATURES | NETIF_F_LLTX;
	dev->hw_features	|= GRE_FEATURES;

	flags = tunnel->parms.o_flags;

	/* TCP offload with GRE SEQ is not supported, nor can we support 2
	 * levels of outer headers requiring an update.
	 */
	if (flags & TUNNEL_SEQ)
		return;
	if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)
		return;

	/* Safe to advertise software GSO. */
	dev->features |= NETIF_F_GSO_SOFTWARE;
	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
}
988
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	/* The "hardware" addresses of a GRE device are the outer IPv4
	 * source/destination addresses.
	 */
	__dev_addr_set(dev, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len		= 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Multicast tunnels need a local address and build
			 * their outer headers through header_ops.
			 */
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
			dev->hard_header_len = tunnel->hlen + sizeof(*iph);
			dev->needed_headroom = 0;
		}
#endif
	} else if (!tunnel->collect_md) {
		/* No fixed remote address: headers are built via
		 * header_ops with the destination supplied per packet.
		 */
		dev->header_ops = &ipgre_header_ops;
		dev->hard_header_len = tunnel->hlen + sizeof(*iph);
		dev->needed_headroom = 0;
	}

	return ip_tunnel_init(dev);
}
1022
/* Receive/error hooks registered with the shared GRE demux. */
static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};
1027
/* Per-netns init: set up the gre tunnel table (NULL: no fallback device
 * name is passed here, unlike gretap/erspan below).
 */
static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
1032
/* Per-netns teardown: delete all gre devices in the batched netns list. */
static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}
1037
/* pernet registration for the plain gre flavour. */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit_batch = ipgre_exit_batch_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
1044
1045 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1046                                  struct netlink_ext_ack *extack)
1047 {
1048         __be16 flags;
1049
1050         if (!data)
1051                 return 0;
1052
1053         flags = 0;
1054         if (data[IFLA_GRE_IFLAGS])
1055                 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1056         if (data[IFLA_GRE_OFLAGS])
1057                 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1058         if (flags & (GRE_VERSION|GRE_ROUTING))
1059                 return -EINVAL;
1060
1061         if (data[IFLA_GRE_COLLECT_METADATA] &&
1062             data[IFLA_GRE_ENCAP_TYPE] &&
1063             nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1064                 return -EINVAL;
1065
1066         return 0;
1067 }
1068
1069 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1070                               struct netlink_ext_ack *extack)
1071 {
1072         __be32 daddr;
1073
1074         if (tb[IFLA_ADDRESS]) {
1075                 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1076                         return -EINVAL;
1077                 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1078                         return -EADDRNOTAVAIL;
1079         }
1080
1081         if (!data)
1082                 goto out;
1083
1084         if (data[IFLA_GRE_REMOTE]) {
1085                 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1086                 if (!daddr)
1087                         return -EINVAL;
1088         }
1089
1090 out:
1091         return ipgre_tunnel_validate(tb, data, extack);
1092 }
1093
/* Netlink validation for erspan devices, on top of the gretap checks. */
static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	/* Version 0 needs none of the checks below. */
	if (data[IFLA_GRE_ERSPAN_VER] &&
	    nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
		return 0;

	/* ERSPAN type II/III should only have GRE sequence and key flag */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* ERSPAN Session ID only has 10-bit. Since we reuse
	 * 32-bit key field as ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}
1133
/* Translate IFLA_GRE_* attributes into @parms and @fwmark.
 * Also updates collect_md/ignore_df state directly on the device's
 * ip_tunnel private data.
 */
static int ipgre_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm *parms,
				__u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	/* Wire-format GRE flags -> internal TUNNEL_* flags. */
	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	/* PMTU discovery defaults to on (sets DF); it conflicts with an
	 * already-enabled ignore_df.
	 */
	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		/* Flow-based layer-3 devices are retyped to ARPHRD_NONE. */
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	/* ignore_df in turn conflicts with DF being set above. */
	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		  && (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	return 0;
}
1200
/* Parse erspan-specific attributes on top of ipgre_netlink_parms().
 * Version/index/dir/hwid values are stored directly on the tunnel.
 */
static int erspan_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm *parms,
				__u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);
	int err;

	err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
	if (err)
		return err;
	if (!data)
		return 0;

	if (data[IFLA_GRE_ERSPAN_VER]) {
		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);

		/* Only versions 0, 1 and 2 exist. */
		if (t->erspan_ver > 2)
			return -EINVAL;
	}

	if (t->erspan_ver == 1) {
		/* v1 carries a 20-bit index. */
		if (data[IFLA_GRE_ERSPAN_INDEX]) {
			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
			if (t->index & ~INDEX_MASK)
				return -EINVAL;
		}
	} else if (t->erspan_ver == 2) {
		/* v2 carries direction and hardware ID instead. */
		if (data[IFLA_GRE_ERSPAN_DIR]) {
			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
				return -EINVAL;
		}
		if (data[IFLA_GRE_ERSPAN_HWID]) {
			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
				return -EINVAL;
		}
	}

	return 0;
}
1244
1245 /* This function returns true when ENCAP attributes are present in the nl msg */
1246 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1247                                       struct ip_tunnel_encap *ipencap)
1248 {
1249         bool ret = false;
1250
1251         memset(ipencap, 0, sizeof(*ipencap));
1252
1253         if (!data)
1254                 return ret;
1255
1256         if (data[IFLA_GRE_ENCAP_TYPE]) {
1257                 ret = true;
1258                 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1259         }
1260
1261         if (data[IFLA_GRE_ENCAP_FLAGS]) {
1262                 ret = true;
1263                 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1264         }
1265
1266         if (data[IFLA_GRE_ENCAP_SPORT]) {
1267                 ret = true;
1268                 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1269         }
1270
1271         if (data[IFLA_GRE_ENCAP_DPORT]) {
1272                 ret = true;
1273                 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1274         }
1275
1276         return ret;
1277 }
1278
1279 static int gre_tap_init(struct net_device *dev)
1280 {
1281         __gre_tunnel_init(dev);
1282         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1283         netif_keep_dst(dev);
1284
1285         return ip_tunnel_init(dev);
1286 }
1287
/* netdev ops for Ethernet-over-GRE ("gretap") devices. */
static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};
1299
static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	/* v0 uses the base 4-byte GRE header; v1/v2 carry one extra
	 * 4-byte GRE field.
	 */
	if (tunnel->erspan_ver == 0)
		tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
	else
		tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */

	tunnel->parms.iph.protocol = IPPROTO_GRE;
	/* Outer overhead: GRE + optional encap + version-dependent
	 * ERSPAN header.
	 */
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       erspan_hdr_len(tunnel->erspan_ver);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}
1320
/* netdev ops for erspan devices. */
static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};
1332
1333 static void ipgre_tap_setup(struct net_device *dev)
1334 {
1335         ether_setup(dev);
1336         dev->max_mtu = 0;
1337         dev->netdev_ops = &gre_tap_netdev_ops;
1338         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1339         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1340         ip_tunnel_setup(dev, gre_tap_net_id);
1341 }
1342
1343 static int
1344 ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
1345 {
1346         struct ip_tunnel_encap ipencap;
1347
1348         if (ipgre_netlink_encap_parms(data, &ipencap)) {
1349                 struct ip_tunnel *t = netdev_priv(dev);
1350                 int err = ip_tunnel_encap_setup(t, &ipencap);
1351
1352                 if (err < 0)
1353                         return err;
1354         }
1355
1356         return 0;
1357 }
1358
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	__u32 fwmark = 0;
	int err;

	/* Apply optional encap settings before parsing tunnel parms. */
	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}
1376
static int erspan_newlink(struct net *src_net, struct net_device *dev,
			  struct nlattr *tb[], struct nlattr *data[],
			  struct netlink_ext_ack *extack)
{
	struct ip_tunnel_parm p;
	__u32 fwmark = 0;
	int err;

	/* Apply optional encap settings before parsing tunnel parms. */
	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err)
		return err;
	return ip_tunnel_newlink(dev, tb, &p, fwmark);
}
1394
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	__u32 fwmark = t->fwmark;	/* keep current mark unless overridden */
	struct ip_tunnel_parm p;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	t->parms.i_flags = p.i_flags;
	t->parms.o_flags = p.o_flags;

	/* Recompute header length/features; leave the MTU untouched if
	 * userspace set one explicitly.
	 */
	ipgre_link_update(dev, !tb[IFLA_MTU]);

	return 0;
}
1423
static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
			     struct nlattr *data[],
			     struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	__u32 fwmark = t->fwmark;	/* keep current mark unless overridden */
	struct ip_tunnel_parm p;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	/* Cache the new flags; erspan devices skip ipgre_link_update()
	 * (see also ipgre_tunnel_ctl()).
	 */
	t->parms.i_flags = p.i_flags;
	t->parms.o_flags = p.o_flags;

	return 0;
}
1450
/* Worst-case netlink payload for ipgre_fill_info()/erspan_fill_info(). */
static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_IGNORE_DF */
		nla_total_size(1) +
		/* IFLA_GRE_FWMARK */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_VER */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_DIR */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_HWID */
		nla_total_size(2) +
		0;
}
1498
/* Dump the tunnel configuration as IFLA_GRE_* attributes; flags are
 * converted back to wire GRE_* format for userspace.
 */
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;
	__be16 o_flags = p->o_flags;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	/* collect_md is a flag attribute: present iff enabled. */
	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
1544
/* Dump erspan attributes, then fall through to the common gre dump. */
static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (t->erspan_ver <= 2) {
		/* NOTE(review): this mutates t->parms.o_flags from a dump
		 * path, forcing TUNNEL_KEY for non-collect_md v1/v2.
		 */
		if (t->erspan_ver != 0 && !t->collect_md)
			t->parms.o_flags |= TUNNEL_KEY;

		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
			goto nla_put_failure;

		if (t->erspan_ver == 1) {
			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
				goto nla_put_failure;
		} else if (t->erspan_ver == 2) {
			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
				goto nla_put_failure;
			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
				goto nla_put_failure;
		}
	}

	return ipgre_fill_info(skb, dev);

nla_put_failure:
	return -EMSGSIZE;
}
1572
1573 static void erspan_setup(struct net_device *dev)
1574 {
1575         struct ip_tunnel *t = netdev_priv(dev);
1576
1577         ether_setup(dev);
1578         dev->max_mtu = 0;
1579         dev->netdev_ops = &erspan_netdev_ops;
1580         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1581         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1582         ip_tunnel_setup(dev, erspan_net_id);
1583         t->erspan_ver = 1;
1584 }
1585
/* Netlink attribute policy shared by gre, gretap and erspan. */
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = sizeof_field(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = sizeof_field(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
};
1609
/* rtnl_link_ops for layer-3 "gre" devices. */
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};
1624
/* rtnl_link_ops for Ethernet-over-GRE "gretap" devices. */
static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};
1639
/* rtnl_link_ops for "erspan" devices. */
static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= erspan_newlink,
	.changelink	= erspan_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= erspan_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};
1654
/* Create a flow-based (collect_md) gretap device for in-kernel users
 * such as openvswitch. Returns the device or an ERR_PTR().
 */
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);
	if (err < 0) {
		/* Not registered yet: a plain free_netdev() suffices. */
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL, 0, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	/* Device is registered by now: tear down via dellink and a
	 * batched unregister.
	 */
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1699
/* Per-netns init for gretap: set up the tunnel net and the "gretap0"
 * fallback device.
 */
static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
1704
/* Per-netns batched exit for gretap: delete all gretap tunnels in the
 * dying namespaces.
 */
static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}
1709
/* Per-network-namespace state and callbacks for gretap devices. */
static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit_batch = ipgre_tap_exit_batch_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
1716
/* Per-netns init for erspan: set up the tunnel net and the "erspan0"
 * fallback device.
 */
static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}
1722
/* Per-netns batched exit for erspan: delete all erspan tunnels in the
 * dying namespaces.
 */
static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}
1727
/* Per-network-namespace state and callbacks for erspan devices. */
static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit_batch = erspan_exit_batch_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
1734
/* Module init: register per-netns state for all three tunnel flavours
 * (gre, gretap, erspan), hook the GRE protocol receive handler, then
 * register the three rtnl_link_ops.  Each failure label unwinds exactly
 * the registrations that succeeded before it, in reverse order — keep the
 * label chain in sync with the registration order if anything is added.
 */
static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	/* Claim the CISCO GRE protocol slot; fails if another GRE
	 * demultiplexer is already loaded.
	 */
	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}
1787
1788 static void __exit ipgre_fini(void)
1789 {
1790         rtnl_link_unregister(&ipgre_tap_ops);
1791         rtnl_link_unregister(&ipgre_link_ops);
1792         rtnl_link_unregister(&erspan_link_ops);
1793         gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1794         unregister_pernet_device(&ipgre_tap_net_ops);
1795         unregister_pernet_device(&ipgre_net_ops);
1796         unregister_pernet_device(&erspan_net_ops);
1797 }
1798
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
MODULE_LICENSE("GPL");
/* Autoload this module when userspace creates one of these link kinds… */
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
/* …or requests one of the fallback devices by name. */
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");