1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Linux NET3: GRE over IP protocol decoder.
5 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/uaccess.h>
16 #include <linux/skbuff.h>
17 #include <linux/netdevice.h>
19 #include <linux/tcp.h>
20 #include <linux/udp.h>
21 #include <linux/if_arp.h>
22 #include <linux/if_vlan.h>
23 #include <linux/init.h>
24 #include <linux/in6.h>
25 #include <linux/inetdevice.h>
26 #include <linux/igmp.h>
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/etherdevice.h>
29 #include <linux/if_ether.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
45 #include <net/dst_metadata.h>
46 #include <net/erspan.h>
52 1. The most important issue is detecting local dead loops.
53 They would cause complete host lockup in transmit, which
54 would be "resolved" by stack overflow or, if queueing is enabled,
55 with infinite looping in net_bh.
57 We cannot track such dead loops during route installation,
58 it is an infeasible task. The most general solution would be
59 to keep skb->encapsulation counter (sort of local ttl),
60 and silently drop packet when it expires. It is a good
61 solution, but it supposes maintaining new variable in ALL
62 skb, even if no tunneling is used.
64 Current solution: xmit_recursion breaks dead loops. This is a percpu
65 counter, since when we enter the first ndo_xmit(), cpu migration is
66 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
68 2. Networking dead loops would not kill routers, but would really
69 kill network. IP hop limit plays role of "t->recursion" in this case,
70 if we copy it from packet being encapsulated to upper header.
71 It is very good solution, but it introduces two problems:
73 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
74 do not work over tunnels.
75 - traceroute does not work. I planned to relay ICMP from tunnel,
76 so that this problem would be solved and traceroute output
77 would be even more informative. This idea appeared to be wrong:
78 only Linux complies to rfc1812 now (yes, guys, Linux is the only
79 true router now :-)), all routers (at least, in neighbourhood of mine)
80 return only 8 bytes of payload. It is the end.
82 Hence, if we want that OSPF worked or traceroute said something reasonable,
83 we should search for another solution.
85 One of them is to parse packet trying to detect inner encapsulation
86 made by our node. It is difficult or even impossible, especially,
87 taking into account fragmentation. To be short, ttl is not a solution at all.
89 Current solution: The solution was UNEXPECTEDLY SIMPLE.
90 We force DF flag on tunnels with preconfigured hop limit,
91 that is ALL. :-) Well, it does not remove the problem completely,
92 but exponential growth of network traffic is changed to linear
93 (branches, that exceed pmtu are pruned) and tunnel mtu
94 rapidly degrades to value <68, where looping stops.
95 Yes, it is not good if there exists a router in the loop,
96 which does not force DF, even when encapsulating packets have DF set.
97 But it is not our problem! Nobody could accuse us, we made
98 all that we could make. Even if it is your gated who injected
99 fatal route to network, even if it were you who configured
100 fatal static route: you are innocent. :-)
105 static bool log_ecn_error = true;
106 module_param(log_ecn_error, bool, 0644);
107 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
109 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
110 static int ipgre_tunnel_init(struct net_device *dev);
111 static void erspan_build_header(struct sk_buff *skb,
113 bool truncate, bool is_ipv4);
115 static unsigned int ipgre_net_id __read_mostly;
116 static unsigned int gre_tap_net_id __read_mostly;
117 static unsigned int erspan_net_id __read_mostly;
119 static int ipgre_err(struct sk_buff *skb, u32 info,
120 const struct tnl_ptk_info *tpi)
123 /* All the routers (except for Linux) return only
124 8 bytes of packet payload. It means, that precise relaying of
125 ICMP in the real Internet is absolutely infeasible.
127 Moreover, Cisco "wise men" put GRE key to the third word
128 in GRE header. It makes impossible maintaining even soft
129 state for keyed GRE tunnels with enabled checksum. Tell
132 Well, I wonder, rfc1812 was written by Cisco employee,
133 what the hell these idiots break standards established
136 struct net *net = dev_net(skb->dev);
137 struct ip_tunnel_net *itn;
138 const struct iphdr *iph;
139 const int type = icmp_hdr(skb)->type;
140 const int code = icmp_hdr(skb)->code;
141 unsigned int data_len = 0;
144 if (tpi->proto == htons(ETH_P_TEB))
145 itn = net_generic(net, gre_tap_net_id);
146 else if (tpi->proto == htons(ETH_P_ERSPAN) ||
147 tpi->proto == htons(ETH_P_ERSPAN2))
148 itn = net_generic(net, erspan_net_id);
150 itn = net_generic(net, ipgre_net_id);
152 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
153 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
154 iph->daddr, iph->saddr, tpi->key);
161 case ICMP_PARAMETERPROB:
164 case ICMP_DEST_UNREACH:
167 case ICMP_PORT_UNREACH:
168 /* Impossible event. */
171 /* All others are translated to HOST_UNREACH.
172 rfc2003 contains "deep thoughts" about NET_UNREACH,
173 I believe they are just ether pollution. --ANK
179 case ICMP_TIME_EXCEEDED:
180 if (code != ICMP_EXC_TTL)
182 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
189 #if IS_ENABLED(CONFIG_IPV6)
190 if (tpi->proto == htons(ETH_P_IPV6) &&
191 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
196 if (t->parms.iph.daddr == 0 ||
197 ipv4_is_multicast(t->parms.iph.daddr))
200 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
203 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
207 t->err_time = jiffies;
212 static void gre_err(struct sk_buff *skb, u32 info)
214 /* All the routers (except for Linux) return only
215 * 8 bytes of packet payload. It means, that precise relaying of
216 * ICMP in the real Internet is absolutely infeasible.
218 * Moreover, Cisco "wise men" put GRE key to the third word
219 * in GRE header. It makes impossible maintaining even soft
221 * GRE tunnels with enabled checksum. Tell them "thank you".
223 * Well, I wonder, rfc1812 was written by Cisco employee,
224 * what the hell these idiots break standards established
228 const struct iphdr *iph = (struct iphdr *)skb->data;
229 const int type = icmp_hdr(skb)->type;
230 const int code = icmp_hdr(skb)->code;
231 struct tnl_ptk_info tpi;
233 if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
237 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
238 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
239 skb->dev->ifindex, IPPROTO_GRE);
242 if (type == ICMP_REDIRECT) {
243 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
248 ipgre_err(skb, info, &tpi);
/* is_erspan_type1() - distinguish ERSPAN type I from type II by GRE
 * header length, since both share ethertype 0x88BE.
 */
251 static bool is_erspan_type1(int gre_hdr_len)
253 /* Both ERSPAN type I (version 0) and type II (version 1) use
254 * protocol 0x88BE, but the type I has only 4-byte GRE header,
255 * while type II has 8-byte.
257 return gre_hdr_len == 4;
/* erspan_rcv() - receive path for ERSPAN-encapsulated GRE packets.
 *
 * Looks up the matching tunnel (type I tunnels are keyless, type II/III
 * are keyed), pulls the ERSPAN header, and for collect_md tunnels builds
 * a metadata dst carrying the ERSPAN options before handing the skb to
 * the generic ip_tunnel_rcv().
 *
 * Returns PACKET_RCVD on success, PACKET_REJECT otherwise (per visible
 * paths; extract is missing several lines — confirm in full file).
 */
260 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
263 struct net *net = dev_net(skb->dev);
264 struct metadata_dst *tun_dst = NULL;
265 struct erspan_base_hdr *ershdr;
266 struct ip_tunnel_net *itn;
267 struct ip_tunnel *tunnel;
268 const struct iphdr *iph;
269 struct erspan_md2 *md2;
273 itn = net_generic(net, erspan_net_id);
/* Type I has no key; type II/III lookup uses the GRE key. */
275 if (is_erspan_type1(gre_hdr_len)) {
277 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
278 tpi->flags | TUNNEL_NO_KEY,
279 iph->saddr, iph->daddr, 0);
281 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
283 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
284 tpi->flags | TUNNEL_KEY,
285 iph->saddr, iph->daddr, tpi->key);
289 if (is_erspan_type1(gre_hdr_len))
/* Total outer header to pull: GRE + version-specific ERSPAN hdr. */
292 len = gre_hdr_len + erspan_hdr_len(ver);
294 if (unlikely(!pskb_may_pull(skb, len)))
295 return PACKET_REJECT;
297 if (__iptunnel_pull_header(skb,
303 if (tunnel->collect_md) {
304 struct erspan_metadata *pkt_md, *md;
305 struct ip_tunnel_info *info;
310 tpi->flags |= TUNNEL_KEY;
312 tun_id = key32_to_tunnel_id(tpi->key);
314 tun_dst = ip_tun_rx_dst(skb, flags,
315 tun_id, sizeof(*md));
317 return PACKET_REJECT;
319 /* skb can be uncloned in __iptunnel_pull_header, so
320 * old pkt_md is no longer valid and we need to reset
/* Re-derive the ERSPAN metadata pointer from the (possibly new)
 * head of the skb.
 */
323 gh = skb_network_header(skb) +
324 skb_network_header_len(skb);
325 pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
327 md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
330 memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
333 info = &tun_dst->u.tun_info;
334 info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
335 info->options_len = sizeof(*md);
338 skb_reset_mac_header(skb);
339 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
342 return PACKET_REJECT;
/* __ipgre_rcv() - common GRE receive: look up a tunnel in @itn, strip
 * the outer headers, and deliver via ip_tunnel_rcv().
 *
 * @raw_proto: true when the caller is retrying delivery as a raw
 * protocol (collect-metadata fallback from ipgre_rcv()).
 *
 * Visible return paths are PACKET_REJECT; the extract omits the lines
 * returning PACKET_RCVD/PACKET_NEXT — confirm in the full file.
 */
349 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
350 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
352 struct metadata_dst *tun_dst = NULL;
353 const struct iphdr *iph;
354 struct ip_tunnel *tunnel;
357 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
358 iph->saddr, iph->daddr, tpi->key);
361 const struct iphdr *tnl_params;
363 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
364 raw_proto, false) < 0)
/* ARPHRD_NONE devices (collect_md) keep no MAC header. */
367 if (tunnel->dev->type != ARPHRD_NONE)
368 skb_pop_mac_header(skb);
370 skb_reset_mac_header(skb);
372 tnl_params = &tunnel->parms.iph;
373 if (tunnel->collect_md || tnl_params->daddr == 0) {
/* Build a metadata dst carrying csum/key info for collect_md. */
377 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
378 tun_id = key32_to_tunnel_id(tpi->key);
379 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
381 return PACKET_REJECT;
384 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
/* ipgre_rcv() - dispatch a parsed GRE packet to the right tunnel table
 * (gretap for ETH_P_TEB, plain ipgre otherwise), with a second-chance
 * raw-protocol retry so collect-metadata ipgre tunnels also receive
 * ETH_P_TEB traffic.
 */
394 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
397 struct net *net = dev_net(skb->dev);
398 struct ip_tunnel_net *itn;
401 if (tpi->proto == htons(ETH_P_TEB))
402 itn = net_generic(net, gre_tap_net_id);
404 itn = net_generic(net, ipgre_net_id);
406 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
407 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
408 /* ipgre tunnels in collect metadata mode should receive
409 * also ETH_P_TEB traffic.
411 itn = net_generic(net, ipgre_net_id);
412 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
/* gre_rcv() - top-level GRE protocol receive handler.
 *
 * Drops looped-back multicast (broadcast GRE) packets, parses the GRE
 * header, routes ERSPAN ethertypes to erspan_rcv() and everything else
 * to ipgre_rcv(); if nothing claimed the packet, answers with an ICMP
 * port-unreachable (standard "no such tunnel" behavior).
 */
417 static int gre_rcv(struct sk_buff *skb)
419 struct tnl_ptk_info tpi;
420 bool csum_err = false;
423 #ifdef CONFIG_NET_IPGRE_BROADCAST
424 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
425 /* Looped back packet, drop it! */
426 if (rt_is_output_route(skb_rtable(skb)))
431 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
435 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
436 tpi.proto == htons(ETH_P_ERSPAN2))) {
437 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
442 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
/* Nobody claimed the packet: tell the sender there is no tunnel. */
446 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/* __gre_xmit() - push the GRE header (with optional atomically bumped
 * sequence number when TUNNEL_SEQ is set) and hand the skb to the
 * generic ip_tunnel_xmit() with the tunnel's configured outer header.
 */
452 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
453 const struct iphdr *tnl_params,
456 struct ip_tunnel *tunnel = netdev_priv(dev);
457 __be16 flags = tunnel->parms.o_flags;
459 /* Push GRE header. */
460 gre_build_header(skb, tunnel->tun_hlen,
461 flags, proto, tunnel->parms.o_key,
462 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0)
464 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
/* gre_handle_offloads() - prepare skb GSO/checksum state for GRE
 * encapsulation; picks the checksummed GSO type when @csum is set.
 */
467 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
469 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
/* gre_fb_xmit() - flow-based (collect_md / external) transmit path.
 *
 * Takes all tunnel parameters from the skb's attached tunnel metadata
 * instead of the netdev config, builds the GRE header accordingly and
 * transmits via ip_md_tunnel_xmit().  Drops (tx_dropped) on any of the
 * visible error paths; extract omits the goto labels/kfree_skb lines.
 */
472 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
475 struct ip_tunnel *tunnel = netdev_priv(dev);
476 struct ip_tunnel_info *tun_info;
477 const struct ip_tunnel_key *key;
481 tun_info = skb_tunnel_info(skb);
/* Must have IPv4 TX metadata attached, else drop. */
482 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
483 ip_tunnel_info_af(tun_info) != AF_INET))
486 key = &tun_info->key;
487 tunnel_hlen = gre_calc_hlen(key->tun_flags);
489 if (skb_cow_head(skb, dev->needed_headroom))
492 /* Push Tunnel header. */
493 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
496 flags = tun_info->key.tun_flags &
497 (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
498 gre_build_header(skb, tunnel_hlen, flags, proto,
499 tunnel_id_to_key32(tun_info->key.tun_id),
500 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0)
502 ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
508 dev->stats.tx_dropped++;
/* erspan_fb_xmit() - flow-based transmit for ERSPAN (collect_md).
 *
 * Validates the attached tunnel metadata and its ERSPAN options, trims
 * oversized frames (ERSPAN truncation), builds the version-specific
 * ERSPAN header plus a fixed 8-byte sequenced GRE header, then sends
 * via ip_md_tunnel_xmit().  Extract omits some error/label lines.
 */
511 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
513 struct ip_tunnel *tunnel = netdev_priv(dev);
514 struct ip_tunnel_info *tun_info;
515 const struct ip_tunnel_key *key;
516 struct erspan_metadata *md;
517 bool truncate = false;
523 tun_info = skb_tunnel_info(skb);
524 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
525 ip_tunnel_info_af(tun_info) != AF_INET))
528 key = &tun_info->key;
/* ERSPAN options are mandatory on this path. */
529 if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
531 if (tun_info->options_len < sizeof(*md))
533 md = ip_tunnel_info_opts(tun_info);
535 /* ERSPAN has fixed 8 byte GRE header */
536 version = md->version;
537 tunnel_hlen = 8 + erspan_hdr_len(version);
539 if (skb_cow_head(skb, dev->needed_headroom))
542 if (gre_handle_offloads(skb, false))
/* Frames larger than the device MTU are trimmed and flagged as
 * truncated in the ERSPAN header.
 */
545 if (skb->len > dev->mtu + dev->hard_header_len) {
546 pskb_trim(skb, dev->mtu + dev->hard_header_len);
/* Cross-check L3 length fields against the actual skb length to
 * detect payloads already shortened upstream.
 */
550 nhoff = skb_network_header(skb) - skb_mac_header(skb);
551 if (skb->protocol == htons(ETH_P_IP) &&
552 (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
555 if (skb->protocol == htons(ETH_P_IPV6)) {
558 if (skb_transport_header_was_set(skb))
559 thoff = skb_transport_header(skb) - skb_mac_header(skb);
561 thoff = nhoff + sizeof(struct ipv6hdr);
562 if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
567 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
568 ntohl(md->u.index), truncate, true);
569 proto = htons(ETH_P_ERSPAN);
570 } else if (version == 2) {
571 erspan_build_header_v2(skb,
572 ntohl(tunnel_id_to_key32(key->tun_id)),
574 get_hwid(&md->u.md2),
576 proto = htons(ETH_P_ERSPAN2);
/* ERSPAN always carries a GRE sequence number. */
581 gre_build_header(skb, 8, TUNNEL_SEQ,
582 proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
584 ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
590 dev->stats.tx_dropped++;
/* gre_fill_metadata_dst() - ndo_fill_metadata_dst: resolve the route
 * for the skb's tunnel key and fill in the source address the stack
 * would use, so userspace/offload sees the complete tuple.
 */
593 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
595 struct ip_tunnel_info *info = skb_tunnel_info(skb);
596 const struct ip_tunnel_key *key;
600 if (ip_tunnel_info_af(info) != AF_INET)
604 ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
605 tunnel_id_to_key32(key->tun_id),
606 key->tos & ~INET_ECN_MASK, 0, skb->mark,
608 rt = ip_route_output_key(dev_net(dev), &fl4);
/* Record the source address chosen by the routing lookup. */
613 info->key.u.ipv4.src = fl4.saddr;
/* ipgre_xmit() - ndo_start_xmit for plain ipgre devices.
 *
 * collect_md devices go through the flow-based path; devices with
 * header_ops already carry a pre-built outer IP+GRE header in the skb
 * (built by ipgre_header()) which is used as the template and pulled
 * off before ip_tunnel_xmit re-adds it.  Extract omits some error
 * labels; drop paths increment tx_dropped.
 */
617 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
618 struct net_device *dev)
620 struct ip_tunnel *tunnel = netdev_priv(dev);
621 const struct iphdr *tnl_params;
623 if (!pskb_inet_may_pull(skb))
626 if (tunnel->collect_md) {
627 gre_fb_xmit(skb, dev, skb->protocol);
631 if (dev->header_ops) {
632 if (skb_cow_head(skb, 0))
/* Outer header template was prepended by ipgre_header(). */
635 tnl_params = (const struct iphdr *)skb->data;
637 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
640 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
641 skb_reset_mac_header(skb);
/* Checksum start must not fall inside the pulled headers. */
643 if (skb->ip_summed == CHECKSUM_PARTIAL &&
644 skb_checksum_start(skb) < skb->data)
647 if (skb_cow_head(skb, dev->needed_headroom))
650 tnl_params = &tunnel->parms.iph;
653 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
656 __gre_xmit(skb, dev, tnl_params, skb->protocol);
661 dev->stats.tx_dropped++;
/* erspan_xmit() - ndo_start_xmit for configured (non-flow-based)
 * ERSPAN devices.
 *
 * Builds the version-appropriate ERSPAN header from the device config
 * (version 0 has no extra header; 1 and 2 embed the session ID from
 * o_key), clears TUNNEL_KEY since the key already lives in the ERSPAN
 * header, and transmits via __gre_xmit().  Extract omits error labels.
 */
665 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
666 struct net_device *dev)
668 struct ip_tunnel *tunnel = netdev_priv(dev);
669 bool truncate = false;
672 if (!pskb_inet_may_pull(skb))
675 if (tunnel->collect_md) {
676 erspan_fb_xmit(skb, dev);
680 if (gre_handle_offloads(skb, false))
683 if (skb_cow_head(skb, dev->needed_headroom))
/* Oversized frames get trimmed (ERSPAN truncation). */
686 if (skb->len > dev->mtu + dev->hard_header_len) {
687 pskb_trim(skb, dev->mtu + dev->hard_header_len);
691 /* Push ERSPAN header */
692 if (tunnel->erspan_ver == 0) {
693 proto = htons(ETH_P_ERSPAN);
/* Type I carries no sequence number. */
694 tunnel->parms.o_flags &= ~TUNNEL_SEQ;
695 } else if (tunnel->erspan_ver == 1) {
696 erspan_build_header(skb, ntohl(tunnel->parms.o_key),
699 proto = htons(ETH_P_ERSPAN);
700 } else if (tunnel->erspan_ver == 2) {
701 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
702 tunnel->dir, tunnel->hwid,
704 proto = htons(ETH_P_ERSPAN2);
/* Key is encoded in the ERSPAN header, not the GRE header. */
709 tunnel->parms.o_flags &= ~TUNNEL_KEY;
710 __gre_xmit(skb, dev, &tunnel->parms.iph, proto);
715 dev->stats.tx_dropped++;
/* gre_tap_xmit() - ndo_start_xmit for gretap (L2-in-GRE) devices:
 * flow-based path for collect_md, otherwise offload prep, headroom
 * check, and __gre_xmit() with protocol ETH_P_TEB.
 */
719 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
720 struct net_device *dev)
722 struct ip_tunnel *tunnel = netdev_priv(dev);
724 if (!pskb_inet_may_pull(skb))
727 if (tunnel->collect_md) {
728 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
732 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
735 if (skb_cow_head(skb, dev->needed_headroom))
738 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
743 dev->stats.tx_dropped++;
/* ipgre_link_update() - recompute header lengths and feature flags
 * after the tunnel's o_flags changed (e.g. via ioctl/netlink).
 *
 * Adjusts hard_header_len/needed_headroom/MTU by the delta in GRE
 * header size, and re-derives GSO/LLTX capability: software GSO only
 * without TUNNEL_SEQ and (no TUNNEL_CSUM or no encap), LLTX only
 * without TUNNEL_SEQ.  @set_mtu gating lines are missing from this
 * extract — confirm against the full file.
 */
747 static void ipgre_link_update(struct net_device *dev, bool set_mtu)
749 struct ip_tunnel *tunnel = netdev_priv(dev);
/* len becomes the signed change in GRE header length. */
752 len = tunnel->tun_hlen;
753 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
754 len = tunnel->tun_hlen - len;
755 tunnel->hlen = tunnel->hlen + len;
758 dev->hard_header_len += len;
760 dev->needed_headroom += len;
/* Never shrink the MTU below the IPv4 minimum of 68. */
763 dev->mtu = max_t(int, dev->mtu - len, 68);
765 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
766 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
767 tunnel->encap.type == TUNNEL_ENCAP_NONE) {
768 dev->features |= NETIF_F_GSO_SOFTWARE;
769 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
771 dev->features &= ~NETIF_F_GSO_SOFTWARE;
772 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
774 dev->features |= NETIF_F_LLTX;
/* With TUNNEL_SEQ: sequence counter forbids lockless TX and GSO. */
776 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
777 dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
/* ipgre_tunnel_ctl() - ndo_tunnel_ctl ioctl backend.
 *
 * Validates add/change parameters (must be IPv4/GRE, 5-word header,
 * only DF in frag_off, no GRE version/routing bits), converts wire GRE
 * flags to internal tunnel flags, delegates to ip_tunnel_ctl(), and on
 * SIOCCHGTUNNEL propagates the new flags (link update is skipped for
 * erspan devices).  Flags are converted back to wire form for the
 * reply copied to userspace.
 */
781 static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
786 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
787 if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
788 p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
789 ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
793 p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
794 p->o_flags = gre_flags_to_tnl_flags(p->o_flags);
796 err = ip_tunnel_ctl(dev, p, cmd);
800 if (cmd == SIOCCHGTUNNEL) {
801 struct ip_tunnel *t = netdev_priv(dev);
803 t->parms.i_flags = p->i_flags;
804 t->parms.o_flags = p->o_flags;
/* erspan recomputes header lengths elsewhere; skip for it. */
806 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
807 ipgre_link_update(dev, true);
/* Convert back to wire-format flags for the userspace copy-out. */
810 p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
811 p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
815 /* Nice toy. Unfortunately, useless in real life :-)
816 It allows to construct virtual multiprotocol broadcast "LAN"
817 over the Internet, provided multicast routing is tuned.
820 I have no idea whether this bicycle was invented before me,
821 so that I had to set ARPHRD_IPGRE to a random value.
822 I have an impression, that Cisco could make something similar,
823 but this feature is apparently missing in IOS<=11.2(8).
825 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
826 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
828 ping -t 255 224.66.66.66
830 If nobody answers, mbone does not work.
832 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
833 ip addr add 10.66.66.<somewhat>/24 dev Universe
835 ifconfig Universe add fe80::<Your_real_addr>/10
836 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
839 ftp fec0:6666:6666::193.233.7.65
/* ipgre_header() - header_ops->create for broadcast/NOARP GRE devices:
 * prepends a template outer IP header plus GRE base header.  Returns
 * the header length on success, or its negative when the destination
 * is not yet known (standard header_ops convention).
 */
842 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
844 const void *daddr, const void *saddr, unsigned int len)
846 struct ip_tunnel *t = netdev_priv(dev);
848 struct gre_base_hdr *greh;
850 iph = skb_push(skb, t->hlen + sizeof(*iph));
851 greh = (struct gre_base_hdr *)(iph+1);
852 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
853 greh->protocol = htons(type);
/* Start from the tunnel's configured outer IP header template. */
855 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
857 /* Set the source hardware address. */
859 memcpy(&iph->saddr, saddr, 4);
861 memcpy(&iph->daddr, daddr, 4);
863 return t->hlen + sizeof(*iph);
865 return -(t->hlen + sizeof(*iph));
/* ipgre_header_parse() - header_ops->parse: the "hardware address" of a
 * GRE device is the outer IPv4 source address (4 bytes).
 */
868 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
870 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
871 memcpy(haddr, &iph->saddr, 4);
/* Link-layer header ops for broadcast-capable GRE devices. */
875 static const struct header_ops ipgre_header_ops = {
876 .create = ipgre_header,
877 .parse = ipgre_header_parse,
880 #ifdef CONFIG_NET_IPGRE_BROADCAST
/* ipgre_open() - ndo_open for broadcast GRE: for a multicast remote,
 * resolve the output route and join the multicast group on the
 * resolved interface, remembering it in t->mlink for ipgre_close().
 */
881 static int ipgre_open(struct net_device *dev)
883 struct ip_tunnel *t = netdev_priv(dev);
885 if (ipv4_is_multicast(t->parms.iph.daddr)) {
889 rt = ip_route_output_gre(t->net, &fl4,
893 RT_TOS(t->parms.iph.tos),
896 return -EADDRNOTAVAIL;
899 if (!__in_dev_get_rtnl(dev))
900 return -EADDRNOTAVAIL;
901 t->mlink = dev->ifindex;
902 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
/* ipgre_close() - ndo_stop: leave the multicast group joined in
 * ipgre_open(), looked up by the remembered ifindex t->mlink.
 */
907 static int ipgre_close(struct net_device *dev)
909 struct ip_tunnel *t = netdev_priv(dev);
911 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
912 struct in_device *in_dev;
913 in_dev = inetdev_by_index(t->net, t->mlink);
915 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
/* netdev ops for plain ipgre devices; open/stop only exist when
 * broadcast GRE support is compiled in.
 */
921 static const struct net_device_ops ipgre_netdev_ops = {
922 .ndo_init = ipgre_tunnel_init,
923 .ndo_uninit = ip_tunnel_uninit,
924 #ifdef CONFIG_NET_IPGRE_BROADCAST
925 .ndo_open = ipgre_open,
926 .ndo_stop = ipgre_close,
928 .ndo_start_xmit = ipgre_xmit,
929 .ndo_do_ioctl = ip_tunnel_ioctl,
930 .ndo_change_mtu = ip_tunnel_change_mtu,
931 .ndo_get_stats64 = ip_tunnel_get_stats64,
932 .ndo_get_iflink = ip_tunnel_get_iflink,
933 .ndo_tunnel_ctl = ipgre_tunnel_ctl,
/* Baseline device feature set shared by all GRE variants (extract
 * shows only the first term of the mask).
 */
936 #define GRE_FEATURES (NETIF_F_SG | \
/* ipgre_tunnel_setup() - rtnl setup callback for plain ipgre devices. */
941 static void ipgre_tunnel_setup(struct net_device *dev)
943 dev->netdev_ops = &ipgre_netdev_ops;
944 dev->type = ARPHRD_IPGRE;
945 ip_tunnel_setup(dev, ipgre_net_id);
/* __gre_tunnel_init() - common init for ipgre and gretap devices:
 * compute header lengths from o_flags, set baseline features, and
 * enable software GSO / lockless TX under the same conditions as
 * ipgre_link_update() (no SEQ; CSUM only without extra encap).
 */
948 static void __gre_tunnel_init(struct net_device *dev)
950 struct ip_tunnel *tunnel;
952 tunnel = netdev_priv(dev);
953 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
954 tunnel->parms.iph.protocol = IPPROTO_GRE;
956 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
957 dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
959 dev->features |= GRE_FEATURES;
960 dev->hw_features |= GRE_FEATURES;
962 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
963 /* TCP offload with GRE SEQ is not supported, nor
964 * can we support 2 levels of outer headers requiring
967 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
968 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
969 dev->features |= NETIF_F_GSO_SOFTWARE;
970 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
973 /* Can use a lockless transmit, unless we generate
976 dev->features |= NETIF_F_LLTX;
/* ipgre_tunnel_init() - ndo_init for plain ipgre devices.
 *
 * Seeds dev_addr/broadcast from the tunnel endpoints and, for
 * non-collect_md devices with a fixed remote, installs header_ops so a
 * prebuilt outer header is used (broadcast mode when the remote is
 * multicast).  needed_headroom is zeroed because the prebuilt header
 * already accounts for it.
 */
980 static int ipgre_tunnel_init(struct net_device *dev)
982 struct ip_tunnel *tunnel = netdev_priv(dev);
983 struct iphdr *iph = &tunnel->parms.iph;
985 __gre_tunnel_init(dev);
/* Device "hardware" addresses are the outer IPv4 endpoints. */
987 memcpy(dev->dev_addr, &iph->saddr, 4);
988 memcpy(dev->broadcast, &iph->daddr, 4);
990 dev->flags = IFF_NOARP;
994 if (iph->daddr && !tunnel->collect_md) {
995 #ifdef CONFIG_NET_IPGRE_BROADCAST
996 if (ipv4_is_multicast(iph->daddr)) {
999 dev->flags = IFF_BROADCAST;
1000 dev->header_ops = &ipgre_header_ops;
1001 dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1002 dev->needed_headroom = 0;
1005 } else if (!tunnel->collect_md) {
1006 dev->header_ops = &ipgre_header_ops;
1007 dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1008 dev->needed_headroom = 0;
1011 return ip_tunnel_init(dev);
/* GRE protocol hooks registered with the GRE demux layer (the .handler
 * member is outside this extract).
 */
1014 static const struct gre_protocol ipgre_protocol = {
1016 .err_handler = gre_err,
/* Per-netns setup/teardown for the plain ipgre tunnel table. */
1019 static int __net_init ipgre_init_net(struct net *net)
1021 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1024 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
1026 ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
/* Pernet registration for the ipgre table (batched exit). */
1029 static struct pernet_operations ipgre_net_ops = {
1030 .init = ipgre_init_net,
1031 .exit_batch = ipgre_exit_batch_net,
1032 .id = &ipgre_net_id,
1033 .size = sizeof(struct ip_tunnel_net),
/* ipgre_tunnel_validate() - netlink validation for ipgre links:
 * reject GRE version/routing bits and the unsupported combination of
 * collect-metadata with an extra encapsulation type.
 */
1036 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1037 struct netlink_ext_ack *extack)
1045 if (data[IFLA_GRE_IFLAGS])
1046 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1047 if (data[IFLA_GRE_OFLAGS])
1048 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1049 if (flags & (GRE_VERSION|GRE_ROUTING))
1052 if (data[IFLA_GRE_COLLECT_METADATA] &&
1053 data[IFLA_GRE_ENCAP_TYPE] &&
1054 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
/* ipgre_tap_validate() - gretap netlink validation: additionally
 * checks the Ethernet MAC address attribute, then applies the common
 * ipgre checks.  (Remote-address validation lines are partly missing
 * from this extract.)
 */
1060 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1061 struct netlink_ext_ack *extack)
1065 if (tb[IFLA_ADDRESS]) {
1066 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1068 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1069 return -EADDRNOTAVAIL;
1075 if (data[IFLA_GRE_REMOTE]) {
1076 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1082 return ipgre_tunnel_validate(tb, data, extack);
/* erspan_validate() - ERSPAN netlink validation on top of the gretap
 * checks: type II/III must carry exactly GRE_SEQ|GRE_KEY (unless
 * collect-metadata), and session IDs (keys) must fit in 10 bits.
 */
1085 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1086 struct netlink_ext_ack *extack)
1094 ret = ipgre_tap_validate(tb, data, extack);
/* Version 0 (type I) needs none of the flag/key checks below. */
1098 if (data[IFLA_GRE_ERSPAN_VER] &&
1099 nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
1102 /* ERSPAN type II/III should only have GRE sequence and key flag */
1103 if (data[IFLA_GRE_OFLAGS])
1104 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1105 if (data[IFLA_GRE_IFLAGS])
1106 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1107 if (!data[IFLA_GRE_COLLECT_METADATA] &&
1108 flags != (GRE_SEQ | GRE_KEY))
1111 /* ERSPAN Session ID only has 10-bit. Since we reuse
1112 * 32-bit key field as ID, check it's range.
1114 if (data[IFLA_GRE_IKEY] &&
1115 (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1118 if (data[IFLA_GRE_OKEY] &&
1119 (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
/* ipgre_netlink_parms() - translate IFLA_GRE_* netlink attributes into
 * an ip_tunnel_parm.
 *
 * Fills @parms from @data (flags, keys, endpoints, ttl, tos, DF) and
 * writes side effects onto the device's ip_tunnel (@t): collect_md,
 * ignore_df, and switching the dev type to ARPHRD_NONE for
 * collect-metadata mode.  @fwmark is updated when IFLA_GRE_FWMARK is
 * present.  ignore_df is rejected when PMTU discovery (DF) is on.
 */
1125 static int ipgre_netlink_parms(struct net_device *dev,
1126 struct nlattr *data[],
1127 struct nlattr *tb[],
1128 struct ip_tunnel_parm *parms,
1131 struct ip_tunnel *t = netdev_priv(dev);
1133 memset(parms, 0, sizeof(*parms));
1135 parms->iph.protocol = IPPROTO_GRE;
1140 if (data[IFLA_GRE_LINK])
1141 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1143 if (data[IFLA_GRE_IFLAGS])
1144 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1146 if (data[IFLA_GRE_OFLAGS])
1147 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1149 if (data[IFLA_GRE_IKEY])
1150 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1152 if (data[IFLA_GRE_OKEY])
1153 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1155 if (data[IFLA_GRE_LOCAL])
1156 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1158 if (data[IFLA_GRE_REMOTE])
1159 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1161 if (data[IFLA_GRE_TTL])
1162 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1164 if (data[IFLA_GRE_TOS])
1165 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
/* PMTU discovery defaults to on: set DF unless explicitly disabled. */
1167 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1170 parms->iph.frag_off = htons(IP_DF);
1173 if (data[IFLA_GRE_COLLECT_METADATA]) {
1174 t->collect_md = true;
/* collect_md devices carry no link-layer header. */
1175 if (dev->type == ARPHRD_IPGRE)
1176 dev->type = ARPHRD_NONE;
/* ignore_df conflicts with DF-based PMTU discovery. */
1179 if (data[IFLA_GRE_IGNORE_DF]) {
1180 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1181 && (parms->iph.frag_off & htons(IP_DF)))
1183 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1186 if (data[IFLA_GRE_FWMARK])
1187 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
/* erspan_netlink_parms() - parse the common GRE attributes then the
 * ERSPAN-specific ones: version (0-2), v1 index (20-bit), v2
 * direction (1-bit) and hwid (6-bit), range-checked against the
 * header field masks.
 */
1192 static int erspan_netlink_parms(struct net_device *dev,
1193 struct nlattr *data[],
1194 struct nlattr *tb[],
1195 struct ip_tunnel_parm *parms,
1198 struct ip_tunnel *t = netdev_priv(dev);
1201 err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
1207 if (data[IFLA_GRE_ERSPAN_VER]) {
1208 t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1210 if (t->erspan_ver > 2)
1214 if (t->erspan_ver == 1) {
1215 if (data[IFLA_GRE_ERSPAN_INDEX]) {
1216 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1217 if (t->index & ~INDEX_MASK)
1220 } else if (t->erspan_ver == 2) {
1221 if (data[IFLA_GRE_ERSPAN_DIR]) {
1222 t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1223 if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1226 if (data[IFLA_GRE_ERSPAN_HWID]) {
1227 t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1228 if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1236 /* This function returns true when ENCAP attributes are present in the nl msg */
/* Each present IFLA_GRE_ENCAP_* attribute is copied into @ipencap;
 * the "ret = true" lines fall outside this extract.
 */
1237 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1238 struct ip_tunnel_encap *ipencap)
1242 memset(ipencap, 0, sizeof(*ipencap));
1247 if (data[IFLA_GRE_ENCAP_TYPE]) {
1249 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1252 if (data[IFLA_GRE_ENCAP_FLAGS]) {
1254 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1257 if (data[IFLA_GRE_ENCAP_SPORT]) {
1259 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1262 if (data[IFLA_GRE_ENCAP_DPORT]) {
1264 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
/* gre_tap_init() - ndo_init for gretap: common GRE init plus live MAC
 * address changes and dst caching.
 */
1270 static int gre_tap_init(struct net_device *dev)
1272 __gre_tunnel_init(dev);
1273 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1274 netif_keep_dst(dev);
1276 return ip_tunnel_init(dev);
/* netdev ops for gretap (Ethernet-over-GRE) devices. */
1279 static const struct net_device_ops gre_tap_netdev_ops = {
1280 .ndo_init = gre_tap_init,
1281 .ndo_uninit = ip_tunnel_uninit,
1282 .ndo_start_xmit = gre_tap_xmit,
1283 .ndo_set_mac_address = eth_mac_addr,
1284 .ndo_validate_addr = eth_validate_addr,
1285 .ndo_change_mtu = ip_tunnel_change_mtu,
1286 .ndo_get_stats64 = ip_tunnel_get_stats64,
1287 .ndo_get_iflink = ip_tunnel_get_iflink,
1288 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
/* erspan_tunnel_init() - ndo_init for ERSPAN devices: version 0 uses a
 * bare 4-byte GRE header, versions 1/2 use the 8-byte (keyed/sequenced)
 * form, plus the version-specific ERSPAN header length.
 */
1291 static int erspan_tunnel_init(struct net_device *dev)
1293 struct ip_tunnel *tunnel = netdev_priv(dev);
1295 if (tunnel->erspan_ver == 0)
1296 tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
1298 tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */
1300 tunnel->parms.iph.protocol = IPPROTO_GRE;
1301 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1302 erspan_hdr_len(tunnel->erspan_ver);
1304 dev->features |= GRE_FEATURES;
1305 dev->hw_features |= GRE_FEATURES;
1306 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1307 netif_keep_dst(dev);
1309 return ip_tunnel_init(dev);
/* netdev ops for ERSPAN devices. */
1312 static const struct net_device_ops erspan_netdev_ops = {
1313 .ndo_init = erspan_tunnel_init,
1314 .ndo_uninit = ip_tunnel_uninit,
1315 .ndo_start_xmit = erspan_xmit,
1316 .ndo_set_mac_address = eth_mac_addr,
1317 .ndo_validate_addr = eth_validate_addr,
1318 .ndo_change_mtu = ip_tunnel_change_mtu,
1319 .ndo_get_stats64 = ip_tunnel_get_stats64,
1320 .ndo_get_iflink = ip_tunnel_get_iflink,
1321 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
/* ipgre_tap_setup() - rtnl setup callback for gretap devices (Ethernet
 * defaults come from the omitted ether_setup-style lines).
 */
1324 static void ipgre_tap_setup(struct net_device *dev)
1328 dev->netdev_ops = &gre_tap_netdev_ops;
1329 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1330 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1331 ip_tunnel_setup(dev, gre_tap_net_id);
/* Apply IFLA_GRE_ENCAP_* attributes (if any) to the tunnel before the
 * rest of newlink/changelink processing.
 */
1335 ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
1337 struct ip_tunnel_encap ipencap;
1339 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1340 struct ip_tunnel *t = netdev_priv(dev);
1341 int err = ip_tunnel_encap_setup(t, &ipencap);
/* ipgre_newlink() - rtnl newlink: encap setup, attribute parsing, then
 * generic tunnel creation.
 */
1350 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1351 struct nlattr *tb[], struct nlattr *data[],
1352 struct netlink_ext_ack *extack)
1354 struct ip_tunnel_parm p;
1358 err = ipgre_newlink_encap_setup(dev, data);
1362 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1365 return ip_tunnel_newlink(dev, tb, &p, fwmark);
/* erspan_newlink() - rtnl newlink for ERSPAN; mirrors ipgre_newlink()
 * but uses the ERSPAN attribute parser.
 */
1368 static int erspan_newlink(struct net *src_net, struct net_device *dev,
1369 struct nlattr *tb[], struct nlattr *data[],
1370 struct netlink_ext_ack *extack)
1372 struct ip_tunnel_parm p;
1376 err = ipgre_newlink_encap_setup(dev, data);
1380 err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1383 return ip_tunnel_newlink(dev, tb, &p, fwmark);
1386 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1387 struct nlattr *data[],
1388 struct netlink_ext_ack *extack)
1390 struct ip_tunnel *t = netdev_priv(dev);
1391 __u32 fwmark = t->fwmark;
1392 struct ip_tunnel_parm p;
1395 err = ipgre_newlink_encap_setup(dev, data);
1399 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1403 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1407 t->parms.i_flags = p.i_flags;
1408 t->parms.o_flags = p.o_flags;
1410 ipgre_link_update(dev, !tb[IFLA_MTU]);
1415 static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
1416 struct nlattr *data[],
1417 struct netlink_ext_ack *extack)
1419 struct ip_tunnel *t = netdev_priv(dev);
1420 __u32 fwmark = t->fwmark;
1421 struct ip_tunnel_parm p;
1424 err = ipgre_newlink_encap_setup(dev, data);
1428 err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1432 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1436 t->parms.i_flags = p.i_flags;
1437 t->parms.o_flags = p.o_flags;
1442 static size_t ipgre_get_size(const struct net_device *dev)
1447 /* IFLA_GRE_IFLAGS */
1449 /* IFLA_GRE_OFLAGS */
1455 /* IFLA_GRE_LOCAL */
1457 /* IFLA_GRE_REMOTE */
1463 /* IFLA_GRE_PMTUDISC */
1465 /* IFLA_GRE_ENCAP_TYPE */
1467 /* IFLA_GRE_ENCAP_FLAGS */
1469 /* IFLA_GRE_ENCAP_SPORT */
1471 /* IFLA_GRE_ENCAP_DPORT */
1473 /* IFLA_GRE_COLLECT_METADATA */
1475 /* IFLA_GRE_IGNORE_DF */
1477 /* IFLA_GRE_FWMARK */
1479 /* IFLA_GRE_ERSPAN_INDEX */
1481 /* IFLA_GRE_ERSPAN_VER */
1483 /* IFLA_GRE_ERSPAN_DIR */
1485 /* IFLA_GRE_ERSPAN_HWID */
1490 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1492 struct ip_tunnel *t = netdev_priv(dev);
1493 struct ip_tunnel_parm *p = &t->parms;
1494 __be16 o_flags = p->o_flags;
1496 if (t->erspan_ver <= 2) {
1497 if (t->erspan_ver != 0 && !t->collect_md)
1498 o_flags |= TUNNEL_KEY;
1500 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1501 goto nla_put_failure;
1503 if (t->erspan_ver == 1) {
1504 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1505 goto nla_put_failure;
1506 } else if (t->erspan_ver == 2) {
1507 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1508 goto nla_put_failure;
1509 if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1510 goto nla_put_failure;
1514 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1515 nla_put_be16(skb, IFLA_GRE_IFLAGS,
1516 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1517 nla_put_be16(skb, IFLA_GRE_OFLAGS,
1518 gre_tnl_flags_to_gre_flags(o_flags)) ||
1519 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1520 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1521 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1522 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1523 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1524 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1525 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1526 !!(p->iph.frag_off & htons(IP_DF))) ||
1527 nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1528 goto nla_put_failure;
1530 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1532 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1534 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1536 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1538 goto nla_put_failure;
1540 if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1541 goto nla_put_failure;
1543 if (t->collect_md) {
1544 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1545 goto nla_put_failure;
1554 static void erspan_setup(struct net_device *dev)
1556 struct ip_tunnel *t = netdev_priv(dev);
1560 dev->netdev_ops = &erspan_netdev_ops;
1561 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1562 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1563 ip_tunnel_setup(dev, erspan_net_id);
1567 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1568 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1569 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1570 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1571 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1572 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1573 [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
1574 [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
1575 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1576 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1577 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1578 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
1579 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1580 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1581 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
1582 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
1583 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
1584 [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
1585 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
1586 [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
1587 [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
1588 [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
1591 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1593 .maxtype = IFLA_GRE_MAX,
1594 .policy = ipgre_policy,
1595 .priv_size = sizeof(struct ip_tunnel),
1596 .setup = ipgre_tunnel_setup,
1597 .validate = ipgre_tunnel_validate,
1598 .newlink = ipgre_newlink,
1599 .changelink = ipgre_changelink,
1600 .dellink = ip_tunnel_dellink,
1601 .get_size = ipgre_get_size,
1602 .fill_info = ipgre_fill_info,
1603 .get_link_net = ip_tunnel_get_link_net,
1606 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1608 .maxtype = IFLA_GRE_MAX,
1609 .policy = ipgre_policy,
1610 .priv_size = sizeof(struct ip_tunnel),
1611 .setup = ipgre_tap_setup,
1612 .validate = ipgre_tap_validate,
1613 .newlink = ipgre_newlink,
1614 .changelink = ipgre_changelink,
1615 .dellink = ip_tunnel_dellink,
1616 .get_size = ipgre_get_size,
1617 .fill_info = ipgre_fill_info,
1618 .get_link_net = ip_tunnel_get_link_net,
1621 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1623 .maxtype = IFLA_GRE_MAX,
1624 .policy = ipgre_policy,
1625 .priv_size = sizeof(struct ip_tunnel),
1626 .setup = erspan_setup,
1627 .validate = erspan_validate,
1628 .newlink = erspan_newlink,
1629 .changelink = erspan_changelink,
1630 .dellink = ip_tunnel_dellink,
1631 .get_size = ipgre_get_size,
1632 .fill_info = ipgre_fill_info,
1633 .get_link_net = ip_tunnel_get_link_net,
1636 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1637 u8 name_assign_type)
1639 struct nlattr *tb[IFLA_MAX + 1];
1640 struct net_device *dev;
1641 LIST_HEAD(list_kill);
1642 struct ip_tunnel *t;
1645 memset(&tb, 0, sizeof(tb));
1647 dev = rtnl_create_link(net, name, name_assign_type,
1648 &ipgre_tap_ops, tb, NULL);
1652 /* Configure flow based GRE device. */
1653 t = netdev_priv(dev);
1654 t->collect_md = true;
1656 err = ipgre_newlink(net, dev, tb, NULL, NULL);
1659 return ERR_PTR(err);
1662 /* openvswitch users expect packet sizes to be unrestricted,
1663 * so set the largest MTU we can.
1665 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1669 err = rtnl_configure_link(dev, NULL);
1675 ip_tunnel_dellink(dev, &list_kill);
1676 unregister_netdevice_many(&list_kill);
1677 return ERR_PTR(err);
1679 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1681 static int __net_init ipgre_tap_init_net(struct net *net)
1683 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1686 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1688 ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
1691 static struct pernet_operations ipgre_tap_net_ops = {
1692 .init = ipgre_tap_init_net,
1693 .exit_batch = ipgre_tap_exit_batch_net,
1694 .id = &gre_tap_net_id,
1695 .size = sizeof(struct ip_tunnel_net),
1698 static int __net_init erspan_init_net(struct net *net)
1700 return ip_tunnel_init_net(net, erspan_net_id,
1701 &erspan_link_ops, "erspan0");
1704 static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1706 ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
1709 static struct pernet_operations erspan_net_ops = {
1710 .init = erspan_init_net,
1711 .exit_batch = erspan_exit_batch_net,
1712 .id = &erspan_net_id,
1713 .size = sizeof(struct ip_tunnel_net),
1716 static int __init ipgre_init(void)
1720 pr_info("GRE over IPv4 tunneling driver\n");
1722 err = register_pernet_device(&ipgre_net_ops);
1726 err = register_pernet_device(&ipgre_tap_net_ops);
1728 goto pnet_tap_failed;
1730 err = register_pernet_device(&erspan_net_ops);
1732 goto pnet_erspan_failed;
1734 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1736 pr_info("%s: can't add protocol\n", __func__);
1737 goto add_proto_failed;
1740 err = rtnl_link_register(&ipgre_link_ops);
1742 goto rtnl_link_failed;
1744 err = rtnl_link_register(&ipgre_tap_ops);
1746 goto tap_ops_failed;
1748 err = rtnl_link_register(&erspan_link_ops);
1750 goto erspan_link_failed;
1755 rtnl_link_unregister(&ipgre_tap_ops);
1757 rtnl_link_unregister(&ipgre_link_ops);
1759 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1761 unregister_pernet_device(&erspan_net_ops);
1763 unregister_pernet_device(&ipgre_tap_net_ops);
1765 unregister_pernet_device(&ipgre_net_ops);
1769 static void __exit ipgre_fini(void)
1771 rtnl_link_unregister(&ipgre_tap_ops);
1772 rtnl_link_unregister(&ipgre_link_ops);
1773 rtnl_link_unregister(&erspan_link_ops);
1774 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1775 unregister_pernet_device(&ipgre_tap_net_ops);
1776 unregister_pernet_device(&ipgre_net_ops);
1777 unregister_pernet_device(&erspan_net_ops);
1780 module_init(ipgre_init);
1781 module_exit(ipgre_fini);
1782 MODULE_LICENSE("GPL");
1783 MODULE_ALIAS_RTNL_LINK("gre");
1784 MODULE_ALIAS_RTNL_LINK("gretap");
1785 MODULE_ALIAS_RTNL_LINK("erspan");
1786 MODULE_ALIAS_NETDEV("gre0");
1787 MODULE_ALIAS_NETDEV("gretap0");
1788 MODULE_ALIAS_NETDEV("erspan0");