1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Linux NET3: GRE over IP protocol decoder.
5 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/uaccess.h>
16 #include <linux/skbuff.h>
17 #include <linux/netdevice.h>
19 #include <linux/tcp.h>
20 #include <linux/udp.h>
21 #include <linux/if_arp.h>
22 #include <linux/if_vlan.h>
23 #include <linux/init.h>
24 #include <linux/in6.h>
25 #include <linux/inetdevice.h>
26 #include <linux/igmp.h>
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/etherdevice.h>
29 #include <linux/if_ether.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
45 #include <net/dst_metadata.h>
46 #include <net/erspan.h>
52 1. The most important issue is detecting local dead loops.
53 They would cause complete host lockup in transmit, which
54 would be "resolved" by stack overflow or, if queueing is enabled,
55 with infinite looping in net_bh.
57 We cannot track such dead loops during route installation,
58 it is an infeasible task. The most general solution would be
59 to keep skb->encapsulation counter (sort of local ttl),
60 and silently drop packet when it expires. It is a good
61 solution, but it supposes maintaining new variable in ALL
62 skb, even if no tunneling is used.
64 Current solution: xmit_recursion breaks dead loops. This is a percpu
65 counter, since when we enter the first ndo_xmit(), cpu migration is
66 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
68 2. Networking dead loops would not kill routers, but would really
69 kill network. IP hop limit plays role of "t->recursion" in this case,
70 if we copy it from packet being encapsulated to upper header.
71 It is very good solution, but it introduces two problems:
73 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
74 do not work over tunnels.
75 - traceroute does not work. I planned to relay ICMP from tunnel,
76 so that this problem would be solved and traceroute output
77 would be even more informative. This idea appeared to be wrong:
78 only Linux complies to rfc1812 now (yes, guys, Linux is the only
79 true router now :-)), all routers (at least, in neighbourhood of mine)
80 return only 8 bytes of payload. It is the end.
82 Hence, if we want that OSPF worked or traceroute said something reasonable,
83 we should search for another solution.
85 One of them is to parse packet trying to detect inner encapsulation
86 made by our node. It is difficult or even impossible, especially,
87 taking into account fragmentation. To be short, ttl is not a solution at all.
89 Current solution: The solution was UNEXPECTEDLY SIMPLE.
90 We force DF flag on tunnels with preconfigured hop limit,
91 that is ALL. :-) Well, it does not remove the problem completely,
92 but exponential growth of network traffic is changed to linear
93 (branches, that exceed pmtu are pruned) and tunnel mtu
94 rapidly degrades to value <68, where looping stops.
95 Yes, it is not good if there exists a router in the loop,
96 which does not force DF, even when encapsulating packets have DF set.
97 But it is not our problem! Nobody could accuse us, we made
98 all that we could make. Even if it is your gated who injected
99 fatal route to network, even if it were you who configured
100 fatal static route: you are innocent. :-)
/* Module parameter: when true, received packets with corrupted ECN are logged. */
105 static bool log_ecn_error = true;
106 module_param(log_ecn_error, bool, 0644);
107 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
/* Forward declarations and per-network-namespace ID slots for the three
 * tunnel flavours handled by this file: plain GRE, GRETAP, and ERSPAN.
 */
109 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
110 static int ipgre_tunnel_init(struct net_device *dev);
111 static void erspan_build_header(struct sk_buff *skb,
113 bool truncate, bool is_ipv4);
115 static unsigned int ipgre_net_id __read_mostly;
116 static unsigned int gre_tap_net_id __read_mostly;
117 static unsigned int erspan_net_id __read_mostly;
/* ipgre_err() - handle an ICMP error received for a GRE-encapsulated packet.
 * Selects the per-netns tunnel table by inner protocol (TEB -> gre_tap,
 * ERSPAN/ERSPAN2 -> erspan, otherwise plain ipgre), looks the tunnel up from
 * the inner IP header carried in the ICMP payload, filters event types, and
 * rate-limits error propagation via t->err_time.
 * NOTE(review): this extraction is missing lines (embedded numbering skips);
 * do not treat the visible text as the complete function.
 */
119 static int ipgre_err(struct sk_buff *skb, u32 info,
120 const struct tnl_ptk_info *tpi)
123 /* All the routers (except for Linux) return only
124 8 bytes of packet payload. It means, that precise relaying of
125 ICMP in the real Internet is absolutely infeasible.
127 Moreover, Cisco "wise men" put GRE key to the third word
128 in GRE header. It makes impossible maintaining even soft
129 state for keyed GRE tunnels with enabled checksum. Tell
132 Well, I wonder, rfc1812 was written by Cisco employee,
133 what the hell these idiots break standards established
136 struct net *net = dev_net(skb->dev);
137 struct ip_tunnel_net *itn;
138 const struct iphdr *iph;
139 const int type = icmp_hdr(skb)->type;
140 const int code = icmp_hdr(skb)->code;
141 unsigned int data_len = 0;
144 if (tpi->proto == htons(ETH_P_TEB))
145 itn = net_generic(net, gre_tap_net_id);
146 else if (tpi->proto == htons(ETH_P_ERSPAN) ||
147 tpi->proto == htons(ETH_P_ERSPAN2))
148 itn = net_generic(net, erspan_net_id);
150 itn = net_generic(net, ipgre_net_id);
/* The offending inner IP header sits right after the ICMP header. */
152 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
153 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
154 iph->daddr, iph->saddr, tpi->key);
161 case ICMP_PARAMETERPROB:
164 case ICMP_DEST_UNREACH:
167 case ICMP_PORT_UNREACH:
168 /* Impossible event. */
171 /* All others are translated to HOST_UNREACH.
172 rfc2003 contains "deep thoughts" about NET_UNREACH,
173 I believe they are just ether pollution. --ANK
179 case ICMP_TIME_EXCEEDED:
180 if (code != ICMP_EXC_TTL)
182 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
189 #if IS_ENABLED(CONFIG_IPV6)
190 if (tpi->proto == htons(ETH_P_IPV6) &&
191 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
196 if (t->parms.iph.daddr == 0 ||
197 ipv4_is_multicast(t->parms.iph.daddr))
200 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
/* Rate-limit: ignore errors arriving within IPTUNNEL_ERR_TIMEO of the last. */
203 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
207 t->err_time = jiffies;
/* gre_err() - entry point for ICMP errors on IPPROTO_GRE packets.
 * Parses the GRE header from the ICMP payload, services PMTU updates
 * (FRAG_NEEDED) and redirects directly, and hands everything else to
 * ipgre_err(). NOTE(review): lines missing in this extraction.
 */
212 static void gre_err(struct sk_buff *skb, u32 info)
214 /* All the routers (except for Linux) return only
215 * 8 bytes of packet payload. It means, that precise relaying of
216 * ICMP in the real Internet is absolutely infeasible.
218 * Moreover, Cisco "wise men" put GRE key to the third word
219 * in GRE header. It makes impossible maintaining even soft
221 * GRE tunnels with enabled checksum. Tell them "thank you".
223 * Well, I wonder, rfc1812 was written by Cisco employee,
224 * what the hell these idiots break standards established
228 const struct iphdr *iph = (struct iphdr *)skb->data;
229 const int type = icmp_hdr(skb)->type;
230 const int code = icmp_hdr(skb)->code;
231 struct tnl_ptk_info tpi;
233 if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
/* Path MTU discovery: propagate the new MTU for this GRE flow. */
237 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
238 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
239 skb->dev->ifindex, IPPROTO_GRE);
242 if (type == ICMP_REDIRECT) {
243 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
248 ipgre_err(skb, info, &tpi);
/* erspan_rcv() - receive path for ERSPAN-encapsulated packets.
 * Looks up the tunnel (ERSPAN always carries a key, hence TUNNEL_KEY is
 * forced into the lookup flags), pulls the outer headers, and for
 * collect_md tunnels builds a metadata dst carrying the ERSPAN options.
 * Returns PACKET_REJECT on failure. NOTE(review): lines missing in this
 * extraction; the visible text is not the complete function.
 */
251 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
254 struct net *net = dev_net(skb->dev);
255 struct metadata_dst *tun_dst = NULL;
256 struct erspan_base_hdr *ershdr;
257 struct ip_tunnel_net *itn;
258 struct ip_tunnel *tunnel;
259 const struct iphdr *iph;
260 struct erspan_md2 *md2;
264 itn = net_generic(net, erspan_net_id);
267 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
270 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
271 tpi->flags | TUNNEL_KEY,
272 iph->saddr, iph->daddr, tpi->key);
/* Make sure the full GRE + ERSPAN header is linear before touching it. */
275 len = gre_hdr_len + erspan_hdr_len(ver);
276 if (unlikely(!pskb_may_pull(skb, len)))
277 return PACKET_REJECT;
279 if (__iptunnel_pull_header(skb,
285 if (tunnel->collect_md) {
286 struct erspan_metadata *pkt_md, *md;
287 struct ip_tunnel_info *info;
292 tpi->flags |= TUNNEL_KEY;
294 tun_id = key32_to_tunnel_id(tpi->key);
296 tun_dst = ip_tun_rx_dst(skb, flags,
297 tun_id, sizeof(*md));
299 return PACKET_REJECT;
301 /* skb can be uncloned in __iptunnel_pull_header, so
302 * old pkt_md is no longer valid and we need to reset
305 gh = skb_network_header(skb) +
306 skb_network_header_len(skb);
307 pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
309 md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
/* v1 and v2 metadata differ in size; copy only what the version defines. */
312 memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
315 info = &tun_dst->u.tun_info;
316 info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
317 info->options_len = sizeof(*md);
320 skb_reset_mac_header(skb);
321 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
324 return PACKET_REJECT;
/* __ipgre_rcv() - common GRE receive helper for a given tunnel table.
 * Looks up the tunnel by outer addresses/flags/key, strips the outer
 * header, and delivers via ip_tunnel_rcv(); for collect_md tunnels a
 * metadata dst is built first from the CSUM/KEY flags and the key.
 * NOTE(review): lines missing in this extraction.
 */
331 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
332 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
334 struct metadata_dst *tun_dst = NULL;
335 const struct iphdr *iph;
336 struct ip_tunnel *tunnel;
339 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
340 iph->saddr, iph->daddr, tpi->key);
343 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
344 raw_proto, false) < 0)
/* Devices with a link-layer type keep no inner MAC header; drop it. */
347 if (tunnel->dev->type != ARPHRD_NONE)
348 skb_pop_mac_header(skb);
350 skb_reset_mac_header(skb);
351 if (tunnel->collect_md) {
/* Only checksum and key tunnel flags are propagated into the metadata. */
355 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
356 tun_id = key32_to_tunnel_id(tpi->key);
357 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
359 return PACKET_REJECT;
362 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
/* ipgre_rcv() - dispatch a received GRE packet to the right tunnel table.
 * TEB (Ethernet-in-GRE) goes to the gre_tap table first; if that pass
 * returns PACKET_NEXT the packet is retried against the plain ipgre table
 * in raw mode, so collect-metadata ipgre tunnels also see ETH_P_TEB traffic.
 */
372 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
375 struct net *net = dev_net(skb->dev);
376 struct ip_tunnel_net *itn;
379 if (tpi->proto == htons(ETH_P_TEB))
380 itn = net_generic(net, gre_tap_net_id);
382 itn = net_generic(net, ipgre_net_id);
384 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
385 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
386 /* ipgre tunnels in collect metadata mode should receive
387 * also ETH_P_TEB traffic.
389 itn = net_generic(net, ipgre_net_id);
390 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
/* gre_rcv() - top-level GRE protocol receive handler.
 * Drops looped-back multicast, parses the GRE header, routes ERSPAN
 * variants to erspan_rcv() and everything else to ipgre_rcv(); if no
 * tunnel claims the packet an ICMP port-unreachable is sent back.
 * NOTE(review): lines missing in this extraction.
 */
395 static int gre_rcv(struct sk_buff *skb)
397 struct tnl_ptk_info tpi;
398 bool csum_err = false;
401 #ifdef CONFIG_NET_IPGRE_BROADCAST
402 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
403 /* Looped back packet, drop it! */
404 if (rt_is_output_route(skb_rtable(skb)))
409 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
413 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
414 tpi.proto == htons(ETH_P_ERSPAN2))) {
415 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
420 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
/* Nobody wanted the packet: tell the sender the GRE "port" is closed. */
424 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/* __gre_xmit() - push the GRE header and hand off to the IP tunnel layer.
 * The sequence number is an atomic post-increment taken only when
 * TUNNEL_SEQ is configured on the tunnel's output flags.
 */
430 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
431 const struct iphdr *tnl_params,
434 struct ip_tunnel *tunnel = netdev_priv(dev);
435 __be16 flags = tunnel->parms.o_flags;
437 /* Push GRE header. */
438 gre_build_header(skb, tunnel->tun_hlen,
439 flags, proto, tunnel->parms.o_key,
440 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
442 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
/* gre_handle_offloads() - select the GSO type (with or without GRE checksum)
 * and delegate offload preparation to iptunnel_handle_offloads().
 */
445 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
447 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
/* gre_fb_xmit() - transmit path for flow-based (collect_md) GRE tunnels.
 * All tunnel parameters come from the skb's tunnel metadata rather than
 * the device: header length, checksum/key/seq flags, and the key itself.
 * Drops the packet (tx_dropped) if the metadata is absent or not IPv4.
 * NOTE(review): lines missing in this extraction.
 */
450 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
453 struct ip_tunnel *tunnel = netdev_priv(dev);
454 struct ip_tunnel_info *tun_info;
455 const struct ip_tunnel_key *key;
459 tun_info = skb_tunnel_info(skb);
460 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
461 ip_tunnel_info_af(tun_info) != AF_INET))
464 key = &tun_info->key;
465 tunnel_hlen = gre_calc_hlen(key->tun_flags);
467 if (skb_cow_head(skb, dev->needed_headroom))
470 /* Push Tunnel header. */
471 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
474 flags = tun_info->key.tun_flags &
475 (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
476 gre_build_header(skb, tunnel_hlen, flags, proto,
477 tunnel_id_to_key32(tun_info->key.tun_id),
478 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
480 ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
486 dev->stats.tx_dropped++;
/* erspan_fb_xmit() - transmit path for flow-based (collect_md) ERSPAN.
 * Validates the tunnel metadata (must carry TUNNEL_ERSPAN_OPT options of
 * at least sizeof(*md)), derives the header length from the ERSPAN
 * version in the metadata, trims oversized frames (marking truncation),
 * builds the version-specific ERSPAN header, then the fixed 8-byte GRE
 * header with TUNNEL_SEQ, and transmits via ip_md_tunnel_xmit().
 * NOTE(review): lines missing in this extraction.
 */
489 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
491 struct ip_tunnel *tunnel = netdev_priv(dev);
492 struct ip_tunnel_info *tun_info;
493 const struct ip_tunnel_key *key;
494 struct erspan_metadata *md;
495 bool truncate = false;
501 tun_info = skb_tunnel_info(skb);
502 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
503 ip_tunnel_info_af(tun_info) != AF_INET))
506 key = &tun_info->key;
507 if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
509 if (tun_info->options_len < sizeof(*md))
511 md = ip_tunnel_info_opts(tun_info);
513 /* ERSPAN has fixed 8 byte GRE header */
514 version = md->version;
515 tunnel_hlen = 8 + erspan_hdr_len(version);
517 if (skb_cow_head(skb, dev->needed_headroom))
520 if (gre_handle_offloads(skb, false))
/* Frames larger than the device MTU are trimmed, not dropped. */
523 if (skb->len > dev->mtu + dev->hard_header_len) {
524 pskb_trim(skb, dev->mtu + dev->hard_header_len);
528 nhoff = skb_network_offset(skb);
/* Detect payloads whose IP length exceeds the actual skb data => truncated. */
529 if (skb->protocol == htons(ETH_P_IP) &&
530 (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
533 if (skb->protocol == htons(ETH_P_IPV6)) {
536 if (skb_transport_header_was_set(skb))
537 thoff = skb_transport_offset(skb);
539 thoff = nhoff + sizeof(struct ipv6hdr);
540 if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
545 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
546 ntohl(md->u.index), truncate, true);
547 proto = htons(ETH_P_ERSPAN);
548 } else if (version == 2) {
549 erspan_build_header_v2(skb,
550 ntohl(tunnel_id_to_key32(key->tun_id)),
552 get_hwid(&md->u.md2),
554 proto = htons(ETH_P_ERSPAN2);
559 gre_build_header(skb, 8, TUNNEL_SEQ,
560 proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
562 ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
568 dev->stats.tx_dropped++;
/* gre_fill_metadata_dst() - ndo_fill_metadata_dst hook: resolve the route
 * for the tunnel key and fill in the source address the stack would use.
 * NOTE(review): lines missing in this extraction.
 */
571 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
573 struct ip_tunnel_info *info = skb_tunnel_info(skb);
574 const struct ip_tunnel_key *key;
578 if (ip_tunnel_info_af(info) != AF_INET)
/* ECN bits must not influence routing; mask them out of the TOS. */
582 ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
583 tunnel_id_to_key32(key->tun_id),
584 key->tos & ~INET_ECN_MASK, 0, skb->mark,
586 rt = ip_route_output_key(dev_net(dev), &fl4);
591 info->key.u.ipv4.src = fl4.saddr;
/* ipgre_xmit() - ndo_start_xmit for plain GRE devices.
 * collect_md tunnels are diverted to gre_fb_xmit(). Devices with
 * header_ops have the outer IP header pre-built in the skb data and use
 * it as the tunnel parameters; otherwise the device's configured
 * parms.iph is used. NOTE(review): lines missing in this extraction.
 */
595 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
596 struct net_device *dev)
598 struct ip_tunnel *tunnel = netdev_priv(dev);
599 const struct iphdr *tnl_params;
601 if (!pskb_inet_may_pull(skb))
604 if (tunnel->collect_md) {
605 gre_fb_xmit(skb, dev, skb->protocol);
609 if (dev->header_ops) {
610 if (skb_cow_head(skb, 0))
613 tnl_params = (const struct iphdr *)skb->data;
615 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
618 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
619 skb_reset_mac_header(skb);
/* Checksum start must not be pulled past; presumably this guards the
 * CHECKSUM_PARTIAL offset after the pull above — confirm upstream. */
621 if (skb->ip_summed == CHECKSUM_PARTIAL &&
622 skb_checksum_start(skb) < skb->data)
625 if (skb_cow_head(skb, dev->needed_headroom))
628 tnl_params = &tunnel->parms.iph;
631 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
634 __gre_xmit(skb, dev, tnl_params, skb->protocol);
639 dev->stats.tx_dropped++;
/* erspan_xmit() - ndo_start_xmit for configured (non-metadata) ERSPAN
 * devices. collect_md goes through erspan_fb_xmit(); otherwise the
 * version-specific ERSPAN header is built from the device's o_key,
 * dir/hwid fields and the packet is sent via __gre_xmit(). Oversized
 * frames are trimmed. NOTE(review): lines missing in this extraction.
 */
643 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
644 struct net_device *dev)
646 struct ip_tunnel *tunnel = netdev_priv(dev);
647 bool truncate = false;
650 if (!pskb_inet_may_pull(skb))
653 if (tunnel->collect_md) {
654 erspan_fb_xmit(skb, dev);
658 if (gre_handle_offloads(skb, false))
661 if (skb_cow_head(skb, dev->needed_headroom))
664 if (skb->len > dev->mtu + dev->hard_header_len) {
665 pskb_trim(skb, dev->mtu + dev->hard_header_len);
669 /* Push ERSPAN header */
670 if (tunnel->erspan_ver == 1) {
671 erspan_build_header(skb, ntohl(tunnel->parms.o_key),
674 proto = htons(ETH_P_ERSPAN);
675 } else if (tunnel->erspan_ver == 2) {
676 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
677 tunnel->dir, tunnel->hwid,
679 proto = htons(ETH_P_ERSPAN2);
/* The session ID already lives in the ERSPAN header; the GRE key flag
 * is cleared so the key is not emitted twice. */
684 tunnel->parms.o_flags &= ~TUNNEL_KEY;
685 __gre_xmit(skb, dev, &tunnel->parms.iph, proto);
690 dev->stats.tx_dropped++;
/* gre_tap_xmit() - ndo_start_xmit for GRETAP (Ethernet-over-GRE) devices.
 * collect_md goes through gre_fb_xmit() with ETH_P_TEB; otherwise the
 * frame is offload-prepared and sent via __gre_xmit() as ETH_P_TEB.
 */
694 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
695 struct net_device *dev)
697 struct ip_tunnel *tunnel = netdev_priv(dev);
699 if (!pskb_inet_may_pull(skb))
702 if (tunnel->collect_md) {
703 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
707 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
710 if (skb_cow_head(skb, dev->needed_headroom))
713 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
718 dev->stats.tx_dropped++;
/* ipgre_link_update() - recompute header sizes after a flags change.
 * 'len' becomes the delta between the new and old GRE header length and
 * is applied to hlen, hard_header_len, needed_headroom and (optionally)
 * the MTU, which is floored at 68. GSO software features and LLTX are
 * re-derived from the new SEQ/CSUM flags: sequence numbers disable
 * lockless transmit and GSO. NOTE(review): lines missing in extraction.
 */
722 static void ipgre_link_update(struct net_device *dev, bool set_mtu)
724 struct ip_tunnel *tunnel = netdev_priv(dev);
727 len = tunnel->tun_hlen;
728 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
729 len = tunnel->tun_hlen - len;
730 tunnel->hlen = tunnel->hlen + len;
733 dev->hard_header_len += len;
735 dev->needed_headroom += len;
/* 68 is the minimum IPv4 MTU (RFC 791). */
738 dev->mtu = max_t(int, dev->mtu - len, 68);
740 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
741 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
742 tunnel->encap.type == TUNNEL_ENCAP_NONE) {
743 dev->features |= NETIF_F_GSO_SOFTWARE;
744 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
746 dev->features &= ~NETIF_F_GSO_SOFTWARE;
747 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
749 dev->features |= NETIF_F_LLTX;
751 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
752 dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
/* ipgre_tunnel_ioctl() - legacy SIOC{ADD,CHG,DEL,GET}TUNNEL interface.
 * Validates userspace parameters for ADD/CHG (IPv4, IPPROTO_GRE, ihl 5,
 * only the DF bit in frag_off, no GRE version/routing bits), converts
 * wire-format GRE flags to internal tunnel flags, delegates to
 * ip_tunnel_ioctl(), and converts the flags back before copying the
 * result out. NOTE(review): lines missing in this extraction.
 */
756 static int ipgre_tunnel_ioctl(struct net_device *dev,
757 struct ifreq *ifr, int cmd)
759 struct ip_tunnel_parm p;
762 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
765 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
766 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
767 p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
768 ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
772 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
773 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
775 err = ip_tunnel_ioctl(dev, &p, cmd);
779 if (cmd == SIOCCHGTUNNEL) {
780 struct ip_tunnel *t = netdev_priv(dev);
782 t->parms.i_flags = p.i_flags;
783 t->parms.o_flags = p.o_flags;
/* ERSPAN devices manage their own header sizing; only plain GRE links
 * get ipgre_link_update() here. */
785 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
786 ipgre_link_update(dev, true);
789 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
790 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
792 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
798 /* Nice toy. Unfortunately, useless in real life :-)
799 It allows to construct virtual multiprotocol broadcast "LAN"
800 over the Internet, provided multicast routing is tuned.
803 I have no idea whether this bicycle was invented before me,
804 so that I had to set ARPHRD_IPGRE to a random value.
805 I have an impression, that Cisco could make something similar,
806 but this feature is apparently missing in IOS<=11.2(8).
808 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
809 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
811 ping -t 255 224.66.66.66
813 If nobody answers, mbone does not work.
815 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
816 ip addr add 10.66.66.<somewhat>/24 dev Universe
818 ifconfig Universe add fe80::<Your_real_addr>/10
819 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
822 ftp fec0:6666:6666::193.233.7.65
/* ipgre_header() - header_ops->create: pre-build the outer IP + GRE base
 * header in the skb. Starts from the tunnel's configured parms.iph and
 * overrides saddr/daddr when the caller supplied them. Returns the
 * header length on success. NOTE(review): lines missing in extraction.
 */
825 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
827 const void *daddr, const void *saddr, unsigned int len)
829 struct ip_tunnel *t = netdev_priv(dev);
831 struct gre_base_hdr *greh;
833 iph = skb_push(skb, t->hlen + sizeof(*iph));
834 greh = (struct gre_base_hdr *)(iph+1);
835 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
836 greh->protocol = htons(type);
838 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
840 /* Set the source hardware address. */
842 memcpy(&iph->saddr, saddr, 4);
844 memcpy(&iph->daddr, daddr, 4);
846 return t->hlen + sizeof(*iph);
848 return -(t->hlen + sizeof(*iph));
/* ipgre_header_parse() - header_ops->parse: report the outer IPv4 source
 * address (4 bytes) as the "hardware address" of the sender.
 */
851 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
853 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
854 memcpy(haddr, &iph->saddr, 4);
/* Link-layer header operations for GRE devices with pre-built headers. */
858 static const struct header_ops ipgre_header_ops = {
859 .create = ipgre_header,
860 .parse = ipgre_header_parse,
#ifdef CONFIG_NET_IPGRE_BROADCAST
/* ipgre_open() - ndo_open for broadcast-GRE: when the remote is a
 * multicast group, resolve the route and join the group on the
 * resolved interface, remembering its ifindex in t->mlink.
 * NOTE(review): lines missing in this extraction.
 */
864 static int ipgre_open(struct net_device *dev)
866 struct ip_tunnel *t = netdev_priv(dev);
868 if (ipv4_is_multicast(t->parms.iph.daddr)) {
872 rt = ip_route_output_gre(t->net, &fl4,
876 RT_TOS(t->parms.iph.tos),
879 return -EADDRNOTAVAIL;
882 if (!__in_dev_get_rtnl(dev))
883 return -EADDRNOTAVAIL;
884 t->mlink = dev->ifindex;
885 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
/* ipgre_close() - ndo_stop counterpart of ipgre_open(): leave the
 * multicast group joined at open time (looked up via t->mlink).
 */
890 static int ipgre_close(struct net_device *dev)
892 struct ip_tunnel *t = netdev_priv(dev);
894 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
895 struct in_device *in_dev;
896 in_dev = inetdev_by_index(t->net, t->mlink);
898 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
/* net_device_ops for plain GRE devices; open/stop exist only when
 * broadcast GRE support is compiled in. */
904 static const struct net_device_ops ipgre_netdev_ops = {
905 .ndo_init = ipgre_tunnel_init,
906 .ndo_uninit = ip_tunnel_uninit,
907 #ifdef CONFIG_NET_IPGRE_BROADCAST
908 .ndo_open = ipgre_open,
909 .ndo_stop = ipgre_close,
911 .ndo_start_xmit = ipgre_xmit,
912 .ndo_do_ioctl = ipgre_tunnel_ioctl,
913 .ndo_change_mtu = ip_tunnel_change_mtu,
914 .ndo_get_stats64 = ip_tunnel_get_stats64,
915 .ndo_get_iflink = ip_tunnel_get_iflink,
918 #define GRE_FEATURES (NETIF_F_SG | \
/* ipgre_tunnel_setup() - rtnl setup hook for plain GRE devices: install
 * the netdev ops, set the ARPHRD_IPGRE link type, and register with the
 * ipgre per-netns table.
 */
923 static void ipgre_tunnel_setup(struct net_device *dev)
925 dev->netdev_ops = &ipgre_netdev_ops;
926 dev->type = ARPHRD_IPGRE;
927 ip_tunnel_setup(dev, ipgre_net_id);
/* __gre_tunnel_init() - common init for GRE and GRETAP devices: compute
 * header lengths from the output flags, set needed_headroom, and enable
 * GSO/LLTX only when sequence numbers are off (and checksum is off or
 * there is no extra encapsulation). NOTE(review): lines missing.
 */
930 static void __gre_tunnel_init(struct net_device *dev)
932 struct ip_tunnel *tunnel;
934 tunnel = netdev_priv(dev);
935 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
936 tunnel->parms.iph.protocol = IPPROTO_GRE;
938 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
939 dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
941 dev->features |= GRE_FEATURES;
942 dev->hw_features |= GRE_FEATURES;
944 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
945 /* TCP offload with GRE SEQ is not supported, nor
946 * can we support 2 levels of outer headers requiring
949 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
950 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
951 dev->features |= NETIF_F_GSO_SOFTWARE;
952 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
955 /* Can use a lockless transmit, unless we generate
958 dev->features |= NETIF_F_LLTX;
/* ipgre_tunnel_init() - ndo_init for plain GRE: derive dev/broadcast
 * addresses from the tunnel endpoints and install header_ops for
 * point-to-point or multicast tunnels (non-collect_md only), which
 * pre-build the outer header and therefore need no extra headroom.
 * NOTE(review): lines missing in this extraction.
 */
962 static int ipgre_tunnel_init(struct net_device *dev)
964 struct ip_tunnel *tunnel = netdev_priv(dev);
965 struct iphdr *iph = &tunnel->parms.iph;
967 __gre_tunnel_init(dev);
/* The tunnel endpoints double as the device's "hardware" addresses. */
969 memcpy(dev->dev_addr, &iph->saddr, 4);
970 memcpy(dev->broadcast, &iph->daddr, 4);
972 dev->flags = IFF_NOARP;
976 if (iph->daddr && !tunnel->collect_md) {
977 #ifdef CONFIG_NET_IPGRE_BROADCAST
978 if (ipv4_is_multicast(iph->daddr)) {
981 dev->flags = IFF_BROADCAST;
982 dev->header_ops = &ipgre_header_ops;
983 dev->hard_header_len = tunnel->hlen + sizeof(*iph);
984 dev->needed_headroom = 0;
987 } else if (!tunnel->collect_md) {
988 dev->header_ops = &ipgre_header_ops;
989 dev->hard_header_len = tunnel->hlen + sizeof(*iph);
990 dev->needed_headroom = 0;
993 return ip_tunnel_init(dev);
/* GRE protocol hooks registered with the GRE demux layer. */
996 static const struct gre_protocol ipgre_protocol = {
998 .err_handler = gre_err,
/* Per-netns init: create the ipgre tunnel table (no default device name). */
1001 static int __net_init ipgre_init_net(struct net *net)
1003 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
/* Per-netns batched exit: tear down all ipgre tunnels in the dying netns. */
1006 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
1008 ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
/* pernet_operations tying the ipgre table lifetime to netns lifetime. */
1011 static struct pernet_operations ipgre_net_ops = {
1012 .init = ipgre_init_net,
1013 .exit_batch = ipgre_exit_batch_net,
1014 .id = &ipgre_net_id,
1015 .size = sizeof(struct ip_tunnel_net),
/* ipgre_tunnel_validate() - netlink validation for plain GRE links:
 * reject GRE version/routing flag bits, and reject collect_md combined
 * with a non-NONE encapsulation type. NOTE(review): lines missing.
 */
1018 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1019 struct netlink_ext_ack *extack)
1027 if (data[IFLA_GRE_IFLAGS])
1028 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1029 if (data[IFLA_GRE_OFLAGS])
1030 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1031 if (flags & (GRE_VERSION|GRE_ROUTING))
1034 if (data[IFLA_GRE_COLLECT_METADATA] &&
1035 data[IFLA_GRE_ENCAP_TYPE] &&
1036 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
/* ipgre_tap_validate() - netlink validation for GRETAP links: check the
 * Ethernet address attribute (length and validity), then defer the rest
 * to ipgre_tunnel_validate(). NOTE(review): lines missing.
 */
1042 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1043 struct netlink_ext_ack *extack)
1047 if (tb[IFLA_ADDRESS]) {
1048 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1050 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1051 return -EADDRNOTAVAIL;
1057 if (data[IFLA_GRE_REMOTE]) {
1058 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1064 return ipgre_tunnel_validate(tb, data, extack);
/* erspan_validate() - netlink validation for ERSPAN links: run the tap
 * checks, then require exactly GRE_SEQ|GRE_KEY flags for non-metadata
 * links, and constrain I/O keys to the 10-bit ERSPAN session ID range.
 * NOTE(review): lines missing in this extraction.
 */
1067 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1068 struct netlink_ext_ack *extack)
1076 ret = ipgre_tap_validate(tb, data, extack);
1080 /* ERSPAN should only have GRE sequence and key flag */
1081 if (data[IFLA_GRE_OFLAGS])
1082 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1083 if (data[IFLA_GRE_IFLAGS])
1084 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1085 if (!data[IFLA_GRE_COLLECT_METADATA] &&
1086 flags != (GRE_SEQ | GRE_KEY))
1089 /* ERSPAN Session ID only has 10-bit. Since we reuse
1090 * 32-bit key field as ID, check it's range.
1092 if (data[IFLA_GRE_IKEY] &&
1093 (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1096 if (data[IFLA_GRE_OKEY] &&
1097 (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
/* ipgre_netlink_parms() - translate IFLA_GRE_* netlink attributes into
 * an ip_tunnel_parm: link, in/out flags and keys, endpoints, ttl, tos,
 * DF, collect_md (which retypes the device to ARPHRD_NONE), ignore_df
 * (rejected when DF is set), and the fwmark out-parameter.
 * NOTE(review): lines missing in this extraction.
 */
1103 static int ipgre_netlink_parms(struct net_device *dev,
1104 struct nlattr *data[],
1105 struct nlattr *tb[],
1106 struct ip_tunnel_parm *parms,
1109 struct ip_tunnel *t = netdev_priv(dev);
1111 memset(parms, 0, sizeof(*parms));
1113 parms->iph.protocol = IPPROTO_GRE;
1118 if (data[IFLA_GRE_LINK])
1119 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1121 if (data[IFLA_GRE_IFLAGS])
1122 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1124 if (data[IFLA_GRE_OFLAGS])
1125 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1127 if (data[IFLA_GRE_IKEY])
1128 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1130 if (data[IFLA_GRE_OKEY])
1131 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1133 if (data[IFLA_GRE_LOCAL])
1134 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1136 if (data[IFLA_GRE_REMOTE])
1137 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1139 if (data[IFLA_GRE_TTL])
1140 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1142 if (data[IFLA_GRE_TOS])
1143 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
/* PMTU discovery defaults to on: DF is set unless explicitly disabled. */
1145 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1148 parms->iph.frag_off = htons(IP_DF);
1151 if (data[IFLA_GRE_COLLECT_METADATA]) {
1152 t->collect_md = true;
1153 if (dev->type == ARPHRD_IPGRE)
1154 dev->type = ARPHRD_NONE;
/* ignore_df makes no sense together with DF-based PMTU discovery. */
1157 if (data[IFLA_GRE_IGNORE_DF]) {
1158 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1159 && (parms->iph.frag_off & htons(IP_DF)))
1161 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1164 if (data[IFLA_GRE_FWMARK])
1165 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
/* erspan_netlink_parms() - ERSPAN-specific attribute parsing on top of
 * ipgre_netlink_parms(): version (1 or 2), v1 index (masked by
 * INDEX_MASK), v2 direction and hardware ID (masked by their field
 * widths). NOTE(review): lines missing in this extraction.
 */
1170 static int erspan_netlink_parms(struct net_device *dev,
1171 struct nlattr *data[],
1172 struct nlattr *tb[],
1173 struct ip_tunnel_parm *parms,
1176 struct ip_tunnel *t = netdev_priv(dev);
1179 err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
1185 if (data[IFLA_GRE_ERSPAN_VER]) {
1186 t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1188 if (t->erspan_ver != 1 && t->erspan_ver != 2)
1192 if (t->erspan_ver == 1) {
1193 if (data[IFLA_GRE_ERSPAN_INDEX]) {
1194 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1195 if (t->index & ~INDEX_MASK)
1198 } else if (t->erspan_ver == 2) {
1199 if (data[IFLA_GRE_ERSPAN_DIR]) {
1200 t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1201 if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1204 if (data[IFLA_GRE_ERSPAN_HWID]) {
1205 t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1206 if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1214 /* This function returns true when ENCAP attributes are present in the nl msg */
/* Parses IFLA_GRE_ENCAP_{TYPE,FLAGS,SPORT,DPORT} into a zeroed
 * ip_tunnel_encap. NOTE(review): lines missing in this extraction.
 */
1215 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1216 struct ip_tunnel_encap *ipencap)
1220 memset(ipencap, 0, sizeof(*ipencap));
1225 if (data[IFLA_GRE_ENCAP_TYPE]) {
1227 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1230 if (data[IFLA_GRE_ENCAP_FLAGS]) {
1232 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1235 if (data[IFLA_GRE_ENCAP_SPORT]) {
1237 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1240 if (data[IFLA_GRE_ENCAP_DPORT]) {
1242 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
/* gre_tap_init() - ndo_init for GRETAP: common GRE init plus live MAC
 * address changes and dst caching, then the shared ip_tunnel_init().
 */
1248 static int gre_tap_init(struct net_device *dev)
1250 __gre_tunnel_init(dev);
1251 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1252 netif_keep_dst(dev);
1254 return ip_tunnel_init(dev);
/* net_device_ops for GRETAP (Ethernet-over-GRE) devices. */
1257 static const struct net_device_ops gre_tap_netdev_ops = {
1258 .ndo_init = gre_tap_init,
1259 .ndo_uninit = ip_tunnel_uninit,
1260 .ndo_start_xmit = gre_tap_xmit,
1261 .ndo_set_mac_address = eth_mac_addr,
1262 .ndo_validate_addr = eth_validate_addr,
1263 .ndo_change_mtu = ip_tunnel_change_mtu,
1264 .ndo_get_stats64 = ip_tunnel_get_stats64,
1265 .ndo_get_iflink = ip_tunnel_get_iflink,
1266 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
/* erspan_tunnel_init() - ndo_init for ERSPAN devices: fixed 8-byte GRE
 * header plus the version-dependent ERSPAN header length, GRE features,
 * live MAC address changes and dst caching.
 */
1269 static int erspan_tunnel_init(struct net_device *dev)
1271 struct ip_tunnel *tunnel = netdev_priv(dev);
1273 tunnel->tun_hlen = 8;
1274 tunnel->parms.iph.protocol = IPPROTO_GRE;
1275 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1276 erspan_hdr_len(tunnel->erspan_ver);
1278 dev->features |= GRE_FEATURES;
1279 dev->hw_features |= GRE_FEATURES;
1280 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1281 netif_keep_dst(dev);
1283 return ip_tunnel_init(dev);
/* net_device_ops for ERSPAN devices. */
1286 static const struct net_device_ops erspan_netdev_ops = {
1287 .ndo_init = erspan_tunnel_init,
1288 .ndo_uninit = ip_tunnel_uninit,
1289 .ndo_start_xmit = erspan_xmit,
1290 .ndo_set_mac_address = eth_mac_addr,
1291 .ndo_validate_addr = eth_validate_addr,
1292 .ndo_change_mtu = ip_tunnel_change_mtu,
1293 .ndo_get_stats64 = ip_tunnel_get_stats64,
1294 .ndo_get_iflink = ip_tunnel_get_iflink,
1295 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
/* ipgre_tap_setup() - rtnl setup hook for GRETAP devices: install tap
 * netdev ops, adjust priv_flags, and register with the gre_tap table.
 */
1298 static void ipgre_tap_setup(struct net_device *dev)
1302 dev->netdev_ops = &gre_tap_netdev_ops;
1303 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1304 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1305 ip_tunnel_setup(dev, gre_tap_net_id);
/* ipgre_newlink_encap_setup() - if ENCAP attributes are present in the
 * netlink message, apply them to the tunnel via ip_tunnel_encap_setup().
 */
ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
1311 struct ip_tunnel_encap ipencap;
1313 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1314 struct ip_tunnel *t = netdev_priv(dev);
1315 int err = ip_tunnel_encap_setup(t, &ipencap);
/* ipgre_newlink() - rtnl newlink for GRE: apply encap setup, parse the
 * GRE attributes, and create the tunnel via ip_tunnel_newlink().
 */
1324 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1325 struct nlattr *tb[], struct nlattr *data[],
1326 struct netlink_ext_ack *extack)
1328 struct ip_tunnel_parm p;
1332 err = ipgre_newlink_encap_setup(dev, data);
1336 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1339 return ip_tunnel_newlink(dev, tb, &p, fwmark);
/* erspan_newlink() - rtnl newlink for ERSPAN: like ipgre_newlink() but
 * parses the ERSPAN-specific attributes via erspan_netlink_parms().
 */
1342 static int erspan_newlink(struct net *src_net, struct net_device *dev,
1343 struct nlattr *tb[], struct nlattr *data[],
1344 struct netlink_ext_ack *extack)
1346 struct ip_tunnel_parm p;
1350 err = ipgre_newlink_encap_setup(dev, data);
1354 err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1357 return ip_tunnel_newlink(dev, tb, &p, fwmark);
/* ipgre_changelink() - rtnl changelink for GRE: re-apply encap and
 * attribute parsing, update the tunnel, store the new flags, and
 * recompute header sizes/MTU via ipgre_link_update() (MTU only when
 * the caller did not set it explicitly via IFLA_MTU).
 */
1360 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1361 struct nlattr *data[],
1362 struct netlink_ext_ack *extack)
1364 struct ip_tunnel *t = netdev_priv(dev);
1365 __u32 fwmark = t->fwmark;
1366 struct ip_tunnel_parm p;
1369 err = ipgre_newlink_encap_setup(dev, data);
1373 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1377 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1381 t->parms.i_flags = p.i_flags;
1382 t->parms.o_flags = p.o_flags;
1384 ipgre_link_update(dev, !tb[IFLA_MTU]);
/* rtnl_link_ops->changelink for "erspan": mirrors ipgre_changelink() but
 * uses erspan_netlink_parms().
 * NOTE(review): unlike ipgre_changelink() there is no visible
 * ipgre_link_update() call here — presumably the erspan header length is
 * handled elsewhere; confirm against the full file.  err declaration and
 * intermediate error-return lines are elided in this excerpt.
 */
1389 static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
1390 struct nlattr *data[],
1391 struct netlink_ext_ack *extack)
1393 struct ip_tunnel *t = netdev_priv(dev);
/* Preserve the current fwmark as the default if the attribute is absent. */
1394 __u32 fwmark = t->fwmark;
1395 struct ip_tunnel_parm p;
1398 err = ipgre_newlink_encap_setup(dev, data);
1402 err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1406 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1410 t->parms.i_flags = p.i_flags;
1411 t->parms.o_flags = p.o_flags;
/* rtnl_link_ops->get_size: worst-case netlink message size for a fill_info
 * dump of a gre/gretap/erspan device.  Each comment names the attribute
 * whose nla_total_size() term follows it; keep this list in sync with
 * ipgre_fill_info().
 * NOTE(review): the actual nla_total_size() expressions are elided in this
 * excerpt — only the attribute-name comments are visible.
 */
1416 static size_t ipgre_get_size(const struct net_device *dev)
1421 /* IFLA_GRE_IFLAGS */
1423 /* IFLA_GRE_OFLAGS */
1429 /* IFLA_GRE_LOCAL */
1431 /* IFLA_GRE_REMOTE */
1437 /* IFLA_GRE_PMTUDISC */
1439 /* IFLA_GRE_ENCAP_TYPE */
1441 /* IFLA_GRE_ENCAP_FLAGS */
1443 /* IFLA_GRE_ENCAP_SPORT */
1445 /* IFLA_GRE_ENCAP_DPORT */
1447 /* IFLA_GRE_COLLECT_METADATA */
1449 /* IFLA_GRE_IGNORE_DF */
1451 /* IFLA_GRE_FWMARK */
1453 /* IFLA_GRE_ERSPAN_INDEX */
1455 /* IFLA_GRE_ERSPAN_VER */
1457 /* IFLA_GRE_ERSPAN_DIR */
1459 /* IFLA_GRE_ERSPAN_HWID */
/* rtnl_link_ops->fill_info: dump the tunnel configuration as netlink
 * attributes.  Any nla_put failure jumps to nla_put_failure (elided here),
 * which conventionally returns -EMSGSIZE.
 * NOTE(review): braces and the trailing return/nla_put_failure label are
 * elided in this excerpt.
 */
1464 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1466 struct ip_tunnel *t = netdev_priv(dev);
1467 struct ip_tunnel_parm *p = &t->parms;
/* Work on a copy of o_flags so erspan can force TUNNEL_KEY into the dump
 * without mutating the stored parms. */
1468 __be16 o_flags = p->o_flags;
/* Only erspan versions 1 and 2 report erspan-specific attributes. */
1470 if (t->erspan_ver == 1 || t->erspan_ver == 2) {
1472 o_flags |= TUNNEL_KEY;
1474 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1475 goto nla_put_failure;
/* v1 carries an index; v2 (the else branch, brace elided) carries
 * direction and hardware id instead. */
1477 if (t->erspan_ver == 1) {
1478 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1479 goto nla_put_failure;
1481 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1482 goto nla_put_failure;
1483 if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1484 goto nla_put_failure;
/* Core tunnel attributes; GRE flag bits are translated back from the
 * internal TUNNEL_* representation to on-wire GRE flags. */
1488 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1489 nla_put_be16(skb, IFLA_GRE_IFLAGS,
1490 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1491 nla_put_be16(skb, IFLA_GRE_OFLAGS,
1492 gre_tnl_flags_to_gre_flags(o_flags)) ||
1493 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1494 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1495 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1496 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1497 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1498 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1499 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1500 !!(p->iph.frag_off & htons(IP_DF))) ||
1501 nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1502 goto nla_put_failure;
/* Encapsulation attributes (value expressions elided in this excerpt). */
1504 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1506 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1508 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1510 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1512 goto nla_put_failure;
1514 if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1515 goto nla_put_failure;
/* Flow-based (collect_md) mode is reported as a flag attribute. */
1517 if (t->collect_md) {
1518 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1519 goto nla_put_failure;
/* Netdev setup callback for "erspan" devices: like ipgre_tap_setup() but
 * installs the erspan ndo table and registers with the erspan per-netns id.
 * NOTE(review): intermediate lines (ether_setup, erspan_ver default, etc.)
 * are elided in this excerpt; the declared 't' is presumably used there.
 */
1528 static void erspan_setup(struct net_device *dev)
1530 struct ip_tunnel *t = netdev_priv(dev);
1534 dev->netdev_ops = &erspan_netdev_ops;
/* Ethernet-style devices must not share TX skbs; allow live MAC change. */
1535 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1536 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1537 ip_tunnel_setup(dev, erspan_net_id);
/* Netlink attribute validation policy shared by the gre, gretap and erspan
 * link ops: declares the expected type/length of each IFLA_GRE_* attribute
 * so rtnetlink can reject malformed messages before the parse callbacks run.
 */
1541 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1542 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1543 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1544 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1545 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1546 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
/* Addresses are validated by length (4 bytes, matching struct iphdr). */
1547 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1548 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1549 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1550 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1551 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1552 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
1553 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1554 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1555 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
1556 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
1557 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
1558 [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
1559 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
1560 [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
1561 [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
1562 [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
/* rtnetlink ops for the layer-3 "gre" link kind (ip link add ... type gre).
 * NOTE(review): the .kind line is elided in this excerpt.
 */
1565 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1567 .maxtype = IFLA_GRE_MAX,
1568 .policy = ipgre_policy,
1569 .priv_size = sizeof(struct ip_tunnel),
1570 .setup = ipgre_tunnel_setup,
1571 .validate = ipgre_tunnel_validate,
1572 .newlink = ipgre_newlink,
1573 .changelink = ipgre_changelink,
1574 .dellink = ip_tunnel_dellink,
1575 .get_size = ipgre_get_size,
1576 .fill_info = ipgre_fill_info,
1577 .get_link_net = ip_tunnel_get_link_net,
/* rtnetlink ops for the Ethernet-over-GRE "gretap" link kind.  Differs from
 * ipgre_link_ops only in setup/validate; newlink/changelink/dump paths are
 * shared.  NOTE(review): the .kind line is elided in this excerpt.
 */
1580 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1582 .maxtype = IFLA_GRE_MAX,
1583 .policy = ipgre_policy,
1584 .priv_size = sizeof(struct ip_tunnel),
1585 .setup = ipgre_tap_setup,
1586 .validate = ipgre_tap_validate,
1587 .newlink = ipgre_newlink,
1588 .changelink = ipgre_changelink,
1589 .dellink = ip_tunnel_dellink,
1590 .get_size = ipgre_get_size,
1591 .fill_info = ipgre_fill_info,
1592 .get_link_net = ip_tunnel_get_link_net,
/* rtnetlink ops for the "erspan" link kind; erspan-specific setup, validate
 * and newlink/changelink, with get_size/fill_info shared with gre/gretap.
 * NOTE(review): the .kind line is elided in this excerpt.
 */
1595 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1597 .maxtype = IFLA_GRE_MAX,
1598 .policy = ipgre_policy,
1599 .priv_size = sizeof(struct ip_tunnel),
1600 .setup = erspan_setup,
1601 .validate = erspan_validate,
1602 .newlink = erspan_newlink,
1603 .changelink = erspan_changelink,
1604 .dellink = ip_tunnel_dellink,
1605 .get_size = ipgre_get_size,
1606 .fill_info = ipgre_fill_info,
1607 .get_link_net = ip_tunnel_get_link_net,
/* Create a flow-based ("collect_md") gretap device programmatically —
 * exported for in-kernel users (the comment below names openvswitch).
 * Builds an empty attribute table, creates the link, forces metadata
 * collection mode, runs the normal newlink path, raises the MTU to the
 * maximum, then activates the link.  On failure after registration the
 * device is torn down via dellink + unregister_netdevice_many().
 * Returns the new device or an ERR_PTR.
 * NOTE(review): the err declaration, IS_ERR checks on 'dev' and the goto
 * targets between the visible lines are elided in this excerpt.
 */
1610 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1611 u8 name_assign_type)
1613 struct nlattr *tb[IFLA_MAX + 1];
1614 struct net_device *dev;
1615 LIST_HEAD(list_kill);
1616 struct ip_tunnel *t;
/* No netlink attributes: the device is configured directly below. */
1619 memset(&tb, 0, sizeof(tb));
1621 dev = rtnl_create_link(net, name, name_assign_type,
1622 &ipgre_tap_ops, tb, NULL);
1626 /* Configure flow based GRE device. */
1627 t = netdev_priv(dev);
1628 t->collect_md = true;
1630 err = ipgre_newlink(net, dev, tb, NULL, NULL);
1633 return ERR_PTR(err);
1636 /* openvswitch users expect packet sizes to be unrestricted,
1637 * so set the largest MTU we can.
1639 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1643 err = rtnl_configure_link(dev, NULL);
/* Error path: unregister the already-registered device. */
1649 ip_tunnel_dellink(dev, &list_kill);
1650 unregister_netdevice_many(&list_kill);
1651 return ERR_PTR(err);
/* Per-netns init: create the gretap tunnel net with fallback device
 * "gretap0". */
1655 static int __net_init ipgre_tap_init_net(struct net *net)
1657 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
/* Per-netns batched exit: tear down all gretap tunnels in the dying
 * namespaces. */
1660 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1662 ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
/* pernet_operations for gretap: per-netns ip_tunnel_net keyed by
 * gre_tap_net_id. */
1665 static struct pernet_operations ipgre_tap_net_ops = {
1666 .init = ipgre_tap_init_net,
1667 .exit_batch = ipgre_tap_exit_batch_net,
1668 .id = &gre_tap_net_id,
1669 .size = sizeof(struct ip_tunnel_net),
/* Per-netns init: create the erspan tunnel net with fallback device
 * "erspan0". */
1672 static int __net_init erspan_init_net(struct net *net)
1674 return ip_tunnel_init_net(net, erspan_net_id,
1675 &erspan_link_ops, "erspan0");
/* Per-netns batched exit: tear down all erspan tunnels in the dying
 * namespaces. */
1678 static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1680 ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
/* pernet_operations for erspan: per-netns ip_tunnel_net keyed by
 * erspan_net_id. */
1683 static struct pernet_operations erspan_net_ops = {
1684 .init = erspan_init_net,
1685 .exit_batch = erspan_exit_batch_net,
1686 .id = &erspan_net_id,
1687 .size = sizeof(struct ip_tunnel_net),
/* Module init: register the three pernet devices (gre, gretap, erspan),
 * the GRE protocol receive handler (GREPROTO_CISCO slot), and the three
 * rtnl link ops.  On any failure, unwind everything registered so far in
 * reverse order via the labelled error path.
 * NOTE(review): the err declaration, "if (err < 0) goto ..." lines and the
 * error-path labels themselves are elided in this excerpt; the visible
 * unwind calls (L882-L887) belong to those labels.
 */
1690 static int __init ipgre_init(void)
1694 pr_info("GRE over IPv4 tunneling driver\n");
1696 err = register_pernet_device(&ipgre_net_ops);
1700 err = register_pernet_device(&ipgre_tap_net_ops);
1702 goto pnet_tap_failed;
1704 err = register_pernet_device(&erspan_net_ops);
1706 goto pnet_erspan_failed;
1708 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1710 pr_info("%s: can't add protocol\n", __func__);
1711 goto add_proto_failed;
1714 err = rtnl_link_register(&ipgre_link_ops);
1716 goto rtnl_link_failed;
1718 err = rtnl_link_register(&ipgre_tap_ops);
1720 goto tap_ops_failed;
1722 err = rtnl_link_register(&erspan_link_ops);
1724 goto erspan_link_failed;
/* Error unwind: each label undoes the registrations above it, in reverse. */
1729 rtnl_link_unregister(&ipgre_tap_ops);
1731 rtnl_link_unregister(&ipgre_link_ops);
1733 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1735 unregister_pernet_device(&erspan_net_ops);
1737 unregister_pernet_device(&ipgre_tap_net_ops);
1739 unregister_pernet_device(&ipgre_net_ops);
/* Module exit: unregister the rtnl link ops first (so no new links can be
 * created), then the protocol handler, then the pernet devices —
 * mirroring ipgre_init()'s registrations. */
1743 static void __exit ipgre_fini(void)
1745 rtnl_link_unregister(&ipgre_tap_ops);
1746 rtnl_link_unregister(&ipgre_link_ops);
1747 rtnl_link_unregister(&erspan_link_ops);
1748 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1749 unregister_pernet_device(&ipgre_tap_net_ops);
1750 unregister_pernet_device(&ipgre_net_ops);
1751 unregister_pernet_device(&erspan_net_ops);
1754 module_init(ipgre_init);
1755 module_exit(ipgre_fini);
1756 MODULE_LICENSE("GPL");
/* RTNL-link aliases: auto-load this module when userspace creates a link
 * of kind "gre", "gretap" or "erspan". */
1757 MODULE_ALIAS_RTNL_LINK("gre");
1758 MODULE_ALIAS_RTNL_LINK("gretap");
1759 MODULE_ALIAS_RTNL_LINK("erspan");
/* Netdev aliases: auto-load for the legacy fallback device names. */
1760 MODULE_ALIAS_NETDEV("gre0");
1761 MODULE_ALIAS_NETDEV("gretap0");
1762 MODULE_ALIAS_NETDEV("erspan0");