1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Linux NET3: GRE over IP protocol decoder.
5 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/uaccess.h>
16 #include <linux/skbuff.h>
17 #include <linux/netdevice.h>
19 #include <linux/tcp.h>
20 #include <linux/udp.h>
21 #include <linux/if_arp.h>
22 #include <linux/if_vlan.h>
23 #include <linux/init.h>
24 #include <linux/in6.h>
25 #include <linux/inetdevice.h>
26 #include <linux/igmp.h>
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/etherdevice.h>
29 #include <linux/if_ether.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
45 #include <net/dst_metadata.h>
46 #include <net/erspan.h>
52 1. The most important issue is detecting local dead loops.
53 They would cause complete host lockup in transmit, which
54 would be "resolved" by stack overflow or, if queueing is enabled,
55 with infinite looping in net_bh.
57 We cannot track such dead loops during route installation,
58 it is an infeasible task. The most general solution would be
59 to keep skb->encapsulation counter (sort of local ttl),
60 and silently drop packet when it expires. It is a good
61 solution, but it supposes maintaining new variable in ALL
62 skb, even if no tunneling is used.
64 Current solution: xmit_recursion breaks dead loops. This is a percpu
65 counter, since when we enter the first ndo_xmit(), cpu migration is
66 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
68 2. Networking dead loops would not kill routers, but would really
69 kill network. IP hop limit plays role of "t->recursion" in this case,
70 if we copy it from packet being encapsulated to upper header.
71 It is very good solution, but it introduces two problems:
73 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
74 do not work over tunnels.
75 - traceroute does not work. I planned to relay ICMP from tunnel,
76 so that this problem would be solved and traceroute output
77 would be even more informative. This idea appeared to be wrong:
78 only Linux complies to rfc1812 now (yes, guys, Linux is the only
79 true router now :-)), all routers (at least, in neighbourhood of mine)
80 return only 8 bytes of payload. It is the end.
82 Hence, if we want that OSPF worked or traceroute said something reasonable,
83 we should search for another solution.
85 One of them is to parse packet trying to detect inner encapsulation
86 made by our node. It is difficult or even impossible, especially,
87 taking into account fragmentation. To be short, TTL is not a solution at all.
89 Current solution: The solution was UNEXPECTEDLY SIMPLE.
90 We force DF flag on tunnels with preconfigured hop limit,
91 that is ALL. :-) Well, it does not remove the problem completely,
92 but exponential growth of network traffic is changed to linear
93 (branches, that exceed pmtu are pruned) and tunnel mtu
94 rapidly degrades to value <68, where looping stops.
95 Yes, it is not good if there exists a router in the loop,
96 which does not force DF, even when encapsulating packets have DF set.
97 But it is not our problem! Nobody could accuse us, we made
98 all that we could make. Even if it is your gated who injected
99 fatal route to network, even if it were you who configured
100 fatal static route: you are innocent. :-)
// Module parameter (mode 0644), true by default.  Its description is "Log
// packets received with corrupted ECN"; the value is passed to
// ip_tunnel_rcv() by the receive paths in this file.
105 static bool log_ecn_error = true;
106 module_param(log_ecn_error, bool, 0644);
107 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
// Forward declarations of objects and helpers that are referenced before
// their definitions.
109 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
110 static const struct header_ops ipgre_header_ops;
112 static int ipgre_tunnel_init(struct net_device *dev);
113 static void erspan_build_header(struct sk_buff *skb,
115 bool truncate, bool is_ipv4);
// Per-network-namespace ids, one per device flavour handled here.  They are
// given to net_generic() to obtain the matching struct ip_tunnel_net.
117 static unsigned int ipgre_net_id __read_mostly;
118 static unsigned int gre_tap_net_id __read_mostly;
119 static unsigned int erspan_net_id __read_mostly;
// ipgre_err - process an ICMP error that refers to a GRE packet.
// @skb:  the ICMP error; the offending packet's IP header is read from just
//        behind the ICMP header
// @info: ICMP info word handed over by gre_err()
// @tpi:  GRE header fields parsed from the offending packet
//
// The per-netns tunnel table is chosen from tpi->proto: gretap for
// ETH_P_TEB, erspan for ETH_P_ERSPAN/ETH_P_ERSPAN2, ipgre otherwise.  The
// tunnel lookup passes the embedded header's daddr before its saddr, i.e.
// the opposite order to the receive path.  The visible tail of the function
// filters on the ICMP type/code and on the tunnel's configured daddr/ttl,
// then stores the time of the error in t->err_time.
121 static int ipgre_err(struct sk_buff *skb, u32 info,
122 const struct tnl_ptk_info *tpi)
125 /* All the routers (except for Linux) return only
126 8 bytes of packet payload. It means, that precise relaying of
127 ICMP in the real Internet is absolutely infeasible.
129 Moreover, Cisco "wise men" put GRE key to the third word
130 in GRE header. It makes impossible maintaining even soft
131 state for keyed GRE tunnels with enabled checksum. Tell
134 Well, I wonder, rfc1812 was written by Cisco employee,
135 what the hell these idiots break standards established
138 struct net *net = dev_net(skb->dev);
139 struct ip_tunnel_net *itn;
140 const struct iphdr *iph;
141 const int type = icmp_hdr(skb)->type;
142 const int code = icmp_hdr(skb)->code;
143 unsigned int data_len = 0;
146 if (tpi->proto == htons(ETH_P_TEB))
147 itn = net_generic(net, gre_tap_net_id);
148 else if (tpi->proto == htons(ETH_P_ERSPAN) ||
149 tpi->proto == htons(ETH_P_ERSPAN2))
150 itn = net_generic(net, erspan_net_id);
152 itn = net_generic(net, ipgre_net_id);
154 iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
155 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
156 iph->daddr, iph->saddr, tpi->key);
163 case ICMP_PARAMETERPROB:
166 case ICMP_DEST_UNREACH:
169 case ICMP_PORT_UNREACH:
170 /* Impossible event. */
173 /* All others are translated to HOST_UNREACH.
174 rfc2003 contains "deep thoughts" about NET_UNREACH,
175 I believe they are just ether pollution. --ANK
181 case ICMP_TIME_EXCEEDED:
182 if (code != ICMP_EXC_TTL)
184 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
191 #if IS_ENABLED(CONFIG_IPV6)
192 if (tpi->proto == htons(ETH_P_IPV6) &&
193 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
198 if (t->parms.iph.daddr == 0 ||
199 ipv4_is_multicast(t->parms.iph.daddr))
202 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
205 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
209 t->err_time = jiffies;
// gre_err - ICMP error entry point for GRE; installed as the .err_handler
// of ipgre_protocol.
// @skb:  the ICMP error; skb->data is read as the offending packet's IP header
// @info: ICMP info word, forwarded to ipv4_update_pmtu() and ipgre_err()
//
// Parses the GRE header of the offending packet into a tnl_ptk_info.
// ICMP_DEST_UNREACH/ICMP_FRAG_NEEDED is handed to ipv4_update_pmtu() and
// ICMP_REDIRECT to ipv4_redirect(), both for IPPROTO_GRE on the receiving
// interface.  ipgre_err() is called with the parsed header info.
214 static void gre_err(struct sk_buff *skb, u32 info)
216 /* All the routers (except for Linux) return only
217 * 8 bytes of packet payload. It means, that precise relaying of
218 * ICMP in the real Internet is absolutely infeasible.
220 * Moreover, Cisco "wise men" put GRE key to the third word
221 * in GRE header. It makes impossible maintaining even soft
223 * GRE tunnels with enabled checksum. Tell them "thank you".
225 * Well, I wonder, rfc1812 was written by Cisco employee,
226 * what the hell these idiots break standards established
230 const struct iphdr *iph = (struct iphdr *)skb->data;
231 const int type = icmp_hdr(skb)->type;
232 const int code = icmp_hdr(skb)->code;
233 struct tnl_ptk_info tpi;
235 if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
239 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
240 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
241 skb->dev->ifindex, IPPROTO_GRE);
244 if (type == ICMP_REDIRECT) {
245 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
250 ipgre_err(skb, info, &tpi);
// is_erspan_type1 - tell ERSPAN type I from type II by GRE header length.
// @gre_hdr_len: length of the parsed GRE header in bytes
//
// Returns true exactly when @gre_hdr_len is 4.
253 static bool is_erspan_type1(int gre_hdr_len)
255 /* Both ERSPAN type I (version 0) and type II (version 1) use
256 * protocol 0x88BE, but the type I has only 4-byte GRE header,
257 * while type II has 8-byte.
259 return gre_hdr_len == 4;
// erspan_rcv - receive path for ERSPAN frames (called from gre_rcv()).
// @skb: received packet
// @tpi: parsed GRE header info; TUNNEL_KEY is OR-ed into tpi->flags in the
//       collect_md branch
//
// Looks the tunnel up in the erspan per-netns table.  For a 4-byte GRE
// header (type I) the lookup adds TUNNEL_NO_KEY and uses key 0.  Otherwise
// the ERSPAN base header behind the GRE header must be pullable, and the
// lookup adds TUNNEL_KEY and uses tpi->key.  The GRE plus ERSPAN header
// length must be pullable as well; failures shown here return
// PACKET_REJECT.  For collect_md tunnels a metadata dst is created with room
// for an erspan_metadata option, the packet's ERSPAN metadata is located
// again relative to the network header (see the comment below), and the
// option is flagged with TUNNEL_ERSPAN_OPT.  The packet is then handed to
// ip_tunnel_rcv().
262 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
265 struct net *net = dev_net(skb->dev);
266 struct metadata_dst *tun_dst = NULL;
267 struct erspan_base_hdr *ershdr;
268 struct ip_tunnel_net *itn;
269 struct ip_tunnel *tunnel;
270 const struct iphdr *iph;
271 struct erspan_md2 *md2;
275 itn = net_generic(net, erspan_net_id);
277 if (is_erspan_type1(gre_hdr_len)) {
279 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
280 tpi->flags | TUNNEL_NO_KEY,
281 iph->saddr, iph->daddr, 0);
283 if (unlikely(!pskb_may_pull(skb,
284 gre_hdr_len + sizeof(*ershdr))))
285 return PACKET_REJECT;
287 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
290 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
291 tpi->flags | TUNNEL_KEY,
292 iph->saddr, iph->daddr, tpi->key);
296 if (is_erspan_type1(gre_hdr_len))
299 len = gre_hdr_len + erspan_hdr_len(ver);
301 if (unlikely(!pskb_may_pull(skb, len)))
302 return PACKET_REJECT;
304 if (__iptunnel_pull_header(skb,
310 if (tunnel->collect_md) {
311 struct erspan_metadata *pkt_md, *md;
312 struct ip_tunnel_info *info;
317 tpi->flags |= TUNNEL_KEY;
319 tun_id = key32_to_tunnel_id(tpi->key);
321 tun_dst = ip_tun_rx_dst(skb, flags,
322 tun_id, sizeof(*md));
324 return PACKET_REJECT;
326 /* skb can be uncloned in __iptunnel_pull_header, so
327 * old pkt_md is no longer valid and we need to reset
330 gh = skb_network_header(skb) +
331 skb_network_header_len(skb);
332 pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
334 md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
337 memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
340 info = &tun_dst->u.tun_info;
341 info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
342 info->options_len = sizeof(*md);
345 skb_reset_mac_header(skb);
346 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
349 return PACKET_REJECT;
// __ipgre_rcv - try to deliver a GRE packet to a tunnel from one table.
// @skb:       received packet
// @tpi:       parsed GRE header info
// @itn:       per-netns tunnel table to search
// @hdr_len:   number of header bytes handed to __iptunnel_pull_header()
// @raw_proto: forwarded unchanged to __iptunnel_pull_header()
//
// The tunnel is looked up by the outer saddr/daddr and tpi->key.  After the
// tunnel header is pulled, the mac header is popped for devices that use
// ipgre_header_ops and reset for all others.  When the tunnel is in
// collect_md mode or has no configured daddr, a metadata dst is built from
// the TUNNEL_CSUM/TUNNEL_KEY bits of tpi->flags and the tunnel id; the
// visible failure path after that returns PACKET_REJECT.  The packet is then
// passed to ip_tunnel_rcv().
356 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
357 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
359 struct metadata_dst *tun_dst = NULL;
360 const struct iphdr *iph;
361 struct ip_tunnel *tunnel;
364 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
365 iph->saddr, iph->daddr, tpi->key);
368 const struct iphdr *tnl_params;
370 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
371 raw_proto, false) < 0)
374 /* Special case for ipgre_header_parse(), which expects the
375 * mac_header to point to the outer IP header.
377 if (tunnel->dev->header_ops == &ipgre_header_ops)
378 skb_pop_mac_header(skb);
380 skb_reset_mac_header(skb);
382 tnl_params = &tunnel->parms.iph;
383 if (tunnel->collect_md || tnl_params->daddr == 0) {
387 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
388 tun_id = key32_to_tunnel_id(tpi->key);
389 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
391 return PACKET_REJECT;
394 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
// ipgre_rcv - receive path for non-ERSPAN GRE packets (called from gre_rcv()).
// @skb: received packet
// @tpi: parsed GRE header info
//
// ETH_P_TEB frames are first matched against the gretap table, everything
// else against the ipgre table, with raw_proto false.  If that attempt
// yields PACKET_NEXT for an ETH_P_TEB frame, the lookup is repeated against
// the ipgre table with raw_proto true (see the comment below).
404 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
407 struct net *net = dev_net(skb->dev);
408 struct ip_tunnel_net *itn;
411 if (tpi->proto == htons(ETH_P_TEB))
412 itn = net_generic(net, gre_tap_net_id);
414 itn = net_generic(net, ipgre_net_id);
416 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
417 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
418 /* ipgre tunnels in collect metadata mode should receive
419 * also ETH_P_TEB traffic.
421 itn = net_generic(net, ipgre_net_id);
422 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
// gre_rcv - receive entry point for GRE packets.
// @skb: received packet
//
// With CONFIG_NET_IPGRE_BROADCAST, a multicast-destined packet that arrived
// over an output route is treated as looped back.  The GRE header is then
// parsed; ETH_P_ERSPAN/ETH_P_ERSPAN2 packets go to erspan_rcv(), all others
// to ipgre_rcv(), and PACKET_RCVD from either counts as delivered.  The
// ICMP port-unreachable below is presumably sent only when no tunnel took
// the packet - the surrounding control flow is not visible here.
427 static int gre_rcv(struct sk_buff *skb)
429 struct tnl_ptk_info tpi;
430 bool csum_err = false;
433 #ifdef CONFIG_NET_IPGRE_BROADCAST
434 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
435 /* Looped back packet, drop it! */
436 if (rt_is_output_route(skb_rtable(skb)))
441 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
445 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
446 tpi.proto == htons(ETH_P_ERSPAN2))) {
447 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
452 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
456 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
// __gre_xmit - push the GRE header and transmit through a configured tunnel.
// @skb:        packet to send
// @dev:        GRE device; its private data is the struct ip_tunnel
// @tnl_params: outer IP header template given to ip_tunnel_xmit()
//
// The header is built from the tunnel's o_flags and o_key with a length of
// tunnel->tun_hlen.  A sequence number is taken from tunnel->o_seqno
// (atomic fetch-and-increment) only when TUNNEL_SEQ is set; otherwise 0 is
// passed.
462 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
463 const struct iphdr *tnl_params,
466 struct ip_tunnel *tunnel = netdev_priv(dev);
467 __be16 flags = tunnel->parms.o_flags;
469 /* Push GRE header. */
470 gre_build_header(skb, tunnel->tun_hlen,
471 flags, proto, tunnel->parms.o_key,
472 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
474 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
// gre_handle_offloads - prepare offload state of @skb for GRE encapsulation.
// @skb:  packet about to be encapsulated
// @csum: true selects SKB_GSO_GRE_CSUM, false selects SKB_GSO_GRE
//
// Returns whatever iptunnel_handle_offloads() returns; callers in this file
// treat a non-zero result as failure.
477 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
479 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
// gre_fb_xmit - transmit using per-packet tunnel metadata (collect_md mode).
// @skb: packet to send; must carry TX-mode AF_INET tunnel info
// @dev: GRE device
//
// The GRE header length is derived from the metadata key's tun_flags.
// Headroom of dev->needed_headroom is ensured, then offloads are set up
// with checksum offload following TUNNEL_CSUM.  Only TUNNEL_CSUM,
// TUNNEL_KEY and TUNNEL_SEQ are carried into the header, and the sequence
// number comes from tunnel->o_seqno when TUNNEL_SEQ is set.  The packet is
// sent with ip_md_tunnel_xmit(); the drop path at the end increments
// tx_dropped.
482 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
485 struct ip_tunnel *tunnel = netdev_priv(dev);
486 struct ip_tunnel_info *tun_info;
487 const struct ip_tunnel_key *key;
491 tun_info = skb_tunnel_info(skb);
492 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
493 ip_tunnel_info_af(tun_info) != AF_INET))
496 key = &tun_info->key;
497 tunnel_hlen = gre_calc_hlen(key->tun_flags);
499 if (skb_cow_head(skb, dev->needed_headroom))
502 /* Push Tunnel header. */
503 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
506 flags = tun_info->key.tun_flags &
507 (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
508 gre_build_header(skb, tunnel_hlen, flags, proto,
509 tunnel_id_to_key32(tun_info->key.tun_id),
510 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
512 ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
518 DEV_STATS_INC(dev, tx_dropped);
// erspan_fb_xmit - ERSPAN transmit using per-packet tunnel metadata.
// @skb: packet to mirror; must carry TX-mode AF_INET tunnel info with
//       TUNNEL_ERSPAN_OPT set and at least sizeof(struct erspan_metadata)
//       bytes of options
// @dev: ERSPAN device
//
// The ERSPAN version is read from the metadata option and the tunnel header
// length is 8 (GRE) plus erspan_hdr_len(version).  Frames longer than
// dev->mtu + dev->hard_header_len are trimmed to that size.  The IPv4
// tot_len and the IPv6 payload_len are compared with the bytes actually
// left in the skb; presumably this feeds the `truncate` flag given to the
// header builders (the assignments are not visible here).  Version 1 uses
// erspan_build_header() with ETH_P_ERSPAN, version 2 uses
// erspan_build_header_v2() with ETH_P_ERSPAN2.  The GRE header is always
// 8 bytes with TUNNEL_SEQ and a sequence number from tunnel->o_seqno.  The
// drop path at the end increments tx_dropped.
521 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
523 struct ip_tunnel *tunnel = netdev_priv(dev);
524 struct ip_tunnel_info *tun_info;
525 const struct ip_tunnel_key *key;
526 struct erspan_metadata *md;
527 bool truncate = false;
533 tun_info = skb_tunnel_info(skb);
534 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
535 ip_tunnel_info_af(tun_info) != AF_INET))
538 key = &tun_info->key;
539 if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
541 if (tun_info->options_len < sizeof(*md))
543 md = ip_tunnel_info_opts(tun_info);
545 /* ERSPAN has fixed 8 byte GRE header */
546 version = md->version;
547 tunnel_hlen = 8 + erspan_hdr_len(version);
549 if (skb_cow_head(skb, dev->needed_headroom))
552 if (gre_handle_offloads(skb, false))
555 if (skb->len > dev->mtu + dev->hard_header_len) {
556 if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
561 nhoff = skb_network_offset(skb);
562 if (skb->protocol == htons(ETH_P_IP) &&
563 (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
566 if (skb->protocol == htons(ETH_P_IPV6)) {
569 if (skb_transport_header_was_set(skb))
570 thoff = skb_transport_offset(skb);
572 thoff = nhoff + sizeof(struct ipv6hdr);
573 if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
578 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
579 ntohl(md->u.index), truncate, true);
580 proto = htons(ETH_P_ERSPAN);
581 } else if (version == 2) {
582 erspan_build_header_v2(skb,
583 ntohl(tunnel_id_to_key32(key->tun_id)),
585 get_hwid(&md->u.md2),
587 proto = htons(ETH_P_ERSPAN2);
592 gre_build_header(skb, 8, TUNNEL_SEQ,
593 proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
595 ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
601 DEV_STATS_INC(dev, tx_dropped);
// gre_fill_metadata_dst - .ndo_fill_metadata_dst handler of the gretap and
// erspan devices.
// @dev: tunnel device
// @skb: packet whose tunnel info is completed
//
// Only AF_INET tunnel info is handled.  A GRE flow is initialised from the
// tunnel key (addresses, tunnel id as GRE key, TOS without the ECN bits)
// together with the skb's mark and hash, and an output route is looked up
// in the device's netns.  The source address selected for the flow
// (fl4.saddr) is written back into the tunnel info key.
604 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
606 struct ip_tunnel_info *info = skb_tunnel_info(skb);
607 const struct ip_tunnel_key *key;
611 if (ip_tunnel_info_af(info) != AF_INET)
615 ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
616 tunnel_id_to_key32(key->tun_id),
617 key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
618 skb->mark, skb_get_hash(skb), key->flow_flags);
619 rt = ip_route_output_key(dev_net(dev), &fl4);
624 info->key.u.ipv4.src = fl4.saddr;
// ipgre_xmit - .ndo_start_xmit of the layer-3 GRE device.
// @skb: packet to send
// @dev: GRE device
//
// collect_md tunnels are handled by gre_fb_xmit().  When the device has
// header_ops, the skb already starts with an outer IP + GRE header: that
// header is used as tnl_params, then tunnel->hlen + sizeof(struct iphdr)
// bytes are pulled so skb->data is where ip_tunnel_xmit() needs it, and a
// CHECKSUM_PARTIAL skb whose checksum start now lies before skb->data is
// rejected.  Otherwise headroom of dev->needed_headroom is ensured and the
// tunnel's configured IP header is used as tnl_params.  After offload setup
// the packet goes to __gre_xmit(); the drop path at the end increments
// tx_dropped.
628 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
629 struct net_device *dev)
631 struct ip_tunnel *tunnel = netdev_priv(dev);
632 const struct iphdr *tnl_params;
634 if (!pskb_inet_may_pull(skb))
637 if (tunnel->collect_md) {
638 gre_fb_xmit(skb, dev, skb->protocol);
642 if (dev->header_ops) {
643 int pull_len = tunnel->hlen + sizeof(struct iphdr);
645 if (skb_cow_head(skb, 0))
648 tnl_params = (const struct iphdr *)skb->data;
650 if (!pskb_network_may_pull(skb, pull_len))
653 /* ip_tunnel_xmit() needs skb->data pointing to gre header. */
654 skb_pull(skb, pull_len);
655 skb_reset_mac_header(skb);
657 if (skb->ip_summed == CHECKSUM_PARTIAL &&
658 skb_checksum_start(skb) < skb->data)
661 if (skb_cow_head(skb, dev->needed_headroom))
664 tnl_params = &tunnel->parms.iph;
667 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
670 __gre_xmit(skb, dev, tnl_params, skb->protocol);
675 DEV_STATS_INC(dev, tx_dropped);
// erspan_xmit - .ndo_start_xmit of the ERSPAN device.
// @skb: packet to mirror
// @dev: ERSPAN device
//
// collect_md tunnels are handled by erspan_fb_xmit().  Otherwise offloads
// are set up without checksum, headroom is ensured, and frames longer than
// dev->mtu + dev->hard_header_len are trimmed to that size.  The ERSPAN
// header depends on tunnel->erspan_ver: version 0 pushes none and clears
// TUNNEL_SEQ, version 1 uses erspan_build_header(), version 2 uses
// erspan_build_header_v2() with ETH_P_ERSPAN2.  Note that this path writes
// to the shared tunnel->parms.o_flags: TUNNEL_KEY is cleared before
// __gre_xmit() is called.  The drop path at the end increments tx_dropped.
679 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
680 struct net_device *dev)
682 struct ip_tunnel *tunnel = netdev_priv(dev);
683 bool truncate = false;
686 if (!pskb_inet_may_pull(skb))
689 if (tunnel->collect_md) {
690 erspan_fb_xmit(skb, dev);
694 if (gre_handle_offloads(skb, false))
697 if (skb_cow_head(skb, dev->needed_headroom))
700 if (skb->len > dev->mtu + dev->hard_header_len) {
701 if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
706 /* Push ERSPAN header */
707 if (tunnel->erspan_ver == 0) {
708 proto = htons(ETH_P_ERSPAN);
709 tunnel->parms.o_flags &= ~TUNNEL_SEQ;
710 } else if (tunnel->erspan_ver == 1) {
711 erspan_build_header(skb, ntohl(tunnel->parms.o_key),
714 proto = htons(ETH_P_ERSPAN);
715 } else if (tunnel->erspan_ver == 2) {
716 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
717 tunnel->dir, tunnel->hwid,
719 proto = htons(ETH_P_ERSPAN2);
724 tunnel->parms.o_flags &= ~TUNNEL_KEY;
725 __gre_xmit(skb, dev, &tunnel->parms.iph, proto);
730 DEV_STATS_INC(dev, tx_dropped);
// gre_tap_xmit - .ndo_start_xmit of the gretap (Ethernet over GRE) device.
// @skb: Ethernet frame to send
// @dev: gretap device
//
// collect_md tunnels are handled by gre_fb_xmit() with ETH_P_TEB.
// Otherwise offloads are set up (checksum offload follows TUNNEL_CSUM in
// o_flags), headroom of dev->needed_headroom is ensured and the frame is
// sent via __gre_xmit() with the tunnel's configured IP header and
// ETH_P_TEB.  The drop path at the end increments tx_dropped.
734 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
735 struct net_device *dev)
737 struct ip_tunnel *tunnel = netdev_priv(dev);
739 if (!pskb_inet_may_pull(skb))
742 if (tunnel->collect_md) {
743 gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
747 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
750 if (skb_cow_head(skb, dev->needed_headroom))
753 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
758 DEV_STATS_INC(dev, tx_dropped);
// ipgre_link_update - re-derive header sizes and GSO features after the
// tunnel's o_flags changed.
// @dev:     GRE device
// @set_mtu: callers pass true when the MTU should follow the header size
//           change (the guard using it is not visible here)
//
// tun_hlen is recomputed from o_flags and the difference to the old value
// is added to tunnel->hlen.  The same difference is added to
// dev->hard_header_len or dev->needed_headroom (the selecting condition is
// not visible here) and subtracted from dev->mtu, which never drops below
// 68.  NETIF_F_GSO_SOFTWARE is removed from features/hw_features when
// TUNNEL_SEQ is set, or when TUNNEL_CSUM is combined with an encapsulation
// type other than TUNNEL_ENCAP_NONE; the lines below that set it again.
762 static void ipgre_link_update(struct net_device *dev, bool set_mtu)
764 struct ip_tunnel *tunnel = netdev_priv(dev);
768 len = tunnel->tun_hlen;
769 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
770 len = tunnel->tun_hlen - len;
771 tunnel->hlen = tunnel->hlen + len;
774 dev->hard_header_len += len;
776 dev->needed_headroom += len;
779 dev->mtu = max_t(int, dev->mtu - len, 68);
781 flags = tunnel->parms.o_flags;
783 if (flags & TUNNEL_SEQ ||
784 (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
785 dev->features &= ~NETIF_F_GSO_SOFTWARE;
786 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
788 dev->features |= NETIF_F_GSO_SOFTWARE;
789 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
// ipgre_tunnel_ctl - .ndo_tunnel_ctl handler of the layer-3 GRE device.
// @dev: GRE device
// @p:   tunnel parameters; i_flags/o_flags arrive in GRE wire format and
//       are converted back to that format before returning
//
// For SIOCADDTUNNEL and SIOCCHGTUNNEL the IP header template is checked:
// version 4, protocol GRE, ihl 5, no frag_off bits other than IP_DF, and
// neither GRE_VERSION nor GRE_ROUTING in the flags.  The flags are
// converted to tunnel flags and the request is passed to ip_tunnel_ctl().
// On SIOCCHGTUNNEL the new flags are stored in the tunnel, and
// ipgre_link_update() is called unless the link kind is "erspan".
793 static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
798 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
799 if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
800 p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
801 ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
805 p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
806 p->o_flags = gre_flags_to_tnl_flags(p->o_flags);
808 err = ip_tunnel_ctl(dev, p, cmd);
812 if (cmd == SIOCCHGTUNNEL) {
813 struct ip_tunnel *t = netdev_priv(dev);
815 t->parms.i_flags = p->i_flags;
816 t->parms.o_flags = p->o_flags;
818 if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
819 ipgre_link_update(dev, true);
822 p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
823 p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
827 /* Nice toy. Unfortunately, useless in real life :-)
828 It allows to construct virtual multiprotocol broadcast "LAN"
829 over the Internet, provided multicast routing is tuned.
832 I have no idea whether this bicycle was invented before me,
833 so that I had to set ARPHRD_IPGRE to a random value.
834 I have an impression, that Cisco could make something similar,
835 but this feature is apparently missing in IOS<=11.2(8).
837 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
838 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
840 ping -t 255 224.66.66.66
842 If nobody answers, mbone does not work.
844 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
845 ip addr add 10.66.66.<somewhat>/24 dev Universe
847 ifconfig Universe add fe80::<Your_real_addr>/10
848 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
851 ftp fec0:6666:6666::193.233.7.65
// ipgre_header - .create callback of ipgre_header_ops; prepends the outer
// IP header and the GRE base header to @skb.
// @skb:   packet being built
// @dev:   GRE device
// @daddr: 4-byte outer destination address copied into the header
// @saddr: 4-byte outer source address copied into the header
// @len:   not referenced in the visible lines
//
// t->hlen + sizeof(struct iphdr) bytes are pushed.  The IP header starts as
// a copy of the tunnel's configured header, and the GRE base header gets
// the tunnel's o_flags (converted to GRE wire format) and the protocol
// type.  Returns the pushed length, or its negative on the second return
// path; the deciding condition is not visible here.
854 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
856 const void *daddr, const void *saddr, unsigned int len)
858 struct ip_tunnel *t = netdev_priv(dev);
860 struct gre_base_hdr *greh;
862 iph = skb_push(skb, t->hlen + sizeof(*iph));
863 greh = (struct gre_base_hdr *)(iph+1);
864 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
865 greh->protocol = htons(type);
867 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
869 /* Set the source hardware address. */
871 memcpy(&iph->saddr, saddr, 4);
873 memcpy(&iph->daddr, daddr, 4);
875 return t->hlen + sizeof(*iph);
877 return -(t->hlen + sizeof(*iph));
// ipgre_header_parse - .parse callback of ipgre_header_ops.
// @skb:   received packet; its mac header is read as an IP header
// @haddr: output buffer, receives the 4-byte source address of that header
880 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
882 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
883 memcpy(haddr, &iph->saddr, 4);
// Link-layer header operations of the layer-3 GRE device.  The receive and
// transmit paths compare dev->header_ops against this object to detect
// devices that build the outer header themselves.
887 static const struct header_ops ipgre_header_ops = {
888 .create = ipgre_header,
889 .parse = ipgre_header_parse,
// ipgre_open - .ndo_open handler, built only with CONFIG_NET_IPGRE_BROADCAST.
// @dev: GRE device being brought up
//
// Acts only when the tunnel's remote address is multicast: a GRE output
// route is looked up, the interface index is remembered in t->mlink and the
// multicast group is joined with ip_mc_inc_group().  -EADDRNOTAVAIL is
// returned on the failure paths shown.
892 #ifdef CONFIG_NET_IPGRE_BROADCAST
893 static int ipgre_open(struct net_device *dev)
895 struct ip_tunnel *t = netdev_priv(dev);
897 if (ipv4_is_multicast(t->parms.iph.daddr)) {
901 rt = ip_route_output_gre(t->net, &fl4,
905 RT_TOS(t->parms.iph.tos),
908 return -EADDRNOTAVAIL;
911 if (!__in_dev_get_rtnl(dev))
912 return -EADDRNOTAVAIL;
913 t->mlink = dev->ifindex;
914 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
// ipgre_close - .ndo_stop handler, counterpart of ipgre_open().
// @dev: GRE device being brought down
//
// When the remote address is multicast and t->mlink is set, the multicast
// group is left on the interface that t->mlink identifies.
919 static int ipgre_close(struct net_device *dev)
921 struct ip_tunnel *t = netdev_priv(dev);
923 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
924 struct in_device *in_dev;
925 in_dev = inetdev_by_index(t->net, t->mlink);
927 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
// Device operations of the layer-3 GRE device; installed by
// ipgre_tunnel_setup().  The open/stop handlers exist only with
// CONFIG_NET_IPGRE_BROADCAST.
933 static const struct net_device_ops ipgre_netdev_ops = {
934 .ndo_init = ipgre_tunnel_init,
935 .ndo_uninit = ip_tunnel_uninit,
936 #ifdef CONFIG_NET_IPGRE_BROADCAST
937 .ndo_open = ipgre_open,
938 .ndo_stop = ipgre_close,
940 .ndo_start_xmit = ipgre_xmit,
941 .ndo_siocdevprivate = ip_tunnel_siocdevprivate,
942 .ndo_change_mtu = ip_tunnel_change_mtu,
943 .ndo_get_stats64 = dev_get_tstats64,
944 .ndo_get_iflink = ip_tunnel_get_iflink,
945 .ndo_tunnel_ctl = ipgre_tunnel_ctl,
// Feature bits that the init functions in this file OR into dev->features
// and dev->hw_features.  Only NETIF_F_SG is visible in this listing.
948 #define GRE_FEATURES (NETIF_F_SG | \
// ipgre_tunnel_setup - setup routine for layer-3 GRE devices.
// @dev: freshly allocated net_device
//
// Installs ipgre_netdev_ops, marks the device as ARPHRD_IPGRE and finishes
// with the generic ip_tunnel_setup() using the ipgre per-netns id.
953 static void ipgre_tunnel_setup(struct net_device *dev)
955 dev->netdev_ops = &ipgre_netdev_ops;
956 dev->type = ARPHRD_IPGRE;
957 ip_tunnel_setup(dev, ipgre_net_id);
// __gre_tunnel_init - initialisation shared by the ipgre and gretap devices.
// @dev: tunnel device
//
// Derives tun_hlen from o_flags, forces the outer protocol to IPPROTO_GRE,
// sets hlen to tun_hlen plus encap_hlen and needed_headroom to hlen plus
// the outer IP header size.  GRE_FEATURES is enabled in features (together
// with NETIF_F_LLTX) and in hw_features.  NETIF_F_GSO_SOFTWARE is enabled
// at the end; the two flag checks before it cover the cases the comment
// below describes as unsupported, and presumably skip that step.
960 static void __gre_tunnel_init(struct net_device *dev)
962 struct ip_tunnel *tunnel;
965 tunnel = netdev_priv(dev);
966 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
967 tunnel->parms.iph.protocol = IPPROTO_GRE;
969 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
970 dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
972 dev->features |= GRE_FEATURES | NETIF_F_LLTX;
973 dev->hw_features |= GRE_FEATURES;
975 flags = tunnel->parms.o_flags;
977 /* TCP offload with GRE SEQ is not supported, nor can we support 2
978 * levels of outer headers requiring an update.
980 if (flags & TUNNEL_SEQ)
982 if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)
985 dev->features |= NETIF_F_GSO_SOFTWARE;
986 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
// ipgre_tunnel_init - .ndo_init handler of the layer-3 GRE device.
// @dev: GRE device
//
// After the common __gre_tunnel_init(), the configured local address
// becomes the device address and the remote address the broadcast address
// (4 bytes each).  The device is marked IFF_NOARP.  header_ops is set to
// ipgre_header_ops, with hard_header_len covering the outer IP + GRE
// header and needed_headroom reset to 0, in two cases:
//   - with CONFIG_NET_IPGRE_BROADCAST, when a multicast remote is
//     configured on a non-collect_md tunnel (the device is then marked
//     IFF_BROADCAST);
//   - when no remote address is configured and the tunnel is not in
//     collect_md mode.
// Returns the result of ip_tunnel_init().
989 static int ipgre_tunnel_init(struct net_device *dev)
991 struct ip_tunnel *tunnel = netdev_priv(dev);
992 struct iphdr *iph = &tunnel->parms.iph;
994 __gre_tunnel_init(dev);
996 __dev_addr_set(dev, &iph->saddr, 4);
997 memcpy(dev->broadcast, &iph->daddr, 4);
999 dev->flags = IFF_NOARP;
1000 netif_keep_dst(dev);
1003 if (iph->daddr && !tunnel->collect_md) {
1004 #ifdef CONFIG_NET_IPGRE_BROADCAST
1005 if (ipv4_is_multicast(iph->daddr)) {
1008 dev->flags = IFF_BROADCAST;
1009 dev->header_ops = &ipgre_header_ops;
1010 dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1011 dev->needed_headroom = 0;
1014 } else if (!tunnel->collect_md) {
1015 dev->header_ops = &ipgre_header_ops;
1016 dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1017 dev->needed_headroom = 0;
1020 return ip_tunnel_init(dev);
// GRE protocol hooks of this module.  Only the ICMP error handler
// (gre_err) is visible in this listing.
1023 static const struct gre_protocol ipgre_protocol = {
1025 .err_handler = gre_err,
// ipgre_init_net - per-netns init for layer-3 GRE (the .init hook of
// ipgre_net_ops).
// @net: network namespace being set up
//
// Returns the result of ip_tunnel_init_net(), called with the ipgre id and
// link ops and a NULL last argument.
1028 static int __net_init ipgre_init_net(struct net *net)
1030 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
// ipgre_exit_batch_net - batched per-netns exit for layer-3 GRE (the
// .exit_batch hook of ipgre_net_ops).
// @list_net: list of network namespaces being torn down
1033 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
1035 ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
// Per-network-namespace operations for layer-3 GRE.  The per-netns data is
// a struct ip_tunnel_net, and its id is stored in ipgre_net_id.
1038 static struct pernet_operations ipgre_net_ops = {
1039 .init = ipgre_init_net,
1040 .exit_batch = ipgre_exit_batch_net,
1041 .id = &ipgre_net_id,
1042 .size = sizeof(struct ip_tunnel_net),
// ipgre_tunnel_validate - netlink attribute validation for GRE links.
// @tb:     generic link attributes (not referenced in the visible lines)
// @data:   IFLA_GRE_* attributes
// @extack: netlink extended ack (not referenced in the visible lines)
//
// The input and output flag attributes are OR-ed together and tested for
// GRE_VERSION and GRE_ROUTING.  A second test covers
// IFLA_GRE_COLLECT_METADATA combined with an encapsulation type other than
// TUNNEL_ENCAP_NONE.  The values returned for these cases are not visible
// here.
1045 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1046 struct netlink_ext_ack *extack)
1054 if (data[IFLA_GRE_IFLAGS])
1055 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1056 if (data[IFLA_GRE_OFLAGS])
1057 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1058 if (flags & (GRE_VERSION|GRE_ROUTING))
1061 if (data[IFLA_GRE_COLLECT_METADATA] &&
1062 data[IFLA_GRE_ENCAP_TYPE] &&
1063 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
// ipgre_tap_validate - netlink attribute validation for gretap links.
// @tb:     generic link attributes; IFLA_ADDRESS is checked when present
// @data:   IFLA_GRE_* attributes
// @extack: netlink extended ack, forwarded to ipgre_tunnel_validate()
//
// The length of a supplied IFLA_ADDRESS is compared with ETH_ALEN, and an
// address that is not a valid Ethernet address yields -EADDRNOTAVAIL.  The
// remote address attribute is read into daddr (its check is not visible
// here).  Everything else is delegated to ipgre_tunnel_validate().
1069 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1070 struct netlink_ext_ack *extack)
1074 if (tb[IFLA_ADDRESS]) {
1075 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1077 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1078 return -EADDRNOTAVAIL;
1084 if (data[IFLA_GRE_REMOTE]) {
1085 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1091 return ipgre_tunnel_validate(tb, data, extack);
// erspan_validate - netlink attribute validation for ERSPAN links.
// @tb:     generic link attributes, forwarded to ipgre_tap_validate()
// @data:   IFLA_GRE_* attributes
// @extack: netlink extended ack, forwarded to ipgre_tap_validate()
//
// Starts with the gretap validation.  A request with
// IFLA_GRE_ERSPAN_VER == 0 is singled out before the remaining checks.
// Those compare the OR of the input and output flags with exactly
// GRE_SEQ | GRE_KEY, unless IFLA_GRE_COLLECT_METADATA is present, and test
// the input and output keys for bits outside ID_MASK.
1094 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1095 struct netlink_ext_ack *extack)
1103 ret = ipgre_tap_validate(tb, data, extack);
1107 if (data[IFLA_GRE_ERSPAN_VER] &&
1108 nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
1111 /* ERSPAN type II/III should only have GRE sequence and key flag */
1112 if (data[IFLA_GRE_OFLAGS])
1113 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1114 if (data[IFLA_GRE_IFLAGS])
1115 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1116 if (!data[IFLA_GRE_COLLECT_METADATA] &&
1117 flags != (GRE_SEQ | GRE_KEY))
1120 /* ERSPAN Session ID only has 10-bit. Since we reuse
1121 * 32-bit key field as ID, check it's range.
1123 if (data[IFLA_GRE_IKEY] &&
1124 (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1127 if (data[IFLA_GRE_OKEY] &&
1128 (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
// ipgre_netlink_parms - translate IFLA_GRE_* attributes into tunnel
// parameters.
// @dev:   tunnel device; its private struct ip_tunnel receives collect_md
//         and ignore_df
// @data:  IFLA_GRE_* attributes
// @tb:    generic link attributes (not referenced in the visible lines)
// @parms: output; zeroed first, then filled from the attributes present
//
// The outer protocol is always IPPROTO_GRE.  Flags are converted from GRE
// wire format to tunnel flags.  IP_DF is set in frag_off unless
// IFLA_GRE_PMTUDISC is present with value 0.  IFLA_GRE_COLLECT_METADATA
// turns on collect_md and changes an ARPHRD_IPGRE device to ARPHRD_NONE.
// A non-zero IFLA_GRE_IGNORE_DF combined with IP_DF is singled out before
// ignore_df is stored.  IFLA_GRE_FWMARK, when present, is written to
// *fwmark.
1134 static int ipgre_netlink_parms(struct net_device *dev,
1135 struct nlattr *data[],
1136 struct nlattr *tb[],
1137 struct ip_tunnel_parm *parms,
1140 struct ip_tunnel *t = netdev_priv(dev);
1142 memset(parms, 0, sizeof(*parms));
1144 parms->iph.protocol = IPPROTO_GRE;
1149 if (data[IFLA_GRE_LINK])
1150 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1152 if (data[IFLA_GRE_IFLAGS])
1153 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1155 if (data[IFLA_GRE_OFLAGS])
1156 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1158 if (data[IFLA_GRE_IKEY])
1159 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1161 if (data[IFLA_GRE_OKEY])
1162 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1164 if (data[IFLA_GRE_LOCAL])
1165 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1167 if (data[IFLA_GRE_REMOTE])
1168 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1170 if (data[IFLA_GRE_TTL])
1171 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1173 if (data[IFLA_GRE_TOS])
1174 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1176 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1179 parms->iph.frag_off = htons(IP_DF);
1182 if (data[IFLA_GRE_COLLECT_METADATA]) {
1183 t->collect_md = true;
1184 if (dev->type == ARPHRD_IPGRE)
1185 dev->type = ARPHRD_NONE;
1188 if (data[IFLA_GRE_IGNORE_DF]) {
1189 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1190 && (parms->iph.frag_off & htons(IP_DF)))
1192 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1195 if (data[IFLA_GRE_FWMARK])
1196 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
// erspan_netlink_parms - ipgre_netlink_parms() plus the ERSPAN-specific
// attributes.
// @dev:   ERSPAN device; its private struct ip_tunnel receives erspan_ver,
//         index, dir and hwid
// @data:  IFLA_GRE_* attributes
// @tb:    generic link attributes, forwarded to ipgre_netlink_parms()
// @parms: output tunnel parameters, filled by ipgre_netlink_parms()
//
// The common GRE attributes are parsed first.  The ERSPAN version is read
// when present and checked against the maximum of 2.  Version 1 reads the
// index and tests it against INDEX_MASK.  Version 2 reads the direction and
// hardware id and tests them against DIR_MASK and HWID_MASK, each shifted
// down by its offset.
1201 static int erspan_netlink_parms(struct net_device *dev,
1202 struct nlattr *data[],
1203 struct nlattr *tb[],
1204 struct ip_tunnel_parm *parms,
1207 struct ip_tunnel *t = netdev_priv(dev);
1210 err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
1216 if (data[IFLA_GRE_ERSPAN_VER]) {
1217 t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1219 if (t->erspan_ver > 2)
1223 if (t->erspan_ver == 1) {
1224 if (data[IFLA_GRE_ERSPAN_INDEX]) {
1225 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1226 if (t->index & ~INDEX_MASK)
1229 } else if (t->erspan_ver == 2) {
1230 if (data[IFLA_GRE_ERSPAN_DIR]) {
1231 t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1232 if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1235 if (data[IFLA_GRE_ERSPAN_HWID]) {
1236 t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1237 if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1245 /* This function returns true when ENCAP attributes are present in the nl msg */
// @data:    IFLA_GRE_* attributes
// @ipencap: output; zeroed first, then type, flags, sport and dport are
//           filled from whichever IFLA_GRE_ENCAP_* attributes are present
1246 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1247 struct ip_tunnel_encap *ipencap)
1251 memset(ipencap, 0, sizeof(*ipencap));
1256 if (data[IFLA_GRE_ENCAP_TYPE]) {
1258 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1261 if (data[IFLA_GRE_ENCAP_FLAGS]) {
1263 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1266 if (data[IFLA_GRE_ENCAP_SPORT]) {
1268 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1271 if (data[IFLA_GRE_ENCAP_DPORT]) {
1273 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
// gre_tap_init - .ndo_init handler of the gretap device.
// @dev: gretap device
//
// Runs the common __gre_tunnel_init(), sets IFF_LIVE_ADDR_CHANGE, calls
// netif_keep_dst() and returns the result of ip_tunnel_init().
1279 static int gre_tap_init(struct net_device *dev)
1281 __gre_tunnel_init(dev);
1282 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1283 netif_keep_dst(dev);
1285 return ip_tunnel_init(dev);
// Device operations of the gretap device; installed by ipgre_tap_setup().
// MAC address handling uses the generic Ethernet helpers.
1288 static const struct net_device_ops gre_tap_netdev_ops = {
1289 .ndo_init = gre_tap_init,
1290 .ndo_uninit = ip_tunnel_uninit,
1291 .ndo_start_xmit = gre_tap_xmit,
1292 .ndo_set_mac_address = eth_mac_addr,
1293 .ndo_validate_addr = eth_validate_addr,
1294 .ndo_change_mtu = ip_tunnel_change_mtu,
1295 .ndo_get_stats64 = dev_get_tstats64,
1296 .ndo_get_iflink = ip_tunnel_get_iflink,
1297 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
// erspan_tunnel_init - .ndo_init handler of the ERSPAN device.
// @dev: ERSPAN device
//
// The GRE header length is 4 bytes for erspan_ver 0 and 8 bytes otherwise.
// hlen is the GRE header plus encap_hlen plus the ERSPAN header length for
// the configured version.  The outer protocol is IPPROTO_GRE,
// GRE_FEATURES is enabled in features and hw_features, and
// IFF_LIVE_ADDR_CHANGE is set.  Returns the result of ip_tunnel_init().
1300 static int erspan_tunnel_init(struct net_device *dev)
1302 struct ip_tunnel *tunnel = netdev_priv(dev);
1304 if (tunnel->erspan_ver == 0)
1305 tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
1307 tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */
1309 tunnel->parms.iph.protocol = IPPROTO_GRE;
1310 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1311 erspan_hdr_len(tunnel->erspan_ver);
1313 dev->features |= GRE_FEATURES;
1314 dev->hw_features |= GRE_FEATURES;
1315 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1316 netif_keep_dst(dev);
1318 return ip_tunnel_init(dev);
// Device operations of the ERSPAN device.  They match gre_tap_netdev_ops
// except for the init and transmit handlers.
1321 static const struct net_device_ops erspan_netdev_ops = {
1322 .ndo_init = erspan_tunnel_init,
1323 .ndo_uninit = ip_tunnel_uninit,
1324 .ndo_start_xmit = erspan_xmit,
1325 .ndo_set_mac_address = eth_mac_addr,
1326 .ndo_validate_addr = eth_validate_addr,
1327 .ndo_change_mtu = ip_tunnel_change_mtu,
1328 .ndo_get_stats64 = dev_get_tstats64,
1329 .ndo_get_iflink = ip_tunnel_get_iflink,
1330 .ndo_fill_metadata_dst = gre_fill_metadata_dst,
// ipgre_tap_setup - setup routine for gretap devices.
// @dev: freshly allocated net_device
//
// Installs gre_tap_netdev_ops, clears IFF_TX_SKB_SHARING, sets
// IFF_LIVE_ADDR_CHANGE and finishes with ip_tunnel_setup() using the gretap
// per-netns id.
1333 static void ipgre_tap_setup(struct net_device *dev)
1337 dev->netdev_ops = &gre_tap_netdev_ops;
1338 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1339 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1340 ip_tunnel_setup(dev, gre_tap_net_id);
// ipgre_newlink_encap_setup - apply encapsulation attributes to a tunnel.
// @dev:  tunnel device
// @data: IFLA_GRE_* attributes
//
// ip_tunnel_encap_setup() is called only when ipgre_netlink_encap_parms()
// reports that encapsulation attributes are present.
1344 ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
1346 struct ip_tunnel_encap ipencap;
1348 if (ipgre_netlink_encap_parms(data, &ipencap)) {
1349 struct ip_tunnel *t = netdev_priv(dev);
1350 int err = ip_tunnel_encap_setup(t, &ipencap);
// ipgre_newlink - create a GRE link from netlink attributes.
// @src_net: not referenced in the visible lines
// @dev:     device being created
// @tb:      generic link attributes
// @data:    IFLA_GRE_* attributes
// @extack:  netlink extended ack (not referenced in the visible lines)
//
// Applies the encapsulation attributes, parses the tunnel parameters and
// returns the result of ip_tunnel_newlink().
1359 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1360 struct nlattr *tb[], struct nlattr *data[],
1361 struct netlink_ext_ack *extack)
1363 struct ip_tunnel_parm p;
1367 err = ipgre_newlink_encap_setup(dev, data);
1371 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1374 return ip_tunnel_newlink(dev, tb, &p, fwmark);
/*
 * rtnl_link_ops.newlink callback for "erspan" links (see erspan_link_ops).
 *
 * Same sequence as ipgre_newlink(), except that the attributes are parsed
 * by erspan_netlink_parms() instead of ipgre_netlink_parms().
 * NOTE(review): the error checks between the steps are not visible here;
 * presumably each failure returns early — confirm.
 */
1377 static int erspan_newlink(struct net *src_net, struct net_device *dev,
1378 struct nlattr *tb[], struct nlattr *data[],
1379 struct netlink_ext_ack *extack)
1381 struct ip_tunnel_parm p;
1385 err = ipgre_newlink_encap_setup(dev, data);
1389 err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1392 return ip_tunnel_newlink(dev, tb, &p, fwmark);
/*
 * rtnl_link_ops.changelink callback shared by the "gre" and "gretap" ops.
 *
 * Re-applies encap attributes, re-parses the GRE attributes and applies
 * them with ip_tunnel_changelink(); afterwards the parsed i_flags/o_flags
 * are copied into the live tunnel and ipgre_link_update() is called.
 * NOTE(review): error checks and the final return are not visible here.
 */
1395 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1396 struct nlattr *data[],
1397 struct netlink_ext_ack *extack)
1399 struct ip_tunnel *t = netdev_priv(dev);
/* Start from the current mark; it is passed by pointer to the parser below. */
1400 __u32 fwmark = t->fwmark;
1401 struct ip_tunnel_parm p;
1404 err = ipgre_newlink_encap_setup(dev, data);
1408 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1412 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
/* The flag words are copied into the live tunnel here, after ip_tunnel_changelink(). */
1416 t->parms.i_flags = p.i_flags;
1417 t->parms.o_flags = p.o_flags;
/* Second argument is true only when the request carried no IFLA_MTU. */
1419 ipgre_link_update(dev, !tb[IFLA_MTU]);
/*
 * rtnl_link_ops.changelink callback for "erspan" links.
 *
 * Mirrors ipgre_changelink() but parses with erspan_netlink_parms(); no
 * ipgre_link_update() call appears among the visible lines.
 * NOTE(review): error checks and the final return are not visible here.
 */
1424 static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
1425 struct nlattr *data[],
1426 struct netlink_ext_ack *extack)
1428 struct ip_tunnel *t = netdev_priv(dev);
/* Start from the current mark; it is passed by pointer to the parser below. */
1429 __u32 fwmark = t->fwmark;
1430 struct ip_tunnel_parm p;
1433 err = ipgre_newlink_encap_setup(dev, data);
1437 err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1441 err = ip_tunnel_changelink(dev, tb, &p, fwmark);
/* The flag words are copied into the live tunnel here, after ip_tunnel_changelink(). */
1445 t->parms.i_flags = p.i_flags;
1446 t->parms.o_flags = p.o_flags;
/*
 * rtnl_link_ops.get_size callback used by all three link ops in this file.
 *
 * Each label below names the IFLA_GRE_* attribute that one size term
 * accounts for.
 * NOTE(review): the size expressions themselves, and several labels, are
 * not visible in this view; check the full function when adding or
 * removing an attribute in the fill_info callbacks.
 */
1451 static size_t ipgre_get_size(const struct net_device *dev)
1456 /* IFLA_GRE_IFLAGS */
1458 /* IFLA_GRE_OFLAGS */
1464 /* IFLA_GRE_LOCAL */
1466 /* IFLA_GRE_REMOTE */
1472 /* IFLA_GRE_PMTUDISC */
1474 /* IFLA_GRE_ENCAP_TYPE */
1476 /* IFLA_GRE_ENCAP_FLAGS */
1478 /* IFLA_GRE_ENCAP_SPORT */
1480 /* IFLA_GRE_ENCAP_DPORT */
1482 /* IFLA_GRE_COLLECT_METADATA */
1484 /* IFLA_GRE_IGNORE_DF */
1486 /* IFLA_GRE_FWMARK */
1488 /* IFLA_GRE_ERSPAN_INDEX */
1490 /* IFLA_GRE_ERSPAN_VER */
1492 /* IFLA_GRE_ERSPAN_DIR */
1494 /* IFLA_GRE_ERSPAN_HWID */
/*
 * rtnl_link_ops.fill_info callback: dump the tunnel configuration into skb
 * as IFLA_GRE_* netlink attributes.
 *
 * Used directly for "gre" and "gretap" links and as the tail of
 * erspan_fill_info().  Any failing nla_put_*() jumps to nla_put_failure.
 * NOTE(review): the nla_put_failure label and both return statements are
 * not visible in this view.
 */
1499 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1501 struct ip_tunnel *t = netdev_priv(dev);
1502 struct ip_tunnel_parm *p = &t->parms;
1503 __be16 o_flags = p->o_flags;
/*
 * Core tunnel parameters.  The flag words go through
 * gre_tnl_flags_to_gre_flags() before being emitted, and PMTUDISC is
 * reported as 0/1 depending on whether IP_DF is set in frag_off.
 */
1505 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1506 nla_put_be16(skb, IFLA_GRE_IFLAGS,
1507 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1508 nla_put_be16(skb, IFLA_GRE_OFLAGS,
1509 gre_tnl_flags_to_gre_flags(o_flags)) ||
1510 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1511 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1512 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1513 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1514 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1515 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1516 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1517 !!(p->iph.frag_off & htons(IP_DF))) ||
1518 nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1519 goto nla_put_failure;
/* Encapsulation settings (the value operands are on lines not visible here). */
1521 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1523 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1525 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1527 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1529 goto nla_put_failure;
1531 if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1532 goto nla_put_failure;
/* COLLECT_METADATA is a presence-only flag, emitted only when enabled. */
1534 if (t->collect_md) {
1535 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1536 goto nla_put_failure;
/*
 * rtnl_link_ops.fill_info callback for "erspan" links.
 *
 * Emits the ERSPAN-specific attributes for erspan_ver <= 2 and then
 * delegates the common GRE attributes to ipgre_fill_info().
 * NOTE(review): the nla_put_failure label and its return are not visible
 * in this view.
 */
1545 static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
1547 struct ip_tunnel *t = netdev_priv(dev);
1549 if (t->erspan_ver <= 2) {
/*
 * Side effect: although dev is const, this writes the tunnel's private
 * o_flags so that TUNNEL_KEY is set for non-zero versions when not in
 * collect_md mode, before ipgre_fill_info() reads o_flags.
 */
1550 if (t->erspan_ver != 0 && !t->collect_md)
1551 t->parms.o_flags |= TUNNEL_KEY;
1553 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1554 goto nla_put_failure;
/* Version 1 reports an index; version 2 reports direction and hardware id. */
1556 if (t->erspan_ver == 1) {
1557 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1558 goto nla_put_failure;
1559 } else if (t->erspan_ver == 2) {
1560 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1561 goto nla_put_failure;
1562 if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1563 goto nla_put_failure;
1567 return ipgre_fill_info(skb, dev);
/*
 * rtnl_link_ops.setup callback for "erspan" links (see erspan_link_ops).
 *
 * Installs erspan_netdev_ops, clears IFF_TX_SKB_SHARING, sets
 * IFF_LIVE_ADDR_CHANGE and finishes with ip_tunnel_setup() bound to the
 * erspan per-netns id.
 * NOTE(review): t is not used by the visible lines; lines 1576-1578 of the
 * original, where it is presumably used, are not visible in this view.
 */
1573 static void erspan_setup(struct net_device *dev)
1575 struct ip_tunnel *t = netdev_priv(dev);
1579 dev->netdev_ops = &erspan_netdev_ops;
1580 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1581 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1582 ip_tunnel_setup(dev, erspan_net_id);
/*
 * Netlink attribute policy for IFLA_GRE_* attributes, shared by the "gre",
 * "gretap" and "erspan" link ops below.
 *
 * LOCAL/REMOTE are described by length (the size of an IPv4 address field
 * in struct iphdr) rather than by a typed entry; COLLECT_METADATA is a
 * presence-only flag; everything else is a fixed-width integer.
 */
1586 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1587 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1588 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1589 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1590 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1591 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1592 [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
1593 [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
1594 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1595 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1596 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1597 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
1598 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
1599 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
1600 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
1601 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
1602 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
1603 [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
1604 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
1605 [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
1606 [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
1607 [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
/*
 * rtnetlink link ops registered first by ipgre_init() and also handed to
 * register_pernet_device() users elsewhere in the file.  Uses the
 * ipgre_tunnel_* setup/validate pair with the shared ipgre newlink,
 * changelink, get_size and fill_info handlers.
 * NOTE(review): the initializer on original line 1611 is not visible here.
 */
1610 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1612 .maxtype = IFLA_GRE_MAX,
1613 .policy = ipgre_policy,
1614 .priv_size = sizeof(struct ip_tunnel),
1615 .setup = ipgre_tunnel_setup,
1616 .validate = ipgre_tunnel_validate,
1617 .newlink = ipgre_newlink,
1618 .changelink = ipgre_changelink,
1619 .dellink = ip_tunnel_dellink,
1620 .get_size = ipgre_get_size,
1621 .fill_info = ipgre_fill_info,
1622 .get_link_net = ip_tunnel_get_link_net,
/*
 * rtnetlink link ops for gretap devices; also used by
 * gretap_fb_dev_create() and the gretap pernet init/exit helpers.
 * Differs from ipgre_link_ops only in its setup and validate callbacks.
 * NOTE(review): the initializer on original line 1626 is not visible here.
 */
1625 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1627 .maxtype = IFLA_GRE_MAX,
1628 .policy = ipgre_policy,
1629 .priv_size = sizeof(struct ip_tunnel),
1630 .setup = ipgre_tap_setup,
1631 .validate = ipgre_tap_validate,
1632 .newlink = ipgre_newlink,
1633 .changelink = ipgre_changelink,
1634 .dellink = ip_tunnel_dellink,
1635 .get_size = ipgre_get_size,
1636 .fill_info = ipgre_fill_info,
1637 .get_link_net = ip_tunnel_get_link_net,
/*
 * rtnetlink link ops for ERSPAN devices.  Shares the policy, priv_size,
 * dellink, get_size and get_link_net entries with the GRE ops above but
 * uses ERSPAN-specific setup, validate, newlink, changelink and fill_info.
 * NOTE(review): the initializer on original line 1641 is not visible here.
 */
1640 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1642 .maxtype = IFLA_GRE_MAX,
1643 .policy = ipgre_policy,
1644 .priv_size = sizeof(struct ip_tunnel),
1645 .setup = erspan_setup,
1646 .validate = erspan_validate,
1647 .newlink = erspan_newlink,
1648 .changelink = erspan_changelink,
1649 .dellink = ip_tunnel_dellink,
1650 .get_size = ipgre_get_size,
1651 .fill_info = erspan_fill_info,
1652 .get_link_net = ip_tunnel_get_link_net,
/*
 * Create a flow-based (collect_md) gretap device; exported GPL-only for
 * use by other modules.
 *
 * @net:              namespace handed to rtnl_create_link() and newlink
 * @name:             device name handed to rtnl_create_link()
 * @name_assign_type: name-assignment type handed to rtnl_create_link()
 *
 * The link is created with ipgre_tap_ops and an all-zero tb[], marked
 * collect_md, run through ipgre_newlink() with no data attributes, raised
 * to IP_MAX_MTU and finally configured with rtnl_configure_link().
 *
 * Failures are reported as ERR_PTR(err).  The final error path tears the
 * device down with ip_tunnel_dellink() + unregister_netdevice_many().
 * NOTE(review): the error checks, the cleanup after a failed
 * ipgre_newlink() and the success return are not visible in this view.
 */
1655 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1656 u8 name_assign_type)
1658 struct nlattr *tb[IFLA_MAX + 1];
1659 struct net_device *dev;
1660 LIST_HEAD(list_kill);
1661 struct ip_tunnel *t;
1664 memset(&tb, 0, sizeof(tb));
1666 dev = rtnl_create_link(net, name, name_assign_type,
1667 &ipgre_tap_ops, tb, NULL);
1671 /* Configure flow based GRE device. */
1672 t = netdev_priv(dev);
1673 t->collect_md = true;
1675 err = ipgre_newlink(net, dev, tb, NULL, NULL);
1678 return ERR_PTR(err);
1681 /* openvswitch users expect packet sizes to be unrestricted,
1682 * so set the largest MTU we can.
1684 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1688 err = rtnl_configure_link(dev, NULL, 0, NULL);
1694 ip_tunnel_dellink(dev, &list_kill);
1695 unregister_netdevice_many(&list_kill);
1696 return ERR_PTR(err);
1698 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
/*
 * Per-netns init for gretap: delegates to ip_tunnel_init_net() with the
 * gretap net id, ipgre_tap_ops and the device name "gretap0", and returns
 * its result.
 */
1700 static int __net_init ipgre_tap_init_net(struct net *net)
1702 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
/*
 * Batched per-netns exit for gretap: passes the list of exiting namespaces
 * to ip_tunnel_delete_nets() together with the gretap net id and link ops.
 */
1705 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1707 ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
/*
 * pernet_operations for gretap, registered from ipgre_init().
 * .id points at gre_tap_net_id and .size is one struct ip_tunnel_net.
 */
1710 static struct pernet_operations ipgre_tap_net_ops = {
1711 .init = ipgre_tap_init_net,
1712 .exit_batch = ipgre_tap_exit_batch_net,
1713 .id = &gre_tap_net_id,
1714 .size = sizeof(struct ip_tunnel_net),
/*
 * Per-netns init for ERSPAN: delegates to ip_tunnel_init_net() with the
 * erspan net id, erspan_link_ops and the device name "erspan0", and
 * returns its result.
 */
1717 static int __net_init erspan_init_net(struct net *net)
1719 return ip_tunnel_init_net(net, erspan_net_id,
1720 &erspan_link_ops, "erspan0");
/*
 * Batched per-netns exit for ERSPAN: passes the list of exiting namespaces
 * to ip_tunnel_delete_nets() together with the erspan net id and link ops.
 */
1723 static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1725 ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
/*
 * pernet_operations for ERSPAN, registered from ipgre_init().
 * .id points at erspan_net_id and .size is one struct ip_tunnel_net.
 */
1728 static struct pernet_operations erspan_net_ops = {
1729 .init = erspan_init_net,
1730 .exit_batch = erspan_exit_batch_net,
1731 .id = &erspan_net_id,
1732 .size = sizeof(struct ip_tunnel_net),
/*
 * Module init.
 *
 * Registration order: the three pernet device ops (gre, gretap, erspan),
 * the GRE protocol handler for GREPROTO_CISCO, then the three rtnl link
 * ops (gre, gretap, erspan).  The unregister calls at the end undo those
 * steps in exact reverse order for the error paths.
 * NOTE(review): the error checks, the unwind labels targeted by the gotos
 * and the return statements are not visible in this view.
 */
1735 static int __init ipgre_init(void)
1739 pr_info("GRE over IPv4 tunneling driver\n");
1741 err = register_pernet_device(&ipgre_net_ops);
1745 err = register_pernet_device(&ipgre_tap_net_ops);
1747 goto pnet_tap_failed;
1749 err = register_pernet_device(&erspan_net_ops);
1751 goto pnet_erspan_failed;
1753 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1755 pr_info("%s: can't add protocol\n", __func__);
1756 goto add_proto_failed;
1759 err = rtnl_link_register(&ipgre_link_ops);
1761 goto rtnl_link_failed;
1763 err = rtnl_link_register(&ipgre_tap_ops);
1765 goto tap_ops_failed;
1767 err = rtnl_link_register(&erspan_link_ops);
1769 goto erspan_link_failed;
/* Unwind: each call below reverses one successful registration above. */
1774 rtnl_link_unregister(&ipgre_tap_ops);
1776 rtnl_link_unregister(&ipgre_link_ops);
1778 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1780 unregister_pernet_device(&erspan_net_ops);
1782 unregister_pernet_device(&ipgre_tap_net_ops);
1784 unregister_pernet_device(&ipgre_net_ops);
/*
 * Module exit: drops the three rtnl link ops first, then the GRE protocol
 * handler, then the three pernet device ops — everything ipgre_init()
 * registered.
 */
1788 static void __exit ipgre_fini(void)
1790 rtnl_link_unregister(&ipgre_tap_ops);
1791 rtnl_link_unregister(&ipgre_link_ops);
1792 rtnl_link_unregister(&erspan_link_ops);
1793 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1794 unregister_pernet_device(&ipgre_tap_net_ops);
1795 unregister_pernet_device(&ipgre_net_ops);
1796 unregister_pernet_device(&erspan_net_ops);
/*
 * Module entry/exit hooks and metadata.  The aliases cover the three link
 * names ("gre", "gretap", "erspan") and the three device names ("gre0",
 * "gretap0", "erspan0"); the latter two device names match the strings
 * passed to ip_tunnel_init_net() above.
 */
1799 module_init(ipgre_init);
1800 module_exit(ipgre_fini);
1801 MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
1802 MODULE_LICENSE("GPL");
1803 MODULE_ALIAS_RTNL_LINK("gre");
1804 MODULE_ALIAS_RTNL_LINK("gretap");
1805 MODULE_ALIAS_RTNL_LINK("erspan");
1806 MODULE_ALIAS_NETDEV("gre0");
1807 MODULE_ALIAS_NETDEV("gretap0");
1808 MODULE_ALIAS_NETDEV("erspan0");