2 * SR-IPv6 implementation
5 * David Lebrun <david.lebrun@uclouvain.be>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 #include <linux/types.h>
15 #include <linux/skbuff.h>
16 #include <linux/net.h>
17 #include <linux/module.h>
19 #include <net/lwtunnel.h>
20 #include <net/netevent.h>
21 #include <net/netns/generic.h>
22 #include <net/ip6_fib.h>
23 #include <net/route.h>
25 #include <linux/seg6.h>
26 #include <linux/seg6_local.h>
27 #include <net/addrconf.h>
28 #include <net/ip6_route.h>
29 #include <net/dst_cache.h>
30 #include <net/ip_tunnels.h>
31 #ifdef CONFIG_IPV6_SEG6_HMAC
32 #include <net/seg6_hmac.h>
34 #include <linux/etherdevice.h>
/* Forward declaration: the action descriptor's callback takes a pointer to
 * the per-route state, whose definition follows the descriptor.
 */
36 struct seg6_local_lwt;
/* Descriptor of one seg6local action. The member visible here is the
 * 'input' handler, called with the packet and the route's seg6_local_lwt
 * state. The action table in this file also initializes .action, .attrs
 * and .static_headroom members of this structure.
 */
38 struct seg6_action_desc {
41 int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
/* Private state of a seg6local lightweight tunnel; it is stored in the data
 * area of the lwtunnel_state (see seg6_local_lwtunnel()).
 *
 * srh:  copy of the SRH supplied through the SEG6_LOCAL_SRH netlink
 *       attribute (allocated in parse_nla_srh()).
 * desc: descriptor of the configured action; its .attrs bitmask is read by
 *       the fill/size/compare encap callbacks.
 *
 * Other members referenced in this file (action, table, nh4, nh6, iif, oif,
 * headroom) are declared on lines that are not part of this excerpt.
 */
45 struct seg6_local_lwt {
47 struct ipv6_sr_hdr *srh;
55 struct seg6_action_desc *desc;
/* Return the seg6local private state kept in the data area of @lwt. */
58 static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
60 return (struct seg6_local_lwt *)lwt->data;
63 static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
65 struct ipv6_sr_hdr *srh;
68 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
71 if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
74 srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
76 len = (srh->hdrlen + 1) << 3;
78 if (!pskb_may_pull(skb, srhoff + len))
81 if (!seg6_validate_srh(srh, len))
/* Fetch the packet's SRH and apply the checks needed before endpoint
 * processing. The visible checks test for segments_left == 0 and, when
 * CONFIG_IPV6_SEG6_HMAC is enabled, for a failing seg6_hmac_validate_skb().
 * NOTE(review): the statement that obtains srh and the return statements
 * are not part of this excerpt; presumably srh comes from get_srh() and
 * NULL is returned when a check fails -- confirm.
 */
87 static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
89 struct ipv6_sr_hdr *srh;
95 if (srh->segments_left == 0)
98 #ifdef CONFIG_IPV6_SEG6_HMAC
99 if (!seg6_hmac_validate_skb(skb))
/* Remove the outer headers of an encapsulated packet so that skb->data
 * points at the inner header of protocol @proto.
 *
 * Visible steps, in order:
 *  - test for an SRH with segments_left > 0 and, with
 *    CONFIG_IPV6_SEG6_HMAC, for an SRH-carrying packet that fails
 *    seg6_hmac_validate_skb();
 *  - find the offset of the @proto header with ipv6_find_hdr();
 *  - pskb_pull() that many bytes and let skb_postpull_rcsum() account for
 *    the removed bytes;
 *  - reset the network and transport header offsets to the new data start;
 *  - call iptunnel_pull_offloads().
 *
 * Returns a bool. NOTE(review): the return statements are not part of this
 * excerpt; callers treat a false result as failure.
 */
106 static bool decap_and_validate(struct sk_buff *skb, int proto)
108 struct ipv6_sr_hdr *srh;
109 unsigned int off = 0;
112 if (srh && srh->segments_left > 0)
115 #ifdef CONFIG_IPV6_SEG6_HMAC
116 if (srh && !seg6_hmac_validate_skb(skb))
120 if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
123 if (!pskb_pull(skb, off))
126 skb_postpull_rcsum(skb, skb_network_header(skb), off);
128 skb_reset_network_header(skb);
129 skb_reset_transport_header(skb);
130 if (iptunnel_pull_offloads(skb))
/* Step to the next segment of @srh: decrement segments_left and take the
 * address of the segment list entry at the new index.
 * NOTE(review): the store into @daddr is on a line not part of this
 * excerpt; presumably the selected segment is copied to *daddr -- confirm.
 */
136 static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
138 struct in6_addr *addr;
140 srh->segments_left--;
141 addr = srh->segments + srh->segments_left;
/* Perform an IPv6 route lookup for @skb and attach the result to it with
 * skb_dst_set().
 *
 * The flow key is built from the packet's IPv6 header: input interface
 * index, source address, flow info, skb mark and next header. The
 * destination is *@nhaddr when @nhaddr is non-NULL, otherwise the packet's
 * own destination address. Callers pass a table id as third argument
 * (0 or slwt->table); it is used as tbl_id below.
 *
 * Two lookup paths are visible: ip6_route_input_lookup(), and a lookup
 * restricted to the table returned by fib6_get_table(net, tbl_id) through
 * ip6_pol_route(). A result on a loopback device without error is
 * special-cased, and net->ipv6.ip6_blk_hole_entry is used on one path.
 * NOTE(review): the conditions selecting between these paths, and the one
 * guarding FLOWI_FLAG_KNOWN_NH, are on lines not part of this excerpt.
 */
145 static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
148 struct net *net = dev_net(skb->dev);
149 struct ipv6hdr *hdr = ipv6_hdr(skb);
150 int flags = RT6_LOOKUP_F_HAS_SADDR;
151 struct dst_entry *dst = NULL;
155 fl6.flowi6_iif = skb->dev->ifindex;
156 fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
157 fl6.saddr = hdr->saddr;
158 fl6.flowlabel = ip6_flowinfo(hdr);
159 fl6.flowi6_mark = skb->mark;
160 fl6.flowi6_proto = hdr->nexthdr;
163 fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
166 dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
168 struct fib6_table *table;
170 table = fib6_get_table(net, tbl_id);
174 rt = ip6_pol_route(net, table, 0, &fl6, flags);
178 if (dst && dst->dev->flags & IFF_LOOPBACK && !dst->error) {
185 rt = net->ipv6.ip6_blk_hole_entry;
191 skb_dst_set(skb, dst);
/* End action handler. Visible flow: obtain the SRH through
 * get_and_validate_srh(), advance to the next segment (the IPv6
 * destination address is passed as the address to update), look up the
 * route with no forced nexthop and table id 0, then pass the packet to
 * dst_input() and return its result. @slwt is not used by the visible
 * lines. NOTE(review): the failure path is not part of this excerpt.
 */
194 /* regular endpoint function */
195 static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
197 struct ipv6_sr_hdr *srh;
199 srh = get_and_validate_srh(skb);
203 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
205 lookup_nexthop(skb, NULL, 0);
207 return dst_input(skb);
/* End.X action handler. Same visible flow as the End action, except that
 * the route lookup is done towards the configured IPv6 nexthop slwt->nh6
 * instead of the packet's new destination address. Returns the result of
 * dst_input(). NOTE(review): the failure path is not part of this excerpt.
 */
214 /* regular endpoint, and forward to specified nexthop */
215 static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
217 struct ipv6_sr_hdr *srh;
219 srh = get_and_validate_srh(skb);
223 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
225 lookup_nexthop(skb, &slwt->nh6, 0);
227 return dst_input(skb);
/* End.T action handler. Obtains the SRH through get_and_validate_srh(),
 * advances to the next segment, then looks up the route for the new
 * destination with slwt->table as table id and passes the packet to
 * dst_input(). NOTE(review): the failure path is not part of this excerpt.
 */
234 static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
236 struct ipv6_sr_hdr *srh;
238 srh = get_and_validate_srh(skb);
242 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
244 lookup_nexthop(skb, NULL, slwt->table);
246 return dst_input(skb);
/* End.DX2 action handler. Visible flow:
 *  - decap_and_validate() with NEXTHDR_NONE as the inner protocol;
 *  - require ETH_HLEN bytes in the linear area, reset the MAC header to the
 *    current data pointer and read the Ethernet header there;
 *  - check the frame's h_proto with eth_proto_is_802_3();
 *  - look up the egress device by index slwt->oif in the packet's netns
 *    with dev_get_by_index_rcu(), then test that it is of type
 *    ARPHRD_ETHER, is IFF_UP and has carrier;
 *  - test skb_warn_if_lro(), call skb_forward_csum(), and compare the
 *    payload length (skb->len - ETH_HLEN) with the device MTU;
 *  - set skb->protocol from the inner Ethernet header and transmit with
 *    dev_queue_xmit(), whose result is returned.
 * NOTE(review): the outcome of each failed test and the assignment of the
 * egress device to the skb are on lines not part of this excerpt.
 */
253 /* decapsulate and forward inner L2 frame on specified interface */
254 static int input_action_end_dx2(struct sk_buff *skb,
255 struct seg6_local_lwt *slwt)
257 struct net *net = dev_net(skb->dev);
258 struct net_device *odev;
261 if (!decap_and_validate(skb, NEXTHDR_NONE))
264 if (!pskb_may_pull(skb, ETH_HLEN))
267 skb_reset_mac_header(skb);
268 eth = (struct ethhdr *)skb->data;
270 /* To determine the frame's protocol, we assume it is 802.3. This avoids
271 * a call to eth_type_trans(), which is not really relevant for our
274 if (!eth_proto_is_802_3(eth->h_proto))
277 odev = dev_get_by_index_rcu(net, slwt->oif);
281 /* As we accept Ethernet frames, make sure the egress device is of
284 if (odev->type != ARPHRD_ETHER)
287 if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
292 if (skb_warn_if_lro(skb))
295 skb_forward_csum(skb);
297 if (skb->len - ETH_HLEN > odev->mtu)
301 skb->protocol = eth->h_proto;
303 return dev_queue_xmit(skb);
/* End.DX6 action handler. Visible flow: decap_and_validate() with
 * IPPROTO_IPV6 as inner protocol, require a full inner IPv6 header in the
 * linear area, then route the inner packet with lookup_nexthop() (table id
 * 0) and pass it to dst_input(). nhaddr starts as NULL and is changed only
 * under the !ipv6_addr_any(&slwt->nh6) test.
 * NOTE(review): the assignment made under that test and the failure paths
 * are on lines not part of this excerpt.
 */
310 /* decapsulate and forward to specified nexthop */
311 static int input_action_end_dx6(struct sk_buff *skb,
312 struct seg6_local_lwt *slwt)
314 struct in6_addr *nhaddr = NULL;
316 /* this function accepts IPv6 encapsulated packets, with either
317 * an SRH with SL=0, or no SRH.
320 if (!decap_and_validate(skb, IPPROTO_IPV6))
323 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
326 /* The inner packet is not associated to any local interface,
327 * so we do not call netif_rx().
329 * If slwt->nh6 is set to ::, then lookup the nexthop for the
330 * inner packet's DA. Otherwise, use the specified nexthop.
333 if (!ipv6_addr_any(&slwt->nh6))
336 lookup_nexthop(skb, nhaddr, 0);
338 return dst_input(skb);
/* End.DX4 action handler. Visible flow: decap_and_validate() with
 * IPPROTO_IPIP as inner protocol, require a full inner IPv4 header in the
 * linear area, mark the skb as ETH_P_IP, choose the nexthop (the
 * configured slwt->nh4 when it is non-zero, otherwise the inner
 * destination address), route with ip_route_input() and pass the packet to
 * dst_input().
 * NOTE(review): the declaration/assignment of iph and the handling of err
 * and of failed checks are on lines not part of this excerpt.
 */
344 static int input_action_end_dx4(struct sk_buff *skb,
345 struct seg6_local_lwt *slwt)
351 if (!decap_and_validate(skb, IPPROTO_IPIP))
354 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
357 skb->protocol = htons(ETH_P_IP);
361 nhaddr = slwt->nh4.s_addr ?: iph->daddr;
365 err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
369 return dst_input(skb);
/* End.DT6 action handler. Visible flow: decap_and_validate() with
 * IPPROTO_IPV6 as inner protocol, require a full inner IPv6 header in the
 * linear area, look up the route for the inner destination with
 * slwt->table as table id, and pass the packet to dst_input().
 * NOTE(review): the failure paths are not part of this excerpt.
 */
376 static int input_action_end_dt6(struct sk_buff *skb,
377 struct seg6_local_lwt *slwt)
379 if (!decap_and_validate(skb, IPPROTO_IPV6))
382 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
385 lookup_nexthop(skb, NULL, slwt->table);
387 return dst_input(skb);
/* End.B6 action handler. Visible flow: obtain the packet's SRH through
 * get_and_validate_srh(), insert the configured slwt->srh with
 * seg6_do_srh_inline(), recompute the IPv6 payload length from skb->len,
 * place the transport header right after the IPv6 header, then route
 * (table id 0, no forced nexthop) and pass the packet to dst_input().
 * NOTE(review): the handling of a NULL srh and of err is on lines not part
 * of this excerpt.
 */
394 /* push an SRH on top of the current one */
395 static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
397 struct ipv6_sr_hdr *srh;
400 srh = get_and_validate_srh(skb);
404 err = seg6_do_srh_inline(skb, slwt->srh);
408 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
409 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
411 lookup_nexthop(skb, NULL, 0);
413 return dst_input(skb);
/* End.B6.Encaps action handler. Visible flow: obtain the packet's SRH
 * through get_and_validate_srh() and advance it to the next segment; record
 * the current headers as inner headers and flag the skb as encapsulated;
 * add an outer header carrying the configured slwt->srh with
 * seg6_do_srh_encap(); recompute the outer payload length from skb->len and
 * place the transport header right after the outer IPv6 header; then route
 * (table id 0, no forced nexthop) and pass the packet to dst_input().
 * NOTE(review): the handling of a NULL srh and of err is on lines not part
 * of this excerpt.
 */
420 /* encapsulate within an outer IPv6 header and a specified SRH */
421 static int input_action_end_b6_encap(struct sk_buff *skb,
422 struct seg6_local_lwt *slwt)
424 struct ipv6_sr_hdr *srh;
427 srh = get_and_validate_srh(skb);
431 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
433 skb_reset_inner_headers(skb);
434 skb->encapsulation = 1;
436 err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
440 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
441 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
443 lookup_nexthop(skb, NULL, 0);
445 return dst_input(skb);
/* Table of the supported seg6local actions. Each entry binds an action id
 * to its input handler and to .attrs, a bitmask built from
 * (1 << SEG6_LOCAL_*) naming the netlink attributes the action uses; the
 * parse, fill, size and compare routines in this file iterate over those
 * bits. The End.B6.Encaps entry additionally sets .static_headroom to the
 * size of one IPv6 header, which parse_nla_action() adds to the state's
 * headroom.
 * NOTE(review): the .attrs initializer of the SEG6_LOCAL_ACTION_END entry
 * is on a line not part of this excerpt.
 */
452 static struct seg6_action_desc seg6_action_table[] = {
454 .action = SEG6_LOCAL_ACTION_END,
456 .input = input_action_end,
459 .action = SEG6_LOCAL_ACTION_END_X,
460 .attrs = (1 << SEG6_LOCAL_NH6),
461 .input = input_action_end_x,
464 .action = SEG6_LOCAL_ACTION_END_T,
465 .attrs = (1 << SEG6_LOCAL_TABLE),
466 .input = input_action_end_t,
469 .action = SEG6_LOCAL_ACTION_END_DX2,
470 .attrs = (1 << SEG6_LOCAL_OIF),
471 .input = input_action_end_dx2,
474 .action = SEG6_LOCAL_ACTION_END_DX6,
475 .attrs = (1 << SEG6_LOCAL_NH6),
476 .input = input_action_end_dx6,
479 .action = SEG6_LOCAL_ACTION_END_DX4,
480 .attrs = (1 << SEG6_LOCAL_NH4),
481 .input = input_action_end_dx4,
484 .action = SEG6_LOCAL_ACTION_END_DT6,
485 .attrs = (1 << SEG6_LOCAL_TABLE),
486 .input = input_action_end_dt6,
489 .action = SEG6_LOCAL_ACTION_END_B6,
490 .attrs = (1 << SEG6_LOCAL_SRH),
491 .input = input_action_end_b6,
494 .action = SEG6_LOCAL_ACTION_END_B6_ENCAP,
495 .attrs = (1 << SEG6_LOCAL_SRH),
496 .input = input_action_end_b6_encap,
497 .static_headroom = sizeof(struct ipv6hdr),
/* Linear search of seg6_action_table for the entry whose .action equals
 * @action.
 * NOTE(review): the return statements are not part of this excerpt;
 * presumably the matching descriptor is returned, or NULL when none
 * matches -- confirm. The element count is computed with a sizeof
 * division; ARRAY_SIZE() is the usual kernel idiom for this.
 */
501 static struct seg6_action_desc *__get_action_desc(int action)
503 struct seg6_action_desc *desc;
506 count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc);
507 for (i = 0; i < count; i++) {
508 desc = &seg6_action_table[i];
509 if (desc->action == action)
/* lwtunnel input hook (installed as .input in seg6_local_ops). Takes the
 * seg6local state from the lwtstate of the dst currently attached to @skb
 * and returns the result of the action's input handler. Packets whose
 * skb->protocol is not ETH_P_IPV6 take a separate branch.
 * NOTE(review): the body of that branch and the assignment of desc are on
 * lines not part of this excerpt.
 */
516 static int seg6_local_input(struct sk_buff *skb)
518 struct dst_entry *orig_dst = skb_dst(skb);
519 struct seg6_action_desc *desc;
520 struct seg6_local_lwt *slwt;
522 if (skb->protocol != htons(ETH_P_IPV6)) {
527 slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
530 return desc->input(skb, slwt);
/* Netlink attribute policy for the nested SEG6_LOCAL_* attributes; it is
 * passed to nla_parse_nested() in seg6_local_build_state().
 * NOTE(review): for NLA_BINARY the .len field is understood to be an upper
 * bound on the payload length rather than an exact length, so consumers of
 * NH4/NH6 should not assume a full address is present without checking --
 * confirm against the netlink policy documentation.
 */
533 static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
534 [SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
535 [SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
536 [SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
537 [SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
538 .len = sizeof(struct in_addr) },
539 [SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
540 .len = sizeof(struct in6_addr) },
541 [SEG6_LOCAL_IIF] = { .type = NLA_U32 },
542 [SEG6_LOCAL_OIF] = { .type = NLA_U32 },
/* Parse the SEG6_LOCAL_SRH attribute into @slwt. The payload must be at
 * least one SRH header plus one IPv6 address long and must pass
 * seg6_validate_srh(); it is then copied into a GFP_KERNEL allocation
 * stored in slwt->srh, and its length is added to slwt->headroom.
 * NOTE(review): the error codes returned on the failing checks and the
 * allocation-failure check are on lines not part of this excerpt. The
 * kmalloc() + memcpy() pair could be a single kmemdup().
 */
545 static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
547 struct ipv6_sr_hdr *srh;
550 srh = nla_data(attrs[SEG6_LOCAL_SRH]);
551 len = nla_len(attrs[SEG6_LOCAL_SRH]);
553 /* SRH must contain at least one segment */
554 if (len < sizeof(*srh) + sizeof(struct in6_addr))
557 if (!seg6_validate_srh(srh, len))
560 slwt->srh = kmalloc(len, GFP_KERNEL);
564 memcpy(slwt->srh, srh, len);
566 slwt->headroom += len;
/* Emit the SEG6_LOCAL_SRH attribute into @skb: reserve (hdrlen + 1) * 8
 * bytes with nla_reserve() and copy the SRH into the reserved payload.
 * NOTE(review): the assignment of srh (presumably slwt->srh) and the
 * handling of a failed nla_reserve() are on lines not part of this excerpt.
 */
571 static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
573 struct ipv6_sr_hdr *srh;
578 len = (srh->hdrlen + 1) << 3;
580 nla = nla_reserve(skb, SEG6_LOCAL_SRH, len);
584 memcpy(nla_data(nla), srh, len);
/* Compare the SRHs of two states. Their byte lengths, derived from hdrlen
 * as (hdrlen + 1) * 8, are compared first; when equal, the result is that
 * of memcmp() over that length, i.e. 0 when the headers are identical.
 * NOTE(review): the value returned on a length mismatch is on a line not
 * part of this excerpt.
 */
589 static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
591 int len = (a->srh->hdrlen + 1) << 3;
593 if (len != ((b->srh->hdrlen + 1) << 3))
596 return memcmp(a->srh, b->srh, len);
/* Read the u32 SEG6_LOCAL_TABLE attribute into slwt->table. */
599 static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt)
601 slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
/* Emit slwt->table as a u32 SEG6_LOCAL_TABLE attribute into @skb.
 * NOTE(review): the return values are on lines not part of this excerpt.
 */
606 static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
608 if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
/* Compare the table ids of two states.
 * NOTE(review): the return values are on lines not part of this excerpt;
 * seg6_local_cmp_encap() treats a non-zero result as "different".
 */
614 static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
616 if (a->table != b->table)
622 static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
624 memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
625 sizeof(struct in_addr));
/* Emit slwt->nh4 as a SEG6_LOCAL_NH4 attribute: reserve room for one
 * struct in_addr with nla_reserve() and copy the address into it.
 * NOTE(review): the handling of a failed nla_reserve() and the return
 * values are on lines not part of this excerpt.
 */
630 static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
634 nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
638 memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));
/* Compare the IPv4 nexthops of two states; returns the memcmp() result,
 * i.e. 0 when they are equal.
 */
643 static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
645 return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
648 static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt)
650 memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
651 sizeof(struct in6_addr));
/* Emit slwt->nh6 as a SEG6_LOCAL_NH6 attribute: reserve room for one
 * struct in6_addr with nla_reserve() and copy the address into it.
 * NOTE(review): the handling of a failed nla_reserve() and the return
 * values are on lines not part of this excerpt.
 */
656 static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
660 nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
664 memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));
/* Compare the IPv6 nexthops of two states; returns the memcmp() result,
 * i.e. 0 when they are equal.
 */
669 static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
671 return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
/* Read the u32 SEG6_LOCAL_IIF attribute into slwt->iif. */
674 static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
676 slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
/* Emit slwt->iif as a u32 SEG6_LOCAL_IIF attribute into @skb.
 * NOTE(review): the return values are on lines not part of this excerpt.
 */
681 static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
683 if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
/* Compare the input interface indexes of two states.
 * NOTE(review): the return values are on lines not part of this excerpt;
 * seg6_local_cmp_encap() treats a non-zero result as "different".
 */
689 static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
691 if (a->iif != b->iif)
/* Read the u32 SEG6_LOCAL_OIF attribute into slwt->oif. */
697 static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
699 slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
/* Emit slwt->oif as a u32 SEG6_LOCAL_OIF attribute into @skb.
 * NOTE(review): the return values are on lines not part of this excerpt.
 */
704 static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
706 if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
/* Compare the output interface indexes of two states.
 * NOTE(review): the return values are on lines not part of this excerpt;
 * seg6_local_cmp_encap() treats a non-zero result as "different".
 */
712 static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
714 if (a->oif != b->oif)
/* Callbacks handling one SEG6_LOCAL_* attribute:
 * parse: read the attribute from the parsed netlink table into the state;
 * put:   write the attribute from the state into a netlink message;
 * cmp:   compare the attribute's value in two states (non-zero is treated
 *        as "different" by seg6_local_cmp_encap()).
 */
720 struct seg6_action_param {
721 int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
722 int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
723 int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);
/* Per-attribute callback table, indexed by SEG6_LOCAL_* attribute id. The
 * parse, fill and compare routines index it with each bit position set in
 * an action's .attrs mask.
 * NOTE(review): the .put initializers of the SRH, NH4, NH6, IIF and OIF
 * entries are on lines not part of this excerpt.
 */
726 static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
727 [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh,
729 .cmp = cmp_nla_srh },
731 [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table,
732 .put = put_nla_table,
733 .cmp = cmp_nla_table },
735 [SEG6_LOCAL_NH4] = { .parse = parse_nla_nh4,
737 .cmp = cmp_nla_nh4 },
739 [SEG6_LOCAL_NH6] = { .parse = parse_nla_nh6,
741 .cmp = cmp_nla_nh6 },
743 [SEG6_LOCAL_IIF] = { .parse = parse_nla_iif,
745 .cmp = cmp_nla_iif },
747 [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif,
749 .cmp = cmp_nla_oif },
/* Resolve the descriptor of slwt->action and parse the attributes that
 * action uses. Visible flow: look the descriptor up with
 * __get_action_desc(), add its static_headroom to slwt->headroom, then for
 * every attribute id whose bit is set in desc->attrs call that attribute's
 * parse callback from seg6_action_params.
 * NOTE(review): the checks for an unknown action, for a missing attribute
 * and for a failing parse callback, as well as the return values, are on
 * lines not part of this excerpt.
 */
752 static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
754 struct seg6_action_param *param;
755 struct seg6_action_desc *desc;
758 desc = __get_action_desc(slwt->action);
766 slwt->headroom += desc->static_headroom;
768 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
769 if (desc->attrs & (1 << i)) {
773 param = &seg6_action_params[i];
775 err = param->parse(attrs, slwt);
/* lwtunnel build_state callback (installed in seg6_local_ops). Visible
 * flow:
 *  - test that @family is AF_INET6;
 *  - parse the nested attributes of @nla against seg6_local_policy;
 *  - test that SEG6_LOCAL_ACTION is present;
 *  - allocate a lwtunnel_state with room for a struct seg6_local_lwt;
 *  - store the action id and let parse_nla_action() parse the
 *    action-specific attributes;
 *  - set the state's type to LWTUNNEL_ENCAP_SEG6_LOCAL, its flags to
 *    LWTUNNEL_STATE_INPUT_REDIRECT and its headroom to the value
 *    accumulated in slwt->headroom.
 * NOTE(review): the error returns, the cleanup on failure and the store of
 * the new state through @ts are on lines not part of this excerpt.
 */
784 static int seg6_local_build_state(struct nlattr *nla, unsigned int family,
785 const void *cfg, struct lwtunnel_state **ts,
786 struct netlink_ext_ack *extack)
788 struct nlattr *tb[SEG6_LOCAL_MAX + 1];
789 struct lwtunnel_state *newts;
790 struct seg6_local_lwt *slwt;
793 if (family != AF_INET6)
796 err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy,
802 if (!tb[SEG6_LOCAL_ACTION])
805 newts = lwtunnel_state_alloc(sizeof(*slwt));
809 slwt = seg6_local_lwtunnel(newts);
810 slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
812 err = parse_nla_action(tb, slwt);
816 newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
817 newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
818 newts->headroom = slwt->headroom;
/* lwtunnel destroy_state callback (installed in seg6_local_ops). The
 * visible line only fetches the private state of @lwt.
 * NOTE(review): the release statement is on a line not part of this
 * excerpt; presumably it frees slwt->srh, which parse_nla_srh() allocates
 * -- confirm.
 */
830 static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
832 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
/* lwtunnel fill_encap callback: dump the state of @lwt into @skb. Emits
 * the action id as a u32 SEG6_LOCAL_ACTION attribute, then for every
 * attribute id whose bit is set in the action's .attrs mask calls that
 * attribute's put callback.
 * NOTE(review): the handling of failures and the return values are on
 * lines not part of this excerpt.
 */
837 static int seg6_local_fill_encap(struct sk_buff *skb,
838 struct lwtunnel_state *lwt)
840 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
841 struct seg6_action_param *param;
844 if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
847 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
848 if (slwt->desc->attrs & (1 << i)) {
849 param = &seg6_action_params[i];
850 err = param->put(skb, slwt);
/* lwtunnel get_encap_size callback: compute the netlink space needed to
 * dump the state of @lwt. Starts with one 4-byte attribute for the action
 * id and adds, for each attribute enabled in the action's .attrs mask, the
 * nla_total_size() of its payload: (hdrlen + 1) * 8 bytes for the SRH,
 * 4 bytes for TABLE, NH4, IIF and OIF, 16 bytes for NH6.
 * NOTE(review): the return statement is on a line not part of this
 * excerpt; presumably nlsize is returned.
 */
859 static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
861 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
865 nlsize = nla_total_size(4); /* action */
867 attrs = slwt->desc->attrs;
869 if (attrs & (1 << SEG6_LOCAL_SRH))
870 nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
872 if (attrs & (1 << SEG6_LOCAL_TABLE))
873 nlsize += nla_total_size(4);
875 if (attrs & (1 << SEG6_LOCAL_NH4))
876 nlsize += nla_total_size(4);
878 if (attrs & (1 << SEG6_LOCAL_NH6))
879 nlsize += nla_total_size(16);
881 if (attrs & (1 << SEG6_LOCAL_IIF))
882 nlsize += nla_total_size(4);
884 if (attrs & (1 << SEG6_LOCAL_OIF))
885 nlsize += nla_total_size(4);
/* lwtunnel cmp_encap callback: compare two seg6local states. The action
 * ids are compared first, then the .attrs masks of the two descriptors,
 * then every attribute enabled in the mask through its cmp callback, a
 * non-zero cmp result being tested as a mismatch.
 * NOTE(review): the return values are on lines not part of this excerpt.
 */
890 static int seg6_local_cmp_encap(struct lwtunnel_state *a,
891 struct lwtunnel_state *b)
893 struct seg6_local_lwt *slwt_a, *slwt_b;
894 struct seg6_action_param *param;
897 slwt_a = seg6_local_lwtunnel(a);
898 slwt_b = seg6_local_lwtunnel(b);
900 if (slwt_a->action != slwt_b->action)
903 if (slwt_a->desc->attrs != slwt_b->desc->attrs)
906 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
907 if (slwt_a->desc->attrs & (1 << i)) {
908 param = &seg6_action_params[i];
909 if (param->cmp(slwt_a, slwt_b))
/* lwtunnel operations of the seg6local encapsulation type; registered by
 * seg6_local_init() and removed by seg6_local_exit().
 */
917 static const struct lwtunnel_encap_ops seg6_local_ops = {
918 .build_state = seg6_local_build_state,
919 .destroy_state = seg6_local_destroy_state,
920 .input = seg6_local_input,
921 .fill_encap = seg6_local_fill_encap,
922 .get_encap_size = seg6_local_get_encap_size,
923 .cmp_encap = seg6_local_cmp_encap,
924 .owner = THIS_MODULE,
/* Register seg6_local_ops for LWTUNNEL_ENCAP_SEG6_LOCAL; returns the result
 * of lwtunnel_encap_add_ops().
 */
927 int __init seg6_local_init(void)
929 return lwtunnel_encap_add_ops(&seg6_local_ops,
930 LWTUNNEL_ENCAP_SEG6_LOCAL);
/* Unregister seg6_local_ops for LWTUNNEL_ENCAP_SEG6_LOCAL. */
933 void seg6_local_exit(void)
935 lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);