2 * SR-IPv6 implementation
5 * David Lebrun <david.lebrun@uclouvain.be>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 #include <linux/types.h>
15 #include <linux/skbuff.h>
16 #include <linux/net.h>
17 #include <linux/module.h>
19 #include <net/lwtunnel.h>
20 #include <net/netevent.h>
21 #include <net/netns/generic.h>
22 #include <net/ip6_fib.h>
23 #include <net/route.h>
25 #include <linux/seg6.h>
26 #include <linux/seg6_local.h>
27 #include <net/addrconf.h>
28 #include <net/ip6_route.h>
29 #include <net/dst_cache.h>
30 #include <net/ip_tunnels.h>
31 #ifdef CONFIG_IPV6_SEG6_HMAC
32 #include <net/seg6_hmac.h>
34 #include <linux/etherdevice.h>
/*
 * Descriptor of one seg6local action. Besides the handler below, the
 * action table in this file also initialises .action, .attrs and
 * .static_headroom members of this structure.
 */
36 struct seg6_local_lwt;
38 struct seg6_action_desc {
/* handler invoked by seg6_local_input() with the packet and tunnel state */
41 int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
/*
 * seg6local state kept in the data area of a lwtunnel_state (see
 * seg6_local_lwtunnel()). Other members referenced in this file include
 * action, table, nh4, nh6, iif, oif and headroom.
 */
45 struct seg6_local_lwt {
/* private copy of the SRH taken from the SEG6_LOCAL_SRH attribute */
47 struct ipv6_sr_hdr *srh;
/* action descriptor; its ->attrs mask selects the parameters in use */
55 struct seg6_action_desc *desc;
/* Return the seg6local private state stored in the data area of @lwt. */
58 static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
60 return (struct seg6_local_lwt *)lwt->data;
/*
 * Locate the Segment Routing Header of @skb.
 *
 * The routing header is searched with ipv6_find_hdr(); its fixed part and
 * then its full length must be pullable, and the header must pass
 * seg6_validate_srh(). Presumably NULL is returned when any of these
 * checks fails -- the return statements are not visible here, confirm.
 */
63 static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
65 struct ipv6_sr_hdr *srh;
/* srhoff receives the offset of the routing header */
68 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
/* make sure the fixed part of the SRH is in the linear area */
71 if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
74 srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
/* total SRH length in bytes: (hdrlen + 1) * 8 */
76 len = (srh->hdrlen + 1) << 3;
/* now make sure the whole SRH is in the linear area */
78 if (!pskb_may_pull(skb, srhoff + len))
81 if (!seg6_validate_srh(srh, len))
/*
 * Obtain the SRH of @skb for an endpoint action.
 *
 * The visible checks single out segments_left == 0 and, when
 * CONFIG_IPV6_SEG6_HMAC is enabled, a failed seg6_hmac_validate_skb().
 * Presumably both make the function return NULL -- the return statements
 * are not visible here, confirm.
 */
87 static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
89 struct ipv6_sr_hdr *srh;
95 if (srh->segments_left == 0)
98 #ifdef CONFIG_IPV6_SEG6_HMAC
99 if (!seg6_hmac_validate_skb(skb))
/*
 * Strip the outer headers in front of the encapsulated @proto payload.
 *
 * The visible checks involve an SRH with segments_left > 0 and, under
 * CONFIG_IPV6_SEG6_HMAC, a failed HMAC validation -- presumably both make
 * the function return false (the return statements are not visible here).
 */
106 static bool decap_and_validate(struct sk_buff *skb, int proto)
108 struct ipv6_sr_hdr *srh;
109 unsigned int off = 0;
112 if (srh && srh->segments_left > 0)
115 #ifdef CONFIG_IPV6_SEG6_HMAC
116 if (srh && !seg6_hmac_validate_skb(skb))
/* off receives the offset of the @proto header inside the packet */
120 if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
/* remove everything in front of the inner header ... */
123 if (!pskb_pull(skb, off))
/* ... and account for the removed bytes in the checksum */
126 skb_postpull_rcsum(skb, skb_network_header(skb), off);
/* network and transport header offsets are both reset to skb->data */
128 skb_reset_network_header(skb);
129 skb_reset_transport_header(skb);
130 if (iptunnel_pull_offloads(skb))
/*
 * Move @srh to its next segment: decrement segments_left and select the
 * segment at the new index. Presumably that address is then copied into
 * @daddr -- the copy itself is not visible here, confirm.
 */
136 static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
138 struct in6_addr *addr;
140 srh->segments_left--;
/* segments[] is indexed by the updated segments_left */
141 addr = srh->segments + srh->segments_left;
/*
 * Route @skb and attach the resulting dst to it.
 *
 * A flow is filled from the packet's IPv6 header; the destination is
 * @nhaddr when non-NULL, otherwise the header's daddr. The visible code
 * performs the lookup either through ip6_route_input_lookup() or, after
 * fib6_get_table(net, tbl_id), through ip6_pol_route() -- presumably
 * selected by whether tbl_id is zero (the selecting condition is not
 * visible here). The blackhole route entry is referenced as a fallback.
 */
145 static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
148 struct net *net = dev_net(skb->dev);
149 struct ipv6hdr *hdr = ipv6_hdr(skb);
150 int flags = RT6_LOOKUP_F_HAS_SADDR;
151 struct dst_entry *dst = NULL;
/* describe the flow using the ingress device and the IPv6 header */
155 fl6.flowi6_iif = skb->dev->ifindex;
156 fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
157 fl6.saddr = hdr->saddr;
158 fl6.flowlabel = ip6_flowinfo(hdr);
159 fl6.flowi6_mark = skb->mark;
160 fl6.flowi6_proto = hdr->nexthdr;
163 fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
/* regular input route lookup */
166 dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
/* lookup restricted to the table identified by tbl_id */
168 struct fib6_table *table;
170 table = fib6_get_table(net, tbl_id);
174 rt = ip6_pol_route(net, table, 0, &fl6, flags);
/* an error-free route whose device is a loopback gets special handling */
178 if (dst && dst->dev->flags & IFF_LOOPBACK && !dst->error) {
185 rt = net->ipv6.ip6_blk_hole_entry;
191 skb_dst_set(skb, dst);
194 /* regular endpoint function */
/*
 * Validate the SRH, write the next segment into the IPv6 destination
 * address, look up the route for the new destination and hand the packet
 * to dst_input(). @slwt is not used by the visible code.
 */
195 static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
197 struct ipv6_sr_hdr *srh;
199 srh = get_and_validate_srh(skb);
203 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
/* no explicit nexthop, no specific table */
205 lookup_nexthop(skb, NULL, 0);
207 return dst_input(skb);
214 /* regular endpoint, and forward to specified nexthop */
/*
 * Same processing as input_action_end(), except that the route lookup is
 * done towards the configured IPv6 nexthop slwt->nh6.
 */
215 static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
217 struct ipv6_sr_hdr *srh;
219 srh = get_and_validate_srh(skb);
223 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
/* route towards the configured nexthop rather than the new DA */
225 lookup_nexthop(skb, &slwt->nh6, 0);
227 return dst_input(skb);
/*
 * Endpoint with a table-specific lookup: advance to the next segment, then
 * look up the route in the table identified by slwt->table and hand the
 * packet to dst_input().
 */
234 static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
236 struct ipv6_sr_hdr *srh;
238 srh = get_and_validate_srh(skb);
242 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
/* lookup in the configured routing table */
244 lookup_nexthop(skb, NULL, slwt->table);
246 return dst_input(skb);
253 /* decapsulate and forward inner L2 frame on specified interface */
/*
 * Summary of the visible steps: strip the outer headers
 * (decap_and_validate() with NEXTHDR_NONE), require a full Ethernet header,
 * check eth->h_proto with eth_proto_is_802_3(), resolve the egress device
 * from slwt->oif under RCU, check that it is an ARPHRD_ETHER device which
 * is up with carrier, run skb_warn_if_lro()/skb_forward_csum(), compare the
 * payload length against the device MTU, set skb->protocol from the inner
 * Ethernet header and transmit with dev_queue_xmit(). How each failed check
 * is handled is not visible here.
 */
254 static int input_action_end_dx2(struct sk_buff *skb,
255 struct seg6_local_lwt *slwt)
257 struct net *net = dev_net(skb->dev);
258 struct net_device *odev;
261 if (!decap_and_validate(skb, NEXTHDR_NONE))
/* the inner frame must hold at least an Ethernet header */
264 if (!pskb_may_pull(skb, ETH_HLEN))
/* the inner Ethernet header starts at skb->data */
267 skb_reset_mac_header(skb);
268 eth = (struct ethhdr *)skb->data;
270 /* To determine the frame's protocol, we assume it is 802.3. This avoids
271 * a call to eth_type_trans(), which is not really relevant for our
274 if (!eth_proto_is_802_3(eth->h_proto))
277 odev = dev_get_by_index_rcu(net, slwt->oif);
281 /* As we accept Ethernet frames, make sure the egress device is of
284 if (odev->type != ARPHRD_ETHER)
287 if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
292 if (skb_warn_if_lro(skb))
295 skb_forward_csum(skb);
297 if (skb->len - ETH_HLEN > odev->mtu)
301 skb->protocol = eth->h_proto;
303 return dev_queue_xmit(skb);
310 /* decapsulate and forward to specified nexthop */
/*
 * Summary of the visible steps: strip the outer headers in front of the
 * inner IPv6 packet (decap_and_validate() with IPPROTO_IPV6), require a
 * full inner IPv6 header, choose the nexthop depending on whether slwt->nh6
 * is the unspecified address, look up the route and call dst_input().
 */
311 static int input_action_end_dx6(struct sk_buff *skb,
312 struct seg6_local_lwt *slwt)
/* NULL means: route on the inner packet's destination address */
314 struct in6_addr *nhaddr = NULL;
316 /* this function accepts IPv6 encapsulated packets, with either
317 * an SRH with SL=0, or no SRH.
320 if (!decap_and_validate(skb, IPPROTO_IPV6))
323 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
326 /* The inner packet is not associated to any local interface,
327 * so we do not call netif_rx().
329 * If slwt->nh6 is set to ::, then lookup the nexthop for the
330 * inner packet's DA. Otherwise, use the specified nexthop.
333 if (!ipv6_addr_any(&slwt->nh6))
336 lookup_nexthop(skb, nhaddr, 0);
338 return dst_input(skb);
/*
 * Decapsulate an inner IPv4 packet and route it.
 *
 * The outer headers are stripped up to the IPPROTO_IPIP payload, a full
 * inner IPv4 header is required, and the packet is routed with
 * ip_route_input() before being passed to dst_input().
 */
344 static int input_action_end_dx4(struct sk_buff *skb,
345 struct seg6_local_lwt *slwt)
351 if (!decap_and_validate(skb, IPPROTO_IPIP))
354 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
/* the packet is an IPv4 packet from now on */
357 skb->protocol = htons(ETH_P_IP);
/* use the configured IPv4 nexthop, or the inner DA when it is 0 */
361 nhaddr = slwt->nh4.s_addr ?: iph->daddr;
365 err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
369 return dst_input(skb);
/*
 * Decapsulate an inner IPv6 packet and route it using the table identified
 * by slwt->table, then hand it to dst_input().
 */
376 static int input_action_end_dt6(struct sk_buff *skb,
377 struct seg6_local_lwt *slwt)
379 if (!decap_and_validate(skb, IPPROTO_IPV6))
/* the inner IPv6 header must be fully present */
382 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
385 lookup_nexthop(skb, NULL, slwt->table);
387 return dst_input(skb);
394 /* push an SRH on top of the current one */
/*
 * The configured SRH (slwt->srh) is inserted with seg6_do_srh_inline();
 * the packet is then routed again and passed to dst_input().
 */
395 static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
397 struct ipv6_sr_hdr *srh;
400 srh = get_and_validate_srh(skb);
404 err = seg6_do_srh_inline(skb, slwt->srh);
/* transport header offset: right after the fixed IPv6 header */
408 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
410 lookup_nexthop(skb, NULL, 0);
412 return dst_input(skb);
419 /* encapsulate within an outer IPv6 header and a specified SRH */
/*
 * The current SRH is first advanced to its next segment; the packet is then
 * encapsulated with seg6_do_srh_encap() using slwt->srh, routed again and
 * passed to dst_input().
 */
420 static int input_action_end_b6_encap(struct sk_buff *skb,
421 struct seg6_local_lwt *slwt)
423 struct ipv6_sr_hdr *srh;
426 srh = get_and_validate_srh(skb);
430 advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
/* record the current headers as inner headers before encapsulating */
432 skb_reset_inner_headers(skb);
433 skb->encapsulation = 1;
435 err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
/* transport header offset: right after the fixed IPv6 header */
439 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
441 lookup_nexthop(skb, NULL, 0);
443 return dst_input(skb);
/*
 * Table of supported actions. Each entry binds an action id to its input
 * handler and to .attrs, a bitmask of SEG6_LOCAL_* attribute ids (bit i set
 * for attribute i) that parse_nla_action() parses for that action.
 * .static_headroom is added to the tunnel headroom by parse_nla_action().
 */
450 static struct seg6_action_desc seg6_action_table[] = {
452 .action = SEG6_LOCAL_ACTION_END,
454 .input = input_action_end,
457 .action = SEG6_LOCAL_ACTION_END_X,
458 .attrs = (1 << SEG6_LOCAL_NH6),
459 .input = input_action_end_x,
462 .action = SEG6_LOCAL_ACTION_END_T,
463 .attrs = (1 << SEG6_LOCAL_TABLE),
464 .input = input_action_end_t,
467 .action = SEG6_LOCAL_ACTION_END_DX2,
468 .attrs = (1 << SEG6_LOCAL_OIF),
469 .input = input_action_end_dx2,
472 .action = SEG6_LOCAL_ACTION_END_DX6,
473 .attrs = (1 << SEG6_LOCAL_NH6),
474 .input = input_action_end_dx6,
477 .action = SEG6_LOCAL_ACTION_END_DX4,
478 .attrs = (1 << SEG6_LOCAL_NH4),
479 .input = input_action_end_dx4,
482 .action = SEG6_LOCAL_ACTION_END_DT6,
483 .attrs = (1 << SEG6_LOCAL_TABLE),
484 .input = input_action_end_dt6,
487 .action = SEG6_LOCAL_ACTION_END_B6,
488 .attrs = (1 << SEG6_LOCAL_SRH),
489 .input = input_action_end_b6,
492 .action = SEG6_LOCAL_ACTION_END_B6_ENCAP,
493 .attrs = (1 << SEG6_LOCAL_SRH),
494 .input = input_action_end_b6_encap,
/* room for the outer IPv6 header added by the encapsulation */
495 .static_headroom = sizeof(struct ipv6hdr),
/*
 * Linear search of seg6_action_table for the entry whose .action equals
 * @action. Presumably returns the matching descriptor, or NULL when none
 * matches -- the return statements are not visible here, confirm.
 */
499 static struct seg6_action_desc *__get_action_desc(int action)
501 struct seg6_action_desc *desc;
/* number of entries in the table */
504 count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc);
505 for (i = 0; i < count; i++) {
506 desc = &seg6_action_table[i];
507 if (desc->action == action)
/*
 * lwtunnel input hook (installed as .input in seg6_local_ops).
 *
 * The seg6local state is taken from the lwtunnel state of the packet's
 * current dst, and the packet is dispatched to the action's input handler.
 * Packets whose protocol is not IPv6 take a separate path whose body is not
 * visible here.
 */
514 static int seg6_local_input(struct sk_buff *skb)
516 struct dst_entry *orig_dst = skb_dst(skb);
517 struct seg6_action_desc *desc;
518 struct seg6_local_lwt *slwt;
520 if (skb->protocol != htons(ETH_P_IPV6)) {
525 slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
/* run the action-specific handler */
528 return desc->input(skb, slwt);
/*
 * Netlink policy for the nested SEG6_LOCAL_* attributes, used by
 * seg6_local_build_state().
 *
 * NOTE(review): for NLA_BINARY the policy .len is understood to be a
 * maximum payload length; parse_nla_nh4()/parse_nla_nh6() copy a fixed
 * number of bytes -- confirm that shorter payloads cannot reach them.
 */
531 static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
532 [SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
533 [SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
534 [SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
535 [SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
536 .len = sizeof(struct in_addr) },
537 [SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
538 .len = sizeof(struct in6_addr) },
539 [SEG6_LOCAL_IIF] = { .type = NLA_U32 },
540 [SEG6_LOCAL_OIF] = { .type = NLA_U32 },
/*
 * Parse the SEG6_LOCAL_SRH attribute into @slwt.
 *
 * The attribute payload must be large enough for an SRH with one segment
 * and must pass seg6_validate_srh(). A private copy is allocated with
 * kmalloc() and stored in slwt->srh, and its length is added to
 * slwt->headroom. Error codes returned on failure are not visible here.
 */
543 static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
545 struct ipv6_sr_hdr *srh;
548 srh = nla_data(attrs[SEG6_LOCAL_SRH]);
549 len = nla_len(attrs[SEG6_LOCAL_SRH]);
551 /* SRH must contain at least one segment */
552 if (len < sizeof(*srh) + sizeof(struct in6_addr))
555 if (!seg6_validate_srh(srh, len))
/* keep a copy: the attribute buffer is not owned by slwt */
558 slwt->srh = kmalloc(len, GFP_KERNEL);
562 memcpy(slwt->srh, srh, len);
/* account for the SRH in the required headroom */
564 slwt->headroom += len;
/*
 * Dump the configured SRH into @skb as a SEG6_LOCAL_SRH attribute: reserve
 * an attribute of the SRH's length and copy the header into it.
 */
569 static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
571 struct ipv6_sr_hdr *srh;
/* SRH length in bytes: (hdrlen + 1) * 8 */
576 len = (srh->hdrlen + 1) << 3;
578 nla = nla_reserve(skb, SEG6_LOCAL_SRH, len);
582 memcpy(nla_data(nla), srh, len);
/*
 * Compare the SRHs of @a and @b. When both have the same length the result
 * of memcmp() is returned (0 when identical). The value returned on a
 * length mismatch is not visible here -- presumably non-zero.
 */
587 static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
589 int len = (a->srh->hdrlen + 1) << 3;
591 if (len != ((b->srh->hdrlen + 1) << 3))
594 return memcmp(a->srh, b->srh, len);
/* Store the u32 value of the SEG6_LOCAL_TABLE attribute in slwt->table. */
597 static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt)
599 slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
/* Dump slwt->table into @skb as a u32 SEG6_LOCAL_TABLE attribute. */
604 static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
606 if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
/*
 * Compare the table ids of @a and @b; presumably returns non-zero when they
 * differ (return statements not visible here).
 */
612 static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
614 if (a->table != b->table)
/*
 * Copy sizeof(struct in_addr) bytes of the SEG6_LOCAL_NH4 attribute
 * payload into slwt->nh4.
 */
620 static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
622 memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
623 sizeof(struct in_addr));
/*
 * Dump slwt->nh4 into @skb: reserve a SEG6_LOCAL_NH4 attribute of
 * sizeof(struct in_addr) bytes and copy the address into it.
 */
628 static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
632 nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
636 memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));
/* Byte-wise comparison of the IPv4 nexthops of @a and @b (0 when equal). */
641 static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
643 return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
/*
 * Copy sizeof(struct in6_addr) bytes of the SEG6_LOCAL_NH6 attribute
 * payload into slwt->nh6.
 */
646 static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt)
648 memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
649 sizeof(struct in6_addr));
/*
 * Dump slwt->nh6 into @skb: reserve a SEG6_LOCAL_NH6 attribute of
 * sizeof(struct in6_addr) bytes and copy the address into it.
 */
654 static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
658 nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
662 memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));
/* Byte-wise comparison of the IPv6 nexthops of @a and @b (0 when equal). */
667 static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
669 return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
/* Store the u32 value of the SEG6_LOCAL_IIF attribute in slwt->iif. */
672 static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
674 slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
/* Dump slwt->iif into @skb as a u32 SEG6_LOCAL_IIF attribute. */
679 static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
681 if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
/*
 * Compare the iif values of @a and @b; presumably returns non-zero when
 * they differ (return statements not visible here).
 */
687 static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
689 if (a->iif != b->iif)
/* Store the u32 value of the SEG6_LOCAL_OIF attribute in slwt->oif. */
695 static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
697 slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
/* Dump slwt->oif into @skb as a u32 SEG6_LOCAL_OIF attribute. */
702 static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
704 if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
/*
 * Compare the oif values of @a and @b; presumably returns non-zero when
 * they differ (return statements not visible here).
 */
710 static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
712 if (a->oif != b->oif)
/* Callbacks handling one SEG6_LOCAL_* attribute. */
718 struct seg6_action_param {
/* read the attribute from the parsed netlink table into @slwt */
719 int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
/* write the attribute from @slwt into the netlink message @skb */
720 int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
/* compare the attribute between two states */
721 int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);
/*
 * Per-attribute callbacks, indexed by SEG6_LOCAL_* attribute id. The
 * parse/fill/compare loops in this file index this array with the bit
 * positions set in a descriptor's .attrs mask.
 */
724 static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
725 [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh,
727 .cmp = cmp_nla_srh },
729 [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table,
730 .put = put_nla_table,
731 .cmp = cmp_nla_table },
733 [SEG6_LOCAL_NH4] = { .parse = parse_nla_nh4,
735 .cmp = cmp_nla_nh4 },
737 [SEG6_LOCAL_NH6] = { .parse = parse_nla_nh6,
739 .cmp = cmp_nla_nh6 },
741 [SEG6_LOCAL_IIF] = { .parse = parse_nla_iif,
743 .cmp = cmp_nla_iif },
745 [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif,
747 .cmp = cmp_nla_oif },
/*
 * Resolve the descriptor for slwt->action and parse every attribute the
 * action requires.
 *
 * The descriptor's static headroom is added to slwt->headroom, then for
 * each attribute id whose bit is set in desc->attrs the matching parse
 * callback is run. Handling of an unknown action, of a missing attribute
 * and of a parse error is not visible here.
 */
750 static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
752 struct seg6_action_param *param;
753 struct seg6_action_desc *desc;
756 desc = __get_action_desc(slwt->action);
764 slwt->headroom += desc->static_headroom;
/* walk all attribute ids and parse those required by this action */
766 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
767 if (desc->attrs & (1 << i)) {
771 param = &seg6_action_params[i];
773 err = param->parse(attrs, slwt);
/*
 * lwtunnel build_state hook: create a seg6local tunnel state from the
 * nested netlink attribute @nla.
 *
 * Visible steps: check @family against AF_INET6, parse the nested
 * attributes against seg6_local_policy, require SEG6_LOCAL_ACTION, allocate
 * a lwtunnel_state with room for a struct seg6_local_lwt, record the
 * action, parse its parameters and fill in type, flags and headroom of the
 * new state. Error paths and the store into @ts are not visible here.
 */
782 static int seg6_local_build_state(struct nlattr *nla, unsigned int family,
783 const void *cfg, struct lwtunnel_state **ts,
784 struct netlink_ext_ack *extack)
786 struct nlattr *tb[SEG6_LOCAL_MAX + 1];
787 struct lwtunnel_state *newts;
788 struct seg6_local_lwt *slwt;
791 if (family != AF_INET6)
794 err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy,
/* the action attribute is mandatory */
800 if (!tb[SEG6_LOCAL_ACTION])
803 newts = lwtunnel_state_alloc(sizeof(*slwt));
807 slwt = seg6_local_lwtunnel(newts);
808 slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
810 err = parse_nla_action(tb, slwt);
814 newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
815 newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
/* headroom accumulated while parsing the action parameters */
816 newts->headroom = slwt->headroom;
/*
 * lwtunnel destroy_state hook. Only the retrieval of the seg6local state is
 * visible here; presumably the SRH copy allocated by parse_nla_srh() is
 * released -- confirm.
 */
828 static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
830 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
/*
 * lwtunnel fill_encap hook: dump the tunnel configuration into @skb.
 *
 * The action id is written first, then the put callback of every attribute
 * whose bit is set in the action descriptor's .attrs mask is run. Error
 * returns are not visible here.
 */
835 static int seg6_local_fill_encap(struct sk_buff *skb,
836 struct lwtunnel_state *lwt)
838 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
839 struct seg6_action_param *param;
842 if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
845 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
846 if (slwt->desc->attrs & (1 << i)) {
847 param = &seg6_action_params[i];
848 err = param->put(skb, slwt);
/*
 * lwtunnel get_encap_size hook: compute the netlink space needed by
 * seg6_local_fill_encap(). The size of the action attribute is always
 * counted; the size of each other attribute is added when its bit is set in
 * the action descriptor's .attrs mask.
 */
857 static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
859 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
863 nlsize = nla_total_size(4); /* action */
865 attrs = slwt->desc->attrs;
/* the SRH is variable-sized: (hdrlen + 1) * 8 bytes */
867 if (attrs & (1 << SEG6_LOCAL_SRH))
868 nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
870 if (attrs & (1 << SEG6_LOCAL_TABLE))
871 nlsize += nla_total_size(4);
873 if (attrs & (1 << SEG6_LOCAL_NH4))
874 nlsize += nla_total_size(4);
876 if (attrs & (1 << SEG6_LOCAL_NH6))
877 nlsize += nla_total_size(16);
879 if (attrs & (1 << SEG6_LOCAL_IIF))
880 nlsize += nla_total_size(4);
882 if (attrs & (1 << SEG6_LOCAL_OIF))
883 nlsize += nla_total_size(4);
/*
 * lwtunnel cmp_encap hook: compare two seg6local states.
 *
 * The action ids and attribute masks are compared first; then the cmp
 * callback of every attribute set in the mask is run. Presumably a non-zero
 * value is returned on the first difference and 0 when everything matches
 * -- the return statements are not visible here, confirm.
 */
888 static int seg6_local_cmp_encap(struct lwtunnel_state *a,
889 struct lwtunnel_state *b)
891 struct seg6_local_lwt *slwt_a, *slwt_b;
892 struct seg6_action_param *param;
895 slwt_a = seg6_local_lwtunnel(a);
896 slwt_b = seg6_local_lwtunnel(b);
898 if (slwt_a->action != slwt_b->action)
901 if (slwt_a->desc->attrs != slwt_b->desc->attrs)
/* masks are equal at this point, so walking a's mask covers both */
904 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
905 if (slwt_a->desc->attrs & (1 << i)) {
906 param = &seg6_action_params[i];
907 if (param->cmp(slwt_a, slwt_b))
/*
 * lwtunnel operations of the seg6local encapsulation type; registered by
 * seg6_local_init() and removed by seg6_local_exit().
 */
915 static const struct lwtunnel_encap_ops seg6_local_ops = {
916 .build_state = seg6_local_build_state,
917 .destroy_state = seg6_local_destroy_state,
918 .input = seg6_local_input,
919 .fill_encap = seg6_local_fill_encap,
920 .get_encap_size = seg6_local_get_encap_size,
921 .cmp_encap = seg6_local_cmp_encap,
922 .owner = THIS_MODULE,
/*
 * Register seg6_local_ops for LWTUNNEL_ENCAP_SEG6_LOCAL and return the
 * result of lwtunnel_encap_add_ops().
 */
925 int __init seg6_local_init(void)
927 return lwtunnel_encap_add_ops(&seg6_local_ops,
928 LWTUNNEL_ENCAP_SEG6_LOCAL);
/* Unregister seg6_local_ops for LWTUNNEL_ENCAP_SEG6_LOCAL. */
931 void seg6_local_exit(void)
933 lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);