1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (C) 2017 Cavium, Inc.
5 #include <linux/netlink.h>
6 #include <linux/rtnetlink.h>
13 #include <sys/socket.h>
16 #include <arpa/inet.h>
21 #include <sys/ioctl.h>
22 #include <sys/syscall.h>
24 #include <bpf/libbpf.h>
28 #include "xdp_sample_user.h"
29 #include "xdp_router_ipv4.skel.h"
/* Help text; main() hands it to usage() when the options are invalid. */
31 static const char *__doc__ =
32 "XDP IPv4 router implementation\n"
33 "Usage: xdp_router_ipv4 <IFNAME-0> ... <IFNAME-N>\n";
/* Receive buffer shared by every netlink reader in this file: recv_msg()
 * receives into it and its callers zero it before each receive.
 */
35 static char buf[8192];
/* File descriptors of the BPF maps.  main() fills them in from the skeleton
 * before it creates the monitor thread.
 */
36 static int lpm_map_fd;
37 static int arp_table_map_fd;
38 static int exact_match_map_fd;
39 static int tx_port_map_fd;
/* Stop flag: main() sets it after sample_run() returns and the loop in
 * monitor_routes_thread() tests it on every pass.
 * NOTE(review): plain bool shared between two threads with no visible
 * synchronisation - confirm this is acceptable.
 */
41 static bool routes_thread_exit;
/* Defaults to 5; main() may overwrite it with strtoul(optarg, ...) while
 * parsing options, and passes it to sample_run().
 */
42 static int interval = 5;
/* Counter selection handed to sample_init() and usage(); main() may OR in
 * SAMPLE_REDIRECT_MAP_CNT while parsing options.
 */
44 static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
45 SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_EXCEPTION_CNT;
/* NOTE(review): macro supplied by the sample framework header; presumably it
 * generates the sample_init()/sample_init_pre_load() helpers that main()
 * calls - confirm against xdp_sample_user.h.
 */
47 DEFINE_SAMPLE_INIT(xdp_router_ipv4);
/* Long options accepted by getopt_long() in main().  The last field of each
 * entry is the matching short-option letter from the "si:SFvh" string.
 * NOTE(review): the terminating entry and the closing brace of this table are
 * not visible in this listing.
 */
49 static const struct option long_options[] = {
50 { "help", no_argument, NULL, 'h' },
51 { "skb-mode", no_argument, NULL, 'S' },
52 { "force", no_argument, NULL, 'F' },
53 { "interval", required_argument, NULL, 'i' },
54 { "verbose", no_argument, NULL, 'v' },
55 { "stats", no_argument, NULL, 's' },
/* Forward declaration: read_route() calls get_route_table() before the
 * latter is defined.
 */
59 static int get_route_table(int rtm_family);
/* recv_msg() - receive netlink data from @sock into the file-level buf.
 * @sock_addr: address the socket was bound with; only nl_groups is read in
 *             the lines shown here
 * @sock:      netlink socket to receive from
 *
 * NOTE(review): this listing has gaps; the declarations, the loop structure
 * and every return statement of this function are not visible, so the return
 * contract cannot be stated from here.
 */
61 static int recv_msg(struct sockaddr_nl sock_addr, int sock)
/* Receive at buf_ptr, offering only the part of buf left after nll bytes. */
69 len = recv(sock, buf_ptr, sizeof(buf) - nll, 0);
/* Treat the start of the data just received as a netlink message header. */
73 nh = (struct nlmsghdr *)buf_ptr;
/* NLMSG_DONE is tested on that header; the action taken is not visible. */
75 if (nh->nlmsg_type == NLMSG_DONE)
/* Sockets subscribed to the neighbour group or to the IPv4 route group are
 * singled out below; what is done for them is not visible.
 */
79 if ((sock_addr.nl_groups & RTMGRP_NEIGH) == RTMGRP_NEIGH)
82 if ((sock_addr.nl_groups & RTMGRP_IPV4_ROUTE) == RTMGRP_IPV4_ROUTE)
88 /* Function to parse the route entry returned by netlink
89 * Updates the route entry related map entries
91 static void read_route(struct nlmsghdr *nh, int nll)
93 char dsts[24], gws[24], ifs[16], dsts_len[24], metrics[24];
94 struct bpf_lpm_trie_key *prefix_key;
95 struct rtattr *rt_attr;
101 int dst_len, iface, metric;
111 struct arp_table arp;
116 memset(&route, 0, sizeof(route));
117 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
118 rt_msg = (struct rtmsg *)NLMSG_DATA(nh);
119 rtm_family = rt_msg->rtm_family;
120 if (rtm_family == AF_INET)
121 if (rt_msg->rtm_table != RT_TABLE_MAIN)
123 rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
124 rtl = RTM_PAYLOAD(nh);
126 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
127 switch (rt_attr->rta_type) {
130 (*((__be32 *)RTA_DATA(rt_attr))));
134 *((__be32 *)RTA_DATA(rt_attr)));
138 *((int *)RTA_DATA(rt_attr)));
141 sprintf(metrics, "%u",
142 *((int *)RTA_DATA(rt_attr)));
147 sprintf(dsts_len, "%d", rt_msg->rtm_dst_len);
148 route.dst = atoi(dsts);
149 route.dst_len = atoi(dsts_len);
150 route.gw = atoi(gws);
151 route.iface = atoi(ifs);
152 route.metric = atoi(metrics);
153 assert(get_mac_addr(route.iface, &route.mac) == 0);
154 assert(bpf_map_update_elem(tx_port_map_fd,
155 &route.iface, &route.iface, 0) == 0);
156 if (rtm_family == AF_INET) {
165 prefix_key = alloca(sizeof(*prefix_key) + 4);
166 prefix_value = alloca(sizeof(*prefix_value));
168 prefix_key->prefixlen = 32;
169 prefix_key->prefixlen = route.dst_len;
170 direct_entry.mac = route.mac & 0xffffffffffff;
171 direct_entry.ifindex = route.iface;
172 direct_entry.arp.mac = 0;
173 direct_entry.arp.dst = 0;
174 if (route.dst_len == 32) {
175 if (nh->nlmsg_type == RTM_DELROUTE) {
176 assert(bpf_map_delete_elem(exact_match_map_fd,
179 if (bpf_map_lookup_elem(arp_table_map_fd,
181 &direct_entry.arp.mac) == 0)
182 direct_entry.arp.dst = route.dst;
183 assert(bpf_map_update_elem(exact_match_map_fd,
185 &direct_entry, 0) == 0);
188 for (i = 0; i < 4; i++)
189 prefix_key->data[i] = (route.dst >> i * 8) & 0xff;
191 if (bpf_map_lookup_elem(lpm_map_fd, prefix_key,
193 for (i = 0; i < 4; i++)
194 prefix_value->prefix[i] = prefix_key->data[i];
195 prefix_value->value = route.mac & 0xffffffffffff;
196 prefix_value->ifindex = route.iface;
197 prefix_value->gw = route.gw;
198 prefix_value->metric = route.metric;
200 assert(bpf_map_update_elem(lpm_map_fd,
205 if (nh->nlmsg_type == RTM_DELROUTE) {
206 assert(bpf_map_delete_elem(lpm_map_fd,
209 /* Rereading the route table to check if
210 * there is an entry with the same
211 * prefix but a different metric as the
214 get_route_table(AF_INET);
215 } else if (prefix_key->data[0] ==
216 prefix_value->prefix[0] &&
217 prefix_key->data[1] ==
218 prefix_value->prefix[1] &&
219 prefix_key->data[2] ==
220 prefix_value->prefix[2] &&
221 prefix_key->data[3] ==
222 prefix_value->prefix[3] &&
223 route.metric >= prefix_value->metric) {
226 for (i = 0; i < 4; i++)
227 prefix_value->prefix[i] =
229 prefix_value->value =
230 route.mac & 0xffffffffffff;
231 prefix_value->ifindex = route.iface;
232 prefix_value->gw = route.gw;
233 prefix_value->metric = route.metric;
234 assert(bpf_map_update_elem(lpm_map_fd,
241 memset(&route, 0, sizeof(route));
242 memset(dsts, 0, sizeof(dsts));
243 memset(dsts_len, 0, sizeof(dsts_len));
244 memset(gws, 0, sizeof(gws));
245 memset(ifs, 0, sizeof(ifs));
246 memset(&route, 0, sizeof(route));
/* get_route_table() - request a dump of the kernel routing table for
 * @rtm_family over a netlink socket and receive the reply into buf.
 * @rtm_family: address family placed in the RTM_GETROUTE request
 *
 * Called by monitor_routes_thread() before it starts polling, and again by
 * read_route() after a route deletion.  monitor_routes_thread() treats a
 * negative return value as failure.
 *
 * NOTE(review): this listing has gaps; declarations, error branches and the
 * clean-up path are not visible here.
 */
251 static int get_route_table(int rtm_family)
253 struct sockaddr_nl sa;
267 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
269 fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
/* Bind with an all-zero address apart from the family: no multicast groups. */
272 memset(&sa, 0, sizeof(sa));
273 sa.nl_family = AF_NETLINK;
274 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
275 fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
/* Build the dump request: netlink header followed by a struct rtmsg. */
279 memset(&req, 0, sizeof(req));
280 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
281 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
282 req.nl.nlmsg_type = RTM_GETROUTE;
284 req.rt.rtm_family = rtm_family;
285 req.rt.rtm_table = RT_TABLE_MAIN;
286 req.nl.nlmsg_pid = 0;
287 req.nl.nlmsg_seq = ++seq;
288 memset(&msg, 0, sizeof(msg));
289 iov.iov_base = (void *)&req.nl;
290 iov.iov_len = req.nl.nlmsg_len;
293 ret = sendmsg(sock, &msg, 0);
295 fprintf(stderr, "send to netlink: %s\n", strerror(errno));
/* Start from a zeroed buffer so data from an earlier reply cannot linger. */
299 memset(buf, 0, sizeof(buf));
300 nll = recv_msg(sa, sock);
/* Report errno rather than nll: a negative return value is not an error
 * number that strerror() understands.
 * NOTE(review): assumes this message is printed only when recv_msg() came
 * back negative after a failed recv() (the guard is not visible here).
 */
302 fprintf(stderr, "recv from netlink: %s\n", strerror(errno));
306 nh = (struct nlmsghdr *)buf;
313 /* Function to parse the arp entry returned by netlink
314 * Updates the arp entry related map entries
316 static void read_arp(struct nlmsghdr *nh, int nll)
318 struct rtattr *rt_attr;
319 char dsts[24], mac[24];
320 struct ndmsg *rt_msg;
328 struct arp_table arp;
333 for (; NLMSG_OK(nh, nll); nh = NLMSG_NEXT(nh, nll)) {
334 rt_msg = (struct ndmsg *)NLMSG_DATA(nh);
335 rt_attr = (struct rtattr *)RTM_RTA(rt_msg);
336 ndm_family = rt_msg->ndm_family;
337 rtl = RTM_PAYLOAD(nh);
338 for (; RTA_OK(rt_attr, rtl); rt_attr = RTA_NEXT(rt_attr, rtl)) {
339 switch (rt_attr->rta_type) {
342 *((__be32 *)RTA_DATA(rt_attr)));
346 *((__be64 *)RTA_DATA(rt_attr)));
352 arp_entry.dst = atoi(dsts);
353 arp_entry.mac = atol(mac);
355 if (ndm_family == AF_INET) {
356 if (bpf_map_lookup_elem(exact_match_map_fd,
358 &direct_entry) == 0) {
359 if (nh->nlmsg_type == RTM_DELNEIGH) {
360 direct_entry.arp.dst = 0;
361 direct_entry.arp.mac = 0;
362 } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
363 direct_entry.arp.dst = arp_entry.dst;
364 direct_entry.arp.mac = arp_entry.mac;
366 assert(bpf_map_update_elem(exact_match_map_fd,
370 memset(&direct_entry, 0, sizeof(direct_entry));
372 if (nh->nlmsg_type == RTM_DELNEIGH) {
373 assert(bpf_map_delete_elem(arp_table_map_fd,
374 &arp_entry.dst) == 0);
375 } else if (nh->nlmsg_type == RTM_NEWNEIGH) {
376 assert(bpf_map_update_elem(arp_table_map_fd,
382 memset(&arp_entry, 0, sizeof(arp_entry));
383 memset(dsts, 0, sizeof(dsts));
/* get_arp_table() - request a dump of the kernel neighbour table for
 * @rtm_family over a netlink socket and receive the reply into buf.
 * @rtm_family: address family placed in the RTM_GETNEIGH request
 *
 * Called by monitor_routes_thread() before it starts polling; the caller
 * treats a negative return value as failure.
 *
 * NOTE(review): this listing has gaps; declarations, error branches and the
 * clean-up path are not visible here.
 */
388 static int get_arp_table(int rtm_family)
390 struct sockaddr_nl sa;
403 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
405 fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
/* Bind with an all-zero address apart from the family: no multicast groups. */
408 memset(&sa, 0, sizeof(sa));
409 sa.nl_family = AF_NETLINK;
410 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
411 fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
/* Build the dump request.  The length is taken from the payload member
 * itself so that it always matches the header type actually sent.
 */
415 memset(&req, 0, sizeof(req));
416 req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt));
417 req.nl.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
418 req.nl.nlmsg_type = RTM_GETNEIGH;
419 req.rt.ndm_state = NUD_REACHABLE;
420 req.rt.ndm_family = rtm_family;
421 req.nl.nlmsg_pid = 0;
422 req.nl.nlmsg_seq = ++seq;
423 memset(&msg, 0, sizeof(msg));
424 iov.iov_base = (void *)&req.nl;
425 iov.iov_len = req.nl.nlmsg_len;
428 ret = sendmsg(sock, &msg, 0);
430 fprintf(stderr, "send to netlink: %s\n", strerror(errno));
/* Start from a zeroed buffer so data from an earlier reply cannot linger. */
434 memset(buf, 0, sizeof(buf));
435 nll = recv_msg(sa, sock);
/* Report errno rather than nll: a negative return value is not an error
 * number that strerror() understands.
 * NOTE(review): assumes this message is printed only when recv_msg() came
 * back negative after a failed recv() (the guard is not visible here).
 */
437 fprintf(stderr, "recv from netlink: %s\n", strerror(errno));
441 nh = (struct nlmsghdr *)buf;
/* monitor_routes_thread() - thread body that tracks changes to the route and
 * ARP tables.
 * @arg: not referenced in the lines shown here
 *
 * Opens one non-blocking netlink socket for route notifications and one for
 * neighbour notifications, dumps both tables once, then polls the two
 * sockets until main() sets routes_thread_exit.
 *
 * NOTE(review): this listing has gaps; error branches, the calls that consume
 * the received messages and the return statement are not visible here.
 */
451 static void *monitor_routes_thread(void *arg)
453 struct pollfd fds_route, fds_arp;
454 struct sockaddr_nl la, lr;
455 int sock, sock_arp, nll;
458 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
460 fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
/* Route socket: non-blocking, subscribed to the route multicast groups. */
464 fcntl(sock, F_SETFL, O_NONBLOCK);
465 memset(&lr, 0, sizeof(lr));
466 lr.nl_family = AF_NETLINK;
467 lr.nl_groups = RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
468 if (bind(sock, (struct sockaddr *)&lr, sizeof(lr)) < 0) {
469 fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
/* POLLIN is the poll() event flag for "data available to read". */
475 fds_route.events = POLLIN;
477 sock_arp = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
479 fprintf(stderr, "open netlink socket: %s\n", strerror(errno));
/* Neighbour socket: non-blocking, subscribed to the neighbour group. */
484 fcntl(sock_arp, F_SETFL, O_NONBLOCK);
485 memset(&la, 0, sizeof(la));
486 la.nl_family = AF_NETLINK;
487 la.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
488 if (bind(sock_arp, (struct sockaddr *)&la, sizeof(la)) < 0) {
489 fprintf(stderr, "bind netlink socket: %s\n", strerror(errno));
493 fds_arp.fd = sock_arp;
494 fds_arp.events = POLLIN;
496 /* dump route and arp tables */
497 if (get_arp_table(AF_INET) < 0) {
498 fprintf(stderr, "Failed reading arp table\n");
502 if (get_route_table(AF_INET) < 0) {
503 fprintf(stderr, "Failed reading route table\n");
/* Alternate between the two sockets until main() asks the thread to stop. */
507 while (!routes_thread_exit) {
508 memset(buf, 0, sizeof(buf));
/* poll() returns the number of ready descriptors; with a single descriptor
 * any positive value means it is readable.  The timeout is 3 ms.
 */
509 if (poll(&fds_route, 1, 3) > 0) {
510 nll = recv_msg(lr, sock);
512 fprintf(stderr, "recv from netlink: %s\n",
517 nh = (struct nlmsghdr *)buf;
521 memset(buf, 0, sizeof(buf));
522 if (poll(&fds_arp, 1, 3) > 0) {
523 nll = recv_msg(la, sock_arp);
525 fprintf(stderr, "recv from netlink: %s\n",
530 nh = (struct nlmsghdr *)buf;
/* usage() - print the help text by forwarding to sample_usage().
 * @argv, @long_options, @doc, @mask, @error: passed through unchanged
 * @obj: not referenced in the line shown here
 *
 * NOTE(review): this listing has gaps; only the sample_usage() call of the
 * body is visible.
 */
543 static void usage(char *argv[], const struct option *long_options,
544 const char *doc, int mask, bool error,
545 struct bpf_object *obj)
547 sample_usage(argv, long_options, doc, mask, error);
/* main() - load the XDP router skeleton, attach it to the interfaces named
 * on the command line, start the route/ARP monitor thread and run the
 * sample's main loop.
 *
 * NOTE(review): this listing has gaps; option case labels, error branches,
 * clean-up labels and the return statement are not visible here.
 */
550 int main(int argc, char **argv)
552 bool error = true, generic = false, force = false;
553 int opt, ret = EXIT_FAIL_BPF;
554 struct xdp_router_ipv4 *skel;
/* Initial guess: every argument after the program name is an interface. */
555 int i, total_ifindex = argc - 1;
556 char **ifname_list = argv + 1;
557 pthread_t routes_thread;
560 if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL) < 0) {
561 fprintf(stderr, "Failed to set libbpf strict mode: %s\n",
/* Open, prepare and load the BPF skeleton, then set up the sample. */
566 skel = xdp_router_ipv4__open();
568 fprintf(stderr, "Failed to xdp_router_ipv4__open: %s\n",
573 ret = sample_init_pre_load(skel);
575 fprintf(stderr, "Failed to sample_init_pre_load: %s\n",
581 ret = xdp_router_ipv4__load(skel);
583 fprintf(stderr, "Failed to xdp_router_ipv4__load: %s\n",
/* NOTE(review): mask is handed over here, before the option loop below may
 * change it - confirm the later value is still picked up where needed.
 */
588 ret = sample_init(skel, mask);
590 fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
595 while ((opt = getopt_long(argc, argv, "si:SFvh",
596 long_options, &longindex)) != -1) {
599 mask |= SAMPLE_REDIRECT_MAP_CNT;
/* NOTE(review): the conversion result is not validated in the line shown. */
604 interval = strtoul(optarg, NULL, 0);
619 sample_switch_mode();
626 usage(argv, long_options, __doc__, mask, error, skel->obj);
631 ret = EXIT_FAIL_OPTION;
/* No non-option argument left means no interface was named. */
632 if (optind == argc) {
633 usage(argv, long_options, __doc__, mask, true, skel->obj);
/* Publish the map descriptors in the file-level variables used by the
 * netlink readers.
 */
637 lpm_map_fd = bpf_map__fd(skel->maps.lpm_map);
638 if (lpm_map_fd < 0) {
639 fprintf(stderr, "Failed loading lpm_map %s\n",
640 strerror(-lpm_map_fd));
643 arp_table_map_fd = bpf_map__fd(skel->maps.arp_table);
644 if (arp_table_map_fd < 0) {
645 fprintf(stderr, "Failed loading arp_table_map_fd %s\n",
646 strerror(-arp_table_map_fd));
649 exact_match_map_fd = bpf_map__fd(skel->maps.exact_match);
650 if (exact_match_map_fd < 0) {
651 fprintf(stderr, "Failed loading exact_match_map_fd %s\n",
652 strerror(-exact_match_map_fd));
655 tx_port_map_fd = bpf_map__fd(skel->maps.tx_port);
656 if (tx_port_map_fd < 0) {
657 fprintf(stderr, "Failed loading tx_port_map_fd %s\n",
658 strerror(-tx_port_map_fd));
/* Attach the XDP program to every interface that was named. */
663 for (i = 0; i < total_ifindex; i++) {
664 int index = if_nametoindex(ifname_list[i]);
/* if_nametoindex() reports failure through errno, so that is what gets
 * printed; the map descriptor has nothing to do with this error.
 */
667 fprintf(stderr, "Interface %s not found %s\n",
668 ifname_list[i], strerror(errno));
671 if (sample_install_xdp(skel->progs.xdp_router_ipv4_prog,
672 index, generic, force) < 0)
676 ret = pthread_create(&routes_thread, NULL, monitor_routes_thread, NULL);
/* pthread_create() returns a positive error number, so it goes to
 * strerror() as-is.
 */
678 fprintf(stderr, "Failed creating routes_thread: %s\n", strerror(ret));
683 ret = sample_run(interval, NULL, NULL);
/* Ask the monitor thread to leave its loop now that sample_run() is back. */
684 routes_thread_exit = true;
687 fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
689 goto end_thread_wait;
694 pthread_join(routes_thread, NULL);
696 xdp_router_ipv4__destroy(skel);