1 // SPDX-License-Identifier: GPL-2.0-only
3 * Intel MIC Platform Software Stack (MPSS)
5 * Copyright(c) 2013 Intel Corporation.
7 * Intel MIC User Space Tools.
21 #include <sys/types.h>
24 #include <sys/socket.h>
25 #include <linux/virtio_ring.h>
26 #include <linux/virtio_net.h>
27 #include <linux/virtio_console.h>
28 #include <linux/virtio_blk.h>
29 #include <linux/version.h>
31 #include <linux/mic_ioctl.h>
32 #include <linux/mic_common.h>
33 #include <tools/endian.h>
/* Forward declaration; init_mic takes and returns a void pointer. */
35 static void *init_mic(void *arg);
/* List head for the known MIC devices; change_virtblk_backend() walks it
 * starting at mic_list.next. */
38 static struct mic_info mic_list;
/* Element count of an array object (size of whole / size of first element). */
40 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Minimum of two values. NOTE(review): only the first and last lines of this
 * macro are visible in the listing; the __min1/__min2 declarations are not. */
42 #define min_t(type, x, y) ({ \
45 __min1 < __min2 ? __min1 : __min2; })
47 /* align addr on a size boundary - adjust address up/down if needed */
/* NOTE(review): the mask ~((size)-1) only rounds correctly when size is a
 * power of two - presumably true for all callers; confirm. _ALIGN_UP forwards
 * addr + size - 1 without parenthesizing its own arguments. */
48 #define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1)))
49 #define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size)
51 /* align addr on a size boundary - adjust address up if needed */
52 #define _ALIGN(addr, size) _ALIGN_UP(addr, size)
54 /* to align the pointer to the (next) page boundary */
55 #define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE)
/* Reads x once through a volatile-qualified lvalue. */
57 #define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
/* Largest GSO payload handled by the net backend: 64 KiB. */
60 #define MAX_GSO_SIZE (64 * 1024)
/* Packet buffer size: GSO payload plus ETH_H_LEN, rounded up to 64 bytes.
 * NOTE(review): ETH_H_LEN is not defined in the visible part of the file. */
62 #define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
/* Byte offset at which init_vr() places the first vring in the mapping. */
63 #define MIC_DEVICE_PAGE_END 0x1000
/* Fallback definition for headers lacking VIRTIO_NET_HDR_F_DATA_VALID.
 * NOTE(review): the matching #endif is not visible in this listing. */
65 #ifndef VIRTIO_NET_HDR_F_DATA_VALID
66 #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
/*
 * Device page for the virtio console device: a device descriptor, two vring
 * configs, the feature words and the console config. The descriptor fields
 * are initialized from the sizes of the members themselves, and both vrings
 * are given MIC_VRING_ENTRIES entries (stored little-endian).
 * NOTE(review): the line opening this struct definition and several
 * initializer lines are not visible in this listing.
 */
70 struct mic_device_desc dd;
71 struct mic_vqconfig vqconfig[2];
72 __u32 host_features, guest_acknowledgements;
73 struct virtio_console_config cons_config;
74 } virtcons_dev_page = {
76 .type = VIRTIO_ID_CONSOLE,
77 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
78 .feature_len = sizeof(virtcons_dev_page.host_features),
79 .config_len = sizeof(virtcons_dev_page.cons_config),
82 .num = htole16(MIC_VRING_ENTRIES),
85 .num = htole16(MIC_VRING_ENTRIES),
/*
 * Device page for the virtio net device: descriptor, two vring configs,
 * feature words and the net config. host_features advertises CSUM, GSO,
 * GUEST_TSO4, GUEST_TSO6 and GUEST_ECN, stored little-endian.
 * NOTE(review): the line opening this struct definition and several
 * initializer lines are not visible in this listing.
 */
90 struct mic_device_desc dd;
91 struct mic_vqconfig vqconfig[2];
92 __u32 host_features, guest_acknowledgements;
93 struct virtio_net_config net_config;
94 } virtnet_dev_page = {
96 .type = VIRTIO_ID_NET,
97 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
98 .feature_len = sizeof(virtnet_dev_page.host_features),
99 .config_len = sizeof(virtnet_dev_page.net_config),
102 .num = htole16(MIC_VRING_ENTRIES),
105 .num = htole16(MIC_VRING_ENTRIES),
108 .host_features = htole32(
109 1 << VIRTIO_NET_F_CSUM |
110 1 << VIRTIO_NET_F_GSO |
111 1 << VIRTIO_NET_F_GUEST_TSO4 |
112 1 << VIRTIO_NET_F_GUEST_TSO6 |
113 1 << VIRTIO_NET_F_GUEST_ECN),
/* Directory that set_backend_file() searches for the mpssd<NNN>.conf file. */
119 static const char *mic_config_dir = "/etc/mpss";
/* Key prefix that set_backend_file() looks for at the start of a config line. */
120 static const char *virtblk_backend = "VIRTBLK_BACKEND";
/*
 * Device page for the virtio block device: descriptor, a single vring config,
 * feature words and the block config. The SEG_MAX feature is advertised and
 * seg_max is MIC_VRING_ENTRIES - 2. capacity starts at 0 and is filled in by
 * set_backend_size().
 * NOTE(review): the line opening this struct definition and several
 * initializer lines are not visible in this listing.
 */
122 struct mic_device_desc dd;
123 struct mic_vqconfig vqconfig[1];
124 __u32 host_features, guest_acknowledgements;
125 struct virtio_blk_config blk_config;
126 } virtblk_dev_page = {
128 .type = VIRTIO_ID_BLOCK,
129 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
130 .feature_len = sizeof(virtblk_dev_page.host_features),
131 .config_len = sizeof(virtblk_dev_page.blk_config),
134 .num = htole16(MIC_VRING_ENTRIES),
137 htole32(1<<VIRTIO_BLK_F_SEG_MAX),
139 .seg_max = htole32(MIC_VRING_ENTRIES - 2),
140 .capacity = htole64(0),
/*
 * Configure the TAP interface `dev` for `mic` by running the `ip` utility
 * twice through execvp() and waiting for each child with waitpid(). The
 * second run is logged with the address 172.31.<id + 1>.254/24.
 * virtio_net() treats a non-zero return as failure.
 * NOTE(review): the return type, the ifargv setup, the fork() calls and the
 * return statements are not visible in this listing.
 */
147 tap_configure(struct mic_info *mic, char *dev)
/* NOTE(review): if IFNAMSIZ is 16 (its usual Linux value - confirm), the
 * address string formatted below fits only while mic->id + 1 is a single
 * digit; longer ids would be truncated by snprintf. */
151 char ipaddr[IFNAMSIZ];
162 mpsslog("Configuring %s\n", dev);
163 ret = execvp("ip", ifargv);
165 mpsslog("%s execvp failed errno %s\n",
166 mic->name, strerror(errno));
171 mpsslog("%s fork failed errno %s\n",
172 mic->name, strerror(errno));
/* Wait for the first `ip` child; its exit status is discarded (NULL). */
176 ret = waitpid(pid, NULL, 0);
178 mpsslog("%s waitpid failed errno %s\n",
179 mic->name, strerror(errno));
183 snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
194 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
195 ret = execvp("ip", ifargv);
197 mpsslog("%s execvp failed errno %s\n",
198 mic->name, strerror(errno));
203 mpsslog("%s fork failed errno %s\n",
204 mic->name, strerror(errno));
/* Wait for the second `ip` child; exit status again discarded. */
208 ret = waitpid(pid, NULL, 0);
210 mpsslog("%s waitpid failed errno %s\n",
211 mic->name, strerror(errno));
214 mpsslog("MIC name %s %s %d DONE!\n",
215 mic->name, __func__, __LINE__);
/*
 * Open /dev/net/tun and create a TAP interface (IFF_TAP | IFF_NO_PI |
 * IFF_VNET_HDR) with the name requested in `dev`, then enable checksum and
 * TSO offloads on it. The name in ifr.ifr_name after TUNSETIFF is copied back
 * into `dev`. virtio_net() treats a negative return as failure.
 * NOTE(review): the error-path returns and the final return are not visible.
 */
219 static int tun_alloc(struct mic_info *mic, char *dev)
226 fd = open("/dev/net/tun", O_RDWR);
228 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
232 memset(&ifr, 0, sizeof(ifr));
234 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
/* NOTE(review): strncpy does not NUL-terminate when dev holds IFNAMSIZ or
 * more characters; the visible caller formats dev with snprintf into an
 * IFNAMSIZ buffer, so it is terminated there. */
236 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
238 err = ioctl(fd, TUNSETIFF, (void *)&ifr);
240 mpsslog("%s %s %d TUNSETIFF failed %s\n",
241 mic->name, __func__, __LINE__, strerror(errno));
246 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
248 err = ioctl(fd, TUNSETOFFLOAD, offload);
250 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
251 mic->name, __func__, __LINE__, strerror(errno));
/* Assumes dev has room for the interface name - TODO confirm for all callers. */
256 strcpy(dev, ifr.ifr_name);
257 mpsslog("Created TAP %s\n", dev);
/* Index of the virtio net fd in the net_poll[] array used by virtio_net().
 * NOTE(review): NET_FD_TUN and MAX_NET_FD are used by virtio_net() but their
 * definitions are not visible in this listing. */
262 #define NET_FD_VIRTIO_NET 0
/*
 * Store the device page pointer `dp` in the per-device slot of `mic` selected
 * by `type` (console, net or block). An unrecognized type is only logged.
 * NOTE(review): the switch header, the net case label and the break lines are
 * not visible in this listing.
 */
266 static void set_dp(struct mic_info *mic, int type, void *dp)
269 case VIRTIO_ID_CONSOLE:
270 mic->mic_console.console_dp = dp;
273 mic->mic_net.net_dp = dp;
275 case VIRTIO_ID_BLOCK:
276 mic->mic_virtblk.block_dp = dp;
279 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
/*
 * Return the device page pointer previously stored by set_dp() for `type`.
 * An unrecognized type is logged.
 * NOTE(review): the switch header, the net case label and the value returned
 * after the log call are not visible in this listing.
 */
283 static void *get_dp(struct mic_info *mic, int type)
286 case VIRTIO_ID_CONSOLE:
287 return mic->mic_console.console_dp;
289 return mic->mic_net.net_dp;
290 case VIRTIO_ID_BLOCK:
291 return mic->mic_virtblk.block_dp;
293 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
/*
 * Search the device page of `type` for the descriptor whose type field
 * matches. The scan starts just past struct mic_bootparam, stays below
 * PAGE_SIZE and advances by mic_total_desc_size(d) per step. A miss is logged.
 * NOTE(review): the lines that derive d from dp and i, and the return
 * statements, are not visible in this listing.
 */
298 static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
300 struct mic_device_desc *d;
302 void *dp = get_dp(mic, type);
304 for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
305 i += mic_total_desc_size(d)) {
315 mpsslog("%s %s d-> type %d d %p\n",
316 mic->name, __func__, d->type, d);
/* Compare as __u8 because the descriptor type field is a single byte here. */
318 if (d->type == (__u8)type)
321 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
325 /* See comments in vhost.c for explanation of next_desc() */
/*
 * Follow a descriptor chain: tests VRING_DESC_F_NEXT in the little-endian
 * flags and, when set, reads the little-endian next index.
 * NOTE(review): both return statements are not visible in this listing.
 */
326 static unsigned next_desc(struct vring_desc *desc)
330 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
332 next = le16toh(desc->next);
336 /* Sum up all the IOVEC length */
/* Adds iov_len of the first copy->iovcnt entries of copy->iov.
 * NOTE(review): the return type and return statement are not visible. */
338 sum_iovec_len(struct mic_copy_desc *copy)
343 for (i = 0; i < copy->iovcnt; i++)
344 sum += copy->iov[i].iov_len;
/*
 * Sanity check after a copy: copy->out_len must equal the summed iovec
 * lengths. On mismatch the values are logged and the assert fires.
 */
348 static inline void verify_out_len(struct mic_info *mic,
349 struct mic_copy_desc *copy)
351 if (copy->out_len != sum_iovec_len(copy)) {
352 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
353 mic->name, __func__, __LINE__,
354 copy->out_len, sum_iovec_len(copy));
355 assert(copy->out_len == sum_iovec_len(copy));
359 /* Display an iovec */
/* Logs base address and length of each of the copy->iovcnt entries, tagged
 * with the caller-supplied string `s` and line number `line`. */
361 disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
362 const char *s, int line)
366 for (i = 0; i < copy->iovcnt; i++)
367 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
368 mic->name, s, line, i,
369 copy->iov[i].iov_base, copy->iov[i].iov_len);
/* Returns vr->info->avail_idx, read once through the READ_ONCE volatile
 * access. */
372 static inline __u16 read_avail_idx(struct mic_vring *vr)
374 return READ_ONCE(vr->info->avail_idx);
/*
 * Fill in a copy descriptor before a transfer: vring index 0 for tx and 1 for
 * rx, and request that the used index be updated. For VIRTIO_ID_NET the
 * payload iovec (iov[1]) gets len minus the virtio_net_hdr size.
 * NOTE(review): the line between the two assignments is not visible;
 * presumably an else, so that other types set iov[0].iov_len = len - confirm.
 */
377 static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
378 struct mic_copy_desc *copy, ssize_t len)
380 copy->vr_idx = tx ? 0 : 1;
381 copy->update_used = true;
382 if (type == VIRTIO_ID_NET)
383 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
385 copy->iov[0].iov_len = len;
388 /* Central API which triggers the copies */
/* Issues the MIC_VIRTIO_COPY_DESC ioctl on `fd` with `copy` and logs errno on
 * failure. NOTE(review): the return type, the failure test and the return
 * statement are not visible in this listing. */
390 mic_virtio_copy(struct mic_info *mic, int fd,
391 struct mic_vring *vr, struct mic_copy_desc *copy)
395 ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
397 mpsslog("%s %s %d errno %s ret %d\n",
398 mic->name, __func__, __LINE__,
399 strerror(errno), ret);
/*
 * Byte size of a vring with `num` entries: the descriptor table plus the
 * available ring (3 + num 16-bit words), rounded up to `align`, followed by
 * the used ring (3 16-bit words plus num used elements); the total is rounded
 * up to a multiple of 4. `align` is presumably a power of two, given the
 * mask arithmetic - confirm.
 */
404 static inline unsigned _vring_size(unsigned int num, unsigned long align)
406 return _ALIGN_UP(((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
407 + align - 1) & ~(align - 1))
408 + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num, 4);
412 * This initialization routine requires at least one
413 * vring i.e. vr0. vr1 is optional.
/*
 * Maps the device page and `num_vq` vrings of `fd` read-only and shared,
 * records the mapping with set_dp(), and points vr0 (and vr1) at their ring
 * and info areas. The first ring starts at MIC_DEVICE_PAGE_END, the second
 * one vr_size bytes later. Each ring info magic is asserted to be
 * MIC_MAGIC + type (+ 1 for vr1). Callers compare the result to MAP_FAILED.
 * NOTE(review): the return type, the vring_init calls whose argument lines
 * appear below, the guard around the vr1 section and the return statements
 * are not visible in this listing.
 */
416 init_vr(struct mic_info *mic, int fd, int type,
417 struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
/* Per-ring footprint: ring plus its info struct, page aligned. */
422 vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
423 MIC_VIRTIO_RING_ALIGN) +
424 sizeof(struct _mic_vring_info));
425 va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
426 PROT_READ, MAP_SHARED, fd, 0);
427 if (MAP_FAILED == va) {
428 mpsslog("%s %s %d mmap failed errno %s\n",
429 mic->name, __func__, __LINE__,
433 set_dp(mic, type, va);
434 vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
435 vr0->info = vr0->va +
436 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
438 MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
439 mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
440 __func__, mic->name, vr0->va, vr0->info, vr_size,
441 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
442 mpsslog("magic 0x%x expected 0x%x\n",
443 le32toh(vr0->info->magic), MIC_MAGIC + type);
444 assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
/* Second ring; start_virtblk() passes NULL for vr1, so this part is
 * presumably guarded by a check that is not visible here - confirm. */
446 vr1->va = (struct mic_vring *)
447 &va[MIC_DEVICE_PAGE_END + vr_size];
448 vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
449 MIC_VIRTIO_RING_ALIGN);
451 MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
452 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
453 __func__, mic->name, vr1->va, vr1->info, vr_size,
454 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
455 mpsslog("magic 0x%x expected 0x%x\n",
456 le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
457 assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
/*
 * Block in poll() on `fd` until the device descriptor for `type` reports
 * VIRTIO_CONFIG_S_DRIVER_OK in its status, logging every status change seen
 * along the way. Callers store the result in err.
 * NOTE(review): the return type, the loop construct, the pollfd.fd
 * assignment and the return statements are not visible in this listing.
 */
464 wait_for_card_driver(struct mic_info *mic, int fd, int type)
466 struct pollfd pollfd;
468 struct mic_device_desc *desc = get_device_desc(mic, type);
473 prev_status = desc->status;
475 mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
476 mic->name, __func__, type, desc->status);
479 pollfd.events = POLLIN;
/* Timeout of -1: wait indefinitely. */
481 err = poll(&pollfd, 1, -1);
483 mpsslog("%s %s poll failed %s\n",
484 mic->name, __func__, strerror(errno));
488 if (pollfd.revents) {
489 if (desc->status != prev_status) {
490 mpsslog("%s %s Waiting... desc-> type %d "
492 mic->name, __func__, type,
494 prev_status = desc->status;
496 if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
497 mpsslog("%s %s poll.revents %d\n",
498 mic->name, __func__, pollfd.revents);
499 mpsslog("%s %s desc-> type %d status 0x%x\n",
500 mic->name, __func__, type,
509 /* Spin till we have some descriptors */
/* Loops while the ring avail->idx (little-endian) still equals the avail_idx
 * snapshot taken from vr->info on entry.
 * NOTE(review): the loop body around the log call is only partly visible. */
511 spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
513 __u16 avail_idx = read_avail_idx(vr);
515 while (avail_idx == le16toh(READ_ONCE(vr->vr.avail->idx))) {
517 mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
519 le16toh(vr->vr.avail->idx), vr->info->avail_idx);
/*
 * Virtio net backend loop for one MIC (`arg` is the struct mic_info).
 * From the visible lines: creates a TAP device named mic<id> with tun_alloc(),
 * configures it with tap_configure(), maps the tx and rx vrings with
 * init_vr(), then polls the TAP fd and the virtio net fd. Data read from the
 * TAP is pushed to the card with mic_virtio_copy() on tx_vr; descriptors
 * available on rx_vr are copied out and written to the TAP with writev().
 * NOTE(review): many lines (return type, braces, returns, the copy.iov and
 * copy.iovcnt setup, any preprocessor guards) are not visible in this listing.
 * NOTE(review): the `©` characters below look like a mis-decoded `&copy`
 * (address of the local copy descriptor) - confirm against the upstream file.
 * NOTE(review): several calls pass `copy` itself to disp_iovec() and
 * sum_iovec_len(), both of which are declared with a pointer parameter.
 */
526 virtio_net(void *arg)
/* Index 0 buffers are used for TAP-to-card traffic (see hdr below), index 1
 * for card-to-TAP (see iov1). Both are static, so shared by every caller. */
528 static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
529 static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
530 struct iovec vnet_iov[2][2] = {
531 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
532 { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
533 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
534 { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
536 struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
537 struct mic_info *mic = (struct mic_info *)arg;
538 char if_name[IFNAMSIZ];
539 struct pollfd net_poll[MAX_NET_FD];
540 struct mic_vring tx_vr, rx_vr;
541 struct mic_copy_desc copy;
542 struct mic_device_desc *desc;
545 snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
546 mic->mic_net.tap_fd = tun_alloc(mic, if_name);
547 if (mic->mic_net.tap_fd < 0)
550 if (tap_configure(mic, if_name))
552 mpsslog("MIC name %s id %d\n", mic->name, mic->id);
554 net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
555 net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
556 net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
557 net_poll[NET_FD_TUN].events = POLLIN;
559 if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
560 VIRTIO_ID_NET, &tx_vr, &rx_vr,
561 virtnet_dev_page.dd.num_vq)) {
562 mpsslog("%s init_vr failed %s\n",
563 mic->name, strerror(errno));
568 desc = get_device_desc(mic, VIRTIO_ID_NET);
573 net_poll[NET_FD_VIRTIO_NET].revents = 0;
574 net_poll[NET_FD_TUN].revents = 0;
576 /* Start polling for data from tap and virtio net */
577 err = poll(net_poll, 2, -1);
579 mpsslog("%s poll failed %s\n",
580 __func__, strerror(errno));
/* If the card driver has not set DRIVER_OK, wait for it first. */
583 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
584 err = wait_for_card_driver(mic,
585 mic->mic_net.virtio_net_fd,
588 mpsslog("%s %s %d Exiting...\n",
589 mic->name, __func__, __LINE__);
594 * Check if there is data to be read from TUN and write to
595 * virtio net fd if there is.
597 if (net_poll[NET_FD_TUN].revents & POLLIN) {
599 len = readv(net_poll[NET_FD_TUN].fd,
600 copy.iov, copy.iovcnt);
602 struct virtio_net_hdr *hdr
603 = (struct virtio_net_hdr *)vnet_hdr[0];
605 /* Disable checksums on the card since we are on
606 a reliable PCIe link */
607 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
609 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
610 __func__, __LINE__, hdr->flags);
611 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
612 copy.out_len, hdr->gso_type);
615 disp_iovec(mic, copy, __func__, __LINE__);
616 mpsslog("%s %s %d read from tap 0x%lx\n",
617 mic->name, __func__, __LINE__,
/* Wait until the tx ring has descriptors before starting the copy. */
620 spin_for_descriptors(mic, &tx_vr);
621 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, ©,
624 err = mic_virtio_copy(mic,
625 mic->mic_net.virtio_net_fd, &tx_vr,
628 mpsslog("%s %s %d mic_virtio_copy %s\n",
629 mic->name, __func__, __LINE__,
633 verify_out_len(mic, ©);
635 disp_iovec(mic, copy, __func__, __LINE__);
636 mpsslog("%s %s %d wrote to net 0x%lx\n",
637 mic->name, __func__, __LINE__,
638 sum_iovec_len(©));
640 /* Reinitialize IOV for next run */
641 iov0[1].iov_len = MAX_NET_PKT_SIZE;
642 } else if (len < 0) {
643 disp_iovec(mic, ©, __func__, __LINE__);
644 mpsslog("%s %s %d read failed %s ", mic->name,
645 __func__, __LINE__, strerror(errno));
646 mpsslog("cnt %d sum %zd\n",
647 copy.iovcnt, sum_iovec_len(©));
652 * Check if there is data to be read from virtio net and
653 * write to TUN if there is.
655 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
/* Drain the rx ring until its avail index matches info->avail_idx. */
656 while (rx_vr.info->avail_idx !=
657 le16toh(rx_vr.vr.avail->idx)) {
659 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, ©,
661 + sizeof(struct virtio_net_hdr));
663 err = mic_virtio_copy(mic,
664 mic->mic_net.virtio_net_fd, &rx_vr,
668 struct virtio_net_hdr *hdr
669 = (struct virtio_net_hdr *)
672 mpsslog("%s %s %d hdr->flags 0x%x, ",
673 mic->name, __func__, __LINE__,
675 mpsslog("out_len %d gso_type 0x%x\n",
679 /* Set the correct output iov_len */
680 iov1[1].iov_len = copy.out_len -
681 sizeof(struct virtio_net_hdr);
682 verify_out_len(mic, ©);
684 disp_iovec(mic, copy, __func__,
687 mic->name, __func__, __LINE__);
688 mpsslog("read from net 0x%lx\n",
689 sum_iovec_len(copy));
691 len = writev(net_poll[NET_FD_TUN].fd,
692 copy.iov, copy.iovcnt);
/* A short or failed write to the TAP is only logged. */
693 if (len != sum_iovec_len(©)) {
694 mpsslog("Tun write failed %s ",
696 mpsslog("len 0x%zx ", len);
697 mpsslog("read_len 0x%zx\n",
698 sum_iovec_len(©));
701 disp_iovec(mic, ©, __func__,
706 mpsslog("wrote to tap 0x%lx\n",
711 mpsslog("%s %s %d mic_virtio_copy %s\n",
712 mic->name, __func__, __LINE__,
718 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
719 mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
/* Indices into the console_poll[] array used by virtio_console(): slot 0 is
 * the virtio console fd, slot 1 the pseudoterminal master. */
726 #define VIRTIO_CONSOLE_FD 0
727 #define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
728 #define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */
/* NOTE(review): MAX_BUFFER_SIZE is not referenced in the visible lines. */
729 #define MAX_BUFFER_SIZE PAGE_SIZE
/*
 * Virtio console backend loop for one MIC (`arg` is the struct mic_info).
 * From the visible lines: opens a pseudoterminal master, prints and logs the
 * slave name, grants and unlocks it, maps the tx and rx vrings with
 * init_vr(), then polls the pty and the virtio console fd. Data read from the
 * pty goes to the card through mic_virtio_copy() on tx_vr; descriptors
 * available on rx_vr are copied out and written back (the write call line
 * itself is not visible).
 * NOTE(review): many lines (return type, braces, returns, the copy.iov and
 * copy.iovcnt setup, any preprocessor guards) are not visible in this listing.
 * NOTE(review): the `©` characters below look like a mis-decoded `&copy`
 * (address of the local copy descriptor) - confirm against the upstream file.
 * NOTE(review): several calls pass `copy` itself to disp_iovec() and
 * sum_iovec_len(), both of which are declared with a pointer parameter.
 */
732 virtio_console(void *arg)
/* One page per direction: index 0 for pty-to-card (iov0), index 1 for
 * card-to-pty (iov1). Static, so shared by every caller. */
734 static __u8 vcons_buf[2][PAGE_SIZE];
735 struct iovec vcons_iov[2] = {
736 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
737 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
739 struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
740 struct mic_info *mic = (struct mic_info *)arg;
742 struct pollfd console_poll[MAX_CONSOLE_FD];
746 struct mic_vring tx_vr, rx_vr;
747 struct mic_copy_desc copy;
748 struct mic_device_desc *desc;
750 pty_fd = posix_openpt(O_RDWR);
752 mpsslog("can't open a pseudoterminal master device: %s\n",
756 pts_name = ptsname(pty_fd);
757 if (pts_name == NULL) {
758 mpsslog("can't get pts name\n");
761 printf("%s console message goes to %s\n", mic->name, pts_name);
762 mpsslog("%s console message goes to %s\n", mic->name, pts_name);
763 err = grantpt(pty_fd);
765 mpsslog("can't grant access: %s %s\n",
766 pts_name, strerror(errno));
769 err = unlockpt(pty_fd);
771 mpsslog("can't unlock a pseudoterminal: %s %s\n",
772 pts_name, strerror(errno));
775 console_poll[MONITOR_FD].fd = pty_fd;
776 console_poll[MONITOR_FD].events = POLLIN;
778 console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
779 console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
781 if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
782 VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
783 virtcons_dev_page.dd.num_vq)) {
784 mpsslog("%s init_vr failed %s\n",
785 mic->name, strerror(errno));
790 desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
793 console_poll[MONITOR_FD].revents = 0;
794 console_poll[VIRTIO_CONSOLE_FD].revents = 0;
795 err = poll(console_poll, MAX_CONSOLE_FD, -1);
797 mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
/* If the card driver has not set DRIVER_OK, wait for it first. */
801 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
802 err = wait_for_card_driver(mic,
803 mic->mic_console.virtio_console_fd,
806 mpsslog("%s %s %d Exiting...\n",
807 mic->name, __func__, __LINE__);
/* Input typed on the pty: forward it to the card. */
812 if (console_poll[MONITOR_FD].revents & POLLIN) {
814 len = readv(pty_fd, copy.iov, copy.iovcnt);
817 disp_iovec(mic, copy, __func__, __LINE__);
818 mpsslog("%s %s %d read from tap 0x%lx\n",
819 mic->name, __func__, __LINE__,
822 spin_for_descriptors(mic, &tx_vr);
823 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
826 err = mic_virtio_copy(mic,
827 mic->mic_console.virtio_console_fd,
830 mpsslog("%s %s %d mic_virtio_copy %s\n",
831 mic->name, __func__, __LINE__,
835 verify_out_len(mic, ©);
837 disp_iovec(mic, copy, __func__, __LINE__);
838 mpsslog("%s %s %d wrote to net 0x%lx\n",
839 mic->name, __func__, __LINE__,
840 sum_iovec_len(copy));
842 /* Reinitialize IOV for next run */
843 iov0->iov_len = PAGE_SIZE;
844 } else if (len < 0) {
845 disp_iovec(mic, ©, __func__, __LINE__);
846 mpsslog("%s %s %d read failed %s ",
847 mic->name, __func__, __LINE__,
849 mpsslog("cnt %d sum %zd\n",
850 copy.iovcnt, sum_iovec_len(©));
/* Output from the card: drain the rx ring. */
854 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
855 while (rx_vr.info->avail_idx !=
856 le16toh(rx_vr.vr.avail->idx)) {
858 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
861 err = mic_virtio_copy(mic,
862 mic->mic_console.virtio_console_fd,
865 /* Set the correct output iov_len */
866 iov1->iov_len = copy.out_len;
867 verify_out_len(mic, ©);
869 disp_iovec(mic, copy, __func__,
872 mic->name, __func__, __LINE__);
873 mpsslog("read from net 0x%lx\n",
874 sum_iovec_len(copy));
877 copy.iov, copy.iovcnt);
878 if (len != sum_iovec_len(©)) {
879 mpsslog("Tun write failed %s ",
881 mpsslog("len 0x%zx ", len);
882 mpsslog("read_len 0x%zx\n",
883 sum_iovec_len(©));
886 disp_iovec(mic, copy, __func__,
891 mpsslog("wrote to tap 0x%lx\n",
896 mpsslog("%s %s %d mic_virtio_copy %s\n",
897 mic->name, __func__, __LINE__,
/* NOTE(review): this indexes console_poll with the net macro
 * NET_FD_VIRTIO_NET; it has the same value (0) as VIRTIO_CONSOLE_FD, which is
 * presumably what was meant. */
903 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
904 mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
/*
 * Open /dev/vop_virtio<id> and register the device described by `dd` with the
 * MIC_VIRTIO_ADD_DEVICE ioctl. The resulting fd is stored in the per-device
 * slot of `mic` matching the descriptor type (net, console or block).
 * NOTE(review): the return type, the error-path handling after each log call,
 * the switch header and the net case label are not visible in this listing.
 */
913 add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
918 snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id);
919 fd = open(path, O_RDWR);
921 mpsslog("Could not open %s %s\n", path, strerror(errno));
925 err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
927 mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
933 mic->mic_net.virtio_net_fd = fd;
934 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
936 case VIRTIO_ID_CONSOLE:
937 mic->mic_console.virtio_console_fd = fd;
938 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
940 case VIRTIO_ID_BLOCK:
941 mic->mic_virtblk.virtio_block_fd = fd;
942 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
/*
 * Read <mic_config_dir>/mpssd<NNN>.conf and find the first line that starts
 * with the virtblk_backend key. The text after the `=` on that line is copied
 * into a freshly malloc-ed mic->mic_virtblk.backend_file. open_backend()
 * treats a false result as failure.
 * NOTE(review): the return type, the NULL checks, the newline stripping
 * assignment, the fclose call (if any) and the returns are not visible.
 */
948 set_backend_file(struct mic_info *mic)
/* buff first holds the config path, then is reused as the line buffer. */
951 char buff[PATH_MAX], *line, *evv, *p;
953 snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
954 config = fopen(buff, "r");
957 do { /* look for "virtblk_backend=XXXX" */
958 line = fgets(buff, PATH_MAX, config);
963 p = strchr(line, '\n');
966 } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
970 evv = strchr(line, '=');
/* strlen(evv) counts the `=` too, so this leaves room for the value plus its
 * terminating NUL. */
973 mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
974 if (mic->mic_virtblk.backend_file == NULL) {
975 mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
978 strcpy(mic->mic_virtblk.backend_file, evv + 1);
/* Sector size in bytes used for capacity and offset arithmetic. */
982 #define SECTOR_SIZE 512
/*
 * Determine the backend size with lseek() on the already opened backend fd
 * and publish it as blk_config.capacity: size divided by SECTOR_SIZE, rounded
 * up for a partial trailing sector, then converted to little-endian.
 * open_backend() treats a false result as failure.
 * NOTE(review): the return type, the lseek whence argument and the returns
 * are not visible in this listing.
 */
984 set_backend_size(struct mic_info *mic)
986 mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
988 if (mic->mic_virtblk.backend_size < 0) {
989 mpsslog("%s: can't seek: %s\n",
990 mic->name, mic->mic_virtblk.backend_file);
993 virtblk_dev_page.blk_config.capacity =
994 mic->mic_virtblk.backend_size / SECTOR_SIZE;
995 if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
996 virtblk_dev_page.blk_config.capacity++;
/* Convert in place to the little-endian layout used in the device page. */
998 virtblk_dev_page.blk_config.capacity =
999 htole64(virtblk_dev_page.blk_config.capacity);
/*
 * Prepare the virtio block backend: look up the backend file name, open it
 * read-write, record its size and map it shared and read-write. The close()
 * and free() at the end release the fd and the file name; they are presumably
 * the cleanup path for the failures above - confirm. virtio_block() treats a
 * false result as no backend.
 * NOTE(review): the return type, the goto/label lines and the returns are not
 * visible in this listing.
 */
1005 open_backend(struct mic_info *mic)
1007 if (!set_backend_file(mic))
1009 mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
1010 if (mic->mic_virtblk.backend < 0) {
1011 mpsslog("%s: can't open: %s\n", mic->name,
1012 mic->mic_virtblk.backend_file);
1015 if (!set_backend_size(mic))
1017 mic->mic_virtblk.backend_addr = mmap(NULL,
1018 mic->mic_virtblk.backend_size,
1019 PROT_READ|PROT_WRITE, MAP_SHARED,
1020 mic->mic_virtblk.backend, 0L);
1021 if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1022 mpsslog("%s: can't map: %s %s\n",
1023 mic->name, mic->mic_virtblk.backend_file,
1030 close(mic->mic_virtblk.backend);
1032 free(mic->mic_virtblk.backend_file);
/* Undo open_backend(): unmap the backend file, close its fd and free the
 * stored file name. Return values of munmap and close are not checked. */
1038 close_backend(struct mic_info *mic)
1040 munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1041 close(mic->mic_virtblk.backend);
1042 free(mic->mic_virtblk.backend_file);
/*
 * Bring up the virtio block device: check that blk_config is 8-byte aligned,
 * register the device page with add_virtio_device() and map its single vring
 * (vr1 is passed as NULL). virtio_block() treats a false result as failure.
 * NOTE(review): the return type and the returns are not visible.
 * NOTE(review): in the visible lines the result of add_virtio_device() is not
 * checked before virtio_block_fd is used by init_vr().
 */
1046 start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1048 if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1049 mpsslog("%s: blk_config is not 8 byte aligned.\n",
1053 add_virtio_device(mic, &virtblk_dev_page.dd);
1054 if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1055 VIRTIO_ID_BLOCK, vring, NULL,
1056 virtblk_dev_page.dd.num_vq)) {
1057 mpsslog("%s init_vr failed %s\n",
1058 mic->name, strerror(errno));
/*
 * Tear down the virtio block device: unmap the region that init_vr() mapped
 * (same size computation: device page plus num_vq page-aligned rings) and
 * close the virtio block fd. A munmap failure is only logged.
 * NOTE(review): the return type and the failure test are not visible.
 */
1065 stop_virtblk(struct mic_info *mic)
1069 vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
1070 MIC_VIRTIO_RING_ALIGN) +
1071 sizeof(struct _mic_vring_info));
1072 ret = munmap(mic->mic_virtblk.block_dp,
1073 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1075 mpsslog("%s munmap errno %d\n", mic->name, errno);
1076 close(mic->mic_virtblk.virtio_block_fd);
/*
 * Validate the first descriptor of a block request: its length must equal
 * sizeof(struct virtio_blk_outhdr), it must chain to another descriptor
 * (F_NEXT set) and it must not be flagged F_WRITE. Each violation is logged.
 * NOTE(review): the return type and the values returned are not visible;
 * virtio_block() stores the result in status.
 */
1080 header_error_check(struct vring_desc *desc)
1082 if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1083 mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1084 __func__, __LINE__);
1087 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1088 mpsslog("%s() %d: alone\n",
1089 __func__, __LINE__);
1092 if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1093 mpsslog("%s() %d: not read\n",
1094 __func__, __LINE__);
/*
 * Copy the request header into `hdr` with one MIC_VIRTIO_COPY_DESC ioctl on
 * vring 0, without updating the used index. Returns the ioctl result.
 * NOTE(review): the return type and the lines setting copy.iov and
 * copy.iovcnt are not visible; desc_idx is not used in the visible lines.
 * NOTE(review): `©` below looks like a mis-decoded `&copy` - confirm.
 */
1101 read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1104 struct mic_copy_desc copy;
1106 iovec.iov_len = sizeof(*hdr);
1107 iovec.iov_base = hdr;
1110 copy.vr_idx = 0; /* only one vring on virtio_block */
1111 copy.update_used = false; /* do not update used index */
1112 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©);
/*
 * Transfer the data buffers of a block request: one MIC_VIRTIO_COPY_DESC
 * ioctl on vring 0 covering `iovcnt` iovecs, without updating the used index.
 * Returns the ioctl result.
 * NOTE(review): the return type and the line setting copy.iov are not visible.
 * NOTE(review): `©` below looks like a mis-decoded `&copy` - confirm.
 */
1116 transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1118 struct mic_copy_desc copy;
1121 copy.iovcnt = iovcnt;
1122 copy.vr_idx = 0; /* only one vring on virtio_block */
1123 copy.update_used = false; /* do not update used index */
1124 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©);
/* Validate the status descriptor of a block request: its length must be one
 * byte; a mismatch is logged. NOTE(review): the return type and the returned
 * values are not visible; virtio_block() stores the result in status. */
1128 status_error_check(struct vring_desc *desc)
1130 if (le32toh(desc->len) != sizeof(__u8)) {
1131 mpsslog("%s() %d: length is not sizeof(status)\n",
1132 __func__, __LINE__);
/*
 * Write the one-byte request status with a MIC_VIRTIO_COPY_DESC ioctl on
 * vring 0 and, unlike read_header() and transfer_blocks(), ask for the used
 * index to be updated. Returns the ioctl result.
 * NOTE(review): the return type and the lines setting copy.iov and
 * copy.iovcnt are not visible in this listing.
 * NOTE(review): `©` below looks like a mis-decoded `&copy` - confirm.
 */
1139 write_status(int fd, __u8 *status)
1142 struct mic_copy_desc copy;
1144 iovec.iov_base = status;
1145 iovec.iov_len = sizeof(*status);
1148 copy.vr_idx = 0; /* only one vring on virtio_block */
1149 copy.update_used = true; /* Update used index */
1150 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©);
/* Fallback for headers that do not define VIRTIO_BLK_T_GET_ID.
 * NOTE(review): the matching #endif is not visible in this listing. */
1153 #ifndef VIRTIO_BLK_T_GET_ID
1154 #define VIRTIO_BLK_T_GET_ID 8
/*
 * Virtio block backend loop for one MIC (`arg` is the struct mic_info).
 * From the visible lines: repeatedly tries open_backend(); without a backend
 * it idles until mic_virtblk.signaled is set. With a backend it starts the
 * device, allocates seg_max iovecs and polls the block fd with a one second
 * timeout until signaled. For each available request it validates and reads
 * the header, builds iovecs that point into the mmap-ed backend at
 * hdr.sector * SECTOR_SIZE, transfers the data and writes the status byte.
 * NOTE(review): many lines (return type, braces, cleanup labels, the iovec
 * bookkeeping inside the descriptor loop) are not visible in this listing.
 */
1158 virtio_block(void *arg)
1160 struct mic_info *mic = (struct mic_info *)arg;
1162 struct pollfd block_poll;
1163 struct mic_vring vring;
1166 struct vring_desc *desc;
1167 struct iovec *iovec, *piov;
1169 __u32 buffer_desc_idx;
1170 struct virtio_blk_outhdr hdr;
1173 for (;;) { /* forever */
1174 if (!open_backend(mic)) { /* No virtblk */
/* signaled is set by the SIGUSR1 handler change_virtblk_backend(). */
1175 for (mic->mic_virtblk.signaled = 0;
1176 !mic->mic_virtblk.signaled;)
1181 /* backend file is specified. */
1182 if (!start_virtblk(mic, &vring))
1183 goto _close_backend;
1184 iovec = malloc(sizeof(*iovec) *
1185 le32toh(virtblk_dev_page.blk_config.seg_max));
1187 mpsslog("%s: can't alloc iovec: %s\n",
1188 mic->name, strerror(ENOMEM));
1192 block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1193 block_poll.events = POLLIN;
1194 for (mic->mic_virtblk.signaled = 0;
1195 !mic->mic_virtblk.signaled;) {
1196 block_poll.revents = 0;
1197 /* timeout in 1 sec to see signaled */
1198 ret = poll(&block_poll, 1, 1000);
1200 mpsslog("%s %d: poll failed: %s\n",
1206 if (!(block_poll.revents & POLLIN)) {
1208 mpsslog("%s %d: block_poll.revents=0x%x\n",
1209 __func__, __LINE__, block_poll.revents);
1215 while (vring.info->avail_idx !=
1216 le16toh(vring.vr.avail->idx)) {
1217 /* read header element */
1219 vring.info->avail_idx &
1222 vring.vr.avail->ring[avail_idx]);
1223 desc = &vring.vr.desc[desc_idx];
1225 mpsslog("%s() %d: avail_idx=%d ",
1227 vring.info->avail_idx);
1228 mpsslog("vring.vr.num=%d desc=%p\n",
1229 vring.vr.num, desc);
1231 status = header_error_check(desc);
1233 mic->mic_virtblk.virtio_block_fd,
1236 mpsslog("%s() %d %s: ret=%d %s\n",
1242 /* buffer element */
/* fos: position inside the mmap-ed backend for the requested sector. */
1245 fos = mic->mic_virtblk.backend_addr +
1246 (hdr.sector * SECTOR_SIZE);
1247 buffer_desc_idx = next_desc(desc);
1248 desc_idx = buffer_desc_idx;
/* NOTE(review): desc->flags and desc->len are used here without the
 * le16toh/le32toh conversions applied in header_error_check() and
 * next_desc(); harmless on a little-endian host - confirm intent. */
1249 for (desc = &vring.vr.desc[buffer_desc_idx];
1250 desc->flags & VRING_DESC_F_NEXT;
1251 desc_idx = next_desc(desc),
1252 desc = &vring.vr.desc[desc_idx]) {
1253 piov->iov_len = desc->len;
1254 piov->iov_base = fos;
1258 /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1259 if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1260 VIRTIO_BLK_T_GET_ID)) {
1262 VIRTIO_BLK_T_IN - does not do
1263 anything. Probably for documenting.
1264 VIRTIO_BLK_T_SCSI_CMD - for
1266 VIRTIO_BLK_T_FLUSH - turned off in
1268 VIRTIO_BLK_T_BARRIER - defined but not
1271 mpsslog("%s() %d: type %x ",
1274 mpsslog("is not supported\n");
1278 ret = transfer_blocks(
1279 mic->mic_virtblk.virtio_block_fd,
1286 /* write status and update used pointer */
1288 status = status_error_check(desc);
1290 mic->mic_virtblk.virtio_block_fd,
1293 mpsslog("%s() %d: write status=%d on desc=%p\n",
/*
 * Request a card reset by writing `reset` to the sysfs state attribute, then
 * read the state back and compare it with `ready`. The counter i starts at
 * RESET_TIMEOUT (120).
 * NOTE(review): the return type, the loop construct, the sleep/decrement and
 * the handling of the state string are not visible in this listing.
 */
1310 reset(struct mic_info *mic)
1312 #define RESET_TIMEOUT 120
1313 int i = RESET_TIMEOUT;
1314 setsysfs(mic->name, "state", "reset");
1317 state = readsysfs(mic->name, "state");
1320 mpsslog("%s: %s %d state %s\n",
1321 mic->name, __func__, __LINE__, state);
1323 if (!strcmp(state, "ready")) {
/*
 * Map the textual shutdown_status value (nop, crashed, halted, poweroff,
 * restart) to a status code; an unknown string is logged as a BUG.
 * NOTE(review): the return type and every return except MIC_POWER_OFF are
 * not visible in this listing.
 */
1335 get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1337 if (!strcmp(shutdown_status, "nop"))
1339 if (!strcmp(shutdown_status, "crashed"))
1341 if (!strcmp(shutdown_status, "halted"))
1343 if (!strcmp(shutdown_status, "poweroff"))
1344 return MIC_POWER_OFF;
1345 if (!strcmp(shutdown_status, "restart"))
1347 mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
/*
 * Read the sysfs state attribute of `mic` and translate it to the matching
 * enum mic_states value (ready, booting, online, shutting_down, reset_failed,
 * resetting). An unknown string is logged as a BUG.
 * NOTE(review): the NULL handling for the string read from sysfs, its
 * release and the return statement are not visible in this listing.
 */
1352 static int get_mic_state(struct mic_info *mic)
1355 enum mic_states mic_state;
1358 state = readsysfs(mic->name, "state");
1361 mpsslog("%s: %s %d state %s\n",
1362 mic->name, __func__, __LINE__, state);
1364 if (!strcmp(state, "ready")) {
1365 mic_state = MIC_READY;
1366 } else if (!strcmp(state, "booting")) {
1367 mic_state = MIC_BOOTING;
1368 } else if (!strcmp(state, "online")) {
1369 mic_state = MIC_ONLINE;
1370 } else if (!strcmp(state, "shutting_down")) {
1371 mic_state = MIC_SHUTTING_DOWN;
1372 } else if (!strcmp(state, "reset_failed")) {
1373 mic_state = MIC_RESET_FAILED;
1374 } else if (!strcmp(state, "resetting")) {
1375 mic_state = MIC_RESETTING;
1377 mpsslog("%s: BUG invalid state %s\n", mic->name, state);
/*
 * Read the sysfs shutdown_status attribute and act on the decoded value; the
 * counter i starts at SHUTDOWN_TIMEOUT (60) and a timeout is logged at the
 * end.
 * NOTE(review): the loop construct, the case labels and the actions taken
 * per status are not visible in this listing.
 * NOTE(review): the final log prints shutdown_status after the free() calls
 * above; if the last free() is on the path that reaches it, this is a
 * use-after-free - confirm once the full body is visible.
 */
1385 static void mic_handle_shutdown(struct mic_info *mic)
1387 #define SHUTDOWN_TIMEOUT 60
1388 int i = SHUTDOWN_TIMEOUT;
1389 char *shutdown_status;
1391 shutdown_status = readsysfs(mic->name, "shutdown_status");
1392 if (!shutdown_status) {
1396 mpsslog("%s: %s %d shutdown_status %s\n",
1397 mic->name, __func__, __LINE__, shutdown_status);
1398 switch (get_mic_shutdown_status(mic, shutdown_status)) {
1404 free(shutdown_status);
1409 free(shutdown_status);
1415 mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
1416 mic->name, __func__, __LINE__, shutdown_status);
/*
 * Open <MICSYSFSDIR>/<mic name>/state read-only; a failure is logged.
 * NOTE(review): the failure test and the return statement are not visible;
 * callers use the result as a file descriptor.
 */
1420 static int open_state_fd(struct mic_info *mic)
1422 char pathname[PATH_MAX];
1425 snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1426 MICSYSFSDIR, mic->name, "state");
1428 fd = open(pathname, O_RDONLY);
1430 mpsslog("%s: opening file %s failed %s\n",
1431 mic->name, pathname, strerror(errno));
/*
 * Block in poll() (no timeout) waiting for POLLERR or POLLPRI, then rewind
 * `fd` to offset 0 and read up to PAGE_SIZE bytes from it. Each failing step
 * is logged. init_mic() treats a non-zero result as an error.
 * NOTE(review): the ufds[0].fd assignment, the failure tests and the return
 * statements are not visible; the data read into value is not used in the
 * visible lines.
 */
1435 static int block_till_state_change(int fd, struct mic_info *mic)
1437 struct pollfd ufds[1];
1438 char value[PAGE_SIZE];
1442 ufds[0].events = POLLERR | POLLPRI;
1443 ret = poll(ufds, 1, -1);
1445 mpsslog("%s: %s %d poll failed %s\n",
1446 mic->name, __func__, __LINE__, strerror(errno));
1450 ret = lseek(fd, 0, SEEK_SET);
1452 mpsslog("%s: %s %d Failed to seek to 0: %s\n",
1453 mic->name, __func__, __LINE__, strerror(errno));
1457 ret = read(fd, value, sizeof(value));
1459 mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
1460 mic->name, __func__, __LINE__, strerror(errno));
/*
 * State monitor for one MIC (`arg` is the struct mic_info). From the visible
 * lines: opens the sysfs state file, waits for a change with
 * block_till_state_change() and dispatches on get_mic_state(). For
 * MIC_SHUTTING_DOWN it calls mic_handle_shutdown(). Under the
 * MIC_RESET_FAILED label it sends SIGTERM to mic->pid and waits for it, and
 * if boot_on_resume is set it writes `boot` to the state attribute and clears
 * the flag.
 * NOTE(review): the return type, the loop construct, the remaining case
 * labels and the break/goto lines are not visible, so which states reach the
 * kill/waitpid code cannot be told from this listing.
 */
1468 mic_config(void *arg)
1470 struct mic_info *mic = (struct mic_info *)arg;
1471 int fd, ret, stat = 0;
1473 fd = open_state_fd(mic);
1475 mpsslog("%s: %s %d open state fd failed %s\n",
1476 mic->name, __func__, __LINE__, strerror(errno));
1481 ret = block_till_state_change(fd, mic);
1483 mpsslog("%s: %s %d block_till_state_change error %s\n",
1484 mic->name, __func__, __LINE__, strerror(errno));
1488 switch (get_mic_state(mic)) {
1489 case MIC_SHUTTING_DOWN:
1490 mic_handle_shutdown(mic);
1493 case MIC_RESET_FAILED:
1494 ret = kill(mic->pid, SIGTERM);
1495 mpsslog("%s: %s %d kill pid %d ret %d\n",
1496 mic->name, __func__, __LINE__,
1499 ret = waitpid(mic->pid, &stat,
1501 mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1502 mic->name, __func__, __LINE__,
1505 if (mic->boot_on_resume) {
1506 setsysfs(mic->name, "state", "boot");
1507 mic->boot_on_resume = 0;
/*
 * Build the card kernel command line in three snprintf steps (clock options,
 * power-management options, static network configuration with address
 * 172.31.<n>.1) and write it to the sysfs cmdline attribute. The buffer is
 * then reused to log the card IP address, formatted with mic->id + 1.
 * NOTE(review): the return type and the argument line supplying <n> for the
 * third snprintf are not visible in this listing.
 */
1523 set_cmdline(struct mic_info *mic)
1525 char buffer[PATH_MAX];
/* len tracks the write position; each later call appends at buffer + len. */
1528 len = snprintf(buffer, PATH_MAX,
1529 "clocksource=tsc highres=off nohz=off ");
1530 len += snprintf(buffer + len, PATH_MAX - len,
1531 "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1532 len += snprintf(buffer + len, PATH_MAX - len,
1533 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1536 setsysfs(mic->name, "cmdline", buffer);
1537 mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1538 snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
1539 mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
/*
 * Publish the card kernel log buffer location: map
 * /lib/firmware/mic/System.map read-only, find the `__log_buf` and
 * `log_buf_len` symbols and write the 16 characters that start 19 bytes
 * before each match to the sysfs attributes log_buf_addr and log_buf_len.
 * NOTE(review): the return type, the failure tests, the cleanup (munmap and
 * close) and the argument lines of several log calls are not visible.
 * NOTE(review): strstr() runs on the mmap-ed file, which is not guaranteed to
 * be NUL-terminated, and temp - 19 assumes the match is at least 19 bytes
 * into the mapping - confirm both against the System.map format.
 */
1543 set_log_buf_info(struct mic_info *mic)
1547 char system_map[] = "/lib/firmware/mic/System.map";
/* log_buf is zero-initialized and 17 bytes long, so the 16-byte strncpy calls
 * below always leave it NUL-terminated. */
1548 char *map, *temp, log_buf[17] = {'\0'};
1550 fd = open(system_map, O_RDONLY);
1552 mpsslog("%s: Opening System.map failed: %d\n",
1556 len = lseek(fd, 0, SEEK_END);
1558 mpsslog("%s: Reading System.map size failed: %d\n",
1563 map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1564 if (map == MAP_FAILED) {
1565 mpsslog("%s: mmap of System.map failed: %d\n",
1570 temp = strstr(map, "__log_buf");
1572 mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1577 strncpy(log_buf, temp - 19, 16);
1578 setsysfs(mic->name, "log_buf_addr", log_buf);
1579 mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1580 temp = strstr(map, "log_buf_len");
1582 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1587 strncpy(log_buf, temp - 19, 16);
1588 setsysfs(mic->name, "log_buf_len", log_buf);
1589 mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
/*
 * change_virtblk_backend - SA_SIGINFO-style handler (it is installed for
 * SIGUSR1 through the sa_sigaction field elsewhere in this file).
 *
 * Sets mic_virtblk.signaled on every card in the global mic_list. None of
 * the three handler arguments is used in the visible lines.
 */
1595 change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1597 struct mic_info *mic;
/* mic_list is the list head; the cards start at mic_list.next. */
1599 for (mic = mic_list.next; mic != NULL; mic = mic->next)
1600 mic->mic_virtblk.signaled = 1/* true */;
/*
 * set_mic_boot_params - per-card boot parameter setup, called for each card
 * just before its init thread is created.
 *
 * @mic: card descriptor, passed through to the helper below.
 */
1604 set_mic_boot_params(struct mic_info *mic)
/* Publish the log buffer address/length found in System.map. */
1606 set_log_buf_info(mic);
/*
 * Per-card bring-up routine.
 * NOTE(review): the defining line is not visible here; presumably this is the
 * body of init_mic(), which the file forward-declares as
 * "static void *init_mic(void *arg)" -- TODO confirm.
 *
 * arg is the card's struct mic_info, passed as a void pointer.
 *
 * Steps visible below, in order:
 *  - set SIGUSR1 to SIG_IGN;
 *  - open the card's state fd (failure is logged);
 *  - write "boot" to the card's "state" sysfs entry and log a restart
 *    message that includes mic->restart;
 *  - wait with block_till_state_change() and test for MIC_BOOTING;
 *  - add the virtio console and net device pages, then create the console,
 *    net and block threads;
 *  - install change_virtblk_backend() as the SA_SIGINFO handler for SIGUSR1;
 *  - create the per-card config thread.
 * Each pthread_create()/sigaction() call is followed by an mpsslog() failure
 * message. The conditions guarding several of these steps are not visible
 * here, so treat the list as a sequence, not as unconditional.
 */
1613 struct mic_info *mic = (struct mic_info *)arg;
/* Disposition used to ignore SIGUSR1. */
1614 struct sigaction ignore = {
1616 .sa_handler = SIG_IGN
/* Disposition that routes SIGUSR1 to change_virtblk_backend(). */
1618 struct sigaction act = {
1619 .sa_flags = SA_SIGINFO,
1620 .sa_sigaction = change_virtblk_backend,
1622 char buffer[PATH_MAX];
1626 * Currently, one virtio block device is supported for each MIC card
1627 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1628 * The signal informs the virtio block backend about a change in the
1629 * configuration file which specifies the virtio backend file name on
1630 * the host. Virtio block backend then re-reads the configuration file
1631 * and switches to the new block device. This signalling mechanism may
1632 * not be required once multiple virtio block devices are supported by
1635 sigaction(SIGUSR1, &ignore, NULL);
/* Open the fd used to wait for card state changes. */
1637 fd = open_state_fd(mic);
1639 mpsslog("%s: %s %d open state fd failed %s\n",
1640 mic->name, __func__, __LINE__, strerror(errno));
/* Request a boot through the card's "state" sysfs entry. */
1646 snprintf(buffer, PATH_MAX, "boot");
1647 setsysfs(mic->name, "state", buffer);
1648 mpsslog("%s restarting mic %d\n",
1649 mic->name, mic->restart);
/* A non-zero return from the wait is logged as an error. */
1654 while (block_till_state_change(fd, mic)) {
1655 mpsslog("%s: %s %d block_till_state_change error %s\n",
1656 mic->name, __func__, __LINE__, strerror(errno));
/* Check whether the card has reached MIC_BOOTING. */
1661 if (get_mic_state(mic) == MIC_BOOTING)
/* Add the virtio console and net device pages for this card. */
1668 add_virtio_device(mic, &virtcons_dev_page.dd);
1669 add_virtio_device(mic, &virtnet_dev_page.dd);
/* Console worker thread; its entry point is virtio_console(). */
1670 err = pthread_create(&mic->mic_console.console_thread, NULL,
1671 virtio_console, mic);
1673 mpsslog("%s virtcons pthread_create failed %s\n",
1674 mic->name, strerror(err));
/* Net worker thread (entry point not visible here). */
1675 err = pthread_create(&mic->mic_net.net_thread, NULL,
1678 mpsslog("%s virtnet pthread_create failed %s\n",
1679 mic->name, strerror(err));
/* Block worker thread (entry point not visible here). */
1680 err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1683 mpsslog("%s virtblk pthread_create failed %s\n",
1684 mic->name, strerror(err));
/* Clear the handler's signal mask, then install it for SIGUSR1. */
1685 sigemptyset(&act.sa_mask);
1686 err = sigaction(SIGUSR1, &act, NULL);
1688 mpsslog("%s sigaction SIGUSR1 failed %s\n",
1689 mic->name, strerror(errno));
/* NOTE(review): the fork() this message refers to is not visible here. */
1693 mpsslog("fork failed MIC name %s id %d errno %d\n",
1694 mic->name, mic->id, errno);
/* Per-card config thread (entry point not visible here). */
1697 err = pthread_create(&mic->config_thread, NULL,
1700 mpsslog("%s mic_config pthread_create failed %s\n",
1701 mic->name, strerror(err));
/*
 * Start-up loop over all known cards (the name of the enclosing function is
 * not visible here).
 *
 * For every card on the global mic_list, set its boot parameters and then
 * create its init thread, which runs init_mic() with the card as argument.
 * A pthread_create() failure is logged with strerror() of the returned code.
 */
1710 struct mic_info *mic;
/* mic_list is the list head; the cards start at mic_list.next. */
1713 for (mic = mic_list.next; mic; mic = mic->next) {
1714 set_mic_boot_params(mic);
1715 err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
1717 mpsslog("%s init_mic pthread_create failed %s\n",
1718 mic->name, strerror(err));
/*
 * Build the global card list from the sysfs directory.
 * NOTE(review): the defining line is not visible here; presumably this is
 * init_mic_list(), whose return value main() logs as the number of devices
 * found -- TODO confirm.
 *
 * Scans MICSYSFSDIR and, for each entry whose name starts with "mic",
 * allocates a zeroed struct mic_info, links it through ->next, and records
 * the id parsed from the characters after "mic" plus a heap copy of the
 * entry name.
 */
/* Start at the list head so new nodes are chained through ->next. */
1728 struct mic_info *mic = &mic_list;
1729 struct dirent *file;
1733 dp = opendir(MICSYSFSDIR);
1737 while ((file = readdir(dp)) != NULL) {
/* Only entries named "mic<something>" are treated as cards. */
1738 if (!strncmp(file->d_name, "mic", 3)) {
1739 mic->next = calloc(1, sizeof(struct mic_info));
/* The id is whatever atoi() parses after the "mic" prefix. */
1742 mic->id = atoi(&file->d_name[3]);
/* The name buffer gets 16 bytes beyond strlen(), so strcpy() fits. */
1743 mic->name = malloc(strlen(file->d_name) + 16);
1745 strcpy(mic->name, file->d_name);
1746 mpsslog("MIC name %s id %d\n", mic->name,
/*
 * mpsslog - printf-style logger for the daemon.
 *
 * @format: printf-style format string, followed by its arguments.
 *
 * Formats the message into a local buffer, then writes it to logfp prefixed
 * with a ctime_r() timestamp and ": ".
 *
 * NOTE(review): vsprintf() does not bound the write into buffer; a
 * size-limited vsnprintf() would be safer if long messages are possible.
 */
1758 mpsslog(char *format, ...)
1768 va_start(args, format);
1769 vsprintf(buffer, format, args);
/* ctime_r() output ends in a newline; the next line overwrites it. */
1773 ts1 = ctime_r(&t, ts);
1774 ts1[strlen(ts1) - 1] = '\0';
1775 fprintf(logfp, "%s: %s", ts1, buffer);
1781 main(int argc, char *argv[])
1788 logfp = fopen(LOGFILE_NAME, "a+");
1790 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1803 mpsslog("MIC Daemon start\n");
1805 cnt = init_mic_list();
1807 mpsslog("MIC module not loaded\n");
1810 mpsslog("MIC found %d devices\n", cnt);