// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 Intel Corporation
 * Author: Johannes Berg <johannes@sipsolutions.net>
 */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/logic_iomem.h>
#include <linux/irqdomain.h>
#include <linux/virtio_pcidev.h>
#include <linux/virtio-uml.h>
#include <linux/delay.h>
#include <linux/msi.h>
#include <asm/unaligned.h>

#define MAX_DEVICES 8
#define MAX_MSI_VECTORS 32
#define CFG_SPACE_SIZE 4096

/* for MSI-X we have a 32-bit payload */
#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32))
#define NUM_IRQ_MSGS	10

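/*
 * Tokens returned by the command virtqueue are either heap buffers that
 * the completion path must kfree(), or caller-owned buffers that must be
 * left alone. The latter are tagged by setting bit 0 of the pointer,
 * which is safe because the buffers are aligned to more than one byte.
 */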
#define HANDLE_NO_FREE(ptr)	((void *)((unsigned long)(ptr) | 1))
#define HANDLE_IS_NO_FREE(ptr)	((unsigned long)(ptr) & 1)

struct um_pci_device {
	struct virtio_device *vdev;

	/* for now just standard BARs */
	u8 resptr[PCI_STD_NUM_BARS];

	struct virtqueue *cmd_vq, *irq_vq;

#define UM_PCI_STAT_WAITING	0
	unsigned long status;

	int irq;
};

struct um_pci_device_reg {
	struct um_pci_device *dev;
	void __iomem *iomem;
};

static struct pci_host_bridge *bridge;
static DEFINE_MUTEX(um_pci_mtx);
static struct um_pci_device_reg um_pci_devices[MAX_DEVICES];
static struct fwnode_handle *um_pci_fwnode;
static struct irq_domain *um_pci_inner_domain;
static struct irq_domain *um_pci_msi_domain;
static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)];

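/*
 * Upper bound on polling for a command response: the wait loop in
 * um_pci_send_cmd() does udelay(1) per iteration, so this corresponds
 * to roughly 40ms before we WARN and give up.
 */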
#define UM_VIRT_PCI_MAXDELAY 40000

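/*
 * Config space and BAR accesses may happen in atomic context, so the
 * scratch message buffer cannot be allocated at access time; instead
 * each CPU owns one, claimed via get_cpu_var() (which disables
 * preemption) for the duration of a transaction.
 */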
struct um_pci_message_buffer {
	struct virtio_pcidev_msg hdr;
	u8 data[8];
};

static struct um_pci_message_buffer __percpu *um_pci_msg_bufs;

static int um_pci_send_cmd(struct um_pci_device *dev,
			   struct virtio_pcidev_msg *cmd,
			   unsigned int cmd_size,
			   const void *extra, unsigned int extra_size,
			   void *out, unsigned int out_size)
{
	struct scatterlist out_sg, extra_sg, in_sg;
	struct scatterlist *sgs_list[] = {
		[0] = &out_sg,
		[1] = extra ? &extra_sg : &in_sg,
		[2] = extra ? &in_sg : NULL,
	};
	struct um_pci_message_buffer *buf;
	int delay_count = 0;
	int ret, len;
	bool posted;

	if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf)))
		return -EINVAL;

	switch (cmd->op) {
	case VIRTIO_PCIDEV_OP_CFG_WRITE:
	case VIRTIO_PCIDEV_OP_MMIO_WRITE:
	case VIRTIO_PCIDEV_OP_MMIO_MEMSET:
		/* in PCI, writes are posted, so don't wait */
		posted = !out;
		WARN_ON(!posted);
		break;
	default:
		posted = false;
		break;
	}

	buf = get_cpu_var(um_pci_msg_bufs);
	if (buf)
		memcpy(buf, cmd, cmd_size);

	if (posted) {
		u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC);

		if (ncmd) {
			memcpy(ncmd, cmd, cmd_size);
			if (extra)
				memcpy(ncmd + cmd_size, extra, extra_size);
			cmd = (void *)ncmd;
			cmd_size += extra_size;
			extra = NULL;
			extra_size = 0;
		} else {
			/* try without allocating memory */
			posted = false;
			cmd = (void *)buf;
		}
	} else {
		cmd = (void *)buf;
	}

	sg_init_one(&out_sg, cmd, cmd_size);
	if (extra)
		sg_init_one(&extra_sg, extra, extra_size);
	if (out)
		sg_init_one(&in_sg, out, out_size);

	/* add to internal virtio queue */
	ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list,
				extra ? 2 : 1, out ? 1 : 0,
				posted ? cmd : HANDLE_NO_FREE(cmd),
				GFP_ATOMIC);
	if (ret)
		goto out;

	if (posted) {
		virtqueue_kick(dev->cmd_vq);
		ret = 0;
		goto out;
	}

	/* kick and poll for getting a response on the queue */
	set_bit(UM_PCI_STAT_WAITING, &dev->status);
	virtqueue_kick(dev->cmd_vq);

	while (1) {
		void *completed = virtqueue_get_buf(dev->cmd_vq, &len);

		if (completed == HANDLE_NO_FREE(cmd))
			break;

		if (completed && !HANDLE_IS_NO_FREE(completed))
			kfree(completed);

		if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) ||
			      ++delay_count > UM_VIRT_PCI_MAXDELAY,
			      "um virt-pci delay: %d", delay_count)) {
			ret = -EIO;
			break;
		}
		udelay(1);
	}

	clear_bit(UM_PCI_STAT_WAITING, &dev->status);

out:
	put_cpu_var(um_pci_msg_bufs);
	return ret;
}

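/*
 * logic_iomem accessors for the per-device config space region: each
 * access becomes a VIRTIO_PCIDEV_OP_CFG_READ/WRITE message. Reads wait
 * for the device's response; writes are posted and return immediately.
 */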
static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
					  int size)
{
	struct um_pci_device_reg *reg = priv;
	struct um_pci_device *dev = reg->dev;
	struct virtio_pcidev_msg hdr = {
		.op = VIRTIO_PCIDEV_OP_CFG_READ,
		.size = size,
		.addr = offset,
	};
	/* buf->data is maximum size - we may only use parts of it */
	struct um_pci_message_buffer *buf;
	u8 *data;
	unsigned long ret = ULONG_MAX;
	size_t bytes = sizeof(buf->data);

	if (!dev)
		return ULONG_MAX;

	buf = get_cpu_var(um_pci_msg_bufs);
	data = buf->data;

	memset(data, 0xff, bytes);

	switch (size) {
	case 1:
	case 2:
	case 4:
#ifdef CONFIG_64BIT
	case 8:
#endif
		break;
	default:
		WARN(1, "invalid config space read size %d\n", size);
		goto out;
	}

	if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, bytes))
		goto out;

	switch (size) {
	case 1:
		ret = data[0];
		break;
	case 2:
		ret = le16_to_cpup((void *)data);
		break;
	case 4:
		ret = le32_to_cpup((void *)data);
		break;
#ifdef CONFIG_64BIT
	case 8:
		ret = le64_to_cpup((void *)data);
		break;
#endif
	}

out:
	put_cpu_var(um_pci_msg_bufs);
	return ret;
}

static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size,
				  unsigned long val)
{
	struct um_pci_device_reg *reg = priv;
	struct um_pci_device *dev = reg->dev;
	struct {
		struct virtio_pcidev_msg hdr;
		/* maximum size - we may only use parts of it */
		u8 data[8];
	} msg = {
		.hdr = {
			.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
			.size = size,
			.addr = offset,
		},
	};

	if (!dev)
		return;

	switch (size) {
	case 1:
		msg.data[0] = (u8)val;
		break;
	case 2:
		put_unaligned_le16(val, (void *)msg.data);
		break;
	case 4:
		put_unaligned_le32(val, (void *)msg.data);
		break;
#ifdef CONFIG_64BIT
	case 8:
		put_unaligned_le64(val, (void *)msg.data);
		break;
#endif
	default:
		WARN(1, "invalid config space write size %d\n", size);
		return;
	}

	WARN_ON(um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0));
}

static const struct logic_iomem_ops um_pci_device_cfgspace_ops = {
	.read = um_pci_cfgspace_read,
	.write = um_pci_cfgspace_write,
};

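/*
 * For BAR accesses, priv points at dev->resptr[bar], which stores the
 * BAR index itself. Subtracting the stored index from the pointer gives
 * &dev->resptr[0], from which container_of() recovers the owning
 * um_pci_device, so a single pointer encodes both the device and the BAR.
 */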
static void um_pci_bar_copy_from(void *priv, void *buffer,
				 unsigned int offset, int size)
{
	u8 *resptr = priv;
	struct um_pci_device *dev = container_of(resptr - *resptr,
						 struct um_pci_device,
						 resptr[0]);
	struct virtio_pcidev_msg hdr = {
		.op = VIRTIO_PCIDEV_OP_MMIO_READ,
		.bar = *resptr,
		.size = size,
		.addr = offset,
	};

	memset(buffer, 0xff, size);

	um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size);
}

static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
				     int size)
{
	/* buf->data is maximum size - we may only use parts of it */
	struct um_pci_message_buffer *buf;
	u8 *data;
	unsigned long ret = ULONG_MAX;

	buf = get_cpu_var(um_pci_msg_bufs);
	data = buf->data;

	switch (size) {
	case 1:
	case 2:
	case 4:
#ifdef CONFIG_64BIT
	case 8:
#endif
		break;
	default:
		WARN(1, "invalid bar read size %d\n", size);
		goto out;
	}

	um_pci_bar_copy_from(priv, data, offset, size);

	switch (size) {
	case 1:
		ret = data[0];
		break;
	case 2:
		ret = le16_to_cpup((void *)data);
		break;
	case 4:
		ret = le32_to_cpup((void *)data);
		break;
#ifdef CONFIG_64BIT
	case 8:
		ret = le64_to_cpup((void *)data);
		break;
#endif
	}

out:
	put_cpu_var(um_pci_msg_bufs);
	return ret;
}

static void um_pci_bar_copy_to(void *priv, unsigned int offset,
			       const void *buffer, int size)
{
	u8 *resptr = priv;
	struct um_pci_device *dev = container_of(resptr - *resptr,
						 struct um_pci_device,
						 resptr[0]);
	struct virtio_pcidev_msg hdr = {
		.op = VIRTIO_PCIDEV_OP_MMIO_WRITE,
		.bar = *resptr,
		.size = size,
		.addr = offset,
	};

	um_pci_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0);
}

static void um_pci_bar_write(void *priv, unsigned int offset, int size,
			     unsigned long val)
{
	/* maximum size - we may only use parts of it */
	u8 data[8];

	switch (size) {
	case 1:
		data[0] = (u8)val;
		break;
	case 2:
		put_unaligned_le16(val, (void *)data);
		break;
	case 4:
		put_unaligned_le32(val, (void *)data);
		break;
#ifdef CONFIG_64BIT
	case 8:
		put_unaligned_le64(val, (void *)data);
		break;
#endif
	default:
		WARN(1, "invalid bar write size %d\n", size);
		return;
	}

	um_pci_bar_copy_to(priv, offset, data, size);
}

static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size)
{
	u8 *resptr = priv;
	struct um_pci_device *dev = container_of(resptr - *resptr,
						 struct um_pci_device,
						 resptr[0]);
	struct {
		struct virtio_pcidev_msg hdr;
		u8 data;
	} msg = {
		.hdr = {
			/* this is a memset of BAR memory, not a cfg write */
			.op = VIRTIO_PCIDEV_OP_MMIO_MEMSET,
			.bar = *resptr,
			.size = size,
			.addr = offset,
		},
		.data = value,
	};

	um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0);
}

static const struct logic_iomem_ops um_pci_device_bar_ops = {
	.read = um_pci_bar_read,
	.write = um_pci_bar_write,
	.set = um_pci_bar_set,
	.copy_from = um_pci_bar_copy_from,
	.copy_to = um_pci_bar_copy_to,
};

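/*
 * Config space for slot N was ioremap()ed at init time; map_bus simply
 * returns iomem + where so that pci_generic_config_read/write go through
 * logic_iomem and land in the accessors above. Only function 0 of each
 * slot is supported for now (devfn / 8 selects the slot).
 */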
static void __iomem *um_pci_map_bus(struct pci_bus *bus, unsigned int devfn,
				    int where)
{
	struct um_pci_device_reg *dev;
	unsigned int busn = bus->number;

	if (busn > 0)
		return NULL;

	/* not allowing functions for now ... */
	if (devfn % 8)
		return NULL;

	if (devfn / 8 >= ARRAY_SIZE(um_pci_devices))
		return NULL;

	dev = &um_pci_devices[devfn / 8];
	if (!dev->dev)
		return NULL;

	return (void __iomem *)((unsigned long)dev->iomem + where);
}

static struct pci_ops um_pci_ops = {
	.map_bus = um_pci_map_bus,
	.read = pci_generic_config_read,
	.write = pci_generic_config_write,
};

static void um_pci_rescan(void)
{
	pci_lock_rescan_remove();
	pci_rescan_bus(bridge->bus);
	pci_unlock_rescan_remove();
}

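/*
 * Interrupt messages are received into a fixed pool of NUM_IRQ_MSGS
 * buffers that are recycled back onto the queue after handling; if
 * re-adding one ever fails, the buffer is simply freed and the pool
 * shrinks by one.
 */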
static void um_pci_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick)
{
	struct scatterlist sg[1];

	sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE);
	if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC))
		kfree(buf);
	else if (kick)
		virtqueue_kick(vq);
}

static void um_pci_handle_irq_message(struct virtqueue *vq,
				      struct virtio_pcidev_msg *msg)
{
	struct virtio_device *vdev = vq->vdev;
	struct um_pci_device *dev = vdev->priv;

	/* we should properly chain interrupts, but on ARCH=um we don't care */

	switch (msg->op) {
	case VIRTIO_PCIDEV_OP_INT:
		generic_handle_irq(dev->irq);
		break;
	case VIRTIO_PCIDEV_OP_MSI:
		/* our MSI message is just the interrupt number */
		if (msg->size == sizeof(u32))
			generic_handle_irq(le32_to_cpup((void *)msg->data));
		else
			generic_handle_irq(le16_to_cpup((void *)msg->data));
		break;
	case VIRTIO_PCIDEV_OP_PME:
		/* nothing to do - we already woke up due to the message */
		break;
	default:
		dev_err(&vdev->dev, "unexpected virt-pci message %d\n",
			msg->op);
		break;
	}
}

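/*
 * Completions on the command queue normally just free posted write
 * buffers. While a synchronous caller is polling (UM_PCI_STAT_WAITING
 * is set), the callback backs off and lets um_pci_send_cmd() consume
 * the used buffers itself.
 */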
static void um_pci_cmd_vq_cb(struct virtqueue *vq)
{
	struct virtio_device *vdev = vq->vdev;
	struct um_pci_device *dev = vdev->priv;
	void *cmd;
	int len;

	if (test_bit(UM_PCI_STAT_WAITING, &dev->status))
		return;

	while ((cmd = virtqueue_get_buf(vq, &len))) {
		if (WARN_ON(HANDLE_IS_NO_FREE(cmd)))
			continue;
		kfree(cmd);
	}
}

static void um_pci_irq_vq_cb(struct virtqueue *vq)
{
	struct virtio_pcidev_msg *msg;
	int len;

	while ((msg = virtqueue_get_buf(vq, &len))) {
		if (len >= sizeof(*msg))
			um_pci_handle_irq_message(vq, msg);

		/* recycle the message buffer */
		um_pci_irq_vq_addbuf(vq, msg, true);
	}
}

static int um_pci_init_vqs(struct um_pci_device *dev)
{
	struct virtqueue *vqs[2];
	static const char *const names[2] = { "cmd", "irq" };
	vq_callback_t *cbs[2] = { um_pci_cmd_vq_cb, um_pci_irq_vq_cb };
	int err, i;

	err = virtio_find_vqs(dev->vdev, 2, vqs, cbs, names, NULL);
	if (err)
		return err;

	dev->cmd_vq = vqs[0];
	dev->irq_vq = vqs[1];

	virtio_device_ready(dev->vdev);

	for (i = 0; i < NUM_IRQ_MSGS; i++) {
		void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL);

		if (msg)
			um_pci_irq_vq_addbuf(dev->irq_vq, msg, false);
	}

	virtqueue_kick(dev->irq_vq);

	return 0;
}

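/*
 * Each virtio device that probes occupies one PCI slot; after wiring up
 * its virtqueues and IRQ we rescan the bus so the PCI core enumerates
 * it as if it had been hot-plugged.
 */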
static int um_pci_virtio_probe(struct virtio_device *vdev)
{
	struct um_pci_device *dev;
	int i, free = -1;
	int err = -ENOSPC;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return -ENOMEM;

	dev->vdev = vdev;
	vdev->priv = dev;

	mutex_lock(&um_pci_mtx);
	for (i = 0; i < MAX_DEVICES; i++) {
		if (um_pci_devices[i].dev)
			continue;
		free = i;
		break;
	}

	if (free < 0)
		goto error;

	err = um_pci_init_vqs(dev);
	if (err)
		goto error;

	dev->irq = irq_alloc_desc(numa_node_id());
	if (dev->irq < 0) {
		err = dev->irq;
		goto err_reset;
	}
	um_pci_devices[free].dev = dev;

	mutex_unlock(&um_pci_mtx);

	device_set_wakeup_enable(&vdev->dev, true);

	/*
	 * In order to do suspend-resume properly, don't allow VQs
	 * to be suspended.
	 */
	virtio_uml_set_no_vq_suspend(vdev, true);

	um_pci_rescan();

	return 0;
err_reset:
	virtio_reset_device(vdev);
	vdev->config->del_vqs(vdev);
error:
	mutex_unlock(&um_pci_mtx);
	kfree(dev);
	return err;
}

static void um_pci_virtio_remove(struct virtio_device *vdev)
{
	struct um_pci_device *dev = vdev->priv;
	int i;

	device_set_wakeup_enable(&vdev->dev, false);

	mutex_lock(&um_pci_mtx);
	for (i = 0; i < MAX_DEVICES; i++) {
		if (um_pci_devices[i].dev != dev)
			continue;

		um_pci_devices[i].dev = NULL;
		irq_free_desc(dev->irq);
		break;
	}
	mutex_unlock(&um_pci_mtx);

	if (i < MAX_DEVICES) {
		struct pci_dev *pci_dev;

		pci_dev = pci_get_slot(bridge->bus, i);
		if (pci_dev)
			pci_stop_and_remove_bus_device_locked(pci_dev);
	}

	/* Stop all virtqueues */
	virtio_reset_device(vdev);
	vdev->config->del_vqs(vdev);

	kfree(dev);
}

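/*
 * No official virtio device ID is reserved for a PCI proxy device, so
 * the ID to bind to comes from Kconfig; a negative value disables PCI
 * support entirely (checked in um_pci_init() below).
 */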
static struct virtio_device_id id_table[] = {
	{ CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
MODULE_DEVICE_TABLE(virtio, id_table);

static struct virtio_driver um_pci_virtio_driver = {
	.driver.name = "virtio-pci",
	.driver.owner = THIS_MODULE,
	.id_table = id_table,
	.probe = um_pci_virtio_probe,
	.remove = um_pci_virtio_remove,
};

static struct resource virt_cfgspace_resource = {
	.name = "PCI config space",
	.start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE,
	.end = 0xf0000000 - 1,
	.flags = IORESOURCE_MEM,
};

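/*
 * Config space lives in a fake MMIO window just below 0xf0000000, one
 * CFG_SPACE_SIZE (4 KiB) page per device. When um_pci_init() below
 * ioremap()s a page, logic_iomem calls um_pci_map_cfgspace() to pick
 * the access ops and the per-device private pointer.
 */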
static long um_pci_map_cfgspace(unsigned long offset, size_t size,
				const struct logic_iomem_ops **ops,
				void **priv)
{
	if (WARN_ON(size > CFG_SPACE_SIZE || offset % CFG_SPACE_SIZE))
		return -EINVAL;

	if (offset / CFG_SPACE_SIZE < MAX_DEVICES) {
		*ops = &um_pci_device_cfgspace_ops;
		*priv = &um_pci_devices[offset / CFG_SPACE_SIZE];
		return 0;
	}

	WARN(1, "cannot map offset 0x%lx/0x%zx\n", offset, size);
	return -ENOENT;
}

static const struct logic_iomem_region_ops um_pci_cfgspace_ops = {
	.map = um_pci_map_cfgspace,
};

static struct resource virt_iomem_resource = {
	.name = "PCI iomem",
	.start = 0xf0000000,
	.end = 0xffffffff,
	.flags = IORESOURCE_MEM,
};

struct um_pci_map_iomem_data {
	unsigned long offset;
	size_t size;
	const struct logic_iomem_ops **ops;
	void **priv;
	long ret;
};

static int um_pci_map_iomem_walk(struct pci_dev *pdev, void *_data)
{
	struct um_pci_map_iomem_data *data = _data;
	struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8];
	struct um_pci_device *dev;
	int i;

	if (!reg->dev)
		return 0;

	dev = reg->dev;

	for (i = 0; i < ARRAY_SIZE(dev->resptr); i++) {
		struct resource *r = &pdev->resource[i];

		if ((r->flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM)
			continue;

		/*
		 * must be the whole or part of the resource,
		 * not allowed to only overlap
		 */
		if (data->offset < r->start || data->offset > r->end)
			continue;
		if (data->offset + data->size - 1 > r->end)
			continue;

		dev->resptr[i] = i;
		*data->ops = &um_pci_device_bar_ops;
		*data->priv = &dev->resptr[i];
		data->ret = data->offset - r->start;

		/* no need to continue */
		return 1;
	}

	return 0;
}

static long um_pci_map_iomem(unsigned long offset, size_t size,
			     const struct logic_iomem_ops **ops,
			     void **priv)
{
	struct um_pci_map_iomem_data data = {
		/* we want the full address here */
		.offset = offset + virt_iomem_resource.start,
		.size = size,
		.ops = ops,
		.priv = priv,
		.ret = -ENOENT,
	};

	pci_walk_bus(bridge->bus, um_pci_map_iomem_walk, &data);

	return data.ret;
}

static const struct logic_iomem_region_ops um_pci_iomem_ops = {
	.map = um_pci_map_iomem,
};

static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
	/*
	 * This is a very low address and not actually valid 'physical' memory
	 * in UML, so we can simply map MSI(-X) vectors to there, it cannot be
	 * legitimately written to by the device in any other way.
	 * We use the (virtual) IRQ number here as the message to simplify the
	 * code that receives the message, where for now we simply trust the
	 * device to send the correct message.
	 */
	msg->address_hi = 0;
	msg->address_lo = 0xa0000;
	msg->data = data->irq;
}

static struct irq_chip um_pci_msi_bottom_irq_chip = {
	.name = "UM virtio MSI",
	.irq_compose_msi_msg = um_pci_compose_msi_msg,
};

static int um_pci_inner_domain_alloc(struct irq_domain *domain,
				     unsigned int virq, unsigned int nr_irqs,
				     void *args)
{
	unsigned long bit;

	WARN_ON(nr_irqs != 1);

	mutex_lock(&um_pci_mtx);
	bit = find_first_zero_bit(um_pci_msi_used, MAX_MSI_VECTORS);
	if (bit >= MAX_MSI_VECTORS) {
		mutex_unlock(&um_pci_mtx);
		return -ENOSPC;
	}

	set_bit(bit, um_pci_msi_used);
	mutex_unlock(&um_pci_mtx);

	irq_domain_set_info(domain, virq, bit, &um_pci_msi_bottom_irq_chip,
			    domain->host_data, handle_simple_irq,
			    NULL, NULL);

	return 0;
}

static void um_pci_inner_domain_free(struct irq_domain *domain,
				     unsigned int virq, unsigned int nr_irqs)
{
	struct irq_data *d = irq_domain_get_irq_data(domain, virq);

	mutex_lock(&um_pci_mtx);

	if (!test_bit(d->hwirq, um_pci_msi_used))
		pr_err("trying to free unused MSI#%lu\n", d->hwirq);
	else
		__clear_bit(d->hwirq, um_pci_msi_used);

	mutex_unlock(&um_pci_mtx);
}

static const struct irq_domain_ops um_pci_inner_domain_ops = {
	.alloc = um_pci_inner_domain_alloc,
	.free = um_pci_inner_domain_free,
};

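/*
 * MSI setup uses two stacked IRQ domains: the inner domain above hands
 * out hwirq numbers from a small bitmap, and pci_msi_create_irq_domain()
 * layers the generic PCI/MSI domain on top of it so the PCI core can
 * allocate vectors in the usual way.
 */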
static struct irq_chip um_pci_msi_irq_chip = {
	.name = "UM virtio PCIe MSI",
	.irq_mask = pci_msi_mask_irq,
	.irq_unmask = pci_msi_unmask_irq,
};

static struct msi_domain_info um_pci_msi_domain_info = {
	.flags = MSI_FLAG_USE_DEF_DOM_OPS |
		 MSI_FLAG_USE_DEF_CHIP_OPS |
		 MSI_FLAG_PCI_MSIX,
	.chip = &um_pci_msi_irq_chip,
};

static struct resource busn_resource = {
	.name = "PCI busn",
	.start = 0,
	.end = 0,
	.flags = IORESOURCE_BUS,
};

static int um_pci_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
{
	struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8];

	if (WARN_ON(!reg->dev))
		return -EINVAL;

	/* Yes, we map all pins to the same IRQ ... doesn't matter for now. */
	return reg->dev->irq;
}

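/*
 * Override of the (weak) pci_root_bus_fwnode() helper so that the PCI
 * core can look up our MSI IRQ domain via the fwnode it was created
 * with.
 */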
void *pci_root_bus_fwnode(struct pci_bus *bus)
{
	return um_pci_fwnode;
}

static int __init um_pci_init(void)
{
	int err, i;

	WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource,
				       &um_pci_cfgspace_ops));
	WARN_ON(logic_iomem_add_region(&virt_iomem_resource,
				       &um_pci_iomem_ops));

	if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0,
		 "No virtio device ID configured for PCI - no PCI support\n"))
		return 0;

	um_pci_msg_bufs = alloc_percpu(struct um_pci_message_buffer);
	if (!um_pci_msg_bufs)
		return -ENOMEM;

	bridge = pci_alloc_host_bridge(0);
	if (!bridge) {
		err = -ENOMEM;
		goto free;
	}

	um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci");
	if (!um_pci_fwnode) {
		err = -ENOMEM;
		goto free;
	}

	um_pci_inner_domain = __irq_domain_add(um_pci_fwnode, MAX_MSI_VECTORS,
					       MAX_MSI_VECTORS, 0,
					       &um_pci_inner_domain_ops, NULL);
	if (!um_pci_inner_domain) {
		err = -ENOMEM;
		goto free;
	}

	um_pci_msi_domain = pci_msi_create_irq_domain(um_pci_fwnode,
						      &um_pci_msi_domain_info,
						      um_pci_inner_domain);
	if (!um_pci_msi_domain) {
		err = -ENOMEM;
		goto free;
	}

	pci_add_resource(&bridge->windows, &virt_iomem_resource);
	pci_add_resource(&bridge->windows, &busn_resource);
	bridge->ops = &um_pci_ops;
	bridge->map_irq = um_pci_map_irq;

	for (i = 0; i < MAX_DEVICES; i++) {
		resource_size_t start;

		start = virt_cfgspace_resource.start + i * CFG_SPACE_SIZE;
		um_pci_devices[i].iomem = ioremap(start, CFG_SPACE_SIZE);
		if (WARN(!um_pci_devices[i].iomem, "failed to map %d\n", i)) {
			err = -ENOMEM;
			goto free;
		}
	}

	err = pci_host_probe(bridge);
	if (err)
		goto free;

	err = register_virtio_driver(&um_pci_virtio_driver);
	if (err)
		goto free;
	return 0;

free:
	if (um_pci_inner_domain)
		irq_domain_remove(um_pci_inner_domain);
	if (um_pci_fwnode)
		irq_domain_free_fwnode(um_pci_fwnode);
	if (bridge) {
		pci_free_resource_list(&bridge->windows);
		pci_free_host_bridge(bridge);
	}
	free_percpu(um_pci_msg_bufs);
	return err;
}
module_init(um_pci_init);

static void __exit um_pci_exit(void)
{
	unregister_virtio_driver(&um_pci_virtio_driver);
	irq_domain_remove(um_pci_msi_domain);
	irq_domain_remove(um_pci_inner_domain);
	pci_free_resource_list(&bridge->windows);
	pci_free_host_bridge(bridge);
	free_percpu(um_pci_msg_bufs);
}
module_exit(um_pci_exit);