/*
 * Freescale Hypervisor Management Driver
 *
 * Copyright (C) 2008-2011 Freescale Semiconductor, Inc.
 * Author: Timur Tabi <timur@freescale.com>
 *
 * This file is licensed under the terms of the GNU General Public License
 * version 2.  This program is licensed "as is" without any warranty of any
 * kind, whether express or implied.
 *
 * The Freescale hypervisor management driver provides several services to
 * drivers and applications related to the Freescale hypervisor:
 *
 * 1. An ioctl interface for querying and managing partitions.
 *
 * 2. A file interface for reading incoming doorbells.
 *
 * 3. An interrupt handler for shutting down the partition upon receiving the
 *    shutdown doorbell from a manager partition.
 *
 * 4. A kernel interface for receiving callbacks when a managed partition
 *    stops.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/reboot.h>
#include <linux/uaccess.h>
#include <linux/notifier.h>
#include <linux/interrupt.h>

#include <linux/io.h>
#include <asm/fsl_hcalls.h>

#include <linux/fsl_hypervisor.h>

/*
 * Notifier chain of failover clients.  Entries are added and removed through
 * fsl_hv_failover_register()/fsl_hv_failover_unregister(), and the chain is
 * called from the state-change IRQ thread.
 */
static BLOCKING_NOTIFIER_HEAD(failover_subscribers);

51 * Ioctl interface for FSL_HV_IOCTL_PARTITION_RESTART
53 * Restart a running partition
55 static long ioctl_restart(struct fsl_hv_ioctl_restart __user *p)
57 struct fsl_hv_ioctl_restart param;
59 /* Get the parameters from the user */
60 if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_restart)))
63 param.ret = fh_partition_restart(param.partition);
65 if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32)))
72 * Ioctl interface for FSL_HV_IOCTL_PARTITION_STATUS
74 * Query the status of a partition
76 static long ioctl_status(struct fsl_hv_ioctl_status __user *p)
78 struct fsl_hv_ioctl_status param;
81 /* Get the parameters from the user */
82 if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_status)))
85 param.ret = fh_partition_get_status(param.partition, &status);
87 param.status = status;
89 if (copy_to_user(p, ¶m, sizeof(struct fsl_hv_ioctl_status)))
96 * Ioctl interface for FSL_HV_IOCTL_PARTITION_START
98 * Start a stopped partition.
100 static long ioctl_start(struct fsl_hv_ioctl_start __user *p)
102 struct fsl_hv_ioctl_start param;
104 /* Get the parameters from the user */
105 if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_start)))
108 param.ret = fh_partition_start(param.partition, param.entry_point,
111 if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32)))
118 * Ioctl interface for FSL_HV_IOCTL_PARTITION_STOP
120 * Stop a running partition
122 static long ioctl_stop(struct fsl_hv_ioctl_stop __user *p)
124 struct fsl_hv_ioctl_stop param;
126 /* Get the parameters from the user */
127 if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_stop)))
130 param.ret = fh_partition_stop(param.partition);
132 if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32)))
139 * Ioctl interface for FSL_HV_IOCTL_MEMCPY
141 * The FH_MEMCPY hypercall takes an array of address/address/size structures
142 * to represent the data being copied. As a convenience to the user, this
143 * ioctl takes a user-create buffer and a pointer to a guest physically
144 * contiguous buffer in the remote partition, and creates the
145 * address/address/size array for the hypercall.
147 static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p)
149 struct fsl_hv_ioctl_memcpy param;
151 struct page **pages = NULL;
152 void *sg_list_unaligned = NULL;
153 struct fh_sg_list *sg_list = NULL;
155 unsigned int num_pages;
156 unsigned long lb_offset; /* Offset within a page of the local buffer */
160 int num_pinned = 0; /* return value from get_user_pages_fast() */
161 phys_addr_t remote_paddr; /* The next address in the remote buffer */
162 uint32_t count; /* The number of bytes left to copy */
164 /* Get the parameters from the user */
165 if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_memcpy)))
169 * One partition must be local, the other must be remote. In other
170 * words, if source and target are both -1, or are both not -1, then
173 if ((param.source == -1) == (param.target == -1))
177 * The array of pages returned by get_user_pages_fast() covers only
178 * page-aligned memory. Since the user buffer is probably not
179 * page-aligned, we need to handle the discrepancy.
181 * We calculate the offset within a page of the S/G list, and make
182 * adjustments accordingly. This will result in a page list that looks
185 * ---- <-- first page starts before the buffer
206 * | | <-- last page ends after the buffer
209 * The distance between the start of the first page and the start of the
210 * buffer is lb_offset. The hashed (///) areas are the parts of the
211 * page list that contain the actual buffer.
213 * The advantage of this approach is that the number of pages is
214 * equal to the number of entries in the S/G list that we give to the
217 lb_offset = param.local_vaddr & (PAGE_SIZE - 1);
218 if (param.count == 0 ||
219 param.count > U64_MAX - lb_offset - PAGE_SIZE + 1)
221 num_pages = (param.count + lb_offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
223 /* Allocate the buffers we need */
226 * 'pages' is an array of struct page pointers that's initialized by
227 * get_user_pages_fast().
229 pages = kzalloc(num_pages * sizeof(struct page *), GFP_KERNEL);
231 pr_debug("fsl-hv: could not allocate page list\n");
236 * sg_list is the list of fh_sg_list objects that we pass to the
239 sg_list_unaligned = kmalloc(num_pages * sizeof(struct fh_sg_list) +
240 sizeof(struct fh_sg_list) - 1, GFP_KERNEL);
241 if (!sg_list_unaligned) {
242 pr_debug("fsl-hv: could not allocate S/G list\n");
246 sg_list = PTR_ALIGN(sg_list_unaligned, sizeof(struct fh_sg_list));
248 /* Get the physical addresses of the source buffer */
249 down_read(¤t->mm->mmap_sem);
250 num_pinned = get_user_pages(param.local_vaddr - lb_offset,
251 num_pages, (param.source == -1) ? 0 : FOLL_WRITE,
253 up_read(¤t->mm->mmap_sem);
255 if (num_pinned != num_pages) {
256 pr_debug("fsl-hv: could not lock source buffer\n");
257 ret = (num_pinned < 0) ? num_pinned : -EFAULT;
262 * Build the fh_sg_list[] array. The first page is special
263 * because it's misaligned.
265 if (param.source == -1) {
266 sg_list[0].source = page_to_phys(pages[0]) + lb_offset;
267 sg_list[0].target = param.remote_paddr;
269 sg_list[0].source = param.remote_paddr;
270 sg_list[0].target = page_to_phys(pages[0]) + lb_offset;
272 sg_list[0].size = min_t(uint64_t, param.count, PAGE_SIZE - lb_offset);
274 remote_paddr = param.remote_paddr + sg_list[0].size;
275 count = param.count - sg_list[0].size;
277 for (i = 1; i < num_pages; i++) {
278 if (param.source == -1) {
279 /* local to remote */
280 sg_list[i].source = page_to_phys(pages[i]);
281 sg_list[i].target = remote_paddr;
283 /* remote to local */
284 sg_list[i].source = remote_paddr;
285 sg_list[i].target = page_to_phys(pages[i]);
287 sg_list[i].size = min_t(uint64_t, count, PAGE_SIZE);
289 remote_paddr += sg_list[i].size;
290 count -= sg_list[i].size;
293 param.ret = fh_partition_memcpy(param.source, param.target,
294 virt_to_phys(sg_list), num_pages);
297 if (pages && (num_pinned > 0)) {
298 for (i = 0; i < num_pinned; i++)
302 kfree(sg_list_unaligned);
307 if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32)))
314 * Ioctl interface for FSL_HV_IOCTL_DOORBELL
318 static long ioctl_doorbell(struct fsl_hv_ioctl_doorbell __user *p)
320 struct fsl_hv_ioctl_doorbell param;
322 /* Get the parameters from the user. */
323 if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_doorbell)))
326 param.ret = ev_doorbell_send(param.doorbell);
328 if (copy_to_user(&p->ret, ¶m.ret, sizeof(__u32)))
334 static long ioctl_dtprop(struct fsl_hv_ioctl_prop __user *p, int set)
336 struct fsl_hv_ioctl_prop param;
337 char __user *upath, *upropname;
338 void __user *upropval;
339 char *path, *propname;
343 /* Get the parameters from the user. */
344 if (copy_from_user(¶m, p, sizeof(struct fsl_hv_ioctl_prop)))
347 upath = (char __user *)(uintptr_t)param.path;
348 upropname = (char __user *)(uintptr_t)param.propname;
349 upropval = (void __user *)(uintptr_t)param.propval;
351 path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN);
353 return PTR_ERR(path);
355 propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN);
356 if (IS_ERR(propname)) {
357 ret = PTR_ERR(propname);
361 if (param.proplen > FH_DTPROP_MAX_PROPLEN) {
363 goto err_free_propname;
366 propval = kmalloc(param.proplen, GFP_KERNEL);
369 goto err_free_propname;
373 if (copy_from_user(propval, upropval, param.proplen)) {
375 goto err_free_propval;
378 param.ret = fh_partition_set_dtprop(param.handle,
380 virt_to_phys(propname),
381 virt_to_phys(propval),
384 param.ret = fh_partition_get_dtprop(param.handle,
386 virt_to_phys(propname),
387 virt_to_phys(propval),
390 if (param.ret == 0) {
391 if (copy_to_user(upropval, propval, param.proplen) ||
392 put_user(param.proplen, &p->proplen)) {
394 goto err_free_propval;
399 if (put_user(param.ret, &p->ret))
413 * Ioctl main entry point
415 static long fsl_hv_ioctl(struct file *file, unsigned int cmd,
416 unsigned long argaddr)
418 void __user *arg = (void __user *)argaddr;
422 case FSL_HV_IOCTL_PARTITION_RESTART:
423 ret = ioctl_restart(arg);
425 case FSL_HV_IOCTL_PARTITION_GET_STATUS:
426 ret = ioctl_status(arg);
428 case FSL_HV_IOCTL_PARTITION_START:
429 ret = ioctl_start(arg);
431 case FSL_HV_IOCTL_PARTITION_STOP:
432 ret = ioctl_stop(arg);
434 case FSL_HV_IOCTL_MEMCPY:
435 ret = ioctl_memcpy(arg);
437 case FSL_HV_IOCTL_DOORBELL:
438 ret = ioctl_doorbell(arg);
440 case FSL_HV_IOCTL_GETPROP:
441 ret = ioctl_dtprop(arg, 0);
443 case FSL_HV_IOCTL_SETPROP:
444 ret = ioctl_dtprop(arg, 1);
447 pr_debug("fsl-hv: bad ioctl dir=%u type=%u cmd=%u size=%u\n",
448 _IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd),
456 /* Linked list of processes that have us open */
457 static struct list_head db_list;
459 /* spinlock for db_list */
460 static DEFINE_SPINLOCK(db_list_lock);
462 /* The size of the doorbell event queue. This must be a power of two. */
465 /* Returns the next head/tail pointer, wrapping around the queue if necessary */
466 #define nextp(x) (((x) + 1) & (QSIZE - 1))
468 /* Per-open data structure */
469 struct doorbell_queue {
470 struct list_head list;
472 wait_queue_head_t wait;
478 /* Linked list of ISRs that we registered */
479 struct list_head isr_list;
481 /* Per-ISR data structure */
482 struct doorbell_isr {
483 struct list_head list;
485 uint32_t doorbell; /* The doorbell handle */
486 uint32_t partition; /* The partition handle, if used */
490 * Add a doorbell to all of the doorbell queues
492 static void fsl_hv_queue_doorbell(uint32_t doorbell)
494 struct doorbell_queue *dbq;
497 /* Prevent another core from modifying db_list */
498 spin_lock_irqsave(&db_list_lock, flags);
500 list_for_each_entry(dbq, &db_list, list) {
501 if (dbq->head != nextp(dbq->tail)) {
502 dbq->q[dbq->tail] = doorbell;
504 * This memory barrier eliminates the need to grab
505 * the spinlock for dbq.
508 dbq->tail = nextp(dbq->tail);
509 wake_up_interruptible(&dbq->wait);
513 spin_unlock_irqrestore(&db_list_lock, flags);
517 * Interrupt handler for all doorbells
519 * We use the same interrupt handler for all doorbells. Whenever a doorbell
520 * is rung, and we receive an interrupt, we just put the handle for that
521 * doorbell (passed to us as *data) into all of the queues.
523 static irqreturn_t fsl_hv_isr(int irq, void *data)
525 fsl_hv_queue_doorbell((uintptr_t) data);
531 * State change thread function
533 * The state change notification arrives in an interrupt, but we can't call
534 * blocking_notifier_call_chain() in an interrupt handler. We could call
535 * atomic_notifier_call_chain(), but that would require the clients' call-back
536 * function to run in interrupt context. Since we don't want to impose that
537 * restriction on the clients, we use a threaded IRQ to process the
538 * notification in kernel context.
540 static irqreturn_t fsl_hv_state_change_thread(int irq, void *data)
542 struct doorbell_isr *dbisr = data;
544 blocking_notifier_call_chain(&failover_subscribers, dbisr->partition,
551 * Interrupt handler for state-change doorbells
553 static irqreturn_t fsl_hv_state_change_isr(int irq, void *data)
556 struct doorbell_isr *dbisr = data;
559 /* It's still a doorbell, so add it to all the queues. */
560 fsl_hv_queue_doorbell(dbisr->doorbell);
562 /* Determine the new state, and if it's stopped, notify the clients. */
563 ret = fh_partition_get_status(dbisr->partition, &status);
564 if (!ret && (status == FH_PARTITION_STOPPED))
565 return IRQ_WAKE_THREAD;
571 * Returns a bitmask indicating whether a read will block
573 static unsigned int fsl_hv_poll(struct file *filp, struct poll_table_struct *p)
575 struct doorbell_queue *dbq = filp->private_data;
579 spin_lock_irqsave(&dbq->lock, flags);
581 poll_wait(filp, &dbq->wait, p);
582 mask = (dbq->head == dbq->tail) ? 0 : (POLLIN | POLLRDNORM);
584 spin_unlock_irqrestore(&dbq->lock, flags);
590 * Return the handles for any incoming doorbells
592 * If there are doorbell handles in the queue for this open instance, then
593 * return them to the caller as an array of 32-bit integers. Otherwise,
594 * block until there is at least one handle to return.
596 static ssize_t fsl_hv_read(struct file *filp, char __user *buf, size_t len,
599 struct doorbell_queue *dbq = filp->private_data;
600 uint32_t __user *p = (uint32_t __user *) buf; /* for put_user() */
604 /* Make sure we stop when the user buffer is full. */
605 while (len >= sizeof(uint32_t)) {
606 uint32_t dbell; /* Local copy of doorbell queue data */
608 spin_lock_irqsave(&dbq->lock, flags);
611 * If the queue is empty, then either we're done or we need
612 * to block. If the application specified O_NONBLOCK, then
613 * we return the appropriate error code.
615 if (dbq->head == dbq->tail) {
616 spin_unlock_irqrestore(&dbq->lock, flags);
619 if (filp->f_flags & O_NONBLOCK)
621 if (wait_event_interruptible(dbq->wait,
622 dbq->head != dbq->tail))
628 * Even though we have an smp_wmb() in the ISR, the core
629 * might speculatively execute the "dbell = ..." below while
630 * it's evaluating the if-statement above. In that case, the
631 * value put into dbell could be stale if the core accepts the
632 * speculation. To prevent that, we need a read memory barrier
637 /* Copy the data to a temporary local buffer, because
638 * we can't call copy_to_user() from inside a spinlock
640 dbell = dbq->q[dbq->head];
641 dbq->head = nextp(dbq->head);
643 spin_unlock_irqrestore(&dbq->lock, flags);
645 if (put_user(dbell, p))
648 count += sizeof(uint32_t);
649 len -= sizeof(uint32_t);
656 * Open the driver and prepare for reading doorbells.
658 * Every time an application opens the driver, we create a doorbell queue
659 * for that file handle. This queue is used for any incoming doorbells.
661 static int fsl_hv_open(struct inode *inode, struct file *filp)
663 struct doorbell_queue *dbq;
667 dbq = kzalloc(sizeof(struct doorbell_queue), GFP_KERNEL);
669 pr_err("fsl-hv: out of memory\n");
673 spin_lock_init(&dbq->lock);
674 init_waitqueue_head(&dbq->wait);
676 spin_lock_irqsave(&db_list_lock, flags);
677 list_add(&dbq->list, &db_list);
678 spin_unlock_irqrestore(&db_list_lock, flags);
680 filp->private_data = dbq;
688 static int fsl_hv_close(struct inode *inode, struct file *filp)
690 struct doorbell_queue *dbq = filp->private_data;
695 spin_lock_irqsave(&db_list_lock, flags);
696 list_del(&dbq->list);
697 spin_unlock_irqrestore(&db_list_lock, flags);
704 static const struct file_operations fsl_hv_fops = {
705 .owner = THIS_MODULE,
707 .release = fsl_hv_close,
710 .unlocked_ioctl = fsl_hv_ioctl,
711 .compat_ioctl = fsl_hv_ioctl,
714 static struct miscdevice fsl_hv_misc_dev = {
720 static irqreturn_t fsl_hv_shutdown_isr(int irq, void *data)
722 orderly_poweroff(false);
728 * Returns the handle of the parent of the given node
730 * The handle is the value of the 'hv-handle' property
732 static int get_parent_handle(struct device_node *np)
734 struct device_node *parent;
735 const uint32_t *prop;
739 parent = of_get_parent(np);
741 /* It's not really possible for this to fail */
745 * The proper name for the handle property is "hv-handle", but some
746 * older versions of the hypervisor used "reg".
748 prop = of_get_property(parent, "hv-handle", &len);
750 prop = of_get_property(parent, "reg", &len);
752 if (!prop || (len != sizeof(uint32_t))) {
753 /* This can happen only if the node is malformed */
758 handle = be32_to_cpup(prop);
765 * Register a callback for failover events
767 * This function is called by device drivers to register their callback
768 * functions for fail-over events.
770 int fsl_hv_failover_register(struct notifier_block *nb)
772 return blocking_notifier_chain_register(&failover_subscribers, nb);
774 EXPORT_SYMBOL(fsl_hv_failover_register);
777 * Unregister a callback for failover events
779 int fsl_hv_failover_unregister(struct notifier_block *nb)
781 return blocking_notifier_chain_unregister(&failover_subscribers, nb);
783 EXPORT_SYMBOL(fsl_hv_failover_unregister);
786 * Return TRUE if we're running under FSL hypervisor
788 * This function checks to see if we're running under the Freescale
789 * hypervisor, and returns zero if we're not, or non-zero if we are.
791 * First, it checks if MSR[GS]==1, which means we're running under some
792 * hypervisor. Then it checks if there is a hypervisor node in the device
793 * tree. Currently, that means there needs to be a node in the root called
794 * "hypervisor" and which has a property named "fsl,hv-version".
796 static int has_fsl_hypervisor(void)
798 struct device_node *node;
801 node = of_find_node_by_path("/hypervisor");
805 ret = of_find_property(node, "fsl,hv-version", NULL) != NULL;
813 * Freescale hypervisor management driver init
815 * This function is called when this module is loaded.
817 * Register ourselves as a miscellaneous driver. This will register the
818 * fops structure and create the right sysfs entries for udev.
820 static int __init fsl_hypervisor_init(void)
822 struct device_node *np;
823 struct doorbell_isr *dbisr, *n;
826 pr_info("Freescale hypervisor management driver\n");
828 if (!has_fsl_hypervisor()) {
829 pr_info("fsl-hv: no hypervisor found\n");
833 ret = misc_register(&fsl_hv_misc_dev);
835 pr_err("fsl-hv: cannot register device\n");
839 INIT_LIST_HEAD(&db_list);
840 INIT_LIST_HEAD(&isr_list);
842 for_each_compatible_node(np, NULL, "epapr,hv-receive-doorbell") {
844 const uint32_t *handle;
846 handle = of_get_property(np, "interrupts", NULL);
847 irq = irq_of_parse_and_map(np, 0);
848 if (!handle || (irq == NO_IRQ)) {
849 pr_err("fsl-hv: no 'interrupts' property in %s node\n",
854 dbisr = kzalloc(sizeof(*dbisr), GFP_KERNEL);
859 dbisr->doorbell = be32_to_cpup(handle);
861 if (of_device_is_compatible(np, "fsl,hv-shutdown-doorbell")) {
862 /* The shutdown doorbell gets its own ISR */
863 ret = request_irq(irq, fsl_hv_shutdown_isr, 0,
865 } else if (of_device_is_compatible(np,
866 "fsl,hv-state-change-doorbell")) {
868 * The state change doorbell triggers a notification if
869 * the state of the managed partition changes to
870 * "stopped". We need a separate interrupt handler for
871 * that, and we also need to know the handle of the
872 * target partition, not just the handle of the
875 dbisr->partition = ret = get_parent_handle(np);
877 pr_err("fsl-hv: node %s has missing or "
878 "malformed parent\n", np->full_name);
882 ret = request_threaded_irq(irq, fsl_hv_state_change_isr,
883 fsl_hv_state_change_thread,
886 ret = request_irq(irq, fsl_hv_isr, 0, np->name, dbisr);
889 pr_err("fsl-hv: could not request irq %u for node %s\n",
895 list_add(&dbisr->list, &isr_list);
897 pr_info("fsl-hv: registered handler for doorbell %u\n",
904 list_for_each_entry_safe(dbisr, n, &isr_list, list) {
905 free_irq(dbisr->irq, dbisr);
906 list_del(&dbisr->list);
910 misc_deregister(&fsl_hv_misc_dev);
916 * Freescale hypervisor management driver termination
918 * This function is called when this driver is unloaded.
920 static void __exit fsl_hypervisor_exit(void)
922 struct doorbell_isr *dbisr, *n;
924 list_for_each_entry_safe(dbisr, n, &isr_list, list) {
925 free_irq(dbisr->irq, dbisr);
926 list_del(&dbisr->list);
930 misc_deregister(&fsl_hv_misc_dev);
933 module_init(fsl_hypervisor_init);
934 module_exit(fsl_hypervisor_exit);
936 MODULE_AUTHOR("Timur Tabi <timur@freescale.com>");
937 MODULE_DESCRIPTION("Freescale hypervisor management driver");
938 MODULE_LICENSE("GPL v2");