2 * hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008, 2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
32 #include <asm/pgtable.h>
34 #include <asm/switch_to.h>
40 #define KMSG_COMPONENT "kvm-s390"
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
44 #define CREATE_TRACE_POINTS
46 #include "trace-s390.h"
48 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51 (KVM_MAX_VCPUS + LOCAL_IRQS))
53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56 { "userspace_handled", VCPU_STAT(exit_userspace) },
57 { "exit_null", VCPU_STAT(exit_null) },
58 { "exit_validity", VCPU_STAT(exit_validity) },
59 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
60 { "exit_external_request", VCPU_STAT(exit_external_request) },
61 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62 { "exit_instruction", VCPU_STAT(exit_instruction) },
63 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
68 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
70 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
71 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
72 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
74 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
81 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
83 { "instruction_spx", VCPU_STAT(instruction_spx) },
84 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
85 { "instruction_stap", VCPU_STAT(instruction_stap) },
86 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
89 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
90 { "instruction_essa", VCPU_STAT(instruction_essa) },
91 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
92 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
93 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
94 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110 { "diagnose_10", VCPU_STAT(diagnose_10) },
111 { "diagnose_44", VCPU_STAT(diagnose_44) },
112 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
113 { "diagnose_258", VCPU_STAT(diagnose_258) },
114 { "diagnose_308", VCPU_STAT(diagnose_308) },
115 { "diagnose_500", VCPU_STAT(diagnose_500) },
119 /* upper facilities limit for kvm */
120 unsigned long kvm_s390_fac_list_mask[] = {
121 0xffe6ffffffffffffUL,
122 0x005effffffffffffUL,
125 unsigned long kvm_s390_fac_list_mask_size(void)
127 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128 return ARRAY_SIZE(kvm_s390_fac_list_mask);
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
134 /* Section: not file related */
135 int kvm_arch_hardware_enable(void)
137 /* every s390 is virtualization enabled ;-) */
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
144 * This callback is executed during stop_machine(). All CPUs are therefore
145 * temporarily stopped. In order not to change guest behavior, we have to
146 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147 * so a CPU won't be stopped while calculating with the epoch.
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
153 struct kvm_vcpu *vcpu;
155 unsigned long long *delta = v;
157 list_for_each_entry(kvm, &vm_list, vm_list) {
158 kvm->arch.epoch -= *delta;
159 kvm_for_each_vcpu(i, vcpu, kvm) {
160 vcpu->arch.sie_block->epoch -= *delta;
166 static struct notifier_block kvm_clock_notifier = {
167 .notifier_call = kvm_clock_sync,
170 int kvm_arch_hardware_setup(void)
172 gmap_notifier.notifier_call = kvm_gmap_notifier;
173 gmap_register_ipte_notifier(&gmap_notifier);
174 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175 &kvm_clock_notifier);
179 void kvm_arch_hardware_unsetup(void)
181 gmap_unregister_ipte_notifier(&gmap_notifier);
182 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183 &kvm_clock_notifier);
186 int kvm_arch_init(void *opaque)
190 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
194 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
196 goto out_debug_unreg;
199 /* Register floating interrupt controller interface. */
200 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
202 pr_err("Failed to register FLIC rc=%d\n", rc);
203 goto out_debug_unreg;
208 debug_unregister(kvm_s390_dbf);
212 void kvm_arch_exit(void)
214 debug_unregister(kvm_s390_dbf);
217 /* Section: device related */
218 long kvm_arch_dev_ioctl(struct file *filp,
219 unsigned int ioctl, unsigned long arg)
221 if (ioctl == KVM_S390_ENABLE_SIE)
222 return s390_enable_sie();
226 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
231 case KVM_CAP_S390_PSW:
232 case KVM_CAP_S390_GMAP:
233 case KVM_CAP_SYNC_MMU:
234 #ifdef CONFIG_KVM_S390_UCONTROL
235 case KVM_CAP_S390_UCONTROL:
237 case KVM_CAP_ASYNC_PF:
238 case KVM_CAP_SYNC_REGS:
239 case KVM_CAP_ONE_REG:
240 case KVM_CAP_ENABLE_CAP:
241 case KVM_CAP_S390_CSS_SUPPORT:
242 case KVM_CAP_IOEVENTFD:
243 case KVM_CAP_DEVICE_CTRL:
244 case KVM_CAP_ENABLE_CAP_VM:
245 case KVM_CAP_S390_IRQCHIP:
246 case KVM_CAP_VM_ATTRIBUTES:
247 case KVM_CAP_MP_STATE:
248 case KVM_CAP_S390_INJECT_IRQ:
249 case KVM_CAP_S390_USER_SIGP:
250 case KVM_CAP_S390_USER_STSI:
251 case KVM_CAP_S390_SKEYS:
252 case KVM_CAP_S390_IRQ_STATE:
255 case KVM_CAP_S390_MEM_OP:
258 case KVM_CAP_NR_VCPUS:
259 case KVM_CAP_MAX_VCPUS:
262 case KVM_CAP_NR_MEMSLOTS:
263 r = KVM_USER_MEM_SLOTS;
265 case KVM_CAP_S390_COW:
266 r = MACHINE_HAS_ESOP;
268 case KVM_CAP_S390_VECTOR_REGISTERS:
271 case KVM_CAP_S390_BPB:
272 r = test_facility(82);
280 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
281 struct kvm_memory_slot *memslot)
283 gfn_t cur_gfn, last_gfn;
284 unsigned long address;
285 struct gmap *gmap = kvm->arch.gmap;
287 down_read(&gmap->mm->mmap_sem);
288 /* Loop over all guest pages */
289 last_gfn = memslot->base_gfn + memslot->npages;
290 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
291 address = gfn_to_hva_memslot(memslot, cur_gfn);
293 if (gmap_test_and_clear_dirty(address, gmap))
294 mark_page_dirty(kvm, cur_gfn);
296 up_read(&gmap->mm->mmap_sem);
299 /* Section: vm related */
301 * Get (and clear) the dirty memory log for a memory slot.
303 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
304 struct kvm_dirty_log *log)
308 struct kvm_memslots *slots;
309 struct kvm_memory_slot *memslot;
312 if (kvm_is_ucontrol(kvm))
315 mutex_lock(&kvm->slots_lock);
318 if (log->slot >= KVM_USER_MEM_SLOTS)
321 slots = kvm_memslots(kvm);
322 memslot = id_to_memslot(slots, log->slot);
324 if (!memslot->dirty_bitmap)
327 kvm_s390_sync_dirty_log(kvm, memslot);
328 r = kvm_get_dirty_log(kvm, log, &is_dirty);
332 /* Clear the dirty log */
334 n = kvm_dirty_bitmap_bytes(memslot);
335 memset(memslot->dirty_bitmap, 0, n);
339 mutex_unlock(&kvm->slots_lock);
343 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
351 case KVM_CAP_S390_IRQCHIP:
352 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
353 kvm->arch.use_irqchip = 1;
356 case KVM_CAP_S390_USER_SIGP:
357 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
358 kvm->arch.user_sigp = 1;
361 case KVM_CAP_S390_VECTOR_REGISTERS:
362 mutex_lock(&kvm->lock);
363 if (atomic_read(&kvm->online_vcpus)) {
365 } else if (MACHINE_HAS_VX) {
366 set_kvm_facility(kvm->arch.model.fac->mask, 129);
367 set_kvm_facility(kvm->arch.model.fac->list, 129);
371 mutex_unlock(&kvm->lock);
372 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
373 r ? "(not available)" : "(success)");
375 case KVM_CAP_S390_USER_STSI:
376 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
377 kvm->arch.user_stsi = 1;
387 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
391 switch (attr->attr) {
392 case KVM_S390_VM_MEM_LIMIT_SIZE:
394 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
395 kvm->arch.gmap->asce_end);
396 if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
406 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
410 switch (attr->attr) {
411 case KVM_S390_VM_MEM_ENABLE_CMMA:
412 /* enable CMMA only for z10 and later (EDAT_1) */
414 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
418 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
419 mutex_lock(&kvm->lock);
420 if (atomic_read(&kvm->online_vcpus) == 0) {
421 kvm->arch.use_cmma = 1;
424 mutex_unlock(&kvm->lock);
426 case KVM_S390_VM_MEM_CLR_CMMA:
428 if (!kvm->arch.use_cmma)
431 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
432 mutex_lock(&kvm->lock);
433 idx = srcu_read_lock(&kvm->srcu);
434 s390_reset_cmma(kvm->arch.gmap->mm);
435 srcu_read_unlock(&kvm->srcu, idx);
436 mutex_unlock(&kvm->lock);
439 case KVM_S390_VM_MEM_LIMIT_SIZE: {
440 unsigned long new_limit;
442 if (kvm_is_ucontrol(kvm))
445 if (get_user(new_limit, (u64 __user *)attr->addr))
448 if (new_limit > kvm->arch.gmap->asce_end)
452 mutex_lock(&kvm->lock);
453 if (atomic_read(&kvm->online_vcpus) == 0) {
454 /* gmap_alloc will round the limit up */
455 struct gmap *new = gmap_alloc(current->mm, new_limit);
460 gmap_free(kvm->arch.gmap);
462 kvm->arch.gmap = new;
466 mutex_unlock(&kvm->lock);
467 VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
477 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
479 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
481 struct kvm_vcpu *vcpu;
484 if (!test_kvm_facility(kvm, 76))
487 mutex_lock(&kvm->lock);
488 switch (attr->attr) {
489 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
491 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
492 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
493 kvm->arch.crypto.aes_kw = 1;
494 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
496 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
498 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
499 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
500 kvm->arch.crypto.dea_kw = 1;
501 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
503 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
504 kvm->arch.crypto.aes_kw = 0;
505 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
506 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
507 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
509 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
510 kvm->arch.crypto.dea_kw = 0;
511 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
512 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
513 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
516 mutex_unlock(&kvm->lock);
520 kvm_for_each_vcpu(i, vcpu, kvm) {
521 kvm_s390_vcpu_crypto_setup(vcpu);
524 mutex_unlock(&kvm->lock);
528 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
532 if (copy_from_user(>od_high, (void __user *)attr->addr,
538 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
543 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
547 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod)))
550 kvm_s390_set_tod_clock(kvm, gtod);
551 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
555 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
562 switch (attr->attr) {
563 case KVM_S390_VM_TOD_HIGH:
564 ret = kvm_s390_set_tod_high(kvm, attr);
566 case KVM_S390_VM_TOD_LOW:
567 ret = kvm_s390_set_tod_low(kvm, attr);
576 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
580 if (copy_to_user((void __user *)attr->addr, >od_high,
583 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
588 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
592 gtod = kvm_s390_get_tod_clock_fast(kvm);
593 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
595 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
600 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
607 switch (attr->attr) {
608 case KVM_S390_VM_TOD_HIGH:
609 ret = kvm_s390_get_tod_high(kvm, attr);
611 case KVM_S390_VM_TOD_LOW:
612 ret = kvm_s390_get_tod_low(kvm, attr);
621 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
623 struct kvm_s390_vm_cpu_processor *proc;
626 mutex_lock(&kvm->lock);
627 if (atomic_read(&kvm->online_vcpus)) {
631 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
636 if (!copy_from_user(proc, (void __user *)attr->addr,
638 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
639 sizeof(struct cpuid));
640 kvm->arch.model.ibc = proc->ibc;
641 memcpy(kvm->arch.model.fac->list, proc->fac_list,
642 S390_ARCH_FAC_LIST_SIZE_BYTE);
647 mutex_unlock(&kvm->lock);
651 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
655 switch (attr->attr) {
656 case KVM_S390_VM_CPU_PROCESSOR:
657 ret = kvm_s390_set_processor(kvm, attr);
663 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
665 struct kvm_s390_vm_cpu_processor *proc;
668 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
673 memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
674 proc->ibc = kvm->arch.model.ibc;
675 memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
676 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
683 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
685 struct kvm_s390_vm_cpu_machine *mach;
688 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
693 get_cpu_id((struct cpuid *) &mach->cpuid);
694 mach->ibc = sclp.ibc;
695 memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
696 S390_ARCH_FAC_LIST_SIZE_BYTE);
697 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
698 S390_ARCH_FAC_LIST_SIZE_BYTE);
699 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
706 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
710 switch (attr->attr) {
711 case KVM_S390_VM_CPU_PROCESSOR:
712 ret = kvm_s390_get_processor(kvm, attr);
714 case KVM_S390_VM_CPU_MACHINE:
715 ret = kvm_s390_get_machine(kvm, attr);
721 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
725 switch (attr->group) {
726 case KVM_S390_VM_MEM_CTRL:
727 ret = kvm_s390_set_mem_control(kvm, attr);
729 case KVM_S390_VM_TOD:
730 ret = kvm_s390_set_tod(kvm, attr);
732 case KVM_S390_VM_CPU_MODEL:
733 ret = kvm_s390_set_cpu_model(kvm, attr);
735 case KVM_S390_VM_CRYPTO:
736 ret = kvm_s390_vm_set_crypto(kvm, attr);
746 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
750 switch (attr->group) {
751 case KVM_S390_VM_MEM_CTRL:
752 ret = kvm_s390_get_mem_control(kvm, attr);
754 case KVM_S390_VM_TOD:
755 ret = kvm_s390_get_tod(kvm, attr);
757 case KVM_S390_VM_CPU_MODEL:
758 ret = kvm_s390_get_cpu_model(kvm, attr);
768 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
772 switch (attr->group) {
773 case KVM_S390_VM_MEM_CTRL:
774 switch (attr->attr) {
775 case KVM_S390_VM_MEM_ENABLE_CMMA:
776 case KVM_S390_VM_MEM_CLR_CMMA:
777 case KVM_S390_VM_MEM_LIMIT_SIZE:
785 case KVM_S390_VM_TOD:
786 switch (attr->attr) {
787 case KVM_S390_VM_TOD_LOW:
788 case KVM_S390_VM_TOD_HIGH:
796 case KVM_S390_VM_CPU_MODEL:
797 switch (attr->attr) {
798 case KVM_S390_VM_CPU_PROCESSOR:
799 case KVM_S390_VM_CPU_MACHINE:
807 case KVM_S390_VM_CRYPTO:
808 switch (attr->attr) {
809 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
810 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
811 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
812 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
828 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
832 unsigned long curkey;
835 if (args->flags != 0)
838 /* Is this guest using storage keys? */
839 if (!mm_use_skey(current->mm))
840 return KVM_S390_GET_SKEYS_NONE;
842 /* Enforce sane limit on memory allocation */
843 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
846 keys = kmalloc_array(args->count, sizeof(uint8_t),
847 GFP_KERNEL | __GFP_NOWARN);
849 keys = vmalloc(sizeof(uint8_t) * args->count);
853 for (i = 0; i < args->count; i++) {
854 hva = gfn_to_hva(kvm, args->start_gfn + i);
855 if (kvm_is_error_hva(hva)) {
860 curkey = get_guest_storage_key(current->mm, hva);
861 if (IS_ERR_VALUE(curkey)) {
868 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
869 sizeof(uint8_t) * args->count);
877 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
883 if (args->flags != 0)
886 /* Enforce sane limit on memory allocation */
887 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
890 keys = kmalloc_array(args->count, sizeof(uint8_t),
891 GFP_KERNEL | __GFP_NOWARN);
893 keys = vmalloc(sizeof(uint8_t) * args->count);
897 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
898 sizeof(uint8_t) * args->count);
904 /* Enable storage key handling for the guest */
905 r = s390_enable_skey();
909 for (i = 0; i < args->count; i++) {
910 hva = gfn_to_hva(kvm, args->start_gfn + i);
911 if (kvm_is_error_hva(hva)) {
916 /* Lowest order bit is reserved */
917 if (keys[i] & 0x01) {
922 r = set_guest_storage_key(current->mm, hva,
923 (unsigned long)keys[i], 0);
932 long kvm_arch_vm_ioctl(struct file *filp,
933 unsigned int ioctl, unsigned long arg)
935 struct kvm *kvm = filp->private_data;
936 void __user *argp = (void __user *)arg;
937 struct kvm_device_attr attr;
941 case KVM_S390_INTERRUPT: {
942 struct kvm_s390_interrupt s390int;
945 if (copy_from_user(&s390int, argp, sizeof(s390int)))
947 r = kvm_s390_inject_vm(kvm, &s390int);
950 case KVM_ENABLE_CAP: {
951 struct kvm_enable_cap cap;
953 if (copy_from_user(&cap, argp, sizeof(cap)))
955 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
958 case KVM_CREATE_IRQCHIP: {
959 struct kvm_irq_routing_entry routing;
962 if (kvm->arch.use_irqchip) {
963 /* Set up dummy routing. */
964 memset(&routing, 0, sizeof(routing));
965 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
969 case KVM_SET_DEVICE_ATTR: {
971 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
973 r = kvm_s390_vm_set_attr(kvm, &attr);
976 case KVM_GET_DEVICE_ATTR: {
978 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
980 r = kvm_s390_vm_get_attr(kvm, &attr);
983 case KVM_HAS_DEVICE_ATTR: {
985 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
987 r = kvm_s390_vm_has_attr(kvm, &attr);
990 case KVM_S390_GET_SKEYS: {
991 struct kvm_s390_skeys args;
994 if (copy_from_user(&args, argp,
995 sizeof(struct kvm_s390_skeys)))
997 r = kvm_s390_get_skeys(kvm, &args);
1000 case KVM_S390_SET_SKEYS: {
1001 struct kvm_s390_skeys args;
1004 if (copy_from_user(&args, argp,
1005 sizeof(struct kvm_s390_skeys)))
1007 r = kvm_s390_set_skeys(kvm, &args);
1017 static int kvm_s390_query_ap_config(u8 *config)
1019 u32 fcn_code = 0x04000000UL;
1022 memset(config, 0, 128);
1026 ".long 0xb2af0000\n" /* PQAP(QCI) */
1032 : "r" (fcn_code), "r" (config)
1033 : "cc", "0", "2", "memory"
1039 static int kvm_s390_apxa_installed(void)
1044 if (test_facility(2) && test_facility(12)) {
1045 cc = kvm_s390_query_ap_config(config);
1048 pr_err("PQAP(QCI) failed with cc=%d", cc);
1050 return config[0] & 0x40;
1056 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1058 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1060 if (kvm_s390_apxa_installed())
1061 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1063 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1066 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1069 cpu_id->version = 0xff;
1072 static int kvm_s390_crypto_init(struct kvm *kvm)
1074 if (!test_kvm_facility(kvm, 76))
1077 kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1078 GFP_KERNEL | GFP_DMA);
1079 if (!kvm->arch.crypto.crycb)
1082 kvm_s390_set_crycb_format(kvm);
1084 /* Enable AES/DEA protected key functions by default */
1085 kvm->arch.crypto.aes_kw = 1;
1086 kvm->arch.crypto.dea_kw = 1;
1087 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1088 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1089 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1090 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1095 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1098 char debug_name[16];
1099 static unsigned long sca_offset;
1102 #ifdef CONFIG_KVM_S390_UCONTROL
1103 if (type & ~KVM_VM_S390_UCONTROL)
1105 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1112 rc = s390_enable_sie();
1118 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1121 spin_lock(&kvm_lock);
1123 if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
1125 kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1126 spin_unlock(&kvm_lock);
1128 sprintf(debug_name, "kvm-%u", current->pid);
1130 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1135 * The architectural maximum amount of facilities is 16 kbit. To store
1136 * this amount, 2 kbyte of memory is required. Thus we need a full
1137 * page to hold the guest facility list (arch.model.fac->list) and the
1138 * facility mask (arch.model.fac->mask). Its address size has to be
1139 * 31 bits and word aligned.
1141 kvm->arch.model.fac =
1142 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1143 if (!kvm->arch.model.fac)
1146 /* Populate the facility mask initially. */
1147 memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1148 S390_ARCH_FAC_LIST_SIZE_BYTE);
1149 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1150 if (i < kvm_s390_fac_list_mask_size())
1151 kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1153 kvm->arch.model.fac->mask[i] = 0UL;
1156 /* Populate the facility list initially. */
1157 memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1158 S390_ARCH_FAC_LIST_SIZE_BYTE);
1160 kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1161 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1163 if (kvm_s390_crypto_init(kvm) < 0)
1166 spin_lock_init(&kvm->arch.float_int.lock);
1167 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1168 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1169 init_waitqueue_head(&kvm->arch.ipte_wq);
1170 mutex_init(&kvm->arch.ipte_mutex);
1172 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1173 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1175 if (type & KVM_VM_S390_UCONTROL) {
1176 kvm->arch.gmap = NULL;
1178 kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1179 if (!kvm->arch.gmap)
1181 kvm->arch.gmap->private = kvm;
1182 kvm->arch.gmap->pfault_enabled = 0;
1185 kvm->arch.css_support = 0;
1186 kvm->arch.use_irqchip = 0;
1187 kvm->arch.epoch = 0;
1189 spin_lock_init(&kvm->arch.start_stop_lock);
1190 KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1194 kfree(kvm->arch.crypto.crycb);
1195 free_page((unsigned long)kvm->arch.model.fac);
1196 debug_unregister(kvm->arch.dbf);
1197 free_page((unsigned long)(kvm->arch.sca));
1198 KVM_EVENT(3, "creation of vm failed: %d", rc);
1202 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1204 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1205 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1206 kvm_s390_clear_local_irqs(vcpu);
1207 kvm_clear_async_pf_completion_queue(vcpu);
1208 if (!kvm_is_ucontrol(vcpu->kvm)) {
1209 clear_bit(63 - vcpu->vcpu_id,
1210 (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1211 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1212 (__u64) vcpu->arch.sie_block)
1213 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1217 if (kvm_is_ucontrol(vcpu->kvm))
1218 gmap_free(vcpu->arch.gmap);
1220 if (vcpu->kvm->arch.use_cmma)
1221 kvm_s390_vcpu_unsetup_cmma(vcpu);
1222 free_page((unsigned long)(vcpu->arch.sie_block));
1224 kvm_vcpu_uninit(vcpu);
1225 kmem_cache_free(kvm_vcpu_cache, vcpu);
1228 static void kvm_free_vcpus(struct kvm *kvm)
1231 struct kvm_vcpu *vcpu;
1233 kvm_for_each_vcpu(i, vcpu, kvm)
1234 kvm_arch_vcpu_destroy(vcpu);
1236 mutex_lock(&kvm->lock);
1237 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1238 kvm->vcpus[i] = NULL;
1240 atomic_set(&kvm->online_vcpus, 0);
1241 mutex_unlock(&kvm->lock);
1244 void kvm_arch_destroy_vm(struct kvm *kvm)
1246 kvm_free_vcpus(kvm);
1247 free_page((unsigned long)kvm->arch.model.fac);
1248 free_page((unsigned long)(kvm->arch.sca));
1249 debug_unregister(kvm->arch.dbf);
1250 kfree(kvm->arch.crypto.crycb);
1251 if (!kvm_is_ucontrol(kvm))
1252 gmap_free(kvm->arch.gmap);
1253 kvm_s390_destroy_adapters(kvm);
1254 kvm_s390_clear_float_irqs(kvm);
1255 KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1258 /* Section: vcpu related */
1259 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1261 vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1262 if (!vcpu->arch.gmap)
1264 vcpu->arch.gmap->private = vcpu->kvm;
1269 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1271 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1272 kvm_clear_async_pf_completion_queue(vcpu);
1273 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1279 if (test_kvm_facility(vcpu->kvm, 129))
1280 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1281 if (test_kvm_facility(vcpu->kvm, 82))
1282 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
1284 if (kvm_is_ucontrol(vcpu->kvm))
1285 return __kvm_ucontrol_vcpu_init(vcpu);
1290 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1292 /* Save host register state */
1294 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1295 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1297 /* Depending on MACHINE_HAS_VX, data stored to vrs either
1298 * has vector register or floating point register format.
1300 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1301 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1302 if (test_fp_ctl(current->thread.fpu.fpc))
1303 /* User space provided an invalid FPC, let's clear it */
1304 current->thread.fpu.fpc = 0;
1306 save_access_regs(vcpu->arch.host_acrs);
1307 restore_access_regs(vcpu->run->s.regs.acrs);
1308 gmap_enable(vcpu->arch.gmap);
1309 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1312 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1314 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1315 gmap_disable(vcpu->arch.gmap);
1317 /* Save guest register state */
1319 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1321 /* Restore host register state */
1322 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1323 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1325 save_access_regs(vcpu->run->s.regs.acrs);
1326 restore_access_regs(vcpu->arch.host_acrs);
1329 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1331 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1332 vcpu->arch.sie_block->gpsw.mask = 0UL;
1333 vcpu->arch.sie_block->gpsw.addr = 0UL;
1334 kvm_s390_set_prefix(vcpu, 0);
1335 vcpu->arch.sie_block->cputm = 0UL;
1336 vcpu->arch.sie_block->ckc = 0UL;
1337 vcpu->arch.sie_block->todpr = 0;
1338 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1339 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1340 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1341 /* make sure the new fpc will be lazily loaded */
1343 current->thread.fpu.fpc = 0;
1344 vcpu->arch.sie_block->gbea = 1;
1345 vcpu->arch.sie_block->pp = 0;
1346 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
1347 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1348 kvm_clear_async_pf_completion_queue(vcpu);
1349 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1350 kvm_s390_vcpu_stop(vcpu);
1351 kvm_s390_clear_local_irqs(vcpu);
1354 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1356 mutex_lock(&vcpu->kvm->lock);
1358 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1360 mutex_unlock(&vcpu->kvm->lock);
1361 if (!kvm_is_ucontrol(vcpu->kvm))
1362 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1365 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1367 if (!test_kvm_facility(vcpu->kvm, 76))
1370 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1372 if (vcpu->kvm->arch.crypto.aes_kw)
1373 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1374 if (vcpu->kvm->arch.crypto.dea_kw)
1375 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1377 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1380 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1382 free_page(vcpu->arch.sie_block->cbrlo);
1383 vcpu->arch.sie_block->cbrlo = 0;
1386 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1388 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1389 if (!vcpu->arch.sie_block->cbrlo)
1392 vcpu->arch.sie_block->ecb2 |= 0x80;
1393 vcpu->arch.sie_block->ecb2 &= ~0x08;
1397 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1399 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1401 vcpu->arch.cpu_id = model->cpu_id;
1402 vcpu->arch.sie_block->ibc = model->ibc;
1403 vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1406 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1410 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1414 if (test_kvm_facility(vcpu->kvm, 78))
1415 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1416 else if (test_kvm_facility(vcpu->kvm, 8))
1417 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1419 kvm_s390_vcpu_setup_model(vcpu);
1421 vcpu->arch.sie_block->ecb = 6;
1422 if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1423 vcpu->arch.sie_block->ecb |= 0x10;
1425 vcpu->arch.sie_block->ecb2 = 8;
1426 vcpu->arch.sie_block->eca = 0xC1002000U;
1428 vcpu->arch.sie_block->eca |= 1;
1429 if (sclp.has_sigpif)
1430 vcpu->arch.sie_block->eca |= 0x10000000U;
1431 if (test_kvm_facility(vcpu->kvm, 129)) {
1432 vcpu->arch.sie_block->eca |= 0x00020000;
1433 vcpu->arch.sie_block->ecd |= 0x20000000;
1435 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1437 if (vcpu->kvm->arch.use_cmma) {
1438 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1442 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1443 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1445 kvm_s390_vcpu_crypto_setup(vcpu);
1450 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1453 struct kvm_vcpu *vcpu;
1454 struct sie_page *sie_page;
1457 if (id >= KVM_MAX_VCPUS)
1462 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1466 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1470 vcpu->arch.sie_block = &sie_page->sie_block;
1471 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1473 vcpu->arch.sie_block->icpua = id;
1474 if (!kvm_is_ucontrol(kvm)) {
1475 if (!kvm->arch.sca) {
1479 if (!kvm->arch.sca->cpu[id].sda)
1480 kvm->arch.sca->cpu[id].sda =
1481 (__u64) vcpu->arch.sie_block;
1482 vcpu->arch.sie_block->scaoh =
1483 (__u32)(((__u64)kvm->arch.sca) >> 32);
1484 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1485 set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1488 spin_lock_init(&vcpu->arch.local_int.lock);
1489 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1490 vcpu->arch.local_int.wq = &vcpu->wq;
1491 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1493 rc = kvm_vcpu_init(vcpu, kvm, id);
1495 goto out_free_sie_block;
1496 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1497 vcpu->arch.sie_block);
1498 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1502 free_page((unsigned long)(vcpu->arch.sie_block));
1504 kmem_cache_free(kvm_vcpu_cache, vcpu);
1509 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1511 return kvm_s390_vcpu_has_irq(vcpu, 0);
1514 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1516 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1520 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1522 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1525 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1527 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1531 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1533 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1537 * Kick a guest cpu out of SIE and wait until SIE is not running.
1538 * If the CPU is not running (e.g. waiting as idle) the function will
1539 * return immediately. */
1540 void exit_sie(struct kvm_vcpu *vcpu)
1542 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1543 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1547 /* Kick a guest cpu out of SIE to process a request synchronously */
1548 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1550 kvm_make_request(req, vcpu);
1551 kvm_s390_vcpu_request(vcpu);
1554 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1557 struct kvm *kvm = gmap->private;
1558 struct kvm_vcpu *vcpu;
1560 kvm_for_each_vcpu(i, vcpu, kvm) {
1561 /* match against both prefix pages */
1562 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1563 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1564 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1569 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1571 /* kvm common code refers to this, but never calls it */
1576 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1577 struct kvm_one_reg *reg)
1582 case KVM_REG_S390_TODPR:
1583 r = put_user(vcpu->arch.sie_block->todpr,
1584 (u32 __user *)reg->addr);
1586 case KVM_REG_S390_EPOCHDIFF:
1587 r = put_user(vcpu->arch.sie_block->epoch,
1588 (u64 __user *)reg->addr);
1590 case KVM_REG_S390_CPU_TIMER:
1591 r = put_user(vcpu->arch.sie_block->cputm,
1592 (u64 __user *)reg->addr);
1594 case KVM_REG_S390_CLOCK_COMP:
1595 r = put_user(vcpu->arch.sie_block->ckc,
1596 (u64 __user *)reg->addr);
1598 case KVM_REG_S390_PFTOKEN:
1599 r = put_user(vcpu->arch.pfault_token,
1600 (u64 __user *)reg->addr);
1602 case KVM_REG_S390_PFCOMPARE:
1603 r = put_user(vcpu->arch.pfault_compare,
1604 (u64 __user *)reg->addr);
1606 case KVM_REG_S390_PFSELECT:
1607 r = put_user(vcpu->arch.pfault_select,
1608 (u64 __user *)reg->addr);
1610 case KVM_REG_S390_PP:
1611 r = put_user(vcpu->arch.sie_block->pp,
1612 (u64 __user *)reg->addr);
1614 case KVM_REG_S390_GBEA:
1615 r = put_user(vcpu->arch.sie_block->gbea,
1616 (u64 __user *)reg->addr);
1625 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1626 struct kvm_one_reg *reg)
1631 case KVM_REG_S390_TODPR:
1632 r = get_user(vcpu->arch.sie_block->todpr,
1633 (u32 __user *)reg->addr);
1635 case KVM_REG_S390_EPOCHDIFF:
1636 r = get_user(vcpu->arch.sie_block->epoch,
1637 (u64 __user *)reg->addr);
1639 case KVM_REG_S390_CPU_TIMER:
1640 r = get_user(vcpu->arch.sie_block->cputm,
1641 (u64 __user *)reg->addr);
1643 case KVM_REG_S390_CLOCK_COMP:
1644 r = get_user(vcpu->arch.sie_block->ckc,
1645 (u64 __user *)reg->addr);
1647 case KVM_REG_S390_PFTOKEN:
1648 r = get_user(vcpu->arch.pfault_token,
1649 (u64 __user *)reg->addr);
1650 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1651 kvm_clear_async_pf_completion_queue(vcpu);
1653 case KVM_REG_S390_PFCOMPARE:
1654 r = get_user(vcpu->arch.pfault_compare,
1655 (u64 __user *)reg->addr);
1657 case KVM_REG_S390_PFSELECT:
1658 r = get_user(vcpu->arch.pfault_select,
1659 (u64 __user *)reg->addr);
1661 case KVM_REG_S390_PP:
1662 r = get_user(vcpu->arch.sie_block->pp,
1663 (u64 __user *)reg->addr);
1665 case KVM_REG_S390_GBEA:
1666 r = get_user(vcpu->arch.sie_block->gbea,
1667 (u64 __user *)reg->addr);
1676 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1678 kvm_s390_vcpu_initial_reset(vcpu);
1682 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1684 memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs));
1688 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1690 memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1694 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1695 struct kvm_sregs *sregs)
1697 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1698 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1699 restore_access_regs(vcpu->run->s.regs.acrs);
1703 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1704 struct kvm_sregs *sregs)
1706 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1707 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1711 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1713 /* make sure the new values will be lazily loaded */
1715 if (test_fp_ctl(fpu->fpc))
1717 current->thread.fpu.fpc = fpu->fpc;
1719 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
1721 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
1725 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1727 /* make sure we have the latest values */
1730 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
1732 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
1733 fpu->fpc = current->thread.fpu.fpc;
1737 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1741 if (!is_vcpu_stopped(vcpu))
1744 vcpu->run->psw_mask = psw.mask;
1745 vcpu->run->psw_addr = psw.addr;
1750 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1751 struct kvm_translation *tr)
1753 return -EINVAL; /* not implemented yet */
1756 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1757 KVM_GUESTDBG_USE_HW_BP | \
1758 KVM_GUESTDBG_ENABLE)
1760 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1761 struct kvm_guest_debug *dbg)
1765 vcpu->guest_debug = 0;
1766 kvm_s390_clear_bp_data(vcpu);
1768 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1771 if (dbg->control & KVM_GUESTDBG_ENABLE) {
1772 vcpu->guest_debug = dbg->control;
1773 /* enforce guest PER */
1774 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1776 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1777 rc = kvm_s390_import_bp_data(vcpu, dbg);
1779 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1780 vcpu->arch.guestdbg.last_bp = 0;
1784 vcpu->guest_debug = 0;
1785 kvm_s390_clear_bp_data(vcpu);
1786 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1792 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1793 struct kvm_mp_state *mp_state)
1795 /* CHECK_STOP and LOAD are not supported yet */
1796 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1797 KVM_MP_STATE_OPERATING;
1800 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1801 struct kvm_mp_state *mp_state)
1805 /* user space knows about this interface - let it control the state */
1806 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1808 switch (mp_state->mp_state) {
1809 case KVM_MP_STATE_STOPPED:
1810 kvm_s390_vcpu_stop(vcpu);
1812 case KVM_MP_STATE_OPERATING:
1813 kvm_s390_vcpu_start(vcpu);
1815 case KVM_MP_STATE_LOAD:
1816 case KVM_MP_STATE_CHECK_STOP:
1817 /* fall through - CHECK_STOP and LOAD are not supported yet */
1825 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1827 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1830 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1833 kvm_s390_vcpu_request_handled(vcpu);
1834 if (!vcpu->requests)
1837 * We use MMU_RELOAD just to re-arm the ipte notifier for the
1838 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1839 * This ensures that the ipte instruction for this request has
1840 * already finished. We might race against a second unmapper that
1841 * wants to set the blocking bit. Lets just retry the request loop.
1843 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1845 rc = gmap_ipte_notify(vcpu->arch.gmap,
1846 kvm_s390_get_prefix(vcpu),
1853 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1854 vcpu->arch.sie_block->ihcpu = 0xffff;
1858 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1859 if (!ibs_enabled(vcpu)) {
1860 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1861 atomic_or(CPUSTAT_IBS,
1862 &vcpu->arch.sie_block->cpuflags);
1867 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1868 if (ibs_enabled(vcpu)) {
1869 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1870 atomic_andnot(CPUSTAT_IBS,
1871 &vcpu->arch.sie_block->cpuflags);
1876 /* nothing to do, just clear the request */
1877 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1882 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
1884 struct kvm_vcpu *vcpu;
1887 mutex_lock(&kvm->lock);
1889 kvm->arch.epoch = tod - get_tod_clock();
1890 kvm_s390_vcpu_block_all(kvm);
1891 kvm_for_each_vcpu(i, vcpu, kvm)
1892 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
1893 kvm_s390_vcpu_unblock_all(kvm);
1895 mutex_unlock(&kvm->lock);
1899 * kvm_arch_fault_in_page - fault-in guest page if necessary
1900 * @vcpu: The corresponding virtual cpu
1901 * @gpa: Guest physical address
1902 * @writable: Whether the page should be writable or not
1904 * Make sure that a guest page has been faulted-in on the host.
1906 * Return: Zero on success, negative error code otherwise.
1908 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1910 return gmap_fault(vcpu->arch.gmap, gpa,
1911 writable ? FAULT_FLAG_WRITE : 0);
1914 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1915 unsigned long token)
1917 struct kvm_s390_interrupt inti;
1918 struct kvm_s390_irq irq;
1921 irq.u.ext.ext_params2 = token;
1922 irq.type = KVM_S390_INT_PFAULT_INIT;
1923 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1925 inti.type = KVM_S390_INT_PFAULT_DONE;
1926 inti.parm64 = token;
1927 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1931 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1932 struct kvm_async_pf *work)
1934 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1935 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1938 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1939 struct kvm_async_pf *work)
1941 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1942 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1945 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1946 struct kvm_async_pf *work)
1948 /* s390 will always inject the page directly */
1951 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1954 * s390 will always inject the page directly,
1955 * but we still want check_async_completion to cleanup
1960 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1963 struct kvm_arch_async_pf arch;
1966 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1968 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1969 vcpu->arch.pfault_compare)
1971 if (psw_extint_disabled(vcpu))
1973 if (kvm_s390_vcpu_has_irq(vcpu, 0))
1975 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1977 if (!vcpu->arch.gmap->pfault_enabled)
1980 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1981 hva += current->thread.gmap_addr & ~PAGE_MASK;
1982 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
1985 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
1989 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
1994 * On s390 notifications for arriving pages will be delivered directly
1995 * to the guest but the house keeping for completed pfaults is
1996 * handled outside the worker.
1998 kvm_check_async_pf_completion(vcpu);
2000 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
2005 if (test_cpu_flag(CIF_MCCK_PENDING))
2008 if (!kvm_is_ucontrol(vcpu->kvm)) {
2009 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2014 rc = kvm_s390_handle_requests(vcpu);
2018 if (guestdbg_enabled(vcpu)) {
2019 kvm_s390_backup_guest_per_regs(vcpu);
2020 kvm_s390_patch_guest_per_regs(vcpu);
2023 vcpu->arch.sie_block->icptcode = 0;
2024 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2025 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2026 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2031 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2033 psw_t *psw = &vcpu->arch.sie_block->gpsw;
2037 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2038 trace_kvm_s390_sie_fault(vcpu);
2041 * We want to inject an addressing exception, which is defined as a
2042 * suppressing or terminating exception. However, since we came here
2043 * by a DAT access exception, the PSW still points to the faulting
2044 * instruction since DAT exceptions are nullifying. So we've got
2045 * to look up the current opcode to get the length of the instruction
2046 * to be able to forward the PSW.
2048 rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2050 return kvm_s390_inject_prog_cond(vcpu, rc);
2051 psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2053 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2056 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2060 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2061 vcpu->arch.sie_block->icptcode);
2062 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2064 if (guestdbg_enabled(vcpu))
2065 kvm_s390_restore_guest_per_regs(vcpu);
2067 if (exit_reason >= 0) {
2069 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2070 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2071 vcpu->run->s390_ucontrol.trans_exc_code =
2072 current->thread.gmap_addr;
2073 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2076 } else if (current->thread.gmap_pfault) {
2077 trace_kvm_s390_major_guest_pfault(vcpu);
2078 current->thread.gmap_pfault = 0;
2079 if (kvm_arch_setup_async_pf(vcpu)) {
2082 gpa_t gpa = current->thread.gmap_addr;
2083 rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
2088 rc = vcpu_post_run_fault_in_sie(vcpu);
2090 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2093 if (kvm_is_ucontrol(vcpu->kvm))
2094 /* Don't exit for host interrupts. */
2095 rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
2097 rc = kvm_handle_sie_intercept(vcpu);
2103 static int __vcpu_run(struct kvm_vcpu *vcpu)
2105 int rc, exit_reason;
2108 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2109 * ning the guest), so that memslots (and other stuff) are protected
2111 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2114 rc = vcpu_pre_run(vcpu);
2118 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2120 * As PF_VCPU will be used in fault handler, between
2121 * guest_enter and guest_exit should be no uaccess.
2123 local_irq_disable();
2124 __kvm_guest_enter();
2126 exit_reason = sie64a(vcpu->arch.sie_block,
2127 vcpu->run->s.regs.gprs);
2128 local_irq_disable();
2131 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2133 rc = vcpu_post_run(vcpu, exit_reason);
2134 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2136 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2140 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2142 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2143 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2144 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2145 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2146 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2147 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2148 /* some control register changes require a tlb flush */
2149 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2151 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2152 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2153 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2154 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2155 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2156 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2158 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2159 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2160 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2161 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2162 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2163 kvm_clear_async_pf_completion_queue(vcpu);
2165 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
2166 test_kvm_facility(vcpu->kvm, 82)) {
2167 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2168 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
2170 kvm_run->kvm_dirty_regs = 0;
2173 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2175 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2176 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2177 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2178 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2179 kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2180 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2181 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2182 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2183 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2184 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2185 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2186 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2187 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
2190 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2195 if (guestdbg_exit_pending(vcpu)) {
2196 kvm_s390_prepare_debug_exit(vcpu);
2200 if (vcpu->sigset_active)
2201 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2203 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2204 kvm_s390_vcpu_start(vcpu);
2205 } else if (is_vcpu_stopped(vcpu)) {
2206 pr_err_ratelimited("can't run stopped vcpu %d\n",
2211 sync_regs(vcpu, kvm_run);
2214 rc = __vcpu_run(vcpu);
2216 if (signal_pending(current) && !rc) {
2217 kvm_run->exit_reason = KVM_EXIT_INTR;
2221 if (guestdbg_exit_pending(vcpu) && !rc) {
2222 kvm_s390_prepare_debug_exit(vcpu);
2226 if (rc == -EOPNOTSUPP) {
2227 /* intercept cannot be handled in-kernel, prepare kvm-run */
2228 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
2229 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2230 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2231 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2235 if (rc == -EREMOTE) {
2236 /* intercept was handled, but userspace support is needed
2237 * kvm_run has been prepared by the handler */
2241 store_regs(vcpu, kvm_run);
2243 if (vcpu->sigset_active)
2244 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2246 vcpu->stat.exit_userspace++;
2251 * store status at address
2252 * we use have two special cases:
2253 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2254 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2256 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2258 unsigned char archmode = 1;
2259 freg_t fprs[NUM_FPRS];
2264 px = kvm_s390_get_prefix(vcpu);
2265 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2266 if (write_guest_abs(vcpu, 163, &archmode, 1))
2269 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2270 if (write_guest_real(vcpu, 163, &archmode, 1))
2274 gpa -= __LC_FPREGS_SAVE_AREA;
2276 /* manually convert vector registers if necessary */
2277 if (MACHINE_HAS_VX) {
2278 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2279 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2282 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2283 vcpu->run->s.regs.vrs, 128);
2285 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2286 vcpu->run->s.regs.gprs, 128);
2287 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2288 &vcpu->arch.sie_block->gpsw, 16);
2289 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2291 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2292 &vcpu->run->s.regs.fpc, 4);
2293 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2294 &vcpu->arch.sie_block->todpr, 4);
2295 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2296 &vcpu->arch.sie_block->cputm, 8);
2297 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2298 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2300 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2301 &vcpu->run->s.regs.acrs, 64);
2302 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2303 &vcpu->arch.sie_block->gcr, 128);
2304 return rc ? -EFAULT : 0;
2307 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2310 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2311 * copying in vcpu load/put. Lets update our copies before we save
2312 * it into the save area
2315 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2316 save_access_regs(vcpu->run->s.regs.acrs);
2318 return kvm_s390_store_status_unloaded(vcpu, addr);
2322 * store additional status at address
2324 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2327 /* Only bits 0-53 are used for address formation */
2328 if (!(gpa & ~0x3ff))
2331 return write_guest_abs(vcpu, gpa & ~0x3ff,
2332 (void *)&vcpu->run->s.regs.vrs, 512);
2335 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2337 if (!test_kvm_facility(vcpu->kvm, 129))
2341 * The guest VXRS are in the host VXRs due to the lazy
2342 * copying in vcpu load/put. We can simply call save_fpu_regs()
2343 * to save the current register state because we are in the
2344 * middle of a load/put cycle.
2346 * Let's update our copies before we save it into the save area.
2350 return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2353 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2355 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2356 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2359 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2362 struct kvm_vcpu *vcpu;
2364 kvm_for_each_vcpu(i, vcpu, kvm) {
2365 __disable_ibs_on_vcpu(vcpu);
2369 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2371 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2372 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
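/*
 * Illustrative userspace sketch (vcpu_fd, guest_addr and buf are example
 * names): reading guest logical memory through the KVM_S390_MEM_OP vcpu
 * ioctl handled by the function above.
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.buf   = (__u64)(unsigned long)buf,
 *		.size  = 4096,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.ar    = 0,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */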
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
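/*
 * Illustrative only: for ucontrol guests, userspace reaches the fault
 * handler above by mmap()ing the vcpu fd at the SIE page offset (vcpu_fd
 * and page_size are example names):
 *
 *	mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	     vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * page_size);
 *
 * Any other offset faults with SIGBUS.
 */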
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment boundary (1MB). The memory in userland may be fragmented
	   into various different vmas, and it is fine to mmap() and munmap()
	   ranges within this slot at any time after this call. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}
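/*
 * Illustrative userspace sketch (vm_fd, mem and mem_size are example names):
 * a memory region that passes the 1MB alignment checks above, registered via
 * the KVM_SET_USER_MEMORY_REGION vm ioctl.
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = mem_size,	// multiple of 1MB
 *		.userspace_addr  = (__u64)mem,	// 1MB aligned
 *	};
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */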
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		/* FALLTHROUGH */
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				      mem->guest_phys_addr, mem->memory_size);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
}
static int __init kvm_s390_init(void)
{
	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");