/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
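/*
 * Note: this mask is ANDed into the host facility bits when a VM is
 * created (see kvm_arch_init_vm() below), so a guest never sees a
 * facility that KVM does not explicitly allow.
 */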
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc = 3; /* subfunction not available */

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
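/*
 * The feature bits collected above are exactly what the
 * KVM_S390_VM_CPU_MACHINE_FEAT device attribute reports to user space
 * (see kvm_s390_get_machine_feat() below).
 */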
int kvm_arch_init(void *opaque)
{
	int rc;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;
	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		rc = -ENOMEM;
		goto out_debug_unreg;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("Failed to register FLIC rc=%d\n", rc);
		goto out_debug_unreg;
	}
	return 0;

out_debug_unreg:
	debug_unregister(kvm_s390_dbf);
	return rc;
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);
	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
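/*
 * Example (hypothetical userspace sketch, not part of this file): the
 * log synced above is fetched with the generic KVM_GET_DIRTY_LOG ioctl
 * on the VM file descriptor, one bit per guest page:
 *
 *	struct kvm_dirty_log log = {
 *		.slot = 0,
 *		.dirty_bitmap = bitmap,		// caller-allocated buffer
 *	};
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */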
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
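/*
 * Example (hypothetical userspace sketch): VM capabilities handled
 * above are enabled with the KVM_ENABLE_CAP ioctl on the VM fd:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */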
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
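/*
 * Example (hypothetical userspace sketch): the memory limit handled
 * above is a VM device attribute set through KVM_SET_DEVICE_ATTR:
 *
 *	__u64 limit = 1UL << 31;	// limit guest memory to 2 GiB
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64) &limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */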
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
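/*
 * Example (hypothetical userspace sketch): the guest TOD base is a
 * 64-bit value exchanged through the KVM_S390_VM_TOD attribute group:
 *
 *	__u64 tod;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64) &tod,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */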
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
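/*
 * Example (hypothetical userspace sketch): the host model reported by
 * kvm_s390_get_machine() above is queried like this:
 *
 *	struct kvm_s390_vm_cpu_machine mach;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE,
 *		.addr  = (__u64) &mach,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */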
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
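/*
 * Example (hypothetical userspace sketch): storage keys are moved in
 * bulk, one byte per guest page starting at start_gfn:
 *
 *	uint8_t keys[16];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 16,
 *		.skeydata_addr = (__u64) keys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */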
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	mutex_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	mutex_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}

static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
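/*
 * Note: the basic SCA limits a VM to KVM_S390_BSCA_CPU_SLOTS (64)
 * vcpus. The switch above rewires every existing vcpu to the extended
 * SCA, which provides KVM_S390_ESCA_CPU_SLOTS (248) entries; all vcpus
 * are blocked and kicked out of SIE while the pointers change.
 */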
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
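/*
 * Note on the loop above: cputm and cputm_start are published under a
 * seqcount. Readers retry while a writer is inside the critical
 * section (odd sequence value), so a consistent pair is always used to
 * interpolate the guest CPU timer without taking a lock.
 */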
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Save host register state */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;

	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.enabled_gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;

	/* Restore host register state */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= 0x02;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= 0x04;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= 0x08;
	vcpu->arch.sie_block->eca = 0x1002000U;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= 0x80000000U;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= 0x40000000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (r == 0)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
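/*
 * Example (hypothetical userspace sketch): the registers above are
 * accessed one at a time with KVM_GET/SET_ONE_REG on the vcpu fd:
 *
 *	__u64 timer;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64) &timer,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */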
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure the new values will be lazily loaded */
	save_fpu_regs();
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	current->thread.fpu.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = current->thread.fpu.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;
	if (!sclp.has_gpere)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
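
/*
 * A minimal userspace sketch (not part of this file): enabling
 * single-step debugging on an open vcpu fd ("vcpu_fd" is a
 * placeholder).
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg) < 0)
 *		perror("KVM_SET_GUEST_DEBUG");
 */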
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
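
/*
 * A minimal userspace sketch (not part of this file): placing a VCPU
 * into the stopped state via KVM_SET_MP_STATE, which also hands state
 * control to userspace as described above. "vcpu_fd" is a placeholder.
 *
 *	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *	if (ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp) < 0)
 *		perror("KVM_SET_MP_STATE");
 */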
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;

		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
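
/*
 * Worked example for the epoch arithmetic above: the guest TOD is
 * presented as host TOD + epoch. With epoch = tod - H (H being the
 * host TOD at set time), a guest read at a later host time H'
 * observes H' + (tod - H), i.e. a clock that started at the requested
 * value and advances in lock step with the host.
 */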
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();
	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there
		 * should be no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64)) {
		struct runtime_instr_cb *riccb =
			(struct runtime_instr_cb *) &kvm_run->s.regs.riccb;

		if (riccb->valid)
			vcpu->arch.sie_block->ecb3 |= 0x01;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}

	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
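
/*
 * A minimal userspace sketch (not part of this file) of the dispatch
 * loop this handler serves. "vcpu_fd" and "run" (the kvm_run area
 * mmap'ed at offset 0, sized via KVM_GET_VCPU_MMAP_SIZE) are
 * placeholders, and handle_sieic() is hypothetical.
 *
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
 *			break;
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_sieic(run->s390_sieic.icptcode,
 *				     run->s390_sieic.ipa,
 *				     run->s390_sieic.ipb);
 *	}
 */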
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
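
/*
 * A minimal userspace sketch (not part of this file): requesting a
 * store-status into the guest's prefix area. "vcpu_fd" is a
 * placeholder for an open vcpu file descriptor.
 *
 *	if (ioctl(vcpu_fd, KVM_S390_STORE_STATUS,
 *		  KVM_S390_STORE_STATUS_PREFIXED) < 0)
 *		perror("KVM_S390_STORE_STATUS");
 */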
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
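
/*
 * A minimal userspace sketch (not part of this file): reading 512
 * bytes from guest logical address 0x1000 through access register 0.
 * "vcpu_fd" is a placeholder.
 *
 *	char buf[512];
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = 0x1000,
 *		.size = sizeof(buf),
 *		.op = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf = (unsigned long)buf,
 *		.ar = 0,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop) < 0)
 *		perror("KVM_S390_MEM_OP");
 */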
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/*
	 * A few sanity checks: memory slots have to start and end on a
	 * segment boundary (1 MB). The memory in userland may be
	 * fragmented into various different vmas, and it is okay to
	 * mmap() and munmap() ranges within this slot at any time after
	 * this call.
	 */
	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
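
/*
 * A minimal userspace sketch (not part of this file) of a slot that
 * passes the checks above: size and both addresses are 1 MB aligned.
 * "vm_fd" and "mem" (a 1 MB aligned mmap() result) are placeholders.
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size = 1UL << 20,
 *		.userspace_addr = (unsigned long)mem,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0)
 *		perror("KVM_SET_USER_MEMORY_REGION");
 */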
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		/* FALLTHROUGH */
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				      mem->guest_phys_addr, mem->memory_size);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
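
/*
 * Worked example: nonhyp_fai is a two-bit value, so the returned mask
 * is the 48 low-order one-bits shifted right by 0, 16, 32 or 48
 * positions: 0 -> 0x0000ffffffffffffUL, 1 -> 0x00000000ffffffffUL,
 * 2 -> 0x000000000000ffffUL, 3 -> 0.
 */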
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");