2 * hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008, 2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
38 #include <asm/pgtable.h>
41 #include <asm/switch_to.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
49 #define KMSG_COMPONENT "kvm-s390"
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
53 #define CREATE_TRACE_POINTS
55 #include "trace-s390.h"
57 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 (KVM_MAX_VCPUS + LOCAL_IRQS))
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/*
 * Per-VCPU statistics exported through debugfs.  Each entry maps a
 * debugfs file name to the byte offset of a counter inside
 * struct kvm_vcpu (via the VCPU_STAT() helper defined above).
 * NOTE(review): this chunk is a lossy extract — original line numbers
 * are embedded in the text and the table's closing "};" is not visible;
 * several lines appear to have been dropped by the extraction.
 */
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65 { "userspace_handled", VCPU_STAT(exit_userspace) },
66 { "exit_null", VCPU_STAT(exit_null) },
67 { "exit_validity", VCPU_STAT(exit_validity) },
68 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
69 { "exit_external_request", VCPU_STAT(exit_external_request) },
70 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 { "exit_instruction", VCPU_STAT(exit_instruction) },
72 { "exit_pei", VCPU_STAT(exit_pei) },
73 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
86 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
93 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
95 { "instruction_spx", VCPU_STAT(instruction_spx) },
96 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
97 { "instruction_stap", VCPU_STAT(instruction_stap) },
98 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
101 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
102 { "instruction_essa", VCPU_STAT(instruction_essa) },
103 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
104 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
105 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
106 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107 { "instruction_sie", VCPU_STAT(instruction_sie) },
108 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124 { "diagnose_10", VCPU_STAT(diagnose_10) },
125 { "diagnose_44", VCPU_STAT(diagnose_44) },
126 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
127 { "diagnose_258", VCPU_STAT(diagnose_258) },
128 { "diagnose_308", VCPU_STAT(diagnose_308) },
129 { "diagnose_500", VCPU_STAT(diagnose_500) },
/* NOTE(review): terminating sentinel entry and "};" missing from this extract. */
/*
 * Module-level state and parameters.
 * NOTE(review): lossy extract — embedded original line numbers remain in
 * the text and lines are missing (e.g. the body of the TOD-clock-extension
 * struct and the braces of kvm_s390_fac_list_mask_size()).
 */
/* 128-bit extended-TOD-clock value: struct body not visible in this extract. */
133 struct kvm_s390_tod_clock_ext {
139 /* allow nested virtualization in KVM (if enabled by user space) */
/* "nested" module parameter, read-only via sysfs (S_IRUGO). */
141 module_param(nested, int, S_IRUGO);
142 MODULE_PARM_DESC(nested, "Nested virtualization support");
144 /* upper facilities limit for kvm */
/* Mask of STFLE facilities KVM is willing to expose to guests. */
145 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
/* Returns the element count of the facility mask; compile-time checked
 * against S390_ARCH_FAC_MASK_SIZE_U64. */
147 unsigned long kvm_s390_fac_list_mask_size(void)
149 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
150 return ARRAY_SIZE(kvm_s390_fac_list_mask);
153 /* available cpu features supported by kvm */
154 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
155 /* available subfunctions indicated via query / "test bit" */
156 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
/* PTE-invalidation notifiers for the guest mapping and for vSIE. */
158 static struct gmap_notifier gmap_notifier;
159 static struct gmap_notifier vsie_gmap_notifier;
/* Handle for the s390 debug-feature trace area used by VM_EVENT/VCPU_EVENT. */
160 debug_info_t *kvm_s390_dbf;
162 /* Section: not file related */
/* Hardware enable is a no-op on s390: SIE is always usable.
 * NOTE(review): function bodies in this extract are incomplete (missing
 * braces/returns); comments describe apparent intent only. */
163 int kvm_arch_hardware_enable(void)
165 /* every s390 is virtualization enabled ;-) */
/* gmap PTE-notifier callback; body not visible in this extract. */
169 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
/* Apply a host TOD-clock delta to one SIE control block's guest epoch. */
172 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
177 * The TOD jumps by delta, we have to compensate this by adding
178 * -delta to the epoch.
182 /* sign-extension - we're adding to signed values below */
/* ECD_MEF: multiple-epoch facility active -> also adjust the epoch index
 * (epdx), with carry when the 64-bit epoch addition wrapped. */
187 if (scb->ecd & ECD_MEF) {
188 scb->epdx += delta_idx;
189 if (scb->epoch < delta)
195 * This callback is executed during stop_machine(). All CPUs are therefore
196 * temporarily stopped. In order not to change guest behavior, we have to
197 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
198 * so a CPU won't be stopped while calculating with the epoch.
/* Notifier: propagate a host clock-steering delta (*v) to every VCPU of
 * every VM, including vSIE shadow blocks and the CPU-timer start stamp. */
200 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
204 struct kvm_vcpu *vcpu;
206 unsigned long long *delta = v;
208 list_for_each_entry(kvm, &vm_list, vm_list) {
209 kvm_for_each_vcpu(i, vcpu, kvm) {
210 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
/* Presumably only done for vcpu 0 in the full source — the guard
 * line is missing here; TODO confirm against the original file. */
212 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
213 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
215 if (vcpu->arch.cputm_enabled)
216 vcpu->arch.cputm_start += *delta;
217 if (vcpu->arch.vsie_block)
218 kvm_clock_sync_scb(vcpu->arch.vsie_block,
225 static struct notifier_block kvm_clock_notifier = {
226 .notifier_call = kvm_clock_sync,
/*
 * Register/unregister the gmap PTE notifiers and the epoch-delta
 * (clock-steering) notifier.  setup and unsetup must stay symmetric.
 * NOTE(review): lossy extract — braces and return statements missing.
 */
229 int kvm_arch_hardware_setup(void)
231 gmap_notifier.notifier_call = kvm_gmap_notifier;
232 gmap_register_pte_notifier(&gmap_notifier);
233 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
234 gmap_register_pte_notifier(&vsie_gmap_notifier);
235 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
236 &kvm_clock_notifier);
240 void kvm_arch_hardware_unsetup(void)
242 gmap_unregister_pte_notifier(&gmap_notifier);
243 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
244 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
245 &kvm_clock_notifier);
/* Mark a CPU feature as available to guests (inverted-bit bitmap helper). */
248 static void allow_cpu_feat(unsigned long nr)
250 set_bit_inv(nr, kvm_s390_available_cpu_feat)
/* Test one PERFORM LOCKED OPERATION (PLO) "test bit" function-code bit;
 * inline-asm body not visible in this extract. */
253 static inline int plo_test_bit(unsigned char nr)
255 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
259 /* Parameter registers are ignored for "test bit" */
/*
 * Probe host CPU features/subfunctions (PLO, PTFF, the CPACF crypto
 * instructions gated by the MSA facilities, SIE interpretation features)
 * and record what can be offered to guests.
 * NOTE(review): lossy extract — embedded original line numbers remain and
 * many guard lines/braces are missing; comments describe apparent intent.
 */
269 static void kvm_s390_cpu_feat_init(void)
/* Probe all 256 possible PLO function codes; record each available one
 * as a bit in the big-endian-style plo[] byte array. */
273 for (i = 0; i < 256; ++i) {
275 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
278 if (test_facility(28)) /* TOD-clock steering */
279 ptff(kvm_s390_available_subfunc.ptff,
280 sizeof(kvm_s390_available_subfunc.ptff),
/* Message-security-assist levels: query each CPACF instruction only when
 * the corresponding STFLE facility is installed. */
283 if (test_facility(17)) { /* MSA */
284 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
285 kvm_s390_available_subfunc.kmac);
286 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
287 kvm_s390_available_subfunc.kmc);
288 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
289 kvm_s390_available_subfunc.km);
290 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
291 kvm_s390_available_subfunc.kimd);
292 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
293 kvm_s390_available_subfunc.klmd);
295 if (test_facility(76)) /* MSA3 */
296 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
297 kvm_s390_available_subfunc.pckmo);
298 if (test_facility(77)) { /* MSA4 */
299 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
300 kvm_s390_available_subfunc.kmctr);
301 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
302 kvm_s390_available_subfunc.kmf);
303 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
304 kvm_s390_available_subfunc.kmo);
305 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
306 kvm_s390_available_subfunc.pcc);
308 if (test_facility(57)) /* MSA5 */
309 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
310 kvm_s390_available_subfunc.ppno);
312 if (test_facility(146)) /* MSA8 */
313 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
314 kvm_s390_available_subfunc.kma);
316 if (MACHINE_HAS_ESOP)
317 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
319 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
320 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
/* Gate all vSIE (nested) features on the prerequisites above and on the
 * "nested" module parameter. */
322 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
323 !test_facility(3) || !nested)
325 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
326 if (sclp.has_64bscao)
327 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
/* NOTE(review): the sclp.has_* guard lines for the features below were
 * dropped by the extraction; each allow_cpu_feat() is presumably
 * conditional in the full source. */
329 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
331 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
333 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
335 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
337 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
339 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
341 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
343 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
344 * all skey handling functions read/set the skey from the PGSTE
345 * instead of the real storage key.
347 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
348 * pages being detected as preserved although they are resident.
350 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
351 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
353 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
354 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
355 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
357 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
358 * cannot easily shadow the SCA because of the ipte lock.
/*
 * Module init: create the "kvm-trace" s390 debug-feature area, attach the
 * sprintf view, probe CPU features and register the FLIC device ops.
 * On any failure the debug area is torn down again (goto out_debug_unreg).
 * NOTE(review): lossy extract — braces/returns/error-checks missing.
 */
362 int kvm_arch_init(void *opaque)
366 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
370 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
372 goto out_debug_unreg;
375 kvm_s390_cpu_feat_init();
377 /* Register floating interrupt controller interface. */
378 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
380 pr_err("Failed to register FLIC rc=%d\n", rc);
381 goto out_debug_unreg;
386 debug_unregister(kvm_s390_dbf);
/* Module exit: release the debug-feature area created in kvm_arch_init(). */
390 void kvm_arch_exit(void)
392 debug_unregister(kvm_s390_dbf);
395 /* Section: device related */
/* /dev/kvm ioctl: only KVM_S390_ENABLE_SIE is handled at this level. */
396 long kvm_arch_dev_ioctl(struct file *filp,
397 unsigned int ioctl, unsigned long arg)
399 if (ioctl == KVM_S390_ENABLE_SIE)
400 return s390_enable_sie();
/*
 * KVM_CHECK_EXTENSION: report which capabilities this VM supports.
 * Unconditional capabilities fall through to a common "r = 1" (not
 * visible in this extract); others depend on sclp fields or STFLE
 * facility tests.
 * NOTE(review): lossy extract — "r = ...", "break" and "#endif" lines
 * are missing throughout the switch.
 */
404 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
409 case KVM_CAP_S390_PSW:
410 case KVM_CAP_S390_GMAP:
411 case KVM_CAP_SYNC_MMU:
412 #ifdef CONFIG_KVM_S390_UCONTROL
413 case KVM_CAP_S390_UCONTROL:
415 case KVM_CAP_ASYNC_PF:
416 case KVM_CAP_SYNC_REGS:
417 case KVM_CAP_ONE_REG:
418 case KVM_CAP_ENABLE_CAP:
419 case KVM_CAP_S390_CSS_SUPPORT:
420 case KVM_CAP_IOEVENTFD:
421 case KVM_CAP_DEVICE_CTRL:
422 case KVM_CAP_ENABLE_CAP_VM:
423 case KVM_CAP_S390_IRQCHIP:
424 case KVM_CAP_VM_ATTRIBUTES:
425 case KVM_CAP_MP_STATE:
426 case KVM_CAP_IMMEDIATE_EXIT:
427 case KVM_CAP_S390_INJECT_IRQ:
428 case KVM_CAP_S390_USER_SIGP:
429 case KVM_CAP_S390_USER_STSI:
430 case KVM_CAP_S390_SKEYS:
431 case KVM_CAP_S390_IRQ_STATE:
432 case KVM_CAP_S390_USER_INSTR0:
433 case KVM_CAP_S390_CMMA_MIGRATION:
434 case KVM_CAP_S390_AIS:
437 case KVM_CAP_S390_MEM_OP:
/* VCPU count limits: basic SCA slots unless ESCA + 64-bit SCAO allow more. */
440 case KVM_CAP_NR_VCPUS:
441 case KVM_CAP_MAX_VCPUS:
442 case KVM_CAP_MAX_VCPU_ID:
443 r = KVM_S390_BSCA_CPU_SLOTS;
444 if (!kvm_s390_use_sca_entries())
446 else if (sclp.has_esca && sclp.has_64bscao)
447 r = KVM_S390_ESCA_CPU_SLOTS;
449 case KVM_CAP_NR_MEMSLOTS:
450 r = KVM_USER_MEM_SLOTS;
452 case KVM_CAP_S390_COW:
453 r = MACHINE_HAS_ESOP;
455 case KVM_CAP_S390_VECTOR_REGISTERS:
/* Facility-gated capabilities: 64 = runtime instr., 133 = guarded
 * storage, 82 = branch-prediction blocking. */
458 case KVM_CAP_S390_RI:
459 r = test_facility(64);
461 case KVM_CAP_S390_GS:
462 r = test_facility(133);
464 case KVM_CAP_S390_BPB:
465 r = test_facility(82);
/*
 * Walk every page of a memslot and transfer the gmap "guest dirty" bit
 * into KVM's dirty bitmap; bails out early on a fatal signal.
 * NOTE(review): lossy extract — braces and some statements missing.
 * NOTE(review): "cur_gfn <= last_gfn" with last_gfn = base + npages looks
 * like an off-by-one (one past the slot) — verify against the original.
 */
473 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
474 struct kvm_memory_slot *memslot)
476 gfn_t cur_gfn, last_gfn;
477 unsigned long address;
478 struct gmap *gmap = kvm->arch.gmap;
480 /* Loop over all guest pages */
481 last_gfn = memslot->base_gfn + memslot->npages;
482 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
483 address = gfn_to_hva_memslot(memslot, cur_gfn);
485 if (test_and_clear_guest_dirty(gmap->mm, address))
486 mark_page_dirty(kvm, cur_gfn);
487 if (fatal_signal_pending(current))
493 /* Section: vm related */
494 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
497 * Get (and clear) the dirty memory log for a memory slot.
/* KVM_GET_DIRTY_LOG ioctl: rejects ucontrol VMs, validates the slot,
 * syncs gmap dirty state, copies the log to user space and clears it.
 * All under kvm->slots_lock. */
499 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
500 struct kvm_dirty_log *log)
504 struct kvm_memslots *slots;
505 struct kvm_memory_slot *memslot;
508 if (kvm_is_ucontrol(kvm))
511 mutex_lock(&kvm->slots_lock);
514 if (log->slot >= KVM_USER_MEM_SLOTS)
517 slots = kvm_memslots(kvm);
518 memslot = id_to_memslot(slots, log->slot);
520 if (!memslot->dirty_bitmap)
523 kvm_s390_sync_dirty_log(kvm, memslot);
524 r = kvm_get_dirty_log(kvm, log, &is_dirty);
528 /* Clear the dirty log */
/* Presumably only when is_dirty is set — the guard line is missing here. */
530 n = kvm_dirty_bitmap_bytes(memslot);
531 memset(memslot->dirty_bitmap, 0, n);
535 mutex_unlock(&kvm->slots_lock);
/* Request an operation-exception intercept on every VCPU of the VM. */
539 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
542 struct kvm_vcpu *vcpu;
544 kvm_for_each_vcpu(i, vcpu, kvm) {
545 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
/*
 * KVM_ENABLE_CAP (VM scope): turn on optional per-VM behavior.  Facility
 * bits may only be changed before the first VCPU exists, hence the
 * kvm->created_vcpus checks under kvm->lock.
 * NOTE(review): lossy extract — "break"s, "r = ..." assignments and
 * closing braces are missing throughout.
 */
549 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
557 case KVM_CAP_S390_IRQCHIP:
558 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
559 kvm->arch.use_irqchip = 1;
562 case KVM_CAP_S390_USER_SIGP:
563 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
564 kvm->arch.user_sigp = 1;
/* Vector registers: facility 129, plus 134/135 extensions when present. */
567 case KVM_CAP_S390_VECTOR_REGISTERS:
568 mutex_lock(&kvm->lock);
569 if (kvm->created_vcpus) {
571 } else if (MACHINE_HAS_VX) {
572 set_kvm_facility(kvm->arch.model.fac_mask, 129);
573 set_kvm_facility(kvm->arch.model.fac_list, 129);
574 if (test_facility(134)) {
575 set_kvm_facility(kvm->arch.model.fac_mask, 134);
576 set_kvm_facility(kvm->arch.model.fac_list, 134);
578 if (test_facility(135)) {
579 set_kvm_facility(kvm->arch.model.fac_mask, 135);
580 set_kvm_facility(kvm->arch.model.fac_list, 135);
585 mutex_unlock(&kvm->lock);
586 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
587 r ? "(not available)" : "(success)");
/* Runtime instrumentation: facility 64. */
589 case KVM_CAP_S390_RI:
591 mutex_lock(&kvm->lock);
592 if (kvm->created_vcpus) {
594 } else if (test_facility(64)) {
595 set_kvm_facility(kvm->arch.model.fac_mask, 64);
596 set_kvm_facility(kvm->arch.model.fac_list, 64);
599 mutex_unlock(&kvm->lock);
600 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
601 r ? "(not available)" : "(success)");
/* Adapter-interruption suppression: facility 72. */
603 case KVM_CAP_S390_AIS:
604 mutex_lock(&kvm->lock);
605 if (kvm->created_vcpus) {
608 set_kvm_facility(kvm->arch.model.fac_mask, 72);
609 set_kvm_facility(kvm->arch.model.fac_list, 72);
612 mutex_unlock(&kvm->lock);
613 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
614 r ? "(not available)" : "(success)");
/* Guarded storage: facility 133. */
616 case KVM_CAP_S390_GS:
618 mutex_lock(&kvm->lock);
619 if (kvm->created_vcpus) {
621 } else if (test_facility(133)) {
622 set_kvm_facility(kvm->arch.model.fac_mask, 133);
623 set_kvm_facility(kvm->arch.model.fac_list, 133);
626 mutex_unlock(&kvm->lock);
627 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
628 r ? "(not available)" : "(success)");
630 case KVM_CAP_S390_USER_STSI:
631 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
632 kvm->arch.user_stsi = 1;
635 case KVM_CAP_S390_USER_INSTR0:
636 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
637 kvm->arch.user_instr0 = 1;
638 icpt_operexc_on_all_vcpus(kvm);
/*
 * KVM_S390_VM_MEM_CTRL attribute get/set.
 * NOTE(review): lossy extract — returns, "break"s and closing braces are
 * missing; comments describe apparent intent only.
 */
/* Get: currently only reports the configured guest memory limit. */
648 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
652 switch (attr->attr) {
653 case KVM_S390_VM_MEM_LIMIT_SIZE:
655 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
656 kvm->arch.mem_limit);
657 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
/* Set: enable CMMA, reset CMMA state, or change the memory limit. */
667 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
671 switch (attr->attr) {
/* CMMA may only be enabled before the first VCPU exists. */
672 case KVM_S390_VM_MEM_ENABLE_CMMA:
678 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
679 mutex_lock(&kvm->lock);
680 if (!kvm->created_vcpus) {
681 kvm->arch.use_cmma = 1;
684 mutex_unlock(&kvm->lock);
686 case KVM_S390_VM_MEM_CLR_CMMA:
691 if (!kvm->arch.use_cmma)
694 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
695 mutex_lock(&kvm->lock);
/* srcu read lock guards against concurrent memslot changes during reset. */
696 idx = srcu_read_lock(&kvm->srcu);
697 s390_reset_cmma(kvm->arch.gmap->mm);
698 srcu_read_unlock(&kvm->srcu, idx);
699 mutex_unlock(&kvm->lock);
702 case KVM_S390_VM_MEM_LIMIT_SIZE: {
703 unsigned long new_limit;
705 if (kvm_is_ucontrol(kvm))
708 if (get_user(new_limit, (u64 __user *)attr->addr))
/* The limit may only be lowered, never raised past the current one. */
711 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
712 new_limit > kvm->arch.mem_limit)
718 /* gmap_create takes last usable address */
719 if (new_limit != KVM_S390_NO_MEM_LIMIT)
/* Replacing the gmap is only allowed before the first VCPU exists. */
723 mutex_lock(&kvm->lock);
724 if (!kvm->created_vcpus) {
725 /* gmap_create will round the limit up */
726 struct gmap *new = gmap_create(current->mm, new_limit);
731 gmap_remove(kvm->arch.gmap);
733 kvm->arch.gmap = new;
737 mutex_unlock(&kvm->lock);
738 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
739 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
740 (void *) kvm->arch.gmap->asce);
750 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
/*
 * KVM_S390_VM_CRYPTO attribute: enable/disable AES and DEA key wrapping.
 * Requires facility 76 (MSA3).  Enabling presumably generates fresh random
 * wrapping-key masks (the get_random_bytes lines are missing from this
 * extract — TODO confirm); disabling zeroes them.  Afterwards every VCPU's
 * crypto setup is refreshed.  NOTE(review): lossy extract.
 */
752 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
754 struct kvm_vcpu *vcpu;
757 if (!test_kvm_facility(kvm, 76))
760 mutex_lock(&kvm->lock);
761 switch (attr->attr) {
762 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
764 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
765 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
766 kvm->arch.crypto.aes_kw = 1;
767 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
769 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
771 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
772 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
773 kvm->arch.crypto.dea_kw = 1;
774 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
776 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
777 kvm->arch.crypto.aes_kw = 0;
778 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
779 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
780 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
782 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
783 kvm->arch.crypto.dea_kw = 0;
784 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
785 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
786 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
789 mutex_unlock(&kvm->lock);
/* Re-apply crypto settings on every VCPU after the change. */
793 kvm_for_each_vcpu(i, vcpu, kvm) {
794 kvm_s390_vcpu_crypto_setup(vcpu);
797 mutex_unlock(&kvm->lock);
/* Queue a synchronous request on all VCPUs of the VM. */
801 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
804 struct kvm_vcpu *vcpu;
806 kvm_for_each_vcpu(cx, vcpu, kvm)
807 kvm_s390_sync_request(req, vcpu);
811 * Must be called with kvm->srcu held to avoid races on memslots, and with
812 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
/*
 * Enter migration mode: allocate the migration state and, when CMMA is in
 * use, a PGSTE dirty bitmap sized to cover guest memory; then mark all
 * pages in active slots dirty and broadcast KVM_REQ_START_MIGRATION.
 * NOTE(review): lossy extract — returns, error paths and braces missing.
 */
814 static int kvm_s390_vm_start_migration(struct kvm *kvm)
816 struct kvm_s390_migration_state *mgs;
817 struct kvm_memory_slot *ms;
818 /* should be the only one */
819 struct kvm_memslots *slots;
820 unsigned long ram_pages;
823 /* migration mode already enabled */
824 if (kvm->arch.migration_state)
827 slots = kvm_memslots(kvm);
828 if (!slots || !slots->used_slots)
831 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
834 kvm->arch.migration_state = mgs;
836 if (kvm->arch.use_cmma) {
838 * Get the first slot. They are reverse sorted by base_gfn, so
839 * the first slot is also the one at the end of the address
840 * space. We have verified above that at least one slot is
843 ms = slots->memslots;
844 /* round up so we only use full longs */
845 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
846 /* allocate enough bytes to store all the bits */
847 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
848 if (!mgs->pgste_bitmap) {
850 kvm->arch.migration_state = NULL;
854 mgs->bitmap_size = ram_pages;
855 atomic64_set(&mgs->dirty_pages, ram_pages);
856 /* mark all the pages in active slots as dirty */
857 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
858 ms = slots->memslots + slotnr;
859 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
862 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
868 * Must be called with kvm->slots_lock to avoid races with ourselves and
869 * kvm_s390_vm_start_migration.
/* Leave migration mode: detach the state, stop the VCPUs' migration
 * handling, wait for in-flight ESSA emulation (SRCU), free the bitmap. */
871 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
873 struct kvm_s390_migration_state *mgs;
875 /* migration mode already disabled */
876 if (!kvm->arch.migration_state)
878 mgs = kvm->arch.migration_state;
879 kvm->arch.migration_state = NULL;
881 if (kvm->arch.use_cmma) {
882 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
883 /* We have to wait for the essa emulation to finish */
884 synchronize_srcu(&kvm->srcu);
885 vfree(mgs->pgste_bitmap);
/* Attribute dispatcher for KVM_S390_VM_MIGRATION, under slots_lock. */
891 static int kvm_s390_vm_set_migration(struct kvm *kvm,
892 struct kvm_device_attr *attr)
896 mutex_lock(&kvm->slots_lock);
897 switch (attr->attr) {
898 case KVM_S390_VM_MIGRATION_START:
899 res = kvm_s390_vm_start_migration(kvm);
901 case KVM_S390_VM_MIGRATION_STOP:
902 res = kvm_s390_vm_stop_migration(kvm);
907 mutex_unlock(&kvm->slots_lock);
/* Report migration-mode status (0/1) to user space. */
912 static int kvm_s390_vm_get_migration(struct kvm *kvm,
913 struct kvm_device_attr *attr)
915 u64 mig = (kvm->arch.migration_state != NULL);
917 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
920 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
/*
 * KVM_S390_VM_TOD attribute family: set/get the guest TOD clock, with and
 * without the multiple-epoch extension (facility 139).
 * NOTE(review): lossy extract — and several "&gtod" tokens appear mangled
 * to ">od" (likely an HTML-entity corruption of '&'); preserved verbatim,
 * verify against the original source before use.
 */
/* Set TOD with epoch index; epoch_idx != 0 requires facility 139. */
925 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
927 struct kvm_s390_vm_tod_clock gtod;
929 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod)))
932 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
934 kvm_s390_set_tod_clock(kvm, >od);
936 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
937 gtod.epoch_idx, gtod.tod);
/* Set only the high word; presumably must be zero (check line missing). */
942 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
946 if (copy_from_user(>od_high, (void __user *)attr->addr,
952 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
/* Set only the 64-bit TOD base; epoch index stays zero. */
957 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
959 struct kvm_s390_vm_tod_clock gtod = { 0 };
961 if (copy_from_user(>od.tod, (void __user *)attr->addr,
965 kvm_s390_set_tod_clock(kvm, >od);
966 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
/* Dispatcher for the TOD "set" sub-attributes. */
970 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
977 switch (attr->attr) {
978 case KVM_S390_VM_TOD_EXT:
979 ret = kvm_s390_set_tod_ext(kvm, attr);
981 case KVM_S390_VM_TOD_HIGH:
982 ret = kvm_s390_set_tod_high(kvm, attr);
984 case KVM_S390_VM_TOD_LOW:
985 ret = kvm_s390_set_tod_low(kvm, attr);
/* Compute guest TOD = host extended TOD + per-VM epoch (with carry into
 * the epoch index when the 64-bit addition wraps). */
994 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
995 struct kvm_s390_vm_tod_clock *gtod)
997 struct kvm_s390_tod_clock_ext htod;
1001 get_tod_clock_ext((char *)&htod);
1003 gtod->tod = htod.tod + kvm->arch.epoch;
1004 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1006 if (gtod->tod < htod.tod)
1007 gtod->epoch_idx += 1;
/* Get TOD with epoch index; falls back to the fast path w/o facility 139. */
1012 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1014 struct kvm_s390_vm_tod_clock gtod;
1016 memset(>od, 0, sizeof(gtod));
1018 if (test_kvm_facility(kvm, 139))
1019 kvm_s390_get_tod_clock_ext(kvm, >od);
1021 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
1023 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
1026 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1027 gtod.epoch_idx, gtod.tod);
/* Get only the high word (always 0 in this interface generation). */
1031 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1035 if (copy_to_user((void __user *)attr->addr, >od_high,
1038 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
/* Get only the 64-bit TOD base. */
1043 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1047 gtod = kvm_s390_get_tod_clock_fast(kvm);
1048 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
1050 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
/* Dispatcher for the TOD "get" sub-attributes. */
1055 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1062 switch (attr->attr) {
1063 case KVM_S390_VM_TOD_EXT:
1064 ret = kvm_s390_get_tod_ext(kvm, attr);
1066 case KVM_S390_VM_TOD_HIGH:
1067 ret = kvm_s390_get_tod_high(kvm, attr);
1069 case KVM_S390_VM_TOD_LOW:
1070 ret = kvm_s390_get_tod_low(kvm, attr);
/*
 * KVM_S390_VM_CPU_MODEL "set" side: processor id/ibc/facilities, CPU
 * features, and (not yet supported) subfunctions.
 * NOTE(review): lossy extract — returns, "break"s, kfree/error paths and
 * closing braces are missing.
 */
/* Set cpuid, IBC (clamped to the host's [lowest, unblocked] range) and
 * the guest facility list.  Only before the first VCPU exists. */
1079 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1081 struct kvm_s390_vm_cpu_processor *proc;
1082 u16 lowest_ibc, unblocked_ibc;
1085 mutex_lock(&kvm->lock);
1086 if (kvm->created_vcpus) {
1090 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1095 if (!copy_from_user(proc, (void __user *)attr->addr,
1097 kvm->arch.model.cpuid = proc->cpuid;
1098 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1099 unblocked_ibc = sclp.ibc & 0xfff;
/* IBC value 0 disables the check; otherwise clamp into the host range. */
1100 if (lowest_ibc && proc->ibc) {
1101 if (proc->ibc > unblocked_ibc)
1102 kvm->arch.model.ibc = unblocked_ibc;
1103 else if (proc->ibc < lowest_ibc)
1104 kvm->arch.model.ibc = lowest_ibc;
1106 kvm->arch.model.ibc = proc->ibc;
1108 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1109 S390_ARCH_FAC_LIST_SIZE_BYTE);
1110 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1111 kvm->arch.model.ibc,
1112 kvm->arch.model.cpuid);
1113 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1114 kvm->arch.model.fac_list[0],
1115 kvm->arch.model.fac_list[1],
1116 kvm->arch.model.fac_list[2]);
1121 mutex_unlock(&kvm->lock);
/* Set the guest CPU-feature bitmap; must be a subset of what the host
 * offers, and only before the first VCPU exists. */
1125 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1126 struct kvm_device_attr *attr)
1128 struct kvm_s390_vm_cpu_feat data;
1131 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1133 if (!bitmap_subset((unsigned long *) data.feat,
1134 kvm_s390_available_cpu_feat,
1135 KVM_S390_VM_CPU_FEAT_NR_BITS))
1138 mutex_lock(&kvm->lock);
1139 if (!kvm->created_vcpus) {
1140 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1141 KVM_S390_VM_CPU_FEAT_NR_BITS);
1144 mutex_unlock(&kvm->lock);
/* Configuring subfunctions is not implemented yet (see comment below). */
1148 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1149 struct kvm_device_attr *attr)
1152 * Once supported by kernel + hw, we have to store the subfunctions
1153 * in kvm->arch and remember that user space configured them.
/* Dispatcher for the CPU-model "set" sub-attributes. */
1158 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1162 switch (attr->attr) {
1163 case KVM_S390_VM_CPU_PROCESSOR:
1164 ret = kvm_s390_set_processor(kvm, attr);
1166 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1167 ret = kvm_s390_set_processor_feat(kvm, attr);
1169 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1170 ret = kvm_s390_set_processor_subfunc(kvm, attr);
/*
 * KVM_S390_VM_CPU_MODEL "get" side: report the configured guest model,
 * the host machine model, features and subfunctions to user space.
 * NOTE(review): lossy extract — returns, kfree/error paths, some VM_EVENT
 * arguments and closing braces are missing.
 */
/* Report the currently configured guest cpuid/ibc/facility list. */
1176 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1178 struct kvm_s390_vm_cpu_processor *proc;
1181 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1186 proc->cpuid = kvm->arch.model.cpuid;
1187 proc->ibc = kvm->arch.model.ibc;
1188 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1189 S390_ARCH_FAC_LIST_SIZE_BYTE);
1190 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1191 kvm->arch.model.ibc,
1192 kvm->arch.model.cpuid);
1193 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1194 kvm->arch.model.fac_list[0],
1195 kvm->arch.model.fac_list[1],
1196 kvm->arch.model.fac_list[2]);
1197 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
/* Report the host machine: real cpuid, sclp IBC, KVM's facility mask and
 * the host's full STFLE facility list. */
1204 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1206 struct kvm_s390_vm_cpu_machine *mach;
1209 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1214 get_cpu_id((struct cpuid *) &mach->cpuid);
1215 mach->ibc = sclp.ibc;
1216 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1217 S390_ARCH_FAC_LIST_SIZE_BYTE);
1218 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1219 sizeof(S390_lowcore.stfle_fac_list));
1220 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1221 kvm->arch.model.ibc,
1222 kvm->arch.model.cpuid);
1223 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1227 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1231 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
/* Report the guest's configured CPU-feature bitmap. */
1238 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1239 struct kvm_device_attr *attr)
1241 struct kvm_s390_vm_cpu_feat data;
1243 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1244 KVM_S390_VM_CPU_FEAT_NR_BITS);
1245 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
/* Report the host-available CPU-feature bitmap. */
1250 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1251 struct kvm_device_attr *attr)
1253 struct kvm_s390_vm_cpu_feat data;
1255 bitmap_copy((unsigned long *) data.feat,
1256 kvm_s390_available_cpu_feat,
1257 KVM_S390_VM_CPU_FEAT_NR_BITS);
1258 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
/* Per-guest subfunctions are not configurable yet (see comment below). */
1263 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1264 struct kvm_device_attr *attr)
1267 * Once we can actually configure subfunctions (kernel + hw support),
1268 * we have to check if they were already set by user space, if so copy
1269 * them from kvm->arch.
/* Report the host-available subfunction query results. */
1274 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1275 struct kvm_device_attr *attr)
1277 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1278 sizeof(struct kvm_s390_vm_cpu_subfunc)))
/* Dispatcher for the CPU-model "get" sub-attributes. */
1282 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1286 switch (attr->attr) {
1287 case KVM_S390_VM_CPU_PROCESSOR:
1288 ret = kvm_s390_get_processor(kvm, attr);
1290 case KVM_S390_VM_CPU_MACHINE:
1291 ret = kvm_s390_get_machine(kvm, attr);
1293 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1294 ret = kvm_s390_get_processor_feat(kvm, attr);
1296 case KVM_S390_VM_CPU_MACHINE_FEAT:
1297 ret = kvm_s390_get_machine_feat(kvm, attr);
1299 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1300 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1302 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1303 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1309 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1313 switch (attr->group) {
1314 case KVM_S390_VM_MEM_CTRL:
1315 ret = kvm_s390_set_mem_control(kvm, attr);
1317 case KVM_S390_VM_TOD:
1318 ret = kvm_s390_set_tod(kvm, attr);
1320 case KVM_S390_VM_CPU_MODEL:
1321 ret = kvm_s390_set_cpu_model(kvm, attr);
1323 case KVM_S390_VM_CRYPTO:
1324 ret = kvm_s390_vm_set_crypto(kvm, attr);
1326 case KVM_S390_VM_MIGRATION:
1327 ret = kvm_s390_vm_set_migration(kvm, attr);
1337 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1341 switch (attr->group) {
1342 case KVM_S390_VM_MEM_CTRL:
1343 ret = kvm_s390_get_mem_control(kvm, attr);
1345 case KVM_S390_VM_TOD:
1346 ret = kvm_s390_get_tod(kvm, attr);
1348 case KVM_S390_VM_CPU_MODEL:
1349 ret = kvm_s390_get_cpu_model(kvm, attr);
1351 case KVM_S390_VM_MIGRATION:
1352 ret = kvm_s390_vm_get_migration(kvm, attr);
1362 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1366 switch (attr->group) {
1367 case KVM_S390_VM_MEM_CTRL:
1368 switch (attr->attr) {
1369 case KVM_S390_VM_MEM_ENABLE_CMMA:
1370 case KVM_S390_VM_MEM_CLR_CMMA:
1371 ret = sclp.has_cmma ? 0 : -ENXIO;
1373 case KVM_S390_VM_MEM_LIMIT_SIZE:
1381 case KVM_S390_VM_TOD:
1382 switch (attr->attr) {
1383 case KVM_S390_VM_TOD_LOW:
1384 case KVM_S390_VM_TOD_HIGH:
1392 case KVM_S390_VM_CPU_MODEL:
1393 switch (attr->attr) {
1394 case KVM_S390_VM_CPU_PROCESSOR:
1395 case KVM_S390_VM_CPU_MACHINE:
1396 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1397 case KVM_S390_VM_CPU_MACHINE_FEAT:
1398 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1401 /* configuring subfunctions is not supported yet */
1402 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1408 case KVM_S390_VM_CRYPTO:
1409 switch (attr->attr) {
1410 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1411 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1412 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1413 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1421 case KVM_S390_VM_MIGRATION:
1432 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1436 int srcu_idx, i, r = 0;
1438 if (args->flags != 0)
1441 /* Is this guest using storage keys? */
1442 if (!mm_use_skey(current->mm))
1443 return KVM_S390_GET_SKEYS_NONE;
1445 /* Enforce sane limit on memory allocation */
1446 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1449 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1453 down_read(¤t->mm->mmap_sem);
1454 srcu_idx = srcu_read_lock(&kvm->srcu);
1455 for (i = 0; i < args->count; i++) {
1456 hva = gfn_to_hva(kvm, args->start_gfn + i);
1457 if (kvm_is_error_hva(hva)) {
1462 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1466 srcu_read_unlock(&kvm->srcu, srcu_idx);
1467 up_read(¤t->mm->mmap_sem);
1470 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1471 sizeof(uint8_t) * args->count);
1480 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1484 int srcu_idx, i, r = 0;
1486 if (args->flags != 0)
1489 /* Enforce sane limit on memory allocation */
1490 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1493 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1497 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1498 sizeof(uint8_t) * args->count);
1504 /* Enable storage key handling for the guest */
1505 r = s390_enable_skey();
1509 down_read(¤t->mm->mmap_sem);
1510 srcu_idx = srcu_read_lock(&kvm->srcu);
1511 for (i = 0; i < args->count; i++) {
1512 hva = gfn_to_hva(kvm, args->start_gfn + i);
1513 if (kvm_is_error_hva(hva)) {
1518 /* Lowest order bit is reserved */
1519 if (keys[i] & 0x01) {
1524 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1528 srcu_read_unlock(&kvm->srcu, srcu_idx);
1529 up_read(¤t->mm->mmap_sem);
1536 * Base address and length must be sent at the start of each block, therefore
1537 * it's cheaper to send some clean data, as long as it's less than the size of
1540 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1541 /* for consistency */
1542 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1545 * This function searches for the next page with dirty CMMA attributes, and
1546 * saves the attributes in the buffer up to either the end of the buffer or
1547 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1548 * no trailing clean bytes are saved.
1549 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1550 * output buffer will indicate 0 as length.
1552 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1553 struct kvm_s390_cmma_log *args)
1555 struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1556 unsigned long bufsize, hva, pgstev, i, next, cur;
1557 int srcu_idx, peek, r = 0, rr;
1560 cur = args->start_gfn;
1561 i = next = pgstev = 0;
1563 if (unlikely(!kvm->arch.use_cmma))
1565 /* Invalid/unsupported flags were specified */
1566 if (args->flags & ~KVM_S390_CMMA_PEEK)
1568 /* Migration mode query, and we are not doing a migration */
1569 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1572 /* CMMA is disabled or was not used, or the buffer has length zero */
1573 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1574 if (!bufsize || !kvm->mm->context.use_cmma) {
1575 memset(args, 0, sizeof(*args));
1580 /* We are not peeking, and there are no dirty pages */
1581 if (!atomic64_read(&s->dirty_pages)) {
1582 memset(args, 0, sizeof(*args));
1585 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1587 if (cur >= s->bitmap_size) /* nothing found, loop back */
1588 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1589 if (cur >= s->bitmap_size) { /* again! (very unlikely) */
1590 memset(args, 0, sizeof(*args));
1593 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1596 res = vmalloc(bufsize);
1600 args->start_gfn = cur;
1602 down_read(&kvm->mm->mmap_sem);
1603 srcu_idx = srcu_read_lock(&kvm->srcu);
1604 while (i < bufsize) {
1605 hva = gfn_to_hva(kvm, cur);
1606 if (kvm_is_error_hva(hva)) {
1610 /* decrement only if we actually flipped the bit to 0 */
1611 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1612 atomic64_dec(&s->dirty_pages);
1613 r = get_pgste(kvm->mm, hva, &pgstev);
1616 /* save the value */
1617 res[i++] = (pgstev >> 24) & 0x43;
1619 * if the next bit is too far away, stop.
1620 * if we reached the previous "next", find the next one
1623 if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1626 next = find_next_bit(s->pgste_bitmap,
1627 s->bitmap_size, cur + 1);
1628 /* reached the end of the bitmap or of the buffer, stop */
1629 if ((next >= s->bitmap_size) ||
1630 (next >= args->start_gfn + bufsize))
1635 srcu_read_unlock(&kvm->srcu, srcu_idx);
1636 up_read(&kvm->mm->mmap_sem);
1638 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1640 rr = copy_to_user((void __user *)args->values, res, args->count);
1649 * This function sets the CMMA attributes for the given pages. If the input
1650 * buffer has zero length, no action is taken, otherwise the attributes are
1651 * set and the mm->context.use_cmma flag is set.
1653 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1654 const struct kvm_s390_cmma_log *args)
1656 unsigned long hva, mask, pgstev, i;
1658 int srcu_idx, r = 0;
1662 if (!kvm->arch.use_cmma)
1664 /* invalid/unsupported flags */
1665 if (args->flags != 0)
1667 /* Enforce sane limit on memory allocation */
1668 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1671 if (args->count == 0)
1674 bits = vmalloc(sizeof(*bits) * args->count);
1678 r = copy_from_user(bits, (void __user *)args->values, args->count);
1684 down_read(&kvm->mm->mmap_sem);
1685 srcu_idx = srcu_read_lock(&kvm->srcu);
1686 for (i = 0; i < args->count; i++) {
1687 hva = gfn_to_hva(kvm, args->start_gfn + i);
1688 if (kvm_is_error_hva(hva)) {
1694 pgstev = pgstev << 24;
1695 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1696 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1698 srcu_read_unlock(&kvm->srcu, srcu_idx);
1699 up_read(&kvm->mm->mmap_sem);
1701 if (!kvm->mm->context.use_cmma) {
1702 down_write(&kvm->mm->mmap_sem);
1703 kvm->mm->context.use_cmma = 1;
1704 up_write(&kvm->mm->mmap_sem);
1711 long kvm_arch_vm_ioctl(struct file *filp,
1712 unsigned int ioctl, unsigned long arg)
1714 struct kvm *kvm = filp->private_data;
1715 void __user *argp = (void __user *)arg;
1716 struct kvm_device_attr attr;
1720 case KVM_S390_INTERRUPT: {
1721 struct kvm_s390_interrupt s390int;
1724 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1726 r = kvm_s390_inject_vm(kvm, &s390int);
1729 case KVM_ENABLE_CAP: {
1730 struct kvm_enable_cap cap;
1732 if (copy_from_user(&cap, argp, sizeof(cap)))
1734 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1737 case KVM_CREATE_IRQCHIP: {
1738 struct kvm_irq_routing_entry routing;
1741 if (kvm->arch.use_irqchip) {
1742 /* Set up dummy routing. */
1743 memset(&routing, 0, sizeof(routing));
1744 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1748 case KVM_SET_DEVICE_ATTR: {
1750 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1752 r = kvm_s390_vm_set_attr(kvm, &attr);
1755 case KVM_GET_DEVICE_ATTR: {
1757 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1759 r = kvm_s390_vm_get_attr(kvm, &attr);
1762 case KVM_HAS_DEVICE_ATTR: {
1764 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1766 r = kvm_s390_vm_has_attr(kvm, &attr);
1769 case KVM_S390_GET_SKEYS: {
1770 struct kvm_s390_skeys args;
1773 if (copy_from_user(&args, argp,
1774 sizeof(struct kvm_s390_skeys)))
1776 r = kvm_s390_get_skeys(kvm, &args);
1779 case KVM_S390_SET_SKEYS: {
1780 struct kvm_s390_skeys args;
1783 if (copy_from_user(&args, argp,
1784 sizeof(struct kvm_s390_skeys)))
1786 r = kvm_s390_set_skeys(kvm, &args);
1789 case KVM_S390_GET_CMMA_BITS: {
1790 struct kvm_s390_cmma_log args;
1793 if (copy_from_user(&args, argp, sizeof(args)))
1795 mutex_lock(&kvm->slots_lock);
1796 r = kvm_s390_get_cmma_bits(kvm, &args);
1797 mutex_unlock(&kvm->slots_lock);
1799 r = copy_to_user(argp, &args, sizeof(args));
1805 case KVM_S390_SET_CMMA_BITS: {
1806 struct kvm_s390_cmma_log args;
1809 if (copy_from_user(&args, argp, sizeof(args)))
1811 mutex_lock(&kvm->slots_lock);
1812 r = kvm_s390_set_cmma_bits(kvm, &args);
1813 mutex_unlock(&kvm->slots_lock);
1823 static int kvm_s390_query_ap_config(u8 *config)
1825 u32 fcn_code = 0x04000000UL;
1828 memset(config, 0, 128);
1832 ".long 0xb2af0000\n" /* PQAP(QCI) */
1838 : "r" (fcn_code), "r" (config)
1839 : "cc", "0", "2", "memory"
1845 static int kvm_s390_apxa_installed(void)
1850 if (test_facility(12)) {
1851 cc = kvm_s390_query_ap_config(config);
1854 pr_err("PQAP(QCI) failed with cc=%d", cc);
1856 return config[0] & 0x40;
1862 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1864 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1866 if (kvm_s390_apxa_installed())
1867 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1869 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1872 static u64 kvm_s390_get_initial_cpuid(void)
1877 cpuid.version = 0xff;
1878 return *((u64 *) &cpuid);
1881 static void kvm_s390_crypto_init(struct kvm *kvm)
1883 if (!test_kvm_facility(kvm, 76))
1886 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1887 kvm_s390_set_crycb_format(kvm);
1889 /* Enable AES/DEA protected key functions by default */
1890 kvm->arch.crypto.aes_kw = 1;
1891 kvm->arch.crypto.dea_kw = 1;
1892 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1893 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1894 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1895 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1898 static void sca_dispose(struct kvm *kvm)
1900 if (kvm->arch.use_esca)
1901 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1903 free_page((unsigned long)(kvm->arch.sca));
1904 kvm->arch.sca = NULL;
1907 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1909 gfp_t alloc_flags = GFP_KERNEL;
1911 char debug_name[16];
1912 static unsigned long sca_offset;
1915 #ifdef CONFIG_KVM_S390_UCONTROL
1916 if (type & ~KVM_VM_S390_UCONTROL)
1918 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1925 rc = s390_enable_sie();
1931 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1933 kvm->arch.use_esca = 0; /* start with basic SCA */
1934 if (!sclp.has_64bscao)
1935 alloc_flags |= GFP_DMA;
1936 rwlock_init(&kvm->arch.sca_lock);
1937 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1940 mutex_lock(&kvm_lock);
1942 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1944 kvm->arch.sca = (struct bsca_block *)
1945 ((char *) kvm->arch.sca + sca_offset);
1946 mutex_unlock(&kvm_lock);
1948 sprintf(debug_name, "kvm-%u", current->pid);
1950 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1954 kvm->arch.sie_page2 =
1955 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1956 if (!kvm->arch.sie_page2)
1959 /* Populate the facility mask initially. */
1960 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1961 sizeof(S390_lowcore.stfle_fac_list));
1962 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1963 if (i < kvm_s390_fac_list_mask_size())
1964 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1966 kvm->arch.model.fac_mask[i] = 0UL;
1969 /* Populate the facility list initially. */
1970 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1971 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1972 S390_ARCH_FAC_LIST_SIZE_BYTE);
1974 /* we are always in czam mode - even on pre z14 machines */
1975 set_kvm_facility(kvm->arch.model.fac_mask, 138);
1976 set_kvm_facility(kvm->arch.model.fac_list, 138);
1977 /* we emulate STHYI in kvm */
1978 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1979 set_kvm_facility(kvm->arch.model.fac_list, 74);
1980 if (MACHINE_HAS_TLB_GUEST) {
1981 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1982 set_kvm_facility(kvm->arch.model.fac_list, 147);
1985 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1986 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1988 kvm_s390_crypto_init(kvm);
1990 mutex_init(&kvm->arch.float_int.ais_lock);
1991 kvm->arch.float_int.simm = 0;
1992 kvm->arch.float_int.nimm = 0;
1993 spin_lock_init(&kvm->arch.float_int.lock);
1994 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1995 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1996 init_waitqueue_head(&kvm->arch.ipte_wq);
1997 mutex_init(&kvm->arch.ipte_mutex);
1999 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2000 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2002 if (type & KVM_VM_S390_UCONTROL) {
2003 kvm->arch.gmap = NULL;
2004 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2006 if (sclp.hamax == U64_MAX)
2007 kvm->arch.mem_limit = TASK_SIZE_MAX;
2009 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2011 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2012 if (!kvm->arch.gmap)
2014 kvm->arch.gmap->private = kvm;
2015 kvm->arch.gmap->pfault_enabled = 0;
2018 kvm->arch.css_support = 0;
2019 kvm->arch.use_irqchip = 0;
2020 kvm->arch.epoch = 0;
2022 spin_lock_init(&kvm->arch.start_stop_lock);
2023 kvm_s390_vsie_init(kvm);
2024 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2028 free_page((unsigned long)kvm->arch.sie_page2);
2029 debug_unregister(kvm->arch.dbf);
2031 KVM_EVENT(3, "creation of vm failed: %d", rc);
2035 bool kvm_arch_has_vcpu_debugfs(void)
2040 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2045 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2047 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2048 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2049 kvm_s390_clear_local_irqs(vcpu);
2050 kvm_clear_async_pf_completion_queue(vcpu);
2051 if (!kvm_is_ucontrol(vcpu->kvm))
2054 if (kvm_is_ucontrol(vcpu->kvm))
2055 gmap_remove(vcpu->arch.gmap);
2057 if (vcpu->kvm->arch.use_cmma)
2058 kvm_s390_vcpu_unsetup_cmma(vcpu);
2059 free_page((unsigned long)(vcpu->arch.sie_block));
2061 kvm_vcpu_uninit(vcpu);
2062 kmem_cache_free(kvm_vcpu_cache, vcpu);
2065 static void kvm_free_vcpus(struct kvm *kvm)
2068 struct kvm_vcpu *vcpu;
2070 kvm_for_each_vcpu(i, vcpu, kvm)
2071 kvm_arch_vcpu_destroy(vcpu);
2073 mutex_lock(&kvm->lock);
2074 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2075 kvm->vcpus[i] = NULL;
2077 atomic_set(&kvm->online_vcpus, 0);
2078 mutex_unlock(&kvm->lock);
2081 void kvm_arch_destroy_vm(struct kvm *kvm)
2083 kvm_free_vcpus(kvm);
2085 debug_unregister(kvm->arch.dbf);
2086 free_page((unsigned long)kvm->arch.sie_page2);
2087 if (!kvm_is_ucontrol(kvm))
2088 gmap_remove(kvm->arch.gmap);
2089 kvm_s390_destroy_adapters(kvm);
2090 kvm_s390_clear_float_irqs(kvm);
2091 kvm_s390_vsie_destroy(kvm);
2092 if (kvm->arch.migration_state) {
2093 vfree(kvm->arch.migration_state->pgste_bitmap);
2094 kfree(kvm->arch.migration_state);
2096 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2099 /* Section: vcpu related */
2100 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2102 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2103 if (!vcpu->arch.gmap)
2105 vcpu->arch.gmap->private = vcpu->kvm;
2110 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2112 if (!kvm_s390_use_sca_entries())
2114 read_lock(&vcpu->kvm->arch.sca_lock);
2115 if (vcpu->kvm->arch.use_esca) {
2116 struct esca_block *sca = vcpu->kvm->arch.sca;
2118 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2119 sca->cpu[vcpu->vcpu_id].sda = 0;
2121 struct bsca_block *sca = vcpu->kvm->arch.sca;
2123 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2124 sca->cpu[vcpu->vcpu_id].sda = 0;
2126 read_unlock(&vcpu->kvm->arch.sca_lock);
2129 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2131 if (!kvm_s390_use_sca_entries()) {
2132 struct bsca_block *sca = vcpu->kvm->arch.sca;
2134 /* we still need the basic sca for the ipte control */
2135 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2136 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2139 read_lock(&vcpu->kvm->arch.sca_lock);
2140 if (vcpu->kvm->arch.use_esca) {
2141 struct esca_block *sca = vcpu->kvm->arch.sca;
2143 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2144 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2145 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2146 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2147 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2149 struct bsca_block *sca = vcpu->kvm->arch.sca;
2151 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2152 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2153 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2154 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2156 read_unlock(&vcpu->kvm->arch.sca_lock);
2159 /* Basic SCA to Extended SCA data copy routines */
2160 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2163 d->sigp_ctrl.c = s->sigp_ctrl.c;
2164 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2167 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2171 d->ipte_control = s->ipte_control;
2173 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2174 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2177 static int sca_switch_to_extended(struct kvm *kvm)
2179 struct bsca_block *old_sca = kvm->arch.sca;
2180 struct esca_block *new_sca;
2181 struct kvm_vcpu *vcpu;
2182 unsigned int vcpu_idx;
2185 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2189 scaoh = (u32)((u64)(new_sca) >> 32);
2190 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2192 kvm_s390_vcpu_block_all(kvm);
2193 write_lock(&kvm->arch.sca_lock);
2195 sca_copy_b_to_e(new_sca, old_sca);
2197 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2198 vcpu->arch.sie_block->scaoh = scaoh;
2199 vcpu->arch.sie_block->scaol = scaol;
2200 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2202 kvm->arch.sca = new_sca;
2203 kvm->arch.use_esca = 1;
2205 write_unlock(&kvm->arch.sca_lock);
2206 kvm_s390_vcpu_unblock_all(kvm);
2208 free_page((unsigned long)old_sca);
2210 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2211 old_sca, kvm->arch.sca);
2215 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2219 if (!kvm_s390_use_sca_entries()) {
2220 if (id < KVM_MAX_VCPUS)
2224 if (id < KVM_S390_BSCA_CPU_SLOTS)
2226 if (!sclp.has_esca || !sclp.has_64bscao)
2229 mutex_lock(&kvm->lock);
2230 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2231 mutex_unlock(&kvm->lock);
2233 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2236 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2238 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2239 kvm_clear_async_pf_completion_queue(vcpu);
2240 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2246 kvm_s390_set_prefix(vcpu, 0);
2247 if (test_kvm_facility(vcpu->kvm, 64))
2248 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2249 if (test_kvm_facility(vcpu->kvm, 82))
2250 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2251 if (test_kvm_facility(vcpu->kvm, 133))
2252 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2253 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2254 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2257 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2259 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2261 if (kvm_is_ucontrol(vcpu->kvm))
2262 return __kvm_ucontrol_vcpu_init(vcpu);
2267 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2268 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2270 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2271 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2272 vcpu->arch.cputm_start = get_tod_clock_fast();
2273 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2276 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2277 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2279 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2280 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2281 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2282 vcpu->arch.cputm_start = 0;
2283 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2286 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2287 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2289 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2290 vcpu->arch.cputm_enabled = true;
2291 __start_cpu_timer_accounting(vcpu);
2294 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2295 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2297 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2298 __stop_cpu_timer_accounting(vcpu);
2299 vcpu->arch.cputm_enabled = false;
2302 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2304 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2305 __enable_cpu_timer_accounting(vcpu);
2309 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2311 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2312 __disable_cpu_timer_accounting(vcpu);
2316 /* set the cpu timer - may only be called from the VCPU thread itself */
2317 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2319 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2320 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2321 if (vcpu->arch.cputm_enabled)
2322 vcpu->arch.cputm_start = get_tod_clock_fast();
2323 vcpu->arch.sie_block->cputm = cputm;
2324 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2328 /* update and get the cpu timer - can also be called from other VCPU threads */
2329 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2334 if (unlikely(!vcpu->arch.cputm_enabled))
2335 return vcpu->arch.sie_block->cputm;
2337 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2339 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2341 * If the writer would ever execute a read in the critical
2342 * section, e.g. in irq context, we have a deadlock.
2344 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2345 value = vcpu->arch.sie_block->cputm;
2346 /* if cputm_start is 0, accounting is being started/stopped */
2347 if (likely(vcpu->arch.cputm_start))
2348 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2349 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2354 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2357 gmap_enable(vcpu->arch.enabled_gmap);
2358 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2359 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2360 __start_cpu_timer_accounting(vcpu);
2364 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2367 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2368 __stop_cpu_timer_accounting(vcpu);
2369 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2370 vcpu->arch.enabled_gmap = gmap_get_enabled();
2371 gmap_disable(vcpu->arch.enabled_gmap);
2375 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2377 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2378 vcpu->arch.sie_block->gpsw.mask = 0UL;
2379 vcpu->arch.sie_block->gpsw.addr = 0UL;
2380 kvm_s390_set_prefix(vcpu, 0);
2381 kvm_s390_set_cpu_timer(vcpu, 0);
2382 vcpu->arch.sie_block->ckc = 0UL;
2383 vcpu->arch.sie_block->todpr = 0;
2384 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2385 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2386 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2387 vcpu->run->s.regs.fpc = 0;
2388 vcpu->arch.sie_block->gbea = 1;
2389 vcpu->arch.sie_block->pp = 0;
2390 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2391 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2392 kvm_clear_async_pf_completion_queue(vcpu);
2393 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2394 kvm_s390_vcpu_stop(vcpu);
2395 kvm_s390_clear_local_irqs(vcpu);
2398 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2400 mutex_lock(&vcpu->kvm->lock);
2402 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2403 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2405 mutex_unlock(&vcpu->kvm->lock);
2406 if (!kvm_is_ucontrol(vcpu->kvm)) {
2407 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2410 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2411 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2412 /* make vcpu_load load the right gmap on the first trigger */
2413 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2416 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2418 if (!test_kvm_facility(vcpu->kvm, 76))
2421 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2423 if (vcpu->kvm->arch.crypto.aes_kw)
2424 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2425 if (vcpu->kvm->arch.crypto.dea_kw)
2426 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2428 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2431 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2433 free_page(vcpu->arch.sie_block->cbrlo);
2434 vcpu->arch.sie_block->cbrlo = 0;
2437 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2439 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2440 if (!vcpu->arch.sie_block->cbrlo)
2443 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2447 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2449 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2451 vcpu->arch.sie_block->ibc = model->ibc;
2452 if (test_kvm_facility(vcpu->kvm, 7))
2453 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2456 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2460 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2464 if (test_kvm_facility(vcpu->kvm, 78))
2465 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2466 else if (test_kvm_facility(vcpu->kvm, 8))
2467 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2469 kvm_s390_vcpu_setup_model(vcpu);
2471 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2472 if (MACHINE_HAS_ESOP)
2473 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2474 if (test_kvm_facility(vcpu->kvm, 9))
2475 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2476 if (test_kvm_facility(vcpu->kvm, 73))
2477 vcpu->arch.sie_block->ecb |= ECB_TE;
2479 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2480 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2481 if (test_kvm_facility(vcpu->kvm, 130))
2482 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2483 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2485 vcpu->arch.sie_block->eca |= ECA_CEI;
2487 vcpu->arch.sie_block->eca |= ECA_IB;
2489 vcpu->arch.sie_block->eca |= ECA_SII;
2490 if (sclp.has_sigpif)
2491 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2492 if (test_kvm_facility(vcpu->kvm, 129)) {
2493 vcpu->arch.sie_block->eca |= ECA_VX;
2494 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2496 if (test_kvm_facility(vcpu->kvm, 139))
2497 vcpu->arch.sie_block->ecd |= ECD_MEF;
2499 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2501 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2504 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2506 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2508 if (vcpu->kvm->arch.use_cmma) {
2509 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2513 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2514 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2516 kvm_s390_vcpu_crypto_setup(vcpu);
2521 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2524 struct kvm_vcpu *vcpu;
2525 struct sie_page *sie_page;
2528 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2533 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2537 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2538 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2542 vcpu->arch.sie_block = &sie_page->sie_block;
2543 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2545 /* the real guest size will always be smaller than msl */
2546 vcpu->arch.sie_block->mso = 0;
2547 vcpu->arch.sie_block->msl = sclp.hamax;
2549 vcpu->arch.sie_block->icpua = id;
2550 spin_lock_init(&vcpu->arch.local_int.lock);
2551 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2552 vcpu->arch.local_int.wq = &vcpu->wq;
2553 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2554 seqcount_init(&vcpu->arch.cputm_seqcount);
2556 rc = kvm_vcpu_init(vcpu, kvm, id);
2558 goto out_free_sie_block;
2559 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2560 vcpu->arch.sie_block);
2561 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2565 free_page((unsigned long)(vcpu->arch.sie_block));
2567 kmem_cache_free(kvm_vcpu_cache, vcpu);
2572 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2574 return kvm_s390_vcpu_has_irq(vcpu, 0);
2577 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2579 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2582 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2584 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2588 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2590 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2593 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2595 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2599 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2601 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2605 * Kick a guest cpu out of SIE and wait until SIE is not running.
2606 * If the CPU is not running (e.g. waiting as idle) the function will
2607 * return immediately. */
2608 void exit_sie(struct kvm_vcpu *vcpu)
2610 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2611 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2615 /* Kick a guest cpu out of SIE to process a request synchronously */
2616 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2618 kvm_make_request(req, vcpu);
2619 kvm_s390_vcpu_request(vcpu);
/* gmap invalidation notifier: called when host mappings in the range
 * [start, end] of a guest address space are invalidated.  Shadow gmaps
 * are ignored; only the per-vcpu prefix pages (two consecutive pages)
 * matter here — any vcpu whose prefix area overlaps the range gets a
 * KVM_REQ_MMU_RELOAD to re-pin/re-protect its prefix. */
2622 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2625 struct kvm *kvm = gmap->private;
2626 struct kvm_vcpu *vcpu;
2627 unsigned long prefix;
2630 if (gmap_is_shadow(gmap))
/* Prefix addresses are below 2 GB; nothing to do for higher ranges. */
2632 if (start >= 1UL << 31)
2633 /* We are only interested in prefix pages */
2635 kvm_for_each_vcpu(i, vcpu, kvm) {
2636 /* match against both prefix pages */
2637 prefix = kvm_s390_get_prefix(vcpu);
2638 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2639 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2641 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
/* Required by common KVM code; s390 uses its own SIE exit mechanism
 * (exit_sie), so this hook is never actually invoked. */
2646 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2648 /* kvm common code refers to this, but never calls it */
/* KVM_GET_ONE_REG: copy one s390-specific vcpu register, selected by
 * reg->id, to the user buffer at reg->addr.  Each case reads the value
 * from the SIE block or vcpu->arch and reports faults via put_user(). */
2653 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2654 struct kvm_one_reg *reg)
2659 case KVM_REG_S390_TODPR:
2660 r = put_user(vcpu->arch.sie_block->todpr,
2661 (u32 __user *)reg->addr);
2663 case KVM_REG_S390_EPOCHDIFF:
2664 r = put_user(vcpu->arch.sie_block->epoch,
2665 (u64 __user *)reg->addr);
2667 case KVM_REG_S390_CPU_TIMER:
2668 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2669 (u64 __user *)reg->addr);
2671 case KVM_REG_S390_CLOCK_COMP:
2672 r = put_user(vcpu->arch.sie_block->ckc,
2673 (u64 __user *)reg->addr);
2675 case KVM_REG_S390_PFTOKEN:
2676 r = put_user(vcpu->arch.pfault_token,
2677 (u64 __user *)reg->addr);
2679 case KVM_REG_S390_PFCOMPARE:
2680 r = put_user(vcpu->arch.pfault_compare,
2681 (u64 __user *)reg->addr);
2683 case KVM_REG_S390_PFSELECT:
2684 r = put_user(vcpu->arch.pfault_select,
2685 (u64 __user *)reg->addr);
2687 case KVM_REG_S390_PP:
2688 r = put_user(vcpu->arch.sie_block->pp,
2689 (u64 __user *)reg->addr);
2691 case KVM_REG_S390_GBEA:
2692 r = put_user(vcpu->arch.sie_block->gbea,
2693 (u64 __user *)reg->addr);
/* KVM_SET_ONE_REG: mirror of the getter above — read one register
 * value from user space and store it into the vcpu state.  The CPU
 * timer goes through kvm_s390_set_cpu_timer() rather than a direct
 * field write, and invalidating the pfault token also flushes the
 * async page-fault completion queue. */
2702 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2703 struct kvm_one_reg *reg)
2709 case KVM_REG_S390_TODPR:
2710 r = get_user(vcpu->arch.sie_block->todpr,
2711 (u32 __user *)reg->addr);
2713 case KVM_REG_S390_EPOCHDIFF:
2714 r = get_user(vcpu->arch.sie_block->epoch,
2715 (u64 __user *)reg->addr);
2717 case KVM_REG_S390_CPU_TIMER:
2718 r = get_user(val, (u64 __user *)reg->addr);
2720 kvm_s390_set_cpu_timer(vcpu, val);
2722 case KVM_REG_S390_CLOCK_COMP:
2723 r = get_user(vcpu->arch.sie_block->ckc,
2724 (u64 __user *)reg->addr);
2726 case KVM_REG_S390_PFTOKEN:
2727 r = get_user(vcpu->arch.pfault_token,
2728 (u64 __user *)reg->addr);
2729 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2730 kvm_clear_async_pf_completion_queue(vcpu);
2732 case KVM_REG_S390_PFCOMPARE:
2733 r = get_user(vcpu->arch.pfault_compare,
2734 (u64 __user *)reg->addr);
2736 case KVM_REG_S390_PFSELECT:
2737 r = get_user(vcpu->arch.pfault_select,
2738 (u64 __user *)reg->addr);
2740 case KVM_REG_S390_PP:
2741 r = get_user(vcpu->arch.sie_block->pp,
2742 (u64 __user *)reg->addr);
2744 case KVM_REG_S390_GBEA:
2745 r = get_user(vcpu->arch.sie_block->gbea,
2746 (u64 __user *)reg->addr);
/* KVM_S390_INITIAL_RESET: perform an architectural initial CPU reset. */
2755 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2757 kvm_s390_vcpu_initial_reset(vcpu);
/* KVM_SET_REGS: copy all general purpose registers from user space
 * into the (shared) kvm_run register area. */
2761 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2763 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
/* KVM_GET_REGS: copy all general purpose registers to user space. */
2767 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2769 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
/* KVM_SET_SREGS: set access registers (kvm_run area) and control
 * registers (SIE block) from user space. */
2773 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2774 struct kvm_sregs *sregs)
2776 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2777 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/* KVM_GET_SREGS: read access and control registers into user space. */
2781 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2782 struct kvm_sregs *sregs)
2784 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2785 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/* KVM_SET_FPU: validate and set the FP control word and FP registers.
 * On machines with the vector facility the 16 FPRs are converted into
 * the corresponding vector registers; otherwise they are stored as
 * plain FPRs in the kvm_run area. */
2789 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2791 if (test_fp_ctl(fpu->fpc))
2793 vcpu->run->s.regs.fpc = fpu->fpc;
2795 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2796 (freg_t *) fpu->fprs);
2798 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
/* KVM_GET_FPU: read the FP registers and FP control word, converting
 * from vector registers when the vector facility is in use. */
2802 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2804 /* make sure we have the latest values */
2807 convert_vx_to_fp((freg_t *) fpu->fprs,
2808 (__vector128 *) vcpu->run->s.regs.vrs);
2810 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2811 fpu->fpc = vcpu->run->s.regs.fpc;
/* KVM_S390_SET_INITIAL_PSW: install a new PSW.  Only permitted while
 * the vcpu is in the stopped state. */
2815 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2819 if (!is_vcpu_stopped(vcpu))
2822 vcpu->run->psw_mask = psw.mask;
2823 vcpu->run->psw_addr = psw.addr;
/* KVM_TRANSLATE is not supported on s390. */
2828 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2829 struct kvm_translation *tr)
2831 return -EINVAL; /* not implemented yet */
/* Guest-debug control flags accepted by KVM_SET_GUEST_DEBUG on s390. */
2834 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2835 KVM_GUESTDBG_USE_HW_BP | \
2836 KVM_GUESTDBG_ENABLE)
/* KVM_SET_GUEST_DEBUG: enable or disable guest debugging.  Enabling
 * forces guest PER (CPUSTAT_P) and optionally imports hardware
 * breakpoints; disabling clears PER and all breakpoint data.  Requires
 * the SCLP guest-PER facility. */
2838 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2839 struct kvm_guest_debug *dbg)
2843 vcpu->guest_debug = 0;
2844 kvm_s390_clear_bp_data(vcpu);
2846 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2848 if (!sclp.has_gpere)
2851 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2852 vcpu->guest_debug = dbg->control;
2853 /* enforce guest PER */
2854 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2856 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2857 rc = kvm_s390_import_bp_data(vcpu, dbg);
2859 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2860 vcpu->arch.guestdbg.last_bp = 0;
/* On failure, roll back to the "debugging disabled" state. */
2864 vcpu->guest_debug = 0;
2865 kvm_s390_clear_bp_data(vcpu);
2866 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
/* KVM_GET_MP_STATE: report STOPPED or OPERATING for this vcpu. */
2872 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2873 struct kvm_mp_state *mp_state)
2875 /* CHECK_STOP and LOAD are not supported yet */
2876 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2877 KVM_MP_STATE_OPERATING;
/* KVM_SET_MP_STATE: let user space stop/start the vcpu.  Using this
 * interface at all transfers CPU state control to user space (the
 * user_cpu_state_ctrl flag is sticky for the VM). */
2880 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2881 struct kvm_mp_state *mp_state)
2885 /* user space knows about this interface - let it control the state */
2886 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2888 switch (mp_state->mp_state) {
2889 case KVM_MP_STATE_STOPPED:
2890 kvm_s390_vcpu_stop(vcpu);
2892 case KVM_MP_STATE_OPERATING:
2893 kvm_s390_vcpu_start(vcpu);
2895 case KVM_MP_STATE_LOAD:
2896 case KVM_MP_STATE_CHECK_STOP:
2897 /* fall through - CHECK_STOP and LOAD are not supported yet */
/* Test whether the IBS (interlock-and-broadcast suppression) facility
 * is currently enabled for this vcpu. */
2905 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2907 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
/* Process all pending per-vcpu requests before re-entering SIE:
 * prefix re-protection (MMU_RELOAD), TLB flush, IBS enable/disable,
 * operation-exception interception, and CMMA migration start/stop.
 * Called from vcpu_pre_run(); clears PROG_REQUEST first so that a
 * request raised concurrently forces another pass. */
2910 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2913 kvm_s390_vcpu_request_handled(vcpu);
2914 if (!kvm_request_pending(vcpu))
2917 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2918 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2919 * This ensures that the ipte instruction for this request has
2920 * already finished. We might race against a second unmapper that
2921 * wants to set the blocking bit. Lets just retry the request loop.
2923 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2925 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2926 kvm_s390_get_prefix(vcpu),
2927 PAGE_SIZE * 2, PROT_WRITE);
2929 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
/* ihcpu = 0xffff invalidates the cached host CPU, forcing a
 * guest TLB flush on the next SIE entry. */
2935 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2936 vcpu->arch.sie_block->ihcpu = 0xffff;
2940 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2941 if (!ibs_enabled(vcpu)) {
2942 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2943 atomic_or(CPUSTAT_IBS,
2944 &vcpu->arch.sie_block->cpuflags);
2949 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2950 if (ibs_enabled(vcpu)) {
2951 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2952 atomic_andnot(CPUSTAT_IBS,
2953 &vcpu->arch.sie_block->cpuflags);
2958 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2959 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2963 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2965 * Disable CMMA virtualization; we will emulate the ESSA
2966 * instruction manually, in order to provide additional
2967 * functionalities needed for live migration.
2969 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2973 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2975 * Re-enable CMMA virtualization if CMMA is available and
2978 if ((vcpu->kvm->arch.use_cmma) &&
2979 (vcpu->kvm->mm->context.use_cmma))
2980 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2984 /* nothing to do, just clear the request */
2985 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
/* Set the guest TOD clock: compute the epoch (guest TOD minus host
 * TOD) and, with the multiple-epoch facility (stfle 139), the epoch
 * index including the borrow when the subtraction wraps.  All vcpus
 * are blocked out of SIE while their epoch fields are updated so the
 * guest sees a consistent clock. */
2990 void kvm_s390_set_tod_clock(struct kvm *kvm,
2991 const struct kvm_s390_vm_tod_clock *gtod)
2993 struct kvm_vcpu *vcpu;
2994 struct kvm_s390_tod_clock_ext htod;
2997 mutex_lock(&kvm->lock);
3000 get_tod_clock_ext((char *)&htod);
3002 kvm->arch.epoch = gtod->tod - htod.tod;
3004 if (test_kvm_facility(kvm, 139)) {
3005 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
/* Borrow from the epoch index if the low 64 bits wrapped. */
3006 if (kvm->arch.epoch > gtod->tod)
3007 kvm->arch.epdx -= 1;
3010 kvm_s390_vcpu_block_all(kvm);
3011 kvm_for_each_vcpu(i, vcpu, kvm) {
3012 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3013 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3016 kvm_s390_vcpu_unblock_all(kvm);
3018 mutex_unlock(&kvm->lock);
3022  * kvm_arch_fault_in_page - fault-in guest page if necessary
3023  * @vcpu: The corresponding virtual cpu
3024  * @gpa: Guest physical address
3025  * @writable: Whether the page should be writable or not
3027  * Make sure that a guest page has been faulted-in on the host.
3029  * Return: Zero on success, negative error code otherwise.
3031 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3033 return gmap_fault(vcpu->arch.gmap, gpa,
3034 writable ? FAULT_FLAG_WRITE : 0);
/* Inject a pfault notification carrying @token into the guest:
 * PFAULT_INIT ("page missing") goes to the specific vcpu when
 * @start_token is set, PFAULT_DONE ("page ready") is a floating
 * interrupt delivered VM-wide. */
3037 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3038 unsigned long token)
3040 struct kvm_s390_interrupt inti;
3041 struct kvm_s390_irq irq;
3044 irq.u.ext.ext_params2 = token;
3045 irq.type = KVM_S390_INT_PFAULT_INIT;
3046 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3048 inti.type = KVM_S390_INT_PFAULT_DONE;
3049 inti.parm64 = token;
3050 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
/* Async page fault: notify the guest that a page is not yet present
 * (PFAULT_INIT with the guest-provided token). */
3054 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3055 struct kvm_async_pf *work)
3057 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3058 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
/* Async page fault: notify the guest that the page is now present
 * (PFAULT_DONE with the matching token). */
3061 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3062 struct kvm_async_pf *work)
3064 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3065 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
/* No-op on s390: the interrupt is injected directly on completion. */
3068 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3069 struct kvm_async_pf *work)
3071 /* s390 will always inject the page directly */
/* Always allow completion delivery so the async-pf bookkeeping runs. */
3074 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3077 * s390 will always inject the page directly,
3078 * but we still want check_async_completion to cleanup
/* Try to turn the current host fault into a guest async page fault.
 * Bails out when pfault handshaking is not armed (invalid token),
 * when the PSW does not match the guest's pfault mask/compare, when
 * external interrupts or the pfault subclass (CR0 bit 0x200) are
 * disabled, when an interrupt is already pending, or when the gmap
 * has pfault disabled.  Otherwise reads the 8-byte token from guest
 * real storage and queues the async work item. */
3083 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3086 struct kvm_arch_async_pf arch;
3089 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3091 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3092 vcpu->arch.pfault_compare)
3094 if (psw_extint_disabled(vcpu))
3096 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3098 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3100 if (!vcpu->arch.gmap->pfault_enabled)
3103 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3104 hva += current->thread.gmap_addr & ~PAGE_MASK;
3105 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3108 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
/* Prepare a vcpu for SIE entry: finish async-pf housekeeping, stash
 * gprs 14/15 in the SIE block (used by the sie64a entry code), bail
 * out on pending machine checks, deliver pending interrupts (non-
 * ucontrol VMs only), process vcpu requests, and set up guest-debug
 * PER shadowing.  Clears icptcode so stale intercept data is never
 * interpreted after exit. */
3112 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3117 * On s390 notifications for arriving pages will be delivered directly
3118 * to the guest but the house keeping for completed pfaults is
3119 * handled outside the worker.
3121 kvm_check_async_pf_completion(vcpu);
3123 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3124 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3129 if (test_cpu_flag(CIF_MCCK_PENDING))
3132 if (!kvm_is_ucontrol(vcpu->kvm)) {
3133 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3138 rc = kvm_s390_handle_requests(vcpu);
3142 if (guestdbg_enabled(vcpu)) {
3143 kvm_s390_backup_guest_per_regs(vcpu);
3144 kvm_s390_patch_guest_per_regs(vcpu);
3147 vcpu->arch.sie_block->icptcode = 0;
3148 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3149 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3150 trace_kvm_s390_sie_enter(vcpu, cpuflags);
/* SIE itself faulted while accessing guest memory: inject an
 * addressing exception into the guest.  Because the DAT exception
 * that brought us here is nullifying, the PSW still points at the
 * faulting instruction; its opcode is fetched to determine the ilen
 * needed to forward the PSW before injection. */
3155 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3157 struct kvm_s390_pgm_info pgm_info = {
3158 .code = PGM_ADDRESSING,
3163 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3164 trace_kvm_s390_sie_fault(vcpu);
3167 * We want to inject an addressing exception, which is defined as a
3168 * suppressing or terminating exception. However, since we came here
3169 * by a DAT access exception, the PSW still points to the faulting
3170 * instruction since DAT exceptions are nullifying. So we've got
3171 * to look up the current opcode to get the length of the instruction
3172 * to be able to forward the PSW.
3174 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3175 ilen = insn_length(opcode);
3179 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3180 * Forward by arbitrary ilc, injection will take care of
3181 * nullification if necessary.
3183 pgm_info = vcpu->arch.pgm;
3186 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3187 kvm_s390_forward_psw(vcpu, ilen);
3188 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
/* Handle a SIE exit.  Restores gprs 14/15 and guest-debug PER state,
 * then dispatches on the exit reason: -EINTR means a machine check
 * interrupted SIE (reinject it from the sie_page's mcck_info); a
 * non-zero icptcode is handled by kvm_handle_sie_intercept() or, if
 * unsupported, reported to user space as KVM_EXIT_S390_SIEIC;
 * ucontrol VMs get translation faults reported as S390_UCONTROL;
 * gmap pfaults are resolved via async-pf or a synchronous fault-in;
 * anything else is a fault inside SIE itself. */
3191 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3193 struct mcck_volatile_info *mcck_info;
3194 struct sie_page *sie_page;
3196 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3197 vcpu->arch.sie_block->icptcode);
3198 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3200 if (guestdbg_enabled(vcpu))
3201 kvm_s390_restore_guest_per_regs(vcpu);
3203 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3204 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3206 if (exit_reason == -EINTR) {
3207 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3208 sie_page = container_of(vcpu->arch.sie_block,
3209 struct sie_page, sie_block);
3210 mcck_info = &sie_page->mcck_info;
3211 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3215 if (vcpu->arch.sie_block->icptcode > 0) {
3216 int rc = kvm_handle_sie_intercept(vcpu);
3218 if (rc != -EOPNOTSUPP)
3220 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3221 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3222 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3223 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3225 } else if (exit_reason != -EFAULT) {
3226 vcpu->stat.exit_null++;
3228 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3229 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3230 vcpu->run->s390_ucontrol.trans_exc_code =
3231 current->thread.gmap_addr;
3232 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3234 } else if (current->thread.gmap_pfault) {
3235 trace_kvm_s390_major_guest_pfault(vcpu);
3236 current->thread.gmap_pfault = 0;
3237 if (kvm_arch_setup_async_pf(vcpu))
3239 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3241 return vcpu_post_run_fault_in_sie(vcpu);
/* Main vcpu execution loop: pre-run, drop the srcu lock and enter SIE
 * with interrupts off (no uaccess allowed between guest_enter and
 * guest_exit because PF_VCPU is used by the fault handler), then
 * post-run.  Loops until a signal, a guest-debug exit, or an error /
 * user-space exit request (rc != 0) ends the run. */
3244 static int __vcpu_run(struct kvm_vcpu *vcpu)
3246 int rc, exit_reason;
3249 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3250 * ning the guest), so that memslots (and other stuff) are protected
3252 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3255 rc = vcpu_pre_run(vcpu);
3259 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3261 * As PF_VCPU will be used in fault handler, between
3262 * guest_enter and guest_exit should be no uaccess.
3264 local_irq_disable();
3265 guest_enter_irqoff();
3266 __disable_cpu_timer_accounting(vcpu);
3268 exit_reason = sie64a(vcpu->arch.sie_block,
3269 vcpu->run->s.regs.gprs);
3270 local_irq_disable();
3271 __enable_cpu_timer_accounting(vcpu);
3272 guest_exit_irqoff();
3274 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3276 rc = vcpu_post_run(vcpu, exit_reason);
3277 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3279 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/* Copy register state that user space dirtied (kvm_dirty_regs bits)
 * from kvm_run into the vcpu before entering the guest, eagerly
 * enable RI/GS if user space handed over valid control blocks, and
 * lazily switch the host's access/FP-vector/guarded-storage context
 * to the guest's (saved back in store_regs()). */
3283 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3285 struct runtime_instr_cb *riccb;
3288 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3289 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3290 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3291 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3292 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3293 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3294 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3295 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3296 /* some control register changes require a tlb flush */
3297 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3299 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3300 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3301 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3302 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3303 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3304 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3306 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3307 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3308 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3309 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3310 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3311 kvm_clear_async_pf_completion_queue(vcpu);
3314 * If userspace sets the riccb (e.g. after migration) to a valid state,
3315 * we should enable RI here instead of doing the lazy enablement.
3317 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3318 test_kvm_facility(vcpu->kvm, 64) &&
3320 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3321 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3322 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3325 * If userspace sets the gscb (e.g. after migration) to non-zero,
3326 * we should enable GS here instead of doing the lazy enablement.
3328 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3329 test_kvm_facility(vcpu->kvm, 133) &&
3331 !vcpu->arch.gs_enabled) {
3332 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3333 vcpu->arch.sie_block->ecb |= ECB_GS;
3334 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3335 vcpu->arch.gs_enabled = 1;
3337 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3338 test_kvm_facility(vcpu->kvm, 82)) {
3339 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3340 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3342 save_access_regs(vcpu->arch.host_acrs);
3343 restore_access_regs(vcpu->run->s.regs.acrs);
3344 /* save host (userspace) fprs/vrs */
3346 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3347 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3349 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3351 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3352 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3353 if (test_fp_ctl(current->thread.fpu.fpc))
3354 /* User space provided an invalid FPC, let's clear it */
3355 current->thread.fpu.fpc = 0;
3356 if (MACHINE_HAS_GS) {
3358 __ctl_set_bit(2, 4);
3359 if (current->thread.gs_cb) {
3360 vcpu->arch.host_gscb = current->thread.gs_cb;
3361 save_gs_cb(vcpu->arch.host_gscb);
3363 if (vcpu->arch.gs_enabled) {
3364 current->thread.gs_cb = (struct gs_cb *)
3365 &vcpu->run->s.regs.gscb;
3366 restore_gs_cb(current->thread.gs_cb);
3371 kvm_run->kvm_dirty_regs = 0;
/* Mirror of sync_regs(): after a guest run, copy the vcpu register
 * state back into kvm_run for user space, and restore the host's
 * access-register, FP/vector and guarded-storage context that
 * sync_regs() swapped out. */
3374 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3376 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3377 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3378 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3379 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3380 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3381 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3382 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3383 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3384 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3385 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3386 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3387 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3388 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3389 save_access_regs(vcpu->run->s.regs.acrs);
3390 restore_access_regs(vcpu->arch.host_acrs);
3391 /* Save guest register state */
3393 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3394 /* Restore will be done lazily at return */
3395 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3396 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3397 if (MACHINE_HAS_GS) {
3399 __ctl_set_bit(2, 4);
3400 if (vcpu->arch.gs_enabled)
3401 save_gs_cb(current->thread.gs_cb);
3402 current->thread.gs_cb = vcpu->arch.host_gscb;
3403 restore_gs_cb(vcpu->arch.host_gscb);
3404 if (!vcpu->arch.host_gscb)
3405 __ctl_clear_bit(2, 4);
3406 vcpu->arch.host_gscb = NULL;
/* KVM_RUN entry point: handle immediate_exit and pending debug exits,
 * auto-start the vcpu unless user space controls CPU state (in which
 * case running a stopped vcpu is an error), sync registers in, run
 * the vcpu loop, translate signal/debug/-EREMOTE outcomes into the
 * proper exit reasons, then store registers back for user space. */
3412 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3416 if (kvm_run->immediate_exit)
3419 if (guestdbg_exit_pending(vcpu)) {
3420 kvm_s390_prepare_debug_exit(vcpu);
3424 kvm_sigset_activate(vcpu);
3426 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3427 kvm_s390_vcpu_start(vcpu);
3428 } else if (is_vcpu_stopped(vcpu)) {
3429 pr_err_ratelimited("can't run stopped vcpu %d\n",
3434 sync_regs(vcpu, kvm_run);
3435 enable_cpu_timer_accounting(vcpu);
3438 rc = __vcpu_run(vcpu);
3440 if (signal_pending(current) && !rc) {
3441 kvm_run->exit_reason = KVM_EXIT_INTR;
3445 if (guestdbg_exit_pending(vcpu) && !rc) {
3446 kvm_s390_prepare_debug_exit(vcpu);
3450 if (rc == -EREMOTE) {
3451 /* userspace support is needed, kvm_run has been prepared */
3455 disable_cpu_timer_accounting(vcpu);
3456 store_regs(vcpu, kvm_run);
3458 kvm_sigset_deactivate(vcpu);
3460 vcpu->stat.exit_userspace++;
3465  * store status at address
3466  * we have two special cases:
3467  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3468  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/* Writes the architectural store-status save area (FPRs/VRs, GPRs,
 * PSW, prefix, FPC, TOD programmable reg, CPU timer, clock comparator,
 * ARs, CRs) into guest memory.  The two sentinel addresses also store
 * the archmode byte at absolute/real address 163.  Vector registers,
 * when present, are converted to FP format first.  Errors from the
 * individual writes are OR-ed and reported as -EFAULT. */
3470 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3472 unsigned char archmode = 1;
3473 freg_t fprs[NUM_FPRS];
3478 px = kvm_s390_get_prefix(vcpu);
3479 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3480 if (write_guest_abs(vcpu, 163, &archmode, 1))
3483 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3484 if (write_guest_real(vcpu, 163, &archmode, 1))
3488 gpa -= __LC_FPREGS_SAVE_AREA;
3490 /* manually convert vector registers if necessary */
3491 if (MACHINE_HAS_VX) {
3492 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3493 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3496 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3497 vcpu->run->s.regs.fprs, 128);
3499 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3500 vcpu->run->s.regs.gprs, 128);
3501 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3502 &vcpu->arch.sie_block->gpsw, 16);
3503 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3505 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3506 &vcpu->run->s.regs.fpc, 4);
3507 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3508 &vcpu->arch.sie_block->todpr, 4);
3509 cputm = kvm_s390_get_cpu_timer(vcpu);
3510 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
/* The clock comparator is stored without its low byte. */
3512 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3513 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3515 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3516 &vcpu->run->s.regs.acrs, 64);
3517 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3518 &vcpu->arch.sie_block->gcr, 128);
3519 return rc ? -EFAULT : 0;
/* Store status while the vcpu's registers are loaded on the host:
 * refresh the lazily-switched FPC and access registers first, then
 * delegate to kvm_s390_store_status_unloaded(). */
3522 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3525 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3526 * switch in the run ioctl. Let's update our copies before we save
3527 * it into the save area
3530 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3531 save_access_regs(vcpu->run->s.regs.acrs);
3533 return kvm_s390_store_status_unloaded(vcpu, addr);
/* Cancel any pending ENABLE_IBS request and synchronously disable IBS
 * on this vcpu. */
3536 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3538 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3539 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
/* Disable IBS on every vcpu of the VM. */
3542 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3545 struct kvm_vcpu *vcpu;
3547 kvm_for_each_vcpu(i, vcpu, kvm) {
3548 __disable_ibs_on_vcpu(vcpu);
/* Cancel any pending DISABLE_IBS request and synchronously enable IBS
 * on this vcpu. */
3552 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3556 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3557 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
/* Transition a vcpu out of the STOPPED state.  IBS is an optimization
 * valid only while a single vcpu runs: the first started vcpu gets
 * IBS enabled, and starting a second one disables IBS everywhere.
 * The start_stop_lock serializes these transitions VM-wide. */
3560 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3562 int i, online_vcpus, started_vcpus = 0;
3564 if (!is_vcpu_stopped(vcpu))
3567 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3568 /* Only one cpu at a time may enter/leave the STOPPED state. */
3569 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3570 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3572 for (i = 0; i < online_vcpus; i++) {
3573 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3577 if (started_vcpus == 0) {
3578 /* we're the only active VCPU -> speed it up */
3579 __enable_ibs_on_vcpu(vcpu);
3580 } else if (started_vcpus == 1) {
3582 * As we are starting a second VCPU, we have to disable
3583 * the IBS facility on all VCPUs to remove potentially
3584 * outstanding ENABLE requests.
3586 __disable_ibs_on_all_vcpus(vcpu->kvm);
3589 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3591 * Another VCPU might have used IBS while we were offline.
3592 * Let's play safe and flush the VCPU at startup.
3594 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3595 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/* Transition a vcpu into the STOPPED state.  Clears any pending SIGP
 * STOP request first, disables IBS on the stopping vcpu, and — if
 * exactly one vcpu remains running afterwards — enables IBS on that
 * remaining vcpu.  Serialized by start_stop_lock like vcpu_start. */
3599 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3601 int i, online_vcpus, started_vcpus = 0;
3602 struct kvm_vcpu *started_vcpu = NULL;
3604 if (is_vcpu_stopped(vcpu))
3607 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3608 /* Only one cpu at a time may enter/leave the STOPPED state. */
3609 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3610 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3612 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3613 kvm_s390_clear_stop_irq(vcpu);
3615 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3616 __disable_ibs_on_vcpu(vcpu);
3618 for (i = 0; i < online_vcpus; i++) {
3619 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3621 started_vcpu = vcpu->kvm->vcpus[i];
3625 if (started_vcpus == 1) {
3627 * As we only have one VCPU left, we want to enable the
3628 * IBS facility for that VCPU to speed it up.
3630 __enable_ibs_on_vcpu(started_vcpu);
3633 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/* KVM_ENABLE_CAP on a vcpu; currently only KVM_CAP_S390_CSS_SUPPORT,
 * which turns on VM-wide channel-subsystem intercept support. */
3637 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3638 struct kvm_enable_cap *cap)
3646 case KVM_CAP_S390_CSS_SUPPORT:
3647 if (!vcpu->kvm->arch.css_support) {
3648 vcpu->kvm->arch.css_support = 1;
3649 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3650 trace_kvm_s390_enable_css(vcpu->kvm);
/* KVM_S390_MEM_OP: read or write guest logical memory on behalf of
 * user space.  Untrusted parameters (flags, ar, size) are validated
 * up front and size is capped at MEM_OP_MAX_SIZE.  With CHECK_ONLY,
 * only access permissions are verified; otherwise data is staged
 * through a temporary kernel buffer.  On a guest access exception
 * (r > 0) with INJECT_EXCEPTION set, the program interrupt is
 * injected into the vcpu. */
3661 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3662 struct kvm_s390_mem_op *mop)
3664 void __user *uaddr = (void __user *)mop->buf;
3665 void *tmpbuf = NULL;
3667 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3668 | KVM_S390_MEMOP_F_CHECK_ONLY;
3670 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
3673 if (mop->size > MEM_OP_MAX_SIZE)
3676 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3677 tmpbuf = vmalloc(mop->size);
3682 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3685 case KVM_S390_MEMOP_LOGICAL_READ:
3686 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3687 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3688 mop->size, GACC_FETCH);
3691 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3693 if (copy_to_user(uaddr, tmpbuf, mop->size))
3697 case KVM_S390_MEMOP_LOGICAL_WRITE:
3698 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3699 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3700 mop->size, GACC_STORE);
3703 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3707 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3713 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3715 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3716 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
/* Dispatcher for all s390 vcpu-level ioctls.  Copies each ioctl's
 * argument structure from user space, validates it, and forwards to
 * the matching handler.  UCAS map/unmap and VCPU_FAULT are the
 * user-controlled (ucontrol) address-space operations, available only
 * when CONFIG_KVM_S390_UCONTROL is set and the VM is ucontrol. */
3722 long kvm_arch_vcpu_ioctl(struct file *filp,
3723 unsigned int ioctl, unsigned long arg)
3725 struct kvm_vcpu *vcpu = filp->private_data;
3726 void __user *argp = (void __user *)arg;
3731 case KVM_S390_IRQ: {
3732 struct kvm_s390_irq s390irq;
3735 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3737 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3740 case KVM_S390_INTERRUPT: {
3741 struct kvm_s390_interrupt s390int;
3742 struct kvm_s390_irq s390irq = {};
3745 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3747 if (s390int_to_s390irq(&s390int, &s390irq))
3749 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3752 case KVM_S390_STORE_STATUS:
3753 idx = srcu_read_lock(&vcpu->kvm->srcu);
3754 r = kvm_s390_store_status_unloaded(vcpu, arg);
3755 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3757 case KVM_S390_SET_INITIAL_PSW: {
3761 if (copy_from_user(&psw, argp, sizeof(psw)))
3763 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3766 case KVM_S390_INITIAL_RESET:
3767 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3769 case KVM_SET_ONE_REG:
3770 case KVM_GET_ONE_REG: {
3771 struct kvm_one_reg reg;
3773 if (copy_from_user(&reg, argp, sizeof(reg)))
3775 if (ioctl == KVM_SET_ONE_REG)
3776 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3778 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3781 #ifdef CONFIG_KVM_S390_UCONTROL
3782 case KVM_S390_UCAS_MAP: {
3783 struct kvm_s390_ucas_mapping ucasmap;
3785 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3790 if (!kvm_is_ucontrol(vcpu->kvm)) {
3795 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3796 ucasmap.vcpu_addr, ucasmap.length);
3799 case KVM_S390_UCAS_UNMAP: {
3800 struct kvm_s390_ucas_mapping ucasmap;
3802 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3807 if (!kvm_is_ucontrol(vcpu->kvm)) {
3812 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3817 case KVM_S390_VCPU_FAULT: {
3818 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3821 case KVM_ENABLE_CAP:
3823 struct kvm_enable_cap cap;
3825 if (copy_from_user(&cap, argp, sizeof(cap)))
3827 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3830 case KVM_S390_MEM_OP: {
3831 struct kvm_s390_mem_op mem_op;
3833 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3834 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3839 case KVM_S390_SET_IRQ_STATE: {
3840 struct kvm_s390_irq_state irq_state;
3843 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
/* Reject buffers that are empty, oversized, or not a whole
 * multiple of the irq record size. */
3845 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3846 irq_state.len == 0 ||
3847 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3851 r = kvm_s390_set_irq_state(vcpu,
3852 (void __user *) irq_state.buf,
3856 case KVM_S390_GET_IRQ_STATE: {
3857 struct kvm_s390_irq_state irq_state;
3860 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3862 if (irq_state.len == 0) {
3866 r = kvm_s390_get_irq_state(vcpu,
3867 (__u8 __user *) irq_state.buf,
/* mmap fault handler for the vcpu fd: ucontrol VMs may map the SIE
 * control block page at KVM_S390_SIE_PAGE_OFFSET; everything else
 * gets SIGBUS. */
3877 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3879 #ifdef CONFIG_KVM_S390_UCONTROL
3880 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3881 && (kvm_is_ucontrol(vcpu->kvm))) {
3882 vmf->page = virt_to_page(vcpu->arch.sie_block);
3883 get_page(vmf->page);
3887 return VM_FAULT_SIGBUS;
/* No architecture-private memslot data is needed on s390. */
3890 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3891 unsigned long npages)
3896 /* Section: memory related */
/* Validate a memslot before it is installed: user address, size and
 * guest physical placement must be 1 MB (segment) aligned and inside
 * the configured guest memory limit. */
3897 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3898 struct kvm_memory_slot *memslot,
3899 const struct kvm_userspace_memory_region *mem,
3900 enum kvm_mr_change change)
3902 /* A few sanity checks. We can have memory slots which have to be
3903 located/ended at a segment boundary (1MB). The memory in userland is
3904 ok to be fragmented into various different vmas. It is okay to mmap()
3905 and munmap() stuff in this slot after doing this call at any time */
3907 if (mem->userspace_addr & 0xffffful)
3910 if (mem->memory_size & 0xffffful)
3913 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
/* Apply a committed memslot change to the gmap: unmap the old segment
 * on delete/move, then map the new user range for create/move.  A
 * failure here is only logged — the memslot change has already been
 * committed by common code. */
3919 void kvm_arch_commit_memory_region(struct kvm *kvm,
3920 const struct kvm_userspace_memory_region *mem,
3921 const struct kvm_memory_slot *old,
3922 const struct kvm_memory_slot *new,
3923 enum kvm_mr_change change)
3929 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
3930 old->npages * PAGE_SIZE);
3933 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
3934 old->npages * PAGE_SIZE);
3939 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3940 mem->guest_phys_addr, mem->memory_size);
3942 case KVM_MR_FLAGS_ONLY:
3945 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
3948 pr_warn("failed to commit memory region\n");
/* Build a mask of facility bits in word @i that may be passed to the
 * guest, derived from the SCLP hypervisor facility indication
 * (2-bit field per 16-facility group in sclp.hmfai). */
3952 static inline unsigned long nonhyp_mask(int i)
3954 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3956 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
/* Reset the valid-wakeup marker when the vcpu leaves the halted state. */
3959 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3961 vcpu->valid_wakeup = false;
/* Module init: require the SIE interpretation facility (sief2), mask
 * the host facility list down to what may be forwarded to guests,
 * then register with common KVM. */
3964 static int __init kvm_s390_init(void)
3968 if (!sclp.has_sief2) {
3969 pr_info("SIE not available\n");
3973 for (i = 0; i < 16; i++)
3974 kvm_s390_fac_list_mask[i] |=
3975 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3977 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
/* Module exit: unregister from common KVM. */
3980 static void __exit kvm_s390_exit(void)
3985 module_init(kvm_s390_init);
3986 module_exit(kvm_s390_exit);
3989 * Enable autoloading of the kvm module.
3990 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3991 * since x86 takes a different approach.
3993 #include <linux/miscdevice.h>
3994 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3995 MODULE_ALIAS("devname:kvm");