GNU Linux-libre 4.9.308-gnu1
[releases.git] / arch / s390 / kvm / kvm-s390.c
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <asm/asm-offsets.h>
32 #include <asm/lowcore.h>
33 #include <asm/stp.h>
34 #include <asm/pgtable.h>
35 #include <asm/gmap.h>
36 #include <asm/nmi.h>
37 #include <asm/switch_to.h>
38 #include <asm/isc.h>
39 #include <asm/sclp.h>
40 #include <asm/cpacf.h>
41 #include <asm/timex.h>
42 #include "kvm-s390.h"
43 #include "gaccess.h"
44
45 #define KMSG_COMPONENT "kvm-s390"
46 #undef pr_fmt
47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
48
49 #define CREATE_TRACE_POINTS
50 #include "trace.h"
51 #include "trace-s390.h"
52
53 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
54 #define LOCAL_IRQS 32
55 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
56                            (KVM_MAX_VCPUS + LOCAL_IRQS))
57
58 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
59
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61         { "userspace_handled", VCPU_STAT(exit_userspace) },
62         { "exit_null", VCPU_STAT(exit_null) },
63         { "exit_validity", VCPU_STAT(exit_validity) },
64         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
65         { "exit_external_request", VCPU_STAT(exit_external_request) },
66         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
67         { "exit_instruction", VCPU_STAT(exit_instruction) },
68         { "exit_pei", VCPU_STAT(exit_pei) },
69         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
70         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
71         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
72         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
73         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
74         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
75         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
76         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
77         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
78         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
79         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
80         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
81         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
82         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
83         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
84         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
85         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
86         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
87         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
88         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
89         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
90         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
91         { "instruction_spx", VCPU_STAT(instruction_spx) },
92         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
93         { "instruction_stap", VCPU_STAT(instruction_stap) },
94         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
95         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
96         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
97         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
98         { "instruction_essa", VCPU_STAT(instruction_essa) },
99         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
100         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
101         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
102         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
103         { "instruction_sie", VCPU_STAT(instruction_sie) },
104         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
105         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
106         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
107         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
108         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
109         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
110         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
111         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
112         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
113         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
114         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
115         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
116         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
117         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
118         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
119         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
120         { "diagnose_10", VCPU_STAT(diagnose_10) },
121         { "diagnose_44", VCPU_STAT(diagnose_44) },
122         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
123         { "diagnose_258", VCPU_STAT(diagnose_258) },
124         { "diagnose_308", VCPU_STAT(diagnose_308) },
125         { "diagnose_500", VCPU_STAT(diagnose_500) },
126         { NULL }
127 };
128
129 /* allow nested virtualization in KVM (if enabled by user space) */
130 static int nested;
131 module_param(nested, int, S_IRUGO);
132 MODULE_PARM_DESC(nested, "Nested virtualization support");
133
134 /* upper facilities limit for kvm */
135 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
136
137 unsigned long kvm_s390_fac_list_mask_size(void)
138 {
139         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
140         return ARRAY_SIZE(kvm_s390_fac_list_mask);
141 }
142
143 /* available cpu features supported by kvm */
144 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
145 /* available subfunctions indicated via query / "test bit" */
146 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
147
148 static struct gmap_notifier gmap_notifier;
149 static struct gmap_notifier vsie_gmap_notifier;
150 debug_info_t *kvm_s390_dbf;
151
152 /* Section: not file related */
/*
 * Arch hook for enabling virtualization; there is nothing to do here, so
 * it always reports success.
 */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
158
159 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
160                               unsigned long end);
161
162 /*
163  * This callback is executed during stop_machine(). All CPUs are therefore
164  * temporarily stopped. In order not to change guest behavior, we have to
165  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
166  * so a CPU won't be stopped while calculating with the epoch.
167  */
168 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
169                           void *v)
170 {
171         struct kvm *kvm;
172         struct kvm_vcpu *vcpu;
173         int i;
174         unsigned long long *delta = v;
175
176         list_for_each_entry(kvm, &vm_list, vm_list) {
177                 kvm->arch.epoch -= *delta;
178                 kvm_for_each_vcpu(i, vcpu, kvm) {
179                         vcpu->arch.sie_block->epoch -= *delta;
180                         if (vcpu->arch.cputm_enabled)
181                                 vcpu->arch.cputm_start += *delta;
182                         if (vcpu->arch.vsie_block)
183                                 vcpu->arch.vsie_block->epoch -= *delta;
184                 }
185         }
186         return NOTIFY_OK;
187 }
188
189 static struct notifier_block kvm_clock_notifier = {
190         .notifier_call = kvm_clock_sync,
191 };
192
193 int kvm_arch_hardware_setup(void)
194 {
195         gmap_notifier.notifier_call = kvm_gmap_notifier;
196         gmap_register_pte_notifier(&gmap_notifier);
197         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
198         gmap_register_pte_notifier(&vsie_gmap_notifier);
199         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
200                                        &kvm_clock_notifier);
201         return 0;
202 }
203
204 void kvm_arch_hardware_unsetup(void)
205 {
206         gmap_unregister_pte_notifier(&gmap_notifier);
207         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
208         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
209                                          &kvm_clock_notifier);
210 }
211
212 static void allow_cpu_feat(unsigned long nr)
213 {
214         set_bit_inv(nr, kvm_s390_available_cpu_feat);
215 }
216
217 static inline int plo_test_bit(unsigned char nr)
218 {
219         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
220         int cc = 3; /* subfunction not available */
221
222         asm volatile(
223                 /* Parameter registers are ignored for "test bit" */
224                 "       plo     0,0,0,0(0)\n"
225                 "       ipm     %0\n"
226                 "       srl     %0,28\n"
227                 : "=d" (cc)
228                 : "d" (r0)
229                 : "cc");
230         return cc == 0;
231 }
232
233 static void kvm_s390_cpu_feat_init(void)
234 {
235         int i;
236
237         for (i = 0; i < 256; ++i) {
238                 if (plo_test_bit(i))
239                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
240         }
241
242         if (test_facility(28)) /* TOD-clock steering */
243                 ptff(kvm_s390_available_subfunc.ptff,
244                      sizeof(kvm_s390_available_subfunc.ptff),
245                      PTFF_QAF);
246
247         if (test_facility(17)) { /* MSA */
248                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
249                               kvm_s390_available_subfunc.kmac);
250                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
251                               kvm_s390_available_subfunc.kmc);
252                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
253                               kvm_s390_available_subfunc.km);
254                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
255                               kvm_s390_available_subfunc.kimd);
256                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
257                               kvm_s390_available_subfunc.klmd);
258         }
259         if (test_facility(76)) /* MSA3 */
260                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
261                               kvm_s390_available_subfunc.pckmo);
262         if (test_facility(77)) { /* MSA4 */
263                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
264                               kvm_s390_available_subfunc.kmctr);
265                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
266                               kvm_s390_available_subfunc.kmf);
267                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
268                               kvm_s390_available_subfunc.kmo);
269                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
270                               kvm_s390_available_subfunc.pcc);
271         }
272         if (test_facility(57)) /* MSA5 */
273                 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
274                               kvm_s390_available_subfunc.ppno);
275
276         if (MACHINE_HAS_ESOP)
277                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
278         /*
279          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
280          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
281          */
282         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
283             !test_facility(3) || !nested)
284                 return;
285         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
286         if (sclp.has_64bscao)
287                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
288         if (sclp.has_siif)
289                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
290         if (sclp.has_gpere)
291                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
292         if (sclp.has_gsls)
293                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
294         if (sclp.has_ib)
295                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
296         if (sclp.has_cei)
297                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
298         if (sclp.has_ibs)
299                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
300         /*
301          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
302          * all skey handling functions read/set the skey from the PGSTE
303          * instead of the real storage key.
304          *
305          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
306          * pages being detected as preserved although they are resident.
307          *
308          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
309          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
310          *
311          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
312          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
313          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
314          *
315          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
316          * cannot easily shadow the SCA because of the ipte lock.
317          */
318 }
319
320 int kvm_arch_init(void *opaque)
321 {
322         int rc;
323
324         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
325         if (!kvm_s390_dbf)
326                 return -ENOMEM;
327
328         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
329                 rc = -ENOMEM;
330                 goto out_debug_unreg;
331         }
332
333         kvm_s390_cpu_feat_init();
334
335         /* Register floating interrupt controller interface. */
336         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
337         if (rc) {
338                 pr_err("Failed to register FLIC rc=%d\n", rc);
339                 goto out_debug_unreg;
340         }
341         return 0;
342
343 out_debug_unreg:
344         debug_unregister(kvm_s390_dbf);
345         return rc;
346 }
347
348 void kvm_arch_exit(void)
349 {
350         debug_unregister(kvm_s390_dbf);
351 }
352
353 /* Section: device related */
354 long kvm_arch_dev_ioctl(struct file *filp,
355                         unsigned int ioctl, unsigned long arg)
356 {
357         if (ioctl == KVM_S390_ENABLE_SIE)
358                 return s390_enable_sie();
359         return -EINVAL;
360 }
361
362 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
363 {
364         int r;
365
366         switch (ext) {
367         case KVM_CAP_S390_PSW:
368         case KVM_CAP_S390_GMAP:
369         case KVM_CAP_SYNC_MMU:
370 #ifdef CONFIG_KVM_S390_UCONTROL
371         case KVM_CAP_S390_UCONTROL:
372 #endif
373         case KVM_CAP_ASYNC_PF:
374         case KVM_CAP_SYNC_REGS:
375         case KVM_CAP_ONE_REG:
376         case KVM_CAP_ENABLE_CAP:
377         case KVM_CAP_S390_CSS_SUPPORT:
378         case KVM_CAP_IOEVENTFD:
379         case KVM_CAP_DEVICE_CTRL:
380         case KVM_CAP_ENABLE_CAP_VM:
381         case KVM_CAP_S390_IRQCHIP:
382         case KVM_CAP_VM_ATTRIBUTES:
383         case KVM_CAP_MP_STATE:
384         case KVM_CAP_S390_INJECT_IRQ:
385         case KVM_CAP_S390_USER_SIGP:
386         case KVM_CAP_S390_USER_STSI:
387         case KVM_CAP_S390_SKEYS:
388         case KVM_CAP_S390_IRQ_STATE:
389         case KVM_CAP_S390_USER_INSTR0:
390                 r = 1;
391                 break;
392         case KVM_CAP_S390_MEM_OP:
393                 r = MEM_OP_MAX_SIZE;
394                 break;
395         case KVM_CAP_NR_VCPUS:
396         case KVM_CAP_MAX_VCPUS:
397                 r = KVM_S390_BSCA_CPU_SLOTS;
398                 if (!kvm_s390_use_sca_entries())
399                         r = KVM_MAX_VCPUS;
400                 else if (sclp.has_esca && sclp.has_64bscao)
401                         r = KVM_S390_ESCA_CPU_SLOTS;
402                 break;
403         case KVM_CAP_NR_MEMSLOTS:
404                 r = KVM_USER_MEM_SLOTS;
405                 break;
406         case KVM_CAP_S390_COW:
407                 r = MACHINE_HAS_ESOP;
408                 break;
409         case KVM_CAP_S390_VECTOR_REGISTERS:
410                 r = MACHINE_HAS_VX;
411                 break;
412         case KVM_CAP_S390_RI:
413                 r = test_facility(64);
414                 break;
415         case KVM_CAP_S390_BPB:
416                 r = test_facility(82);
417                 break;
418         default:
419                 r = 0;
420         }
421         return r;
422 }
423
424 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
425                                         struct kvm_memory_slot *memslot)
426 {
427         gfn_t cur_gfn, last_gfn;
428         unsigned long address;
429         struct gmap *gmap = kvm->arch.gmap;
430
431         /* Loop over all guest pages */
432         last_gfn = memslot->base_gfn + memslot->npages;
433         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
434                 address = gfn_to_hva_memslot(memslot, cur_gfn);
435
436                 if (test_and_clear_guest_dirty(gmap->mm, address))
437                         mark_page_dirty(kvm, cur_gfn);
438                 if (fatal_signal_pending(current))
439                         return;
440                 cond_resched();
441         }
442 }
443
444 /* Section: vm related */
445 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
446
447 /*
448  * Get (and clear) the dirty memory log for a memory slot.
449  */
450 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
451                                struct kvm_dirty_log *log)
452 {
453         int r;
454         unsigned long n;
455         struct kvm_memslots *slots;
456         struct kvm_memory_slot *memslot;
457         int is_dirty = 0;
458
459         if (kvm_is_ucontrol(kvm))
460                 return -EINVAL;
461
462         mutex_lock(&kvm->slots_lock);
463
464         r = -EINVAL;
465         if (log->slot >= KVM_USER_MEM_SLOTS)
466                 goto out;
467
468         slots = kvm_memslots(kvm);
469         memslot = id_to_memslot(slots, log->slot);
470         r = -ENOENT;
471         if (!memslot->dirty_bitmap)
472                 goto out;
473
474         kvm_s390_sync_dirty_log(kvm, memslot);
475         r = kvm_get_dirty_log(kvm, log, &is_dirty);
476         if (r)
477                 goto out;
478
479         /* Clear the dirty log */
480         if (is_dirty) {
481                 n = kvm_dirty_bitmap_bytes(memslot);
482                 memset(memslot->dirty_bitmap, 0, n);
483         }
484         r = 0;
485 out:
486         mutex_unlock(&kvm->slots_lock);
487         return r;
488 }
489
490 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
491 {
492         unsigned int i;
493         struct kvm_vcpu *vcpu;
494
495         kvm_for_each_vcpu(i, vcpu, kvm) {
496                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
497         }
498 }
499
500 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
501 {
502         int r;
503
504         if (cap->flags)
505                 return -EINVAL;
506
507         switch (cap->cap) {
508         case KVM_CAP_S390_IRQCHIP:
509                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
510                 kvm->arch.use_irqchip = 1;
511                 r = 0;
512                 break;
513         case KVM_CAP_S390_USER_SIGP:
514                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
515                 kvm->arch.user_sigp = 1;
516                 r = 0;
517                 break;
518         case KVM_CAP_S390_VECTOR_REGISTERS:
519                 mutex_lock(&kvm->lock);
520                 if (kvm->created_vcpus) {
521                         r = -EBUSY;
522                 } else if (MACHINE_HAS_VX) {
523                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
524                         set_kvm_facility(kvm->arch.model.fac_list, 129);
525                         r = 0;
526                 } else
527                         r = -EINVAL;
528                 mutex_unlock(&kvm->lock);
529                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
530                          r ? "(not available)" : "(success)");
531                 break;
532         case KVM_CAP_S390_RI:
533                 r = -EINVAL;
534                 mutex_lock(&kvm->lock);
535                 if (kvm->created_vcpus) {
536                         r = -EBUSY;
537                 } else if (test_facility(64)) {
538                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
539                         set_kvm_facility(kvm->arch.model.fac_list, 64);
540                         r = 0;
541                 }
542                 mutex_unlock(&kvm->lock);
543                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
544                          r ? "(not available)" : "(success)");
545                 break;
546         case KVM_CAP_S390_USER_STSI:
547                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
548                 kvm->arch.user_stsi = 1;
549                 r = 0;
550                 break;
551         case KVM_CAP_S390_USER_INSTR0:
552                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
553                 kvm->arch.user_instr0 = 1;
554                 icpt_operexc_on_all_vcpus(kvm);
555                 r = 0;
556                 break;
557         default:
558                 r = -EINVAL;
559                 break;
560         }
561         return r;
562 }
563
564 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
565 {
566         int ret;
567
568         switch (attr->attr) {
569         case KVM_S390_VM_MEM_LIMIT_SIZE:
570                 ret = 0;
571                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
572                          kvm->arch.mem_limit);
573                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
574                         ret = -EFAULT;
575                 break;
576         default:
577                 ret = -ENXIO;
578                 break;
579         }
580         return ret;
581 }
582
583 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
584 {
585         int ret;
586         unsigned int idx;
587         switch (attr->attr) {
588         case KVM_S390_VM_MEM_ENABLE_CMMA:
589                 ret = -ENXIO;
590                 if (!sclp.has_cmma)
591                         break;
592
593                 ret = -EBUSY;
594                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
595                 mutex_lock(&kvm->lock);
596                 if (!kvm->created_vcpus) {
597                         kvm->arch.use_cmma = 1;
598                         ret = 0;
599                 }
600                 mutex_unlock(&kvm->lock);
601                 break;
602         case KVM_S390_VM_MEM_CLR_CMMA:
603                 ret = -ENXIO;
604                 if (!sclp.has_cmma)
605                         break;
606                 ret = -EINVAL;
607                 if (!kvm->arch.use_cmma)
608                         break;
609
610                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
611                 mutex_lock(&kvm->lock);
612                 idx = srcu_read_lock(&kvm->srcu);
613                 s390_reset_cmma(kvm->arch.gmap->mm);
614                 srcu_read_unlock(&kvm->srcu, idx);
615                 mutex_unlock(&kvm->lock);
616                 ret = 0;
617                 break;
618         case KVM_S390_VM_MEM_LIMIT_SIZE: {
619                 unsigned long new_limit;
620
621                 if (kvm_is_ucontrol(kvm))
622                         return -EINVAL;
623
624                 if (get_user(new_limit, (u64 __user *)attr->addr))
625                         return -EFAULT;
626
627                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
628                     new_limit > kvm->arch.mem_limit)
629                         return -E2BIG;
630
631                 if (!new_limit)
632                         return -EINVAL;
633
634                 /* gmap_create takes last usable address */
635                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
636                         new_limit -= 1;
637
638                 ret = -EBUSY;
639                 mutex_lock(&kvm->lock);
640                 if (!kvm->created_vcpus) {
641                         /* gmap_create will round the limit up */
642                         struct gmap *new = gmap_create(current->mm, new_limit);
643
644                         if (!new) {
645                                 ret = -ENOMEM;
646                         } else {
647                                 gmap_remove(kvm->arch.gmap);
648                                 new->private = kvm;
649                                 kvm->arch.gmap = new;
650                                 ret = 0;
651                         }
652                 }
653                 mutex_unlock(&kvm->lock);
654                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
655                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
656                          (void *) kvm->arch.gmap->asce);
657                 break;
658         }
659         default:
660                 ret = -ENXIO;
661                 break;
662         }
663         return ret;
664 }
665
666 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
667
668 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
669 {
670         struct kvm_vcpu *vcpu;
671         int i;
672
673         if (!test_kvm_facility(kvm, 76))
674                 return -EINVAL;
675
676         mutex_lock(&kvm->lock);
677         switch (attr->attr) {
678         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
679                 get_random_bytes(
680                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
681                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
682                 kvm->arch.crypto.aes_kw = 1;
683                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
684                 break;
685         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
686                 get_random_bytes(
687                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
688                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
689                 kvm->arch.crypto.dea_kw = 1;
690                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
691                 break;
692         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
693                 kvm->arch.crypto.aes_kw = 0;
694                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
695                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
696                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
697                 break;
698         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
699                 kvm->arch.crypto.dea_kw = 0;
700                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
701                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
702                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
703                 break;
704         default:
705                 mutex_unlock(&kvm->lock);
706                 return -ENXIO;
707         }
708
709         kvm_for_each_vcpu(i, vcpu, kvm) {
710                 kvm_s390_vcpu_crypto_setup(vcpu);
711                 exit_sie(vcpu);
712         }
713         mutex_unlock(&kvm->lock);
714         return 0;
715 }
716
717 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
718 {
719         u8 gtod_high;
720
721         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
722                                            sizeof(gtod_high)))
723                 return -EFAULT;
724
725         if (gtod_high != 0)
726                 return -EINVAL;
727         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
728
729         return 0;
730 }
731
732 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
733 {
734         u64 gtod;
735
736         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
737                 return -EFAULT;
738
739         kvm_s390_set_tod_clock(kvm, gtod);
740         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
741         return 0;
742 }
743
744 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
745 {
746         int ret;
747
748         if (attr->flags)
749                 return -EINVAL;
750
751         switch (attr->attr) {
752         case KVM_S390_VM_TOD_HIGH:
753                 ret = kvm_s390_set_tod_high(kvm, attr);
754                 break;
755         case KVM_S390_VM_TOD_LOW:
756                 ret = kvm_s390_set_tod_low(kvm, attr);
757                 break;
758         default:
759                 ret = -ENXIO;
760                 break;
761         }
762         return ret;
763 }
764
765 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
766 {
767         u8 gtod_high = 0;
768
769         if (copy_to_user((void __user *)attr->addr, &gtod_high,
770                                          sizeof(gtod_high)))
771                 return -EFAULT;
772         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
773
774         return 0;
775 }
776
777 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
778 {
779         u64 gtod;
780
781         gtod = kvm_s390_get_tod_clock_fast(kvm);
782         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
783                 return -EFAULT;
784         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
785
786         return 0;
787 }
788
789 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
790 {
791         int ret;
792
793         if (attr->flags)
794                 return -EINVAL;
795
796         switch (attr->attr) {
797         case KVM_S390_VM_TOD_HIGH:
798                 ret = kvm_s390_get_tod_high(kvm, attr);
799                 break;
800         case KVM_S390_VM_TOD_LOW:
801                 ret = kvm_s390_get_tod_low(kvm, attr);
802                 break;
803         default:
804                 ret = -ENXIO;
805                 break;
806         }
807         return ret;
808 }
809
810 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
811 {
812         struct kvm_s390_vm_cpu_processor *proc;
813         u16 lowest_ibc, unblocked_ibc;
814         int ret = 0;
815
816         mutex_lock(&kvm->lock);
817         if (kvm->created_vcpus) {
818                 ret = -EBUSY;
819                 goto out;
820         }
821         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
822         if (!proc) {
823                 ret = -ENOMEM;
824                 goto out;
825         }
826         if (!copy_from_user(proc, (void __user *)attr->addr,
827                             sizeof(*proc))) {
828                 kvm->arch.model.cpuid = proc->cpuid;
829                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
830                 unblocked_ibc = sclp.ibc & 0xfff;
831                 if (lowest_ibc && proc->ibc) {
832                         if (proc->ibc > unblocked_ibc)
833                                 kvm->arch.model.ibc = unblocked_ibc;
834                         else if (proc->ibc < lowest_ibc)
835                                 kvm->arch.model.ibc = lowest_ibc;
836                         else
837                                 kvm->arch.model.ibc = proc->ibc;
838                 }
839                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
840                        S390_ARCH_FAC_LIST_SIZE_BYTE);
841         } else
842                 ret = -EFAULT;
843         kfree(proc);
844 out:
845         mutex_unlock(&kvm->lock);
846         return ret;
847 }
848
849 static int kvm_s390_set_processor_feat(struct kvm *kvm,
850                                        struct kvm_device_attr *attr)
851 {
852         struct kvm_s390_vm_cpu_feat data;
853         int ret = -EBUSY;
854
855         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
856                 return -EFAULT;
857         if (!bitmap_subset((unsigned long *) data.feat,
858                            kvm_s390_available_cpu_feat,
859                            KVM_S390_VM_CPU_FEAT_NR_BITS))
860                 return -EINVAL;
861
862         mutex_lock(&kvm->lock);
863         if (!atomic_read(&kvm->online_vcpus)) {
864                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
865                             KVM_S390_VM_CPU_FEAT_NR_BITS);
866                 ret = 0;
867         }
868         mutex_unlock(&kvm->lock);
869         return ret;
870 }
871
/*
 * Setting CPU subfunctions is not supported; always returns -ENXIO.
 *
 * Once kernel and hardware support exist, the subfunctions have to be
 * stored in kvm->arch together with a note that user space configured
 * them.
 */
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	return -ENXIO;
}
881
882 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
883 {
884         int ret = -ENXIO;
885
886         switch (attr->attr) {
887         case KVM_S390_VM_CPU_PROCESSOR:
888                 ret = kvm_s390_set_processor(kvm, attr);
889                 break;
890         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
891                 ret = kvm_s390_set_processor_feat(kvm, attr);
892                 break;
893         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
894                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
895                 break;
896         }
897         return ret;
898 }
899
900 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
901 {
902         struct kvm_s390_vm_cpu_processor *proc;
903         int ret = 0;
904
905         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
906         if (!proc) {
907                 ret = -ENOMEM;
908                 goto out;
909         }
910         proc->cpuid = kvm->arch.model.cpuid;
911         proc->ibc = kvm->arch.model.ibc;
912         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
913                S390_ARCH_FAC_LIST_SIZE_BYTE);
914         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
915                 ret = -EFAULT;
916         kfree(proc);
917 out:
918         return ret;
919 }
920
921 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
922 {
923         struct kvm_s390_vm_cpu_machine *mach;
924         int ret = 0;
925
926         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
927         if (!mach) {
928                 ret = -ENOMEM;
929                 goto out;
930         }
931         get_cpu_id((struct cpuid *) &mach->cpuid);
932         mach->ibc = sclp.ibc;
933         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
934                S390_ARCH_FAC_LIST_SIZE_BYTE);
935         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
936                sizeof(S390_lowcore.stfle_fac_list));
937         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
938                 ret = -EFAULT;
939         kfree(mach);
940 out:
941         return ret;
942 }
943
944 static int kvm_s390_get_processor_feat(struct kvm *kvm,
945                                        struct kvm_device_attr *attr)
946 {
947         struct kvm_s390_vm_cpu_feat data;
948
949         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
950                     KVM_S390_VM_CPU_FEAT_NR_BITS);
951         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
952                 return -EFAULT;
953         return 0;
954 }
955
956 static int kvm_s390_get_machine_feat(struct kvm *kvm,
957                                      struct kvm_device_attr *attr)
958 {
959         struct kvm_s390_vm_cpu_feat data;
960
961         bitmap_copy((unsigned long *) data.feat,
962                     kvm_s390_available_cpu_feat,
963                     KVM_S390_VM_CPU_FEAT_NR_BITS);
964         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
965                 return -EFAULT;
966         return 0;
967 }
968
/*
 * Querying configured CPU subfunctions is not supported; always returns
 * -ENXIO.
 *
 * Once subfunctions can actually be configured (kernel + hw support),
 * this has to check whether user space already set them and, if so, copy
 * them from kvm->arch.
 */
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	return -ENXIO;
}
979
980 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
981                                         struct kvm_device_attr *attr)
982 {
983         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
984             sizeof(struct kvm_s390_vm_cpu_subfunc)))
985                 return -EFAULT;
986         return 0;
987 }
988 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
989 {
990         int ret = -ENXIO;
991
992         switch (attr->attr) {
993         case KVM_S390_VM_CPU_PROCESSOR:
994                 ret = kvm_s390_get_processor(kvm, attr);
995                 break;
996         case KVM_S390_VM_CPU_MACHINE:
997                 ret = kvm_s390_get_machine(kvm, attr);
998                 break;
999         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1000                 ret = kvm_s390_get_processor_feat(kvm, attr);
1001                 break;
1002         case KVM_S390_VM_CPU_MACHINE_FEAT:
1003                 ret = kvm_s390_get_machine_feat(kvm, attr);
1004                 break;
1005         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1006                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1007                 break;
1008         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1009                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1010                 break;
1011         }
1012         return ret;
1013 }
1014
1015 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1016 {
1017         int ret;
1018
1019         switch (attr->group) {
1020         case KVM_S390_VM_MEM_CTRL:
1021                 ret = kvm_s390_set_mem_control(kvm, attr);
1022                 break;
1023         case KVM_S390_VM_TOD:
1024                 ret = kvm_s390_set_tod(kvm, attr);
1025                 break;
1026         case KVM_S390_VM_CPU_MODEL:
1027                 ret = kvm_s390_set_cpu_model(kvm, attr);
1028                 break;
1029         case KVM_S390_VM_CRYPTO:
1030                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1031                 break;
1032         default:
1033                 ret = -ENXIO;
1034                 break;
1035         }
1036
1037         return ret;
1038 }
1039
1040 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1041 {
1042         int ret;
1043
1044         switch (attr->group) {
1045         case KVM_S390_VM_MEM_CTRL:
1046                 ret = kvm_s390_get_mem_control(kvm, attr);
1047                 break;
1048         case KVM_S390_VM_TOD:
1049                 ret = kvm_s390_get_tod(kvm, attr);
1050                 break;
1051         case KVM_S390_VM_CPU_MODEL:
1052                 ret = kvm_s390_get_cpu_model(kvm, attr);
1053                 break;
1054         default:
1055                 ret = -ENXIO;
1056                 break;
1057         }
1058
1059         return ret;
1060 }
1061
1062 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1063 {
1064         int ret;
1065
1066         switch (attr->group) {
1067         case KVM_S390_VM_MEM_CTRL:
1068                 switch (attr->attr) {
1069                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1070                 case KVM_S390_VM_MEM_CLR_CMMA:
1071                         ret = sclp.has_cmma ? 0 : -ENXIO;
1072                         break;
1073                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1074                         ret = 0;
1075                         break;
1076                 default:
1077                         ret = -ENXIO;
1078                         break;
1079                 }
1080                 break;
1081         case KVM_S390_VM_TOD:
1082                 switch (attr->attr) {
1083                 case KVM_S390_VM_TOD_LOW:
1084                 case KVM_S390_VM_TOD_HIGH:
1085                         ret = 0;
1086                         break;
1087                 default:
1088                         ret = -ENXIO;
1089                         break;
1090                 }
1091                 break;
1092         case KVM_S390_VM_CPU_MODEL:
1093                 switch (attr->attr) {
1094                 case KVM_S390_VM_CPU_PROCESSOR:
1095                 case KVM_S390_VM_CPU_MACHINE:
1096                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1097                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1098                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1099                         ret = 0;
1100                         break;
1101                 /* configuring subfunctions is not supported yet */
1102                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1103                 default:
1104                         ret = -ENXIO;
1105                         break;
1106                 }
1107                 break;
1108         case KVM_S390_VM_CRYPTO:
1109                 switch (attr->attr) {
1110                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1111                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1112                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1113                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1114                         ret = 0;
1115                         break;
1116                 default:
1117                         ret = -ENXIO;
1118                         break;
1119                 }
1120                 break;
1121         default:
1122                 ret = -ENXIO;
1123                 break;
1124         }
1125
1126         return ret;
1127 }
1128
1129 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1130 {
1131         uint8_t *keys;
1132         uint64_t hva;
1133         int i, r = 0;
1134
1135         if (args->flags != 0)
1136                 return -EINVAL;
1137
1138         /* Is this guest using storage keys? */
1139         if (!mm_use_skey(current->mm))
1140                 return KVM_S390_GET_SKEYS_NONE;
1141
1142         /* Enforce sane limit on memory allocation */
1143         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1144                 return -EINVAL;
1145
1146         keys = kmalloc_array(args->count, sizeof(uint8_t),
1147                              GFP_KERNEL | __GFP_NOWARN);
1148         if (!keys)
1149                 keys = vmalloc(sizeof(uint8_t) * args->count);
1150         if (!keys)
1151                 return -ENOMEM;
1152
1153         down_read(&current->mm->mmap_sem);
1154         for (i = 0; i < args->count; i++) {
1155                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1156                 if (kvm_is_error_hva(hva)) {
1157                         r = -EFAULT;
1158                         break;
1159                 }
1160
1161                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1162                 if (r)
1163                         break;
1164         }
1165         up_read(&current->mm->mmap_sem);
1166
1167         if (!r) {
1168                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1169                                  sizeof(uint8_t) * args->count);
1170                 if (r)
1171                         r = -EFAULT;
1172         }
1173
1174         kvfree(keys);
1175         return r;
1176 }
1177
1178 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1179 {
1180         uint8_t *keys;
1181         uint64_t hva;
1182         int i, r = 0;
1183
1184         if (args->flags != 0)
1185                 return -EINVAL;
1186
1187         /* Enforce sane limit on memory allocation */
1188         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1189                 return -EINVAL;
1190
1191         keys = kmalloc_array(args->count, sizeof(uint8_t),
1192                              GFP_KERNEL | __GFP_NOWARN);
1193         if (!keys)
1194                 keys = vmalloc(sizeof(uint8_t) * args->count);
1195         if (!keys)
1196                 return -ENOMEM;
1197
1198         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1199                            sizeof(uint8_t) * args->count);
1200         if (r) {
1201                 r = -EFAULT;
1202                 goto out;
1203         }
1204
1205         /* Enable storage key handling for the guest */
1206         r = s390_enable_skey();
1207         if (r)
1208                 goto out;
1209
1210         down_read(&current->mm->mmap_sem);
1211         for (i = 0; i < args->count; i++) {
1212                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1213                 if (kvm_is_error_hva(hva)) {
1214                         r = -EFAULT;
1215                         break;
1216                 }
1217
1218                 /* Lowest order bit is reserved */
1219                 if (keys[i] & 0x01) {
1220                         r = -EINVAL;
1221                         break;
1222                 }
1223
1224                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1225                 if (r)
1226                         break;
1227         }
1228         up_read(&current->mm->mmap_sem);
1229 out:
1230         kvfree(keys);
1231         return r;
1232 }
1233
1234 long kvm_arch_vm_ioctl(struct file *filp,
1235                        unsigned int ioctl, unsigned long arg)
1236 {
1237         struct kvm *kvm = filp->private_data;
1238         void __user *argp = (void __user *)arg;
1239         struct kvm_device_attr attr;
1240         int r;
1241
1242         switch (ioctl) {
1243         case KVM_S390_INTERRUPT: {
1244                 struct kvm_s390_interrupt s390int;
1245
1246                 r = -EFAULT;
1247                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1248                         break;
1249                 r = kvm_s390_inject_vm(kvm, &s390int);
1250                 break;
1251         }
1252         case KVM_ENABLE_CAP: {
1253                 struct kvm_enable_cap cap;
1254                 r = -EFAULT;
1255                 if (copy_from_user(&cap, argp, sizeof(cap)))
1256                         break;
1257                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1258                 break;
1259         }
1260         case KVM_CREATE_IRQCHIP: {
1261                 struct kvm_irq_routing_entry routing;
1262
1263                 r = -EINVAL;
1264                 if (kvm->arch.use_irqchip) {
1265                         /* Set up dummy routing. */
1266                         memset(&routing, 0, sizeof(routing));
1267                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1268                 }
1269                 break;
1270         }
1271         case KVM_SET_DEVICE_ATTR: {
1272                 r = -EFAULT;
1273                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1274                         break;
1275                 r = kvm_s390_vm_set_attr(kvm, &attr);
1276                 break;
1277         }
1278         case KVM_GET_DEVICE_ATTR: {
1279                 r = -EFAULT;
1280                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1281                         break;
1282                 r = kvm_s390_vm_get_attr(kvm, &attr);
1283                 break;
1284         }
1285         case KVM_HAS_DEVICE_ATTR: {
1286                 r = -EFAULT;
1287                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1288                         break;
1289                 r = kvm_s390_vm_has_attr(kvm, &attr);
1290                 break;
1291         }
1292         case KVM_S390_GET_SKEYS: {
1293                 struct kvm_s390_skeys args;
1294
1295                 r = -EFAULT;
1296                 if (copy_from_user(&args, argp,
1297                                    sizeof(struct kvm_s390_skeys)))
1298                         break;
1299                 r = kvm_s390_get_skeys(kvm, &args);
1300                 break;
1301         }
1302         case KVM_S390_SET_SKEYS: {
1303                 struct kvm_s390_skeys args;
1304
1305                 r = -EFAULT;
1306                 if (copy_from_user(&args, argp,
1307                                    sizeof(struct kvm_s390_skeys)))
1308                         break;
1309                 r = kvm_s390_set_skeys(kvm, &args);
1310                 break;
1311         }
1312         default:
1313                 r = -ENOTTY;
1314         }
1315
1316         return r;
1317 }
1318
1319 static int kvm_s390_query_ap_config(u8 *config)
1320 {
1321         u32 fcn_code = 0x04000000UL;
1322         u32 cc = 0;
1323
1324         memset(config, 0, 128);
1325         asm volatile(
1326                 "lgr 0,%1\n"
1327                 "lgr 2,%2\n"
1328                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1329                 "0: ipm %0\n"
1330                 "srl %0,28\n"
1331                 "1:\n"
1332                 EX_TABLE(0b, 1b)
1333                 : "+r" (cc)
1334                 : "r" (fcn_code), "r" (config)
1335                 : "cc", "0", "2", "memory"
1336         );
1337
1338         return cc;
1339 }
1340
1341 static int kvm_s390_apxa_installed(void)
1342 {
1343         u8 config[128];
1344         int cc;
1345
1346         if (test_facility(12)) {
1347                 cc = kvm_s390_query_ap_config(config);
1348
1349                 if (cc)
1350                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1351                 else
1352                         return config[0] & 0x40;
1353         }
1354
1355         return 0;
1356 }
1357
1358 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1359 {
1360         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1361
1362         if (kvm_s390_apxa_installed())
1363                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1364         else
1365                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1366 }
1367
1368 static u64 kvm_s390_get_initial_cpuid(void)
1369 {
1370         struct cpuid cpuid;
1371
1372         get_cpu_id(&cpuid);
1373         cpuid.version = 0xff;
1374         return *((u64 *) &cpuid);
1375 }
1376
1377 static void kvm_s390_crypto_init(struct kvm *kvm)
1378 {
1379         if (!test_kvm_facility(kvm, 76))
1380                 return;
1381
1382         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1383         kvm_s390_set_crycb_format(kvm);
1384
1385         /* Enable AES/DEA protected key functions by default */
1386         kvm->arch.crypto.aes_kw = 1;
1387         kvm->arch.crypto.dea_kw = 1;
1388         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1389                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1390         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1391                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1392 }
1393
1394 static void sca_dispose(struct kvm *kvm)
1395 {
1396         if (kvm->arch.use_esca)
1397                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1398         else
1399                 free_page((unsigned long)(kvm->arch.sca));
1400         kvm->arch.sca = NULL;
1401 }
1402
1403 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1404 {
1405         gfp_t alloc_flags = GFP_KERNEL;
1406         int i, rc;
1407         char debug_name[16];
1408         static unsigned long sca_offset;
1409
1410         rc = -EINVAL;
1411 #ifdef CONFIG_KVM_S390_UCONTROL
1412         if (type & ~KVM_VM_S390_UCONTROL)
1413                 goto out_err;
1414         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1415                 goto out_err;
1416 #else
1417         if (type)
1418                 goto out_err;
1419 #endif
1420
1421         rc = s390_enable_sie();
1422         if (rc)
1423                 goto out_err;
1424
1425         rc = -ENOMEM;
1426
1427         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1428
1429         kvm->arch.use_esca = 0; /* start with basic SCA */
1430         if (!sclp.has_64bscao)
1431                 alloc_flags |= GFP_DMA;
1432         rwlock_init(&kvm->arch.sca_lock);
1433         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1434         if (!kvm->arch.sca)
1435                 goto out_err;
1436         mutex_lock(&kvm_lock);
1437         sca_offset += 16;
1438         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1439                 sca_offset = 0;
1440         kvm->arch.sca = (struct bsca_block *)
1441                         ((char *) kvm->arch.sca + sca_offset);
1442         mutex_unlock(&kvm_lock);
1443
1444         sprintf(debug_name, "kvm-%u", current->pid);
1445
1446         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1447         if (!kvm->arch.dbf)
1448                 goto out_err;
1449
1450         kvm->arch.sie_page2 =
1451              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1452         if (!kvm->arch.sie_page2)
1453                 goto out_err;
1454
1455         /* Populate the facility mask initially. */
1456         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1457                sizeof(S390_lowcore.stfle_fac_list));
1458         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1459                 if (i < kvm_s390_fac_list_mask_size())
1460                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1461                 else
1462                         kvm->arch.model.fac_mask[i] = 0UL;
1463         }
1464
1465         /* Populate the facility list initially. */
1466         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1467         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1468                S390_ARCH_FAC_LIST_SIZE_BYTE);
1469
1470         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1471         set_kvm_facility(kvm->arch.model.fac_list, 74);
1472
1473         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1474         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1475
1476         kvm_s390_crypto_init(kvm);
1477
1478         spin_lock_init(&kvm->arch.float_int.lock);
1479         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1480                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1481         init_waitqueue_head(&kvm->arch.ipte_wq);
1482         mutex_init(&kvm->arch.ipte_mutex);
1483
1484         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1485         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1486
1487         if (type & KVM_VM_S390_UCONTROL) {
1488                 kvm->arch.gmap = NULL;
1489                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1490         } else {
1491                 if (sclp.hamax == U64_MAX)
1492                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1493                 else
1494                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1495                                                     sclp.hamax + 1);
1496                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1497                 if (!kvm->arch.gmap)
1498                         goto out_err;
1499                 kvm->arch.gmap->private = kvm;
1500                 kvm->arch.gmap->pfault_enabled = 0;
1501         }
1502
1503         kvm->arch.css_support = 0;
1504         kvm->arch.use_irqchip = 0;
1505         kvm->arch.epoch = 0;
1506
1507         spin_lock_init(&kvm->arch.start_stop_lock);
1508         kvm_s390_vsie_init(kvm);
1509         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1510
1511         return 0;
1512 out_err:
1513         free_page((unsigned long)kvm->arch.sie_page2);
1514         debug_unregister(kvm->arch.dbf);
1515         sca_dispose(kvm);
1516         KVM_EVENT(3, "creation of vm failed: %d", rc);
1517         return rc;
1518 }
1519
/* Report that there are no architecture-specific per-VCPU debugfs entries. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}
1524
/* Nothing to create for a VCPU in debugfs; always returns 0. */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
1529
1530 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1531 {
1532         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1533         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1534         kvm_s390_clear_local_irqs(vcpu);
1535         kvm_clear_async_pf_completion_queue(vcpu);
1536         if (!kvm_is_ucontrol(vcpu->kvm))
1537                 sca_del_vcpu(vcpu);
1538
1539         if (kvm_is_ucontrol(vcpu->kvm))
1540                 gmap_remove(vcpu->arch.gmap);
1541
1542         if (vcpu->kvm->arch.use_cmma)
1543                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1544         free_page((unsigned long)(vcpu->arch.sie_block));
1545
1546         kvm_vcpu_uninit(vcpu);
1547         kmem_cache_free(kvm_vcpu_cache, vcpu);
1548 }
1549
1550 static void kvm_free_vcpus(struct kvm *kvm)
1551 {
1552         unsigned int i;
1553         struct kvm_vcpu *vcpu;
1554
1555         kvm_for_each_vcpu(i, vcpu, kvm)
1556                 kvm_arch_vcpu_destroy(vcpu);
1557
1558         mutex_lock(&kvm->lock);
1559         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1560                 kvm->vcpus[i] = NULL;
1561
1562         atomic_set(&kvm->online_vcpus, 0);
1563         mutex_unlock(&kvm->lock);
1564 }
1565
1566 void kvm_arch_destroy_vm(struct kvm *kvm)
1567 {
1568         kvm_free_vcpus(kvm);
1569         sca_dispose(kvm);
1570         debug_unregister(kvm->arch.dbf);
1571         free_page((unsigned long)kvm->arch.sie_page2);
1572         if (!kvm_is_ucontrol(kvm))
1573                 gmap_remove(kvm->arch.gmap);
1574         kvm_s390_destroy_adapters(kvm);
1575         kvm_s390_clear_float_irqs(kvm);
1576         kvm_s390_vsie_destroy(kvm);
1577         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1578 }
1579
1580 /* Section: vcpu related */
1581 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1582 {
1583         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1584         if (!vcpu->arch.gmap)
1585                 return -ENOMEM;
1586         vcpu->arch.gmap->private = vcpu->kvm;
1587
1588         return 0;
1589 }
1590
1591 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1592 {
1593         if (!kvm_s390_use_sca_entries())
1594                 return;
1595         read_lock(&vcpu->kvm->arch.sca_lock);
1596         if (vcpu->kvm->arch.use_esca) {
1597                 struct esca_block *sca = vcpu->kvm->arch.sca;
1598
1599                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1600                 sca->cpu[vcpu->vcpu_id].sda = 0;
1601         } else {
1602                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1603
1604                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1605                 sca->cpu[vcpu->vcpu_id].sda = 0;
1606         }
1607         read_unlock(&vcpu->kvm->arch.sca_lock);
1608 }
1609
1610 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1611 {
1612         if (!kvm_s390_use_sca_entries()) {
1613                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1614
1615                 /* we still need the basic sca for the ipte control */
1616                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1617                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1618                 return;
1619         }
1620         read_lock(&vcpu->kvm->arch.sca_lock);
1621         if (vcpu->kvm->arch.use_esca) {
1622                 struct esca_block *sca = vcpu->kvm->arch.sca;
1623
1624                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1625                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1626                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1627                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1628                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1629         } else {
1630                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1631
1632                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1633                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1634                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1635                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1636         }
1637         read_unlock(&vcpu->kvm->arch.sca_lock);
1638 }
1639
1640 /* Basic SCA to Extended SCA data copy routines */
1641 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1642 {
1643         d->sda = s->sda;
1644         d->sigp_ctrl.c = s->sigp_ctrl.c;
1645         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1646 }
1647
1648 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1649 {
1650         int i;
1651
1652         d->ipte_control = s->ipte_control;
1653         d->mcn[0] = s->mcn;
1654         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1655                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1656 }
1657
1658 static int sca_switch_to_extended(struct kvm *kvm)
1659 {
1660         struct bsca_block *old_sca = kvm->arch.sca;
1661         struct esca_block *new_sca;
1662         struct kvm_vcpu *vcpu;
1663         unsigned int vcpu_idx;
1664         u32 scaol, scaoh;
1665
1666         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1667         if (!new_sca)
1668                 return -ENOMEM;
1669
1670         scaoh = (u32)((u64)(new_sca) >> 32);
1671         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1672
1673         kvm_s390_vcpu_block_all(kvm);
1674         write_lock(&kvm->arch.sca_lock);
1675
1676         sca_copy_b_to_e(new_sca, old_sca);
1677
1678         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1679                 vcpu->arch.sie_block->scaoh = scaoh;
1680                 vcpu->arch.sie_block->scaol = scaol;
1681                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1682         }
1683         kvm->arch.sca = new_sca;
1684         kvm->arch.use_esca = 1;
1685
1686         write_unlock(&kvm->arch.sca_lock);
1687         kvm_s390_vcpu_unblock_all(kvm);
1688
1689         free_page((unsigned long)old_sca);
1690
1691         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1692                  old_sca, kvm->arch.sca);
1693         return 0;
1694 }
1695
1696 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1697 {
1698         int rc;
1699
1700         if (!kvm_s390_use_sca_entries()) {
1701                 if (id < KVM_MAX_VCPUS)
1702                         return true;
1703                 return false;
1704         }
1705         if (id < KVM_S390_BSCA_CPU_SLOTS)
1706                 return true;
1707         if (!sclp.has_esca || !sclp.has_64bscao)
1708                 return false;
1709
1710         mutex_lock(&kvm->lock);
1711         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1712         mutex_unlock(&kvm->lock);
1713
1714         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1715 }
1716
1717 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1718 {
1719         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1720         kvm_clear_async_pf_completion_queue(vcpu);
1721         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1722                                     KVM_SYNC_GPRS |
1723                                     KVM_SYNC_ACRS |
1724                                     KVM_SYNC_CRS |
1725                                     KVM_SYNC_ARCH0 |
1726                                     KVM_SYNC_PFAULT;
1727         kvm_s390_set_prefix(vcpu, 0);
1728         if (test_kvm_facility(vcpu->kvm, 64))
1729                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1730         if (test_kvm_facility(vcpu->kvm, 82))
1731                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
1732         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1733          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1734          */
1735         if (MACHINE_HAS_VX)
1736                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1737         else
1738                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1739
1740         if (kvm_is_ucontrol(vcpu->kvm))
1741                 return __kvm_ucontrol_vcpu_init(vcpu);
1742
1743         return 0;
1744 }
1745
1746 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1747 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1748 {
1749         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1750         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1751         vcpu->arch.cputm_start = get_tod_clock_fast();
1752         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1753 }
1754
1755 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1756 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1757 {
1758         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1759         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1760         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1761         vcpu->arch.cputm_start = 0;
1762         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1763 }
1764
1765 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1766 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1767 {
1768         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1769         vcpu->arch.cputm_enabled = true;
1770         __start_cpu_timer_accounting(vcpu);
1771 }
1772
1773 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1774 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1775 {
1776         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1777         __stop_cpu_timer_accounting(vcpu);
1778         vcpu->arch.cputm_enabled = false;
1779 }
1780
/*
 * Wrapper around __enable_cpu_timer_accounting() that disables preemption
 * itself, to protect from TOD sync and vcpu_load/put.
 */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        preempt_disable();
        __enable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
1787
/*
 * Wrapper around __disable_cpu_timer_accounting() that disables preemption
 * itself, to protect from TOD sync and vcpu_load/put.
 */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        preempt_disable();
        __disable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
1794
1795 /* set the cpu timer - may only be called from the VCPU thread itself */
1796 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1797 {
1798         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1799         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1800         if (vcpu->arch.cputm_enabled)
1801                 vcpu->arch.cputm_start = get_tod_clock_fast();
1802         vcpu->arch.sie_block->cputm = cputm;
1803         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1804         preempt_enable();
1805 }
1806
1807 /* update and get the cpu timer - can also be called from other VCPU threads */
1808 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1809 {
1810         unsigned int seq;
1811         __u64 value;
1812
1813         if (unlikely(!vcpu->arch.cputm_enabled))
1814                 return vcpu->arch.sie_block->cputm;
1815
1816         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1817         do {
1818                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1819                 /*
1820                  * If the writer would ever execute a read in the critical
1821                  * section, e.g. in irq context, we have a deadlock.
1822                  */
1823                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1824                 value = vcpu->arch.sie_block->cputm;
1825                 /* if cputm_start is 0, accounting is being started/stopped */
1826                 if (likely(vcpu->arch.cputm_start))
1827                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1828         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1829         preempt_enable();
1830         return value;
1831 }
1832
1833 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1834 {
1835         /* Save host register state */
1836         save_fpu_regs();
1837         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1838         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1839
1840         if (MACHINE_HAS_VX)
1841                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1842         else
1843                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1844         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1845         if (test_fp_ctl(current->thread.fpu.fpc))
1846                 /* User space provided an invalid FPC, let's clear it */
1847                 current->thread.fpu.fpc = 0;
1848         save_access_regs(vcpu->arch.host_acrs);
1849         restore_access_regs(vcpu->run->s.regs.acrs);
1850         gmap_enable(vcpu->arch.enabled_gmap);
1851         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1852         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1853                 __start_cpu_timer_accounting(vcpu);
1854         vcpu->cpu = cpu;
1855 }
1856
1857 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1858 {
1859         vcpu->cpu = -1;
1860         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1861                 __stop_cpu_timer_accounting(vcpu);
1862         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1863         vcpu->arch.enabled_gmap = gmap_get_enabled();
1864         gmap_disable(vcpu->arch.enabled_gmap);
1865
1866         /* Save guest register state */
1867         save_fpu_regs();
1868         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1869
1870         /* Restore host register state */
1871         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1872         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1873
1874         save_access_regs(vcpu->run->s.regs.acrs);
1875         restore_access_regs(vcpu->arch.host_acrs);
1876 }
1877
1878 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1879 {
1880         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1881         vcpu->arch.sie_block->gpsw.mask = 0UL;
1882         vcpu->arch.sie_block->gpsw.addr = 0UL;
1883         kvm_s390_set_prefix(vcpu, 0);
1884         kvm_s390_set_cpu_timer(vcpu, 0);
1885         vcpu->arch.sie_block->ckc       = 0UL;
1886         vcpu->arch.sie_block->todpr     = 0;
1887         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1888         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1889         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1890         /* make sure the new fpc will be lazily loaded */
1891         save_fpu_regs();
1892         current->thread.fpu.fpc = 0;
1893         vcpu->arch.sie_block->gbea = 1;
1894         vcpu->arch.sie_block->pp = 0;
1895         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
1896         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1897         kvm_clear_async_pf_completion_queue(vcpu);
1898         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1899                 kvm_s390_vcpu_stop(vcpu);
1900         kvm_s390_clear_local_irqs(vcpu);
1901 }
1902
1903 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1904 {
1905         mutex_lock(&vcpu->kvm->lock);
1906         preempt_disable();
1907         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1908         preempt_enable();
1909         mutex_unlock(&vcpu->kvm->lock);
1910         if (!kvm_is_ucontrol(vcpu->kvm)) {
1911                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1912                 sca_add_vcpu(vcpu);
1913         }
1914         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1915                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1916         /* make vcpu_load load the right gmap on the first trigger */
1917         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1918 }
1919
1920 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1921 {
1922         if (!test_kvm_facility(vcpu->kvm, 76))
1923                 return;
1924
1925         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1926
1927         if (vcpu->kvm->arch.crypto.aes_kw)
1928                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1929         if (vcpu->kvm->arch.crypto.dea_kw)
1930                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1931
1932         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1933 }
1934
1935 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1936 {
1937         free_page(vcpu->arch.sie_block->cbrlo);
1938         vcpu->arch.sie_block->cbrlo = 0;
1939 }
1940
1941 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1942 {
1943         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1944         if (!vcpu->arch.sie_block->cbrlo)
1945                 return -ENOMEM;
1946
1947         vcpu->arch.sie_block->ecb2 |= 0x80;
1948         vcpu->arch.sie_block->ecb2 &= ~0x08;
1949         return 0;
1950 }
1951
1952 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1953 {
1954         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1955
1956         vcpu->arch.sie_block->ibc = model->ibc;
1957         if (test_kvm_facility(vcpu->kvm, 7))
1958                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1959 }
1960
1961 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1962 {
1963         int rc = 0;
1964
1965         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1966                                                     CPUSTAT_SM |
1967                                                     CPUSTAT_STOPPED);
1968
1969         if (test_kvm_facility(vcpu->kvm, 78))
1970                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1971         else if (test_kvm_facility(vcpu->kvm, 8))
1972                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1973
1974         kvm_s390_vcpu_setup_model(vcpu);
1975
1976         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1977         if (MACHINE_HAS_ESOP)
1978                 vcpu->arch.sie_block->ecb |= 0x02;
1979         if (test_kvm_facility(vcpu->kvm, 9))
1980                 vcpu->arch.sie_block->ecb |= 0x04;
1981         if (test_kvm_facility(vcpu->kvm, 73))
1982                 vcpu->arch.sie_block->ecb |= 0x10;
1983
1984         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1985                 vcpu->arch.sie_block->ecb2 |= 0x08;
1986         vcpu->arch.sie_block->eca = 0x1002000U;
1987         if (sclp.has_cei)
1988                 vcpu->arch.sie_block->eca |= 0x80000000U;
1989         if (sclp.has_ib)
1990                 vcpu->arch.sie_block->eca |= 0x40000000U;
1991         if (sclp.has_siif)
1992                 vcpu->arch.sie_block->eca |= 1;
1993         if (sclp.has_sigpif)
1994                 vcpu->arch.sie_block->eca |= 0x10000000U;
1995         if (test_kvm_facility(vcpu->kvm, 129)) {
1996                 vcpu->arch.sie_block->eca |= 0x00020000;
1997                 vcpu->arch.sie_block->ecd |= 0x20000000;
1998         }
1999         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2000         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2001
2002         if (vcpu->kvm->arch.use_cmma) {
2003                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2004                 if (rc)
2005                         return rc;
2006         }
2007         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2008         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2009
2010         kvm_s390_vcpu_crypto_setup(vcpu);
2011
2012         return rc;
2013 }
2014
2015 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2016                                       unsigned int id)
2017 {
2018         struct kvm_vcpu *vcpu;
2019         struct sie_page *sie_page;
2020         int rc = -EINVAL;
2021
2022         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2023                 goto out;
2024
2025         rc = -ENOMEM;
2026
2027         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2028         if (!vcpu)
2029                 goto out;
2030
2031         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2032         if (!sie_page)
2033                 goto out_free_cpu;
2034
2035         vcpu->arch.sie_block = &sie_page->sie_block;
2036         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2037
2038         /* the real guest size will always be smaller than msl */
2039         vcpu->arch.sie_block->mso = 0;
2040         vcpu->arch.sie_block->msl = sclp.hamax;
2041
2042         vcpu->arch.sie_block->icpua = id;
2043         spin_lock_init(&vcpu->arch.local_int.lock);
2044         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2045         vcpu->arch.local_int.wq = &vcpu->wq;
2046         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2047         seqcount_init(&vcpu->arch.cputm_seqcount);
2048
2049         rc = kvm_vcpu_init(vcpu, kvm, id);
2050         if (rc)
2051                 goto out_free_sie_block;
2052         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2053                  vcpu->arch.sie_block);
2054         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2055
2056         return vcpu;
2057 out_free_sie_block:
2058         free_page((unsigned long)(vcpu->arch.sie_block));
2059 out_free_cpu:
2060         kmem_cache_free(kvm_vcpu_cache, vcpu);
2061 out:
2062         return ERR_PTR(rc);
2063 }
2064
/* Report the result of kvm_s390_vcpu_has_irq(vcpu, 0) as runnable state. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
        int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

        return has_irq;
}
2069
2070 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2071 {
2072         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2073         exit_sie(vcpu);
2074 }
2075
2076 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2077 {
2078         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2079 }
2080
2081 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2082 {
2083         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2084         exit_sie(vcpu);
2085 }
2086
2087 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2088 {
2089         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2090 }
2091
2092 /*
2093  * Kick a guest cpu out of SIE and wait until SIE is not running.
2094  * If the CPU is not running (e.g. waiting as idle) the function will
2095  * return immediately. */
2096 void exit_sie(struct kvm_vcpu *vcpu)
2097 {
2098         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2099         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2100                 cpu_relax();
2101 }
2102
/*
 * Post request @req for @vcpu and kick the guest cpu out of SIE so that
 * the request is processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
        kvm_make_request(req, vcpu);
        kvm_s390_vcpu_request(vcpu);
}
2109
2110 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2111                               unsigned long end)
2112 {
2113         struct kvm *kvm = gmap->private;
2114         struct kvm_vcpu *vcpu;
2115         unsigned long prefix;
2116         int i;
2117
2118         if (gmap_is_shadow(gmap))
2119                 return;
2120         if (start >= 1UL << 31)
2121                 /* We are only interested in prefix pages */
2122                 return;
2123         kvm_for_each_vcpu(i, vcpu, kvm) {
2124                 /* match against both prefix pages */
2125                 prefix = kvm_s390_get_prefix(vcpu);
2126                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2127                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2128                                    start, end);
2129                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2130                 }
2131         }
2132 }
2133
/*
 * kvm common code refers to this, but never calls it; reaching it is
 * therefore treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        BUG();
        return 0;
}
2140
2141 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2142                                            struct kvm_one_reg *reg)
2143 {
2144         int r = -EINVAL;
2145
2146         switch (reg->id) {
2147         case KVM_REG_S390_TODPR:
2148                 r = put_user(vcpu->arch.sie_block->todpr,
2149                              (u32 __user *)reg->addr);
2150                 break;
2151         case KVM_REG_S390_EPOCHDIFF:
2152                 r = put_user(vcpu->arch.sie_block->epoch,
2153                              (u64 __user *)reg->addr);
2154                 break;
2155         case KVM_REG_S390_CPU_TIMER:
2156                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2157                              (u64 __user *)reg->addr);
2158                 break;
2159         case KVM_REG_S390_CLOCK_COMP:
2160                 r = put_user(vcpu->arch.sie_block->ckc,
2161                              (u64 __user *)reg->addr);
2162                 break;
2163         case KVM_REG_S390_PFTOKEN:
2164                 r = put_user(vcpu->arch.pfault_token,
2165                              (u64 __user *)reg->addr);
2166                 break;
2167         case KVM_REG_S390_PFCOMPARE:
2168                 r = put_user(vcpu->arch.pfault_compare,
2169                              (u64 __user *)reg->addr);
2170                 break;
2171         case KVM_REG_S390_PFSELECT:
2172                 r = put_user(vcpu->arch.pfault_select,
2173                              (u64 __user *)reg->addr);
2174                 break;
2175         case KVM_REG_S390_PP:
2176                 r = put_user(vcpu->arch.sie_block->pp,
2177                              (u64 __user *)reg->addr);
2178                 break;
2179         case KVM_REG_S390_GBEA:
2180                 r = put_user(vcpu->arch.sie_block->gbea,
2181                              (u64 __user *)reg->addr);
2182                 break;
2183         default:
2184                 break;
2185         }
2186
2187         return r;
2188 }
2189
2190 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2191                                            struct kvm_one_reg *reg)
2192 {
2193         int r = -EINVAL;
2194         __u64 val;
2195
2196         switch (reg->id) {
2197         case KVM_REG_S390_TODPR:
2198                 r = get_user(vcpu->arch.sie_block->todpr,
2199                              (u32 __user *)reg->addr);
2200                 break;
2201         case KVM_REG_S390_EPOCHDIFF:
2202                 r = get_user(vcpu->arch.sie_block->epoch,
2203                              (u64 __user *)reg->addr);
2204                 break;
2205         case KVM_REG_S390_CPU_TIMER:
2206                 r = get_user(val, (u64 __user *)reg->addr);
2207                 if (!r)
2208                         kvm_s390_set_cpu_timer(vcpu, val);
2209                 break;
2210         case KVM_REG_S390_CLOCK_COMP:
2211                 r = get_user(vcpu->arch.sie_block->ckc,
2212                              (u64 __user *)reg->addr);
2213                 break;
2214         case KVM_REG_S390_PFTOKEN:
2215                 r = get_user(vcpu->arch.pfault_token,
2216                              (u64 __user *)reg->addr);
2217                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2218                         kvm_clear_async_pf_completion_queue(vcpu);
2219                 break;
2220         case KVM_REG_S390_PFCOMPARE:
2221                 r = get_user(vcpu->arch.pfault_compare,
2222                              (u64 __user *)reg->addr);
2223                 break;
2224         case KVM_REG_S390_PFSELECT:
2225                 r = get_user(vcpu->arch.pfault_select,
2226                              (u64 __user *)reg->addr);
2227                 break;
2228         case KVM_REG_S390_PP:
2229                 r = get_user(vcpu->arch.sie_block->pp,
2230                              (u64 __user *)reg->addr);
2231                 break;
2232         case KVM_REG_S390_GBEA:
2233                 r = get_user(vcpu->arch.sie_block->gbea,
2234                              (u64 __user *)reg->addr);
2235                 break;
2236         default:
2237                 break;
2238         }
2239
2240         return r;
2241 }
2242
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
        kvm_s390_vcpu_initial_reset(vcpu);

        return 0;
}
2248
2249 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2250 {
2251         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2252         return 0;
2253 }
2254
2255 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2256 {
2257         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2258         return 0;
2259 }
2260
2261 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2262                                   struct kvm_sregs *sregs)
2263 {
2264         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2265         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2266         restore_access_regs(vcpu->run->s.regs.acrs);
2267         return 0;
2268 }
2269
2270 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2271                                   struct kvm_sregs *sregs)
2272 {
2273         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2274         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2275         return 0;
2276 }
2277
2278 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2279 {
2280         /* make sure the new values will be lazily loaded */
2281         save_fpu_regs();
2282         if (test_fp_ctl(fpu->fpc))
2283                 return -EINVAL;
2284         current->thread.fpu.fpc = fpu->fpc;
2285         if (MACHINE_HAS_VX)
2286                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2287                                  (freg_t *) fpu->fprs);
2288         else
2289                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2290         return 0;
2291 }
2292
2293 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2294 {
2295         /* make sure we have the latest values */
2296         save_fpu_regs();
2297         if (MACHINE_HAS_VX)
2298                 convert_vx_to_fp((freg_t *) fpu->fprs,
2299                                  (__vector128 *) vcpu->run->s.regs.vrs);
2300         else
2301                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2302         fpu->fpc = current->thread.fpu.fpc;
2303         return 0;
2304 }
2305
2306 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2307 {
2308         int rc = 0;
2309
2310         if (!is_vcpu_stopped(vcpu))
2311                 rc = -EBUSY;
2312         else {
2313                 vcpu->run->psw_mask = psw.mask;
2314                 vcpu->run->psw_addr = psw.addr;
2315         }
2316         return rc;
2317 }
2318
2319 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2320                                   struct kvm_translation *tr)
2321 {
2322         return -EINVAL; /* not implemented yet */
2323 }
2324
2325 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2326                               KVM_GUESTDBG_USE_HW_BP | \
2327                               KVM_GUESTDBG_ENABLE)
2328
2329 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2330                                         struct kvm_guest_debug *dbg)
2331 {
2332         int rc = 0;
2333
2334         vcpu->guest_debug = 0;
2335         kvm_s390_clear_bp_data(vcpu);
2336
2337         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2338                 return -EINVAL;
2339         if (!sclp.has_gpere)
2340                 return -EINVAL;
2341
2342         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2343                 vcpu->guest_debug = dbg->control;
2344                 /* enforce guest PER */
2345                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2346
2347                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2348                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2349         } else {
2350                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2351                 vcpu->arch.guestdbg.last_bp = 0;
2352         }
2353
2354         if (rc) {
2355                 vcpu->guest_debug = 0;
2356                 kvm_s390_clear_bp_data(vcpu);
2357                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2358         }
2359
2360         return rc;
2361 }
2362
2363 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2364                                     struct kvm_mp_state *mp_state)
2365 {
2366         /* CHECK_STOP and LOAD are not supported yet */
2367         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2368                                        KVM_MP_STATE_OPERATING;
2369 }
2370
2371 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2372                                     struct kvm_mp_state *mp_state)
2373 {
2374         int rc = 0;
2375
2376         /* user space knows about this interface - let it control the state */
2377         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2378
2379         switch (mp_state->mp_state) {
2380         case KVM_MP_STATE_STOPPED:
2381                 kvm_s390_vcpu_stop(vcpu);
2382                 break;
2383         case KVM_MP_STATE_OPERATING:
2384                 kvm_s390_vcpu_start(vcpu);
2385                 break;
2386         case KVM_MP_STATE_LOAD:
2387         case KVM_MP_STATE_CHECK_STOP:
2388                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2389         default:
2390                 rc = -ENXIO;
2391         }
2392
2393         return rc;
2394 }
2395
2396 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2397 {
2398         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2399 }
2400
2401 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2402 {
2403 retry:
2404         kvm_s390_vcpu_request_handled(vcpu);
2405         if (!vcpu->requests)
2406                 return 0;
2407         /*
2408          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2409          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2410          * This ensures that the ipte instruction for this request has
2411          * already finished. We might race against a second unmapper that
2412          * wants to set the blocking bit. Lets just retry the request loop.
2413          */
2414         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2415                 int rc;
2416                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2417                                           kvm_s390_get_prefix(vcpu),
2418                                           PAGE_SIZE * 2, PROT_WRITE);
2419                 if (rc) {
2420                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2421                         return rc;
2422                 }
2423                 goto retry;
2424         }
2425
2426         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2427                 vcpu->arch.sie_block->ihcpu = 0xffff;
2428                 goto retry;
2429         }
2430
2431         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2432                 if (!ibs_enabled(vcpu)) {
2433                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2434                         atomic_or(CPUSTAT_IBS,
2435                                         &vcpu->arch.sie_block->cpuflags);
2436                 }
2437                 goto retry;
2438         }
2439
2440         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2441                 if (ibs_enabled(vcpu)) {
2442                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2443                         atomic_andnot(CPUSTAT_IBS,
2444                                           &vcpu->arch.sie_block->cpuflags);
2445                 }
2446                 goto retry;
2447         }
2448
2449         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2450                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2451                 goto retry;
2452         }
2453
2454         /* nothing to do, just clear the request */
2455         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2456
2457         return 0;
2458 }
2459
2460 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2461 {
2462         struct kvm_vcpu *vcpu;
2463         int i;
2464
2465         mutex_lock(&kvm->lock);
2466         preempt_disable();
2467         kvm->arch.epoch = tod - get_tod_clock();
2468         kvm_s390_vcpu_block_all(kvm);
2469         kvm_for_each_vcpu(i, vcpu, kvm)
2470                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2471         kvm_s390_vcpu_unblock_all(kvm);
2472         preempt_enable();
2473         mutex_unlock(&kvm->lock);
2474 }
2475
2476 /**
2477  * kvm_arch_fault_in_page - fault-in guest page if necessary
2478  * @vcpu: The corresponding virtual cpu
2479  * @gpa: Guest physical address
2480  * @writable: Whether the page should be writable or not
2481  *
2482  * Make sure that a guest page has been faulted-in on the host.
2483  *
2484  * Return: Zero on success, negative error code otherwise.
2485  */
2486 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2487 {
2488         return gmap_fault(vcpu->arch.gmap, gpa,
2489                           writable ? FAULT_FLAG_WRITE : 0);
2490 }
2491
2492 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2493                                       unsigned long token)
2494 {
2495         struct kvm_s390_interrupt inti;
2496         struct kvm_s390_irq irq;
2497
2498         if (start_token) {
2499                 irq.u.ext.ext_params2 = token;
2500                 irq.type = KVM_S390_INT_PFAULT_INIT;
2501                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2502         } else {
2503                 inti.type = KVM_S390_INT_PFAULT_DONE;
2504                 inti.parm64 = token;
2505                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2506         }
2507 }
2508
2509 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2510                                      struct kvm_async_pf *work)
2511 {
2512         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2513         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2514 }
2515
2516 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2517                                  struct kvm_async_pf *work)
2518 {
2519         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2520         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2521 }
2522
/*
 * Intentionally empty: on s390 the page is always injected directly, so
 * there is nothing left to do when a page becomes ready.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                               struct kvm_async_pf *work)
{
}
2528
2529 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2530 {
2531         /*
2532          * s390 will always inject the page directly,
2533          * but we still want check_async_completion to cleanup
2534          */
2535         return true;
2536 }
2537
2538 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2539 {
2540         hva_t hva;
2541         struct kvm_arch_async_pf arch;
2542         int rc;
2543
2544         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2545                 return 0;
2546         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2547             vcpu->arch.pfault_compare)
2548                 return 0;
2549         if (psw_extint_disabled(vcpu))
2550                 return 0;
2551         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2552                 return 0;
2553         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2554                 return 0;
2555         if (!vcpu->arch.gmap->pfault_enabled)
2556                 return 0;
2557
2558         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2559         hva += current->thread.gmap_addr & ~PAGE_MASK;
2560         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2561                 return 0;
2562
2563         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2564         return rc;
2565 }
2566
2567 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2568 {
2569         int rc, cpuflags;
2570
2571         /*
2572          * On s390 notifications for arriving pages will be delivered directly
2573          * to the guest but the house keeping for completed pfaults is
2574          * handled outside the worker.
2575          */
2576         kvm_check_async_pf_completion(vcpu);
2577
2578         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2579         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2580
2581         if (need_resched())
2582                 schedule();
2583
2584         if (test_cpu_flag(CIF_MCCK_PENDING))
2585                 s390_handle_mcck();
2586
2587         if (!kvm_is_ucontrol(vcpu->kvm)) {
2588                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2589                 if (rc)
2590                         return rc;
2591         }
2592
2593         rc = kvm_s390_handle_requests(vcpu);
2594         if (rc)
2595                 return rc;
2596
2597         if (guestdbg_enabled(vcpu)) {
2598                 kvm_s390_backup_guest_per_regs(vcpu);
2599                 kvm_s390_patch_guest_per_regs(vcpu);
2600         }
2601
2602         vcpu->arch.sie_block->icptcode = 0;
2603         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2604         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2605         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2606
2607         return 0;
2608 }
2609
2610 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2611 {
2612         struct kvm_s390_pgm_info pgm_info = {
2613                 .code = PGM_ADDRESSING,
2614         };
2615         u8 opcode, ilen;
2616         int rc;
2617
2618         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2619         trace_kvm_s390_sie_fault(vcpu);
2620
2621         /*
2622          * We want to inject an addressing exception, which is defined as a
2623          * suppressing or terminating exception. However, since we came here
2624          * by a DAT access exception, the PSW still points to the faulting
2625          * instruction since DAT exceptions are nullifying. So we've got
2626          * to look up the current opcode to get the length of the instruction
2627          * to be able to forward the PSW.
2628          */
2629         rc = read_guest_instr(vcpu, &opcode, 1);
2630         ilen = insn_length(opcode);
2631         if (rc < 0) {
2632                 return rc;
2633         } else if (rc) {
2634                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2635                  * Forward by arbitrary ilc, injection will take care of
2636                  * nullification if necessary.
2637                  */
2638                 pgm_info = vcpu->arch.pgm;
2639                 ilen = 4;
2640         }
2641         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2642         kvm_s390_forward_psw(vcpu, ilen);
2643         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2644 }
2645
2646 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2647 {
2648         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2649                    vcpu->arch.sie_block->icptcode);
2650         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2651
2652         if (guestdbg_enabled(vcpu))
2653                 kvm_s390_restore_guest_per_regs(vcpu);
2654
2655         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2656         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2657
2658         if (vcpu->arch.sie_block->icptcode > 0) {
2659                 int rc = kvm_handle_sie_intercept(vcpu);
2660
2661                 if (rc != -EOPNOTSUPP)
2662                         return rc;
2663                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2664                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2665                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2666                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2667                 return -EREMOTE;
2668         } else if (exit_reason != -EFAULT) {
2669                 vcpu->stat.exit_null++;
2670                 return 0;
2671         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2672                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2673                 vcpu->run->s390_ucontrol.trans_exc_code =
2674                                                 current->thread.gmap_addr;
2675                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2676                 return -EREMOTE;
2677         } else if (current->thread.gmap_pfault) {
2678                 trace_kvm_s390_major_guest_pfault(vcpu);
2679                 current->thread.gmap_pfault = 0;
2680                 if (kvm_arch_setup_async_pf(vcpu))
2681                         return 0;
2682                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2683         }
2684         return vcpu_post_run_fault_in_sie(vcpu);
2685 }
2686
2687 static int __vcpu_run(struct kvm_vcpu *vcpu)
2688 {
2689         int rc, exit_reason;
2690
2691         /*
2692          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2693          * ning the guest), so that memslots (and other stuff) are protected
2694          */
2695         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2696
2697         do {
2698                 rc = vcpu_pre_run(vcpu);
2699                 if (rc)
2700                         break;
2701
2702                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2703                 /*
2704                  * As PF_VCPU will be used in fault handler, between
2705                  * guest_enter and guest_exit should be no uaccess.
2706                  */
2707                 local_irq_disable();
2708                 guest_enter_irqoff();
2709                 __disable_cpu_timer_accounting(vcpu);
2710                 local_irq_enable();
2711                 exit_reason = sie64a(vcpu->arch.sie_block,
2712                                      vcpu->run->s.regs.gprs);
2713                 local_irq_disable();
2714                 __enable_cpu_timer_accounting(vcpu);
2715                 guest_exit_irqoff();
2716                 local_irq_enable();
2717                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2718
2719                 rc = vcpu_post_run(vcpu, exit_reason);
2720         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2721
2722         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2723         return rc;
2724 }
2725
2726 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2727 {
2728         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2729         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2730         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2731                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2732         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2733                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2734                 /* some control register changes require a tlb flush */
2735                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2736         }
2737         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2738                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2739                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2740                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2741                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2742                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2743         }
2744         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2745                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2746                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2747                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2748                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2749                         kvm_clear_async_pf_completion_queue(vcpu);
2750         }
2751         /*
2752          * If userspace sets the riccb (e.g. after migration) to a valid state,
2753          * we should enable RI here instead of doing the lazy enablement.
2754          */
2755         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2756             test_kvm_facility(vcpu->kvm, 64)) {
2757                 struct runtime_instr_cb *riccb =
2758                         (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2759
2760                 if (riccb->valid)
2761                         vcpu->arch.sie_block->ecb3 |= 0x01;
2762         }
2763         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
2764             test_kvm_facility(vcpu->kvm, 82)) {
2765                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2766                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
2767         }
2768
2769         kvm_run->kvm_dirty_regs = 0;
2770 }
2771
2772 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2773 {
2774         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2775         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2776         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2777         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2778         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2779         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2780         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2781         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2782         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2783         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2784         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2785         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2786         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
2787 }
2788
2789 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2790 {
2791         int rc;
2792         sigset_t sigsaved;
2793
2794         if (guestdbg_exit_pending(vcpu)) {
2795                 kvm_s390_prepare_debug_exit(vcpu);
2796                 return 0;
2797         }
2798
2799         if (vcpu->sigset_active)
2800                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2801
2802         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2803                 kvm_s390_vcpu_start(vcpu);
2804         } else if (is_vcpu_stopped(vcpu)) {
2805                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2806                                    vcpu->vcpu_id);
2807                 return -EINVAL;
2808         }
2809
2810         sync_regs(vcpu, kvm_run);
2811         enable_cpu_timer_accounting(vcpu);
2812
2813         might_fault();
2814         rc = __vcpu_run(vcpu);
2815
2816         if (signal_pending(current) && !rc) {
2817                 kvm_run->exit_reason = KVM_EXIT_INTR;
2818                 rc = -EINTR;
2819         }
2820
2821         if (guestdbg_exit_pending(vcpu) && !rc)  {
2822                 kvm_s390_prepare_debug_exit(vcpu);
2823                 rc = 0;
2824         }
2825
2826         if (rc == -EREMOTE) {
2827                 /* userspace support is needed, kvm_run has been prepared */
2828                 rc = 0;
2829         }
2830
2831         disable_cpu_timer_accounting(vcpu);
2832         store_regs(vcpu, kvm_run);
2833
2834         if (vcpu->sigset_active)
2835                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2836
2837         vcpu->stat.exit_userspace++;
2838         return rc;
2839 }
2840
2841 /*
2842  * store status at address
2843  * we use have two special cases:
2844  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2845  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2846  */
2847 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2848 {
2849         unsigned char archmode = 1;
2850         freg_t fprs[NUM_FPRS];
2851         unsigned int px;
2852         u64 clkcomp, cputm;
2853         int rc;
2854
2855         px = kvm_s390_get_prefix(vcpu);
2856         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2857                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2858                         return -EFAULT;
2859                 gpa = 0;
2860         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2861                 if (write_guest_real(vcpu, 163, &archmode, 1))
2862                         return -EFAULT;
2863                 gpa = px;
2864         } else
2865                 gpa -= __LC_FPREGS_SAVE_AREA;
2866
2867         /* manually convert vector registers if necessary */
2868         if (MACHINE_HAS_VX) {
2869                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2870                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2871                                      fprs, 128);
2872         } else {
2873                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2874                                      vcpu->run->s.regs.fprs, 128);
2875         }
2876         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2877                               vcpu->run->s.regs.gprs, 128);
2878         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2879                               &vcpu->arch.sie_block->gpsw, 16);
2880         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2881                               &px, 4);
2882         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2883                               &vcpu->run->s.regs.fpc, 4);
2884         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2885                               &vcpu->arch.sie_block->todpr, 4);
2886         cputm = kvm_s390_get_cpu_timer(vcpu);
2887         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2888                               &cputm, 8);
2889         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2890         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2891                               &clkcomp, 8);
2892         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2893                               &vcpu->run->s.regs.acrs, 64);
2894         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2895                               &vcpu->arch.sie_block->gcr, 128);
2896         return rc ? -EFAULT : 0;
2897 }
2898
2899 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2900 {
2901         /*
2902          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2903          * copying in vcpu load/put. Lets update our copies before we save
2904          * it into the save area
2905          */
2906         save_fpu_regs();
2907         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2908         save_access_regs(vcpu->run->s.regs.acrs);
2909
2910         return kvm_s390_store_status_unloaded(vcpu, addr);
2911 }
2912
2913 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2914 {
2915         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2916         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2917 }
2918
2919 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2920 {
2921         unsigned int i;
2922         struct kvm_vcpu *vcpu;
2923
2924         kvm_for_each_vcpu(i, vcpu, kvm) {
2925                 __disable_ibs_on_vcpu(vcpu);
2926         }
2927 }
2928
2929 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2930 {
2931         if (!sclp.has_ibs)
2932                 return;
2933         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2934         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2935 }
2936
2937 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2938 {
2939         int i, online_vcpus, started_vcpus = 0;
2940
2941         if (!is_vcpu_stopped(vcpu))
2942                 return;
2943
2944         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2945         /* Only one cpu at a time may enter/leave the STOPPED state. */
2946         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2947         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2948
2949         for (i = 0; i < online_vcpus; i++) {
2950                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2951                         started_vcpus++;
2952         }
2953
2954         if (started_vcpus == 0) {
2955                 /* we're the only active VCPU -> speed it up */
2956                 __enable_ibs_on_vcpu(vcpu);
2957         } else if (started_vcpus == 1) {
2958                 /*
2959                  * As we are starting a second VCPU, we have to disable
2960                  * the IBS facility on all VCPUs to remove potentially
2961                  * oustanding ENABLE requests.
2962                  */
2963                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2964         }
2965
2966         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2967         /*
2968          * Another VCPU might have used IBS while we were offline.
2969          * Let's play safe and flush the VCPU at startup.
2970          */
2971         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2972         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2973         return;
2974 }
2975
2976 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2977 {
2978         int i, online_vcpus, started_vcpus = 0;
2979         struct kvm_vcpu *started_vcpu = NULL;
2980
2981         if (is_vcpu_stopped(vcpu))
2982                 return;
2983
2984         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2985         /* Only one cpu at a time may enter/leave the STOPPED state. */
2986         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2987         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2988
2989         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2990         kvm_s390_clear_stop_irq(vcpu);
2991
2992         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2993         __disable_ibs_on_vcpu(vcpu);
2994
2995         for (i = 0; i < online_vcpus; i++) {
2996                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2997                         started_vcpus++;
2998                         started_vcpu = vcpu->kvm->vcpus[i];
2999                 }
3000         }
3001
3002         if (started_vcpus == 1) {
3003                 /*
3004                  * As we only have one VCPU left, we want to enable the
3005                  * IBS facility for that VCPU to speed it up.
3006                  */
3007                 __enable_ibs_on_vcpu(started_vcpu);
3008         }
3009
3010         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3011         return;
3012 }
3013
3014 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3015                                      struct kvm_enable_cap *cap)
3016 {
3017         int r;
3018
3019         if (cap->flags)
3020                 return -EINVAL;
3021
3022         switch (cap->cap) {
3023         case KVM_CAP_S390_CSS_SUPPORT:
3024                 if (!vcpu->kvm->arch.css_support) {
3025                         vcpu->kvm->arch.css_support = 1;
3026                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3027                         trace_kvm_s390_enable_css(vcpu->kvm);
3028                 }
3029                 r = 0;
3030                 break;
3031         default:
3032                 r = -EINVAL;
3033                 break;
3034         }
3035         return r;
3036 }
3037
3038 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3039                                   struct kvm_s390_mem_op *mop)
3040 {
3041         void __user *uaddr = (void __user *)mop->buf;
3042         void *tmpbuf = NULL;
3043         int r, srcu_idx;
3044         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3045                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3046
3047         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
3048                 return -EINVAL;
3049
3050         if (mop->size > MEM_OP_MAX_SIZE)
3051                 return -E2BIG;
3052
3053         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3054                 tmpbuf = vmalloc(mop->size);
3055                 if (!tmpbuf)
3056                         return -ENOMEM;
3057         }
3058
3059         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3060
3061         switch (mop->op) {
3062         case KVM_S390_MEMOP_LOGICAL_READ:
3063                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3064                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3065                                             mop->size, GACC_FETCH);
3066                         break;
3067                 }
3068                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3069                 if (r == 0) {
3070                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3071                                 r = -EFAULT;
3072                 }
3073                 break;
3074         case KVM_S390_MEMOP_LOGICAL_WRITE:
3075                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3076                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3077                                             mop->size, GACC_STORE);
3078                         break;
3079                 }
3080                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3081                         r = -EFAULT;
3082                         break;
3083                 }
3084                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3085                 break;
3086         default:
3087                 r = -EINVAL;
3088         }
3089
3090         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3091
3092         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3093                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3094
3095         vfree(tmpbuf);
3096         return r;
3097 }
3098
3099 long kvm_arch_vcpu_ioctl(struct file *filp,
3100                          unsigned int ioctl, unsigned long arg)
3101 {
3102         struct kvm_vcpu *vcpu = filp->private_data;
3103         void __user *argp = (void __user *)arg;
3104         int idx;
3105         long r;
3106
3107         switch (ioctl) {
3108         case KVM_S390_IRQ: {
3109                 struct kvm_s390_irq s390irq;
3110
3111                 r = -EFAULT;
3112                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3113                         break;
3114                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3115                 break;
3116         }
3117         case KVM_S390_INTERRUPT: {
3118                 struct kvm_s390_interrupt s390int;
3119                 struct kvm_s390_irq s390irq = {};
3120
3121                 r = -EFAULT;
3122                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3123                         break;
3124                 if (s390int_to_s390irq(&s390int, &s390irq))
3125                         return -EINVAL;
3126                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3127                 break;
3128         }
3129         case KVM_S390_STORE_STATUS:
3130                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3131                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3132                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3133                 break;
3134         case KVM_S390_SET_INITIAL_PSW: {
3135                 psw_t psw;
3136
3137                 r = -EFAULT;
3138                 if (copy_from_user(&psw, argp, sizeof(psw)))
3139                         break;
3140                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3141                 break;
3142         }
3143         case KVM_S390_INITIAL_RESET:
3144                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3145                 break;
3146         case KVM_SET_ONE_REG:
3147         case KVM_GET_ONE_REG: {
3148                 struct kvm_one_reg reg;
3149                 r = -EFAULT;
3150                 if (copy_from_user(&reg, argp, sizeof(reg)))
3151                         break;
3152                 if (ioctl == KVM_SET_ONE_REG)
3153                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3154                 else
3155                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3156                 break;
3157         }
3158 #ifdef CONFIG_KVM_S390_UCONTROL
3159         case KVM_S390_UCAS_MAP: {
3160                 struct kvm_s390_ucas_mapping ucasmap;
3161
3162                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3163                         r = -EFAULT;
3164                         break;
3165                 }
3166
3167                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3168                         r = -EINVAL;
3169                         break;
3170                 }
3171
3172                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3173                                      ucasmap.vcpu_addr, ucasmap.length);
3174                 break;
3175         }
3176         case KVM_S390_UCAS_UNMAP: {
3177                 struct kvm_s390_ucas_mapping ucasmap;
3178
3179                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3180                         r = -EFAULT;
3181                         break;
3182                 }
3183
3184                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3185                         r = -EINVAL;
3186                         break;
3187                 }
3188
3189                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3190                         ucasmap.length);
3191                 break;
3192         }
3193 #endif
3194         case KVM_S390_VCPU_FAULT: {
3195                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3196                 break;
3197         }
3198         case KVM_ENABLE_CAP:
3199         {
3200                 struct kvm_enable_cap cap;
3201                 r = -EFAULT;
3202                 if (copy_from_user(&cap, argp, sizeof(cap)))
3203                         break;
3204                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3205                 break;
3206         }
3207         case KVM_S390_MEM_OP: {
3208                 struct kvm_s390_mem_op mem_op;
3209
3210                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3211                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3212                 else
3213                         r = -EFAULT;
3214                 break;
3215         }
3216         case KVM_S390_SET_IRQ_STATE: {
3217                 struct kvm_s390_irq_state irq_state;
3218
3219                 r = -EFAULT;
3220                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3221                         break;
3222                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3223                     irq_state.len == 0 ||
3224                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3225                         r = -EINVAL;
3226                         break;
3227                 }
3228                 r = kvm_s390_set_irq_state(vcpu,
3229                                            (void __user *) irq_state.buf,
3230                                            irq_state.len);
3231                 break;
3232         }
3233         case KVM_S390_GET_IRQ_STATE: {
3234                 struct kvm_s390_irq_state irq_state;
3235
3236                 r = -EFAULT;
3237                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3238                         break;
3239                 if (irq_state.len == 0) {
3240                         r = -EINVAL;
3241                         break;
3242                 }
3243                 r = kvm_s390_get_irq_state(vcpu,
3244                                            (__u8 __user *)  irq_state.buf,
3245                                            irq_state.len);
3246                 break;
3247         }
3248         default:
3249                 r = -ENOTTY;
3250         }
3251         return r;
3252 }
3253
3254 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3255 {
3256 #ifdef CONFIG_KVM_S390_UCONTROL
3257         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3258                  && (kvm_is_ucontrol(vcpu->kvm))) {
3259                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3260                 get_page(vmf->page);
3261                 return 0;
3262         }
3263 #endif
3264         return VM_FAULT_SIGBUS;
3265 }
3266
/* Memslot creation hook: nothing to do here, always reports success. */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                            unsigned long npages)
{
        const int success = 0;

        return success;
}
3272
3273 /* Section: memory related */
3274 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3275                                    struct kvm_memory_slot *memslot,
3276                                    const struct kvm_userspace_memory_region *mem,
3277                                    enum kvm_mr_change change)
3278 {
3279         /* A few sanity checks. We can have memory slots which have to be
3280            located/ended at a segment boundary (1MB). The memory in userland is
3281            ok to be fragmented into various different vmas. It is okay to mmap()
3282            and munmap() stuff in this slot after doing this call at any time */
3283
3284         if (mem->userspace_addr & 0xffffful)
3285                 return -EINVAL;
3286
3287         if (mem->memory_size & 0xffffful)
3288                 return -EINVAL;
3289
3290         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3291                 return -EINVAL;
3292
3293         return 0;
3294 }
3295
3296 void kvm_arch_commit_memory_region(struct kvm *kvm,
3297                                 const struct kvm_userspace_memory_region *mem,
3298                                 const struct kvm_memory_slot *old,
3299                                 const struct kvm_memory_slot *new,
3300                                 enum kvm_mr_change change)
3301 {
3302         int rc = 0;
3303
3304         switch (change) {
3305         case KVM_MR_DELETE:
3306                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
3307                                         old->npages * PAGE_SIZE);
3308                 break;
3309         case KVM_MR_MOVE:
3310                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
3311                                         old->npages * PAGE_SIZE);
3312                 if (rc)
3313                         break;
3314                 /* FALLTHROUGH */
3315         case KVM_MR_CREATE:
3316                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3317                                       mem->guest_phys_addr, mem->memory_size);
3318                 break;
3319         case KVM_MR_FLAGS_ONLY:
3320                 break;
3321         default:
3322                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
3323         }
3324         if (rc)
3325                 pr_warn("failed to commit memory region\n");
3326         return;
3327 }
3328
3329 static inline unsigned long nonhyp_mask(int i)
3330 {
3331         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3332
3333         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3334 }
3335
3336 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3337 {
3338         vcpu->valid_wakeup = false;
3339 }
3340
3341 static int __init kvm_s390_init(void)
3342 {
3343         int i;
3344
3345         if (!sclp.has_sief2) {
3346                 pr_info("SIE not available\n");
3347                 return -ENODEV;
3348         }
3349
3350         for (i = 0; i < 16; i++)
3351                 kvm_s390_fac_list_mask[i] |=
3352                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3353
3354         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3355 }
3356
3357 static void __exit kvm_s390_exit(void)
3358 {
3359         kvm_exit();
3360 }
3361
3362 module_init(kvm_s390_init);
3363 module_exit(kvm_s390_exit);
3364
3365 /*
3366  * Enable autoloading of the kvm module.
3367  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3368  * since x86 takes a different approach.
3369  */
3370 #include <linux/miscdevice.h>
3371 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3372 MODULE_ALIAS("devname:kvm");