GNU Linux-libre 5.10.217-gnu1
arch/s390/kvm/kvm-s390.c
// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

struct kvm_stats_debugfs_item debugfs_entries[] = {
	VCPU_STAT("userspace_handled", exit_userspace),
	VCPU_STAT("exit_null", exit_null),
	VCPU_STAT("exit_validity", exit_validity),
	VCPU_STAT("exit_stop_request", exit_stop_request),
	VCPU_STAT("exit_external_request", exit_external_request),
	VCPU_STAT("exit_io_request", exit_io_request),
	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
	VCPU_STAT("exit_instruction", exit_instruction),
	VCPU_STAT("exit_pei", exit_pei),
	VCPU_STAT("exit_program_interruption", exit_program_interruption),
	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
	VCPU_STAT("exit_operation_exception", exit_operation_exception),
	VCPU_STAT("halt_successful_poll", halt_successful_poll),
	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
	VCPU_STAT("halt_wakeup", halt_wakeup),
	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
	VCPU_STAT("instruction_lctlg", instruction_lctlg),
	VCPU_STAT("instruction_lctl", instruction_lctl),
	VCPU_STAT("instruction_stctl", instruction_stctl),
	VCPU_STAT("instruction_stctg", instruction_stctg),
	VCPU_STAT("deliver_ckc", deliver_ckc),
	VCPU_STAT("deliver_cputm", deliver_cputm),
	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
	VCPU_STAT("deliver_external_call", deliver_external_call),
	VCPU_STAT("deliver_service_signal", deliver_service_signal),
	VCPU_STAT("deliver_virtio", deliver_virtio),
	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
	VCPU_STAT("deliver_program", deliver_program),
	VCPU_STAT("deliver_io", deliver_io),
	VCPU_STAT("deliver_machine_check", deliver_machine_check),
	VCPU_STAT("exit_wait_state", exit_wait_state),
	VCPU_STAT("inject_ckc", inject_ckc),
	VCPU_STAT("inject_cputm", inject_cputm),
	VCPU_STAT("inject_external_call", inject_external_call),
	VM_STAT("inject_float_mchk", inject_float_mchk),
	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
	VM_STAT("inject_io", inject_io),
	VCPU_STAT("inject_mchk", inject_mchk),
	VM_STAT("inject_pfault_done", inject_pfault_done),
	VCPU_STAT("inject_program", inject_program),
	VCPU_STAT("inject_restart", inject_restart),
	VM_STAT("inject_service_signal", inject_service_signal),
	VCPU_STAT("inject_set_prefix", inject_set_prefix),
	VCPU_STAT("inject_stop_signal", inject_stop_signal),
	VCPU_STAT("inject_pfault_init", inject_pfault_init),
	VM_STAT("inject_virtio", inject_virtio),
	VCPU_STAT("instruction_epsw", instruction_epsw),
	VCPU_STAT("instruction_gs", instruction_gs),
	VCPU_STAT("instruction_io_other", instruction_io_other),
	VCPU_STAT("instruction_lpsw", instruction_lpsw),
	VCPU_STAT("instruction_lpswe", instruction_lpswe),
	VCPU_STAT("instruction_pfmf", instruction_pfmf),
	VCPU_STAT("instruction_ptff", instruction_ptff),
	VCPU_STAT("instruction_stidp", instruction_stidp),
	VCPU_STAT("instruction_sck", instruction_sck),
	VCPU_STAT("instruction_sckpf", instruction_sckpf),
	VCPU_STAT("instruction_spx", instruction_spx),
	VCPU_STAT("instruction_stpx", instruction_stpx),
	VCPU_STAT("instruction_stap", instruction_stap),
	VCPU_STAT("instruction_iske", instruction_iske),
	VCPU_STAT("instruction_ri", instruction_ri),
	VCPU_STAT("instruction_rrbe", instruction_rrbe),
	VCPU_STAT("instruction_sske", instruction_sske),
	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
	VCPU_STAT("instruction_essa", instruction_essa),
	VCPU_STAT("instruction_stsi", instruction_stsi),
	VCPU_STAT("instruction_stfl", instruction_stfl),
	VCPU_STAT("instruction_tb", instruction_tb),
	VCPU_STAT("instruction_tpi", instruction_tpi),
	VCPU_STAT("instruction_tprot", instruction_tprot),
	VCPU_STAT("instruction_tsch", instruction_tsch),
	VCPU_STAT("instruction_sthyi", instruction_sthyi),
	VCPU_STAT("instruction_sie", instruction_sie),
	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
	VCPU_STAT("instruction_diag_10", diagnose_10),
	VCPU_STAT("instruction_diag_44", diagnose_44),
	VCPU_STAT("instruction_diag_9c", diagnose_9c),
	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
	VCPU_STAT("instruction_diag_258", diagnose_258),
	VCPU_STAT("instruction_diag_308", diagnose_308),
	VCPU_STAT("instruction_diag_500", diagnose_500),
	VCPU_STAT("instruction_diag_other", diagnose_other),
	{ NULL }
};

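/*
 * Layout of the 128-bit result of STORE CLOCK EXTENDED (STCKE): one byte
 * of epoch index, followed by the 64-bit TOD value and reserved padding.
 */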
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines the default mask for facilities. Consists of
 * the defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

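/*
 * Number of double words used internally for the facility masks/lists; the
 * build-time checks ensure that it fits both the uapi mask/list sizes and
 * the stfle_fac_list in the lowcore.
 */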
static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

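/*
 * Apply a host TOD clock delta to one SIE control block: adjust the guest
 * epoch and, if the multiple-epoch facility is active (ECD_MEF), carry the
 * adjustment into the epoch index as well.
 */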
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

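/*
 * Test whether a PERFORM LOCKED OPERATION (PLO) function is available:
 * setting bit 0x100 in the function code selects "test bit" mode, and
 * condition code 0 indicates that the function is installed.
 */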
static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}

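/*
 * Query the installed subfunctions of a query-capable instruction:
 * function code 0 in GR0 selects the query function, GR1 points to the
 * buffer that receives the indication bits.
 */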
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
		r = 1;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
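	/*
	 * The VCPU limit follows the SCA format: a basic SCA holds
	 * KVM_S390_BSCA_CPU_SLOTS entries, an extended SCA (if ESCA and
	 * 64-bit SCAO are available) holds KVM_S390_ESCA_CPU_SLOTS; VMs
	 * that do not use SCA entries are only bound by KVM_MAX_VCPUS.
	 */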
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

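/*
 * Request an operation-exception intercept on every VCPU so that the
 * instruction 0x0000 can be forwarded to user space (KVM_CAP_S390_USER_INSTR0).
 */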
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			mmap_write_lock(kvm->mm);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			mmap_write_unlock(kvm->mm);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

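/*
 * Propagate a changed crypto configuration to all VCPUs: block them so none
 * is running in SIE, refresh each VCPU's crypto control block setup and kick
 * the vSIE handler so that shadow CRYCBs get recreated.
 */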
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	__kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	__kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	mutex_lock(&kvm->lock);
	/*
	 * For protected guests, the TOD is managed by the ultravisor, so trying
	 * to change it will never bring the expected results.
	 */
	if (kvm_s390_pv_is_protected(kvm)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

out_unlock:
	mutex_unlock(&kvm->lock);
	return ret;
}

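/*
 * Read the host TOD in the extended (STCKE) format and convert it to the
 * guest's view by adding the VM's epoch, carrying into the epoch index when
 * the multiple-epoch facility (stfle bit 139) is available.
 */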
1179 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1180                                    struct kvm_s390_vm_tod_clock *gtod)
1181 {
1182         struct kvm_s390_tod_clock_ext htod;
1183
1184         preempt_disable();
1185
1186         get_tod_clock_ext((char *)&htod);
1187
1188         gtod->tod = htod.tod + kvm->arch.epoch;
1189         gtod->epoch_idx = 0;
1190         if (test_kvm_facility(kvm, 139)) {
1191                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1192                 if (gtod->tod < htod.tod)
1193                         gtod->epoch_idx += 1;
1194         }
1195
1196         preempt_enable();
1197 }
1198
1199 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1200 {
1201         struct kvm_s390_vm_tod_clock gtod;
1202
1203         memset(&gtod, 0, sizeof(gtod));
1204         kvm_s390_get_tod_clock(kvm, &gtod);
1205         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1206                 return -EFAULT;
1207
1208         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1209                 gtod.epoch_idx, gtod.tod);
1210         return 0;
1211 }
1212
1213 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1214 {
1215         u8 gtod_high = 0;
1216
1217         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1218                                          sizeof(gtod_high)))
1219                 return -EFAULT;
1220         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1221
1222         return 0;
1223 }
1224
1225 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1226 {
1227         u64 gtod;
1228
1229         gtod = kvm_s390_get_tod_clock_fast(kvm);
1230         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1231                 return -EFAULT;
1232         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1233
1234         return 0;
1235 }
1236
1237 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1238 {
1239         int ret;
1240
1241         if (attr->flags)
1242                 return -EINVAL;
1243
1244         switch (attr->attr) {
1245         case KVM_S390_VM_TOD_EXT:
1246                 ret = kvm_s390_get_tod_ext(kvm, attr);
1247                 break;
1248         case KVM_S390_VM_TOD_HIGH:
1249                 ret = kvm_s390_get_tod_high(kvm, attr);
1250                 break;
1251         case KVM_S390_VM_TOD_LOW:
1252                 ret = kvm_s390_get_tod_low(kvm, attr);
1253                 break;
1254         default:
1255                 ret = -ENXIO;
1256                 break;
1257         }
1258         return ret;
1259 }
1260
1261 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1262 {
1263         struct kvm_s390_vm_cpu_processor *proc;
1264         u16 lowest_ibc, unblocked_ibc;
1265         int ret = 0;
1266
1267         mutex_lock(&kvm->lock);
1268         if (kvm->created_vcpus) {
1269                 ret = -EBUSY;
1270                 goto out;
1271         }
1272         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1273         if (!proc) {
1274                 ret = -ENOMEM;
1275                 goto out;
1276         }
1277         if (!copy_from_user(proc, (void __user *)attr->addr,
1278                             sizeof(*proc))) {
1279                 kvm->arch.model.cpuid = proc->cpuid;
1280                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1281                 unblocked_ibc = sclp.ibc & 0xfff;
1282                 if (lowest_ibc && proc->ibc) {
1283                         if (proc->ibc > unblocked_ibc)
1284                                 kvm->arch.model.ibc = unblocked_ibc;
1285                         else if (proc->ibc < lowest_ibc)
1286                                 kvm->arch.model.ibc = lowest_ibc;
1287                         else
1288                                 kvm->arch.model.ibc = proc->ibc;
1289                 }
1290                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1291                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1292                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1293                          kvm->arch.model.ibc,
1294                          kvm->arch.model.cpuid);
1295                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1296                          kvm->arch.model.fac_list[0],
1297                          kvm->arch.model.fac_list[1],
1298                          kvm->arch.model.fac_list[2]);
1299         } else
1300                 ret = -EFAULT;
1301         kfree(proc);
1302 out:
1303         mutex_unlock(&kvm->lock);
1304         return ret;
1305 }
1306
1307 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1308                                        struct kvm_device_attr *attr)
1309 {
1310         struct kvm_s390_vm_cpu_feat data;
1311
1312         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1313                 return -EFAULT;
1314         if (!bitmap_subset((unsigned long *) data.feat,
1315                            kvm_s390_available_cpu_feat,
1316                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1317                 return -EINVAL;
1318
1319         mutex_lock(&kvm->lock);
1320         if (kvm->created_vcpus) {
1321                 mutex_unlock(&kvm->lock);
1322                 return -EBUSY;
1323         }
1324         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1325                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1326         mutex_unlock(&kvm->lock);
1327         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1328                          data.feat[0],
1329                          data.feat[1],
1330                          data.feat[2]);
1331         return 0;
1332 }
1333
1334 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1335                                           struct kvm_device_attr *attr)
1336 {
1337         mutex_lock(&kvm->lock);
1338         if (kvm->created_vcpus) {
1339                 mutex_unlock(&kvm->lock);
1340                 return -EBUSY;
1341         }
1342
1343         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1344                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1345                 mutex_unlock(&kvm->lock);
1346                 return -EFAULT;
1347         }
1348         mutex_unlock(&kvm->lock);
1349
1350         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1351                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1352                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1353                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1354                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1355         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1356                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1357                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1358         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1359                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1360                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1361         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1364         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1366                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1367         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1369                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1370         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1372                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1373         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1376         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1378                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1379         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1381                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1382         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1383                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1384                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1385         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1386                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1387                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1388         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1389                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1390                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1391         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1392                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1393                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1394         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1395                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1396                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1397         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1398                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1399                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1400                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1401                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1402         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1403                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1404                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1405                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1406                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1407
1408         return 0;
1409 }
1410
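     /*
      * Example (illustrative sketch only; "vm_fd" is assumed to be an open
      * VM file descriptor and error handling is elided): userspace reaches
      * the setters above through the KVM_SET_DEVICE_ATTR vm ioctl:
      *
      *	struct kvm_s390_vm_cpu_feat feat = {};
      *	struct kvm_device_attr attr = {
      *		.group	= KVM_S390_VM_CPU_MODEL,
      *		.attr	= KVM_S390_VM_CPU_PROCESSOR_FEAT,
      *		.addr	= (__u64)(unsigned long)&feat,
      *	};
      *
      *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
      *
      * All of the setters above must run before the first vCPU is created;
      * once a vCPU exists they return -EBUSY.
      */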
1411 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1412 {
1413         int ret = -ENXIO;
1414
1415         switch (attr->attr) {
1416         case KVM_S390_VM_CPU_PROCESSOR:
1417                 ret = kvm_s390_set_processor(kvm, attr);
1418                 break;
1419         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1420                 ret = kvm_s390_set_processor_feat(kvm, attr);
1421                 break;
1422         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1423                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1424                 break;
1425         }
1426         return ret;
1427 }
1428
1429 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1430 {
1431         struct kvm_s390_vm_cpu_processor *proc;
1432         int ret = 0;
1433
1434         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1435         if (!proc) {
1436                 ret = -ENOMEM;
1437                 goto out;
1438         }
1439         proc->cpuid = kvm->arch.model.cpuid;
1440         proc->ibc = kvm->arch.model.ibc;
1441         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1442                S390_ARCH_FAC_LIST_SIZE_BYTE);
1443         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1444                  kvm->arch.model.ibc,
1445                  kvm->arch.model.cpuid);
1446         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1447                  kvm->arch.model.fac_list[0],
1448                  kvm->arch.model.fac_list[1],
1449                  kvm->arch.model.fac_list[2]);
1450         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1451                 ret = -EFAULT;
1452         kfree(proc);
1453 out:
1454         return ret;
1455 }
1456
1457 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1458 {
1459         struct kvm_s390_vm_cpu_machine *mach;
1460         int ret = 0;
1461
1462         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1463         if (!mach) {
1464                 ret = -ENOMEM;
1465                 goto out;
1466         }
1467         get_cpu_id((struct cpuid *) &mach->cpuid);
1468         mach->ibc = sclp.ibc;
1469         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1470                S390_ARCH_FAC_LIST_SIZE_BYTE);
1471         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1472                sizeof(S390_lowcore.stfle_fac_list));
1473         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1474                  kvm->arch.model.ibc,
1475                  kvm->arch.model.cpuid);
1476         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1477                  mach->fac_mask[0],
1478                  mach->fac_mask[1],
1479                  mach->fac_mask[2]);
1480         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1481                  mach->fac_list[0],
1482                  mach->fac_list[1],
1483                  mach->fac_list[2]);
1484         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1485                 ret = -EFAULT;
1486         kfree(mach);
1487 out:
1488         return ret;
1489 }
1490
1491 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1492                                        struct kvm_device_attr *attr)
1493 {
1494         struct kvm_s390_vm_cpu_feat data;
1495
1496         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1497                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1498         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1499                 return -EFAULT;
1500         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1501                          data.feat[0],
1502                          data.feat[1],
1503                          data.feat[2]);
1504         return 0;
1505 }
1506
1507 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1508                                      struct kvm_device_attr *attr)
1509 {
1510         struct kvm_s390_vm_cpu_feat data;
1511
1512         bitmap_copy((unsigned long *) data.feat,
1513                     kvm_s390_available_cpu_feat,
1514                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1515         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1516                 return -EFAULT;
1517         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1518                          data.feat[0],
1519                          data.feat[1],
1520                          data.feat[2]);
1521         return 0;
1522 }
1523
1524 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1525                                           struct kvm_device_attr *attr)
1526 {
1527         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1528             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1529                 return -EFAULT;
1530
1531         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1532                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1533                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1534                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1535                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1536         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1537                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1538                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1539         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1540                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1541                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1542         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1545         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1548         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1551         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1553                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1554         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1557         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1560         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1562                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1563         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1564                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1565                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1566         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1567                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1568                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1569         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1570                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1571                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1572         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1573                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1574                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1575         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1576                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1577                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1578         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1579                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1580                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1581                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1582                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1583         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1584                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1585                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1586                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1587                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1588
1589         return 0;
1590 }
1591
1592 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1593                                         struct kvm_device_attr *attr)
1594 {
1595         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1596             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1597                 return -EFAULT;
1598
1599         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1600                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1601                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1602                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1603                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1604         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1605                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1606                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1607         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1608                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1609                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1610         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1611                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1612                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1613         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1614                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1615                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1616         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1617                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1618                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1619         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1620                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1621                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1622         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1623                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1624                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1625         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1626                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1627                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1628         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1629                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1630                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1631         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1632                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1633                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1634         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1635                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1636                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1637         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1638                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1639                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1640         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1641                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1642                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1643         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1644                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1645                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1646         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1647                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1648                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1649                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1650                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1651         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1652                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1653                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1654                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1655                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1656
1657         return 0;
1658 }
1659
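     /*
      * The GET dispatcher below mirrors the SET side above, but also
      * exposes the machine-level attributes (KVM_S390_VM_CPU_MACHINE*),
      * i.e. what the host could provide, as opposed to what the guest
      * model is currently configured with.
      */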
1660 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1661 {
1662         int ret = -ENXIO;
1663
1664         switch (attr->attr) {
1665         case KVM_S390_VM_CPU_PROCESSOR:
1666                 ret = kvm_s390_get_processor(kvm, attr);
1667                 break;
1668         case KVM_S390_VM_CPU_MACHINE:
1669                 ret = kvm_s390_get_machine(kvm, attr);
1670                 break;
1671         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1672                 ret = kvm_s390_get_processor_feat(kvm, attr);
1673                 break;
1674         case KVM_S390_VM_CPU_MACHINE_FEAT:
1675                 ret = kvm_s390_get_machine_feat(kvm, attr);
1676                 break;
1677         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1678                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1679                 break;
1680         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1681                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1682                 break;
1683         }
1684         return ret;
1685 }
1686
1687 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1688 {
1689         int ret;
1690
1691         switch (attr->group) {
1692         case KVM_S390_VM_MEM_CTRL:
1693                 ret = kvm_s390_set_mem_control(kvm, attr);
1694                 break;
1695         case KVM_S390_VM_TOD:
1696                 ret = kvm_s390_set_tod(kvm, attr);
1697                 break;
1698         case KVM_S390_VM_CPU_MODEL:
1699                 ret = kvm_s390_set_cpu_model(kvm, attr);
1700                 break;
1701         case KVM_S390_VM_CRYPTO:
1702                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1703                 break;
1704         case KVM_S390_VM_MIGRATION:
1705                 ret = kvm_s390_vm_set_migration(kvm, attr);
1706                 break;
1707         default:
1708                 ret = -ENXIO;
1709                 break;
1710         }
1711
1712         return ret;
1713 }
1714
1715 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1716 {
1717         int ret;
1718
1719         switch (attr->group) {
1720         case KVM_S390_VM_MEM_CTRL:
1721                 ret = kvm_s390_get_mem_control(kvm, attr);
1722                 break;
1723         case KVM_S390_VM_TOD:
1724                 ret = kvm_s390_get_tod(kvm, attr);
1725                 break;
1726         case KVM_S390_VM_CPU_MODEL:
1727                 ret = kvm_s390_get_cpu_model(kvm, attr);
1728                 break;
1729         case KVM_S390_VM_MIGRATION:
1730                 ret = kvm_s390_vm_get_migration(kvm, attr);
1731                 break;
1732         default:
1733                 ret = -ENXIO;
1734                 break;
1735         }
1736
1737         return ret;
1738 }
1739
1740 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1741 {
1742         int ret;
1743
1744         switch (attr->group) {
1745         case KVM_S390_VM_MEM_CTRL:
1746                 switch (attr->attr) {
1747                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1748                 case KVM_S390_VM_MEM_CLR_CMMA:
1749                         ret = sclp.has_cmma ? 0 : -ENXIO;
1750                         break;
1751                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1752                         ret = 0;
1753                         break;
1754                 default:
1755                         ret = -ENXIO;
1756                         break;
1757                 }
1758                 break;
1759         case KVM_S390_VM_TOD:
1760                 switch (attr->attr) {
1761                 case KVM_S390_VM_TOD_LOW:
1762                 case KVM_S390_VM_TOD_HIGH:
1763                         ret = 0;
1764                         break;
1765                 default:
1766                         ret = -ENXIO;
1767                         break;
1768                 }
1769                 break;
1770         case KVM_S390_VM_CPU_MODEL:
1771                 switch (attr->attr) {
1772                 case KVM_S390_VM_CPU_PROCESSOR:
1773                 case KVM_S390_VM_CPU_MACHINE:
1774                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1775                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1776                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1777                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1778                         ret = 0;
1779                         break;
1780                 default:
1781                         ret = -ENXIO;
1782                         break;
1783                 }
1784                 break;
1785         case KVM_S390_VM_CRYPTO:
1786                 switch (attr->attr) {
1787                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1788                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1789                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1790                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1791                         ret = 0;
1792                         break;
1793                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1794                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1795                         ret = ap_instructions_available() ? 0 : -ENXIO;
1796                         break;
1797                 default:
1798                         ret = -ENXIO;
1799                         break;
1800                 }
1801                 break;
1802         case KVM_S390_VM_MIGRATION:
1803                 ret = 0;
1804                 break;
1805         default:
1806                 ret = -ENXIO;
1807                 break;
1808         }
1809
1810         return ret;
1811 }
1812
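     /*
      * Example (illustrative sketch only; "vm_fd" assumed, error handling
      * elided): userspace reads guest storage keys with the
      * KVM_S390_GET_SKEYS vm ioctl:
      *
      *	uint8_t keys[128];
      *	struct kvm_s390_skeys args = {
      *		.start_gfn	= 0,
      *		.count		= 128,
      *		.skeydata_addr	= (__u64)(unsigned long)keys,
      *	};
      *
      *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
      *
      * args.flags must be zero; a return value of KVM_S390_GET_SKEYS_NONE
      * means the guest does not use storage keys at all.
      */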
1813 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1814 {
1815         uint8_t *keys;
1816         uint64_t hva;
1817         int srcu_idx, i, r = 0;
1818
1819         if (args->flags != 0)
1820                 return -EINVAL;
1821
1822         /* Is this guest using storage keys? */
1823         if (!mm_uses_skeys(current->mm))
1824                 return KVM_S390_GET_SKEYS_NONE;
1825
1826         /* Enforce sane limit on memory allocation */
1827         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1828                 return -EINVAL;
1829
1830         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1831         if (!keys)
1832                 return -ENOMEM;
1833
1834         mmap_read_lock(current->mm);
1835         srcu_idx = srcu_read_lock(&kvm->srcu);
1836         for (i = 0; i < args->count; i++) {
1837                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1838                 if (kvm_is_error_hva(hva)) {
1839                         r = -EFAULT;
1840                         break;
1841                 }
1842
1843                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1844                 if (r)
1845                         break;
1846         }
1847         srcu_read_unlock(&kvm->srcu, srcu_idx);
1848         mmap_read_unlock(current->mm);
1849
1850         if (!r) {
1851                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1852                                  sizeof(uint8_t) * args->count);
1853                 if (r)
1854                         r = -EFAULT;
1855         }
1856
1857         kvfree(keys);
1858         return r;
1859 }
1860
1861 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1862 {
1863         uint8_t *keys;
1864         uint64_t hva;
1865         int srcu_idx, i, r = 0;
1866         bool unlocked;
1867
1868         if (args->flags != 0)
1869                 return -EINVAL;
1870
1871         /* Enforce sane limit on memory allocation */
1872         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1873                 return -EINVAL;
1874
1875         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1876         if (!keys)
1877                 return -ENOMEM;
1878
1879         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1880                            sizeof(uint8_t) * args->count);
1881         if (r) {
1882                 r = -EFAULT;
1883                 goto out;
1884         }
1885
1886         /* Enable storage key handling for the guest */
1887         r = s390_enable_skey();
1888         if (r)
1889                 goto out;
1890
1891         i = 0;
1892         mmap_read_lock(current->mm);
1893         srcu_idx = srcu_read_lock(&kvm->srcu);
1894         while (i < args->count) {
1895                 unlocked = false;
1896                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1897                 if (kvm_is_error_hva(hva)) {
1898                         r = -EFAULT;
1899                         break;
1900                 }
1901
1902                 /* Lowest order bit is reserved */
1903                 if (keys[i] & 0x01) {
1904                         r = -EINVAL;
1905                         break;
1906                 }
1907
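                     /*
                      * set_guest_storage_key() can fail for a page that
                      * is not mapped; fault the page in writably before
                      * giving up on the request.
                      */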
1908                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1909                 if (r) {
1910                         r = fixup_user_fault(current->mm, hva,
1911                                              FAULT_FLAG_WRITE, &unlocked);
1912                         if (r)
1913                                 break;
1914                 }
1915                 if (!r)
1916                         i++;
1917         }
1918         srcu_read_unlock(&kvm->srcu, srcu_idx);
1919         mmap_read_unlock(current->mm);
1920 out:
1921         kvfree(keys);
1922         return r;
1923 }
1924
1925 /*
1926  * Base address and length must be sent at the start of each block, so it is
1927  * cheaper to also send a run of clean data, as long as that run is shorter
1928  * than the size of two longs.
1929  */
1930 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1931 /* same upper limit as for storage keys, for consistency */
1932 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1933
1934 /*
1935  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1936  * address falls in a hole. In that case the index of one of the memslots
1937  * bordering the hole is returned.
1938  */
1939 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1940 {
1941         int start = 0, end = slots->used_slots;
1942         int slot = atomic_read(&slots->lru_slot);
1943         struct kvm_memory_slot *memslots = slots->memslots;
1944
1945         if (gfn >= memslots[slot].base_gfn &&
1946             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1947                 return slot;
1948
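             /*
              * memslots are sorted by descending base_gfn, so the binary
              * search below finds the slot with the highest base_gfn that
              * is still <= gfn; if gfn lies in a hole, that is one of the
              * slots bordering the hole.
              */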
1949         while (start < end) {
1950                 slot = start + (end - start) / 2;
1951
1952                 if (gfn >= memslots[slot].base_gfn)
1953                         end = slot;
1954                 else
1955                         start = slot + 1;
1956         }
1957
1958         if (start >= slots->used_slots)
1959                 return slots->used_slots - 1;
1960
1961         if (gfn >= memslots[start].base_gfn &&
1962             gfn < memslots[start].base_gfn + memslots[start].npages) {
1963                 atomic_set(&slots->lru_slot, start);
1964         }
1965
1966         return start;
1967 }
1968
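     /*
      * The value saved per page below is the guest page state byte of the
      * PGSTE: (pgstev >> 24) & 0x43 keeps the usage state (bits 0-1) and
      * the NODAT bit (bit 6), i.e. the same bits that
      * kvm_s390_set_cmma_bits() later accepts again via
      * _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT.
      */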
1969 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1970                               u8 *res, unsigned long bufsize)
1971 {
1972         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1973
1974         args->count = 0;
1975         while (args->count < bufsize) {
1976                 hva = gfn_to_hva(kvm, cur_gfn);
1977                 /*
1978                  * We return an error if the first value was invalid, but we
1979                  * return successfully if at least one value was copied.
1980                  */
1981                 if (kvm_is_error_hva(hva))
1982                         return args->count ? 0 : -EFAULT;
1983                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1984                         pgstev = 0;
1985                 res[args->count++] = (pgstev >> 24) & 0x43;
1986                 cur_gfn++;
1987         }
1988
1989         return 0;
1990 }
1991
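     /*
      * Return the guest frame number of the next page with its bit set in
      * the CMMA dirty bitmap, starting the search at cur_gfn. Slots are
      * walked from higher towards lower indexes, i.e. towards higher
      * guest addresses.
      */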
1992 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1993                                               unsigned long cur_gfn)
1994 {
1995         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1996         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1997         unsigned long ofs = cur_gfn - ms->base_gfn;
1998
1999         if (ms->base_gfn + ms->npages <= cur_gfn) {
2000                 slotidx--;
2001                 /* If we are above the highest slot, wrap around */
2002                 if (slotidx < 0)
2003                         slotidx = slots->used_slots - 1;
2004
2005                 ms = slots->memslots + slotidx;
2006                 ofs = 0;
2007         }
2008
2009         if (cur_gfn < ms->base_gfn)
2010                 ofs = 0;
2011
2012         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2013         while ((slotidx > 0) && (ofs >= ms->npages)) {
2014                 slotidx--;
2015                 ms = slots->memslots + slotidx;
2016                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2017         }
2018         return ms->base_gfn + ofs;
2019 }
2020
2021 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2022                              u8 *res, unsigned long bufsize)
2023 {
2024         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2025         struct kvm_memslots *slots = kvm_memslots(kvm);
2026         struct kvm_memory_slot *ms;
2027
2028         if (unlikely(!slots->used_slots))
2029                 return 0;
2030
2031         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2032         ms = gfn_to_memslot(kvm, cur_gfn);
2033         args->count = 0;
2034         args->start_gfn = cur_gfn;
2035         if (!ms)
2036                 return 0;
2037         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2038         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2039
2040         while (args->count < bufsize) {
2041                 hva = gfn_to_hva(kvm, cur_gfn);
2042                 if (kvm_is_error_hva(hva))
2043                         return 0;
2044                 /* Decrement only if we actually flipped the bit to 0 */
2045                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2046                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2047                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2048                         pgstev = 0;
2049                 /* Save the value */
2050                 res[args->count++] = (pgstev >> 24) & 0x43;
2051                 /* If the next bit is too far away, stop. */
2052                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2053                         return 0;
2054                 /* If we reached the previous "next", find the next one */
2055                 if (cur_gfn == next_gfn)
2056                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2057                 /* Reached the end of memory or of the buffer, stop */
2058                 if ((next_gfn >= mem_end) ||
2059                     (next_gfn - args->start_gfn >= bufsize))
2060                         return 0;
2061                 cur_gfn++;
2062                 /* Reached the end of the current memslot, take the next one. */
2063                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2064                         ms = gfn_to_memslot(kvm, cur_gfn);
2065                         if (!ms)
2066                                 return 0;
2067                 }
2068         }
2069         return 0;
2070 }
2071
2072 /*
2073  * This function searches for the next page with dirty CMMA attributes, and
2074  * saves the attributes in the buffer up to either the end of the buffer or
2075  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2076  * no trailing clean bytes are saved.
2077  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2078  * output buffer will report a length of 0.
2079  */
2080 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2081                                   struct kvm_s390_cmma_log *args)
2082 {
2083         unsigned long bufsize;
2084         int srcu_idx, peek, ret;
2085         u8 *values;
2086
2087         if (!kvm->arch.use_cmma)
2088                 return -ENXIO;
2089         /* Invalid/unsupported flags were specified */
2090         if (args->flags & ~KVM_S390_CMMA_PEEK)
2091                 return -EINVAL;
2092         /* Without the peek flag, a query requires migration mode to be on */
2093         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2094         if (!peek && !kvm->arch.migration_mode)
2095                 return -EINVAL;
2096         /* CMMA is disabled or was not used, or the buffer has length zero */
2097         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2098         if (!bufsize || !kvm->mm->context.uses_cmm) {
2099                 memset(args, 0, sizeof(*args));
2100                 return 0;
2101         }
2102         /* We are not peeking, and there are no dirty pages */
2103         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2104                 memset(args, 0, sizeof(*args));
2105                 return 0;
2106         }
2107
2108         values = vmalloc(bufsize);
2109         if (!values)
2110                 return -ENOMEM;
2111
2112         mmap_read_lock(kvm->mm);
2113         srcu_idx = srcu_read_lock(&kvm->srcu);
2114         if (peek)
2115                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2116         else
2117                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2118         srcu_read_unlock(&kvm->srcu, srcu_idx);
2119         mmap_read_unlock(kvm->mm);
2120
2121         if (kvm->arch.migration_mode)
2122                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2123         else
2124                 args->remaining = 0;
2125
2126         if (copy_to_user((void __user *)args->values, values, args->count))
2127                 ret = -EFAULT;
2128
2129         vfree(values);
2130         return ret;
2131 }
2132
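     /*
      * Example (illustrative sketch only; "vm_fd" assumed, error handling
      * elided): a migration tool would drain the dirty CMMA log roughly
      * like this:
      *
      *	uint8_t buf[4096];
      *	struct kvm_s390_cmma_log log = {
      *		.count	= sizeof(buf),
      *		.values	= (__u64)(unsigned long)buf,
      *	};
      *
      *	do {
      *		ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
      *		... consume log.count values starting at log.start_gfn ...
      *		log.start_gfn += log.count;
      *		log.count = sizeof(buf);
      *	} while (log.remaining);
      */
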
2133 /*
2134  * This function sets the CMMA attributes for the given pages. If the input
2135  * buffer has zero length, no action is taken, otherwise the attributes are
2136  * set and the mm->context.uses_cmm flag is set.
2137  */
2138 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2139                                   const struct kvm_s390_cmma_log *args)
2140 {
2141         unsigned long hva, mask, pgstev, i;
2142         uint8_t *bits;
2143         int srcu_idx, r = 0;
2144
2145         mask = args->mask;
2146
2147         if (!kvm->arch.use_cmma)
2148                 return -ENXIO;
2149         /* invalid/unsupported flags */
2150         if (args->flags != 0)
2151                 return -EINVAL;
2152         /* Enforce sane limit on memory allocation */
2153         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2154                 return -EINVAL;
2155         /* Nothing to do */
2156         if (args->count == 0)
2157                 return 0;
2158
2159         bits = vmalloc(array_size(sizeof(*bits), args->count));
2160         if (!bits)
2161                 return -ENOMEM;
2162
2163         r = copy_from_user(bits, (void __user *)args->values, args->count);
2164         if (r) {
2165                 r = -EFAULT;
2166                 goto out;
2167         }
2168
2169         mmap_read_lock(kvm->mm);
2170         srcu_idx = srcu_read_lock(&kvm->srcu);
2171         for (i = 0; i < args->count; i++) {
2172                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2173                 if (kvm_is_error_hva(hva)) {
2174                         r = -EFAULT;
2175                         break;
2176                 }
2177
2178                 pgstev = bits[i];
2179                 pgstev = pgstev << 24;
2180                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2181                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2182         }
2183         srcu_read_unlock(&kvm->srcu, srcu_idx);
2184         mmap_read_unlock(kvm->mm);
2185
2186         if (!kvm->mm->context.uses_cmm) {
2187                 mmap_write_lock(kvm->mm);
2188                 kvm->mm->context.uses_cmm = 1;
2189                 mmap_write_unlock(kvm->mm);
2190         }
2191 out:
2192         vfree(bits);
2193         return r;
2194 }
2195
2196 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2197 {
2198         struct kvm_vcpu *vcpu;
2199         u16 rc, rrc;
2200         int ret = 0;
2201         int i;
2202
2203         /*
2204          * We ignore failures and try to destroy as many CPUs as possible.
2205          * At the same time we must not free the assigned resources when
2206          * this fails, as the ultravisor still has access to that memory.
2207          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2208          * behind.
2209          * We want to return the first failure rc and rrc, though.
2210          */
2211         kvm_for_each_vcpu(i, vcpu, kvm) {
2212                 mutex_lock(&vcpu->mutex);
2213                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2214                         *rcp = rc;
2215                         *rrcp = rrc;
2216                         ret = -EIO;
2217                 }
2218                 mutex_unlock(&vcpu->mutex);
2219         }
2220         return ret;
2221 }
2222
2223 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2224 {
2225         int i, r = 0;
2226         u16 dummy;
2227
2228         struct kvm_vcpu *vcpu;
2229
2230         kvm_for_each_vcpu(i, vcpu, kvm) {
2231                 mutex_lock(&vcpu->mutex);
2232                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2233                 mutex_unlock(&vcpu->mutex);
2234                 if (r)
2235                         break;
2236         }
2237         if (r)
2238                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2239         return r;
2240 }
2241
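     /*
      * Example (illustrative sketch only; "vm_fd" assumed, error handling
      * elided): userspace turns the VM into a protected VM with the
      * KVM_S390_PV_COMMAND vm ioctl:
      *
      *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ENABLE };
      *
      *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
      *
      * On failure, cmd.rc and cmd.rrc carry the ultravisor return and
      * reason codes; cmd.flags must be zero (see kvm_arch_vm_ioctl()).
      */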
2242 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2243 {
2244         int r = 0;
2245         u16 dummy;
2246         void __user *argp = (void __user *)cmd->data;
2247
2248         switch (cmd->cmd) {
2249         case KVM_PV_ENABLE: {
2250                 r = -EINVAL;
2251                 if (kvm_s390_pv_is_protected(kvm))
2252                         break;
2253
2254                 /*
2255                  *  FMT 4 SIE needs esca. As we never switch back to bsca from
2256                  *  esca, we need no cleanup in the error cases below.
2257                  */
2258                 r = sca_switch_to_extended(kvm);
2259                 if (r)
2260                         break;
2261
2262                 mmap_write_lock(current->mm);
2263                 r = gmap_mark_unmergeable();
2264                 mmap_write_unlock(current->mm);
2265                 if (r)
2266                         break;
2267
2268                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2269                 if (r)
2270                         break;
2271
2272                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2273                 if (r)
2274                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2275
2276                 /* we need to block service interrupts from now on */
2277                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2278                 break;
2279         }
2280         case KVM_PV_DISABLE: {
2281                 r = -EINVAL;
2282                 if (!kvm_s390_pv_is_protected(kvm))
2283                         break;
2284
2285                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2286                 /*
2287                  * If a CPU could not be destroyed, destroy VM will also fail.
2288                  * There is no point in trying to destroy it. Instead return
2289                  * the rc and rrc from the first CPU that failed destroying.
2290                  */
2291                 if (r)
2292                         break;
2293                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2294
2295                 /* no need to block service interrupts any more */
2296                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2297                 break;
2298         }
2299         case KVM_PV_SET_SEC_PARMS: {
2300                 struct kvm_s390_pv_sec_parm parms = {};
2301                 void *hdr;
2302
2303                 r = -EINVAL;
2304                 if (!kvm_s390_pv_is_protected(kvm))
2305                         break;
2306
2307                 r = -EFAULT;
2308                 if (copy_from_user(&parms, argp, sizeof(parms)))
2309                         break;
2310
2311                 /* Currently restricted to 8KB */
2312                 r = -EINVAL;
2313                 if (parms.length > PAGE_SIZE * 2)
2314                         break;
2315
2316                 r = -ENOMEM;
2317                 hdr = vmalloc(parms.length);
2318                 if (!hdr)
2319                         break;
2320
2321                 r = -EFAULT;
2322                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2323                                     parms.length))
2324                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2325                                                       &cmd->rc, &cmd->rrc);
2326
2327                 vfree(hdr);
2328                 break;
2329         }
2330         case KVM_PV_UNPACK: {
2331                 struct kvm_s390_pv_unp unp = {};
2332
2333                 r = -EINVAL;
2334                 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2335                         break;
2336
2337                 r = -EFAULT;
2338                 if (copy_from_user(&unp, argp, sizeof(unp)))
2339                         break;
2340
2341                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2342                                        &cmd->rc, &cmd->rrc);
2343                 break;
2344         }
2345         case KVM_PV_VERIFY: {
2346                 r = -EINVAL;
2347                 if (!kvm_s390_pv_is_protected(kvm))
2348                         break;
2349
2350                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2351                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2352                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2353                              cmd->rrc);
2354                 break;
2355         }
2356         case KVM_PV_PREP_RESET: {
2357                 r = -EINVAL;
2358                 if (!kvm_s390_pv_is_protected(kvm))
2359                         break;
2360
2361                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2362                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2363                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2364                              cmd->rc, cmd->rrc);
2365                 break;
2366         }
2367         case KVM_PV_UNSHARE_ALL: {
2368                 r = -EINVAL;
2369                 if (!kvm_s390_pv_is_protected(kvm))
2370                         break;
2371
2372                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2373                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2374                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2375                              cmd->rc, cmd->rrc);
2376                 break;
2377         }
2378         default:
2379                 r = -ENOTTY;
2380         }
2381         return r;
2382 }
2383
2384 long kvm_arch_vm_ioctl(struct file *filp,
2385                        unsigned int ioctl, unsigned long arg)
2386 {
2387         struct kvm *kvm = filp->private_data;
2388         void __user *argp = (void __user *)arg;
2389         struct kvm_device_attr attr;
2390         int r;
2391
2392         switch (ioctl) {
2393         case KVM_S390_INTERRUPT: {
2394                 struct kvm_s390_interrupt s390int;
2395
2396                 r = -EFAULT;
2397                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2398                         break;
2399                 r = kvm_s390_inject_vm(kvm, &s390int);
2400                 break;
2401         }
2402         case KVM_CREATE_IRQCHIP: {
2403                 struct kvm_irq_routing_entry routing;
2404
2405                 r = -EINVAL;
2406                 if (kvm->arch.use_irqchip) {
2407                         /* Set up dummy routing. */
2408                         memset(&routing, 0, sizeof(routing));
2409                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2410                 }
2411                 break;
2412         }
2413         case KVM_SET_DEVICE_ATTR: {
2414                 r = -EFAULT;
2415                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2416                         break;
2417                 r = kvm_s390_vm_set_attr(kvm, &attr);
2418                 break;
2419         }
2420         case KVM_GET_DEVICE_ATTR: {
2421                 r = -EFAULT;
2422                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2423                         break;
2424                 r = kvm_s390_vm_get_attr(kvm, &attr);
2425                 break;
2426         }
2427         case KVM_HAS_DEVICE_ATTR: {
2428                 r = -EFAULT;
2429                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2430                         break;
2431                 r = kvm_s390_vm_has_attr(kvm, &attr);
2432                 break;
2433         }
2434         case KVM_S390_GET_SKEYS: {
2435                 struct kvm_s390_skeys args;
2436
2437                 r = -EFAULT;
2438                 if (copy_from_user(&args, argp,
2439                                    sizeof(struct kvm_s390_skeys)))
2440                         break;
2441                 r = kvm_s390_get_skeys(kvm, &args);
2442                 break;
2443         }
2444         case KVM_S390_SET_SKEYS: {
2445                 struct kvm_s390_skeys args;
2446
2447                 r = -EFAULT;
2448                 if (copy_from_user(&args, argp,
2449                                    sizeof(struct kvm_s390_skeys)))
2450                         break;
2451                 r = kvm_s390_set_skeys(kvm, &args);
2452                 break;
2453         }
2454         case KVM_S390_GET_CMMA_BITS: {
2455                 struct kvm_s390_cmma_log args;
2456
2457                 r = -EFAULT;
2458                 if (copy_from_user(&args, argp, sizeof(args)))
2459                         break;
2460                 mutex_lock(&kvm->slots_lock);
2461                 r = kvm_s390_get_cmma_bits(kvm, &args);
2462                 mutex_unlock(&kvm->slots_lock);
2463                 if (!r) {
2464                         r = copy_to_user(argp, &args, sizeof(args));
2465                         if (r)
2466                                 r = -EFAULT;
2467                 }
2468                 break;
2469         }
2470         case KVM_S390_SET_CMMA_BITS: {
2471                 struct kvm_s390_cmma_log args;
2472
2473                 r = -EFAULT;
2474                 if (copy_from_user(&args, argp, sizeof(args)))
2475                         break;
2476                 mutex_lock(&kvm->slots_lock);
2477                 r = kvm_s390_set_cmma_bits(kvm, &args);
2478                 mutex_unlock(&kvm->slots_lock);
2479                 break;
2480         }
2481         case KVM_S390_PV_COMMAND: {
2482                 struct kvm_pv_cmd args;
2483
2484                 /* protected virt guests need userspace-controlled SIGP (CPU state) */
2485                 kvm->arch.user_cpu_state_ctrl = 1;
2486                 r = 0;
2487                 if (!is_prot_virt_host()) {
2488                         r = -EINVAL;
2489                         break;
2490                 }
2491                 if (copy_from_user(&args, argp, sizeof(args))) {
2492                         r = -EFAULT;
2493                         break;
2494                 }
2495                 if (args.flags) {
2496                         r = -EINVAL;
2497                         break;
2498                 }
2499                 mutex_lock(&kvm->lock);
2500                 r = kvm_s390_handle_pv(kvm, &args);
2501                 mutex_unlock(&kvm->lock);
2502                 if (copy_to_user(argp, &args, sizeof(args))) {
2503                         r = -EFAULT;
2504                         break;
2505                 }
2506                 break;
2507         }
2508         default:
2509                 r = -ENOTTY;
2510         }
2511
2512         return r;
2513 }
2514
2515 static int kvm_s390_apxa_installed(void)
2516 {
2517         struct ap_config_info info;
2518
2519         if (ap_instructions_available()) {
2520                 if (ap_qci(&info) == 0)
2521                         return info.apxa;
2522         }
2523
2524         return 0;
2525 }
2526
2527 /*
2528  * The format of the crypto control block (CRYCB) is specified in the 3 low
2529  * order bits of the CRYCB designation (CRYCBD) field as follows:
2530  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2531  *           AP extended addressing (APXA) facility is installed.
2532  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2533  * Format 2: Both the APXA and MSAX3 facilities are installed.
2534  */
2535 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2536 {
2537         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2538
2539         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2540         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2541
2542         /* Check whether MSAX3 is installed */
2543         if (!test_kvm_facility(kvm, 76))
2544                 return;
2545
2546         if (kvm_s390_apxa_installed())
2547                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2548         else
2549                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2550 }
2551
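     /*
      * apm, aqm and adm are bitmasks of the AP adapters, usage domains and
      * control domains the guest is allowed to use. In this kernel the
      * caller is the vfio_ap driver, which passes the masks of the mediated
      * matrix device assigned to the guest.
      */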
2552 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2553                                unsigned long *aqm, unsigned long *adm)
2554 {
2555         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2556
2557         mutex_lock(&kvm->lock);
2558         kvm_s390_vcpu_block_all(kvm);
2559
2560         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2561         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2562                 memcpy(crycb->apcb1.apm, apm, 32);
2563                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2564                          apm[0], apm[1], apm[2], apm[3]);
2565                 memcpy(crycb->apcb1.aqm, aqm, 32);
2566                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2567                          aqm[0], aqm[1], aqm[2], aqm[3]);
2568                 memcpy(crycb->apcb1.adm, adm, 32);
2569                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2570                          adm[0], adm[1], adm[2], adm[3]);
2571                 break;
2572         case CRYCB_FORMAT1:
2573         case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2574                 memcpy(crycb->apcb0.apm, apm, 8);
2575                 memcpy(crycb->apcb0.aqm, aqm, 2);
2576                 memcpy(crycb->apcb0.adm, adm, 2);
2577                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2578                          apm[0], *((unsigned short *)aqm),
2579                          *((unsigned short *)adm));
2580                 break;
2581         default:        /* Cannot happen */
2582                 break;
2583         }
2584
2585         /* recreate the shadow crycb for each vcpu */
2586         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2587         kvm_s390_vcpu_unblock_all(kvm);
2588         mutex_unlock(&kvm->lock);
2589 }
2590 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2591
2592 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2593 {
2594         mutex_lock(&kvm->lock);
2595         kvm_s390_vcpu_block_all(kvm);
2596
2597         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2598                sizeof(kvm->arch.crypto.crycb->apcb0));
2599         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2600                sizeof(kvm->arch.crypto.crycb->apcb1));
2601
2602         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2603         /* recreate the shadow crycb for each vcpu */
2604         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2605         kvm_s390_vcpu_unblock_all(kvm);
2606         mutex_unlock(&kvm->lock);
2607 }
2608 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2609
2610 static u64 kvm_s390_get_initial_cpuid(void)
2611 {
2612         struct cpuid cpuid;
2613
2614         get_cpu_id(&cpuid);
2615         cpuid.version = 0xff;
2616         return *((u64 *) &cpuid);
2617 }
2618
2619 static void kvm_s390_crypto_init(struct kvm *kvm)
2620 {
2621         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2622         kvm_s390_set_crycb_format(kvm);
2623
2624         if (!test_kvm_facility(kvm, 76))
2625                 return;
2626
2627         /* Enable AES/DEA protected key functions by default */
2628         kvm->arch.crypto.aes_kw = 1;
2629         kvm->arch.crypto.dea_kw = 1;
2630         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2631                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2632         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2633                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2634 }
2635
2636 static void sca_dispose(struct kvm *kvm)
2637 {
2638         if (kvm->arch.use_esca)
2639                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2640         else
2641                 free_page((unsigned long)(kvm->arch.sca));
2642         kvm->arch.sca = NULL;
2643 }
2644
2645 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2646 {
2647         gfp_t alloc_flags = GFP_KERNEL;
2648         int i, rc;
2649         char debug_name[16];
2650         static unsigned long sca_offset;
2651
2652         rc = -EINVAL;
2653 #ifdef CONFIG_KVM_S390_UCONTROL
2654         if (type & ~KVM_VM_S390_UCONTROL)
2655                 goto out_err;
2656         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2657                 goto out_err;
2658 #else
2659         if (type)
2660                 goto out_err;
2661 #endif
2662
2663         rc = s390_enable_sie();
2664         if (rc)
2665                 goto out_err;
2666
2667         rc = -ENOMEM;
2668
2669         if (!sclp.has_64bscao)
2670                 alloc_flags |= GFP_DMA;
2671         rwlock_init(&kvm->arch.sca_lock);
2672         /* start with basic SCA */
2673         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2674         if (!kvm->arch.sca)
2675                 goto out_err;
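             /*
              * Stagger the start of each new VM's basic SCA within its
              * page in 16-byte steps, presumably so that the heavily used
              * SCAs of different VMs do not all end up on the same cache
              * lines.
              */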
2676         mutex_lock(&kvm_lock);
2677         sca_offset += 16;
2678         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2679                 sca_offset = 0;
2680         kvm->arch.sca = (struct bsca_block *)
2681                         ((char *) kvm->arch.sca + sca_offset);
2682         mutex_unlock(&kvm_lock);
2683
2684         sprintf(debug_name, "kvm-%u", current->pid);
2685
2686         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2687         if (!kvm->arch.dbf)
2688                 goto out_err;
2689
2690         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2691         kvm->arch.sie_page2 =
2692              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2693         if (!kvm->arch.sie_page2)
2694                 goto out_err;
2695
2696         kvm->arch.sie_page2->kvm = kvm;
2697         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2698
2699         for (i = 0; i < kvm_s390_fac_size(); i++) {
2700                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2701                                               (kvm_s390_fac_base[i] |
2702                                                kvm_s390_fac_ext[i]);
2703                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2704                                               kvm_s390_fac_base[i];
2705         }
2706         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2707
2708         /* we are always in czam mode - even on pre-z14 machines */
2709         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2710         set_kvm_facility(kvm->arch.model.fac_list, 138);
2711         /* we emulate STHYI in kvm */
2712         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2713         set_kvm_facility(kvm->arch.model.fac_list, 74);
2714         if (MACHINE_HAS_TLB_GUEST) {
2715                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2716                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2717         }
2718
2719         if (css_general_characteristics.aiv && test_facility(65))
2720                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2721
2722         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2723         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2724
2725         kvm_s390_crypto_init(kvm);
2726
2727         mutex_init(&kvm->arch.float_int.ais_lock);
2728         spin_lock_init(&kvm->arch.float_int.lock);
2729         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2730                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2731         init_waitqueue_head(&kvm->arch.ipte_wq);
2732         mutex_init(&kvm->arch.ipte_mutex);
2733
2734         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2735         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2736
2737         if (type & KVM_VM_S390_UCONTROL) {
2738                 kvm->arch.gmap = NULL;
2739                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2740         } else {
2741                 if (sclp.hamax == U64_MAX)
2742                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2743                 else
2744                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2745                                                     sclp.hamax + 1);
2746                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2747                 if (!kvm->arch.gmap)
2748                         goto out_err;
2749                 kvm->arch.gmap->private = kvm;
2750                 kvm->arch.gmap->pfault_enabled = 0;
2751         }
2752
2753         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2754         kvm->arch.use_skf = sclp.has_skey;
2755         spin_lock_init(&kvm->arch.start_stop_lock);
2756         kvm_s390_vsie_init(kvm);
2757         if (use_gisa)
2758                 kvm_s390_gisa_init(kvm);
2759         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2760
2761         return 0;
2762 out_err:
2763         free_page((unsigned long)kvm->arch.sie_page2);
2764         debug_unregister(kvm->arch.dbf);
2765         sca_dispose(kvm);
2766         KVM_EVENT(3, "creation of vm failed: %d", rc);
2767         return rc;
2768 }
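
/*
 * Userspace selects the VM flavour via the type argument of KVM_CREATE_VM.
 * A minimal sketch (hypothetical fd variables, no error handling; ucontrol
 * VMs additionally need CAP_SYS_ADMIN and CONFIG_KVM_S390_UCONTROL):
 *
 *	int kvm_fd = open("/dev/kvm", O_RDWR);
 *	int vm_fd  = ioctl(kvm_fd, KVM_CREATE_VM, 0);
 *	int uc_fd  = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
 */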
2769
2770 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2771 {
2772         u16 rc, rrc;
2773
2774         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2775         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2776         kvm_s390_clear_local_irqs(vcpu);
2777         kvm_clear_async_pf_completion_queue(vcpu);
2778         if (!kvm_is_ucontrol(vcpu->kvm))
2779                 sca_del_vcpu(vcpu);
2780
2781         if (kvm_is_ucontrol(vcpu->kvm))
2782                 gmap_remove(vcpu->arch.gmap);
2783
2784         if (vcpu->kvm->arch.use_cmma)
2785                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2786         /* We cannot hold the vcpu mutex here; we are already dying */
2787         if (kvm_s390_pv_cpu_get_handle(vcpu))
2788                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2789         free_page((unsigned long)(vcpu->arch.sie_block));
2790 }
2791
2792 static void kvm_free_vcpus(struct kvm *kvm)
2793 {
2794         unsigned int i;
2795         struct kvm_vcpu *vcpu;
2796
2797         kvm_for_each_vcpu(i, vcpu, kvm)
2798                 kvm_vcpu_destroy(vcpu);
2799
2800         mutex_lock(&kvm->lock);
2801         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2802                 kvm->vcpus[i] = NULL;
2803
2804         atomic_set(&kvm->online_vcpus, 0);
2805         mutex_unlock(&kvm->lock);
2806 }
2807
2808 void kvm_arch_destroy_vm(struct kvm *kvm)
2809 {
2810         u16 rc, rrc;
2811
2812         kvm_free_vcpus(kvm);
2813         sca_dispose(kvm);
2814         kvm_s390_gisa_destroy(kvm);
2815         /*
2816          * We are already at the end of life and kvm->lock is not taken.
2817          * This is ok as the file descriptor is closed by now and nobody
2818          * can mess with the pv state. To avoid lockdep_assert_held from
2819          * complaining, we do not use kvm_s390_pv_is_protected.
2820          */
2821         if (kvm_s390_pv_get_handle(kvm))
2822                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2823         debug_unregister(kvm->arch.dbf);
2824         free_page((unsigned long)kvm->arch.sie_page2);
2825         if (!kvm_is_ucontrol(kvm))
2826                 gmap_remove(kvm->arch.gmap);
2827         kvm_s390_destroy_adapters(kvm);
2828         kvm_s390_clear_float_irqs(kvm);
2829         kvm_s390_vsie_destroy(kvm);
2830         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2831 }
2832
2833 /* Section: vcpu related */
2834 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2835 {
2836         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2837         if (!vcpu->arch.gmap)
2838                 return -ENOMEM;
2839         vcpu->arch.gmap->private = vcpu->kvm;
2840
2841         return 0;
2842 }
2843
2844 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2845 {
2846         if (!kvm_s390_use_sca_entries())
2847                 return;
2848         read_lock(&vcpu->kvm->arch.sca_lock);
2849         if (vcpu->kvm->arch.use_esca) {
2850                 struct esca_block *sca = vcpu->kvm->arch.sca;
2851
2852                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2853                 sca->cpu[vcpu->vcpu_id].sda = 0;
2854         } else {
2855                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2856
2857                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2858                 sca->cpu[vcpu->vcpu_id].sda = 0;
2859         }
2860         read_unlock(&vcpu->kvm->arch.sca_lock);
2861 }
2862
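/*
 * The SIE block addresses the SCA as a split origin: scaoh holds the
 * upper and scaol the lower 32 bits. For the extended SCA the low six
 * bits are masked off (64-byte alignment) and ECB2_ESCA announces the
 * extended format to the hardware.
 */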
2863 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2864 {
2865         if (!kvm_s390_use_sca_entries()) {
2866                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2867
2868                 /* we still need the basic sca for the ipte control */
2869                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2870                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2871                 return;
2872         }
2873         read_lock(&vcpu->kvm->arch.sca_lock);
2874         if (vcpu->kvm->arch.use_esca) {
2875                 struct esca_block *sca = vcpu->kvm->arch.sca;
2876
2877                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2878                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2879                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2880                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2881                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2882         } else {
2883                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2884
2885                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2886                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2887                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2888                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2889         }
2890         read_unlock(&vcpu->kvm->arch.sca_lock);
2891 }
2892
2893 /* Basic SCA to Extended SCA data copy routines */
2894 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2895 {
2896         d->sda = s->sda;
2897         d->sigp_ctrl.c = s->sigp_ctrl.c;
2898         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2899 }
2900
2901 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2902 {
2903         int i;
2904
2905         d->ipte_control = s->ipte_control;
2906         d->mcn[0] = s->mcn;
2907         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2908                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2909 }
2910
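/*
 * One-way switch from the basic to the extended SCA. All vCPUs are
 * blocked and kicked out of SIE while their SIE blocks are rewired to
 * the new SCA; only afterwards is the old page freed.
 */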
2911 static int sca_switch_to_extended(struct kvm *kvm)
2912 {
2913         struct bsca_block *old_sca = kvm->arch.sca;
2914         struct esca_block *new_sca;
2915         struct kvm_vcpu *vcpu;
2916         unsigned int vcpu_idx;
2917         u32 scaol, scaoh;
2918
2919         if (kvm->arch.use_esca)
2920                 return 0;
2921
2922         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2923         if (!new_sca)
2924                 return -ENOMEM;
2925
2926         scaoh = (u32)((u64)(new_sca) >> 32);
2927         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2928
2929         kvm_s390_vcpu_block_all(kvm);
2930         write_lock(&kvm->arch.sca_lock);
2931
2932         sca_copy_b_to_e(new_sca, old_sca);
2933
2934         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2935                 vcpu->arch.sie_block->scaoh = scaoh;
2936                 vcpu->arch.sie_block->scaol = scaol;
2937                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2938         }
2939         kvm->arch.sca = new_sca;
2940         kvm->arch.use_esca = 1;
2941
2942         write_unlock(&kvm->arch.sca_lock);
2943         kvm_s390_vcpu_unblock_all(kvm);
2944
2945         free_page((unsigned long)old_sca);
2946
2947         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2948                  old_sca, kvm->arch.sca);
2949         return 0;
2950 }
2951
2952 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2953 {
2954         int rc;
2955
2956         if (!kvm_s390_use_sca_entries()) {
2957                 if (id < KVM_MAX_VCPUS)
2958                         return true;
2959                 return false;
2960         }
2961         if (id < KVM_S390_BSCA_CPU_SLOTS)
2962                 return true;
2963         if (!sclp.has_esca || !sclp.has_64bscao)
2964                 return false;
2965
2966         mutex_lock(&kvm->lock);
2967         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2968         mutex_unlock(&kvm->lock);
2969
2970         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2971 }
2972
2973 /* needs preemption disabled to protect against TOD sync and vcpu_load/put */
2974 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2975 {
2976         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2977         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2978         vcpu->arch.cputm_start = get_tod_clock_fast();
2979         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2980 }
2981
2982 /* needs preemption disabled to protect against TOD sync and vcpu_load/put */
2983 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2984 {
2985         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2986         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2987         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2988         vcpu->arch.cputm_start = 0;
2989         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2990 }
2991
2992 /* needs preemption disabled to protect against TOD sync and vcpu_load/put */
2993 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2994 {
2995         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2996         vcpu->arch.cputm_enabled = true;
2997         __start_cpu_timer_accounting(vcpu);
2998 }
2999
3000 /* needs preemption disabled to protect against TOD sync and vcpu_load/put */
3001 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3002 {
3003         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3004         __stop_cpu_timer_accounting(vcpu);
3005         vcpu->arch.cputm_enabled = false;
3006 }
3007
3008 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3009 {
3010         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3011         __enable_cpu_timer_accounting(vcpu);
3012         preempt_enable();
3013 }
3014
3015 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3016 {
3017         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3018         __disable_cpu_timer_accounting(vcpu);
3019         preempt_enable();
3020 }
3021
3022 /* set the cpu timer - may only be called from the VCPU thread itself */
3023 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3024 {
3025         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3026         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3027         if (vcpu->arch.cputm_enabled)
3028                 vcpu->arch.cputm_start = get_tod_clock_fast();
3029         vcpu->arch.sie_block->cputm = cputm;
3030         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3031         preempt_enable();
3032 }
3033
3034 /* update and get the cpu timer - can also be called from other VCPU threads */
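/*
 * The result is the SIE block's cputm minus the time elapsed since
 * accounting was last (re)started; the seqcount retry loop below makes
 * sure cputm and cputm_start are sampled consistently against a
 * concurrent update by the owning VCPU thread.
 */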
3035 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3036 {
3037         unsigned int seq;
3038         __u64 value;
3039
3040         if (unlikely(!vcpu->arch.cputm_enabled))
3041                 return vcpu->arch.sie_block->cputm;
3042
3043         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3044         do {
3045                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3046                 /*
3047                  * If the writer would ever execute a read in the critical
3048                  * section, e.g. in irq context, we have a deadlock.
3049                  */
3050                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3051                 value = vcpu->arch.sie_block->cputm;
3052                 /* if cputm_start is 0, accounting is being started/stopped */
3053                 if (likely(vcpu->arch.cputm_start))
3054                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3055         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3056         preempt_enable();
3057         return value;
3058 }
3059
3060 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3061 {
3062
3063         gmap_enable(vcpu->arch.enabled_gmap);
3064         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3065         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3066                 __start_cpu_timer_accounting(vcpu);
3067         vcpu->cpu = cpu;
3068 }
3069
3070 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3071 {
3072         vcpu->cpu = -1;
3073         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3074                 __stop_cpu_timer_accounting(vcpu);
3075         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3076         vcpu->arch.enabled_gmap = gmap_get_enabled();
3077         gmap_disable(vcpu->arch.enabled_gmap);
3078
3079 }
3080
3081 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3082 {
3083         mutex_lock(&vcpu->kvm->lock);
3084         preempt_disable();
3085         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3086         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3087         preempt_enable();
3088         mutex_unlock(&vcpu->kvm->lock);
3089         if (!kvm_is_ucontrol(vcpu->kvm)) {
3090                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3091                 sca_add_vcpu(vcpu);
3092         }
3093         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3094                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3095         /* make vcpu_load load the right gmap on the first trigger */
3096         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3097 }
3098
3099 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3100 {
3101         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3102             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3103                 return true;
3104         return false;
3105 }
3106
3107 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3108 {
3109         /* At least one ECC subfunction must be present */
3110         return kvm_has_pckmo_subfunc(kvm, 32) ||
3111                kvm_has_pckmo_subfunc(kvm, 33) ||
3112                kvm_has_pckmo_subfunc(kvm, 34) ||
3113                kvm_has_pckmo_subfunc(kvm, 40) ||
3114                kvm_has_pckmo_subfunc(kvm, 41);
3115
3116 }
3117
3118 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3119 {
3120         /*
3121          * If the AP instructions are not being interpreted and the MSAX3
3122          * facility is not configured for the guest, there is nothing to set up.
3123          */
3124         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3125                 return;
3126
3127         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3128         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3129         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3130         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3131
3132         if (vcpu->kvm->arch.crypto.apie)
3133                 vcpu->arch.sie_block->eca |= ECA_APIE;
3134
3135         /* Set up protected key support */
3136         if (vcpu->kvm->arch.crypto.aes_kw) {
3137                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3138                 /* ECC keys are also wrapped with the AES wrapping key */
3139                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3140                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3141         }
3142
3143         if (vcpu->kvm->arch.crypto.dea_kw)
3144                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3145 }
3146
3147 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3148 {
3149         free_page(vcpu->arch.sie_block->cbrlo);
3150         vcpu->arch.sie_block->cbrlo = 0;
3151 }
3152
3153 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3154 {
3155         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3156         if (!vcpu->arch.sie_block->cbrlo)
3157                 return -ENOMEM;
3158         return 0;
3159 }
3160
3161 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3162 {
3163         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3164
3165         vcpu->arch.sie_block->ibc = model->ibc;
3166         if (test_kvm_facility(vcpu->kvm, 7))
3167                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3168 }
3169
3170 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3171 {
3172         int rc = 0;
3173         u16 uvrc, uvrrc;
3174
3175         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3176                                                     CPUSTAT_SM |
3177                                                     CPUSTAT_STOPPED);
3178
3179         if (test_kvm_facility(vcpu->kvm, 78))
3180                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3181         else if (test_kvm_facility(vcpu->kvm, 8))
3182                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3183
3184         kvm_s390_vcpu_setup_model(vcpu);
3185
3186         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3187         if (MACHINE_HAS_ESOP)
3188                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3189         if (test_kvm_facility(vcpu->kvm, 9))
3190                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3191         if (test_kvm_facility(vcpu->kvm, 73))
3192                 vcpu->arch.sie_block->ecb |= ECB_TE;
3193
3194         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3195                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3196         if (test_kvm_facility(vcpu->kvm, 130))
3197                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3198         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3199         if (sclp.has_cei)
3200                 vcpu->arch.sie_block->eca |= ECA_CEI;
3201         if (sclp.has_ib)
3202                 vcpu->arch.sie_block->eca |= ECA_IB;
3203         if (sclp.has_siif)
3204                 vcpu->arch.sie_block->eca |= ECA_SII;
3205         if (sclp.has_sigpif)
3206                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3207         if (test_kvm_facility(vcpu->kvm, 129)) {
3208                 vcpu->arch.sie_block->eca |= ECA_VX;
3209                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3210         }
3211         if (test_kvm_facility(vcpu->kvm, 139))
3212                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3213         if (test_kvm_facility(vcpu->kvm, 156))
3214                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3215         if (vcpu->arch.sie_block->gd) {
3216                 vcpu->arch.sie_block->eca |= ECA_AIV;
3217                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3218                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3219         }
3220         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3221                                         | SDNXC;
3222         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3223
3224         if (sclp.has_kss)
3225                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3226         else
3227                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3228
3229         if (vcpu->kvm->arch.use_cmma) {
3230                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3231                 if (rc)
3232                         return rc;
3233         }
3234         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3235         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3236
3237         vcpu->arch.sie_block->hpid = HPID_KVM;
3238
3239         kvm_s390_vcpu_crypto_setup(vcpu);
3240
3241         mutex_lock(&vcpu->kvm->lock);
3242         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3243                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3244                 if (rc)
3245                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3246         }
3247         mutex_unlock(&vcpu->kvm->lock);
3248
3249         return rc;
3250 }
3251
3252 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3253 {
3254         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3255                 return -EINVAL;
3256         return 0;
3257 }
3258
3259 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3260 {
3261         struct sie_page *sie_page;
3262         int rc;
3263
3264         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3265         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3266         if (!sie_page)
3267                 return -ENOMEM;
3268
3269         vcpu->arch.sie_block = &sie_page->sie_block;
3270         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3271
3272         /* the real guest size will always be smaller than msl */
3273         vcpu->arch.sie_block->mso = 0;
3274         vcpu->arch.sie_block->msl = sclp.hamax;
3275
3276         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3277         spin_lock_init(&vcpu->arch.local_int.lock);
3278         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3279         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3280                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3281         seqcount_init(&vcpu->arch.cputm_seqcount);
3282
3283         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3284         kvm_clear_async_pf_completion_queue(vcpu);
3285         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3286                                     KVM_SYNC_GPRS |
3287                                     KVM_SYNC_ACRS |
3288                                     KVM_SYNC_CRS |
3289                                     KVM_SYNC_ARCH0 |
3290                                     KVM_SYNC_PFAULT |
3291                                     KVM_SYNC_DIAG318;
3292         kvm_s390_set_prefix(vcpu, 0);
3293         if (test_kvm_facility(vcpu->kvm, 64))
3294                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3295         if (test_kvm_facility(vcpu->kvm, 82))
3296                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3297         if (test_kvm_facility(vcpu->kvm, 133))
3298                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3299         if (test_kvm_facility(vcpu->kvm, 156))
3300                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3301         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3302          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3303          */
3304         if (MACHINE_HAS_VX)
3305                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3306         else
3307                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3308
3309         if (kvm_is_ucontrol(vcpu->kvm)) {
3310                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3311                 if (rc)
3312                         goto out_free_sie_block;
3313         }
3314
3315         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3316                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3317         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3318
3319         rc = kvm_s390_vcpu_setup(vcpu);
3320         if (rc)
3321                 goto out_ucontrol_uninit;
3322         return 0;
3323
3324 out_ucontrol_uninit:
3325         if (kvm_is_ucontrol(vcpu->kvm))
3326                 gmap_remove(vcpu->arch.gmap);
3327 out_free_sie_block:
3328         free_page((unsigned long)(vcpu->arch.sie_block));
3329         return rc;
3330 }
3331
3332 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3333 {
3334         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3335         return kvm_s390_vcpu_has_irq(vcpu, 0);
3336 }
3337
3338 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3339 {
3340         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3341 }
3342
3343 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3344 {
3345         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3346         exit_sie(vcpu);
3347 }
3348
3349 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3350 {
3351         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3352 }
3353
3354 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3355 {
3356         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3357         exit_sie(vcpu);
3358 }
3359
3360 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3361 {
3362         return atomic_read(&vcpu->arch.sie_block->prog20) &
3363                (PROG_BLOCK_SIE | PROG_REQUEST);
3364 }
3365
3366 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3367 {
3368         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3369 }
3370
3371 /*
3372  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3373  * If the CPU is not running (e.g. waiting as idle), the function
3374  * returns immediately. */
3375 void exit_sie(struct kvm_vcpu *vcpu)
3376 {
3377         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3378         kvm_s390_vsie_kick(vcpu);
3379         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3380                 cpu_relax();
3381 }
3382
3383 /* Kick a guest cpu out of SIE to process a request synchronously */
3384 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3385 {
3386         kvm_make_request(req, vcpu);
3387         kvm_s390_vcpu_request(vcpu);
3388 }
3389
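/*
 * The prefix area of a CPU spans two consecutive pages; the notifier
 * below therefore checks, per vCPU, whether [start, end] overlaps
 * [prefix, prefix + 2 * PAGE_SIZE - 1].
 */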
3390 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3391                               unsigned long end)
3392 {
3393         struct kvm *kvm = gmap->private;
3394         struct kvm_vcpu *vcpu;
3395         unsigned long prefix;
3396         int i;
3397
3398         if (gmap_is_shadow(gmap))
3399                 return;
3400         if (start >= 1UL << 31)
3401                 /* We are only interested in prefix pages */
3402                 return;
3403         kvm_for_each_vcpu(i, vcpu, kvm) {
3404                 /* match against both prefix pages */
3405                 prefix = kvm_s390_get_prefix(vcpu);
3406                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3407                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3408                                    start, end);
3409                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3410                 }
3411         }
3412 }
3413
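/*
 * A sketch of the units in the check below: avg_steal_timer is kept in
 * TOD-clock units, of which 4096 (1 << 12) make up one microsecond, and
 * TICK_USEC is the tick length in microseconds. The expression thus
 * compares the steal time, as a percentage of one tick, against the
 * halt_poll_max_steal module parameter.
 */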
3414 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3415 {
3416         /* do not poll with more than halt_poll_max_steal percent of steal time */
3417         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3418             halt_poll_max_steal) {
3419                 vcpu->stat.halt_no_poll_steal++;
3420                 return true;
3421         }
3422         return false;
3423 }
3424
3425 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3426 {
3427         /* kvm common code refers to this, but never calls it */
3428         BUG();
3429         return 0;
3430 }
3431
3432 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3433                                            struct kvm_one_reg *reg)
3434 {
3435         int r = -EINVAL;
3436
3437         switch (reg->id) {
3438         case KVM_REG_S390_TODPR:
3439                 r = put_user(vcpu->arch.sie_block->todpr,
3440                              (u32 __user *)reg->addr);
3441                 break;
3442         case KVM_REG_S390_EPOCHDIFF:
3443                 r = put_user(vcpu->arch.sie_block->epoch,
3444                              (u64 __user *)reg->addr);
3445                 break;
3446         case KVM_REG_S390_CPU_TIMER:
3447                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3448                              (u64 __user *)reg->addr);
3449                 break;
3450         case KVM_REG_S390_CLOCK_COMP:
3451                 r = put_user(vcpu->arch.sie_block->ckc,
3452                              (u64 __user *)reg->addr);
3453                 break;
3454         case KVM_REG_S390_PFTOKEN:
3455                 r = put_user(vcpu->arch.pfault_token,
3456                              (u64 __user *)reg->addr);
3457                 break;
3458         case KVM_REG_S390_PFCOMPARE:
3459                 r = put_user(vcpu->arch.pfault_compare,
3460                              (u64 __user *)reg->addr);
3461                 break;
3462         case KVM_REG_S390_PFSELECT:
3463                 r = put_user(vcpu->arch.pfault_select,
3464                              (u64 __user *)reg->addr);
3465                 break;
3466         case KVM_REG_S390_PP:
3467                 r = put_user(vcpu->arch.sie_block->pp,
3468                              (u64 __user *)reg->addr);
3469                 break;
3470         case KVM_REG_S390_GBEA:
3471                 r = put_user(vcpu->arch.sie_block->gbea,
3472                              (u64 __user *)reg->addr);
3473                 break;
3474         default:
3475                 break;
3476         }
3477
3478         return r;
3479 }
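
/*
 * These registers are accessed from userspace through the generic
 * KVM_GET_ONE_REG / KVM_SET_ONE_REG ioctls on the vCPU fd. A minimal
 * sketch (hypothetical vcpu_fd, no error handling):
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */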
3480
3481 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3482                                            struct kvm_one_reg *reg)
3483 {
3484         int r = -EINVAL;
3485         __u64 val;
3486
3487         switch (reg->id) {
3488         case KVM_REG_S390_TODPR:
3489                 r = get_user(vcpu->arch.sie_block->todpr,
3490                              (u32 __user *)reg->addr);
3491                 break;
3492         case KVM_REG_S390_EPOCHDIFF:
3493                 r = get_user(vcpu->arch.sie_block->epoch,
3494                              (u64 __user *)reg->addr);
3495                 break;
3496         case KVM_REG_S390_CPU_TIMER:
3497                 r = get_user(val, (u64 __user *)reg->addr);
3498                 if (!r)
3499                         kvm_s390_set_cpu_timer(vcpu, val);
3500                 break;
3501         case KVM_REG_S390_CLOCK_COMP:
3502                 r = get_user(vcpu->arch.sie_block->ckc,
3503                              (u64 __user *)reg->addr);
3504                 break;
3505         case KVM_REG_S390_PFTOKEN:
3506                 r = get_user(vcpu->arch.pfault_token,
3507                              (u64 __user *)reg->addr);
3508                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3509                         kvm_clear_async_pf_completion_queue(vcpu);
3510                 break;
3511         case KVM_REG_S390_PFCOMPARE:
3512                 r = get_user(vcpu->arch.pfault_compare,
3513                              (u64 __user *)reg->addr);
3514                 break;
3515         case KVM_REG_S390_PFSELECT:
3516                 r = get_user(vcpu->arch.pfault_select,
3517                              (u64 __user *)reg->addr);
3518                 break;
3519         case KVM_REG_S390_PP:
3520                 r = get_user(vcpu->arch.sie_block->pp,
3521                              (u64 __user *)reg->addr);
3522                 break;
3523         case KVM_REG_S390_GBEA:
3524                 r = get_user(vcpu->arch.sie_block->gbea,
3525                              (u64 __user *)reg->addr);
3526                 break;
3527         default:
3528                 break;
3529         }
3530
3531         return r;
3532 }
3533
3534 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3535 {
3536         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3537         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3538         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3539
3540         kvm_clear_async_pf_completion_queue(vcpu);
3541         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3542                 kvm_s390_vcpu_stop(vcpu);
3543         kvm_s390_clear_local_irqs(vcpu);
3544 }
3545
3546 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3547 {
3548         /* Initial reset is a superset of the normal reset */
3549         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3550
3551         /*
3552          * This equals an initial CPU reset in the PoP, but we don't switch to ESA.
3553          * We reset not only the internal data, but also ...
3554          */
3555         vcpu->arch.sie_block->gpsw.mask = 0;
3556         vcpu->arch.sie_block->gpsw.addr = 0;
3557         kvm_s390_set_prefix(vcpu, 0);
3558         kvm_s390_set_cpu_timer(vcpu, 0);
3559         vcpu->arch.sie_block->ckc = 0;
3560         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3561         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3562         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3563
3564         /* ... the data in sync regs */
3565         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3566         vcpu->run->s.regs.ckc = 0;
3567         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3568         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3569         vcpu->run->psw_addr = 0;
3570         vcpu->run->psw_mask = 0;
3571         vcpu->run->s.regs.todpr = 0;
3572         vcpu->run->s.regs.cputm = 0;
3573         vcpu->run->s.regs.ckc = 0;
3574         vcpu->run->s.regs.pp = 0;
3575         vcpu->run->s.regs.gbea = 1;
3576         vcpu->run->s.regs.fpc = 0;
3577         /*
3578          * Do not reset these registers in the protected case, as some of
3579          * them are overlayed and they are not accessible in this case
3580          * them are overlaid and they are not accessible in this case
3581          */
3582         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3583                 vcpu->arch.sie_block->gbea = 1;
3584                 vcpu->arch.sie_block->pp = 0;
3585                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3586                 vcpu->arch.sie_block->todpr = 0;
3587         }
3588 }
3589
3590 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3591 {
3592         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3593
3594         /* Clear reset is a superset of the initial reset */
3595         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3596
3597         memset(&regs->gprs, 0, sizeof(regs->gprs));
3598         memset(&regs->vrs, 0, sizeof(regs->vrs));
3599         memset(&regs->acrs, 0, sizeof(regs->acrs));
3600         memset(&regs->gscb, 0, sizeof(regs->gscb));
3601
3602         regs->etoken = 0;
3603         regs->etoken_extension = 0;
3604 }
3605
3606 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3607 {
3608         vcpu_load(vcpu);
3609         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3610         vcpu_put(vcpu);
3611         return 0;
3612 }
3613
3614 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3615 {
3616         vcpu_load(vcpu);
3617         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3618         vcpu_put(vcpu);
3619         return 0;
3620 }
3621
3622 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3623                                   struct kvm_sregs *sregs)
3624 {
3625         vcpu_load(vcpu);
3626
3627         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3628         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3629
3630         vcpu_put(vcpu);
3631         return 0;
3632 }
3633
3634 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3635                                   struct kvm_sregs *sregs)
3636 {
3637         vcpu_load(vcpu);
3638
3639         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3640         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3641
3642         vcpu_put(vcpu);
3643         return 0;
3644 }
3645
3646 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3647 {
3648         int ret = 0;
3649
3650         vcpu_load(vcpu);
3651
3652         vcpu->run->s.regs.fpc = fpu->fpc;
3653         if (MACHINE_HAS_VX)
3654                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3655                                  (freg_t *) fpu->fprs);
3656         else
3657                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3658
3659         vcpu_put(vcpu);
3660         return ret;
3661 }
3662
3663 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3664 {
3665         vcpu_load(vcpu);
3666
3667         /* make sure we have the latest values */
3668         save_fpu_regs();
3669         if (MACHINE_HAS_VX)
3670                 convert_vx_to_fp((freg_t *) fpu->fprs,
3671                                  (__vector128 *) vcpu->run->s.regs.vrs);
3672         else
3673                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3674         fpu->fpc = vcpu->run->s.regs.fpc;
3675
3676         vcpu_put(vcpu);
3677         return 0;
3678 }
3679
3680 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3681 {
3682         int rc = 0;
3683
3684         if (!is_vcpu_stopped(vcpu))
3685                 rc = -EBUSY;
3686         else {
3687                 vcpu->run->psw_mask = psw.mask;
3688                 vcpu->run->psw_addr = psw.addr;
3689         }
3690         return rc;
3691 }
3692
3693 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3694                                   struct kvm_translation *tr)
3695 {
3696         return -EINVAL; /* not implemented yet */
3697 }
3698
3699 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3700                               KVM_GUESTDBG_USE_HW_BP | \
3701                               KVM_GUESTDBG_ENABLE)
3702
3703 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3704                                         struct kvm_guest_debug *dbg)
3705 {
3706         int rc = 0;
3707
3708         vcpu_load(vcpu);
3709
3710         vcpu->guest_debug = 0;
3711         kvm_s390_clear_bp_data(vcpu);
3712
3713         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3714                 rc = -EINVAL;
3715                 goto out;
3716         }
3717         if (!sclp.has_gpere) {
3718                 rc = -EINVAL;
3719                 goto out;
3720         }
3721
3722         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3723                 vcpu->guest_debug = dbg->control;
3724                 /* enforce guest PER */
3725                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3726
3727                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3728                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3729         } else {
3730                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3731                 vcpu->arch.guestdbg.last_bp = 0;
3732         }
3733
3734         if (rc) {
3735                 vcpu->guest_debug = 0;
3736                 kvm_s390_clear_bp_data(vcpu);
3737                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3738         }
3739
3740 out:
3741         vcpu_put(vcpu);
3742         return rc;
3743 }
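
/*
 * Hardware breakpoints and single-stepping are armed from userspace via
 * KVM_SET_GUEST_DEBUG. A minimal sketch (hypothetical vcpu_fd, no error
 * handling):
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */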
3744
3745 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3746                                     struct kvm_mp_state *mp_state)
3747 {
3748         int ret;
3749
3750         vcpu_load(vcpu);
3751
3752         /* CHECK_STOP and LOAD are not supported yet */
3753         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3754                                       KVM_MP_STATE_OPERATING;
3755
3756         vcpu_put(vcpu);
3757         return ret;
3758 }
3759
3760 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3761                                     struct kvm_mp_state *mp_state)
3762 {
3763         int rc = 0;
3764
3765         vcpu_load(vcpu);
3766
3767         /* user space knows about this interface - let it control the state */
3768         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3769
3770         switch (mp_state->mp_state) {
3771         case KVM_MP_STATE_STOPPED:
3772                 rc = kvm_s390_vcpu_stop(vcpu);
3773                 break;
3774         case KVM_MP_STATE_OPERATING:
3775                 rc = kvm_s390_vcpu_start(vcpu);
3776                 break;
3777         case KVM_MP_STATE_LOAD:
3778                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3779                         rc = -ENXIO;
3780                         break;
3781                 }
3782                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3783                 break;
3784         case KVM_MP_STATE_CHECK_STOP:
3785                 fallthrough;    /* CHECK_STOP is not supported yet */
3786         default:
3787                 rc = -ENXIO;
3788         }
3789
3790         vcpu_put(vcpu);
3791         return rc;
3792 }
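
/*
 * Userspace drives the stopped/operating state through KVM_SET_MP_STATE;
 * note that the first such call switches the VM to user-controlled CPU
 * state handling. A minimal sketch (hypothetical vcpu_fd, no error
 * handling):
 *
 *	struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_OPERATING };
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
 */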
3793
3794 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3795 {
3796         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3797 }
3798
3799 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3800 {
3801 retry:
3802         kvm_s390_vcpu_request_handled(vcpu);
3803         if (!kvm_request_pending(vcpu))
3804                 return 0;
3805         /*
3806          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3807          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3808          * This ensures that the ipte instruction for this request has
3809          * already finished. We might race against a second unmapper that
3810          * wants to set the blocking bit. Let's just retry the request loop.
3811          */
3812         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3813                 int rc;
3814                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3815                                           kvm_s390_get_prefix(vcpu),
3816                                           PAGE_SIZE * 2, PROT_WRITE);
3817                 if (rc) {
3818                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3819                         return rc;
3820                 }
3821                 goto retry;
3822         }
3823
3824         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3825                 vcpu->arch.sie_block->ihcpu = 0xffff;
3826                 goto retry;
3827         }
3828
3829         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3830                 if (!ibs_enabled(vcpu)) {
3831                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3832                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3833                 }
3834                 goto retry;
3835         }
3836
3837         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3838                 if (ibs_enabled(vcpu)) {
3839                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3840                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3841                 }
3842                 goto retry;
3843         }
3844
3845         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3846                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3847                 goto retry;
3848         }
3849
3850         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3851                 /*
3852                  * Disable CMM virtualization; we will emulate the ESSA
3853                  * instruction manually, in order to provide additional
3854                  * functionalities needed for live migration.
3855                  */
3856                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3857                 goto retry;
3858         }
3859
3860         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3861                 /*
3862                  * Re-enable CMM virtualization if CMMA is available and
3863                  * CMM has been used.
3864                  */
3865                 if ((vcpu->kvm->arch.use_cmma) &&
3866                     (vcpu->kvm->mm->context.uses_cmm))
3867                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3868                 goto retry;
3869         }
3870
3871         /* nothing to do, just clear the request */
3872         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3873         /* we left the vsie handler, nothing to do, just clear the request */
3874         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3875
3876         return 0;
3877 }
3878
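/*
 * The guest epoch is the difference between the guest and the host TOD.
 * With the multiple-epoch facility (139) the subtraction is effectively
 * 128 bits wide: if the 64-bit epoch wrapped (epoch > gtod->tod), one is
 * borrowed from the epoch index.
 */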
3879 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3880 {
3881         struct kvm_vcpu *vcpu;
3882         struct kvm_s390_tod_clock_ext htod;
3883         int i;
3884
3885         preempt_disable();
3886
3887         get_tod_clock_ext((char *)&htod);
3888
3889         kvm->arch.epoch = gtod->tod - htod.tod;
3890         kvm->arch.epdx = 0;
3891         if (test_kvm_facility(kvm, 139)) {
3892                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3893                 if (kvm->arch.epoch > gtod->tod)
3894                         kvm->arch.epdx -= 1;
3895         }
3896
3897         kvm_s390_vcpu_block_all(kvm);
3898         kvm_for_each_vcpu(i, vcpu, kvm) {
3899                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3900                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3901         }
3902
3903         kvm_s390_vcpu_unblock_all(kvm);
3904         preempt_enable();
3905 }
3906
3907 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3908 {
3909         if (!mutex_trylock(&kvm->lock))
3910                 return 0;
3911         __kvm_s390_set_tod_clock(kvm, gtod);
3912         mutex_unlock(&kvm->lock);
3913         return 1;
3914 }
3915
3916 /**
3917  * kvm_arch_fault_in_page - fault-in guest page if necessary
3918  * @vcpu: The corresponding virtual cpu
3919  * @gpa: Guest physical address
3920  * @writable: Whether the page should be writable or not
3921  *
3922  * Make sure that a guest page has been faulted-in on the host.
3923  *
3924  * Return: Zero on success, negative error code otherwise.
3925  */
3926 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3927 {
3928         return gmap_fault(vcpu->arch.gmap, gpa,
3929                           writable ? FAULT_FLAG_WRITE : 0);
3930 }
3931
3932 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3933                                       unsigned long token)
3934 {
3935         struct kvm_s390_interrupt inti;
3936         struct kvm_s390_irq irq;
3937
3938         if (start_token) {
3939                 irq.u.ext.ext_params2 = token;
3940                 irq.type = KVM_S390_INT_PFAULT_INIT;
3941                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3942         } else {
3943                 inti.type = KVM_S390_INT_PFAULT_DONE;
3944                 inti.parm64 = token;
3945                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3946         }
3947 }
3948
3949 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3950                                      struct kvm_async_pf *work)
3951 {
3952         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3953         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3954
3955         return true;
3956 }
3957
3958 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3959                                  struct kvm_async_pf *work)
3960 {
3961         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3962         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3963 }
3964
3965 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3966                                struct kvm_async_pf *work)
3967 {
3968         /* s390 will always inject the page directly */
3969 }
3970
3971 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3972 {
3973         /*
3974          * s390 will always inject the page directly,
3975          * but we still want kvm_check_async_pf_completion() to clean up
3976          */
3977         return true;
3978 }
3979
3980 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3981 {
3982         hva_t hva;
3983         struct kvm_arch_async_pf arch;
3984
3985         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3986                 return false;
3987         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3988             vcpu->arch.pfault_compare)
3989                 return false;
3990         if (psw_extint_disabled(vcpu))
3991                 return false;
3992         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3993                 return false;
3994         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3995                 return false;
3996         if (!vcpu->arch.gmap->pfault_enabled)
3997                 return false;
3998
3999         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4000         hva += current->thread.gmap_addr & ~PAGE_MASK;
4001         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4002                 return false;
4003
4004         return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4005 }
4006
4007 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4008 {
4009         int rc, cpuflags;
4010
4011         /*
4012          * On s390 notifications for arriving pages will be delivered directly
4013          * to the guest, but the housekeeping for completed pfaults is
4014          * handled outside the worker.
4015          */
4016         kvm_check_async_pf_completion(vcpu);
4017
4018         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4019         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4020
4021         if (need_resched())
4022                 schedule();
4023
4024         if (!kvm_is_ucontrol(vcpu->kvm)) {
4025                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4026                 if (rc)
4027                         return rc;
4028         }
4029
4030         rc = kvm_s390_handle_requests(vcpu);
4031         if (rc)
4032                 return rc;
4033
4034         if (guestdbg_enabled(vcpu)) {
4035                 kvm_s390_backup_guest_per_regs(vcpu);
4036                 kvm_s390_patch_guest_per_regs(vcpu);
4037         }
4038
4039         clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
4040
4041         vcpu->arch.sie_block->icptcode = 0;
4042         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4043         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4044         trace_kvm_s390_sie_enter(vcpu, cpuflags);
4045
4046         return 0;
4047 }
4048
4049 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4050 {
4051         struct kvm_s390_pgm_info pgm_info = {
4052                 .code = PGM_ADDRESSING,
4053         };
4054         u8 opcode, ilen;
4055         int rc;
4056
4057         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4058         trace_kvm_s390_sie_fault(vcpu);
4059
4060         /*
4061          * We want to inject an addressing exception, which is defined as a
4062          * suppressing or terminating exception. However, since we came here
4063          * by a DAT access exception, the PSW still points to the faulting
4064          * instruction since DAT exceptions are nullifying. So we've got
4065          * to look up the current opcode to get the length of the instruction
4066          * to be able to forward the PSW.
4067          */
4068         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4069         ilen = insn_length(opcode);
4070         if (rc < 0) {
4071                 return rc;
4072         } else if (rc) {
4073                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4074                  * Forward by arbitrary ilc, injection will take care of
4075                  * nullification if necessary.
4076                  */
4077                 pgm_info = vcpu->arch.pgm;
4078                 ilen = 4;
4079         }
4080         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4081         kvm_s390_forward_psw(vcpu, ilen);
4082         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4083 }
4084
4085 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4086 {
4087         struct mcck_volatile_info *mcck_info;
4088         struct sie_page *sie_page;
4089
4090         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4091                    vcpu->arch.sie_block->icptcode);
4092         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4093
4094         if (guestdbg_enabled(vcpu))
4095                 kvm_s390_restore_guest_per_regs(vcpu);
4096
4097         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4098         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4099
4100         if (exit_reason == -EINTR) {
4101                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4102                 sie_page = container_of(vcpu->arch.sie_block,
4103                                         struct sie_page, sie_block);
4104                 mcck_info = &sie_page->mcck_info;
4105                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4106                 return 0;
4107         }
4108
4109         if (vcpu->arch.sie_block->icptcode > 0) {
4110                 int rc = kvm_handle_sie_intercept(vcpu);
4111
4112                 if (rc != -EOPNOTSUPP)
4113                         return rc;
4114                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4115                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4116                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4117                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4118                 return -EREMOTE;
4119         } else if (exit_reason != -EFAULT) {
4120                 vcpu->stat.exit_null++;
4121                 return 0;
4122         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4123                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4124                 vcpu->run->s390_ucontrol.trans_exc_code =
4125                                                 current->thread.gmap_addr;
4126                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4127                 return -EREMOTE;
4128         } else if (current->thread.gmap_pfault) {
4129                 trace_kvm_s390_major_guest_pfault(vcpu);
4130                 current->thread.gmap_pfault = 0;
4131                 if (kvm_arch_setup_async_pf(vcpu))
4132                         return 0;
4133                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4134         }
4135         return vcpu_post_run_fault_in_sie(vcpu);
4136 }
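
#if 0	/* Userspace-side sketch, not kernel code: once the -EREMOTE exits
	 * prepared above make KVM_RUN return, a VMM would dispatch on the
	 * exit reason. handle_sieic() and map_guest_segment() are
	 * hypothetical names.
	 */
	switch (run->exit_reason) {
	case KVM_EXIT_S390_SIEIC:	/* unhandled SIE intercept */
		handle_sieic(run->s390_sieic.icptcode,
			     run->s390_sieic.ipa, run->s390_sieic.ipb);
		break;
	case KVM_EXIT_S390_UCONTROL:	/* fault in user-controlled VM */
		map_guest_segment(run->s390_ucontrol.trans_exc_code);
		break;
	}
#endif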
4137
4138 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4139 static int __vcpu_run(struct kvm_vcpu *vcpu)
4140 {
4141         int rc, exit_reason;
4142         struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4143
4144         /*
4145          * We try to hold kvm->srcu during most of vcpu_run (except when
4146          * running the guest), so that memslots (and other stuff) are protected
4147          */
4148         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4149
4150         do {
4151                 rc = vcpu_pre_run(vcpu);
4152                 if (rc)
4153                         break;
4154
4155                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4156                 /*
4157                  * As PF_VCPU will be used in the fault handler, there must
4158                  * be no uaccess between guest_enter and guest_exit.
4159                  */
4160                 local_irq_disable();
4161                 guest_enter_irqoff();
4162                 __disable_cpu_timer_accounting(vcpu);
4163                 local_irq_enable();
4164                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4165                         memcpy(sie_page->pv_grregs,
4166                                vcpu->run->s.regs.gprs,
4167                                sizeof(sie_page->pv_grregs));
4168                 }
4169                 exit_reason = sie64a(vcpu->arch.sie_block,
4170                                      vcpu->run->s.regs.gprs);
4171                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4172                         memcpy(vcpu->run->s.regs.gprs,
4173                                sie_page->pv_grregs,
4174                                sizeof(sie_page->pv_grregs));
4175                         /*
4176                          * We're not allowed to inject interrupts on intercepts
4177                          * that leave the guest state in an "in-between" state
4178                          * where the next SIE entry will do a continuation.
4179                          * Fence interrupts in our "internal" PSW.
4180                          */
4181                         if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4182                             vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4183                                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4184                         }
4185                 }
4186                 local_irq_disable();
4187                 __enable_cpu_timer_accounting(vcpu);
4188                 guest_exit_irqoff();
4189                 local_irq_enable();
4190                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4191
4192                 rc = vcpu_post_run(vcpu, exit_reason);
4193         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4194
4195         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4196         return rc;
4197 }
4198
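/*
 * Sync register state that exists only for non-protected ("fmt2") guests
 * from kvm_run into the SIE control block; sync_regs() below calls this
 * only when the vCPU is not protected.
 */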
4199 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4200 {
4201         struct kvm_run *kvm_run = vcpu->run;
4202         struct runtime_instr_cb *riccb;
4203         struct gs_cb *gscb;
4204
4205         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4206         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4207         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4208         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4209         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4210                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4211                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4212                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4213         }
4214         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4215                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4216                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4217                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4218                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4219                         kvm_clear_async_pf_completion_queue(vcpu);
4220         }
4221         if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4222                 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4223                 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4224         }
4225         /*
4226          * If userspace sets the riccb (e.g. after migration) to a valid state,
4227          * we should enable RI here instead of doing the lazy enablement.
4228          */
4229         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4230             test_kvm_facility(vcpu->kvm, 64) &&
4231             riccb->v &&
4232             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4233                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4234                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4235         }
4236         /*
4237          * If userspace sets the gscb (e.g. after migration) to non-zero,
4238          * we should enable GS here instead of doing the lazy enablement.
4239          */
4240         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4241             test_kvm_facility(vcpu->kvm, 133) &&
4242             gscb->gssm &&
4243             !vcpu->arch.gs_enabled) {
4244                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4245                 vcpu->arch.sie_block->ecb |= ECB_GS;
4246                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4247                 vcpu->arch.gs_enabled = 1;
4248         }
4249         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4250             test_kvm_facility(vcpu->kvm, 82)) {
4251                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4252                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4253         }
4254         if (MACHINE_HAS_GS) {
4255                 preempt_disable();
4256                 __ctl_set_bit(2, 4);
4257                 if (current->thread.gs_cb) {
4258                         vcpu->arch.host_gscb = current->thread.gs_cb;
4259                         save_gs_cb(vcpu->arch.host_gscb);
4260                 }
4261                 if (vcpu->arch.gs_enabled) {
4262                         current->thread.gs_cb = (struct gs_cb *)
4263                                                 &vcpu->run->s.regs.gscb;
4264                         restore_gs_cb(current->thread.gs_cb);
4265                 }
4266                 preempt_enable();
4267         }
4268         /* SIE will load etoken directly from SDNX and therefore kvm_run */
4269 }
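
#if 0	/* Userspace-side sketch, not kernel code: a VMM flags which
	 * kvm_run->s.regs fields it changed so the code above picks them up
	 * on the next entry. vcpu_fd and the diag318 value are assumptions.
	 */
	run->s.regs.diag318 = 0x0000000000000001ULL;	/* hypothetical */
	run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
	ioctl(vcpu_fd, KVM_RUN, 0);
#endif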
4270
4271 static void sync_regs(struct kvm_vcpu *vcpu)
4272 {
4273         struct kvm_run *kvm_run = vcpu->run;
4274
4275         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4276                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4277         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4278                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4279                 /* some control register changes require a tlb flush */
4280                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4281         }
4282         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4283                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4284                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4285         }
4286         save_access_regs(vcpu->arch.host_acrs);
4287         restore_access_regs(vcpu->run->s.regs.acrs);
4288         /* save host (userspace) fprs/vrs */
4289         save_fpu_regs();
4290         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4291         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4292         if (MACHINE_HAS_VX)
4293                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4294         else
4295                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4296         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4297         if (test_fp_ctl(current->thread.fpu.fpc))
4298                 /* User space provided an invalid FPC, let's clear it */
4299                 current->thread.fpu.fpc = 0;
4300
4301         /* Sync fmt2 only data */
4302         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4303                 sync_regs_fmt2(vcpu);
4304         } else {
4305                 /*
4306                  * In several places we have to modify our internal view to
4307                  * not do things that are disallowed by the ultravisor. For
4308                  * example we must not inject interrupts after specific exits
4309                  * (e.g. 112 prefix page not secure). We do this by turning
4310                  * off the machine check, external and I/O interrupt bits
4311                  * of our PSW copy. To avoid getting validity intercepts, we
4312          * only accept the condition code from userspace.
4313                  */
4314                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4315                 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4316                                                    PSW_MASK_CC;
4317         }
4318
4319         kvm_run->kvm_dirty_regs = 0;
4320 }
4321
4322 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4323 {
4324         struct kvm_run *kvm_run = vcpu->run;
4325
4326         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4327         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4328         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4329         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4330         kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4331         if (MACHINE_HAS_GS) {
4332                 preempt_disable();
4333                 __ctl_set_bit(2, 4);
4334                 if (vcpu->arch.gs_enabled)
4335                         save_gs_cb(current->thread.gs_cb);
4336                 current->thread.gs_cb = vcpu->arch.host_gscb;
4337                 restore_gs_cb(vcpu->arch.host_gscb);
4338                 if (!vcpu->arch.host_gscb)
4339                         __ctl_clear_bit(2, 4);
4340                 vcpu->arch.host_gscb = NULL;
4341                 preempt_enable();
4342         }
4343         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4344 }
4345
4346 static void store_regs(struct kvm_vcpu *vcpu)
4347 {
4348         struct kvm_run *kvm_run = vcpu->run;
4349
4350         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4351         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4352         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4353         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4354         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4355         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4356         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4357         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4358         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4359         save_access_regs(vcpu->run->s.regs.acrs);
4360         restore_access_regs(vcpu->arch.host_acrs);
4361         /* Save guest register state */
4362         save_fpu_regs();
4363         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4364         /* Restore will be done lazily at return */
4365         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4366         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4367         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4368                 store_regs_fmt2(vcpu);
4369 }
4370
4371 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4372 {
4373         struct kvm_run *kvm_run = vcpu->run;
4374         int rc;
4375
4376         if (kvm_run->immediate_exit)
4377                 return -EINTR;
4378
4379         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4380             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4381                 return -EINVAL;
4382
4383         vcpu_load(vcpu);
4384
4385         if (guestdbg_exit_pending(vcpu)) {
4386                 kvm_s390_prepare_debug_exit(vcpu);
4387                 rc = 0;
4388                 goto out;
4389         }
4390
4391         kvm_sigset_activate(vcpu);
4392
4393         /*
4394          * no need to check the return value of vcpu_start as it can only
4395          * fail for protvirt, but protvirt implies user controlled cpu state
4396          */
4397         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4398                 kvm_s390_vcpu_start(vcpu);
4399         } else if (is_vcpu_stopped(vcpu)) {
4400                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4401                                    vcpu->vcpu_id);
4402                 rc = -EINVAL;
4403                 goto out;
4404         }
4405
4406         sync_regs(vcpu);
4407         enable_cpu_timer_accounting(vcpu);
4408
4409         might_fault();
4410         rc = __vcpu_run(vcpu);
4411
4412         if (signal_pending(current) && !rc) {
4413                 kvm_run->exit_reason = KVM_EXIT_INTR;
4414                 rc = -EINTR;
4415         }
4416
4417         if (guestdbg_exit_pending(vcpu) && !rc) {
4418                 kvm_s390_prepare_debug_exit(vcpu);
4419                 rc = 0;
4420         }
4421
4422         if (rc == -EREMOTE) {
4423                 /* userspace support is needed, kvm_run has been prepared */
4424                 rc = 0;
4425         }
4426
4427         disable_cpu_timer_accounting(vcpu);
4428         store_regs(vcpu);
4429
4430         kvm_sigset_deactivate(vcpu);
4431
4432         vcpu->stat.exit_userspace++;
4433 out:
4434         vcpu_put(vcpu);
4435         return rc;
4436 }
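
#if 0	/* Userspace-side sketch, not kernel code: a minimal KVM_RUN loop.
	 * A pending signal surfaces as errno == EINTR with run->exit_reason
	 * set to KVM_EXIT_INTR by the code above; vcpu_fd and handle_exit()
	 * are hypothetical.
	 */
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
			if (errno == EINTR)
				continue;	/* e.g. a timer signal */
			break;
		}
		if (!handle_exit(run))		/* dispatch on exit_reason */
			break;
	}
#endif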
4437
4438 /*
4439  * store status at address
4440  * we have two special cases:
4441  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4442  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4443  */
4444 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4445 {
4446         unsigned char archmode = 1;
4447         freg_t fprs[NUM_FPRS];
4448         unsigned int px;
4449         u64 clkcomp, cputm;
4450         int rc;
4451
4452         px = kvm_s390_get_prefix(vcpu);
4453         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4454                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4455                         return -EFAULT;
4456                 gpa = 0;
4457         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4458                 if (write_guest_real(vcpu, 163, &archmode, 1))
4459                         return -EFAULT;
4460                 gpa = px;
4461         } else
4462                 gpa -= __LC_FPREGS_SAVE_AREA;
4463
4464         /* manually convert vector registers if necessary */
4465         if (MACHINE_HAS_VX) {
4466                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4467                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4468                                      fprs, 128);
4469         } else {
4470                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4471                                      vcpu->run->s.regs.fprs, 128);
4472         }
4473         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4474                               vcpu->run->s.regs.gprs, 128);
4475         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4476                               &vcpu->arch.sie_block->gpsw, 16);
4477         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4478                               &px, 4);
4479         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4480                               &vcpu->run->s.regs.fpc, 4);
4481         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4482                               &vcpu->arch.sie_block->todpr, 4);
4483         cputm = kvm_s390_get_cpu_timer(vcpu);
4484         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4485                               &cputm, 8);
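	/* only bits 0-55 of the clock comparator are architecturally stored */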
4486         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4487         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4488                               &clkcomp, 8);
4489         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4490                               &vcpu->run->s.regs.acrs, 64);
4491         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4492                               &vcpu->arch.sie_block->gcr, 128);
4493         return rc ? -EFAULT : 0;
4494 }
4495
4496 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4497 {
4498         /*
4499          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4500          * switch in the run ioctl. Let's update our copies before we save
4501          * them into the save area.
4502          */
4503         save_fpu_regs();
4504         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4505         save_access_regs(vcpu->run->s.regs.acrs);
4506
4507         return kvm_s390_store_status_unloaded(vcpu, addr);
4508 }
4509
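/*
 * IBS pays off only while exactly one VCPU is in the started state, so
 * kvm_s390_vcpu_start()/stop() below use these helpers to queue the
 * matching enable/disable requests; the requests are processed on the
 * next guest entry.
 */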
4510 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4511 {
4512         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4513         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4514 }
4515
4516 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4517 {
4518         unsigned int i;
4519         struct kvm_vcpu *vcpu;
4520
4521         kvm_for_each_vcpu(i, vcpu, kvm) {
4522                 __disable_ibs_on_vcpu(vcpu);
4523         }
4524 }
4525
4526 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4527 {
4528         if (!sclp.has_ibs)
4529                 return;
4530         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4531         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4532 }
4533
4534 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4535 {
4536         int i, online_vcpus, r = 0, started_vcpus = 0;
4537
4538         if (!is_vcpu_stopped(vcpu))
4539                 return 0;
4540
4541         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4542         /* Only one cpu at a time may enter/leave the STOPPED state. */
4543         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4544         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4545
4546         /* Let's tell the UV that we want to change into the operating state */
4547         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4548                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4549                 if (r) {
4550                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4551                         return r;
4552                 }
4553         }
4554
4555         for (i = 0; i < online_vcpus; i++) {
4556                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4557                         started_vcpus++;
4558         }
4559
4560         if (started_vcpus == 0) {
4561                 /* we're the only active VCPU -> speed it up */
4562                 __enable_ibs_on_vcpu(vcpu);
4563         } else if (started_vcpus == 1) {
4564                 /*
4565                  * As we are starting a second VCPU, we have to disable
4566                  * the IBS facility on all VCPUs to remove potentially
4567                  * outstanding ENABLE requests.
4568                  */
4569                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4570         }
4571
4572         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4573         /*
4574          * The real PSW might have changed due to a RESTART interpreted by the
4575          * ultravisor. We block all interrupts and let the next sie exit
4576          * refresh our view.
4577          */
4578         if (kvm_s390_pv_cpu_is_protected(vcpu))
4579                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4580         /*
4581          * Another VCPU might have used IBS while we were offline.
4582          * Let's play safe and flush the VCPU at startup.
4583          */
4584         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4585         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4586         return 0;
4587 }
4588
4589 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4590 {
4591         int i, online_vcpus, r = 0, started_vcpus = 0;
4592         struct kvm_vcpu *started_vcpu = NULL;
4593
4594         if (is_vcpu_stopped(vcpu))
4595                 return 0;
4596
4597         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4598         /* Only one cpu at a time may enter/leave the STOPPED state. */
4599         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4600         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4601
4602         /* Let's tell the UV that we want to change into the stopped state */
4603         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4604                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4605                 if (r) {
4606                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4607                         return r;
4608                 }
4609         }
4610
4611         /*
4612          * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4613          * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4614          * have been fully processed. This will ensure that the VCPU
4615          * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4616          */
4617         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4618         kvm_s390_clear_stop_irq(vcpu);
4619
4620         __disable_ibs_on_vcpu(vcpu);
4621
4622         for (i = 0; i < online_vcpus; i++) {
4623                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4624                         started_vcpus++;
4625                         started_vcpu = vcpu->kvm->vcpus[i];
4626                 }
4627         }
4628
4629         if (started_vcpus == 1) {
4630                 /*
4631                  * As we only have one VCPU left, we want to enable the
4632                  * IBS facility for that VCPU to speed it up.
4633                  */
4634                 __enable_ibs_on_vcpu(started_vcpu);
4635         }
4636
4637         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4638         return 0;
4639 }
4640
4641 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4642                                      struct kvm_enable_cap *cap)
4643 {
4644         int r;
4645
4646         if (cap->flags)
4647                 return -EINVAL;
4648
4649         switch (cap->cap) {
4650         case KVM_CAP_S390_CSS_SUPPORT:
4651                 if (!vcpu->kvm->arch.css_support) {
4652                         vcpu->kvm->arch.css_support = 1;
4653                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4654                         trace_kvm_s390_enable_css(vcpu->kvm);
4655                 }
4656                 r = 0;
4657                 break;
4658         default:
4659                 r = -EINVAL;
4660                 break;
4661         }
4662         return r;
4663 }
4664
4665 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4666                                    struct kvm_s390_mem_op *mop)
4667 {
4668         void __user *uaddr = (void __user *)mop->buf;
4669         int r = 0;
4670
4671         if (mop->flags || !mop->size)
4672                 return -EINVAL;
4673         if (mop->size + mop->sida_offset < mop->size)
4674                 return -EINVAL;
4675         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4676                 return -E2BIG;
4677         if (!kvm_s390_pv_cpu_is_protected(vcpu))
4678                 return -EINVAL;
4679
4680         switch (mop->op) {
4681         case KVM_S390_MEMOP_SIDA_READ:
4682                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4683                                  mop->sida_offset), mop->size))
4684                         r = -EFAULT;
4685
4686                 break;
4687         case KVM_S390_MEMOP_SIDA_WRITE:
4688                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4689                                    mop->sida_offset), uaddr, mop->size))
4690                         r = -EFAULT;
4691                 break;
4692         }
4693         return r;
4694 }
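
#if 0	/* Userspace-side sketch, not kernel code: exercising the memop
	 * handlers around here via the KVM_S390_MEM_OP vcpu ioctl. The
	 * buffer, address and fd names are assumptions.
	 */
	__u8 buf[512];
	struct kvm_s390_mem_op op = {
		.gaddr = 0x10000,		/* guest logical address */
		.size = sizeof(buf),
		.op = KVM_S390_MEMOP_LOGICAL_READ,
		.buf = (__u64)(unsigned long)buf,
		.ar = 0,			/* access register 0 */
	};

	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
#endif
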
4695 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4696                                   struct kvm_s390_mem_op *mop)
4697 {
4698         void __user *uaddr = (void __user *)mop->buf;
4699         void *tmpbuf = NULL;
4700         int r = 0;
4701         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4702                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4703
4704         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4705                 return -EINVAL;
4706
4707         if (mop->size > MEM_OP_MAX_SIZE)
4708                 return -E2BIG;
4709
4710         if (kvm_s390_pv_cpu_is_protected(vcpu))
4711                 return -EINVAL;
4712
4713         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4714                 tmpbuf = vmalloc(mop->size);
4715                 if (!tmpbuf)
4716                         return -ENOMEM;
4717         }
4718
4719         switch (mop->op) {
4720         case KVM_S390_MEMOP_LOGICAL_READ:
4721                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4722                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4723                                             mop->size, GACC_FETCH);
4724                         break;
4725                 }
4726                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4727                 if (r == 0) {
4728                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4729                                 r = -EFAULT;
4730                 }
4731                 break;
4732         case KVM_S390_MEMOP_LOGICAL_WRITE:
4733                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4734                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4735                                             mop->size, GACC_STORE);
4736                         break;
4737                 }
4738                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4739                         r = -EFAULT;
4740                         break;
4741                 }
4742                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4743                 break;
4744         }
4745
4746         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4747                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4748
4749         vfree(tmpbuf);
4750         return r;
4751 }
4752
4753 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4754                                       struct kvm_s390_mem_op *mop)
4755 {
4756         int r, srcu_idx;
4757
4758         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4759
4760         switch (mop->op) {
4761         case KVM_S390_MEMOP_LOGICAL_READ:
4762         case KVM_S390_MEMOP_LOGICAL_WRITE:
4763                 r = kvm_s390_guest_mem_op(vcpu, mop);
4764                 break;
4765         case KVM_S390_MEMOP_SIDA_READ:
4766         case KVM_S390_MEMOP_SIDA_WRITE:
4767                 /* we are locked against sida going away by the vcpu->mutex */
4768                 r = kvm_s390_guest_sida_op(vcpu, mop);
4769                 break;
4770         default:
4771                 r = -EINVAL;
4772         }
4773
4774         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4775         return r;
4776 }
4777
4778 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4779                                unsigned int ioctl, unsigned long arg)
4780 {
4781         struct kvm_vcpu *vcpu = filp->private_data;
4782         void __user *argp = (void __user *)arg;
4783
4784         switch (ioctl) {
4785         case KVM_S390_IRQ: {
4786                 struct kvm_s390_irq s390irq;
4787
4788                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4789                         return -EFAULT;
4790                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4791         }
4792         case KVM_S390_INTERRUPT: {
4793                 struct kvm_s390_interrupt s390int;
4794                 struct kvm_s390_irq s390irq = {};
4795
4796                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4797                         return -EFAULT;
4798                 if (s390int_to_s390irq(&s390int, &s390irq))
4799                         return -EINVAL;
4800                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4801         }
4802         }
4803         return -ENOIOCTLCMD;
4804 }
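
#if 0	/* Userspace-side sketch, not kernel code: injecting an emergency
	 * signal through the async KVM_S390_IRQ path handled above; vcpu_fd
	 * and the emitting cpu address are assumptions.
	 */
	struct kvm_s390_irq irq = {
		.type = KVM_S390_INT_EMERGENCY,
		.u.emerg.code = 1,		/* hypothetical cpu address */
	};

	ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
#endif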
4805
4806 long kvm_arch_vcpu_ioctl(struct file *filp,
4807                          unsigned int ioctl, unsigned long arg)
4808 {
4809         struct kvm_vcpu *vcpu = filp->private_data;
4810         void __user *argp = (void __user *)arg;
4811         int idx;
4812         long r;
4813         u16 rc, rrc;
4814
4815         vcpu_load(vcpu);
4816
4817         switch (ioctl) {
4818         case KVM_S390_STORE_STATUS:
4819                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4820                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4821                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4822                 break;
4823         case KVM_S390_SET_INITIAL_PSW: {
4824                 psw_t psw;
4825
4826                 r = -EFAULT;
4827                 if (copy_from_user(&psw, argp, sizeof(psw)))
4828                         break;
4829                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4830                 break;
4831         }
4832         case KVM_S390_CLEAR_RESET:
4833                 r = 0;
4834                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4835                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4836                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4837                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4838                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4839                                    rc, rrc);
4840                 }
4841                 break;
4842         case KVM_S390_INITIAL_RESET:
4843                 r = 0;
4844                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4845                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4846                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4847                                           UVC_CMD_CPU_RESET_INITIAL,
4848                                           &rc, &rrc);
4849                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4850                                    rc, rrc);
4851                 }
4852                 break;
4853         case KVM_S390_NORMAL_RESET:
4854                 r = 0;
4855                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4856                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4857                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4858                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4859                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4860                                    rc, rrc);
4861                 }
4862                 break;
4863         case KVM_SET_ONE_REG:
4864         case KVM_GET_ONE_REG: {
4865                 struct kvm_one_reg reg;
4866                 r = -EINVAL;
4867                 if (kvm_s390_pv_cpu_is_protected(vcpu))
4868                         break;
4869                 r = -EFAULT;
4870                 if (copy_from_user(&reg, argp, sizeof(reg)))
4871                         break;
4872                 if (ioctl == KVM_SET_ONE_REG)
4873                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4874                 else
4875                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4876                 break;
4877         }
4878 #ifdef CONFIG_KVM_S390_UCONTROL
4879         case KVM_S390_UCAS_MAP: {
4880                 struct kvm_s390_ucas_mapping ucasmap;
4881
4882                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4883                         r = -EFAULT;
4884                         break;
4885                 }
4886
4887                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4888                         r = -EINVAL;
4889                         break;
4890                 }
4891
4892                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4893                                      ucasmap.vcpu_addr, ucasmap.length);
4894                 break;
4895         }
4896         case KVM_S390_UCAS_UNMAP: {
4897                 struct kvm_s390_ucas_mapping ucasmap;
4898
4899                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4900                         r = -EFAULT;
4901                         break;
4902                 }
4903
4904                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4905                         r = -EINVAL;
4906                         break;
4907                 }
4908
4909                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4910                         ucasmap.length);
4911                 break;
4912         }
4913 #endif
4914         case KVM_S390_VCPU_FAULT: {
4915                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4916                 break;
4917         }
4918         case KVM_ENABLE_CAP:
4919         {
4920                 struct kvm_enable_cap cap;
4921                 r = -EFAULT;
4922                 if (copy_from_user(&cap, argp, sizeof(cap)))
4923                         break;
4924                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4925                 break;
4926         }
4927         case KVM_S390_MEM_OP: {
4928                 struct kvm_s390_mem_op mem_op;
4929
4930                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4931                         r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4932                 else
4933                         r = -EFAULT;
4934                 break;
4935         }
4936         case KVM_S390_SET_IRQ_STATE: {
4937                 struct kvm_s390_irq_state irq_state;
4938
4939                 r = -EFAULT;
4940                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4941                         break;
4942                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4943                     irq_state.len == 0 ||
4944                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4945                         r = -EINVAL;
4946                         break;
4947                 }
4948                 /* do not use irq_state.flags, it will break old QEMUs */
4949                 r = kvm_s390_set_irq_state(vcpu,
4950                                            (void __user *) irq_state.buf,
4951                                            irq_state.len);
4952                 break;
4953         }
4954         case KVM_S390_GET_IRQ_STATE: {
4955                 struct kvm_s390_irq_state irq_state;
4956
4957                 r = -EFAULT;
4958                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4959                         break;
4960                 if (irq_state.len == 0) {
4961                         r = -EINVAL;
4962                         break;
4963                 }
4964                 /* do not use irq_state.flags, it will break old QEMUs */
4965                 r = kvm_s390_get_irq_state(vcpu,
4966                                            (__u8 __user *)  irq_state.buf,
4967                                            irq_state.len);
4968                 break;
4969         }
4970         default:
4971                 r = -ENOTTY;
4972         }
4973
4974         vcpu_put(vcpu);
4975         return r;
4976 }
4977
4978 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4979 {
4980 #ifdef CONFIG_KVM_S390_UCONTROL
4981         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4982                  && (kvm_is_ucontrol(vcpu->kvm))) {
4983                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4984                 get_page(vmf->page);
4985                 return 0;
4986         }
4987 #endif
4988         return VM_FAULT_SIGBUS;
4989 }
4990
4991 /* Section: memory related */
4992 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4993                                    struct kvm_memory_slot *memslot,
4994                                    const struct kvm_userspace_memory_region *mem,
4995                                    enum kvm_mr_change change)
4996 {
4997         /* A few sanity checks. Memory slots have to start and end at a
4998            segment boundary (1MB). The memory in userland may be fragmented
4999            into various different vmas. It is okay to mmap() and munmap()
5000            stuff in this slot at any time after this call. */
5001
5002         if (mem->userspace_addr & 0xffffful)
5003                 return -EINVAL;
5004
5005         if (mem->memory_size & 0xffffful)
5006                 return -EINVAL;
5007
5008         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5009                 return -EINVAL;
5010
5011         /* When we are protected, we should not change the memory slots */
5012         if (kvm_s390_pv_get_handle(kvm))
5013                 return -EINVAL;
5014
5015         if (!kvm->arch.migration_mode)
5016                 return 0;
5017
5018         /*
5019          * Turn off migration mode when:
5020          * - userspace creates a new memslot with dirty logging off,
5021          * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
5022          *   dirty logging is turned off.
5023          * Migration mode expects dirty page logging to be enabled in
5024          * order to store its dirty bitmap.
5025          */
5026         if (change != KVM_MR_DELETE &&
5027             !(mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
5028                 WARN(kvm_s390_vm_stop_migration(kvm),
5029                      "Failed to stop migration mode");
5030
5031         return 0;
5032 }
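
#if 0	/* Userspace-side sketch, not kernel code: a memslot that passes the
	 * checks above; both address and size are multiples of 1 MB. vm_fd
	 * and backing are assumptions.
	 */
	struct kvm_userspace_memory_region region = {
		.slot = 0,
		.flags = 0,
		.guest_phys_addr = 0,
		.memory_size = 256 << 20,			 /* 256 MB */
		.userspace_addr = (__u64)(unsigned long)backing, /* 1 MB aligned */
	};

	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
#endif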
5033
5034 void kvm_arch_commit_memory_region(struct kvm *kvm,
5035                                 const struct kvm_userspace_memory_region *mem,
5036                                 struct kvm_memory_slot *old,
5037                                 const struct kvm_memory_slot *new,
5038                                 enum kvm_mr_change change)
5039 {
5040         int rc = 0;
5041
5042         switch (change) {
5043         case KVM_MR_DELETE:
5044                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5045                                         old->npages * PAGE_SIZE);
5046                 break;
5047         case KVM_MR_MOVE:
5048                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5049                                         old->npages * PAGE_SIZE);
5050                 if (rc)
5051                         break;
5052                 fallthrough;
5053         case KVM_MR_CREATE:
5054                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5055                                       mem->guest_phys_addr, mem->memory_size);
5056                 break;
5057         case KVM_MR_FLAGS_ONLY:
5058                 break;
5059         default:
5060                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5061         }
5062         if (rc)
5063                 pr_warn("failed to commit memory region\n");
5064         return;
5065 }
5066
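/*
 * sclp.hmfai holds sixteen 2-bit fields, one per doubleword of the STFLE
 * facility list. nonhyp_mask(i) extracts field i (leftmost field first)
 * and builds a mask that clears the top 16 * (field + 1) bits of that
 * doubleword, so a field value of 3 masks off the whole block.
 */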
5067 static inline unsigned long nonhyp_mask(int i)
5068 {
5069         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5070
5071         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5072 }
5073
5074 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5075 {
5076         vcpu->valid_wakeup = false;
5077 }
5078
5079 static int __init kvm_s390_init(void)
5080 {
5081         int i;
5082
5083         if (!sclp.has_sief2) {
5084                 pr_info("SIE is not available\n");
5085                 return -ENODEV;
5086         }
5087
5088         if (nested && hpage) {
5089                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5090                 return -EINVAL;
5091         }
5092
5093         for (i = 0; i < 16; i++)
5094                 kvm_s390_fac_base[i] |=
5095                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5096
5097         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5098 }
5099
5100 static void __exit kvm_s390_exit(void)
5101 {
5102         kvm_exit();
5103 }
5104
5105 module_init(kvm_s390_init);
5106 module_exit(kvm_s390_exit);
5107
5108 /*
5109  * Enable autoloading of the kvm module.
5110  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5111  * since x86 takes a different approach.
5112  */
5113 #include <linux/miscdevice.h>
5114 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5115 MODULE_ALIAS("devname:kvm");