// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_io_request", VCPU_STAT(exit_io_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program", VCPU_STAT(deliver_program) },
	{ "deliver_io", VCPU_STAT(deliver_io) },
	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "inject_ckc", VCPU_STAT(inject_ckc) },
	{ "inject_cputm", VCPU_STAT(inject_cputm) },
	{ "inject_external_call", VCPU_STAT(inject_external_call) },
	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
	{ "inject_io", VM_STAT(inject_io) },
	{ "inject_mchk", VCPU_STAT(inject_mchk) },
	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
	{ "inject_program", VCPU_STAT(inject_program) },
	{ "inject_restart", VCPU_STAT(inject_restart) },
	{ "inject_service_signal", VM_STAT(inject_service_signal) },
	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
	{ "inject_virtio", VM_STAT(inject_virtio) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }
};
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u64 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
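
/*
 * Example of the carry handling above: a TOD jump of +1 means adding
 * delta = -1 = 0xffffffffffffffff, with delta_idx = -1 as its sign
 * extension. For an old epoch of 5 the 64-bit add wraps to 4, the
 * "epoch < delta" test detects the carry and the +1 cancels delta_idx,
 * leaving epdx untouched. For an old epoch of 0 the result is
 * 0xffffffffffffffff, no carry is detected and epdx takes the -1,
 * i.e. the borrow propagates into the epoch index.
 */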
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}
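	/*
	 * Example of the bit packing above: function code i = 10 lands
	 * in plo[1] (10 >> 3) under mask 0x80 >> (10 & 7) = 0x20, i.e.
	 * the query results are stored MSB-first, matching the
	 * machine's bit numbering.
	 */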
	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);
	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);
	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);
	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	int rc;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		rc = -ENOMEM;
		goto out_debug_unreg;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("Failed to register FLIC rc=%d\n", rc);
		goto out_debug_unreg;
	}
	return 0;

out_debug_unreg:
	debug_unregister(kvm_s390_dbf);
	return rc;
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			down_write(&kvm->mm->mmap_sem);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			up_write(&kvm->mm->mmap_sem);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;
		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;
		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;
		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;
		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_s390_vcpu_crypto_setup(vcpu);

	kvm_s390_vcpu_unblock_all(kvm);
}
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}
static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}
static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
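
/*
 * Example for the epoch_idx adjustment in kvm_s390_get_tod_clock(): if
 * htod.tod is 0xfffffffffffffff0 and the guest epoch is 0x20, the
 * 64-bit sum wraps to 0x10. Since 0x10 < htod.tod, the overflow moved
 * the guest TOD into the next epoch, so epoch_idx is incremented.
 */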
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current, current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
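
/*
 * Example: with 8-byte pointers KVM_S390_MAX_BIT_DISTANCE is 16, so a
 * run of up to 16 clean pages (one byte of CMMA data each) is still
 * sent inline, which costs no more than the 16 bytes of base address
 * and length needed to start a new block.
 */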
/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->lru_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (start >= slots->used_slots)
		return slots->used_slots - 1;

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->lru_slot, start);
	}

	return start;
}
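
/*
 * Example: with one slot covering gfns [0, 0x100) and another covering
 * [0x200, 0x300), looking up gfn 0x180 falls into the hole between
 * them; unlike gfn_to_memslot(), the binary search above still returns
 * one of the two bordering slots, so callers can skip ahead instead of
 * failing.
 */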
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
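
/*
 * Note on the index arithmetic above: memslots are sorted by
 * descending base_gfn, so advancing to the next-higher gfn range means
 * decrementing slotidx. E.g. if slot 1 covers gfns [0, 0x100) and
 * slot 0 covers [0x200, 0x300), exhausting slot 1 continues the scan
 * in slot 0.
 */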
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (kvm->arch.migration_mode)
		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.uses_cmm) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.uses_cmm = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	/* start with basic SCA */
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	mutex_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	mutex_unlock(&kvm_lock);
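	/*
	 * Example of the staggering above: successive VMs get their
	 * basic SCA at offsets 0, 16, 32, ... within its page, wrapping
	 * back to 0 before the block would cross the page end, so the
	 * SCAs (and the contended ipte control at their start) of
	 * different VMs do not all share the same offset.
	 */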
	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;

	for (i = 0; i < kvm_s390_fac_size(); i++) {
		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
					      (kvm_s390_fac_base[i] |
					       kvm_s390_fac_ext[i]);
		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
					      kvm_s390_fac_base[i];
	}

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.use_pfmfi = sclp.has_pfmfi;
	kvm->arch.use_skf = sclp.has_skey;
	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kvm_s390_gisa_destroy(kvm);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
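
/*
 * sca_switch_to_extended() below grows a VM from the basic to the extended
 * SCA format: it allocates a zeroed ESCA, blocks all VCPUs, copies the
 * entries over under the sca_lock write lock, repoints every SIE block
 * (scaoh/scaol plus ECB2_ESCA) at the new block, and only then frees the
 * old BSCA.
 */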
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
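
/*
 * The set/get accessors below pair with the accounting helpers above via
 * cputm_seqcount: the writer bumps the sequence around each update of
 * cputm and cputm_start, so readers on other threads can retry until they
 * observe a consistent pair of values.
 */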
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
					CR0_INTERRUPT_KEY_SUBMASK |
					CR0_MEASUREMENT_ALERT_SUBMASK;
	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
					CR14_UNUSED_33 |
					CR14_EXTERNAL_DAMAGE_SUBMASK;
	vcpu->run->s.regs.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
		rc = -EINVAL;
		goto out;
	}
	if (!sclp.has_gpere) {
		rc = -EINVAL;
		goto out;
	}

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
	}

out:
	vcpu_put(vcpu);
	return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret;

	vcpu_load(vcpu);

	/* CHECK_STOP and LOAD are not supported yet */
	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				      KVM_MP_STATE_OPERATING;

	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	vcpu_load(vcpu);

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	vcpu_put(vcpu);
	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);

	return 0;
}
void kvm_s390_set_tod_clock(struct kvm *kvm,
			    const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
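
/*
 * Note for kvm_s390_set_tod_clock() above: with the multiple-epoch
 * facility (139) the guest TOD is extended by the epoch index (epdx); the
 * "epdx -= 1" handles the borrow when the 64-bit epoch subtraction wraps,
 * i.e. when kvm->arch.epoch ends up larger than the requested TOD.
 */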
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
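
/*
 * Note on __vcpu_run() above: interrupts stay disabled across the
 * guest_enter/guest_exit bookkeeping around sie64a(), and the host-side
 * CPU timer accounting is paused for that window, since the guest CPU
 * timer in the SIE block is maintained by the hardware while SIE runs.
 */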
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */

	kvm_run->kvm_dirty_regs = 0;
}
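
/*
 * sync_regs() and store_regs() are the two halves of the lazy register
 * switch: while the VCPU runs, the guest FP/VX state lives in
 * current->thread.fpu (the host copy is parked in vcpu->arch.host_fpregs),
 * so the actual low-level restore can be deferred to the return to user
 * space.
 */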
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
		preempt_enable();
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/*
	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
	 * have been fully processed. This will ensure that the VCPU
	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
	 */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	kvm_s390_clear_stop_irq(vcpu);

	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
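
/*
 * The started_vcpus bookkeeping in kvm_s390_vcpu_start/stop() above only
 * cares about the 0 <-> 1 <-> 2 transitions: IBS is used as a speed-up
 * only while exactly one VCPU is started, so it is enabled for a lone
 * starter and revoked on all VCPUs as soon as a second one comes online.
 */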
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
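
/*
 * Rough userspace usage sketch for the mem op above (illustrative only;
 * the struct fields are the ones defined for KVM_S390_MEM_OP in
 * <linux/kvm.h>, the fd and buffer names are made up):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buffer,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * A positive return value is a program exception number, which userspace
 * can ask to have injected by setting KVM_S390_MEMOP_F_INJECT_EXCEPTION.
 */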
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		/* FALLTHROUGH */
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				      mem->guest_phys_addr, mem->memory_size);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
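
/*
 * nonhyp_mask() extracts the i-th two-bit field of sclp.hmfai (a value
 * 0..3) and shifts the 48-bit facility mask right by 16 bits per step, so
 * larger hmfai codes keep correspondingly fewer facility bits when
 * kvm_s390_fac_base is seeded in kvm_s390_init() below.
 */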
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");