2 * hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008, 2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
38 #include <asm/pgtable.h>
41 #include <asm/switch_to.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
49 #define KMSG_COMPONENT "kvm-s390"
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
53 #define CREATE_TRACE_POINTS
55 #include "trace-s390.h"
57 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 (KVM_MAX_VCPUS + LOCAL_IRQS))
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/*
 * Per-VCPU statistics exported through debugfs.  Each entry maps a
 * debugfs file name to the byte offset of a counter inside
 * struct kvm_vcpu (via the VCPU_STAT() helper defined above).
 * NOTE(review): this chunk is a lossy extract — original line numbers
 * are embedded in the text and the table's closing "};" is not visible;
 * several lines appear to have been dropped by the extraction.
 */
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65 { "userspace_handled", VCPU_STAT(exit_userspace) },
66 { "exit_null", VCPU_STAT(exit_null) },
67 { "exit_validity", VCPU_STAT(exit_validity) },
68 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
69 { "exit_external_request", VCPU_STAT(exit_external_request) },
70 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 { "exit_instruction", VCPU_STAT(exit_instruction) },
72 { "exit_pei", VCPU_STAT(exit_pei) },
73 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
86 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
93 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
95 { "instruction_spx", VCPU_STAT(instruction_spx) },
96 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
97 { "instruction_stap", VCPU_STAT(instruction_stap) },
98 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
101 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
102 { "instruction_essa", VCPU_STAT(instruction_essa) },
103 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
104 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
105 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
106 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107 { "instruction_sie", VCPU_STAT(instruction_sie) },
108 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124 { "diagnose_10", VCPU_STAT(diagnose_10) },
125 { "diagnose_44", VCPU_STAT(diagnose_44) },
126 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
127 { "diagnose_258", VCPU_STAT(diagnose_258) },
128 { "diagnose_308", VCPU_STAT(diagnose_308) },
129 { "diagnose_500", VCPU_STAT(diagnose_500) },
/* NOTE(review): terminating sentinel entry and "};" missing from this extract. */
/*
 * Module-level state and parameters.
 * NOTE(review): lossy extract — embedded original line numbers remain in
 * the text and lines are missing (e.g. the body of the TOD-clock-extension
 * struct and the braces of kvm_s390_fac_list_mask_size()).
 */
/* 128-bit extended-TOD-clock value: struct body not visible in this extract. */
133 struct kvm_s390_tod_clock_ext {
139 /* allow nested virtualization in KVM (if enabled by user space) */
/* "nested" module parameter, read-only via sysfs (S_IRUGO). */
141 module_param(nested, int, S_IRUGO);
142 MODULE_PARM_DESC(nested, "Nested virtualization support");
144 /* upper facilities limit for kvm */
/* Mask of STFLE facilities KVM is willing to expose to guests. */
145 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
/* Returns the element count of the facility mask; compile-time checked
 * against S390_ARCH_FAC_MASK_SIZE_U64. */
147 unsigned long kvm_s390_fac_list_mask_size(void)
149 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
150 return ARRAY_SIZE(kvm_s390_fac_list_mask);
153 /* available cpu features supported by kvm */
154 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
155 /* available subfunctions indicated via query / "test bit" */
156 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
/* PTE-invalidation notifiers for the guest mapping and for vSIE. */
158 static struct gmap_notifier gmap_notifier;
159 static struct gmap_notifier vsie_gmap_notifier;
/* Handle for the s390 debug-feature trace area used by VM_EVENT/VCPU_EVENT. */
160 debug_info_t *kvm_s390_dbf;
162 /* Section: not file related */
/* Hardware enable is a no-op on s390: SIE is always usable.
 * NOTE(review): function bodies in this extract are incomplete (missing
 * braces/returns); comments describe apparent intent only. */
163 int kvm_arch_hardware_enable(void)
165 /* every s390 is virtualization enabled ;-) */
/* gmap PTE-notifier callback; body not visible in this extract. */
169 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
/* Apply a host TOD-clock delta to one SIE control block's guest epoch. */
172 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
177 * The TOD jumps by delta, we have to compensate this by adding
178 * -delta to the epoch.
182 /* sign-extension - we're adding to signed values below */
/* ECD_MEF: multiple-epoch facility active -> also adjust the epoch index
 * (epdx), with carry when the 64-bit epoch addition wrapped. */
187 if (scb->ecd & ECD_MEF) {
188 scb->epdx += delta_idx;
189 if (scb->epoch < delta)
195 * This callback is executed during stop_machine(). All CPUs are therefore
196 * temporarily stopped. In order not to change guest behavior, we have to
197 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
198 * so a CPU won't be stopped while calculating with the epoch.
/* Notifier: propagate a host clock-steering delta (*v) to every VCPU of
 * every VM, including vSIE shadow blocks and the CPU-timer start stamp. */
200 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
204 struct kvm_vcpu *vcpu;
206 unsigned long long *delta = v;
208 list_for_each_entry(kvm, &vm_list, vm_list) {
209 kvm_for_each_vcpu(i, vcpu, kvm) {
210 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
/* Presumably only done for vcpu 0 in the full source — the guard
 * line is missing here; TODO confirm against the original file. */
212 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
213 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
215 if (vcpu->arch.cputm_enabled)
216 vcpu->arch.cputm_start += *delta;
217 if (vcpu->arch.vsie_block)
218 kvm_clock_sync_scb(vcpu->arch.vsie_block,
225 static struct notifier_block kvm_clock_notifier = {
226 .notifier_call = kvm_clock_sync,
/*
 * Register/unregister the gmap PTE notifiers and the epoch-delta
 * (clock-steering) notifier.  setup and unsetup must stay symmetric.
 * NOTE(review): lossy extract — braces and return statements missing.
 */
229 int kvm_arch_hardware_setup(void)
231 gmap_notifier.notifier_call = kvm_gmap_notifier;
232 gmap_register_pte_notifier(&gmap_notifier);
233 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
234 gmap_register_pte_notifier(&vsie_gmap_notifier);
235 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
236 &kvm_clock_notifier);
240 void kvm_arch_hardware_unsetup(void)
242 gmap_unregister_pte_notifier(&gmap_notifier);
243 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
244 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
245 &kvm_clock_notifier);
/* Mark a CPU feature as available to guests (inverted-bit bitmap helper). */
248 static void allow_cpu_feat(unsigned long nr)
250 set_bit_inv(nr, kvm_s390_available_cpu_feat)
/* Test one PERFORM LOCKED OPERATION (PLO) "test bit" function-code bit;
 * inline-asm body not visible in this extract. */
253 static inline int plo_test_bit(unsigned char nr)
255 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
259 /* Parameter registers are ignored for "test bit" */
/*
 * Probe host CPU features/subfunctions (PLO, PTFF, the CPACF crypto
 * instructions gated by the MSA facilities, SIE interpretation features)
 * and record what can be offered to guests.
 * NOTE(review): lossy extract — embedded original line numbers remain and
 * many guard lines/braces are missing; comments describe apparent intent.
 */
269 static void kvm_s390_cpu_feat_init(void)
/* Probe all 256 possible PLO function codes; record each available one
 * as a bit in the big-endian-style plo[] byte array. */
273 for (i = 0; i < 256; ++i) {
275 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
278 if (test_facility(28)) /* TOD-clock steering */
279 ptff(kvm_s390_available_subfunc.ptff,
280 sizeof(kvm_s390_available_subfunc.ptff),
/* Message-security-assist levels: query each CPACF instruction only when
 * the corresponding STFLE facility is installed. */
283 if (test_facility(17)) { /* MSA */
284 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
285 kvm_s390_available_subfunc.kmac);
286 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
287 kvm_s390_available_subfunc.kmc);
288 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
289 kvm_s390_available_subfunc.km);
290 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
291 kvm_s390_available_subfunc.kimd);
292 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
293 kvm_s390_available_subfunc.klmd);
295 if (test_facility(76)) /* MSA3 */
296 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
297 kvm_s390_available_subfunc.pckmo);
298 if (test_facility(77)) { /* MSA4 */
299 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
300 kvm_s390_available_subfunc.kmctr);
301 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
302 kvm_s390_available_subfunc.kmf);
303 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
304 kvm_s390_available_subfunc.kmo);
305 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
306 kvm_s390_available_subfunc.pcc);
308 if (test_facility(57)) /* MSA5 */
309 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
310 kvm_s390_available_subfunc.ppno);
312 if (test_facility(146)) /* MSA8 */
313 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
314 kvm_s390_available_subfunc.kma);
316 if (MACHINE_HAS_ESOP)
317 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
319 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
320 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
/* Gate all vSIE (nested) features on the prerequisites above and on the
 * "nested" module parameter. */
322 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
323 !test_facility(3) || !nested)
325 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
326 if (sclp.has_64bscao)
327 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
/* NOTE(review): the sclp.has_* guard lines for the features below were
 * dropped by the extraction; each allow_cpu_feat() is presumably
 * conditional in the full source. */
329 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
331 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
333 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
335 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
337 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
339 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
341 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
343 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
344 * all skey handling functions read/set the skey from the PGSTE
345 * instead of the real storage key.
347 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
348 * pages being detected as preserved although they are resident.
350 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
351 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
353 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
354 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
355 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
357 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
358 * cannot easily shadow the SCA because of the ipte lock.
/*
 * Module init: create the "kvm-trace" s390 debug-feature area, attach the
 * sprintf view, probe CPU features and register the FLIC device ops.
 * On any failure the debug area is torn down again (goto out_debug_unreg).
 * NOTE(review): lossy extract — braces/returns/error-checks missing.
 */
362 int kvm_arch_init(void *opaque)
366 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
370 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
372 goto out_debug_unreg;
375 kvm_s390_cpu_feat_init();
377 /* Register floating interrupt controller interface. */
378 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
380 pr_err("Failed to register FLIC rc=%d\n", rc);
381 goto out_debug_unreg;
386 debug_unregister(kvm_s390_dbf);
/* Module exit: release the debug-feature area created in kvm_arch_init(). */
390 void kvm_arch_exit(void)
392 debug_unregister(kvm_s390_dbf);
395 /* Section: device related */
/* /dev/kvm ioctl: only KVM_S390_ENABLE_SIE is handled at this level. */
396 long kvm_arch_dev_ioctl(struct file *filp,
397 unsigned int ioctl, unsigned long arg)
399 if (ioctl == KVM_S390_ENABLE_SIE)
400 return s390_enable_sie();
/*
 * KVM_CHECK_EXTENSION: report which capabilities this VM supports.
 * Unconditional capabilities fall through to a common "r = 1" (not
 * visible in this extract); others depend on sclp fields or STFLE
 * facility tests.
 * NOTE(review): lossy extract — "r = ...", "break" and "#endif" lines
 * are missing throughout the switch.
 */
404 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
409 case KVM_CAP_S390_PSW:
410 case KVM_CAP_S390_GMAP:
411 case KVM_CAP_SYNC_MMU:
412 #ifdef CONFIG_KVM_S390_UCONTROL
413 case KVM_CAP_S390_UCONTROL:
415 case KVM_CAP_ASYNC_PF:
416 case KVM_CAP_SYNC_REGS:
417 case KVM_CAP_ONE_REG:
418 case KVM_CAP_ENABLE_CAP:
419 case KVM_CAP_S390_CSS_SUPPORT:
420 case KVM_CAP_IOEVENTFD:
421 case KVM_CAP_DEVICE_CTRL:
422 case KVM_CAP_ENABLE_CAP_VM:
423 case KVM_CAP_S390_IRQCHIP:
424 case KVM_CAP_VM_ATTRIBUTES:
425 case KVM_CAP_MP_STATE:
426 case KVM_CAP_IMMEDIATE_EXIT:
427 case KVM_CAP_S390_INJECT_IRQ:
428 case KVM_CAP_S390_USER_SIGP:
429 case KVM_CAP_S390_USER_STSI:
430 case KVM_CAP_S390_SKEYS:
431 case KVM_CAP_S390_IRQ_STATE:
432 case KVM_CAP_S390_USER_INSTR0:
433 case KVM_CAP_S390_CMMA_MIGRATION:
434 case KVM_CAP_S390_AIS:
437 case KVM_CAP_S390_MEM_OP:
/* VCPU count limits: basic SCA slots unless ESCA + 64-bit SCAO allow more. */
440 case KVM_CAP_NR_VCPUS:
441 case KVM_CAP_MAX_VCPUS:
442 case KVM_CAP_MAX_VCPU_ID:
443 r = KVM_S390_BSCA_CPU_SLOTS;
444 if (!kvm_s390_use_sca_entries())
446 else if (sclp.has_esca && sclp.has_64bscao)
447 r = KVM_S390_ESCA_CPU_SLOTS;
449 case KVM_CAP_NR_MEMSLOTS:
450 r = KVM_USER_MEM_SLOTS;
452 case KVM_CAP_S390_COW:
453 r = MACHINE_HAS_ESOP;
455 case KVM_CAP_S390_VECTOR_REGISTERS:
/* Facility-gated capabilities: 64 = runtime instr., 133 = guarded
 * storage, 82 = branch-prediction blocking. */
458 case KVM_CAP_S390_RI:
459 r = test_facility(64);
461 case KVM_CAP_S390_GS:
462 r = test_facility(133);
464 case KVM_CAP_S390_BPB:
465 r = test_facility(82);
/*
 * Walk every page of a memslot and transfer the gmap "guest dirty" bit
 * into KVM's dirty bitmap; bails out early on a fatal signal.
 * NOTE(review): lossy extract — braces and some statements missing.
 * NOTE(review): "cur_gfn <= last_gfn" with last_gfn = base + npages looks
 * like an off-by-one (one past the slot) — verify against the original.
 */
473 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
474 struct kvm_memory_slot *memslot)
476 gfn_t cur_gfn, last_gfn;
477 unsigned long address;
478 struct gmap *gmap = kvm->arch.gmap;
480 /* Loop over all guest pages */
481 last_gfn = memslot->base_gfn + memslot->npages;
482 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
483 address = gfn_to_hva_memslot(memslot, cur_gfn);
485 if (test_and_clear_guest_dirty(gmap->mm, address))
486 mark_page_dirty(kvm, cur_gfn);
487 if (fatal_signal_pending(current))
493 /* Section: vm related */
494 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
497 * Get (and clear) the dirty memory log for a memory slot.
/* KVM_GET_DIRTY_LOG ioctl: rejects ucontrol VMs, validates the slot,
 * syncs gmap dirty state, copies the log to user space and clears it.
 * All under kvm->slots_lock. */
499 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
500 struct kvm_dirty_log *log)
504 struct kvm_memslots *slots;
505 struct kvm_memory_slot *memslot;
508 if (kvm_is_ucontrol(kvm))
511 mutex_lock(&kvm->slots_lock);
514 if (log->slot >= KVM_USER_MEM_SLOTS)
517 slots = kvm_memslots(kvm);
518 memslot = id_to_memslot(slots, log->slot);
520 if (!memslot->dirty_bitmap)
523 kvm_s390_sync_dirty_log(kvm, memslot);
524 r = kvm_get_dirty_log(kvm, log, &is_dirty);
528 /* Clear the dirty log */
/* Presumably only when is_dirty is set — the guard line is missing here. */
530 n = kvm_dirty_bitmap_bytes(memslot);
531 memset(memslot->dirty_bitmap, 0, n);
535 mutex_unlock(&kvm->slots_lock);
/* Request an operation-exception intercept on every VCPU of the VM. */
539 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
542 struct kvm_vcpu *vcpu;
544 kvm_for_each_vcpu(i, vcpu, kvm) {
545 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
/*
 * KVM_ENABLE_CAP (VM scope): turn on optional per-VM behavior.  Facility
 * bits may only be changed before the first VCPU exists, hence the
 * kvm->created_vcpus checks under kvm->lock.
 * NOTE(review): lossy extract — "break"s, "r = ..." assignments and
 * closing braces are missing throughout.
 */
549 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
557 case KVM_CAP_S390_IRQCHIP:
558 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
559 kvm->arch.use_irqchip = 1;
562 case KVM_CAP_S390_USER_SIGP:
563 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
564 kvm->arch.user_sigp = 1;
/* Vector registers: facility 129, plus 134/135 extensions when present. */
567 case KVM_CAP_S390_VECTOR_REGISTERS:
568 mutex_lock(&kvm->lock);
569 if (kvm->created_vcpus) {
571 } else if (MACHINE_HAS_VX) {
572 set_kvm_facility(kvm->arch.model.fac_mask, 129);
573 set_kvm_facility(kvm->arch.model.fac_list, 129);
574 if (test_facility(134)) {
575 set_kvm_facility(kvm->arch.model.fac_mask, 134);
576 set_kvm_facility(kvm->arch.model.fac_list, 134);
578 if (test_facility(135)) {
579 set_kvm_facility(kvm->arch.model.fac_mask, 135);
580 set_kvm_facility(kvm->arch.model.fac_list, 135);
585 mutex_unlock(&kvm->lock);
586 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
587 r ? "(not available)" : "(success)");
/* Runtime instrumentation: facility 64. */
589 case KVM_CAP_S390_RI:
591 mutex_lock(&kvm->lock);
592 if (kvm->created_vcpus) {
594 } else if (test_facility(64)) {
595 set_kvm_facility(kvm->arch.model.fac_mask, 64);
596 set_kvm_facility(kvm->arch.model.fac_list, 64);
599 mutex_unlock(&kvm->lock);
600 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
601 r ? "(not available)" : "(success)");
/* Adapter-interruption suppression: facility 72. */
603 case KVM_CAP_S390_AIS:
604 mutex_lock(&kvm->lock);
605 if (kvm->created_vcpus) {
608 set_kvm_facility(kvm->arch.model.fac_mask, 72);
609 set_kvm_facility(kvm->arch.model.fac_list, 72);
612 mutex_unlock(&kvm->lock);
613 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
614 r ? "(not available)" : "(success)");
/* Guarded storage: facility 133. */
616 case KVM_CAP_S390_GS:
618 mutex_lock(&kvm->lock);
619 if (kvm->created_vcpus) {
621 } else if (test_facility(133)) {
622 set_kvm_facility(kvm->arch.model.fac_mask, 133);
623 set_kvm_facility(kvm->arch.model.fac_list, 133);
626 mutex_unlock(&kvm->lock);
627 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
628 r ? "(not available)" : "(success)");
630 case KVM_CAP_S390_USER_STSI:
631 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
632 kvm->arch.user_stsi = 1;
635 case KVM_CAP_S390_USER_INSTR0:
636 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
637 kvm->arch.user_instr0 = 1;
638 icpt_operexc_on_all_vcpus(kvm);
/*
 * KVM_S390_VM_MEM_CTRL attribute get/set.
 * NOTE(review): lossy extract — returns, "break"s and closing braces are
 * missing; comments describe apparent intent only.
 */
/* Get: currently only reports the configured guest memory limit. */
648 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
652 switch (attr->attr) {
653 case KVM_S390_VM_MEM_LIMIT_SIZE:
655 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
656 kvm->arch.mem_limit);
657 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
/* Set: enable CMMA, reset CMMA state, or change the memory limit. */
667 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
671 switch (attr->attr) {
/* CMMA may only be enabled before the first VCPU exists. */
672 case KVM_S390_VM_MEM_ENABLE_CMMA:
678 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
679 mutex_lock(&kvm->lock);
680 if (!kvm->created_vcpus) {
681 kvm->arch.use_cmma = 1;
684 mutex_unlock(&kvm->lock);
686 case KVM_S390_VM_MEM_CLR_CMMA:
691 if (!kvm->arch.use_cmma)
694 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
695 mutex_lock(&kvm->lock);
/* srcu read lock guards against concurrent memslot changes during reset. */
696 idx = srcu_read_lock(&kvm->srcu);
697 s390_reset_cmma(kvm->arch.gmap->mm);
698 srcu_read_unlock(&kvm->srcu, idx);
699 mutex_unlock(&kvm->lock);
702 case KVM_S390_VM_MEM_LIMIT_SIZE: {
703 unsigned long new_limit;
705 if (kvm_is_ucontrol(kvm))
708 if (get_user(new_limit, (u64 __user *)attr->addr))
/* The limit may only be lowered, never raised past the current one. */
711 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
712 new_limit > kvm->arch.mem_limit)
718 /* gmap_create takes last usable address */
719 if (new_limit != KVM_S390_NO_MEM_LIMIT)
/* Replacing the gmap is only allowed before the first VCPU exists. */
723 mutex_lock(&kvm->lock);
724 if (!kvm->created_vcpus) {
725 /* gmap_create will round the limit up */
726 struct gmap *new = gmap_create(current->mm, new_limit);
731 gmap_remove(kvm->arch.gmap);
733 kvm->arch.gmap = new;
737 mutex_unlock(&kvm->lock);
738 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
739 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
740 (void *) kvm->arch.gmap->asce);
750 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
/*
 * KVM_S390_VM_CRYPTO attribute: enable/disable AES and DEA key wrapping.
 * Requires facility 76 (MSA3).  Enabling presumably generates fresh random
 * wrapping-key masks (the get_random_bytes lines are missing from this
 * extract — TODO confirm); disabling zeroes them.  Afterwards every VCPU's
 * crypto setup is refreshed.  NOTE(review): lossy extract.
 */
752 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
754 struct kvm_vcpu *vcpu;
757 if (!test_kvm_facility(kvm, 76))
760 mutex_lock(&kvm->lock);
761 switch (attr->attr) {
762 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
764 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
765 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
766 kvm->arch.crypto.aes_kw = 1;
767 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
769 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
771 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
772 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
773 kvm->arch.crypto.dea_kw = 1;
774 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
776 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
777 kvm->arch.crypto.aes_kw = 0;
778 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
779 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
780 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
782 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
783 kvm->arch.crypto.dea_kw = 0;
784 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
785 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
786 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
789 mutex_unlock(&kvm->lock);
/* Re-apply crypto settings on every VCPU after the change. */
793 kvm_for_each_vcpu(i, vcpu, kvm) {
794 kvm_s390_vcpu_crypto_setup(vcpu);
797 mutex_unlock(&kvm->lock);
/* Queue a synchronous request on all VCPUs of the VM. */
801 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
804 struct kvm_vcpu *vcpu;
806 kvm_for_each_vcpu(cx, vcpu, kvm)
807 kvm_s390_sync_request(req, vcpu);
811 * Must be called with kvm->srcu held to avoid races on memslots, and with
812 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
/*
 * Enter migration mode: allocate the migration state and, when CMMA is in
 * use, a PGSTE dirty bitmap sized to cover guest memory; then mark all
 * pages in active slots dirty and broadcast KVM_REQ_START_MIGRATION.
 * NOTE(review): lossy extract — returns, error paths and braces missing.
 */
814 static int kvm_s390_vm_start_migration(struct kvm *kvm)
816 struct kvm_s390_migration_state *mgs;
817 struct kvm_memory_slot *ms;
818 /* should be the only one */
819 struct kvm_memslots *slots;
820 unsigned long ram_pages;
823 /* migration mode already enabled */
824 if (kvm->arch.migration_state)
827 slots = kvm_memslots(kvm);
828 if (!slots || !slots->used_slots)
831 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
834 kvm->arch.migration_state = mgs;
836 if (kvm->arch.use_cmma) {
838 * Get the first slot. They are reverse sorted by base_gfn, so
839 * the first slot is also the one at the end of the address
840 * space. We have verified above that at least one slot is
843 ms = slots->memslots;
844 /* round up so we only use full longs */
845 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
846 /* allocate enough bytes to store all the bits */
847 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
848 if (!mgs->pgste_bitmap) {
850 kvm->arch.migration_state = NULL;
854 mgs->bitmap_size = ram_pages;
855 atomic64_set(&mgs->dirty_pages, ram_pages);
856 /* mark all the pages in active slots as dirty */
857 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
858 ms = slots->memslots + slotnr;
859 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
862 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
868 * Must be called with kvm->slots_lock to avoid races with ourselves and
869 * kvm_s390_vm_start_migration.
/* Leave migration mode: detach the state, stop the VCPUs' migration
 * handling, wait for in-flight ESSA emulation (SRCU), free the bitmap. */
871 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
873 struct kvm_s390_migration_state *mgs;
875 /* migration mode already disabled */
876 if (!kvm->arch.migration_state)
878 mgs = kvm->arch.migration_state;
879 kvm->arch.migration_state = NULL;
881 if (kvm->arch.use_cmma) {
882 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
883 /* We have to wait for the essa emulation to finish */
884 synchronize_srcu(&kvm->srcu);
885 vfree(mgs->pgste_bitmap);
/* Attribute dispatcher for KVM_S390_VM_MIGRATION, under slots_lock. */
891 static int kvm_s390_vm_set_migration(struct kvm *kvm,
892 struct kvm_device_attr *attr)
896 mutex_lock(&kvm->slots_lock);
897 switch (attr->attr) {
898 case KVM_S390_VM_MIGRATION_START:
899 res = kvm_s390_vm_start_migration(kvm);
901 case KVM_S390_VM_MIGRATION_STOP:
902 res = kvm_s390_vm_stop_migration(kvm);
907 mutex_unlock(&kvm->slots_lock);
/* Report migration-mode status (0/1) to user space. */
912 static int kvm_s390_vm_get_migration(struct kvm *kvm,
913 struct kvm_device_attr *attr)
915 u64 mig = (kvm->arch.migration_state != NULL);
917 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
920 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
/*
 * KVM_S390_VM_TOD attribute family: set/get the guest TOD clock, with and
 * without the multiple-epoch extension (facility 139).
 * NOTE(review): lossy extract — and several "&gtod" tokens appear mangled
 * to ">od" (likely an HTML-entity corruption of '&'); preserved verbatim,
 * verify against the original source before use.
 */
/* Set TOD with epoch index; epoch_idx != 0 requires facility 139. */
925 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
927 struct kvm_s390_vm_tod_clock gtod;
929 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod)))
932 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
934 kvm_s390_set_tod_clock(kvm, >od);
936 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
937 gtod.epoch_idx, gtod.tod);
/* Set only the high word; presumably must be zero (check line missing). */
942 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
946 if (copy_from_user(>od_high, (void __user *)attr->addr,
952 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
/* Set only the 64-bit TOD base; epoch index stays zero. */
957 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
959 struct kvm_s390_vm_tod_clock gtod = { 0 };
961 if (copy_from_user(>od.tod, (void __user *)attr->addr,
965 kvm_s390_set_tod_clock(kvm, >od);
966 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
/* Dispatcher for the TOD "set" sub-attributes. */
970 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
977 switch (attr->attr) {
978 case KVM_S390_VM_TOD_EXT:
979 ret = kvm_s390_set_tod_ext(kvm, attr);
981 case KVM_S390_VM_TOD_HIGH:
982 ret = kvm_s390_set_tod_high(kvm, attr);
984 case KVM_S390_VM_TOD_LOW:
985 ret = kvm_s390_set_tod_low(kvm, attr);
/* Compute guest TOD = host extended TOD + per-VM epoch (with carry into
 * the epoch index when the 64-bit addition wraps). */
994 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
995 struct kvm_s390_vm_tod_clock *gtod)
997 struct kvm_s390_tod_clock_ext htod;
1001 get_tod_clock_ext((char *)&htod);
1003 gtod->tod = htod.tod + kvm->arch.epoch;
1004 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1006 if (gtod->tod < htod.tod)
1007 gtod->epoch_idx += 1;
/* Get TOD with epoch index; falls back to the fast path w/o facility 139. */
1012 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1014 struct kvm_s390_vm_tod_clock gtod;
1016 memset(>od, 0, sizeof(gtod));
1018 if (test_kvm_facility(kvm, 139))
1019 kvm_s390_get_tod_clock_ext(kvm, >od);
1021 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
1023 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
1026 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1027 gtod.epoch_idx, gtod.tod);
/* Get only the high word (always 0 in this interface generation). */
1031 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1035 if (copy_to_user((void __user *)attr->addr, >od_high,
1038 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
/* Get only the 64-bit TOD base. */
1043 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1047 gtod = kvm_s390_get_tod_clock_fast(kvm);
1048 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
1050 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
/* Dispatcher for the TOD "get" sub-attributes. */
1055 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1062 switch (attr->attr) {
1063 case KVM_S390_VM_TOD_EXT:
1064 ret = kvm_s390_get_tod_ext(kvm, attr);
1066 case KVM_S390_VM_TOD_HIGH:
1067 ret = kvm_s390_get_tod_high(kvm, attr);
1069 case KVM_S390_VM_TOD_LOW:
1070 ret = kvm_s390_get_tod_low(kvm, attr);
/*
 * KVM_S390_VM_CPU_MODEL "set" side: processor id/ibc/facilities, CPU
 * features, and (not yet supported) subfunctions.
 * NOTE(review): lossy extract — returns, "break"s, kfree/error paths and
 * closing braces are missing.
 */
/* Set cpuid, IBC (clamped to the host's [lowest, unblocked] range) and
 * the guest facility list.  Only before the first VCPU exists. */
1079 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1081 struct kvm_s390_vm_cpu_processor *proc;
1082 u16 lowest_ibc, unblocked_ibc;
1085 mutex_lock(&kvm->lock);
1086 if (kvm->created_vcpus) {
1090 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1095 if (!copy_from_user(proc, (void __user *)attr->addr,
1097 kvm->arch.model.cpuid = proc->cpuid;
1098 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1099 unblocked_ibc = sclp.ibc & 0xfff;
/* IBC value 0 disables the check; otherwise clamp into the host range. */
1100 if (lowest_ibc && proc->ibc) {
1101 if (proc->ibc > unblocked_ibc)
1102 kvm->arch.model.ibc = unblocked_ibc;
1103 else if (proc->ibc < lowest_ibc)
1104 kvm->arch.model.ibc = lowest_ibc;
1106 kvm->arch.model.ibc = proc->ibc;
1108 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1109 S390_ARCH_FAC_LIST_SIZE_BYTE);
1110 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1111 kvm->arch.model.ibc,
1112 kvm->arch.model.cpuid);
1113 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1114 kvm->arch.model.fac_list[0],
1115 kvm->arch.model.fac_list[1],
1116 kvm->arch.model.fac_list[2]);
1121 mutex_unlock(&kvm->lock);
/* Set the guest CPU-feature bitmap; must be a subset of what the host
 * offers, and only before the first VCPU exists. */
1125 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1126 struct kvm_device_attr *attr)
1128 struct kvm_s390_vm_cpu_feat data;
1131 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1133 if (!bitmap_subset((unsigned long *) data.feat,
1134 kvm_s390_available_cpu_feat,
1135 KVM_S390_VM_CPU_FEAT_NR_BITS))
1138 mutex_lock(&kvm->lock);
1139 if (!kvm->created_vcpus) {
1140 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1141 KVM_S390_VM_CPU_FEAT_NR_BITS);
1144 mutex_unlock(&kvm->lock);
/* Configuring subfunctions is not implemented yet (see comment below). */
1148 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1149 struct kvm_device_attr *attr)
1152 * Once supported by kernel + hw, we have to store the subfunctions
1153 * in kvm->arch and remember that user space configured them.
/* Dispatcher for the CPU-model "set" sub-attributes. */
1158 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1162 switch (attr->attr) {
1163 case KVM_S390_VM_CPU_PROCESSOR:
1164 ret = kvm_s390_set_processor(kvm, attr);
1166 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1167 ret = kvm_s390_set_processor_feat(kvm, attr);
1169 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1170 ret = kvm_s390_set_processor_subfunc(kvm, attr);
/*
 * KVM_S390_VM_CPU_MODEL "get" side: report the configured guest model,
 * the host machine model, features and subfunctions to user space.
 * NOTE(review): lossy extract — returns, kfree/error paths, some VM_EVENT
 * arguments and closing braces are missing.
 */
/* Report the currently configured guest cpuid/ibc/facility list. */
1176 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1178 struct kvm_s390_vm_cpu_processor *proc;
1181 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1186 proc->cpuid = kvm->arch.model.cpuid;
1187 proc->ibc = kvm->arch.model.ibc;
1188 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1189 S390_ARCH_FAC_LIST_SIZE_BYTE);
1190 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1191 kvm->arch.model.ibc,
1192 kvm->arch.model.cpuid);
1193 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1194 kvm->arch.model.fac_list[0],
1195 kvm->arch.model.fac_list[1],
1196 kvm->arch.model.fac_list[2]);
1197 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
/* Report the host machine: real cpuid, sclp IBC, KVM's facility mask and
 * the host's full STFLE facility list. */
1204 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1206 struct kvm_s390_vm_cpu_machine *mach;
1209 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1214 get_cpu_id((struct cpuid *) &mach->cpuid);
1215 mach->ibc = sclp.ibc;
1216 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1217 S390_ARCH_FAC_LIST_SIZE_BYTE);
1218 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1219 sizeof(S390_lowcore.stfle_fac_list));
1220 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1221 kvm->arch.model.ibc,
1222 kvm->arch.model.cpuid);
1223 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1227 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1231 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
/* Report the guest's configured CPU-feature bitmap. */
1238 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1239 struct kvm_device_attr *attr)
1241 struct kvm_s390_vm_cpu_feat data;
1243 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1244 KVM_S390_VM_CPU_FEAT_NR_BITS);
1245 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
/* Report the host-available CPU-feature bitmap. */
1250 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1251 struct kvm_device_attr *attr)
1253 struct kvm_s390_vm_cpu_feat data;
1255 bitmap_copy((unsigned long *) data.feat,
1256 kvm_s390_available_cpu_feat,
1257 KVM_S390_VM_CPU_FEAT_NR_BITS);
1258 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
/* Per-guest subfunctions are not configurable yet (see comment below). */
1263 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1264 struct kvm_device_attr *attr)
1267 * Once we can actually configure subfunctions (kernel + hw support),
1268 * we have to check if they were already set by user space, if so copy
1269 * them from kvm->arch.
/* Report the host-available subfunction query results. */
1274 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1275 struct kvm_device_attr *attr)
1277 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1278 sizeof(struct kvm_s390_vm_cpu_subfunc)))
/* Dispatcher for the CPU-model "get" sub-attributes. */
1282 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1286 switch (attr->attr) {
1287 case KVM_S390_VM_CPU_PROCESSOR:
1288 ret = kvm_s390_get_processor(kvm, attr);
1290 case KVM_S390_VM_CPU_MACHINE:
1291 ret = kvm_s390_get_machine(kvm, attr);
1293 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1294 ret = kvm_s390_get_processor_feat(kvm, attr);
1296 case KVM_S390_VM_CPU_MACHINE_FEAT:
1297 ret = kvm_s390_get_machine_feat(kvm, attr);
1299 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1300 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1302 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1303 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1309 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1313 switch (attr->group) {
1314 case KVM_S390_VM_MEM_CTRL:
1315 ret = kvm_s390_set_mem_control(kvm, attr);
1317 case KVM_S390_VM_TOD:
1318 ret = kvm_s390_set_tod(kvm, attr);
1320 case KVM_S390_VM_CPU_MODEL:
1321 ret = kvm_s390_set_cpu_model(kvm, attr);
1323 case KVM_S390_VM_CRYPTO:
1324 ret = kvm_s390_vm_set_crypto(kvm, attr);
1326 case KVM_S390_VM_MIGRATION:
1327 ret = kvm_s390_vm_set_migration(kvm, attr);
1337 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1341 switch (attr->group) {
1342 case KVM_S390_VM_MEM_CTRL:
1343 ret = kvm_s390_get_mem_control(kvm, attr);
1345 case KVM_S390_VM_TOD:
1346 ret = kvm_s390_get_tod(kvm, attr);
1348 case KVM_S390_VM_CPU_MODEL:
1349 ret = kvm_s390_get_cpu_model(kvm, attr);
1351 case KVM_S390_VM_MIGRATION:
1352 ret = kvm_s390_vm_get_migration(kvm, attr);
1362 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1366 switch (attr->group) {
1367 case KVM_S390_VM_MEM_CTRL:
1368 switch (attr->attr) {
1369 case KVM_S390_VM_MEM_ENABLE_CMMA:
1370 case KVM_S390_VM_MEM_CLR_CMMA:
1371 ret = sclp.has_cmma ? 0 : -ENXIO;
1373 case KVM_S390_VM_MEM_LIMIT_SIZE:
1381 case KVM_S390_VM_TOD:
1382 switch (attr->attr) {
1383 case KVM_S390_VM_TOD_LOW:
1384 case KVM_S390_VM_TOD_HIGH:
1392 case KVM_S390_VM_CPU_MODEL:
1393 switch (attr->attr) {
1394 case KVM_S390_VM_CPU_PROCESSOR:
1395 case KVM_S390_VM_CPU_MACHINE:
1396 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1397 case KVM_S390_VM_CPU_MACHINE_FEAT:
1398 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1401 /* configuring subfunctions is not supported yet */
1402 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1408 case KVM_S390_VM_CRYPTO:
1409 switch (attr->attr) {
1410 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1411 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1412 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1413 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1421 case KVM_S390_VM_MIGRATION:
1432 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1436 int srcu_idx, i, r = 0;
1438 if (args->flags != 0)
1441 /* Is this guest using storage keys? */
1442 if (!mm_use_skey(current->mm))
1443 return KVM_S390_GET_SKEYS_NONE;
1445 /* Enforce sane limit on memory allocation */
1446 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1449 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1453 down_read(¤t->mm->mmap_sem);
1454 srcu_idx = srcu_read_lock(&kvm->srcu);
1455 for (i = 0; i < args->count; i++) {
1456 hva = gfn_to_hva(kvm, args->start_gfn + i);
1457 if (kvm_is_error_hva(hva)) {
1462 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1466 srcu_read_unlock(&kvm->srcu, srcu_idx);
1467 up_read(¤t->mm->mmap_sem);
1470 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1471 sizeof(uint8_t) * args->count);
1480 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1484 int srcu_idx, i, r = 0;
1486 if (args->flags != 0)
1489 /* Enforce sane limit on memory allocation */
1490 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1493 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1497 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1498 sizeof(uint8_t) * args->count);
1504 /* Enable storage key handling for the guest */
1505 r = s390_enable_skey();
1509 down_read(¤t->mm->mmap_sem);
1510 srcu_idx = srcu_read_lock(&kvm->srcu);
1511 for (i = 0; i < args->count; i++) {
1512 hva = gfn_to_hva(kvm, args->start_gfn + i);
1513 if (kvm_is_error_hva(hva)) {
1518 /* Lowest order bit is reserved */
1519 if (keys[i] & 0x01) {
1524 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1528 srcu_read_unlock(&kvm->srcu, srcu_idx);
1529 up_read(¤t->mm->mmap_sem);
1536 * Base address and length must be sent at the start of each block, therefore
1537 * it's cheaper to send some clean data, as long as it's less than the size of
1540 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1541 /* for consistency */
1542 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1545 * This function searches for the next page with dirty CMMA attributes, and
1546 * saves the attributes in the buffer up to either the end of the buffer or
1547 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1548 * no trailing clean bytes are saved.
1549 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1550 * output buffer will indicate 0 as length.
1552 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1553 struct kvm_s390_cmma_log *args)
1555 struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1556 unsigned long bufsize, hva, pgstev, i, next, cur;
1557 int srcu_idx, peek, r = 0, rr;
1560 cur = args->start_gfn;
1561 i = next = pgstev = 0;
1563 if (unlikely(!kvm->arch.use_cmma))
1565 /* Invalid/unsupported flags were specified */
1566 if (args->flags & ~KVM_S390_CMMA_PEEK)
1568 /* Migration mode query, and we are not doing a migration */
1569 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1572 /* CMMA is disabled or was not used, or the buffer has length zero */
1573 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1574 if (!bufsize || !kvm->mm->context.use_cmma) {
1575 memset(args, 0, sizeof(*args));
1580 /* We are not peeking, and there are no dirty pages */
1581 if (!atomic64_read(&s->dirty_pages)) {
1582 memset(args, 0, sizeof(*args));
1585 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1587 if (cur >= s->bitmap_size) /* nothing found, loop back */
1588 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1589 if (cur >= s->bitmap_size) { /* again! (very unlikely) */
1590 memset(args, 0, sizeof(*args));
1593 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1596 res = vmalloc(bufsize);
1600 args->start_gfn = cur;
1602 down_read(&kvm->mm->mmap_sem);
1603 srcu_idx = srcu_read_lock(&kvm->srcu);
1604 while (i < bufsize) {
1605 hva = gfn_to_hva(kvm, cur);
1606 if (kvm_is_error_hva(hva)) {
1610 /* decrement only if we actually flipped the bit to 0 */
1611 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1612 atomic64_dec(&s->dirty_pages);
1613 r = get_pgste(kvm->mm, hva, &pgstev);
1616 /* save the value */
1617 res[i++] = (pgstev >> 24) & 0x43;
1619 * if the next bit is too far away, stop.
1620 * if we reached the previous "next", find the next one
1623 if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1626 next = find_next_bit(s->pgste_bitmap,
1627 s->bitmap_size, cur + 1);
1628 /* reached the end of the bitmap or of the buffer, stop */
1629 if ((next >= s->bitmap_size) ||
1630 (next >= args->start_gfn + bufsize))
1635 srcu_read_unlock(&kvm->srcu, srcu_idx);
1636 up_read(&kvm->mm->mmap_sem);
1638 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1640 rr = copy_to_user((void __user *)args->values, res, args->count);
1649 * This function sets the CMMA attributes for the given pages. If the input
1650 * buffer has zero length, no action is taken, otherwise the attributes are
1651 * set and the mm->context.use_cmma flag is set.
1653 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1654 const struct kvm_s390_cmma_log *args)
1656 unsigned long hva, mask, pgstev, i;
1658 int srcu_idx, r = 0;
1662 if (!kvm->arch.use_cmma)
1664 /* invalid/unsupported flags */
1665 if (args->flags != 0)
1667 /* Enforce sane limit on memory allocation */
1668 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1671 if (args->count == 0)
1674 bits = vmalloc(sizeof(*bits) * args->count);
1678 r = copy_from_user(bits, (void __user *)args->values, args->count);
1684 down_read(&kvm->mm->mmap_sem);
1685 srcu_idx = srcu_read_lock(&kvm->srcu);
1686 for (i = 0; i < args->count; i++) {
1687 hva = gfn_to_hva(kvm, args->start_gfn + i);
1688 if (kvm_is_error_hva(hva)) {
1694 pgstev = pgstev << 24;
1695 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1696 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1698 srcu_read_unlock(&kvm->srcu, srcu_idx);
1699 up_read(&kvm->mm->mmap_sem);
1701 if (!kvm->mm->context.use_cmma) {
1702 down_write(&kvm->mm->mmap_sem);
1703 kvm->mm->context.use_cmma = 1;
1704 up_write(&kvm->mm->mmap_sem);
1711 long kvm_arch_vm_ioctl(struct file *filp,
1712 unsigned int ioctl, unsigned long arg)
1714 struct kvm *kvm = filp->private_data;
1715 void __user *argp = (void __user *)arg;
1716 struct kvm_device_attr attr;
1720 case KVM_S390_INTERRUPT: {
1721 struct kvm_s390_interrupt s390int;
1724 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1726 r = kvm_s390_inject_vm(kvm, &s390int);
1729 case KVM_ENABLE_CAP: {
1730 struct kvm_enable_cap cap;
1732 if (copy_from_user(&cap, argp, sizeof(cap)))
1734 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1737 case KVM_CREATE_IRQCHIP: {
1738 struct kvm_irq_routing_entry routing;
1741 if (kvm->arch.use_irqchip) {
1742 /* Set up dummy routing. */
1743 memset(&routing, 0, sizeof(routing));
1744 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1748 case KVM_SET_DEVICE_ATTR: {
1750 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1752 r = kvm_s390_vm_set_attr(kvm, &attr);
1755 case KVM_GET_DEVICE_ATTR: {
1757 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1759 r = kvm_s390_vm_get_attr(kvm, &attr);
1762 case KVM_HAS_DEVICE_ATTR: {
1764 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1766 r = kvm_s390_vm_has_attr(kvm, &attr);
1769 case KVM_S390_GET_SKEYS: {
1770 struct kvm_s390_skeys args;
1773 if (copy_from_user(&args, argp,
1774 sizeof(struct kvm_s390_skeys)))
1776 r = kvm_s390_get_skeys(kvm, &args);
1779 case KVM_S390_SET_SKEYS: {
1780 struct kvm_s390_skeys args;
1783 if (copy_from_user(&args, argp,
1784 sizeof(struct kvm_s390_skeys)))
1786 r = kvm_s390_set_skeys(kvm, &args);
1789 case KVM_S390_GET_CMMA_BITS: {
1790 struct kvm_s390_cmma_log args;
1793 if (copy_from_user(&args, argp, sizeof(args)))
1795 mutex_lock(&kvm->slots_lock);
1796 r = kvm_s390_get_cmma_bits(kvm, &args);
1797 mutex_unlock(&kvm->slots_lock);
1799 r = copy_to_user(argp, &args, sizeof(args));
1805 case KVM_S390_SET_CMMA_BITS: {
1806 struct kvm_s390_cmma_log args;
1809 if (copy_from_user(&args, argp, sizeof(args)))
1811 mutex_lock(&kvm->slots_lock);
1812 r = kvm_s390_set_cmma_bits(kvm, &args);
1813 mutex_unlock(&kvm->slots_lock);
1823 static int kvm_s390_query_ap_config(u8 *config)
1825 u32 fcn_code = 0x04000000UL;
1828 memset(config, 0, 128);
1832 ".long 0xb2af0000\n" /* PQAP(QCI) */
1838 : "r" (fcn_code), "r" (config)
1839 : "cc", "0", "2", "memory"
1845 static int kvm_s390_apxa_installed(void)
1850 if (test_facility(12)) {
1851 cc = kvm_s390_query_ap_config(config);
1854 pr_err("PQAP(QCI) failed with cc=%d", cc);
1856 return config[0] & 0x40;
1862 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1864 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1866 if (kvm_s390_apxa_installed())
1867 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1869 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1872 static u64 kvm_s390_get_initial_cpuid(void)
1877 cpuid.version = 0xff;
1878 return *((u64 *) &cpuid);
1881 static void kvm_s390_crypto_init(struct kvm *kvm)
1883 if (!test_kvm_facility(kvm, 76))
1886 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1887 kvm_s390_set_crycb_format(kvm);
1889 /* Enable AES/DEA protected key functions by default */
1890 kvm->arch.crypto.aes_kw = 1;
1891 kvm->arch.crypto.dea_kw = 1;
1892 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1893 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1894 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1895 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1898 static void sca_dispose(struct kvm *kvm)
1900 if (kvm->arch.use_esca)
1901 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1903 free_page((unsigned long)(kvm->arch.sca));
1904 kvm->arch.sca = NULL;
1907 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1909 gfp_t alloc_flags = GFP_KERNEL;
1911 char debug_name[16];
1912 static unsigned long sca_offset;
1915 #ifdef CONFIG_KVM_S390_UCONTROL
1916 if (type & ~KVM_VM_S390_UCONTROL)
1918 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1925 rc = s390_enable_sie();
1931 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1933 kvm->arch.use_esca = 0; /* start with basic SCA */
1934 if (!sclp.has_64bscao)
1935 alloc_flags |= GFP_DMA;
1936 rwlock_init(&kvm->arch.sca_lock);
1937 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1940 mutex_lock(&kvm_lock);
1942 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1944 kvm->arch.sca = (struct bsca_block *)
1945 ((char *) kvm->arch.sca + sca_offset);
1946 mutex_unlock(&kvm_lock);
1948 sprintf(debug_name, "kvm-%u", current->pid);
1950 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1954 kvm->arch.sie_page2 =
1955 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1956 if (!kvm->arch.sie_page2)
1959 /* Populate the facility mask initially. */
1960 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1961 sizeof(S390_lowcore.stfle_fac_list));
1962 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1963 if (i < kvm_s390_fac_list_mask_size())
1964 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1966 kvm->arch.model.fac_mask[i] = 0UL;
1969 /* Populate the facility list initially. */
1970 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1971 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1972 S390_ARCH_FAC_LIST_SIZE_BYTE);
1974 /* we are always in czam mode - even on pre z14 machines */
1975 set_kvm_facility(kvm->arch.model.fac_mask, 138);
1976 set_kvm_facility(kvm->arch.model.fac_list, 138);
1977 /* we emulate STHYI in kvm */
1978 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1979 set_kvm_facility(kvm->arch.model.fac_list, 74);
1980 if (MACHINE_HAS_TLB_GUEST) {
1981 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1982 set_kvm_facility(kvm->arch.model.fac_list, 147);
1985 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1986 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1988 kvm_s390_crypto_init(kvm);
1990 mutex_init(&kvm->arch.float_int.ais_lock);
1991 kvm->arch.float_int.simm = 0;
1992 kvm->arch.float_int.nimm = 0;
1993 spin_lock_init(&kvm->arch.float_int.lock);
1994 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1995 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1996 init_waitqueue_head(&kvm->arch.ipte_wq);
1997 mutex_init(&kvm->arch.ipte_mutex);
1999 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2000 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2002 if (type & KVM_VM_S390_UCONTROL) {
2003 kvm->arch.gmap = NULL;
2004 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2006 if (sclp.hamax == U64_MAX)
2007 kvm->arch.mem_limit = TASK_SIZE_MAX;
2009 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2011 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2012 if (!kvm->arch.gmap)
2014 kvm->arch.gmap->private = kvm;
2015 kvm->arch.gmap->pfault_enabled = 0;
2018 kvm->arch.css_support = 0;
2019 kvm->arch.use_irqchip = 0;
2020 kvm->arch.epoch = 0;
2022 spin_lock_init(&kvm->arch.start_stop_lock);
2023 kvm_s390_vsie_init(kvm);
2024 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2028 free_page((unsigned long)kvm->arch.sie_page2);
2029 debug_unregister(kvm->arch.dbf);
2031 KVM_EVENT(3, "creation of vm failed: %d", rc);
2035 bool kvm_arch_has_vcpu_debugfs(void)
2040 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2045 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2047 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2048 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2049 kvm_s390_clear_local_irqs(vcpu);
2050 kvm_clear_async_pf_completion_queue(vcpu);
2051 if (!kvm_is_ucontrol(vcpu->kvm))
2054 if (kvm_is_ucontrol(vcpu->kvm))
2055 gmap_remove(vcpu->arch.gmap);
2057 if (vcpu->kvm->arch.use_cmma)
2058 kvm_s390_vcpu_unsetup_cmma(vcpu);
2059 free_page((unsigned long)(vcpu->arch.sie_block));
2061 kvm_vcpu_uninit(vcpu);
2062 kmem_cache_free(kvm_vcpu_cache, vcpu);
2065 static void kvm_free_vcpus(struct kvm *kvm)
2068 struct kvm_vcpu *vcpu;
2070 kvm_for_each_vcpu(i, vcpu, kvm)
2071 kvm_arch_vcpu_destroy(vcpu);
2073 mutex_lock(&kvm->lock);
2074 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2075 kvm->vcpus[i] = NULL;
2077 atomic_set(&kvm->online_vcpus, 0);
2078 mutex_unlock(&kvm->lock);
2081 void kvm_arch_destroy_vm(struct kvm *kvm)
2083 kvm_free_vcpus(kvm);
2085 debug_unregister(kvm->arch.dbf);
2086 free_page((unsigned long)kvm->arch.sie_page2);
2087 if (!kvm_is_ucontrol(kvm))
2088 gmap_remove(kvm->arch.gmap);
2089 kvm_s390_destroy_adapters(kvm);
2090 kvm_s390_clear_float_irqs(kvm);
2091 kvm_s390_vsie_destroy(kvm);
2092 if (kvm->arch.migration_state) {
2093 vfree(kvm->arch.migration_state->pgste_bitmap);
2094 kfree(kvm->arch.migration_state);
2096 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2099 /* Section: vcpu related */
2100 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2102 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2103 if (!vcpu->arch.gmap)
2105 vcpu->arch.gmap->private = vcpu->kvm;
2110 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2112 if (!kvm_s390_use_sca_entries())
2114 read_lock(&vcpu->kvm->arch.sca_lock);
2115 if (vcpu->kvm->arch.use_esca) {
2116 struct esca_block *sca = vcpu->kvm->arch.sca;
2118 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2119 sca->cpu[vcpu->vcpu_id].sda = 0;
2121 struct bsca_block *sca = vcpu->kvm->arch.sca;
2123 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2124 sca->cpu[vcpu->vcpu_id].sda = 0;
2126 read_unlock(&vcpu->kvm->arch.sca_lock);
2129 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2131 if (!kvm_s390_use_sca_entries()) {
2132 struct bsca_block *sca = vcpu->kvm->arch.sca;
2134 /* we still need the basic sca for the ipte control */
2135 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2136 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2139 read_lock(&vcpu->kvm->arch.sca_lock);
2140 if (vcpu->kvm->arch.use_esca) {
2141 struct esca_block *sca = vcpu->kvm->arch.sca;
2143 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2144 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2145 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2146 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2147 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2149 struct bsca_block *sca = vcpu->kvm->arch.sca;
2151 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2152 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2153 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2154 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2156 read_unlock(&vcpu->kvm->arch.sca_lock);
2159 /* Basic SCA to Extended SCA data copy routines */
2160 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2163 d->sigp_ctrl.c = s->sigp_ctrl.c;
2164 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2167 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2171 d->ipte_control = s->ipte_control;
2173 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2174 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2177 static int sca_switch_to_extended(struct kvm *kvm)
2179 struct bsca_block *old_sca = kvm->arch.sca;
2180 struct esca_block *new_sca;
2181 struct kvm_vcpu *vcpu;
2182 unsigned int vcpu_idx;
2185 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2189 scaoh = (u32)((u64)(new_sca) >> 32);
2190 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2192 kvm_s390_vcpu_block_all(kvm);
2193 write_lock(&kvm->arch.sca_lock);
2195 sca_copy_b_to_e(new_sca, old_sca);
2197 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2198 vcpu->arch.sie_block->scaoh = scaoh;
2199 vcpu->arch.sie_block->scaol = scaol;
2200 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2202 kvm->arch.sca = new_sca;
2203 kvm->arch.use_esca = 1;
2205 write_unlock(&kvm->arch.sca_lock);
2206 kvm_s390_vcpu_unblock_all(kvm);
2208 free_page((unsigned long)old_sca);
2210 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2211 old_sca, kvm->arch.sca);
2215 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2219 if (!kvm_s390_use_sca_entries()) {
2220 if (id < KVM_MAX_VCPUS)
2224 if (id < KVM_S390_BSCA_CPU_SLOTS)
2226 if (!sclp.has_esca || !sclp.has_64bscao)
2229 mutex_lock(&kvm->lock);
2230 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2231 mutex_unlock(&kvm->lock);
2233 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2236 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2238 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2239 kvm_clear_async_pf_completion_queue(vcpu);
2240 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2246 kvm_s390_set_prefix(vcpu, 0);
2247 if (test_kvm_facility(vcpu->kvm, 64))
2248 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2249 if (test_kvm_facility(vcpu->kvm, 82))
2250 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2251 if (test_kvm_facility(vcpu->kvm, 133))
2252 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2253 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2254 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2257 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2259 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2261 if (kvm_is_ucontrol(vcpu->kvm))
2262 return __kvm_ucontrol_vcpu_init(vcpu);
2267 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2268 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2270 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2271 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2272 vcpu->arch.cputm_start = get_tod_clock_fast();
2273 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2276 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2277 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2279 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2280 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2281 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2282 vcpu->arch.cputm_start = 0;
2283 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2286 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2287 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2289 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2290 vcpu->arch.cputm_enabled = true;
2291 __start_cpu_timer_accounting(vcpu);
2294 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2295 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2297 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2298 __stop_cpu_timer_accounting(vcpu);
2299 vcpu->arch.cputm_enabled = false;
2302 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2304 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2305 __enable_cpu_timer_accounting(vcpu);
2309 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2311 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2312 __disable_cpu_timer_accounting(vcpu);
2316 /* set the cpu timer - may only be called from the VCPU thread itself */
2317 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2319 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2320 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2321 if (vcpu->arch.cputm_enabled)
2322 vcpu->arch.cputm_start = get_tod_clock_fast();
2323 vcpu->arch.sie_block->cputm = cputm;
2324 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2328 /* update and get the cpu timer - can also be called from other VCPU threads */
2329 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2334 if (unlikely(!vcpu->arch.cputm_enabled))
2335 return vcpu->arch.sie_block->cputm;
2337 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2339 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2341 * If the writer would ever execute a read in the critical
2342 * section, e.g. in irq context, we have a deadlock.
2344 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2345 value = vcpu->arch.sie_block->cputm;
2346 /* if cputm_start is 0, accounting is being started/stopped */
2347 if (likely(vcpu->arch.cputm_start))
2348 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2349 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2354 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2357 gmap_enable(vcpu->arch.enabled_gmap);
2358 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2359 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2360 __start_cpu_timer_accounting(vcpu);
2364 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2367 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2368 __stop_cpu_timer_accounting(vcpu);
2369 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2370 vcpu->arch.enabled_gmap = gmap_get_enabled();
2371 gmap_disable(vcpu->arch.enabled_gmap);
2375 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2377 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2378 vcpu->arch.sie_block->gpsw.mask = 0UL;
2379 vcpu->arch.sie_block->gpsw.addr = 0UL;
2380 kvm_s390_set_prefix(vcpu, 0);
2381 kvm_s390_set_cpu_timer(vcpu, 0);
2382 vcpu->arch.sie_block->ckc = 0UL;
2383 vcpu->arch.sie_block->todpr = 0;
2384 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2385 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2386 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2387 vcpu->run->s.regs.fpc = 0;
2388 vcpu->arch.sie_block->gbea = 1;
2389 vcpu->arch.sie_block->pp = 0;
2390 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2391 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2392 kvm_clear_async_pf_completion_queue(vcpu);
2393 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2394 kvm_s390_vcpu_stop(vcpu);
2395 kvm_s390_clear_local_irqs(vcpu);
2398 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2400 mutex_lock(&vcpu->kvm->lock);
2402 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2403 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2405 mutex_unlock(&vcpu->kvm->lock);
2406 if (!kvm_is_ucontrol(vcpu->kvm)) {
2407 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2410 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2411 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2412 /* make vcpu_load load the right gmap on the first trigger */
2413 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2416 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2418 if (!test_kvm_facility(vcpu->kvm, 76))
2421 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2423 if (vcpu->kvm->arch.crypto.aes_kw)
2424 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2425 if (vcpu->kvm->arch.crypto.dea_kw)
2426 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2428 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2431 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2433 free_page(vcpu->arch.sie_block->cbrlo);
2434 vcpu->arch.sie_block->cbrlo = 0;
2437 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2439 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2440 if (!vcpu->arch.sie_block->cbrlo)
2443 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2447 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2449 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2451 vcpu->arch.sie_block->ibc = model->ibc;
2452 if (test_kvm_facility(vcpu->kvm, 7))
2453 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2456 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2460 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2464 if (test_kvm_facility(vcpu->kvm, 78))
2465 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2466 else if (test_kvm_facility(vcpu->kvm, 8))
2467 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2469 kvm_s390_vcpu_setup_model(vcpu);
2471 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2472 if (MACHINE_HAS_ESOP)
2473 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2474 if (test_kvm_facility(vcpu->kvm, 9))
2475 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2476 if (test_kvm_facility(vcpu->kvm, 73))
2477 vcpu->arch.sie_block->ecb |= ECB_TE;
2479 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2480 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2481 if (test_kvm_facility(vcpu->kvm, 130))
2482 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2483 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2485 vcpu->arch.sie_block->eca |= ECA_CEI;
2487 vcpu->arch.sie_block->eca |= ECA_IB;
2489 vcpu->arch.sie_block->eca |= ECA_SII;
2490 if (sclp.has_sigpif)
2491 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2492 if (test_kvm_facility(vcpu->kvm, 129)) {
2493 vcpu->arch.sie_block->eca |= ECA_VX;
2494 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2496 if (test_kvm_facility(vcpu->kvm, 139))
2497 vcpu->arch.sie_block->ecd |= ECD_MEF;
2499 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2501 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2504 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2506 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2508 if (vcpu->kvm->arch.use_cmma) {
2509 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2513 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2514 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2516 kvm_s390_vcpu_crypto_setup(vcpu);
2521 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2524 struct kvm_vcpu *vcpu;
2525 struct sie_page *sie_page;
2528 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2533 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2537 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2538 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2542 vcpu->arch.sie_block = &sie_page->sie_block;
2543 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2545 /* the real guest size will always be smaller than msl */
2546 vcpu->arch.sie_block->mso = 0;
2547 vcpu->arch.sie_block->msl = sclp.hamax;
2549 vcpu->arch.sie_block->icpua = id;
2550 spin_lock_init(&vcpu->arch.local_int.lock);
2551 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2552 vcpu->arch.local_int.wq = &vcpu->wq;
2553 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2554 seqcount_init(&vcpu->arch.cputm_seqcount);
2556 rc = kvm_vcpu_init(vcpu, kvm, id);
2558 goto out_free_sie_block;
2559 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2560 vcpu->arch.sie_block);
2561 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2565 free_page((unsigned long)(vcpu->arch.sie_block));
2567 kmem_cache_free(kvm_vcpu_cache, vcpu);
2572 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2574 return kvm_s390_vcpu_has_irq(vcpu, 0);
2577 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2579 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2582 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2584 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2588 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2590 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2593 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2595 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2599 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2601 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2605 * Kick a guest cpu out of SIE and wait until SIE is not running.
2606 * If the CPU is not running (e.g. waiting as idle) the function will
2607 * return immediately. */
2608 void exit_sie(struct kvm_vcpu *vcpu)
2610 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2611 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2615 /* Kick a guest cpu out of SIE to process a request synchronously */
2616 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2618 kvm_make_request(req, vcpu);
2619 kvm_s390_vcpu_request(vcpu);
/* gmap invalidation notifier: called when host mappings in the range
 * [start, end] of a guest address space are invalidated.  Shadow gmaps
 * are ignored; only the per-vcpu prefix pages (two consecutive pages)
 * matter here — any vcpu whose prefix area overlaps the range gets a
 * KVM_REQ_MMU_RELOAD to re-pin/re-protect its prefix. */
2622 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2625 struct kvm *kvm = gmap->private;
2626 struct kvm_vcpu *vcpu;
2627 unsigned long prefix;
2630 if (gmap_is_shadow(gmap))
/* Prefix addresses are below 2 GB; nothing to do for higher ranges. */
2632 if (start >= 1UL << 31)
2633 /* We are only interested in prefix pages */
2635 kvm_for_each_vcpu(i, vcpu, kvm) {
2636 /* match against both prefix pages */
2637 prefix = kvm_s390_get_prefix(vcpu);
2638 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2639 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2641 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
/* Required by common KVM code; s390 uses its own SIE exit mechanism
 * (exit_sie), so this hook is never actually invoked. */
2646 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2648 /* kvm common code refers to this, but never calls it */
/* KVM_GET_ONE_REG: copy one s390-specific vcpu register, selected by
 * reg->id, to the user buffer at reg->addr.  Each case reads the value
 * from the SIE block or vcpu->arch and reports faults via put_user(). */
2653 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2654 struct kvm_one_reg *reg)
2659 case KVM_REG_S390_TODPR:
2660 r = put_user(vcpu->arch.sie_block->todpr,
2661 (u32 __user *)reg->addr);
2663 case KVM_REG_S390_EPOCHDIFF:
2664 r = put_user(vcpu->arch.sie_block->epoch,
2665 (u64 __user *)reg->addr);
2667 case KVM_REG_S390_CPU_TIMER:
2668 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2669 (u64 __user *)reg->addr);
2671 case KVM_REG_S390_CLOCK_COMP:
2672 r = put_user(vcpu->arch.sie_block->ckc,
2673 (u64 __user *)reg->addr);
2675 case KVM_REG_S390_PFTOKEN:
2676 r = put_user(vcpu->arch.pfault_token,
2677 (u64 __user *)reg->addr);
2679 case KVM_REG_S390_PFCOMPARE:
2680 r = put_user(vcpu->arch.pfault_compare,
2681 (u64 __user *)reg->addr);
2683 case KVM_REG_S390_PFSELECT:
2684 r = put_user(vcpu->arch.pfault_select,
2685 (u64 __user *)reg->addr);
2687 case KVM_REG_S390_PP:
2688 r = put_user(vcpu->arch.sie_block->pp,
2689 (u64 __user *)reg->addr);
2691 case KVM_REG_S390_GBEA:
2692 r = put_user(vcpu->arch.sie_block->gbea,
2693 (u64 __user *)reg->addr);
/* KVM_SET_ONE_REG: mirror of the getter above — read one register
 * value from user space and store it into the vcpu state.  The CPU
 * timer goes through kvm_s390_set_cpu_timer() rather than a direct
 * field write, and invalidating the pfault token also flushes the
 * async page-fault completion queue. */
2702 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2703 struct kvm_one_reg *reg)
2709 case KVM_REG_S390_TODPR:
2710 r = get_user(vcpu->arch.sie_block->todpr,
2711 (u32 __user *)reg->addr);
2713 case KVM_REG_S390_EPOCHDIFF:
2714 r = get_user(vcpu->arch.sie_block->epoch,
2715 (u64 __user *)reg->addr);
2717 case KVM_REG_S390_CPU_TIMER:
2718 r = get_user(val, (u64 __user *)reg->addr);
2720 kvm_s390_set_cpu_timer(vcpu, val);
2722 case KVM_REG_S390_CLOCK_COMP:
2723 r = get_user(vcpu->arch.sie_block->ckc,
2724 (u64 __user *)reg->addr);
2726 case KVM_REG_S390_PFTOKEN:
2727 r = get_user(vcpu->arch.pfault_token,
2728 (u64 __user *)reg->addr);
2729 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2730 kvm_clear_async_pf_completion_queue(vcpu);
2732 case KVM_REG_S390_PFCOMPARE:
2733 r = get_user(vcpu->arch.pfault_compare,
2734 (u64 __user *)reg->addr);
2736 case KVM_REG_S390_PFSELECT:
2737 r = get_user(vcpu->arch.pfault_select,
2738 (u64 __user *)reg->addr);
2740 case KVM_REG_S390_PP:
2741 r = get_user(vcpu->arch.sie_block->pp,
2742 (u64 __user *)reg->addr);
2744 case KVM_REG_S390_GBEA:
2745 r = get_user(vcpu->arch.sie_block->gbea,
2746 (u64 __user *)reg->addr);
/* KVM_S390_INITIAL_RESET: perform an architectural initial CPU reset. */
2755 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2757 kvm_s390_vcpu_initial_reset(vcpu);
/* KVM_SET_REGS: copy all general purpose registers from user space
 * into the (shared) kvm_run register area. */
2761 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2763 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
/* KVM_GET_REGS: copy all general purpose registers to user space. */
2767 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2769 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
/* KVM_SET_SREGS: set access registers (kvm_run area) and control
 * registers (SIE block) from user space. */
2773 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2774 struct kvm_sregs *sregs)
2776 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2777 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/* KVM_GET_SREGS: read access and control registers into user space. */
2781 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2782 struct kvm_sregs *sregs)
2784 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2785 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/* KVM_SET_FPU: validate and set the FP control word and FP registers.
 * On machines with the vector facility the 16 FPRs are converted into
 * the corresponding vector registers; otherwise they are stored as
 * plain FPRs in the kvm_run area. */
2789 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2791 if (test_fp_ctl(fpu->fpc))
2793 vcpu->run->s.regs.fpc = fpu->fpc;
2795 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2796 (freg_t *) fpu->fprs);
2798 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
/* KVM_GET_FPU: read the FP registers and FP control word, converting
 * from vector registers when the vector facility is in use. */
2802 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2804 /* make sure we have the latest values */
2807 convert_vx_to_fp((freg_t *) fpu->fprs,
2808 (__vector128 *) vcpu->run->s.regs.vrs);
2810 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2811 fpu->fpc = vcpu->run->s.regs.fpc;
/* KVM_S390_SET_INITIAL_PSW: install a new PSW.  Only permitted while
 * the vcpu is in the stopped state. */
2815 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2819 if (!is_vcpu_stopped(vcpu))
2822 vcpu->run->psw_mask = psw.mask;
2823 vcpu->run->psw_addr = psw.addr;
/* KVM_TRANSLATE is not supported on s390. */
2828 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2829 struct kvm_translation *tr)
2831 return -EINVAL; /* not implemented yet */
/* Guest-debug control flags accepted by KVM_SET_GUEST_DEBUG on s390. */
2834 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2835 KVM_GUESTDBG_USE_HW_BP | \
2836 KVM_GUESTDBG_ENABLE)
/* KVM_SET_GUEST_DEBUG: enable or disable guest debugging.  Enabling
 * forces guest PER (CPUSTAT_P) and optionally imports hardware
 * breakpoints; disabling clears PER and all breakpoint data.  Requires
 * the SCLP guest-PER facility. */
2838 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2839 struct kvm_guest_debug *dbg)
2843 vcpu->guest_debug = 0;
2844 kvm_s390_clear_bp_data(vcpu);
2846 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2848 if (!sclp.has_gpere)
2851 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2852 vcpu->guest_debug = dbg->control;
2853 /* enforce guest PER */
2854 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2856 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2857 rc = kvm_s390_import_bp_data(vcpu, dbg);
2859 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2860 vcpu->arch.guestdbg.last_bp = 0;
/* On failure, roll back to the "debugging disabled" state. */
2864 vcpu->guest_debug = 0;
2865 kvm_s390_clear_bp_data(vcpu);
2866 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
/* KVM_GET_MP_STATE: report STOPPED or OPERATING for this vcpu. */
2872 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2873 struct kvm_mp_state *mp_state)
2875 /* CHECK_STOP and LOAD are not supported yet */
2876 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2877 KVM_MP_STATE_OPERATING;
/* KVM_SET_MP_STATE: let user space stop/start the vcpu.  Using this
 * interface at all transfers CPU state control to user space (the
 * user_cpu_state_ctrl flag is sticky for the VM). */
2880 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2881 struct kvm_mp_state *mp_state)
2885 /* user space knows about this interface - let it control the state */
2886 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2888 switch (mp_state->mp_state) {
2889 case KVM_MP_STATE_STOPPED:
2890 kvm_s390_vcpu_stop(vcpu);
2892 case KVM_MP_STATE_OPERATING:
2893 kvm_s390_vcpu_start(vcpu);
2895 case KVM_MP_STATE_LOAD:
2896 case KVM_MP_STATE_CHECK_STOP:
2897 /* fall through - CHECK_STOP and LOAD are not supported yet */
/* Test whether the IBS (interlock-and-broadcast suppression) facility
 * is currently enabled for this vcpu. */
2905 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2907 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
/* Process all pending per-vcpu requests before re-entering SIE:
 * prefix re-protection (MMU_RELOAD), TLB flush, IBS enable/disable,
 * operation-exception interception, and CMMA migration start/stop.
 * Called from vcpu_pre_run(); clears PROG_REQUEST first so that a
 * request raised concurrently forces another pass. */
2910 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2913 kvm_s390_vcpu_request_handled(vcpu);
2914 if (!kvm_request_pending(vcpu))
2917 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2918 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2919 * This ensures that the ipte instruction for this request has
2920 * already finished. We might race against a second unmapper that
2921 * wants to set the blocking bit. Lets just retry the request loop.
2923 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2925 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2926 kvm_s390_get_prefix(vcpu),
2927 PAGE_SIZE * 2, PROT_WRITE);
2929 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
/* ihcpu = 0xffff invalidates the cached host CPU, forcing a
 * guest TLB flush on the next SIE entry. */
2935 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2936 vcpu->arch.sie_block->ihcpu = 0xffff;
2940 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2941 if (!ibs_enabled(vcpu)) {
2942 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2943 atomic_or(CPUSTAT_IBS,
2944 &vcpu->arch.sie_block->cpuflags);
2949 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2950 if (ibs_enabled(vcpu)) {
2951 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2952 atomic_andnot(CPUSTAT_IBS,
2953 &vcpu->arch.sie_block->cpuflags);
2958 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2959 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2963 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2965 * Disable CMMA virtualization; we will emulate the ESSA
2966 * instruction manually, in order to provide additional
2967 * functionalities needed for live migration.
2969 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2973 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2975 * Re-enable CMMA virtualization if CMMA is available and
2978 if ((vcpu->kvm->arch.use_cmma) &&
2979 (vcpu->kvm->mm->context.use_cmma))
2980 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2984 /* nothing to do, just clear the request */
2985 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
/* Set the guest TOD clock: compute the epoch (guest TOD minus host
 * TOD) and, with the multiple-epoch facility (stfle 139), the epoch
 * index including the borrow when the subtraction wraps.  All vcpus
 * are blocked out of SIE while their epoch fields are updated so the
 * guest sees a consistent clock. */
2990 void kvm_s390_set_tod_clock(struct kvm *kvm,
2991 const struct kvm_s390_vm_tod_clock *gtod)
2993 struct kvm_vcpu *vcpu;
2994 struct kvm_s390_tod_clock_ext htod;
2997 mutex_lock(&kvm->lock);
3000 get_tod_clock_ext((char *)&htod);
3002 kvm->arch.epoch = gtod->tod - htod.tod;
3004 if (test_kvm_facility(kvm, 139)) {
3005 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
/* Borrow from the epoch index if the low 64 bits wrapped. */
3006 if (kvm->arch.epoch > gtod->tod)
3007 kvm->arch.epdx -= 1;
3010 kvm_s390_vcpu_block_all(kvm);
3011 kvm_for_each_vcpu(i, vcpu, kvm) {
3012 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3013 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3016 kvm_s390_vcpu_unblock_all(kvm);
3018 mutex_unlock(&kvm->lock);
3022  * kvm_arch_fault_in_page - fault-in guest page if necessary
3023  * @vcpu: The corresponding virtual cpu
3024  * @gpa: Guest physical address
3025  * @writable: Whether the page should be writable or not
3027  * Make sure that a guest page has been faulted-in on the host.
3029  * Return: Zero on success, negative error code otherwise.
3031 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3033 return gmap_fault(vcpu->arch.gmap, gpa,
3034 writable ? FAULT_FLAG_WRITE : 0);
/* Inject a pfault notification carrying @token into the guest:
 * PFAULT_INIT ("page missing") goes to the specific vcpu when
 * @start_token is set, PFAULT_DONE ("page ready") is a floating
 * interrupt delivered VM-wide. */
3037 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3038 unsigned long token)
3040 struct kvm_s390_interrupt inti;
3041 struct kvm_s390_irq irq;
3044 irq.u.ext.ext_params2 = token;
3045 irq.type = KVM_S390_INT_PFAULT_INIT;
3046 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3048 inti.type = KVM_S390_INT_PFAULT_DONE;
3049 inti.parm64 = token;
3050 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
/* Async page fault: notify the guest that a page is not yet present
 * (PFAULT_INIT with the guest-provided token). */
3054 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3055 struct kvm_async_pf *work)
3057 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3058 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
/* Async page fault: notify the guest that the page is now present
 * (PFAULT_DONE with the matching token). */
3061 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3062 struct kvm_async_pf *work)
3064 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3065 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
/* No-op on s390: the interrupt is injected directly on completion. */
3068 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3069 struct kvm_async_pf *work)
3071 /* s390 will always inject the page directly */
/* Always allow completion delivery so the async-pf bookkeeping runs. */
3074 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3077 * s390 will always inject the page directly,
3078 * but we still want check_async_completion to cleanup
/* Try to turn the current host fault into a guest async page fault.
 * Bails out when pfault handshaking is not armed (invalid token),
 * when the PSW does not match the guest's pfault mask/compare, when
 * external interrupts or the pfault subclass (CR0 bit 0x200) are
 * disabled, when an interrupt is already pending, or when the gmap
 * has pfault disabled.  Otherwise reads the 8-byte token from guest
 * real storage and queues the async work item. */
3083 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3086 struct kvm_arch_async_pf arch;
3089 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3091 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3092 vcpu->arch.pfault_compare)
3094 if (psw_extint_disabled(vcpu))
3096 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3098 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3100 if (!vcpu->arch.gmap->pfault_enabled)
3103 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3104 hva += current->thread.gmap_addr & ~PAGE_MASK;
3105 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3108 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
/* Prepare a vcpu for SIE entry: finish async-pf housekeeping, stash
 * gprs 14/15 in the SIE block (used by the sie64a entry code), bail
 * out on pending machine checks, deliver pending interrupts (non-
 * ucontrol VMs only), process vcpu requests, and set up guest-debug
 * PER shadowing.  Clears icptcode so stale intercept data is never
 * interpreted after exit. */
3112 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3117 * On s390 notifications for arriving pages will be delivered directly
3118 * to the guest but the house keeping for completed pfaults is
3119 * handled outside the worker.
3121 kvm_check_async_pf_completion(vcpu);
3123 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3124 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3129 if (test_cpu_flag(CIF_MCCK_PENDING))
3132 if (!kvm_is_ucontrol(vcpu->kvm)) {
3133 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3138 rc = kvm_s390_handle_requests(vcpu);
3142 if (guestdbg_enabled(vcpu)) {
3143 kvm_s390_backup_guest_per_regs(vcpu);
3144 kvm_s390_patch_guest_per_regs(vcpu);
3147 vcpu->arch.sie_block->icptcode = 0;
3148 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3149 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3150 trace_kvm_s390_sie_enter(vcpu, cpuflags);
/* SIE itself faulted while accessing guest memory: inject an
 * addressing exception into the guest.  Because the DAT exception
 * that brought us here is nullifying, the PSW still points at the
 * faulting instruction; its opcode is fetched to determine the ilen
 * needed to forward the PSW before injection. */
3155 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3157 struct kvm_s390_pgm_info pgm_info = {
3158 .code = PGM_ADDRESSING,
3163 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3164 trace_kvm_s390_sie_fault(vcpu);
3167 * We want to inject an addressing exception, which is defined as a
3168 * suppressing or terminating exception. However, since we came here
3169 * by a DAT access exception, the PSW still points to the faulting
3170 * instruction since DAT exceptions are nullifying. So we've got
3171 * to look up the current opcode to get the length of the instruction
3172 * to be able to forward the PSW.
3174 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3175 ilen = insn_length(opcode);
3179 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3180 * Forward by arbitrary ilc, injection will take care of
3181 * nullification if necessary.
3183 pgm_info = vcpu->arch.pgm;
3186 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3187 kvm_s390_forward_psw(vcpu, ilen);
3188 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
/* Handle a SIE exit.  Restores gprs 14/15 and guest-debug PER state,
 * then dispatches on the exit reason: -EINTR means a machine check
 * interrupted SIE (reinject it from the sie_page's mcck_info); a
 * non-zero icptcode is handled by kvm_handle_sie_intercept() or, if
 * unsupported, reported to user space as KVM_EXIT_S390_SIEIC;
 * ucontrol VMs get translation faults reported as S390_UCONTROL;
 * gmap pfaults are resolved via async-pf or a synchronous fault-in;
 * anything else is a fault inside SIE itself. */
3191 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3193 struct mcck_volatile_info *mcck_info;
3194 struct sie_page *sie_page;
3196 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3197 vcpu->arch.sie_block->icptcode);
3198 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3200 if (guestdbg_enabled(vcpu))
3201 kvm_s390_restore_guest_per_regs(vcpu);
3203 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3204 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3206 if (exit_reason == -EINTR) {
3207 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3208 sie_page = container_of(vcpu->arch.sie_block,
3209 struct sie_page, sie_block);
3210 mcck_info = &sie_page->mcck_info;
3211 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3215 if (vcpu->arch.sie_block->icptcode > 0) {
3216 int rc = kvm_handle_sie_intercept(vcpu);
3218 if (rc != -EOPNOTSUPP)
3220 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3221 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3222 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3223 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3225 } else if (exit_reason != -EFAULT) {
3226 vcpu->stat.exit_null++;
3228 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3229 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3230 vcpu->run->s390_ucontrol.trans_exc_code =
3231 current->thread.gmap_addr;
3232 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3234 } else if (current->thread.gmap_pfault) {
3235 trace_kvm_s390_major_guest_pfault(vcpu);
3236 current->thread.gmap_pfault = 0;
3237 if (kvm_arch_setup_async_pf(vcpu))
3239 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3241 return vcpu_post_run_fault_in_sie(vcpu);
/* Main vcpu execution loop: pre-run, drop the srcu lock and enter SIE
 * with interrupts off (no uaccess allowed between guest_enter and
 * guest_exit because PF_VCPU is used by the fault handler), then
 * post-run.  Loops until a signal, a guest-debug exit, or an error /
 * user-space exit request (rc != 0) ends the run. */
3244 static int __vcpu_run(struct kvm_vcpu *vcpu)
3246 int rc, exit_reason;
3249 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3250 * ning the guest), so that memslots (and other stuff) are protected
3252 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3255 rc = vcpu_pre_run(vcpu);
3259 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3261 * As PF_VCPU will be used in fault handler, between
3262 * guest_enter and guest_exit should be no uaccess.
3264 local_irq_disable();
3265 guest_enter_irqoff();
3266 __disable_cpu_timer_accounting(vcpu);
3268 exit_reason = sie64a(vcpu->arch.sie_block,
3269 vcpu->run->s.regs.gprs);
3270 local_irq_disable();
3271 __enable_cpu_timer_accounting(vcpu);
3272 guest_exit_irqoff();
3274 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3276 rc = vcpu_post_run(vcpu, exit_reason);
3277 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3279 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/* Copy register state that user space dirtied (kvm_dirty_regs bits)
 * from kvm_run into the vcpu before entering the guest, eagerly
 * enable RI/GS if user space handed over valid control blocks, and
 * lazily switch the host's access/FP-vector/guarded-storage context
 * to the guest's (saved back in store_regs()). */
3283 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3285 struct runtime_instr_cb *riccb;
3288 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3289 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3290 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3291 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3292 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3293 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3294 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3295 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3296 /* some control register changes require a tlb flush */
3297 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3299 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3300 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3301 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3302 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3303 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3304 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3306 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3307 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3308 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3309 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3310 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3311 kvm_clear_async_pf_completion_queue(vcpu);
3314 * If userspace sets the riccb (e.g. after migration) to a valid state,
3315 * we should enable RI here instead of doing the lazy enablement.
3317 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3318 test_kvm_facility(vcpu->kvm, 64) &&
3320 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3321 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3322 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3325 * If userspace sets the gscb (e.g. after migration) to non-zero,
3326 * we should enable GS here instead of doing the lazy enablement.
3328 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3329 test_kvm_facility(vcpu->kvm, 133) &&
3331 !vcpu->arch.gs_enabled) {
3332 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3333 vcpu->arch.sie_block->ecb |= ECB_GS;
3334 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3335 vcpu->arch.gs_enabled = 1;
3337 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3338 test_kvm_facility(vcpu->kvm, 82)) {
3339 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3340 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3342 save_access_regs(vcpu->arch.host_acrs);
3343 restore_access_regs(vcpu->run->s.regs.acrs);
3344 /* save host (userspace) fprs/vrs */
3346 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3347 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3349 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3351 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3352 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3353 if (test_fp_ctl(current->thread.fpu.fpc))
3354 /* User space provided an invalid FPC, let's clear it */
3355 current->thread.fpu.fpc = 0;
3356 if (MACHINE_HAS_GS) {
3358 __ctl_set_bit(2, 4);
3359 if (current->thread.gs_cb) {
3360 vcpu->arch.host_gscb = current->thread.gs_cb;
3361 save_gs_cb(vcpu->arch.host_gscb);
3363 if (vcpu->arch.gs_enabled) {
3364 current->thread.gs_cb = (struct gs_cb *)
3365 &vcpu->run->s.regs.gscb;
3366 restore_gs_cb(current->thread.gs_cb);
3371 kvm_run->kvm_dirty_regs = 0;
/* Mirror of sync_regs(): after a guest run, copy the vcpu register
 * state back into kvm_run for user space, and restore the host's
 * access-register, FP/vector and guarded-storage context that
 * sync_regs() swapped out. */
3374 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3376 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3377 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3378 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3379 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3380 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3381 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3382 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3383 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3384 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3385 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3386 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3387 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3388 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3389 save_access_regs(vcpu->run->s.regs.acrs);
3390 restore_access_regs(vcpu->arch.host_acrs);
3391 /* Save guest register state */
3393 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3394 /* Restore will be done lazily at return */
3395 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3396 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3397 if (MACHINE_HAS_GS) {
3399 __ctl_set_bit(2, 4);
3400 if (vcpu->arch.gs_enabled)
3401 save_gs_cb(current->thread.gs_cb);
3402 current->thread.gs_cb = vcpu->arch.host_gscb;
3403 restore_gs_cb(vcpu->arch.host_gscb);
3404 if (!vcpu->arch.host_gscb)
3405 __ctl_clear_bit(2, 4);
3406 vcpu->arch.host_gscb = NULL;
/* KVM_RUN entry point: handle immediate_exit and pending debug exits,
 * auto-start the vcpu unless user space controls CPU state (in which
 * case running a stopped vcpu is an error), sync registers in, run
 * the vcpu loop, translate signal/debug/-EREMOTE outcomes into the
 * proper exit reasons, then store registers back for user space. */
3412 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3416 if (kvm_run->immediate_exit)
3419 if (guestdbg_exit_pending(vcpu)) {
3420 kvm_s390_prepare_debug_exit(vcpu);
3424 kvm_sigset_activate(vcpu);
3426 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3427 kvm_s390_vcpu_start(vcpu);
3428 } else if (is_vcpu_stopped(vcpu)) {
3429 pr_err_ratelimited("can't run stopped vcpu %d\n",
3434 sync_regs(vcpu, kvm_run);
3435 enable_cpu_timer_accounting(vcpu);
3438 rc = __vcpu_run(vcpu);
3440 if (signal_pending(current) && !rc) {
3441 kvm_run->exit_reason = KVM_EXIT_INTR;
3445 if (guestdbg_exit_pending(vcpu) && !rc) {
3446 kvm_s390_prepare_debug_exit(vcpu);
3450 if (rc == -EREMOTE) {
3451 /* userspace support is needed, kvm_run has been prepared */
3455 disable_cpu_timer_accounting(vcpu);
3456 store_regs(vcpu, kvm_run);
3458 kvm_sigset_deactivate(vcpu);
3460 vcpu->stat.exit_userspace++;
3465  * store status at address
3466  * we have two special cases:
3467  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3468  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/* Writes the architectural store-status save area (FPRs/VRs, GPRs,
 * PSW, prefix, FPC, TOD programmable reg, CPU timer, clock comparator,
 * ARs, CRs) into guest memory.  The two sentinel addresses also store
 * the archmode byte at absolute/real address 163.  Vector registers,
 * when present, are converted to FP format first.  Errors from the
 * individual writes are OR-ed and reported as -EFAULT. */
3470 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3472 unsigned char archmode = 1;
3473 freg_t fprs[NUM_FPRS];
3478 px = kvm_s390_get_prefix(vcpu);
3479 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3480 if (write_guest_abs(vcpu, 163, &archmode, 1))
3483 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3484 if (write_guest_real(vcpu, 163, &archmode, 1))
3488 gpa -= __LC_FPREGS_SAVE_AREA;
3490 /* manually convert vector registers if necessary */
3491 if (MACHINE_HAS_VX) {
3492 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3493 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3496 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3497 vcpu->run->s.regs.fprs, 128);
3499 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3500 vcpu->run->s.regs.gprs, 128);
3501 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3502 &vcpu->arch.sie_block->gpsw, 16);
3503 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3505 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3506 &vcpu->run->s.regs.fpc, 4);
3507 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3508 &vcpu->arch.sie_block->todpr, 4);
3509 cputm = kvm_s390_get_cpu_timer(vcpu);
3510 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
/* The clock comparator is stored without its low byte. */
3512 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3513 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3515 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3516 &vcpu->run->s.regs.acrs, 64);
3517 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3518 &vcpu->arch.sie_block->gcr, 128);
3519 return rc ? -EFAULT : 0;
/* Store status while the vcpu's registers are loaded on the host:
 * refresh the lazily-switched FPC and access registers first, then
 * delegate to kvm_s390_store_status_unloaded(). */
3522 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3525 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3526 * switch in the run ioctl. Let's update our copies before we save
3527 * it into the save area
3530 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3531 save_access_regs(vcpu->run->s.regs.acrs);
3533 return kvm_s390_store_status_unloaded(vcpu, addr);
/* Cancel any pending ENABLE_IBS request and synchronously disable IBS
 * on this vcpu. */
3536 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3538 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3539 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
/* Disable IBS on every vcpu of the VM. */
3542 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3545 struct kvm_vcpu *vcpu;
3547 kvm_for_each_vcpu(i, vcpu, kvm) {
3548 __disable_ibs_on_vcpu(vcpu);
/* Cancel any pending DISABLE_IBS request and synchronously enable IBS
 * on this vcpu. */
3552 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3556 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3557 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
/* Transition a vcpu out of the STOPPED state.  IBS is an optimization
 * valid only while a single vcpu runs: the first started vcpu gets
 * IBS enabled, and starting a second one disables IBS everywhere.
 * The start_stop_lock serializes these transitions VM-wide. */
3560 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3562 int i, online_vcpus, started_vcpus = 0;
3564 if (!is_vcpu_stopped(vcpu))
3567 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3568 /* Only one cpu at a time may enter/leave the STOPPED state. */
3569 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3570 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3572 for (i = 0; i < online_vcpus; i++) {
3573 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3577 if (started_vcpus == 0) {
3578 /* we're the only active VCPU -> speed it up */
3579 __enable_ibs_on_vcpu(vcpu);
3580 } else if (started_vcpus == 1) {
3582 * As we are starting a second VCPU, we have to disable
3583 * the IBS facility on all VCPUs to remove potentially
3584 * outstanding ENABLE requests.
3586 __disable_ibs_on_all_vcpus(vcpu->kvm);
3589 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3591 * Another VCPU might have used IBS while we were offline.
3592 * Let's play safe and flush the VCPU at startup.
3594 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3595 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/* Transition a vcpu into the STOPPED state.  Clears any pending SIGP
 * STOP request first, disables IBS on the stopping vcpu, and — if
 * exactly one vcpu remains running afterwards — enables IBS on that
 * remaining vcpu.  Serialized by start_stop_lock like vcpu_start. */
3599 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3601 int i, online_vcpus, started_vcpus = 0;
3602 struct kvm_vcpu *started_vcpu = NULL;
3604 if (is_vcpu_stopped(vcpu))
3607 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3608 /* Only one cpu at a time may enter/leave the STOPPED state. */
3609 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3610 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3612 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3613 kvm_s390_clear_stop_irq(vcpu);
3615 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3616 __disable_ibs_on_vcpu(vcpu);
3618 for (i = 0; i < online_vcpus; i++) {
3619 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3621 started_vcpu = vcpu->kvm->vcpus[i];
3625 if (started_vcpus == 1) {
3627 * As we only have one VCPU left, we want to enable the
3628 * IBS facility for that VCPU to speed it up.
3630 __enable_ibs_on_vcpu(started_vcpu);
3633 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/* KVM_ENABLE_CAP on a vcpu; currently only KVM_CAP_S390_CSS_SUPPORT,
 * which turns on VM-wide channel-subsystem intercept support. */
3637 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3638 struct kvm_enable_cap *cap)
3646 case KVM_CAP_S390_CSS_SUPPORT:
3647 if (!vcpu->kvm->arch.css_support) {
3648 vcpu->kvm->arch.css_support = 1;
3649 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3650 trace_kvm_s390_enable_css(vcpu->kvm);
/* KVM_S390_MEM_OP: read or write guest logical memory on behalf of
 * user space.  Untrusted parameters (flags, ar, size) are validated
 * up front and size is capped at MEM_OP_MAX_SIZE.  With CHECK_ONLY,
 * only access permissions are verified; otherwise data is staged
 * through a temporary kernel buffer.  On a guest access exception
 * (r > 0) with INJECT_EXCEPTION set, the program interrupt is
 * injected into the vcpu. */
3661 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3662 struct kvm_s390_mem_op *mop)
3664 void __user *uaddr = (void __user *)mop->buf;
3665 void *tmpbuf = NULL;
3667 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3668 | KVM_S390_MEMOP_F_CHECK_ONLY;
3670 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
3673 if (mop->size > MEM_OP_MAX_SIZE)
3676 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3677 tmpbuf = vmalloc(mop->size);
3682 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3685 case KVM_S390_MEMOP_LOGICAL_READ:
3686 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3687 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3688 mop->size, GACC_FETCH);
3691 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3693 if (copy_to_user(uaddr, tmpbuf, mop->size))
3697 case KVM_S390_MEMOP_LOGICAL_WRITE:
3698 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3699 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3700 mop->size, GACC_STORE);
3703 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3707 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3713 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3715 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3716 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
/* Dispatcher for all s390 vcpu-level ioctls.  Copies each ioctl's
 * argument structure from user space, validates it, and forwards to
 * the matching handler.  UCAS map/unmap and VCPU_FAULT are the
 * user-controlled (ucontrol) address-space operations, available only
 * when CONFIG_KVM_S390_UCONTROL is set and the VM is ucontrol. */
3722 long kvm_arch_vcpu_ioctl(struct file *filp,
3723 unsigned int ioctl, unsigned long arg)
3725 struct kvm_vcpu *vcpu = filp->private_data;
3726 void __user *argp = (void __user *)arg;
3731 case KVM_S390_IRQ: {
3732 struct kvm_s390_irq s390irq;
3735 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3737 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3740 case KVM_S390_INTERRUPT: {
3741 struct kvm_s390_interrupt s390int;
3742 struct kvm_s390_irq s390irq = {};
3745 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3747 if (s390int_to_s390irq(&s390int, &s390irq))
3749 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3752 case KVM_S390_STORE_STATUS:
3753 idx = srcu_read_lock(&vcpu->kvm->srcu);
3754 r = kvm_s390_store_status_unloaded(vcpu, arg);
3755 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3757 case KVM_S390_SET_INITIAL_PSW: {
3761 if (copy_from_user(&psw, argp, sizeof(psw)))
3763 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3766 case KVM_S390_INITIAL_RESET:
3767 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3769 case KVM_SET_ONE_REG:
3770 case KVM_GET_ONE_REG: {
3771 struct kvm_one_reg reg;
3773 if (copy_from_user(&reg, argp, sizeof(reg)))
3775 if (ioctl == KVM_SET_ONE_REG)
3776 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3778 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3781 #ifdef CONFIG_KVM_S390_UCONTROL
3782 case KVM_S390_UCAS_MAP: {
3783 struct kvm_s390_ucas_mapping ucasmap;
3785 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3790 if (!kvm_is_ucontrol(vcpu->kvm)) {
3795 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3796 ucasmap.vcpu_addr, ucasmap.length);
3799 case KVM_S390_UCAS_UNMAP: {
3800 struct kvm_s390_ucas_mapping ucasmap;
3802 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3807 if (!kvm_is_ucontrol(vcpu->kvm)) {
3812 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3817 case KVM_S390_VCPU_FAULT: {
3818 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3821 case KVM_ENABLE_CAP:
3823 struct kvm_enable_cap cap;
3825 if (copy_from_user(&cap, argp, sizeof(cap)))
3827 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3830 case KVM_S390_MEM_OP: {
3831 struct kvm_s390_mem_op mem_op;
3833 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3834 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3839 case KVM_S390_SET_IRQ_STATE: {
3840 struct kvm_s390_irq_state irq_state;
3843 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
/* Reject buffers that are empty, oversized, or not a whole
 * multiple of the irq record size. */
3845 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3846 irq_state.len == 0 ||
3847 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3851 r = kvm_s390_set_irq_state(vcpu,
3852 (void __user *) irq_state.buf,
3856 case KVM_S390_GET_IRQ_STATE: {
3857 struct kvm_s390_irq_state irq_state;
3860 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3862 if (irq_state.len == 0) {
3866 r = kvm_s390_get_irq_state(vcpu,
3867 (__u8 __user *) irq_state.buf,
/* mmap fault handler for the vcpu fd: ucontrol VMs may map the SIE
 * control block page at KVM_S390_SIE_PAGE_OFFSET; everything else
 * gets SIGBUS. */
3877 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3879 #ifdef CONFIG_KVM_S390_UCONTROL
3880 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3881 && (kvm_is_ucontrol(vcpu->kvm))) {
3882 vmf->page = virt_to_page(vcpu->arch.sie_block);
3883 get_page(vmf->page);
3887 return VM_FAULT_SIGBUS;
/* No architecture-private memslot data is needed on s390. */
3890 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3891 unsigned long npages)
3896 /* Section: memory related */
/* Validate a memslot before it is installed: user address, size and
 * guest physical placement must be 1 MB (segment) aligned and inside
 * the configured guest memory limit. */
3897 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3898 struct kvm_memory_slot *memslot,
3899 const struct kvm_userspace_memory_region *mem,
3900 enum kvm_mr_change change)
3902 /* A few sanity checks. We can have memory slots which have to be
3903 located/ended at a segment boundary (1MB). The memory in userland is
3904 ok to be fragmented into various different vmas. It is okay to mmap()
3905 and munmap() stuff in this slot after doing this call at any time */
3907 if (mem->userspace_addr & 0xffffful)
3910 if (mem->memory_size & 0xffffful)
3913 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
/* Apply a committed memslot change to the gmap: unmap the old segment
 * on delete/move, then map the new user range for create/move.  A
 * failure here is only logged — the memslot change has already been
 * committed by common code. */
3919 void kvm_arch_commit_memory_region(struct kvm *kvm,
3920 const struct kvm_userspace_memory_region *mem,
3921 const struct kvm_memory_slot *old,
3922 const struct kvm_memory_slot *new,
3923 enum kvm_mr_change change)
3929 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
3930 old->npages * PAGE_SIZE);
3933 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
3934 old->npages * PAGE_SIZE);
3939 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3940 mem->guest_phys_addr, mem->memory_size);
3942 case KVM_MR_FLAGS_ONLY:
3945 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
3948 pr_warn("failed to commit memory region\n");
/* Build a mask of facility bits in word @i that may be passed to the
 * guest, derived from the SCLP hypervisor facility indication
 * (2-bit field per 16-facility group in sclp.hmfai). */
3952 static inline unsigned long nonhyp_mask(int i)
3954 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3956 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
/* Reset the valid-wakeup marker when the vcpu leaves the halted state. */
3959 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3961 vcpu->valid_wakeup = false;
/* Module init: require the SIE interpretation facility (sief2), mask
 * the host facility list down to what may be forwarded to guests,
 * then register with common KVM. */
3964 static int __init kvm_s390_init(void)
3968 if (!sclp.has_sief2) {
3969 pr_info("SIE not available\n");
3973 for (i = 0; i < 16; i++)
3974 kvm_s390_fac_list_mask[i] |=
3975 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3977 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
/* Module exit: unregister from common KVM. */
3980 static void __exit kvm_s390_exit(void)
3985 module_init(kvm_s390_init);
3986 module_exit(kvm_s390_exit);
3989 * Enable autoloading of the kvm module.
3990 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3991 * since x86 takes a different approach.
3993 #include <linux/miscdevice.h>
3994 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3995 MODULE_ALIAS("devname:kvm");