arch/x86/kvm/vmx/sgx.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*  Copyright(c) 2021 Intel Corporation. */
   3
   4 #include <asm/sgx.h>
   5
   6 #include "cpuid.h"
   7 #include "kvm_cache_regs.h"
   8 #include "nested.h"
   9 #include "sgx.h"
  10 #include "vmx.h"
  11 #include "x86.h"
  12
/*
 * Backing variable for the read-only (0444) "sgx" module parameter; enabled
 * by default.  It gates ENCLS leaf availability and the choice of default
 * launch-enclave public key hash in this file.
 */
bool __read_mostly enable_sgx = 1;
module_param_named(sgx, enable_sgx, bool, 0444);

/*
 * Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs.  Filled in by
 * setup_default_sgx_lepubkeyhash() and copied into each vCPU by
 * vcpu_setup_sgx_lepubkeyhash().
 */
static u64 sgx_pubkey_hash[4] __ro_after_init;
  18
  19 /*
  20  * ENCLS's memory operands use a fixed segment (DS) and a fixed
  21  * address size based on the mode.  Related prefixes are ignored.
  22  */
  23 static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
  24                              int size, int alignment, gva_t *gva)
  25 {
  26         struct kvm_segment s;
  27         bool fault;
  28
  29         /* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
  30         *gva = offset;
  31         if (!is_long_mode(vcpu)) {
  32                 vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
  33                 *gva += s.base;
  34         }
  35
  36         if (!IS_ALIGNED(*gva, alignment)) {
  37                 fault = true;
  38         } else if (likely(is_long_mode(vcpu))) {
  39                 fault = is_noncanonical_address(*gva, vcpu);
  40         } else {
  41                 *gva &= 0xffffffff;
  42                 fault = (s.unusable) ||
  43                         (s.type != 2 && s.type != 3) ||
  44                         (*gva > s.limit) ||
  45                         ((s.base != 0 || s.limit != 0xffffffff) &&
  46                         (((u64)*gva + size - 1) > s.limit + 1));
  47         }
  48         if (fault)
  49                 kvm_inject_gp(vcpu, 0);
  50         return fault ? -EINVAL : 0;
  51 }
  52
  53 static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
  54                                          unsigned int size)
  55 {
  56         uint64_t data[2] = { addr, size };
  57
  58         __kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
  59 }
  60
  61 static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
  62                         unsigned int size)
  63 {
  64         if (__copy_from_user(data, (void __user *)hva, size)) {
  65                 sgx_handle_emulation_failure(vcpu, hva, size);
  66                 return -EFAULT;
  67         }
  68
  69         return 0;
  70 }
  71
  72 static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
  73                           gpa_t *gpa)
  74 {
  75         struct x86_exception ex;
  76
  77         if (write)
  78                 *gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
  79         else
  80                 *gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);
  81
  82         if (*gpa == UNMAPPED_GVA) {
  83                 kvm_inject_emulated_page_fault(vcpu, &ex);
  84                 return -EFAULT;
  85         }
  86
  87         return 0;
  88 }
  89
  90 static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
  91 {
  92         *hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
  93         if (kvm_is_error_hva(*hva)) {
  94                 sgx_handle_emulation_failure(vcpu, gpa, 1);
  95                 return -EFAULT;
  96         }
  97
  98         *hva |= gpa & ~PAGE_MASK;
  99
 100         return 0;
 101 }
 102
 103 static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
 104 {
 105         struct x86_exception ex;
 106
 107         /*
 108          * A non-EPCM #PF indicates a bad userspace HVA.  This *should* check
 109          * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
 110          * but the error code isn't (yet) plumbed through the ENCLS helpers.
 111          */
 112         if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
 113                 kvm_prepare_emulation_failure_exit(vcpu);
 114                 return 0;
 115         }
 116
 117         /*
 118          * If the guest thinks it's running on SGX2 hardware, inject an SGX
 119          * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
 120          * #PF on SGX2).  The assumption is that EPCM faults are much more
 121          * likely than a bad userspace address.
 122          */
 123         if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
 124             guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
 125                 memset(&ex, 0, sizeof(ex));
 126                 ex.vector = PF_VECTOR;
 127                 ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
 128                                 PFERR_SGX_MASK;
 129                 ex.address = gva;
 130                 ex.error_code_valid = true;
 131                 ex.nested_page_fault = false;
 132                 kvm_inject_page_fault(vcpu, &ex);
 133         } else {
 134                 kvm_inject_gp(vcpu, 0);
 135         }
 136         return 1;
 137 }
 138
 139 static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
 140                                   struct sgx_pageinfo *pageinfo,
 141                                   unsigned long secs_hva,
 142                                   gva_t secs_gva)
 143 {
 144         struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
 145         struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
 146         u64 attributes, xfrm, size;
 147         u32 miscselect;
 148         u8 max_size_log2;
 149         int trapnr, ret;
 150
 151         sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0);
 152         sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1);
 153         if (!sgx_12_0 || !sgx_12_1) {
 154                 kvm_prepare_emulation_failure_exit(vcpu);
 155                 return 0;
 156         }
 157
 158         miscselect = contents->miscselect;
 159         attributes = contents->attributes;
 160         xfrm = contents->xfrm;
 161         size = contents->size;
 162
 163         /* Enforce restriction of access to the PROVISIONKEY. */
 164         if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
 165             (attributes & SGX_ATTR_PROVISIONKEY)) {
 166                 if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
 167                         pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n");
 168                 kvm_inject_gp(vcpu, 0);
 169                 return 1;
 170         }
 171
 172         /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */
 173         if ((u32)miscselect & ~sgx_12_0->ebx ||
 174             (u32)attributes & ~sgx_12_1->eax ||
 175             (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
 176             (u32)xfrm & ~sgx_12_1->ecx ||
 177             (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
 178                 kvm_inject_gp(vcpu, 0);
 179                 return 1;
 180         }
 181
 182         /* Enforce CPUID restriction on max enclave size. */
 183         max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
 184                                                             sgx_12_0->edx;
 185         if (size >= BIT_ULL(max_size_log2))
 186                 kvm_inject_gp(vcpu, 0);
 187
 188         /*
 189          * sgx_virt_ecreate() returns:
 190          *  1) 0:       ECREATE was successful
 191          *  2) -EFAULT: ECREATE was run but faulted, and trapnr was set to the
 192          *              exception number.
 193          *  3) -EINVAL: access_ok() on @secs_hva failed. This should never
 194          *              happen as KVM checks host addresses at memslot creation.
 195          *              sgx_virt_ecreate() has already warned in this case.
 196          */
 197         ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
 198         if (!ret)
 199                 return kvm_skip_emulated_instruction(vcpu);
 200         if (ret == -EFAULT)
 201                 return sgx_inject_fault(vcpu, secs_gva, trapnr);
 202
 203         return ret;
 204 }
 205
 206 static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
 207 {
 208         gva_t pageinfo_gva, secs_gva;
 209         gva_t metadata_gva, contents_gva;
 210         gpa_t metadata_gpa, contents_gpa, secs_gpa;
 211         unsigned long metadata_hva, contents_hva, secs_hva;
 212         struct sgx_pageinfo pageinfo;
 213         struct sgx_secs *contents;
 214         struct x86_exception ex;
 215         int r;
 216
 217         if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
 218             sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
 219                 return 1;
 220
 221         /*
 222          * Copy the PAGEINFO to local memory, its pointers need to be
 223          * translated, i.e. we need to do a deep copy/translate.
 224          */
 225         r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
 226                                 sizeof(pageinfo), &ex);
 227         if (r == X86EMUL_PROPAGATE_FAULT) {
 228                 kvm_inject_emulated_page_fault(vcpu, &ex);
 229                 return 1;
 230         } else if (r != X86EMUL_CONTINUE) {
 231                 sgx_handle_emulation_failure(vcpu, pageinfo_gva,
 232                                              sizeof(pageinfo));
 233                 return 0;
 234         }
 235
 236         if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
 237             sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
 238                               &contents_gva))
 239                 return 1;
 240
 241         /*
 242          * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
 243          * Resume the guest on failure to inject a #PF.
 244          */
 245         if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
 246             sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
 247             sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
 248                 return 1;
 249
 250         /*
 251          * ...and then to HVA.  The order of accesses isn't architectural, i.e.
 252          * KVM doesn't have to fully process one address at a time.  Exit to
 253          * userspace if a GPA is invalid.
 254          */
 255         if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
 256             sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
 257             sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
 258                 return 0;
 259
 260         /*
 261          * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
 262          * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
 263          * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
 264          * enforce restriction of access to the PROVISIONKEY.
 265          */
 266         contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT);
 267         if (!contents)
 268                 return -ENOMEM;
 269
 270         /* Exit to userspace if copying from a host userspace address fails. */
 271         if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
 272                 free_page((unsigned long)contents);
 273                 return 0;
 274         }
 275
 276         pageinfo.metadata = metadata_hva;
 277         pageinfo.contents = (u64)contents;
 278
 279         r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);
 280
 281         free_page((unsigned long)contents);
 282
 283         return r;
 284 }
 285
 286 static int handle_encls_einit(struct kvm_vcpu *vcpu)
 287 {
 288         unsigned long sig_hva, secs_hva, token_hva, rflags;
 289         struct vcpu_vmx *vmx = to_vmx(vcpu);
 290         gva_t sig_gva, secs_gva, token_gva;
 291         gpa_t sig_gpa, secs_gpa, token_gpa;
 292         int ret, trapnr;
 293
 294         if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
 295             sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
 296             sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
 297                 return 1;
 298
 299         /*
 300          * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
 301          * Resume the guest on failure to inject a #PF.
 302          */
 303         if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
 304             sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
 305             sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
 306                 return 1;
 307
 308         /*
 309          * ...and then to HVA.  The order of accesses isn't architectural, i.e.
 310          * KVM doesn't have to fully process one address at a time.  Exit to
 311          * userspace if a GPA is invalid.  Note, all structures are aligned and
 312          * cannot split pages.
 313          */
 314         if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
 315             sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
 316             sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
 317                 return 0;
 318
 319         ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
 320                              (void __user *)secs_hva,
 321                              vmx->msr_ia32_sgxlepubkeyhash, &trapnr);
 322
 323         if (ret == -EFAULT)
 324                 return sgx_inject_fault(vcpu, secs_gva, trapnr);
 325
 326         /*
 327          * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
 328          * @token_hva or @secs_hva. This should never happen as KVM checks host
 329          * addresses at memslot creation. sgx_virt_einit() has already warned
 330          * in this case, so just return.
 331          */
 332         if (ret < 0)
 333                 return ret;
 334
 335         rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
 336                                           X86_EFLAGS_AF | X86_EFLAGS_SF |
 337                                           X86_EFLAGS_OF);
 338         if (ret)
 339                 rflags |= X86_EFLAGS_ZF;
 340         else
 341                 rflags &= ~X86_EFLAGS_ZF;
 342         vmx_set_rflags(vcpu, rflags);
 343
 344         kvm_rax_write(vcpu, ret);
 345         return kvm_skip_emulated_instruction(vcpu);
 346 }
 347
 348 static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
 349 {
 350         if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX))
 351                 return false;
 352
 353         if (leaf >= ECREATE && leaf <= ETRACK)
 354                 return guest_cpuid_has(vcpu, X86_FEATURE_SGX1);
 355
 356         if (leaf >= EAUG && leaf <= EMODT)
 357                 return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);
 358
 359         return false;
 360 }
 361
 362 static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
 363 {
 364         const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;
 365
 366         return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
 367 }
 368
 369 int handle_encls(struct kvm_vcpu *vcpu)
 370 {
 371         u32 leaf = (u32)kvm_rax_read(vcpu);
 372
 373         if (!encls_leaf_enabled_in_guest(vcpu, leaf)) {
 374                 kvm_queue_exception(vcpu, UD_VECTOR);
 375         } else if (!sgx_enabled_in_guest_bios(vcpu)) {
 376                 kvm_inject_gp(vcpu, 0);
 377         } else {
 378                 if (leaf == ECREATE)
 379                         return handle_encls_ecreate(vcpu);
 380                 if (leaf == EINIT)
 381                         return handle_encls_einit(vcpu);
 382                 WARN(1, "KVM: unexpected exit on ENCLS[%u]", leaf);
 383                 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
 384                 vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
 385                 return 0;
 386         }
 387         return 1;
 388 }
 389
 390 void setup_default_sgx_lepubkeyhash(void)
 391 {
 392         /*
 393          * Use Intel's default value for Skylake hardware if Launch Control is
 394          * not supported, i.e. Intel's hash is hardcoded into silicon, or if
 395          * Launch Control is supported and enabled, i.e. mimic the reset value
 396          * and let the guest write the MSRs at will.  If Launch Control is
 397          * supported but disabled, then use the current MSR values as the hash
 398          * MSRs exist but are read-only (locked and not writable).
 399          */
 400         if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
 401             rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
 402                 sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
 403                 sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
 404                 sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
 405                 sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
 406         } else {
 407                 /* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
 408                 rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
 409                 rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
 410                 rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
 411         }
 412 }
 413
 414 void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
 415 {
 416         struct vcpu_vmx *vmx = to_vmx(vcpu);
 417
 418         memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
 419                sizeof(sgx_pubkey_hash));
 420 }
 421
 422 /*
 423  * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
 424  * restrictions if the guest's allowed-1 settings diverge from hardware.
 425  */
 426 static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
 427 {
 428         struct kvm_cpuid_entry2 *guest_cpuid;
 429         u32 eax, ebx, ecx, edx;
 430
 431         if (!vcpu->kvm->arch.sgx_provisioning_allowed)
 432                 return true;
 433
 434         guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 0);
 435         if (!guest_cpuid)
 436                 return true;
 437
 438         cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
 439         if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
 440                 return true;
 441
 442         guest_cpuid = kvm_find_cpuid_entry(vcpu, 0x12, 1);
 443         if (!guest_cpuid)
 444                 return true;
 445
 446         cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
 447         if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
 448             guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
 449                 return true;
 450
 451         return false;
 452 }
 453
 454 void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 455 {
 456         /*
 457          * There is no software enable bit for SGX that is virtualized by
 458          * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
 459          * guest (either by the host or by the guest's BIOS) but enabled in the
 460          * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
 461          * the expected system behavior for ENCLS.
 462          */
 463         u64 bitmap = -1ull;
 464
 465         /* Nothing to do if hardware doesn't support SGX */
 466         if (!cpu_has_vmx_encls_vmexit())
 467                 return;
 468
 469         if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
 470             sgx_enabled_in_guest_bios(vcpu)) {
 471                 if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
 472                         bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
 473                         if (sgx_intercept_encls_ecreate(vcpu))
 474                                 bitmap |= (1 << ECREATE);
 475                 }
 476
 477                 if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
 478                         bitmap &= ~GENMASK_ULL(EMODT, EAUG);
 479
 480                 /*
 481                  * Trap and execute EINIT if launch control is enabled in the
 482                  * host using the guest's values for launch control MSRs, even
 483                  * if the guest's values are fixed to hardware default values.
 484                  * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
 485                  * the MSRs is extraordinarily expensive.
 486                  */
 487                 if (boot_cpu_has(X86_FEATURE_SGX_LC))
 488                         bitmap |= (1 << EINIT);
 489
 490                 if (!vmcs12 && is_guest_mode(vcpu))
 491                         vmcs12 = get_vmcs12(vcpu);
 492                 if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
 493                         bitmap |= vmcs12->encls_exiting_bitmap;
 494         }
 495         vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
 496 }