GNU Linux-libre 5.10.217-gnu1
arch/x86/kernel/paravirt.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*  Paravirtualization interfaces
    Copyright (C) 2006 Rusty Russell IBM Corporation

    2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc
*/

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/efi.h>
#include <linux/bcd.h>
#include <linux/highmem.h>
#include <linux/kprobes.h>
#include <linux/pgtable.h>

#include <asm/bug.h>
#include <asm/paravirt.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/time.h>
#include <asm/pgalloc.h>
#include <asm/irq.h>
#include <asm/delay.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/tlbflush.h>
#include <asm/timer.h>
#include <asm/special_insns.h>
#include <asm/tlb.h>
#include <asm/io_bitmap.h>
#include <asm/text-patching.h>

/*
 * nop stub, which must not clobber anything *including the stack* to
 * avoid confusing the entry prologues.
 */
extern void _paravirt_nop(void);
asm (".pushsection .entry.text, \"ax\"\n"
     ".global _paravirt_nop\n"
     "_paravirt_nop:\n\t"
     ASM_RET
     ".size _paravirt_nop, . - _paravirt_nop\n\t"
     ".type _paravirt_nop, @function\n\t"
     ".popsection");
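
/*
 * Editor's note: _paravirt_nop is never meant to run on a hot path.
 * paravirt_patch_default() below recognizes this stub and returns 0 for
 * it, so the patching machinery replaces the entire call site with NOPs
 * instead of leaving a call to an empty function.
 */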

void __init default_banner(void)
{
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
               pv_info.name);
}

/* Undefined instruction for dealing with missing ops pointers. */
static const unsigned char ud2a[] = { 0x0f, 0x0b };

struct branch {
        unsigned char opcode;
        u32 delta;
} __attribute__((packed));

static unsigned paravirt_patch_call(void *insn_buff, const void *target,
                                    unsigned long addr, unsigned len)
{
        __text_gen_insn(insn_buff, CALL_INSN_OPCODE,
                        (void *)addr, target, CALL_INSN_SIZE);
        return CALL_INSN_SIZE;
}
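
/*
 * Editor's note (assumes the CALL_INSN_* definitions from
 * <asm/text-patching.h>): the generated instruction is a 5-byte near
 * call, opcode 0xE8 followed by a 32-bit displacement relative to the
 * first byte after the instruction:
 *
 *      rel32 = (long)target - (addr + CALL_INSN_SIZE)
 *
 * so CALL_INSN_SIZE (5) is both the patch length and what this helper
 * reports back to the patching core.
 */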

#ifdef CONFIG_PARAVIRT_XXL
/* identity function, which can be inlined */
u64 notrace _paravirt_ident_64(u64 x)
{
        return x;
}

static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
                                   unsigned long addr, unsigned len)
{
        struct branch *b = insn_buff;
        unsigned long delta = (unsigned long)target - (addr+5);

        if (len < 5) {
#ifdef CONFIG_RETPOLINE
                WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
#endif
                return len;     /* jmp too long for patch site */
        }

        b->opcode = 0xe9;       /* jmp */
        b->delta = delta;

        return 5;
}
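
/*
 * Editor's note: cpu.iret and cpu.usergs_sysret64 never return to their
 * caller (they leave the kernel), so paravirt_patch_default() patches
 * those sites with a tail JMP rather than a CALL; the 0xE9 near jump
 * above uses the same rel32 displacement scheme as the call patch.
 */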
#endif

DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);

void __init native_pv_lock_init(void)
{
        if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
                static_branch_disable(&virt_spin_lock_key);
}
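
/*
 * Editor's note: the key defaults to true so that, under a hypervisor,
 * virt_spin_lock() can fall back to a simple test-and-set lock, which
 * behaves better than queued spinlocks when vCPUs can be preempted.
 * On bare metal the hypervisor feature bit is clear, the key is
 * disabled above, and native qspinlocks are kept.
 */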

unsigned paravirt_patch_default(u8 type, void *insn_buff,
                                unsigned long addr, unsigned len)
{
        /*
         * Neat trick to map patch type back to the call within the
         * corresponding structure.
         */
        void *opfunc = *((void **)&pv_ops + type);
        unsigned ret;

        if (opfunc == NULL)
                /* If there's no function, patch it with a ud2a (BUG) */
                ret = paravirt_patch_insns(insn_buff, len, ud2a, ud2a+sizeof(ud2a));
        else if (opfunc == _paravirt_nop)
                ret = 0;

#ifdef CONFIG_PARAVIRT_XXL
        /* identity functions just return their single argument */
        else if (opfunc == _paravirt_ident_64)
                ret = paravirt_patch_ident_64(insn_buff, len);

        else if (type == PARAVIRT_PATCH(cpu.iret) ||
                 type == PARAVIRT_PATCH(cpu.usergs_sysret64))
                /* If operation requires a jmp, then jmp */
                ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
#endif
        else
                /* Otherwise call the function. */
                ret = paravirt_patch_call(insn_buff, opfunc, addr, len);

        return ret;
}
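
/*
 * Editor's note: the returned length tells the caller (apply_paravirt())
 * how many bytes of the patch site were filled in; the remainder of the
 * site is padded with NOPs. A nop op therefore turns its whole call site
 * into NOPs (ret == 0), while a patched call or jmp occupies 5 bytes.
 */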

unsigned paravirt_patch_insns(void *insn_buff, unsigned len,
                              const char *start, const char *end)
{
        unsigned insn_len = end - start;

        /* Alternative instruction is too large for the patch site and we cannot continue: */
        BUG_ON(insn_len > len || start == NULL);

        memcpy(insn_buff, start, insn_len);

        return insn_len;
}

struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;

static u64 native_steal_clock(int cpu)
{
        return 0;
}

/* These are in entry.S */
extern void native_iret(void);
extern void native_usergs_sysret64(void);

static struct resource reserve_ioports = {
        .start = 0,
        .end = IO_SPACE_LIMIT,
        .name = "paravirt-ioport",
        .flags = IORESOURCE_IO | IORESOURCE_BUSY,
};

/*
 * Reserve the whole legacy IO space to prevent any legacy drivers
 * from wasting time probing for their hardware.  This is a fairly
 * brute-force approach to disabling all non-virtual drivers.
 *
 * Note that this must be called very early to have any effect.
 */
int paravirt_disable_iospace(void)
{
        return request_resource(&ioport_resource, &reserve_ioports);
}

static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;

static inline void enter_lazy(enum paravirt_lazy_mode mode)
{
        BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);

        this_cpu_write(paravirt_lazy_mode, mode);
}

static void leave_lazy(enum paravirt_lazy_mode mode)
{
        BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);

        this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
}

void paravirt_enter_lazy_mmu(void)
{
        enter_lazy(PARAVIRT_LAZY_MMU);
}

void paravirt_leave_lazy_mmu(void)
{
        leave_lazy(PARAVIRT_LAZY_MMU);
}

void paravirt_flush_lazy_mmu(void)
{
        preempt_disable();

        if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
                arch_leave_lazy_mmu_mode();
                arch_enter_lazy_mmu_mode();
        }

        preempt_enable();
}
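
/*
 * Editor's note: in lazy MMU mode a hypervisor backend (e.g. Xen) may
 * queue page-table updates and submit them in one batch instead of
 * trapping on every PTE write. Flushing is implemented above by leaving
 * and immediately re-entering lazy mode, which forces the backend to
 * issue any pending batch while staying logically lazy.
 */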

#ifdef CONFIG_PARAVIRT_XXL
void paravirt_start_context_switch(struct task_struct *prev)
{
        BUG_ON(preemptible());

        if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
                arch_leave_lazy_mmu_mode();
                set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
        }
        enter_lazy(PARAVIRT_LAZY_CPU);
}

void paravirt_end_context_switch(struct task_struct *next)
{
        BUG_ON(preemptible());

        leave_lazy(PARAVIRT_LAZY_CPU);

        if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
                arch_enter_lazy_mmu_mode();
}
#endif
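
/*
 * Editor's note: TIF_LAZY_MMU_UPDATES is how lazy MMU mode survives a
 * context switch: a task preempted while lazy has the flag set on its
 * way out, and arch_enter_lazy_mmu_mode() is re-entered on its behalf
 * when it is scheduled back in.
 */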

enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
{
        if (in_interrupt())
                return PARAVIRT_LAZY_NONE;

        return this_cpu_read(paravirt_lazy_mode);
}

struct pv_info pv_info = {
        .name = "bare hardware",
#ifdef CONFIG_PARAVIRT_XXL
        .extra_user_64bit_cs = __USER_CS,
#endif
};

/* 64-bit pagetable entries */
#define PTE_IDENT       __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
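
/*
 * Editor's note: on bare hardware pte_val()/__pte() and friends are pure
 * identity conversions, so the PTE_IDENT slots point at
 * _paravirt_ident_64 and paravirt_patch_default() inlines them (on
 * x86_64 the patched sequence is a single "mov %rdi, %rax").
 */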

struct paravirt_patch_template pv_ops = {
        /* Init ops. */
        .init.patch             = native_patch,

        /* Time ops. */
        .time.sched_clock       = native_sched_clock,
        .time.steal_clock       = native_steal_clock,

        /* Cpu ops. */
        .cpu.io_delay           = native_io_delay,

#ifdef CONFIG_PARAVIRT_XXL
        .cpu.cpuid              = native_cpuid,
        .cpu.get_debugreg       = native_get_debugreg,
        .cpu.set_debugreg       = native_set_debugreg,
        .cpu.read_cr0           = native_read_cr0,
        .cpu.write_cr0          = native_write_cr0,
        .cpu.write_cr4          = native_write_cr4,
        .cpu.wbinvd             = native_wbinvd,
        .cpu.read_msr           = native_read_msr,
        .cpu.write_msr          = native_write_msr,
        .cpu.read_msr_safe      = native_read_msr_safe,
        .cpu.write_msr_safe     = native_write_msr_safe,
        .cpu.read_pmc           = native_read_pmc,
        .cpu.load_tr_desc       = native_load_tr_desc,
        .cpu.set_ldt            = native_set_ldt,
        .cpu.load_gdt           = native_load_gdt,
        .cpu.load_idt           = native_load_idt,
        .cpu.store_tr           = native_store_tr,
        .cpu.load_tls           = native_load_tls,
        .cpu.load_gs_index      = native_load_gs_index,
        .cpu.write_ldt_entry    = native_write_ldt_entry,
        .cpu.write_gdt_entry    = native_write_gdt_entry,
        .cpu.write_idt_entry    = native_write_idt_entry,

        .cpu.alloc_ldt          = paravirt_nop,
        .cpu.free_ldt           = paravirt_nop,

        .cpu.load_sp0           = native_load_sp0,

        .cpu.usergs_sysret64    = native_usergs_sysret64,
        .cpu.iret               = native_iret,

#ifdef CONFIG_X86_IOPL_IOPERM
        .cpu.invalidate_io_bitmap       = native_tss_invalidate_io_bitmap,
        .cpu.update_io_bitmap           = native_tss_update_io_bitmap,
#endif

        .cpu.start_context_switch       = paravirt_nop,
        .cpu.end_context_switch         = paravirt_nop,

        /* Irq ops. */
        .irq.save_fl            = __PV_IS_CALLEE_SAVE(native_save_fl),
        .irq.restore_fl         = __PV_IS_CALLEE_SAVE(native_restore_fl),
        .irq.irq_disable        = __PV_IS_CALLEE_SAVE(native_irq_disable),
        .irq.irq_enable         = __PV_IS_CALLEE_SAVE(native_irq_enable),
        .irq.safe_halt          = native_safe_halt,
        .irq.halt               = native_halt,
#endif /* CONFIG_PARAVIRT_XXL */

        /* Mmu ops. */
        .mmu.flush_tlb_user     = native_flush_tlb_local,
        .mmu.flush_tlb_kernel   = native_flush_tlb_global,
        .mmu.flush_tlb_one_user = native_flush_tlb_one_user,
        .mmu.flush_tlb_others   = native_flush_tlb_others,
        .mmu.tlb_remove_table   =
                        (void (*)(struct mmu_gather *, void *))tlb_remove_page,

        .mmu.exit_mmap          = paravirt_nop,

#ifdef CONFIG_PARAVIRT_XXL
        .mmu.read_cr2           = __PV_IS_CALLEE_SAVE(native_read_cr2),
        .mmu.write_cr2          = native_write_cr2,
        .mmu.read_cr3           = __native_read_cr3,
        .mmu.write_cr3          = native_write_cr3,

        .mmu.pgd_alloc          = __paravirt_pgd_alloc,
        .mmu.pgd_free           = paravirt_nop,

        .mmu.alloc_pte          = paravirt_nop,
        .mmu.alloc_pmd          = paravirt_nop,
        .mmu.alloc_pud          = paravirt_nop,
        .mmu.alloc_p4d          = paravirt_nop,
        .mmu.release_pte        = paravirt_nop,
        .mmu.release_pmd        = paravirt_nop,
        .mmu.release_pud        = paravirt_nop,
        .mmu.release_p4d        = paravirt_nop,

        .mmu.set_pte            = native_set_pte,
        .mmu.set_pmd            = native_set_pmd,

        .mmu.ptep_modify_prot_start     = __ptep_modify_prot_start,
        .mmu.ptep_modify_prot_commit    = __ptep_modify_prot_commit,

        .mmu.set_pud            = native_set_pud,

        .mmu.pmd_val            = PTE_IDENT,
        .mmu.make_pmd           = PTE_IDENT,

        .mmu.pud_val            = PTE_IDENT,
        .mmu.make_pud           = PTE_IDENT,

        .mmu.set_p4d            = native_set_p4d,

#if CONFIG_PGTABLE_LEVELS >= 5
        .mmu.p4d_val            = PTE_IDENT,
        .mmu.make_p4d           = PTE_IDENT,

        .mmu.set_pgd            = native_set_pgd,
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */

        .mmu.pte_val            = PTE_IDENT,
        .mmu.pgd_val            = PTE_IDENT,

        .mmu.make_pte           = PTE_IDENT,
        .mmu.make_pgd           = PTE_IDENT,

        .mmu.dup_mmap           = paravirt_nop,
        .mmu.activate_mm        = paravirt_nop,

        .mmu.lazy_mode = {
                .enter          = paravirt_nop,
                .leave          = paravirt_nop,
                .flush          = paravirt_nop,
        },

        .mmu.set_fixmap         = native_set_fixmap,
#endif /* CONFIG_PARAVIRT_XXL */

#if defined(CONFIG_PARAVIRT_SPINLOCKS)
        /* Lock ops. */
#ifdef CONFIG_SMP
        .lock.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
        .lock.queued_spin_unlock        =
                                PV_CALLEE_SAVE(__native_queued_spin_unlock),
        .lock.wait                      = paravirt_nop,
        .lock.kick                      = paravirt_nop,
        .lock.vcpu_is_preempted         =
                                PV_CALLEE_SAVE(__native_vcpu_is_preempted),
#endif /* SMP */
#endif
};
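
/*
 * Editor's note: pv_ops is the single table behind the PVOP_CALL*
 * macros; every call site through it is recorded in the
 * .parainstructions section and rewritten at boot by apply_paravirt(),
 * so on bare hardware most of these indirect calls end up patched into
 * direct calls, inline code, or NOPs. Hypervisor guests (e.g. Xen PV or
 * KVM) override the slots they care about before patching runs.
 */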

#ifdef CONFIG_PARAVIRT_XXL
/* At this point, native_get/set_debugreg has real function entries */
NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
#endif

EXPORT_SYMBOL(pv_ops);
EXPORT_SYMBOL_GPL(pv_info);