/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
 *
 * Copyright (C) 1996-2000 Russell King
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASSEMBLY__
#error "Only include this from assembly code"
#endif

#ifndef __ASM_ASSEMBLER_H
#define __ASM_ASSEMBLER_H

#include <asm-generic/export.h>

#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/debug-monitors.h>
#include <asm/page.h>
#include <asm/pgtable-hwdef.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>

	.macro save_and_disable_daif, flags
	mrs	\flags, daif
	msr	daifset, #0xf
	.endm

	.macro	restore_daif, flags:req
	msr	daif, \flags
	.endm

	/* Only on aarch64 pstate, PSR_D_BIT is different for aarch32 */
	.macro	inherit_daif, pstate:req, tmp:req
	and	\tmp, \pstate, #(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
	msr	daif, \tmp
	.endm

	/* IRQ is the lowest priority flag, unconditionally unmask the rest. */
	.macro enable_da_f
	msr	daifclr, #(8 | 4 | 1)
	.endm

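/*
 * Illustrative usage sketch (added; not part of the original header): a
 * critical section that must run with all DAIF exceptions masked could be
 * written as
 *
 *	save_and_disable_daif x4	// x4 := PSTATE.DAIF, then mask all
 *	...				// non-interruptible work
 *	restore_daif x4			// put DAIF back as it was
 */
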
/*
 * Save/restore interrupts.
 */
	.macro	save_and_disable_irq, flags
	mrs	\flags, daif
	msr	daifset, #2
	.endm

	.macro	restore_irq, flags
	msr	daif, \flags
	.endm

	.macro	disable_step_tsk, flgs, tmp
	tbz	\flgs, #TIF_SINGLESTEP, 9990f
	mrs	\tmp, mdscr_el1
	bic	\tmp, \tmp, #DBG_MDSCR_SS
	msr	mdscr_el1, \tmp
	isb	// Synchronise with enable_dbg
9990:
	.endm

	/* call with daif masked */
	.macro	enable_step_tsk, flgs, tmp
	tbz	\flgs, #TIF_SINGLESTEP, 9990f
	mrs	\tmp, mdscr_el1
	orr	\tmp, \tmp, #DBG_MDSCR_SS
	msr	mdscr_el1, \tmp
9990:
	.endm

/*
 * SMP data memory barrier
 */
	.macro	smp_dmb, opt
	dmb	\opt
	.endm

/*
 * RAS Error Synchronization barrier
 */
	.macro	esb
#ifdef CONFIG_ARM64_RAS_EXTN
	hint	#16
#else
	nop
#endif
	.endm

/*
 * Value prediction barrier
 */
	.macro	csdb
	hint	#20
	.endm

/*
 * Clear Branch History instruction
 */
	.macro	clearbhb
	hint	#22
	.endm

/*
 * Speculation barrier
 */
	.macro	sb
alternative_if_not ARM64_HAS_SB
	dsb	nsh
	isb
alternative_else
	SB_BARRIER_INSN
	nop
alternative_endif
	.endm

/*
 * Emit an entry into the exception table
 */
	.macro		_asm_extable, from, to
	.pushsection	__ex_table, "a"
	.align		3
	.long		(\from - .), (\to - .)
	.popsection
	.endm

#define USER(l, x...)				\
9999:	x;					\
	_asm_extable	9999b, l

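/*
 * Illustrative sketch (added): USER() tags a single user-space access with
 * a fixup target, e.g. an unprivileged load whose fault handling should
 * resume at a local label 9:
 *
 *	USER(9f, ldtr x2, [x0])		// on fault, branch to 9f
 *	...
 * 9:	mov	x0, #-EFAULT		// hypothetical fixup path
 */
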
lr	.req	x30		// link register

/*
 * Select code when configured for BE.
 */
#ifdef CONFIG_CPU_BIG_ENDIAN
#define CPU_BE(code...) code
#else
#define CPU_BE(code...)
#endif

/*
 * Select code when configured for LE.
 */
#ifdef CONFIG_CPU_BIG_ENDIAN
#define CPU_LE(code...)
#else
#define CPU_LE(code...) code
#endif

/*
 * Define a macro that constructs a 64-bit value by concatenating two
 * 32-bit registers. Note that on big endian systems the order of the
 * registers is swapped.
 */
#ifndef CONFIG_CPU_BIG_ENDIAN
	.macro	regs_to_64, rd, lbits, hbits
#else
	.macro	regs_to_64, rd, hbits, lbits
#endif
	orr	\rd, \lbits, \hbits, lsl #32
	.endm

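/*
 * Illustrative sketch (added): with the low 32 bits in w0 and the high 32
 * bits in w1, on a little-endian configuration
 *
 *	regs_to_64 x2, x0, x1		// x2 = (x1 << 32) | w0
 *
 * assembles to a single orr; on big endian the argument order is swapped
 * by the macro definition above.
 */
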
/*
 * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
 * <symbol> is within the range +/- 4 GB of the PC.
 */
	/*
	 * @dst: destination register (64 bit wide)
	 * @sym: name of the symbol
	 */
	.macro	adr_l, dst, sym
	adrp	\dst, \sym
	add	\dst, \dst, :lo12:\sym
	.endm

	/*
	 * @dst: destination register (32 or 64 bit wide)
	 * @sym: name of the symbol
	 * @tmp: optional 64-bit scratch register to be used if <dst> is a
	 *       32-bit wide register, in which case it cannot be used to hold
	 *       the address
	 */
	.macro	ldr_l, dst, sym, tmp=
	.ifb	\tmp
	adrp	\dst, \sym
	ldr	\dst, [\dst, :lo12:\sym]
	.else
	adrp	\tmp, \sym
	ldr	\dst, [\tmp, :lo12:\sym]
	.endif
	.endm

	/*
	 * @src: source register (32 or 64 bit wide)
	 * @sym: name of the symbol
	 * @tmp: mandatory 64-bit scratch register to calculate the address
	 *       while <src> needs to be preserved.
	 */
	.macro	str_l, src, sym, tmp
	adrp	\tmp, \sym
	str	\src, [\tmp, :lo12:\sym]
	.endm

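/*
 * Illustrative sketch (added), assuming a 64-bit variable some_var and a
 * 32-bit variable some_flag exist in the image (hypothetical symbols):
 *
 *	adr_l	x0, some_var		// x0 := &some_var (PC-relative)
 *	ldr_l	x1, some_var		// 64-bit dst, no scratch needed
 *	ldr_l	w2, some_flag, x3	// 32-bit dst, so x3 holds the address
 *	str_l	x1, some_var, x3	// stores always need the scratch
 */
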
	/*
	 * @dst: Result of per_cpu(sym, smp_processor_id()) (can be SP)
	 * @sym: The name of the per-cpu variable
	 * @tmp: scratch register
	 */
	.macro adr_this_cpu, dst, sym, tmp
	adrp	\tmp, \sym
	add	\dst, \tmp, #:lo12:\sym
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
	mrs	\tmp, tpidr_el1
alternative_else
	mrs	\tmp, tpidr_el2
alternative_endif
	add	\dst, \dst, \tmp
	.endm

	/*
	 * @dst: Result of READ_ONCE(per_cpu(sym, smp_processor_id()))
	 * @sym: The name of the per-cpu variable
	 * @tmp: scratch register
	 */
	.macro ldr_this_cpu dst, sym, tmp
	adr_l	\dst, \sym
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
	mrs	\tmp, tpidr_el1
alternative_else
	mrs	\tmp, tpidr_el2
alternative_endif
	ldr	\dst, [\dst, \tmp]
	.endm

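/*
 * Illustrative sketch (added), assuming a 64-bit per-cpu variable named
 * my_pcpu_counter (hypothetical):
 *
 *	adr_this_cpu x0, my_pcpu_counter, x1	// x0 := this CPU's address
 *	ldr_this_cpu x2, my_pcpu_counter, x1	// x2 := this CPU's value
 */
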
/*
 * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
 */
	.macro	vma_vm_mm, rd, rn
	ldr	\rd, [\rn, #VMA_VM_MM]
	.endm

/*
 * mmid - get context id from mm pointer (mm->context.id)
 */
	.macro	mmid, rd, rn
	ldr	\rd, [\rn, #MM_CONTEXT_ID]
	.endm

/*
 * read_ctr - read CTR_EL0. If the system has mismatched register fields,
 * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val
 */
	.macro	read_ctr, reg
alternative_if_not ARM64_MISMATCHED_CACHE_TYPE
	mrs	\reg, ctr_el0			// read CTR
	nop
alternative_else
	ldr_l	\reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL
alternative_endif
	.endm

/*
 * raw_dcache_line_size - get the minimum D-cache line size on this CPU
 * from the CTR register.
 */
	.macro	raw_dcache_line_size, reg, tmp
	mrs	\tmp, ctr_el0			// read CTR
	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// actual cache line size
	.endm

/*
 * dcache_line_size - get the safe D-cache line size across all CPUs
 */
	.macro	dcache_line_size, reg, tmp
	read_ctr	\tmp
	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// actual cache line size
	.endm

/*
 * raw_icache_line_size - get the minimum I-cache line size on this CPU
 * from the CTR register.
 */
	.macro	raw_icache_line_size, reg, tmp
	mrs	\tmp, ctr_el0			// read CTR
	and	\tmp, \tmp, #0xf		// cache line size encoding
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// actual cache line size
	.endm

/*
 * icache_line_size - get the safe I-cache line size across all CPUs
 */
	.macro	icache_line_size, reg, tmp
	read_ctr	\tmp
	and	\tmp, \tmp, #0xf		// cache line size encoding
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// actual cache line size
	.endm

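/*
 * Illustrative sketch (added): after
 *
 *	dcache_line_size x2, x3		// x3 is clobbered
 *
 * x2 holds the smallest D-cache line size, in bytes, that is safe to use
 * on every CPU in the system; the by-line maintenance loops below stride
 * by exactly this amount.
 */
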
/*
 * tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map
 */
	.macro	tcr_set_t0sz, valreg, t0sz
	bfi	\valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
	.endm

/*
 * tcr_set_t1sz - update TCR.T1SZ
 */
	.macro	tcr_set_t1sz, valreg, t1sz
	bfi	\valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH
	.endm

/*
 * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
 * ID_AA64MMFR0_EL1.PARange value
 *
 *	tcr:		register with the TCR_ELx value to be updated
 *	pos:		IPS or PS bitfield position
 *	tmp{0,1}:	temporary registers
 */
	.macro	tcr_compute_pa_size, tcr, pos, tmp0, tmp1
	mrs	\tmp0, ID_AA64MMFR0_EL1
	// Narrow PARange to fit the PS field in TCR_ELx
	ubfx	\tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3
	mov	\tmp1, #ID_AA64MMFR0_PARANGE_MAX
	cmp	\tmp0, \tmp1
	csel	\tmp0, \tmp1, \tmp0, hi
	bfi	\tcr, \tmp0, \pos, #3
	.endm

/*
 * Macro to perform a data cache maintenance for the interval
 * [kaddr, kaddr + size)
 *
 *	op:		operation passed to dc instruction
 *	domain:		domain used in dsb instruction
 *	kaddr:		starting virtual address of the region
 *	size:		size of the region
 *	Corrupts:	kaddr, size, tmp1, tmp2
 */
	.macro __dcache_op_workaround_clean_cache, op, kaddr
alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
	dc	\op, \kaddr
alternative_else
	dc	civac, \kaddr
alternative_endif
	.endm

	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
	dcache_line_size \tmp1, \tmp2
	add	\size, \kaddr, \size
	sub	\tmp2, \tmp1, #1
	bic	\kaddr, \kaddr, \tmp2
9998:
	.ifc	\op, cvau
	__dcache_op_workaround_clean_cache \op, \kaddr
	.else
	.ifc	\op, cvac
	__dcache_op_workaround_clean_cache \op, \kaddr
	.else
	.ifc	\op, cvap
	sys	3, c7, c12, 1, \kaddr	// dc cvap
	.else
	.ifc	\op, cvadp
	sys	3, c7, c13, 1, \kaddr	// dc cvadp
	.else
	dc	\op, \kaddr
	.endif
	.endif
	.endif
	.endif
	add	\kaddr, \kaddr, \tmp1
	cmp	\kaddr, \size
	b.lo	9998b
	dsb	\domain
	.endm

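/*
 * Illustrative sketch (added): cleaning and invalidating a buffer at x0 of
 * x1 bytes to the PoC. The macro corrupts its address and size arguments,
 * so callers that still need them should operate on copies:
 *
 *	mov	x2, x0
 *	mov	x3, x1
 *	dcache_by_line_op civac, sy, x2, x3, x4, x5
 */
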
/*
 * Macro to perform an instruction cache maintenance for the interval
 * [start, end)
 *
 *	start, end:	virtual addresses describing the region
 *	label:		A label to branch to on user fault.
 *	Corrupts:	tmp1, tmp2
 */
	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
	icache_line_size \tmp1, \tmp2
	sub	\tmp2, \tmp1, #1
	bic	\tmp2, \start, \tmp2
9997:
USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
	add	\tmp2, \tmp2, \tmp1
	cmp	\tmp2, \end
	b.lo	9997b
	dsb	ish
	isb
	.endm

/*
 * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
 */
	.macro	reset_pmuserenr_el0, tmpreg
	mrs	\tmpreg, id_aa64dfr0_el1
	sbfx	\tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4
	cmp	\tmpreg, #1			// Skip if no PMU present
	b.lt	9000f
	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
9000:
	.endm

/*
 * copy_page - copy src to dest using temp registers t1-t8
 */
	.macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
9998:	ldp	\t1, \t2, [\src]
	ldp	\t3, \t4, [\src, #16]
	ldp	\t5, \t6, [\src, #32]
	ldp	\t7, \t8, [\src, #48]
	add	\src, \src, #64
	stnp	\t1, \t2, [\dest]
	stnp	\t3, \t4, [\dest, #16]
	stnp	\t5, \t6, [\dest, #32]
	stnp	\t7, \t8, [\dest, #48]
	add	\dest, \dest, #64
	tst	\src, #(PAGE_SIZE - 1)
	b.ne	9998b
	.endm

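/*
 * Illustrative sketch (added): with x0 and x1 pointing at PAGE_SIZE
 * aligned destination and source pages:
 *
 *	copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
 *
 * The stnp stores carry a non-temporal hint, on the assumption that the
 * freshly written destination page will not be read back immediately.
 */
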
/*
 * Deprecated! Use SYM_FUNC_{START,START_WEAK,END}_PI instead.
 * Annotate a function as position independent, i.e., safe to be called before
 * the kernel virtual mapping is activated.
 */
#define ENDPIPROC(x)			\
	.globl	__pi_##x;		\
	.type	__pi_##x, %function;	\
	.set	__pi_##x, x;		\
	.size	__pi_##x, . - x;	\
	ENDPROC(x)

/*
 * Annotate a function as being unsuitable for kprobes.
 */
#ifdef CONFIG_KPROBES
#define NOKPROBE(x)				\
	.pushsection "_kprobe_blacklist", "aw";	\
	.quad	x;				\
	.popsection;
#else
#define NOKPROBE(x)
#endif

#ifdef CONFIG_KASAN
#define EXPORT_SYMBOL_NOKASAN(name)
#else
#define EXPORT_SYMBOL_NOKASAN(name)	EXPORT_SYMBOL(name)
#endif

	/*
	 * Emit a 64-bit absolute little endian symbol reference in a way that
	 * ensures that it will be resolved at build time, even when building a
	 * PIE binary. This requires cooperation from the linker script, which
	 * must emit the lo32/hi32 halves individually.
	 */
	.macro	le64sym, sym
	.long	\sym\()_lo32
	.long	\sym\()_hi32
	.endm

	/*
	 * mov_q - move an immediate constant into a 64-bit register using
	 *         between 2 and 4 movz/movk instructions (depending on the
	 *         magnitude and sign of the operand)
	 */
	.macro	mov_q, reg, val
	.if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
	movz	\reg, :abs_g1_s:\val
	.else
	.if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
	movz	\reg, :abs_g2_s:\val
	.else
	movz	\reg, :abs_g3:\val
	movk	\reg, :abs_g2_nc:\val
	.endif
	movk	\reg, :abs_g1_nc:\val
	.endif
	movk	\reg, :abs_g0_nc:\val
	.endm

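/*
 * Illustrative sketch (added): the expansion size depends on the constant,
 * e.g.
 *
 *	mov_q	x0, 0x12345678		// movz + movk (2 instructions)
 *	mov_q	x1, 0xffff0000deadbeef	// movz + 3 x movk (4 instructions)
 */
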
/*
 * Return the current task_struct.
 */
	.macro	get_current_task, rd
	mrs	\rd, sp_el0
	.endm

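/*
 * Illustrative sketch (added): the arm64 kernel keeps the current
 * task_struct pointer in sp_el0 while running in the kernel, so e.g.
 *
 *	get_current_task x0
 *	ldr	x1, [x0, #TSK_TI_FLAGS]	// then load current's thread flags
 */
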
/*
 * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD.
 * orr is used as it can cover the immediate value (and is idempotent).
 * In future this may be nop'ed out when dealing with 52-bit kernel VAs.
 *	ttbr: Value of ttbr to set, modified.
 */
	.macro	offset_ttbr1, ttbr, tmp
#ifdef CONFIG_ARM64_VA_BITS_52
	mrs_s	\tmp, SYS_ID_AA64MMFR2_EL1
	and	\tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
	cbnz	\tmp, .Lskipoffs_\@
	orr	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
.Lskipoffs_\@ :
#endif
	.endm

/*
 * Perform the reverse of offset_ttbr1.
 * bic is used as it can cover the immediate value and, in future, won't need
 * to be nop'ed out when dealing with 52-bit kernel VAs.
 */
	.macro	restore_ttbr1, ttbr
#ifdef CONFIG_ARM64_VA_BITS_52
	bic	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
#endif
	.endm

/*
 * Arrange a physical address in a TTBR register, taking care of 52-bit
 * addresses.
 *
 *	phys:	physical address, preserved
 *	ttbr:	returns the TTBR value
 */
	.macro	phys_to_ttbr, ttbr, phys
#ifdef CONFIG_ARM64_PA_BITS_52
	orr	\ttbr, \phys, \phys, lsr #46
	and	\ttbr, \ttbr, #TTBR_BADDR_MASK_52
#else
	mov	\ttbr, \phys
#endif
	.endm

	.macro	phys_to_pte, pte, phys
#ifdef CONFIG_ARM64_PA_BITS_52
	/*
	 * We assume \phys is 64K aligned and this is guaranteed by only
	 * supporting this configuration with 64K pages.
	 */
	orr	\pte, \phys, \phys, lsr #36
	and	\pte, \pte, #PTE_ADDR_MASK
#else
	mov	\pte, \phys
#endif
	.endm

	.macro	pte_to_phys, phys, pte
#ifdef CONFIG_ARM64_PA_BITS_52
	ubfiz	\phys, \pte, #(48 - 16 - 12), #16
	bfxil	\phys, \pte, #16, #32
	lsl	\phys, \phys, #16
#else
	and	\phys, \pte, #PTE_ADDR_MASK
#endif
	.endm

/*
 * tcr_clear_errata_bits - Clear TCR bits that trigger an erratum on this CPU.
 */
	.macro	tcr_clear_errata_bits, tcr, tmp1, tmp2
#ifdef CONFIG_FUJITSU_ERRATUM_010001
	mrs	\tmp1, midr_el1

	mov_q	\tmp2, MIDR_FUJITSU_ERRATUM_010001_MASK
	and	\tmp1, \tmp1, \tmp2
	mov_q	\tmp2, MIDR_FUJITSU_ERRATUM_010001
	cmp	\tmp1, \tmp2
	b.ne	10f

	mov_q	\tmp2, TCR_CLEAR_FUJITSU_ERRATUM_010001
	bic	\tcr, \tcr, \tmp2
10:
#endif /* CONFIG_FUJITSU_ERRATUM_010001 */
	.endm

/*
 * Errata workaround prior to disabling the MMU. Insert an ISB immediately
 * prior to executing the MSR that will change SCTLR_ELn[M] from a value of
 * 1 to 0.
 */
	.macro pre_disable_mmu_workaround
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
	isb
#endif
	.endm

	/*
	 * frame_push - Push @regcount callee saved registers to the stack,
	 *              starting at x19, as well as x29/x30, and set x29 to
	 *              the new value of sp. Add @extra bytes of stack space
	 *              for locals.
	 */
	.macro		frame_push, regcount:req, extra
	__frame		st, \regcount, \extra
	.endm

	/*
	 * frame_pop  - Pop the callee saved registers from the stack that were
	 *              pushed in the most recent call to frame_push, as well
	 *              as x29/x30 and any extra stack space that may have been
	 *              allocated.
	 */
	.macro		frame_pop
	__frame		ld
	.endm

	.macro		__frame_regs, reg1, reg2, op, num
	.if		.Lframe_regcount == \num
	\op\()r		\reg1, [sp, #(\num + 1) * 8]
	.elseif		.Lframe_regcount > \num
	\op\()p		\reg1, \reg2, [sp, #(\num + 1) * 8]
	.endif
	.endm

	.macro		__frame, op, regcount, extra=0
	.ifc		\op, st
	.if		(\regcount) < 0 || (\regcount) > 10
	.error		"regcount should be in the range [0 ... 10]"
	.endif
	.if		((\extra) % 16) != 0
	.error		"extra should be a multiple of 16 bytes"
	.endif
	.ifdef		.Lframe_regcount
	.if		.Lframe_regcount != -1
	.error		"frame_push/frame_pop may not be nested"
	.endif
	.endif
	.set		.Lframe_regcount, \regcount
	.set		.Lframe_extra, \extra
	.set		.Lframe_local_offset, ((\regcount + 3) / 2) * 16
	stp		x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
	mov		x29, sp
	.endif

	__frame_regs	x19, x20, \op, 1
	__frame_regs	x21, x22, \op, 3
	__frame_regs	x23, x24, \op, 5
	__frame_regs	x25, x26, \op, 7
	__frame_regs	x27, x28, \op, 9

	.ifc		\op, ld
	.if		.Lframe_regcount == -1
	.error		"frame_push/frame_pop may not be nested"
	.endif
	ldp		x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
	.set		.Lframe_regcount, -1
	.endif
	.endm

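/*
 * Illustrative sketch (added; my_func is a hypothetical symbol): an
 * assembly function that needs x19-x21 preserved plus 16 bytes of local
 * stack storage:
 *
 * ENTRY(my_func)
 *	frame_push	3, 16		// pushes x29/x30 and x19-x21, x29 := sp
 *	...				// body may clobber x19-x21 freely
 *	frame_pop			// restores registers and stack
 *	ret
 * ENDPROC(my_func)
 */
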
/*
 * Check whether to yield to another runnable task from kernel mode NEON code
 * (which runs with preemption disabled).
 *
 * if_will_cond_yield_neon
 *        // pre-yield patchup code
 * do_cond_yield_neon
 *        // post-yield patchup code
 * endif_yield_neon    <label>
 *
 * where <label> is optional, and marks the point where execution will resume
 * after a yield has been performed. If omitted, execution resumes right after
 * the endif_yield_neon invocation. Note that the entire sequence, including
 * the provided patchup code, will be omitted from the image if CONFIG_PREEMPT
 * is not defined.
 *
 * As a convenience, in the case where no patchup code is required, the above
 * sequence may be abbreviated to
 *
 * cond_yield_neon <label>
 *
 * Note that the patchup code does not support assembler directives that change
 * the output section; any use of such directives is undefined.
 *
 * The yield itself consists of the following:
 * - Check whether the preempt count is exactly 1 and a reschedule is also
 *   needed. If so, calling of preempt_enable() in kernel_neon_end() will
 *   trigger a reschedule. If it is not the case, yielding is pointless.
 * - Disable and re-enable kernel mode NEON, and branch to the yield fixup
 *   code.
 *
 * This macro sequence may clobber all CPU state that is not guaranteed by the
 * AAPCS to be preserved across an ordinary function call.
 */
	.macro		cond_yield_neon, lbl
	if_will_cond_yield_neon
	do_cond_yield_neon
	endif_yield_neon	\lbl
	.endm

	.macro		if_will_cond_yield_neon
#ifdef CONFIG_PREEMPT
	get_current_task	x0
	ldr		x0, [x0, #TSK_TI_PREEMPT]
	sub		x0, x0, #PREEMPT_DISABLE_OFFSET
	cbz		x0, .Lyield_\@	// yield on need_resched in task context
	/* fall through to endif_yield_neon */
	.subsection	1
.Lyield_\@ :
#else
	.section	".discard.cond_yield_neon", "ax"
#endif
	.endm

	.macro		do_cond_yield_neon
	bl		kernel_neon_end
	bl		kernel_neon_begin
	.endm

	.macro		endif_yield_neon, lbl
	.ifnb		\lbl
	b		\lbl
	.else
	b		.Lyield_out_\@
	.endif
	.previous
.Lyield_out_\@ :
	.endm

	.macro __mitigate_spectre_bhb_loop	tmp
#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
alternative_cb	spectre_bhb_patch_loop_iter
	mov	\tmp, #32		// Patched to correct the immediate
alternative_cb_end
.Lspectre_bhb_loop\@:
	b	. + 4
	subs	\tmp, \tmp, #1
	b.ne	.Lspectre_bhb_loop\@
	sb
#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
	.endm

	/* Saves/restores x0-x3 to the stack */
	.macro __mitigate_spectre_bhb_fw
#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
	stp	x0, x1, [sp, #-16]!
	stp	x2, x3, [sp, #-16]!
	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_3
alternative_cb	arm64_update_smccc_conduit
	nop					// Patched to SMC/HVC #0
alternative_cb_end
	ldp	x2, x3, [sp], #16
	ldp	x0, x1, [sp], #16
#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */
	.endm

#endif	/* __ASM_ASSEMBLER_H */