1 /* SPDX-License-Identifier: GPL-2.0-only */
/*
 * FP/SIMD state saving and restoring macros
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 */
9 #include <asm/assembler.h>
/*
 * Save the FP/SIMD register file (Q0-Q31) plus FPSR and FPCR.
 * \state - x register holding the base of the save buffer; advanced by
 *          16 * 30 bytes by the writeback on the final stp
 * \tmpnr - register NUMBER of a scratch x register (clobbered)
 *
 * Fix: the original excerpt stored w\tmpnr without first reading
 * FPSR/FPCR into it, and the macro was never closed with .endm.
 */
.macro fpsimd_save state, tmpnr
	stp	q0, q1, [\state, #16 * 0]
	stp	q2, q3, [\state, #16 * 2]
	stp	q4, q5, [\state, #16 * 4]
	stp	q6, q7, [\state, #16 * 6]
	stp	q8, q9, [\state, #16 * 8]
	stp	q10, q11, [\state, #16 * 10]
	stp	q12, q13, [\state, #16 * 12]
	stp	q14, q15, [\state, #16 * 14]
	stp	q16, q17, [\state, #16 * 16]
	stp	q18, q19, [\state, #16 * 18]
	stp	q20, q21, [\state, #16 * 20]
	stp	q22, q23, [\state, #16 * 22]
	stp	q24, q25, [\state, #16 * 24]
	stp	q26, q27, [\state, #16 * 26]
	stp	q28, q29, [\state, #16 * 28]
	stp	q30, q31, [\state, #16 * 30]!	// writeback: \state += 16 * 30
	mrs	x\tmpnr, fpsr
	str	w\tmpnr, [\state, #16 * 2]	// fpsr lands just after Q31
	mrs	x\tmpnr, fpcr
	str	w\tmpnr, [\state, #16 * 2 + 4]
.endm
/*
 * Install \state into FPCR, but only if it differs from the current
 * value: writes to fpcr may be self-synchronising, so avoid restoring
 * the register if it hasn't changed.
 * \state - x register holding the FPCR value to write
 * \tmp   - scratch x register (clobbered); flags are also clobbered
 */
.macro fpsimd_restore_fpcr state, tmp
	mrs	\tmp, fpcr
	cmp	\tmp, \state
	b.eq	9999f			// unchanged: skip the write
	msr	fpcr, \state
9999:
.endm
/*
 * Restore the FP/SIMD register file (Q0-Q31) plus FPSR and FPCR;
 * mirror image of fpsimd_save.
 * \state - x register holding the base of the save buffer; advanced by
 *          16 * 30 bytes by the writeback on the final ldp
 * \tmpnr - register NUMBER of a scratch x register (clobbered)
 *
 * Fix: the original excerpt never wrote the loaded value to fpsr
 * (msr was missing) and the macro was never closed with .endm.
 */
.macro fpsimd_restore state, tmpnr
	ldp	q0, q1, [\state, #16 * 0]
	ldp	q2, q3, [\state, #16 * 2]
	ldp	q4, q5, [\state, #16 * 4]
	ldp	q6, q7, [\state, #16 * 6]
	ldp	q8, q9, [\state, #16 * 8]
	ldp	q10, q11, [\state, #16 * 10]
	ldp	q12, q13, [\state, #16 * 12]
	ldp	q14, q15, [\state, #16 * 14]
	ldp	q16, q17, [\state, #16 * 16]
	ldp	q18, q19, [\state, #16 * 18]
	ldp	q20, q21, [\state, #16 * 20]
	ldp	q22, q23, [\state, #16 * 22]
	ldp	q24, q25, [\state, #16 * 24]
	ldp	q26, q27, [\state, #16 * 26]
	ldp	q28, q29, [\state, #16 * 28]
	ldp	q30, q31, [\state, #16 * 30]!	// writeback: \state += 16 * 30
	ldr	w\tmpnr, [\state, #16 * 2]
	msr	fpsr, x\tmpnr
	ldr	w\tmpnr, [\state, #16 * 2 + 4]
	fpsimd_restore_fpcr x\tmpnr, \state	// conditional fpcr write
.endm
70 /* Sanity-check macros to help avoid encoding garbage instructions */
/* Assembly-time check that \nr names a general register (x0-x30). */
.macro _check_general_reg nr
	.if (\nr) < 0 || (\nr) > 30
		.error "Bad register number \nr."
	.endif
.endm
/* Assembly-time check that \znr names an SVE vector register (z0-z31). */
.macro _sve_check_zreg znr
	.if (\znr) < 0 || (\znr) > 31
		.error "Bad Scalable Vector Extension vector register number \znr."
	.endif
.endm
/* Assembly-time check that \pnr names an SVE predicate register (p0-p15). */
.macro _sve_check_preg pnr
	.if (\pnr) < 0 || (\pnr) > 15
		.error "Bad Scalable Vector Extension predicate register number \pnr."
	.endif
.endm
/* Assembly-time range check: \n must lie in [\min, \max] inclusive. */
.macro _check_num n, min, max
	.if (\n) < (\min) || (\n) > (\max)
		.error "Number \n out of range [\min,\max]"
	.endif
.endm
/* Assembly-time check that \v is a valid SME vector-select register (w12-w15). */
.macro _sme_check_wv v
	.if (\v) < 12 || (\v) > 15
		.error "Bad vector select register \v."
	.endif
.endm
102 /* SVE instruction encodings for non-SVE-capable assemblers */
103 /* (pre binutils 2.28, all kernel capable clang versions support SVE) */
105 /* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
/*
 * Hand-encoded STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL].
 * \offset is a signed 9-bit VL multiple; its low 3 and high 6 bits go
 * into separate instruction fields, hence the two masked shifts.
 */
.macro _sve_str_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm
117 /* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
/*
 * Hand-encoded LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL].
 * Field layout matches _sve_str_v; only the opcode differs.
 */
.macro _sve_ldr_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm
129 /* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
/*
 * Hand-encoded STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL].
 */
.macro _sve_str_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm
141 /* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
/*
 * Hand-encoded LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL].
 */
.macro _sve_ldr_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm
153 /* RDVL X\nx, #\imm */
/*
 * Hand-encoded RDVL X\nx, #\imm — read the SVE vector length in bytes,
 * multiplied by the signed 6-bit immediate \imm.
 */
.macro _sve_rdvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5000			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)
.endm
/* RDFFR (unpredicated): RDFFR P\np.B */
.macro _sve_rdffr np
	_sve_check_preg \np
	.inst	0x2519f000		\
		| (\np)
.endm

/* PFALSE P\np.B — clear every element of predicate register \np */
.macro _sve_pfalse np
	_sve_check_preg \np
	.inst	0x2518e400		\
		| (\np)
.endm
183 /* SME instruction encodings for non-SME-capable assemblers */
184 /* (pre binutils 2.38/LLVM 13) */
186 /* RDSVL X\nx, #\imm */
/*
 * Hand-encoded RDSVL X\nx, #\imm — read the SME streaming vector length
 * in bytes, multiplied by the signed 6-bit immediate \imm.
 */
.macro _sme_rdsvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5800			\
		| (\nx)				\
		| (((\imm) & 0x3f) << 5)
.endm
/*
 * STR (vector from ZA array):
 *	STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 * \nw selects the vector-select register (w12-w15).
 */
.macro _sme_str_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe1200000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 7)
.endm
/*
 * LDR (vector to ZA array):
 *	LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 * Field layout matches _sme_str_zav; only the opcode differs.
 */
.macro _sme_ldr_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe1000000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 7)
.endm
/*
 * LDR ZT0, [X\nx] — hand-encoded ZT0 load.
 * NOTE(review): the .macro headers for these two ZT0 accessors were lost
 * from this excerpt; reconstructed from the surviving register checks —
 * confirm against the full file.
 */
.macro _ldr_zt nx
	_check_general_reg \nx
	.inst	0xe11f8000	\
		| (\nx << 5)
.endm

/* STR ZT0, [X\nx] — hand-encoded ZT0 store. */
.macro _str_zt nx
	_check_general_reg \nx
	.inst	0xe13f8000	\
		| (\nx << 5)
.endm
/*
 * Zero the entire ZA array
 *	ZERO ZA
 */
.macro zero_za
	.inst	0xc00800ff	// ZERO ZA, all tiles
.endm
/*
 * Recursive binary-split helper for _for: invokes _for__body once for
 * every integer in [\from, \to].  Must run with .altmacro active so the
 * % expressions are evaluated to literals before recursing.
 */
.macro __for from:req, to:req
	.if (\from) == (\to)
		_for__body %\from
	.else
		__for %\from, %((\from) + ((\to) - (\from)) / 2)
		__for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
	.endif
.endm
/*
 * Expand \insn once for each value of \var in [\from, \to].
 * Defines a temporary _for__body wrapper, walks the range via __for in
 * altmacro mode, then purges the wrapper so _for can be nested/reused.
 */
.macro _for var:req, from:req, to:req, insn:vararg
	.macro _for__body \var:req
		.noaltmacro
		\insn
		.altmacro
	.endm

	.altmacro
	__for \from, \to
	.noaltmacro

	.purgem _for__body
.endm
276 /* Update ZCR_EL1.LEN with the new VQ */
/*
 * Update ZCR_EL1.LEN with the new VQ held in \xvqminus1, skipping the
 * (potentially self-synchronising, i.e. expensive) write when the
 * register already holds the wanted value.
 * \xtmp, \xtmp2 - scratch x registers (clobbered); flags clobbered too
 */
.macro sve_load_vq xvqminus1, xtmp, xtmp2
	mrs_s		\xtmp, SYS_ZCR_EL1
	bic		\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
	orr		\xtmp2, \xtmp2, \xvqminus1
	cmp		\xtmp2, \xtmp
	b.eq		921f			// LEN unchanged: skip write
	msr_s		SYS_ZCR_EL1, \xtmp2	//self-synchronising
921:
.endm
287 /* Update SMCR_EL1.LEN with the new VQ */
/*
 * Update SMCR_EL1.LEN with the new VQ held in \xvqminus1; mirror image
 * of sve_load_vq, likewise skipping a redundant (self-synchronising)
 * system-register write.
 * \xtmp, \xtmp2 - scratch x registers (clobbered); flags clobbered too
 */
.macro sme_load_vq xvqminus1, xtmp, xtmp2
	mrs_s		\xtmp, SYS_SMCR_EL1
	bic		\xtmp2, \xtmp, SMCR_ELx_LEN_MASK
	orr		\xtmp2, \xtmp2, \xvqminus1
	cmp		\xtmp2, \xtmp
	b.eq		921f			// LEN unchanged: skip write
	msr_s		SYS_SMCR_EL1, \xtmp2	//self-synchronising
921:
.endm
298 /* Preserve the first 128-bits of Znz and zero the rest. */
/* Preserve the first 128-bits of Znz and zero the rest. */
.macro _sve_flush_z nz
	_sve_check_zreg \nz
	mov	v\nz\().16b, v\nz\().16b	// V-reg write zeroes Z bits >127
.endm
/*
 * Flush the whole Z register file (keep low 128 bits of each) and clear
 * the whole P register file.
 * NOTE(review): the .macro/.endm lines around these two _for invocations
 * were lost from this excerpt; reconstructed — confirm against the full
 * file.
 */
.macro sve_flush_z
 _for n, 0, 31, _sve_flush_z \n
.endm

.macro sve_flush_p
 _for n, 0, 15, _sve_pfalse \n
.endm
/*
 * Save the SVE state: Z0-Z31, P0-P15, FFR (optionally) and FPSR/FPCR.
 * \nxbase   - register NUMBER of the x register addressing the buffer
 * \xpfpsr   - x register addressing the fpsr/fpcr word pair
 * \save_ffr - x register: non-zero => save real FFR, zero => store PFALSE
 * \nxtmp    - register NUMBER of a scratch x register (clobbered)
 * P0 is used as a staging register for the FFR image and then reloaded.
 */
.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
 _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
 _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
		cbz		\save_ffr, 921f
		_sve_rdffr	0		// P0 = FFR
		b		922f
921:
		_sve_pfalse	0		// Zero out FFR
922:
		_sve_str_p	0, \nxbase	// store FFR image via P0
		_sve_ldr_p	0, \nxbase, -16	// reload caller's P0
		mrs		x\nxtmp, fpsr
		str		w\nxtmp, [\xpfpsr]
		mrs		x\nxtmp, fpcr
		str		w\nxtmp, [\xpfpsr, #4]
.endm
/*
 * Load the SVE state: Z0-Z31, P0-P15, FFR (optionally) and FPSR/FPCR;
 * mirror image of sve_save.
 * \restore_ffr - x register: non-zero => restore FFR from the buffer
 * P0 is loaded first as a staging register for FFR, then overwritten by
 * the normal predicate restore loop.
 */
.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
 _for n, 0, 31,	_sve_ldr_v	\n, \nxbase, \n - 34
		cbz		\restore_ffr, 921f
		_sve_ldr_p	0, \nxbase
		_sve_wrffr	0		// FFR = P0
921:
 _for n, 0, 15,	_sve_ldr_p	\n, \nxbase, \n - 16

		ldr		w\nxtmp, [\xpfpsr]
		msr		fpsr, x\nxtmp
		ldr		w\nxtmp, [\xpfpsr, #4]
		msr		fpcr, x\nxtmp
.endm
/*
 * Save the whole ZA array to the buffer at x\nxbase, one horizontal
 * vector per loop iteration.
 * \xvl - x register holding the vector length in bytes (also the row
 *        count, since ZA is VL x VL bytes)
 * \nw  - register NUMBER of the vector-select register; must be 12-15
 *        (enforced by _sme_check_wv inside _sme_str_zav); clobbered
 */
.macro sme_save_za nxbase, xvl, nw
	mov	w\nw, #0
423:
	_sme_str_zav	\nw, \nxbase
	add	x\nxbase, x\nxbase, \xvl	// advance one row
	add	x\nw, x\nw, #1
	cmp	\xvl, x\nw
	bne	423b
.endm
/*
 * Load the ZA array from the buffer at x\nxbase, one horizontal vector
 * per iteration, advancing the base by \xvl bytes each time (mirror of
 * sme_save_za above).
 * NOTE(review): this macro is truncated in this excerpt — the loop
 * counter initialisation, the counter increment/compare, the backward
 * branch and the closing .endm are not visible here; confirm against
 * the full file before relying on this block.
 */
.macro sme_load_za nxbase, xvl, nw
	_sme_ldr_zav \nw, \nxbase
	add x\nxbase, x\nxbase, \xvl