/*
 * arch/xtensa/lib/usercopy.S
 *
 * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
 *
 * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
 * It needs to remain separate and distinct.  The hal files are part
 * of the Xtensa link-time HAL, and those files may differ per
 * processor configuration.  Patching the kernel for another
 * processor configuration includes replacing the hal files, and we
 * could lose the special functionality for accessing user-space
 * memory during such a patch.  We sacrifice a little code space here
 * in favor of simpler code maintenance.
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License.  See the file "COPYING" in the main directory of
 * this archive for more details.
 *
 * Copyright (C) 2002 Tensilica Inc.
 */
/*
 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
 *
 * The returned value is the number of bytes not copied.  Implies zero
 * is success.
 *
 * The general case algorithm is as follows:
 *   If the destination and source are both aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B copies conditional on the length.
 *   If destination is aligned and source unaligned,
 *     do the same, but use SRC to align the source data.
 *   If destination is unaligned, align it by conditionally
 *     copying 1B and 2B and then retest.
 *   This code tries to use fall-through branches for the common
 *     case of aligned destinations (except for the branches to
 *     the alignment label).
 *
 * Register use:
 *	a0/ return address
 *	a1/ stack pointer
 *	a2/ return value
 *	a3/ src
 *	a4/ length
 *	a5/ dst
 *	a6/ tmp
 *	a7/ tmp
 *	a8/ tmp
 *	a9/ tmp
 *	a10/ tmp
 *	a11/ original length
 */
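/*
 * For orientation, a rough C sketch of the strategy described above.
 * This is purely illustrative (the function name is made up and the
 * sketch has no fault handling, which is the whole point of the real
 * routine):
 *
 *	#include <stddef.h>
 *
 *	size_t copy_user_sketch(unsigned char *dst,
 *				const unsigned char *src, size_t len)
 *	{
 *		size_t n = len;
 *
 *		// align dst to 4 bytes with 1B/2B copies (.Ldst1mod2/.Ldst2mod4)
 *		while (n && ((unsigned long)dst & 3)) {
 *			*dst++ = *src++;
 *			n--;
 *		}
 *		// main loop: 16B chunks, then smaller tails (.Laligned)
 *		while (n >= 16) {
 *			for (int i = 0; i < 16; i++)
 *				dst[i] = src[i];
 *			dst += 16;
 *			src += 16;
 *			n -= 16;
 *		}
 *		while (n--)
 *			*dst++ = *src++;
 *		return 0;	// bytes not copied; nonzero only after a fault
 *	}
 */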
#include <variant/core.h>
#ifdef __XTENSA_EB__
#define ALIGN(R, W0, W1) src	R, W0, W1
#define SSA8(R)	ssa8b R
#else
#define ALIGN(R, W0, W1) src	R, W1, W0
#define SSA8(R)	ssa8l R
#endif
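/*
 * How the unaligned-source path uses these: SSA8 sets the shift-amount
 * register from the low two bits of the source address, and SRC then
 * funnel-shifts two adjacent aligned words to produce one aligned word
 * of output.  A C model of the little-endian case (a sketch only; the
 * function name is made up here):
 *
 *	#include <stdint.h>
 *
 *	// w0, w1: two consecutive aligned words covering the unaligned
 *	// word; shift = 8 * (src_address & 3), as ssa8l computes.
 *	static uint32_t align_word_le(uint32_t w0, uint32_t w1,
 *				      unsigned int shift)
 *	{
 *		if (shift == 0)
 *			return w0;
 *		return (w0 >> shift) | (w1 << (32 - shift));
 *	}
 */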
/* Load or store instructions that may cause exceptions use the EX macro. */

#define EX(insn,reg1,reg2,offset,handler)	\
9:	insn	reg1, reg2, offset;		\
	.section __ex_table, "a";		\
	.word	9b, handler;			\
	.previous
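/*
 * Conceptually, each EX use appends a (fault address, handler) pair to
 * the __ex_table section, which the page-fault handler searches when a
 * fault hits kernel code.  A sketch of that lookup (names here are
 * illustrative, not the kernel's exact definitions):
 *
 *	#include <stddef.h>
 *
 *	struct extable_entry {
 *		unsigned long insn;	// address of the load/store
 *		unsigned long fixup;	// handler to resume at
 *	};
 *
 *	static const struct extable_entry *
 *	search_extable_sketch(const struct extable_entry *tbl,
 *			      size_t n, unsigned long pc)
 *	{
 *		for (size_t i = 0; i < n; i++)
 *			if (tbl[i].insn == pc)
 *				return &tbl[i];
 *		return NULL;	// not a whitelisted access: a real bug
 *	}
 */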
	.text
	.align	4
	.global	__xtensa_copy_user
	.type	__xtensa_copy_user,@function
__xtensa_copy_user:
	entry	sp, 16		# minimal stack frame
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
	mov	a11, a4		# preserve original len for error case

	bbsi.l	a2, 0, .Ldst1mod2	# if dst is 1 mod 2
	bbsi.l	a2, 1, .Ldst2mod4	# if dst is 2 mod 4
.Ldstaligned:	# return here from .Ldst1mod2/.Ldst2mod4 when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		  # if source is also aligned,
	bnone	a3, a8, .Laligned # then use word copy
	SSA8(	a3)		# set shift amount from byte offset
	bnez	a4, .Lsrcunaligned
	movi	a2, 0		# return success for len==0
	retw
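/*
 * Example of the srli above, with illustrative numbers: len = 100 gives
 * a7 = 100 >> 4 = 6 main-loop iterations; the remaining 100 & 15 = 4
 * bytes are handled by the conditional tail copies.
 */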
/*
 * Destination is unaligned
 */
.Ldst1mod2:	# dst is only byte aligned
	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	addi	a3, a3, 1
	EX(s8i, a6, a5, 0, fixup)
	addi	a5, a5, 1
	addi	a4, a4, -1
	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(l8ui, a7, a3, 1, fixup)
	addi	a3, a3, 2
	EX(s8i, a6, a5, 0, fixup)
	EX(s8i, a7, a5, 1, fixup)
	addi	a5, a5, 2
	addi	a4, a4, -2
	j	.Ldstaligned	# dst is now aligned, return to main algorithm
/*
 * Byte by byte copy
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
	EX(l8ui, a6, a3, 0, fixup)
	addi	a3, a3, 1
	EX(s8i, a6, a5, 0, fixup)
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a3, a7, .Lnextbyte
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
	movi	a2, 0		# return success for len bytes copied
	retw
/*
 * Destination and source are word-aligned.
 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
	EX(l32i, a6, a3, 0, fixup)
	EX(l32i, a7, a3, 4, fixup)
	EX(s32i, a6, a5, 0, fixup)
	EX(l32i, a6, a3, 8, fixup)
	EX(s32i, a7, a5, 4, fixup)
	EX(l32i, a7, a3, 12, fixup)
	EX(s32i, a6, a5, 8, fixup)
	addi	a3, a3, 16
	EX(s32i, a7, a5, 12, fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a8, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# copy 8 bytes
	EX(l32i, a6, a3, 0, fixup)
	EX(l32i, a7, a3, 4, fixup)
	addi	a3, a3, 8
	EX(s32i, a6, a5, 0, fixup)
	EX(s32i, a7, a5, 4, fixup)
	addi	a5, a5, 8
.L2:
	bbci.l	a4, 2, .L3
	# copy 4 bytes
	EX(l32i, a6, a3, 0, fixup)
	addi	a3, a3, 4
	EX(s32i, a6, a5, 0, fixup)
	addi	a5, a5, 4
.L3:
	bbci.l	a4, 1, .L4
	# copy 2 bytes
	EX(l16ui, a6, a3, 0, fixup)
	addi	a3, a3, 2
	EX(s16i, a6, a5, 0, fixup)
	addi	a5, a5, 2
.L4:
	bbci.l	a4, 0, .L5
	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(s8i, a6, a5, 0, fixup)
.L5:
	movi	a2, 0		# return success for len bytes copied
	retw
/*
 * Destination is aligned, Source is unaligned
 */

	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lsrcunaligned:
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	and	a10, a3, a8	# save unalignment offset for below
	sub	a3, a3, a10	# align a3 (to avoid sim warnings only; not needed for hardware)
	EX(l32i, a6, a3, 0, fixup)	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop2done
	slli	a12, a7, 4
	add	a12, a12, a3	# a12 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2:
	EX(l32i, a7, a3, 4, fixup)
	EX(l32i, a8, a3, 8, fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	EX(l32i, a9, a3, 12, fixup)
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5, 4, fixup)
	EX(l32i, a6, a3, 16, fixup)
	ALIGN(	a8, a8, a9)
	EX(s32i, a8, a5, 8, fixup)
	addi	a3, a3, 16
	ALIGN(	a9, a9, a6)
	EX(s32i, a9, a5, 12, fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a12, .Loop2
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
	bbci.l	a4, 3, .L12
	# copy 8 bytes
	EX(l32i, a7, a3, 4, fixup)
	EX(l32i, a8, a3, 8, fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	addi	a3, a3, 8
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5, 4, fixup)
	addi	a5, a5, 8
	mov	a6, a8
.L12:
	bbci.l	a4, 2, .L13
	# copy 4 bytes
	EX(l32i, a7, a3, 4, fixup)
	addi	a3, a3, 4
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	addi	a5, a5, 4
	mov	a6, a7
.L13:
	add	a3, a3, a10	# readjust a3 with correct misalignment
	bbci.l	a4, 1, .L14
	# copy 2 bytes
	EX(l8ui, a6, a3, 0, fixup)
	EX(l8ui, a7, a3, 1, fixup)
	addi	a3, a3, 2
	EX(s8i, a6, a5, 0, fixup)
	EX(s8i, a7, a5, 1, fixup)
	addi	a5, a5, 2
.L14:
	bbci.l	a4, 0, .L15
	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(s8i, a6, a5, 0, fixup)
.L15:
	movi	a2, 0		# return success for len bytes copied
	retw
	.section .fixup, "ax"
	.align	4
/* a2 = original dst; a5 = current dst; a11 = original len
 * bytes_copied = a5 - a2
 * retval = bytes_not_copied = original len - bytes_copied
 * retval = a11 - (a5 - a2)
 */
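/*
 * Worked example with illustrative numbers: for a copy of len = 100
 * bytes that faults after 37 bytes have been stored, a5 - a2 = 37 and
 * the return value is a11 - 37 = 63 bytes not copied.
 */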
fixup:
	sub	a2, a5, a2	/* a2 <-- bytes copied */
	sub	a2, a11, a2	/* a2 <-- bytes not copied */
	retw