/*
 * arch/xtensa/lib/hal/memcopy.S -- Core HAL library functions
 * xthal_memcpy and xthal_bcopy
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2002 - 2012 Tensilica Inc.
 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
/*
 * void *memcpy(void *dst, const void *src, size_t len);
 *
 * This function is intended to do the same thing as the standard
 * library function memcpy() for most cases.
 * However, where the source and/or destination references
 * an instruction RAM or ROM or a data RAM or ROM, that
 * source and/or destination will always be accessed with
 * 32-bit load and store instructions (as required for these
 * types of devices).
 *
 * !!!!!!!  Handling of IRAM/IROM has not yet
 * !!!!!!!  been implemented.
 *
 * The (general case) algorithm is as follows:
 *   If destination is unaligned, align it by conditionally
 *     copying 1 and 2 bytes.
 *   If source is aligned,
 *     do 16 bytes with a loop, and then finish up with
 *     8, 4, 2, and 1 byte copies conditional on the length;
 *   else (if source is unaligned),
 *     do the same, but use SRC to align the source data.
 *   This code tries to use fall-through branches for the common
 *     case of aligned source and destination and multiple
 *     of 4 (or 8) length.
 */
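/*
 * A C sketch of that flow (purely illustrative and never assembled;
 * memcpy_sketch is a hypothetical name, a 32-bit pointer-sized
 * unsigned long is assumed, and the byte loop at the end stands in
 * both for the 8/4/2/1-byte tail and for the SRC-based shifting copy
 * that the assembly uses for unaligned sources):
 *
 *	void *memcpy_sketch(void *dst, const void *src, size_t len)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *
 *		// Align dst to a 4-byte boundary by copying 1 and/or
 *		// 2 bytes; short copies just go byte by byte.
 *		if (len >= 7 && ((unsigned long)d & 1)) {
 *			*d++ = *s++;
 *			len--;
 *		}
 *		if (len >= 6 && ((unsigned long)d & 2)) {
 *			*d++ = *s++;
 *			*d++ = *s++;
 *			len -= 2;
 *		}
 *		if (((unsigned long)s & 3) == 0) {
 *			unsigned int *dw = (unsigned int *)d;
 *			const unsigned int *sw = (const unsigned int *)s;
 *
 *			for (; len >= 16; len -= 16) {	// 16B/iteration
 *				*dw++ = *sw++;
 *				*dw++ = *sw++;
 *				*dw++ = *sw++;
 *				*dw++ = *sw++;
 *			}
 *			d = (unsigned char *)dw;
 *			s = (const unsigned char *)sw;
 *		}
 *		while (len--)
 *			*d++ = *s++;
 *		return dst;
 *	}
 */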
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
	l8ui	a6, a3, 0
	addi	a3, a3, 1
	s8i	a6, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	bne	a3, a7, .Lnextbyte # continue loop if $a3:src != $a7:src_end
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
	abi_ret_default
/*
 * Destination is unaligned
 */

	.align	4
.Ldst1mod2:	# dst is only byte aligned
	_bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	# copy 1 byte
	l8ui	a6, a3, 0
	addi	a3, a3, 1
	addi	a4, a4, -1
	s8i	a6, a5, 0
	addi	a5, a5, 1
	_bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	_bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
	l8ui	a6, a3, 0
	l8ui	a7, a3, 1
	addi	a3, a3, 2
	addi	a4, a4, -2
	s8i	a6, a5, 0
	s8i	a7, a5, 1
	addi	a5, a5, 2
	j	.Ldstaligned	# dst is now aligned, return to main algorithm
ENTRY(__memcpy)
WEAK(memcpy)

	abi_entry_default
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
.Lcommon:
	_bbsi.l	a2, 0, .Ldst1mod2	# if dst is 1 mod 2
	_bbsi.l	a2, 1, .Ldst2mod4	# if dst is 2 mod 4
.Ldstaligned:	# return here from .Ldst?mod? once dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is not aligned,
	_bany	a3, a8, .Lsrcunaligned	# then use shifting copy
	/*
	 * Destination and source are word-aligned, use word copy.
	 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
#if !XCHAL_HAVE_LOOPS
	bne	a3, a8, .Loop1	# continue loop if a3:src != a8:src_end
#endif /* !XCHAL_HAVE_LOOPS */
/*
 * Destination is aligned, Source is unaligned
 */

	.align	4
.Lsrcunaligned:
	_beqz	a4, .Ldone	# avoid loading anything for zero-length copies
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	__ssa8	a3		# set shift amount from byte offset
/* set to 1 when running on ISS (simulator) with the
   lint or ferret client, or 0 to save a few cycles */
#define SIM_CHECKS_ALIGNMENT	1
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	and	a11, a3, a8	# save unalignment offset for below
	sub	a3, a3, a11	# align a3
#endif
	l32i	a6, a3, 0	# load first word
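/*
 * From here on, the loop reads only aligned words and reconstructs
 * each unaligned source word with the SRC funnel-shift instruction
 * (through the __ssa8/__src_b macros from <asm/asmmacro.h>), using
 * the shift amount derived from the low two bits of src. Roughly,
 * in C, for the little-endian case with off = src & 3 (1..3 on this
 * path, so neither shift count is 32):
 *
 *	word = (lo >> (8 * off)) | (hi << (32 - 8 * off));
 *
 * where lo and hi are the two consecutive aligned words covering the
 * unaligned one. The align-down of a3 above matters on cores where
 * an unaligned l32i raises an exception; on cores where it does not,
 * the low address bits are effectively ignored anyway, which is why
 * the adjustment can be compiled out.
 */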
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop2done
	slli	a10, a7, 4
	add	a10, a10, a3	# a10 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
#if !XCHAL_HAVE_LOOPS
	bne	a3, a10, .Loop2	# continue loop if a3:src != a10:src_end
#endif /* !XCHAL_HAVE_LOOPS */
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	add	a3, a3, a11	# readjust a3 with correct misalignment
#endif

.Ldone:	abi_ret_default
EXPORT_SYMBOL(__memcpy)
EXPORT_SYMBOL(memcpy)
/*
 * void *memmove(void *dst, const void *src, size_t len);
 *
 * This function is intended to do the same thing as the standard
 * library function memmove() for most cases.
 * However, where the source and/or destination references
 * an instruction RAM or ROM or a data RAM or ROM, that
 * source and/or destination will always be accessed with
 * 32-bit load and store instructions (as required for these
 * types of devices).
 *
 * !!!!!!!  Handling of IRAM/IROM has not yet
 * !!!!!!!  been implemented.
 *
 * The (general case) algorithm is as follows:
 *   If end of source doesn't overlap destination then use memcpy.
 *   Otherwise do memcpy backwards.
 */
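/*
 * A C sketch of that dispatch (purely illustrative and never
 * assembled; memmove_sketch is a hypothetical name, and the byte
 * loop stands in for the optimized backward copy below):
 *
 *	void *memmove_sketch(void *dst, const void *src, size_t len)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *
 *		// Unsigned wraparound makes this one test cover both
 *		// dst below src and dst at or beyond src + len.
 *		if ((unsigned long)d - (unsigned long)s >= len)
 *			return memcpy(dst, src, len);
 *
 *		// dst lies inside [src, src + len): copy backwards so
 *		// each source byte is read before it is overwritten.
 *		d += len;
 *		s += len;
 *		while (len--)
 *			*--d = *--s;
 *		return dst;
 *	}
 */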
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbackbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbackbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbackbytecopydone
	sub	a7, a3, a4	# a7 = start address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lbacknextbyte:
	addi	a3, a3, -1
	l8ui	a6, a3, 0
	addi	a5, a5, -1
	s8i	a6, a5, 0
#if !XCHAL_HAVE_LOOPS
	bne	a3, a7, .Lbacknextbyte	# continue loop if
					# $a3:src != $a7:src_start
#endif /* !XCHAL_HAVE_LOOPS */
.Lbackbytecopydone:
	abi_ret_default
/*
 * Destination is unaligned
 */

	.align	4
.Lbackdst1mod2:	# dst is only byte aligned
	_bltui	a4, 7, .Lbackbytecopy	# do short copies byte by byte

	# copy 1 byte
	addi	a3, a3, -1
	l8ui	a6, a3, 0
	addi	a5, a5, -1
	s8i	a6, a5, 0
	addi	a4, a4, -1
	_bbci.l	a5, 1, .Lbackdstaligned	# if dst is now aligned, then
					# return to main algorithm
.Lbackdst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	_bltui	a4, 6, .Lbackbytecopy	# do short copies byte by byte
	addi	a3, a3, -2
	l8ui	a6, a3, 0
	l8ui	a7, a3, 1
	addi	a5, a5, -2
	s8i	a6, a5, 0
	s8i	a7, a5, 1
	addi	a4, a4, -2
	j	.Lbackdstaligned	# dst is now aligned,
					# return to main algorithm
ENTRY(__memmove)
WEAK(memmove)

	abi_entry_default
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
.Lmovecommon:
	sub	a6, a5, a3	# a6 = dst - src (unsigned)
	bgeu	a6, a4, .Lcommon	# dst - src >= len: no harmful
					# overlap, forward memcpy is safe

	add	a5, a5, a4	# a5 = end of dst
	add	a3, a3, a4	# a3 = end of src

	_bbsi.l	a5, 0, .Lbackdst1mod2	# if dst is 1 mod 2
	_bbsi.l	a5, 1, .Lbackdst2mod4	# if dst is 2 mod 4
.Lbackdstaligned:	# return here from .Lbackdst?mod? once dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is not aligned,
	_bany	a3, a8, .Lbacksrcunaligned	# then use shifting copy
	/*
	 * Destination and source are word-aligned, use word copy.
	 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .LbackLoop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .LbackLoop1done
	slli	a8, a7, 4
	sub	a8, a3, a8	# a8 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
#if !XCHAL_HAVE_LOOPS
	bne	a3, a8, .LbackLoop1	# continue loop if a3:src != a8:src_start
#endif /* !XCHAL_HAVE_LOOPS */
.LbackLoop1done:
	bbci.l	a4, 3, .Lback2
	bbsi.l	a4, 2, .Lback3
	bbsi.l	a4, 1, .Lback4
	bbsi.l	a4, 0, .Lback5
	bbsi.l	a4, 1, .Lback4
	bbsi.l	a4, 0, .Lback5
	bbsi.l	a4, 0, .Lback5
/*
 * Destination is aligned, Source is unaligned
 */

	.align	4
.Lbacksrcunaligned:
	_beqz	a4, .Lbackdone	# avoid loading anything for zero-length copies
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	__ssa8	a3		# set shift amount from byte offset
#define SIM_CHECKS_ALIGNMENT	1	/* set to 1 when running on ISS with
					 * the lint or ferret client, or 0
					 * to save a few cycles */
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	and	a11, a3, a8	# save unalignment offset for below
	sub	a3, a3, a11	# align a3
#endif
	l32i	a6, a3, 0	# load first word
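/*
 * Mirror image of the forward shifting copy: the loop below walks
 * both pointers downwards and funnel-shifts each freshly loaded
 * aligned word against the one carried over from the previous step
 * (again via SRC, with the shift amount set by __ssa8 above), so
 * every store still writes one fully reconstructed source word.
 */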
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .LbackLoop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .LbackLoop2done
	slli	a10, a7, 4
	sub	a10, a3, a10	# a10 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
#if !XCHAL_HAVE_LOOPS
	bne	a3, a10, .LbackLoop2	# continue loop if a3:src != a10:src_start
#endif /* !XCHAL_HAVE_LOOPS */
.LbackLoop2done:
	bbci.l	a4, 3, .Lback12
	bbci.l	a4, 2, .Lback13
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	add	a3, a3, a11	# readjust a3 with correct misalignment
#endif
	bbsi.l	a4, 1, .Lback14
	bbsi.l	a4, 0, .Lback15
.Lbackdone:
	abi_ret_default
	bbsi.l	a4, 0, .Lback15
EXPORT_SYMBOL(__memmove)
EXPORT_SYMBOL(memmove)