2 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 #include <asm/processor.h>
10 #include <asm/ppc_asm.h>
11 #include <asm/export.h>
12 #include <asm/asm-compat.h>
13 #include <asm/feature-fixups.h>
16 /* 0 == most CPUs, 1 == POWER6, 2 == Cell */
17 #define SELFTEST_CASE 0
21 #define sLd sld /* Shift towards low-numbered address. */
22 #define sHd srd /* Shift towards high-numbered address. */
24 #define sLd srd /* Shift towards low-numbered address. */
25 #define sHd sld /* Shift towards high-numbered address. */
29 * These macros are used to generate exception table entries.
30 * The exception handlers below use the original arguments
31 * (stored on the stack) and the point where we're up to in
32 * the destination buffer, i.e. the address of the first
33 * unmodified byte. Generally r3 points into the destination
34 * buffer, but the first unmodified byte is at a variable
35 * offset from r3. In the code below, the symbol r3_offset
36 * is set to indicate the current offset at each point in
37 * the code. This offset is then used as a negative offset
38 * from the exception handler label: the instructions just
39 * before each handler are addi instructions, so entering the
40 * handler r3_offset bytes early adjusts r3 to point to the correct place.
42 .macro lex /* tag the instruction that follows (a load) with an exception table entry */
43 100: EX_TABLE(100b, .Lld_exc - r3_offset) /* fixup address: .Lld_exc minus the current r3_offset */
46 .macro stex /* tag the instruction that follows (a store) with an exception table entry */
47 100: EX_TABLE(100b, .Lst_exc - r3_offset) /* fixup address: .Lst_exc minus the current r3_offset */
51 _GLOBAL_TOC(__copy_tofrom_user)
52 #ifdef CONFIG_PPC_BOOK3S_64
56 b __copy_tofrom_user_power7 /* tail-branch (b, not bl): control does not come back here. NOTE(review): presumably kept only on CPUs with CPU_FTR_VMX_COPY; confirm against the start of this feature section */
57 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
59 _GLOBAL(__copy_tofrom_user_base)
60 /* first check for a 4kB copy on a 4kB boundary */
64 neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */
67 crand cr0*4+2,cr0*4+2,cr6*4+2 /* cr0.eq = cr0.eq AND cr6.eq: both earlier compares must have been equal */
75 /* Below we want to nop out the bne if we're on a CPU that has the
76 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
78 * At the time of writing the only CPU that has this combination of bits
81 test_feature = (SELFTEST_CASE == 1) /* SELFTEST_CASE 1 is the POWER6 case (see the SELFTEST_CASE definition) */
86 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
87 CPU_FTR_UNALIGNED_LD_STD) /* mask covers both bits, value has only UNALIGNED_LD_STD: i.e. UNALIGNED_LD_STD set and CP_USE_DCBTZ clear */
91 test_feature = (SELFTEST_CASE == 0) /* SELFTEST_CASE 0 is the "most CPUs" case */
95 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) /* section above applies when CPU_FTR_UNALIGNED_LD_STD is clear */
96 blt cr1,.Ldo_tail /* if < 16 bytes to copy */
174 lex; ld r9,0(r4) /* 3+2n loads, 2+2n stores */
186 lex; ld r0,0(r4) /* 4+2n loads, 3+2n stores */
241 #ifdef __BIG_ENDIAN__
245 #ifdef __LITTLE_ENDIAN__
250 #ifdef __BIG_ENDIAN__
254 #ifdef __LITTLE_ENDIAN__
259 #ifdef __BIG_ENDIAN__
263 #ifdef __LITTLE_ENDIAN__
271 PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */
276 100: EX_TABLE(100b, .Lld_exc_r7)
278 100: EX_TABLE(100b, .Lst_exc_r7)
282 100: EX_TABLE(100b, .Lld_exc_r7)
284 100: EX_TABLE(100b, .Lst_exc_r7)
288 100: EX_TABLE(100b, .Lld_exc_r7)
290 100: EX_TABLE(100b, .Lst_exc_r7)
292 3: PPC_MTOCRF(0x01,r5) /* field mask 0x01 selects cr7: put the low 4 bits of r5 into cr7 */
323 * Exception handlers follow.
324 * We have to return the number of bytes not copied.
325 * For an exception on a load, we set the rest of the destination to 0.
326 * Note that the number of bytes of instructions for adjusting r3 needs
327 * to equal the amount of the adjustment, due to the trick of using
328 * .Lld_exc - r3_offset as the handler address.
346 * Here we have had a fault on a load and r3 points to the first
347 * unmodified byte of the destination. We use the original arguments
348 * and r3 to work out how much wasn't copied. Since we load some
349 * distance ahead of the stores, we continue copying byte-by-byte until
350 * we hit the load fault again in order to copy as much as possible.
358 subf r5,r6,r5 /* #bytes left to go */
361 * first see if we can copy any more bytes before hitting another exception
365 100: EX_TABLE(100b, .Ldone)
371 li r3,0 /* unexpected: the retry copied everything without faulting, so report 0 bytes not copied */
375 * here we have trapped again, amount remaining is in ctr.
382 * exception handlers for stores: we need to work out how many bytes
383 * weren't copied, and we may need to copy some more.
384 * Note that the number of bytes of instructions for adjusting r3 needs
385 * to equal the amount of the adjustment, due to the trick of using
386 * .Lst_exc - r3_offset as the handler address.
403 ld r6,-24(r1) /* original destination pointer */
404 ld r4,-16(r1) /* original source pointer */
405 ld r5,-8(r1) /* original number of bytes */
408 * If the destination pointer isn't 8-byte aligned,
409 * we may have got the exception as a result of a
410 * store that overlapped a page boundary, so we may be
411 * able to copy a few more bytes.
415 subf r8,r6,r3 /* #bytes copied */
416 100: EX_TABLE(100b,19f)
418 100: EX_TABLE(100b,19f)
423 19: subf r3,r3,r7 /* #bytes not copied in r3 */
427 * Routine to copy a whole page of data, optimized for POWER4.
428 * On POWER4 it is more than 50% faster than the simple loop
429 * above (following the .Ldst_aligned label).
432 100: EX_TABLE(100b, .Labort) /* a fault at the instruction tagged by 100: resumes at .Labort */
548 * on an exception, reset to the beginning and jump back into the
549 * standard __copy_tofrom_user
568 EXPORT_SYMBOL(__copy_tofrom_user)