2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 * Copyright (C) IBM Corporation, 2011
18 * Author: Anton Blanchard <anton@au.ibm.com>
20 #include <asm/ppc_asm.h>
/*
 * Two alternative LVS/VPERM definition pairs follow.
 * NOTE(review): the preprocessor conditional that chooses between them is
 * not visible in this excerpt (presumably an endianness check) - confirm.
 */
23 #define LVS(VRT,RA,RB) lvsl VRT,RA,RB
24 #define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
/* Alternate pair: lvsr instead of lvsl, and VRA/VRB are swapped for vperm. */
26 #define LVS(VRT,RA,RB) lvsr VRT,RA,RB
27 #define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
/*
 * Each entry pairs a backward local label (100/200/300/400) with one of the
 * .Ldo_err1 .. .Ldo_err4 fix-up labels.
 * NOTE(review): presumably each entry sits inside the matching err1..err4
 * macro that prefixes the user-access instructions later in the file; the
 * macro definitions themselves are not visible in this excerpt.
 */
32 EX_TABLE(100b,.Ldo_err1)
37 EX_TABLE(200b,.Ldo_err2)
43 EX_TABLE(300b,.Ldo_err3)
48 EX_TABLE(400b,.Ldo_err4)
/*
 * Fault fix-up paths. NOTE(review): the .Ldo_err* labels and some of the
 * instructions between the visible lines are missing from this excerpt.
 */
/* CONFIG_ALTIVEC-only part: restore r14-r16 from their stack-frame slots. */
53 ld r16,STK_REG(R16)(r1)
54 ld r15,STK_REG(R15)(r1)
55 ld r14,STK_REG(R14)(r1)
/*
 * Reload r0 from offset 16 above the frame.
 * NOTE(review): presumably the saved link register - its consumer is not
 * visible here.
 */
58 ld r0,STACKFRAMESIZE+16(r1)
61 #endif /* CONFIG_ALTIVEC */
/* Restore r14-r22 (saved by the non-VMX copy path) and pop the frame. */
64 ld r22,STK_REG(R22)(r1)
65 ld r21,STK_REG(R21)(r1)
66 ld r20,STK_REG(R20)(r1)
67 ld r19,STK_REG(R19)(r1)
68 ld r18,STK_REG(R18)(r1)
69 ld r17,STK_REG(R17)(r1)
70 ld r16,STK_REG(R16)(r1)
71 ld r15,STK_REG(R15)(r1)
72 ld r14,STK_REG(R14)(r1)
74 addi r1,r1,STACKFRAMESIZE
/*
 * Reload the original r3/r4/r5 that the entry code stored below the stack
 * pointer, then hand the whole copy over to __copy_tofrom_user_base.
 */
76 ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
77 ld r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
78 ld r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
79 b __copy_tofrom_user_base
/*
 * __copy_tofrom_user_power7: entry point and non-VMX copy path.
 *
 * From the visible code: r3 is the store (destination) pointer, r4 the load
 * (source) pointer and r5 the remaining byte count. User-access instructions
 * carry an errN; prefix that ties them to a .Ldo_errN fix-up.
 * NOTE(review): many instructions between the visible lines are missing from
 * this excerpt; comments below only describe what is shown.
 */
82 _GLOBAL(__copy_tofrom_user_power7)
/*
 * Stash the three arguments in slots below the current stack pointer so the
 * fix-up code can reload them after a fault.
 */
87 std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
88 std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
89 std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
/*
 * Same three stores again.
 * NOTE(review): presumably the other arm of a conditional whose directives
 * are not visible in this excerpt.
 */
96 std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
97 std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
98 std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
104 /* Get the source 8B aligned */
/* Allocate a stack frame and save non-volatile r14-r22 for the 128B loop. */
132 stdu r1,-STACKFRAMESIZE(r1)
133 std r14,STK_REG(R14)(r1)
134 std r15,STK_REG(R15)(r1)
135 std r16,STK_REG(R16)(r1)
136 std r17,STK_REG(R17)(r1)
137 std r18,STK_REG(R18)(r1)
138 std r19,STK_REG(R19)(r1)
139 std r20,STK_REG(R20)(r1)
140 std r21,STK_REG(R21)(r1)
141 std r22,STK_REG(R22)(r1)
/*
 * Store r0 at offset 16 above the new frame (i.e. in the caller's frame).
 * NOTE(review): presumably r0 holds the link register at this point; the
 * instruction that loads it is not visible here.
 */
142 std r0,STACKFRAMESIZE+16(r1)
147 /* Now do cacheline (128B) sized loads and stores. */
/* Last three doubleword stores of the 128B block (offsets 104..120). */
180 err2; std r19,104(r3)
181 err2; std r20,112(r3)
182 err2; std r21,120(r3)
/* Loop finished: restore r14-r22 and pop the frame. */
188 ld r14,STK_REG(R14)(r1)
189 ld r15,STK_REG(R15)(r1)
190 ld r16,STK_REG(R16)(r1)
191 ld r17,STK_REG(R17)(r1)
192 ld r18,STK_REG(R18)(r1)
193 ld r19,STK_REG(R19)(r1)
194 ld r20,STK_REG(R20)(r1)
195 ld r21,STK_REG(R21)(r1)
196 ld r22,STK_REG(R22)(r1)
197 addi r1,r1,STACKFRAMESIZE
199 /* Up to 127B to go */
223 /* Up to 63B to go */
236 /* Up to 31B to go */
/* Clear the upper 60 bits of r5 (r5 &= 15): only the sub-16B tail is left. */
245 9: clrldi r5,r5,(64-4)
247 /* Up to 15B to go */
251 err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
/*
 * Branch target of the "beq cr1" in the VMX setup code: drop the frame that
 * setup allocated before continuing without VMX.
 */
277 .Lunwind_stack_nonvmx_copy:
278 addi r1,r1,STACKFRAMESIZE
/*
 * VMX (Altivec) copy path: setup, prefetch streams, then the copy used when
 * source and destination are relatively aligned.
 * NOTE(review): the label that starts this path and many instructions between
 * the visible lines are missing from this excerpt.
 */
281 #ifdef CONFIG_ALTIVEC
/* Allocate a frame and call enter_vmx_usercopy. */
285 stdu r1,-STACKFRAMESIZE(r1)
286 bl enter_vmx_usercopy
/*
 * Reload r0 and the three arguments after the call. Because r1 was lowered by
 * STACKFRAMESIZE, STK_REG(Rn)(r1) now addresses the same slots the entry code
 * wrote at -STACKFRAMESIZE+STK_REG(Rn)(r1).
 */
288 ld r0,STACKFRAMESIZE+16(r1)
289 ld r3,STK_REG(R31)(r1)
290 ld r4,STK_REG(R30)(r1)
291 ld r5,STK_REG(R29)(r1)
295 * We prefetch both the source and destination using enhanced touch
296 * instructions. We use a stream ID of 0 for the load side and
297 * 1 for the store side.
301 ori r9,r9,1 /* stream=1 */
303 srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
307 1: lis r0,0x0E00 /* depth=7 */
310 ori r10,r7,1 /* stream=1 */
312 lis r8,0x8000 /* GO=1 */
317 /* setup read stream 0 */
318 dcbt 0,r6,0b01000 /* addr from */
319 dcbt 0,r7,0b01010 /* length and depth from */
320 /* setup write stream 1 */
321 dcbtst 0,r9,0b01000 /* addr to */
322 dcbtst 0,r10,0b01010 /* length and depth to */
324 dcbt 0,r8,0b01010 /* all streams GO */
/*
 * cr1 "equal": pop the frame and continue without VMX.
 * NOTE(review): the compare that sets cr1 is not visible here; presumably it
 * tests the return value of enter_vmx_usercopy.
 */
327 beq cr1,.Lunwind_stack_nonvmx_copy
330 * If source and destination are not relatively aligned we use a
331 * slower permute loop.
/*
 * Keep only the low 4 bits of r6 and set cr0 from the result; non-zero takes
 * the unaligned path.
 * NOTE(review): r6 presumably holds source XOR destination, computed by an
 * instruction not visible here.
 */
334 rldicl. r6,r6,0,(64-4)
335 bne .Lvmx_unaligned_copy
337 /* Get the destination 16B aligned */
368 /* Get the destination 128B aligned */
/* Save non-volatile r14-r16 for the cacheline loop. */
407 std r14,STK_REG(R14)(r1)
408 std r15,STK_REG(R15)(r1)
409 std r16,STK_REG(R16)(r1)
419 * Now do cacheline sized loads and stores. By this stage the
420 * cacheline stores are also cacheline aligned.
/* Cacheline loop done: restore r14-r16. */
444 ld r14,STK_REG(R14)(r1)
445 ld r15,STK_REG(R15)(r1)
446 ld r16,STK_REG(R16)(r1)
448 /* Up to 127B to go */
479 /* Up to 15B to go */
/* r5 &= 15: only the sub-16B tail is left. */
480 11: clrldi r5,r5,(64-4)
/* Pop the frame; exit_vmx_usercopy returns directly to our caller. */
504 15: addi r1,r1,STACKFRAMESIZE
505 b exit_vmx_usercopy /* tail call optimise */
/*
 * VMX copy for source and destination that are not relatively 16B aligned;
 * reached from the "bne .Lvmx_unaligned_copy" in the VMX setup code.
 * NOTE(review): many instructions between the visible lines are missing from
 * this excerpt.
 */
507 .Lvmx_unaligned_copy:
508 /* Get the destination 16B aligned */
532 err3; lwz r0,0(r4) /* Less chance of a reject with word ops */
541 /* Get the destination 128B aligned */
551 LVS(v16,0,r4) /* Setup permute control vector */
585 err3; stvx v10,r3,r10
586 err3; stvx v11,r3,r11
/*
 * Save non-volatile r14-r16: the cacheline loop below uses them as index
 * registers for its stvx stores.
 */
592 std r14,STK_REG(R14)(r1)
593 std r15,STK_REG(R15)(r1)
594 std r16,STK_REG(R16)(r1)
604 * Now do cacheline sized loads and stores. By this stage the
605 * cacheline stores are also cacheline aligned.
/* Vector stores of the cacheline loop, tagged err4 while r14-r16 are live. */
628 err4; stvx v10,r3,r10
629 err4; stvx v11,r3,r11
630 err4; stvx v12,r3,r12
631 err4; stvx v13,r3,r14
632 err4; stvx v14,r3,r15
633 err4; stvx v15,r3,r16
/* Cacheline loop done: restore r14-r16. */
637 ld r14,STK_REG(R14)(r1)
638 ld r15,STK_REG(R15)(r1)
639 ld r16,STK_REG(R16)(r1)
641 /* Up to 127B to go */
658 err3; stvx v10,r3,r10
659 err3; stvx v11,r3,r11
679 /* Up to 15B to go */
/* r5 &= 15: only the sub-16B tail is left. */
680 11: clrldi r5,r5,(64-4)
681 addi r4,r4,-16 /* Unwind the +16 load offset */
684 err3; lwz r0,0(r4) /* Less chance of a reject with word ops */
/* Pop the frame; exit_vmx_usercopy returns directly to our caller. */
707 15: addi r1,r1,STACKFRAMESIZE
708 b exit_vmx_usercopy /* tail call optimise */
709 #endif /* CONFIG_ALTIVEC */