2 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
4 * Author: Nicolas Pitre <nico@fluxnic.net>
5 * - contributed to gcc-3.4 on Sep 30, 2003
6 * - adapted for the Linux kernel on Oct 2, 2003
9 /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
11 This file is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published by the
13 Free Software Foundation; either version 2, or (at your option) any later version.
16 In addition to the permissions in the GNU General Public License, the
17 Free Software Foundation gives you unlimited permission to link the
18 compiled version of this file into combinations with other programs,
19 and to distribute those combinations without any restriction coming
20 from the use of this file. (The General Public License restrictions
21 do apply in other respects; for example, they cover modification of
22 the file, and distribution when not linked into a combine executable.)
25 This file is distributed in the hope that it will be useful, but
26 WITHOUT ANY WARRANTY; without even the implied warranty of
27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 General Public License for more details.
30 You should have received a copy of the GNU General Public License
31 along with this program; see the file COPYING. If not, write to
32 the Free Software Foundation, 59 Temple Place - Suite 330,
33 Boston, MA 02111-1307, USA. */
36 #include <linux/linkage.h>
37 #include <asm/assembler.h>
38 #include <asm/unwind.h>
@ ARM_DIV_BODY: shift-and-subtract unsigned division core.
@ All four parameters are register names:
@   dividend - value being divided; reduced in place by the subtractions
@   divisor  - shifted up first, then stepped back down by the loop
@   result   - quotient bits are ORed into this register
@   curbit   - quotient bit that matches the current divisor position
@ NOTE(review): this view of the file has gaps. The preprocessor else and
@ endif lines, the loop branches and the closing .endm are not visible
@ here - confirm against the complete file.
40 .macro ARM_DIV_BODY dividend, divisor, result, curbit
@ Variant for architecture level 5 and above, which uses clz.
42 #if __LINUX_ARM_ARCH__ >= 5
@ result = number of leading zero bits in the dividend.
45 clz \result, \dividend
@ result = curbit - result.
@ NOTE(review): the instructions that load curbit before this point are
@ not visible in this view.
46 sub \result, \curbit, \result
@ Shift divisor and curbit left by the same amount so they stay paired.
48 mov \divisor, \divisor, lsl \result
49 mov \curbit, \curbit, lsl \result
54 @ Initially shift the divisor left 3 bits if possible,
55 @ set curbit accordingly. This allows for curbit to be located
56 @ at the left end of each 4-bit nibble in the division loop
57 @ to save one loop in most cases.
@ The shift is taken only when the top three divisor bits are clear.
58 tst \divisor, #0xe0000000
59 moveq \divisor, \divisor, lsl #3
63 @ Unless the divisor is very big, shift it up in multiples of
64 @ four bits, since this is the amount of unwinding in the main
65 @ division loop. Continue shifting until the divisor is
66 @ larger than the dividend.
@ The second compare and both shifts only execute while the previous
@ compare reported lower (unsigned).
67 1: cmp \divisor, #0x10000000
68 cmplo \divisor, \dividend
69 movlo \divisor, \divisor, lsl #4
70 movlo \curbit, \curbit, lsl #4
73 @ For very big divisors, we must shift it a bit at a time, or
74 @ we will be in danger of overflowing.
75 1: cmp \divisor, #0x80000000
76 cmplo \divisor, \dividend
77 movlo \divisor, \divisor, lsl #1
78 movlo \curbit, \curbit, lsl #1
@ Division loop body: try the divisor at shifts 0, 1, 2 and 3. Each time
@ the shifted divisor fits (unsigned higher or same), subtract it from
@ the dividend and set the equally shifted curbit in result.
86 1: cmp \dividend, \divisor
87 subhs \dividend, \dividend, \divisor
88 orrhs \result, \result, \curbit
89 cmp \dividend, \divisor, lsr #1
90 subhs \dividend, \dividend, \divisor, lsr #1
91 orrhs \result, \result, \curbit, lsr #1
92 cmp \dividend, \divisor, lsr #2
93 subhs \dividend, \dividend, \divisor, lsr #2
94 orrhs \result, \result, \curbit, lsr #2
95 cmp \dividend, \divisor, lsr #3
96 subhs \dividend, \dividend, \divisor, lsr #3
97 orrhs \result, \result, \curbit, lsr #3
@ If the dividend is not yet zero, move curbit and divisor down one
@ nibble. The flag-setting shift of curbit yields Z once curbit has been
@ shifted out completely.
@ NOTE(review): the branch that consumes these flags is not visible here.
98 cmp \dividend, #0 @ Early termination?
99 movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
100 movne \divisor, \divisor, lsr #4
@ ARM_DIV2_ORDER: derive a shift count (order) from the divisor register.
@ Parameters are register names: divisor (input, modified by the
@ fallback path) and order (output).
@ Presumably intended for divisors that are a power of two, given the
@ power-of-2 tests next to its call sites - TODO confirm.
@ NOTE(review): this view has gaps. The preprocessor else and endif
@ lines, some instructions and the closing .endm are not visible here.
106 .macro ARM_DIV2_ORDER divisor, order
@ Variant for architecture level 5 and above.
108 #if __LINUX_ARM_ARCH__ >= 5
@ order = 31 - order.
@ NOTE(review): the instruction that fills order before this reverse
@ subtract is not visible in this view.
111 rsb \order, \order, #31
@ Fallback: narrow the divisor down by 16, 8 and 4 bits, adding the
@ amount of each shift taken to order.
@ NOTE(review): the order updates that pair with the 16-bit step are not
@ visible in this view.
115 cmp \divisor, #(1 << 16)
116 movhs \divisor, \divisor, lsr #16
120 cmp \divisor, #(1 << 8)
121 movhs \divisor, \divisor, lsr #8
122 addhs \order, \order, #8
124 cmp \divisor, #(1 << 4)
125 movhs \divisor, \divisor, lsr #4
126 addhs \order, \order, #4
@ Final step on the narrowed divisor: above 4 adds 3, otherwise adds
@ divisor shifted right by one (0, 1 or 2 for divisor values 1, 2 or 4).
128 cmp \divisor, #(1 << 2)
129 addhi \order, \order, #3
130 addls \order, \order, \divisor, lsr #1
@ ARM_MOD_BODY: shift-and-subtract remainder core.
@ All four parameters are register names:
@   dividend - reduced in place by the subtractions
@   divisor  - shifted up first, then stepped back down
@   order    - number of bit positions the divisor was shifted up
@   spare    - scratch register, written only by the clz variant
@ NOTE(review): this view of the file has gaps. The preprocessor else and
@ endif lines, the loop branches and the closing .endm are not visible
@ here - confirm against the complete file.
137 .macro ARM_MOD_BODY dividend, divisor, order, spare
@ Variant for architecture level 5 and above, which uses clz.
139 #if __LINUX_ARM_ARCH__ >= 5
@ spare = number of leading zero bits in the dividend.
142 clz \spare, \dividend
@ order = order - spare.
@ NOTE(review): the instruction that fills order before this point is
@ not visible in this view.
143 sub \order, \order, \spare
@ Align the divisor by shifting it left by order bits.
144 mov \divisor, \divisor, lsl \order
150 @ Unless the divisor is very big, shift it up in multiples of
151 @ four bits, since this is the amount of unwinding in the main
152 @ division loop. Continue shifting until the divisor is
153 @ larger than the dividend.
@ order is increased by the same amount as each shift applied.
154 1: cmp \divisor, #0x10000000
155 cmplo \divisor, \dividend
156 movlo \divisor, \divisor, lsl #4
157 addlo \order, \order, #4
160 @ For very big divisors, we must shift it a bit at a time, or
161 @ we will be in danger of overflowing.
162 1: cmp \divisor, #0x80000000
163 cmplo \divisor, \dividend
164 movlo \divisor, \divisor, lsl #1
165 addlo \order, \order, #1
170 @ Perform all needed subtractions to keep only the remainder.
171 @ Do comparisons in batch of 4 first.
172 subs \order, \order, #3 @ yes, 3 is intended here
@ One batch: compare/subtract with the divisor at shifts 0, 1, 2 and 3.
175 1: cmp \dividend, \divisor
176 subhs \dividend, \dividend, \divisor
177 cmp \dividend, \divisor, lsr #1
178 subhs \dividend, \dividend, \divisor, lsr #1
179 cmp \dividend, \divisor, lsr #2
180 subhs \dividend, \dividend, \divisor, lsr #2
181 cmp \dividend, \divisor, lsr #3
182 subhs \dividend, \dividend, \divisor, lsr #3
@ Step the divisor down one nibble and take 4 off order (conditionally,
@ updating the flags).
@ NOTE(review): the compare and branch around these two lines are not
@ visible in this view.
184 mov \divisor, \divisor, lsr #4
185 subges \order, \order, #4
192 @ Either 1, 2 or 3 comparison/subtractions are left.
@ The three steps below fall through into each other, halving the
@ divisor between steps.
@ NOTE(review): the dispatch that selects label 3 or 4 is not visible in
@ this view.
196 cmp \dividend, \divisor
197 subhs \dividend, \dividend, \divisor
198 mov \divisor, \divisor, lsr #1
199 3: cmp \dividend, \divisor
200 subhs \dividend, \dividend, \divisor
201 mov \divisor, \divisor, lsr #1
202 4: cmp \dividend, \divisor
203 subhs \dividend, \dividend, \divisor
@ Part of the routine closed by ENDPROC(__aeabi_uidiv) below.
@ NOTE(review): the ENTRY line, the special-case tests, the return
@ sequence and the end of this conditional block are not visible in this
@ view - confirm against the complete file.
208 #ifdef CONFIG_ARM_PATCH_IDIV
@ Macro arguments in parameter order: dividend r0, divisor r1,
@ result r2, curbit r3.
224 ARM_DIV_BODY r0, r1, r2, r3
@ Label 12: r2 receives the order derived from the divisor in r1.
233 12: ARM_DIV2_ORDER r1, r2
240 ENDPROC(__aeabi_uidiv)
245 subs r2, r1, #1 @ compare divisor with 1
247 cmpne r0, r1 @ compare dividend with divisor
249 tsthi r1, r2 @ see if divisor is power of 2
253 ARM_MOD_BODY r0, r1, r2, r3
@ Part of the routine closed by ENDPROC(__aeabi_idiv) below.
@ NOTE(review): the ENTRY line, the branches between these steps, the
@ return sequence and the end of this conditional block are not visible
@ in this view - confirm against the complete file.
260 #ifdef CONFIG_ARM_PATCH_IDIV
@ ip = r0 XOR r1, kept for the sign comparison at label 10.
269 eor ip, r0, r1 @ save the sign of the result.
@ r1 = 0 - r1 when the N flag is set.
@ NOTE(review): the instruction that sets the flags for this line is not
@ visible in this view.
271 rsbmi r1, r1, #0 @ loops below use unsigned.
@ r2 = r1 - 1; also serves as the mask for the power-of-2 test below.
272 subs r2, r1, #1 @ division by 1 or -1 ?
@ r3 = 0 - r0 when the N flag is set.
275 rsbmi r3, r0, #0 @ positive dividend value
278 tst r1, r2 @ divisor is power of 2 ?
@ Macro arguments in parameter order: dividend r3, divisor r1,
@ result r0, curbit r2.
281 ARM_DIV_BODY r3, r1, r0, r2
287 10: teq ip, r0 @ same sign ?
@ On equality r0 becomes ip shifted right arithmetically by 31, which is
@ 0 or -1 depending on the top bit of ip.
292 moveq r0, ip, asr #31
@ Label 12: r2 receives the order derived from the divisor in r1.
296 12: ARM_DIV2_ORDER r1, r2
305 ENDPROC(__aeabi_idiv)
312 rsbmi r1, r1, #0 @ loops below use unsigned.
313 movs ip, r0 @ preserve sign of dividend
314 rsbmi r0, r0, #0 @ if negative make positive
315 subs r2, r1, #1 @ compare divisor with 1
316 cmpne r0, r1 @ compare dividend with divisor
318 tsthi r1, r2 @ see if divisor is power of 2
322 ARM_MOD_BODY r0, r1, r2, r3
@ __aeabi_uidivmod: entry point that saves its two register arguments
@ around work that is not visible in this view.
@ NOTE(review): the call and the arithmetic between the push and the pop,
@ and the return instruction, are missing here - confirm against the
@ complete file.
333 ENTRY(__aeabi_uidivmod)
@ Unwind annotation listing the same registers as the push below.
335 UNWIND(.save {r0, r1, ip, lr} )
@ Push r0, r1, ip and lr.
337 stmfd sp!, {r0, r1, ip, lr}
@ Pop into r1, r2, ip and lr: the saved r0 value lands in r1 and the
@ saved r1 value lands in r2, so r0 is not overwritten by the pop.
339 ldmfd sp!, {r1, r2, ip, lr}
345 ENDPROC(__aeabi_uidivmod)
@ __aeabi_idivmod: entry point that saves its two register arguments
@ around work that is not visible in this view.
@ NOTE(review): the call and the arithmetic between the push and the pop,
@ and the return instruction, are missing here - confirm against the
@ complete file.
347 ENTRY(__aeabi_idivmod)
@ Unwind annotation listing the same registers as the push below.
349 UNWIND(.save {r0, r1, ip, lr} )
@ Push r0, r1, ip and lr.
350 stmfd sp!, {r0, r1, ip, lr}
@ Pop into r1, r2, ip and lr: the saved r0 value lands in r1 and the
@ saved r1 value lands in r2, so r0 is not overwritten by the pop.
352 ldmfd sp!, {r1, r2, ip, lr}
358 ENDPROC(__aeabi_idivmod)
368 mov r0, #0 @ About as wrong as it could be.