1 | @ libgcc1 routines for ARM cpu. |
---|
2 | @ Division and remainder, from Appendix E of the Sparc Version 8 |
---|
3 | @ Architecture Manual, with fixes from Gordon Irlam. |
---|
4 | @ Rewritten for the ARM by Richard Earnshaw (rwe@pegasus.esprit.ec.org) |
---|
5 | |
---|
6 | /* Copyright (C) 1995 Free Software Foundation, Inc. |
---|
7 | |
---|
8 | This file is free software; you can redistribute it and/or modify it |
---|
9 | under the terms of the GNU General Public License as published by the |
---|
10 | Free Software Foundation; either version 2, or (at your option) any |
---|
11 | later version. |
---|
12 | |
---|
13 | In addition to the permissions in the GNU General Public License, the |
---|
14 | Free Software Foundation gives you unlimited permission to link the |
---|
15 | compiled version of this file with other programs, and to distribute |
---|
16 | those programs without any restriction coming from the use of this |
---|
17 | file. (The General Public License restrictions do apply in other |
---|
18 | respects; for example, they cover modification of the file, and |
---|
19 | distribution when not linked into another program.) |
---|
20 | |
---|
21 | This file is distributed in the hope that it will be useful, but |
---|
22 | WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
23 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
24 | General Public License for more details. |
---|
25 | |
---|
26 | You should have received a copy of the GNU General Public License |
---|
27 | along with this program; see the file COPYING. If not, write to |
---|
28 | the Free Software Foundation, 59 Temple Place - Suite 330, |
---|
29 | Boston, MA 02111-1307, USA. */ |
---|
30 | |
---|
31 | /* As a special exception, if you link this library with other files, |
---|
32 | some of which are compiled with GCC, to produce an executable, |
---|
33 | this library does not by itself cause the resulting executable |
---|
34 | to be covered by the GNU General Public License. |
---|
35 | This exception does not however invalidate any other reasons why |
---|
36 | the executable file might be covered by the GNU General Public License. */ |
---|
37 | |
---|
38 | /* |
---|
39 | * Input: dividend and divisor in r0 and r1 respectively. |
---|
40 | * |
---|
41 | * m4 parameters: |
---|
42 | * NAME name of function to generate |
---|
43 | * OP OP=div => r0 / r1; OP=mod => r0 % r1 |
---|
44 | * S S=true => signed; S=false => unsigned |
---|
45 | * |
---|
46 | * Algorithm parameters: |
---|
47 | * N how many bits per iteration we try to get (4) |
---|
48 | * WORDSIZE total number of bits (32) |
---|
49 | * |
---|
50 | * Derived constants: |
---|
51 | * TOPBITS number of bits in the top `decade' of a number |
---|
52 | * |
---|
53 | * Important variables: |
---|
54 | * Q the partial quotient under development (initially 0) |
---|
55 | * R the remainder so far, initially the dividend |
---|
56 | * ITER number of main division loop iterations required; |
---|
57 | * equal to ceil(log2(quotient) / N). Note that this |
---|
58 | * is the log base (2^N) of the quotient. |
---|
59 | * V the current comparand, initially divisor*2^(ITER*N-1) |
---|
60 | * |
---|
61 | * Cost: |
---|
62 | * Current estimate for non-large dividend is |
---|
63 | * ceil(log2(quotient) / N) * (10 + 7N/2) + C |
---|
64 | * A large dividend is one that is at least 2^(31-TOPBITS) and takes a |
---|
65 | * different path, as the upper bits of the quotient must be developed |
---|
66 | * one bit at a time. |
---|
67 | */ |
---|
68 | |
---|
69 | /* |
---|
70 | define(N, `4')dnl |
---|
71 | define(WORDSIZE, `32')dnl |
---|
72 | define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl |
---|
73 | dnl |
---|
74 | define(dividend, `r0')dnl |
---|
75 | define(divisor, `r1')dnl |
---|
76 | define(Q, `r2')dnl |
---|
77 | define(R, `r3')dnl |
---|
78 | define(ITER, `ip')dnl |
---|
79 | define(V, `lr')dnl |
---|
80 | dnl |
---|
81 | dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d |
---|
82 | define(T, `r4')dnl |
---|
83 | define(SC, `r5')dnl |
---|
84 | ifelse(S, `true', `define(SIGN, `r6')')dnl |
---|
85 | define(REGLIST, `ifelse(S, `true', `{r4, r5, r6,', `{r4, r5,')')dnl |
---|
86 | define(ret, `ldmia sp!, REGLIST pc}')dnl |
---|
87 | dnl |
---|
88 | dnl This is the recursive definition for developing quotient digits. |
---|
89 | dnl |
---|
90 | dnl Parameters: |
---|
91 | dnl $1 the current depth, 1 <= $1 <= N |
---|
92 | dnl $2 the current accumulation of quotient bits |
---|
93 | dnl N max depth |
---|
94 | dnl |
---|
95 | dnl We add a new bit to $2 and either recurse or insert the bits in |
---|
96 | dnl the quotient. R, Q, and V are inputs and outputs as defined above; |
---|
97 | dnl the condition codes are expected to reflect the input R, and are |
---|
98 | dnl modified to reflect the output R. |
---|
99 | dnl |
---|
100 | define(DEVELOP_QUOTIENT_BITS, |
---|
101 | ` @ depth $1, accumulated bits $2 |
---|
102 | mov V, V, lsr #1 |
---|
103 | blt L.$1.eval(2^N+$2+999) |
---|
104 | @ remainder is positive |
---|
105 | subs R, R, V |
---|
106 | ifelse($1, N, |
---|
107 | ` ifelse(eval(2*$2+1<0), `0', |
---|
108 | `add Q, Q, `#'eval($2*2+1)', |
---|
109 | `sub Q, Q, `#'eval(-($2*2+1))') |
---|
110 | |
---|
111 | b 9f |
---|
112 | ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')') |
---|
113 | L.$1.eval(2^N+$2+999): |
---|
114 | @ remainder is negative |
---|
115 | adds R, R, V |
---|
116 | ifelse($1, N, |
---|
117 | ` ifelse(eval(2*$2-1<0), `0', |
---|
118 | `add Q, Q, `#'eval($2*2-1)', |
---|
119 | `sub Q, Q, `#'eval(-($2*2-1))') |
---|
120 | b 9f |
---|
121 | |
---|
122 | ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')') |
---|
123 | ifelse($1, 1, `9:')')dnl |
---|
124 | |
---|
125 | #include "trap.h" |
---|
126 | |
---|
127 | ip .req r12 |
---|
128 | sp .req r13 |
---|
129 | lr .req r14 |
---|
130 | pc .req r15 |
---|
131 | .text |
---|
132 | .globl NAME |
---|
133 | .align 0 |
---|
134 | NAME: |
---|
135 | stmdb sp!, REGLIST lr} |
---|
136 | ifelse(S, `true', |
---|
137 | ` @ compute sign of result; if neither is negative, no problem |
---|
138 | ifelse(OP, `div', `eor SIGN, divisor, dividend @ compute sign', |
---|
139 | `mov SIGN, dividend') |
---|
140 | cmp divisor, #0 |
---|
141 | rsbmi divisor, divisor, #0 |
---|
142 | beq Ldiv_zero |
---|
143 | mov V, divisor |
---|
144 | movs R, dividend |
---|
145 | rsbmi R, R, #0 @ make dividend nonnegative |
---|
146 | ', |
---|
147 | ` @ Ready to divide. Compute size of quotient; scale comparand. |
---|
148 | movs V, divisor |
---|
149 | mov R, dividend |
---|
150 | beq Ldiv_zero |
---|
151 | ') |
---|
152 | |
---|
153 | cmp R, V @ if divisor exceeds dividend, done |
---|
154 | mov Q, #0 |
---|
155 | bcc Lgot_result @ (and algorithm fails otherwise) |
---|
156 | mov T, `#'(1 << (WORDSIZE - TOPBITS - 1)) |
---|
157 | cmp R, T |
---|
158 | mov ITER, #0 |
---|
159 | bcc Lnot_really_big |
---|
160 | |
---|
161 | @ `Here the dividend is >= 2^(31-N) or so. We must be careful here, |
---|
162 | @ as our usual N-at-a-shot divide step will cause overflow and havoc. |
---|
163 | @ The number of bits in the result here is N*ITER+SC, where SC <= N. |
---|
164 | @ Compute ITER in an unorthodox manner: know we need to shift V into |
---|
165 | @ the top decade: so do not even bother to compare to R.' |
---|
166 | mov SC, #1 |
---|
167 | 1: |
---|
168 | cmp V, T |
---|
169 | bcs 3f |
---|
170 | mov V, V, lsl `#'N |
---|
171 | add ITER, ITER, #1 |
---|
172 | b 1b |
---|
173 | |
---|
174 | @ Now compute SC. |
---|
175 | 2: adds V, V, V |
---|
176 | add SC, SC, #1 |
---|
177 | bcc Lnot_too_big |
---|
178 | |
---|
179 | @ We get here if the divisor overflowed while shifting. |
---|
180 | @ This means that R has the high-order bit set. |
---|
181 | @ Restore V and subtract from R. |
---|
182 | mov T, T, lsl `#'TOPBITS |
---|
183 | mov V, V, lsr #1 |
---|
184 | add V, T, V |
---|
185 | sub SC, SC, #1 |
---|
186 | b Ldo_single_div |
---|
187 | |
---|
188 | Lnot_too_big: |
---|
189 | 3: cmp V, R |
---|
190 | bcc 2b |
---|
191 | @ beq Ldo_single_div |
---|
192 | |
---|
193 | /-* NB: these are commented out in the V8-Sparc manual as well *-/ |
---|
194 | /-* (I do not understand this) *-/ |
---|
195 | @ V > R: went too far: back up 1 step |
---|
196 | @ srl V, 1, V |
---|
197 | @ dec SC |
---|
198 | @ do single-bit divide steps |
---|
199 | @ |
---|
200 | @ We have to be careful here. We know that R >= V, so we can do the |
---|
201 | @ first divide step without thinking. BUT, the others are conditional, |
---|
202 | @ and are only done if R >= 0. Because both R and V may have the high- |
---|
203 | @ order bit set in the first step, just falling into the regular |
---|
204 | @ division loop will mess up the first time around. |
---|
205 | @ So we unroll slightly... |
---|
206 | Ldo_single_div: |
---|
207 | subs SC, SC, #1 |
---|
208 | blt Lend_regular_divide |
---|
209 | sub R, R, V |
---|
210 | mov Q, #1 |
---|
211 | b Lend_single_divloop |
---|
212 | Lsingle_divloop: |
---|
213 | cmp R, #0 |
---|
214 | mov Q, Q, lsl #1 |
---|
215 | mov V, V, lsr #1 |
---|
216 | @ R >= 0 |
---|
217 | subpl R, R, V |
---|
218 | addpl Q, Q, #1 |
---|
219 | @ R < 0 |
---|
220 | addmi R, R, V |
---|
221 | submi Q, Q, #1 |
---|
222 | Lend_single_divloop: |
---|
223 | subs SC, SC, #1 |
---|
224 | bge Lsingle_divloop |
---|
225 | b Lend_regular_divide |
---|
226 | |
---|
227 | 1: |
---|
228 | add ITER, ITER, #1 |
---|
229 | Lnot_really_big: |
---|
230 | mov V, V, lsl `#'N |
---|
231 | cmp V, R |
---|
232 | bls 1b |
---|
233 | @ |
---|
234 | @ HOW CAN ITER EVER BE -1 HERE ????? |
---|
235 | @ |
---|
236 | cmn ITER, #1 |
---|
237 | beq Lgot_result |
---|
238 | |
---|
239 | Ldivloop: |
---|
240 | cmp R, #0 @ set up for initial iteration |
---|
241 | mov Q, Q, lsl `#'N |
---|
242 | DEVELOP_QUOTIENT_BITS(1, 0) |
---|
243 | Lend_regular_divide: |
---|
244 | subs ITER, ITER, #1 |
---|
245 | bge Ldivloop |
---|
246 | cmp R, #0 |
---|
247 | @ non-restoring fixup here (one instruction only!) |
---|
248 | ifelse(OP, `div', |
---|
249 | ` sublt Q, Q, #1 |
---|
250 | ', ` addlt R, divisor, R |
---|
251 | ') |
---|
252 | |
---|
253 | Lgot_result: |
---|
254 | ifelse(S, `true', |
---|
255 | ` @ check to see if answer should be < 0 |
---|
256 | cmp SIGN, #0 |
---|
257 | ifelse(OP, `div', `rsbmi Q, Q, #0', `rsbmi R, R, #0') |
---|
258 | ') |
---|
259 | ifelse(OP, `div', `mov r0, Q', `mov r0, R') |
---|
260 | ret |
---|
261 | |
---|
262 | Ldiv_zero: |
---|
263 | @ Divide by zero trap. If it returns, return 0 (about as |
---|
264 | @ wrong as possible, but that is what SunOS does...). |
---|
265 | bl ___div0 |
---|
266 | mov r0, #0 |
---|
267 | ret |
---|
268 | */ |
---|
269 | |
---|
270 | #ifdef L_udivsi3 |
---|
271 | |
---|
272 | ip .req r12 |
---|
273 | sp .req r13 |
---|
274 | lr .req r14 |
---|
275 | pc .req r15 |
---|
276 | .text |
---|
277 | .globl ___udivsi3 |
---|
278 | .align 0 |
---|
279 | ___udivsi3: |
---|
280 | stmdb sp!, {r4, r5, lr} |
---|
281 | @ Unsigned 32-bit divide: r0 = dividend, r1 = divisor; the quotient is built in r2 and returned in r0. Ready to divide. Compute size of quotient; scale comparand. Working registers: lr = comparand V (scaled divisor), r3 = remainder R, ip = ITER, r4 = T, r5 = SC. The movs below also sets Z for the divide-by-zero branch. |
---|
282 | movs lr, r1 |
---|
283 | mov r3, r0 |
---|
284 | beq Ldiv_zero |
---|
285 | |
---|
286 | |
---|
287 | cmp r3, lr @ if the divisor (lr) exceeds the dividend (r3), the quotient is 0: done |
---|
288 | mov r2, #0 |
---|
289 | bcc Lgot_result @ (and algorithm fails otherwise) |
---|
290 | mov r4, #(1 << (32 - 4 - 1)) |
---|
291 | cmp r3, r4 |
---|
292 | mov ip, #0 |
---|
293 | bcc Lnot_really_big |
---|
294 | |
---|
295 | @ Here the dividend (r3) is >= 2^(31-N) = 2^27 (N = 4). We must be careful here, |
---|
296 | @ as our usual N-at-a-shot divide step will cause overflow and havoc. |
---|
297 | @ The number of bits in the result here is N*ITER+SC, where SC <= N (ITER is ip, SC is r5). |
---|
298 | @ Compute ITER in an unorthodox manner: know we need to shift V (lr) into |
---|
299 | @ the top decade: so shift lr left N bits at a time until it reaches r4 (2^27), without comparing to R (r3). |
---|
300 | mov r5, #1 |
---|
301 | 1: |
---|
302 | cmp lr, r4 |
---|
303 | bcs 3f |
---|
304 | mov lr, lr, lsl #4 |
---|
305 | add ip, ip, #1 |
---|
306 | b 1b |
---|
307 | |
---|
308 | @ Now compute r5 (SC): double lr and count in r5 until lr is no longer below r3, or the doubling carries out of bit 31. |
---|
309 | 2: adds lr, lr, lr |
---|
310 | add r5, r5, #1 |
---|
311 | bcc Lnot_too_big |
---|
312 | |
---|
313 | @ We get here if lr (the scaled divisor) overflowed while shifting. |
---|
314 | @ This means that r3 has the high-order bit set. |
---|
315 | @ Restore lr (halve it, then add back the lost top bit via r4 = 2^31), undo the r5 count, and go subtract from r3. |
---|
316 | mov r4, r4, lsl #4 |
---|
317 | mov lr, lr, lsr #1 |
---|
318 | add lr, r4, lr |
---|
319 | sub r5, r5, #1 |
---|
320 | b Ldo_single_div |
---|
321 | |
---|
322 | Lnot_too_big: |
---|
323 | 3: cmp lr, r3 |
---|
324 | bcc 2b |
---|
325 | @ beq Ldo_single_div |
---|
326 | |
---|
327 | /* NB: these are commented out in the V8-Sparc manual as well */ |
---|
328 | /* (I do not understand this) */ |
---|
329 | @ lr > r3: went too far: back up 1 step (the two disabled SPARC-style instructions follow) |
---|
330 | @ srl lr, 1, lr |
---|
331 | @ dec r5 |
---|
332 | @ do single-bit divide steps: r5 (SC) counts the steps still to do |
---|
333 | @ |
---|
334 | @ We have to be careful here. We know that r3 >= lr, so we can do the |
---|
335 | @ first divide step without thinking. BUT, the others are conditional, |
---|
336 | @ and are only done if r3 >= 0. Because both r3 and lr may have the high- |
---|
337 | @ order bit set in the first step, just falling into the regular |
---|
338 | @ division loop will mess up the first time around. |
---|
339 | @ So we unroll slightly... NOTE(review): the scaling loop exits with lr >= r3, so r3 >= lr is not guaranteed when lr > r3; the first subtraction then leaves r3 negative and the add-back steps below presumably correct it -- confirm. |
---|
340 | Ldo_single_div: |
---|
341 | subs r5, r5, #1 |
---|
342 | blt Lend_regular_divide |
---|
343 | sub r3, r3, lr |
---|
344 | mov r2, #1 |
---|
345 | b Lend_single_divloop |
---|
346 | Lsingle_divloop: |
---|
347 | cmp r3, #0 |
---|
348 | mov r2, r2, lsl #1 |
---|
349 | mov lr, lr, lsr #1 |
---|
350 | @ r3 >= 0: subtract the halved comparand and add 1 to the quotient (the flags still come from the cmp above) |
---|
351 | subpl r3, r3, lr |
---|
352 | addpl r2, r2, #1 |
---|
353 | @ r3 < 0: add the halved comparand back and subtract 1 from the quotient |
---|
354 | addmi r3, r3, lr |
---|
355 | submi r2, r2, #1 |
---|
356 | Lend_single_divloop: |
---|
357 | subs r5, r5, #1 |
---|
358 | bge Lsingle_divloop |
---|
359 | b Lend_regular_divide |
---|
360 | |
---|
361 | 1: |
---|
362 | add ip, ip, #1 |
---|
363 | Lnot_really_big: |
---|
364 | mov lr, lr, lsl #4 |
---|
365 | cmp lr, r3 |
---|
366 | bls 1b |
---|
367 | @ Small-dividend path: lr has been shifted left 4 bits at a time until lr > r3; ip counts the shifts after the first. |
---|
368 | @ HOW CAN ip EVER BE -1 HERE ????? (cmn ip, #1 tests ip == -1; ip is 0 on entry to Lnot_really_big and is only incremented in this loop) |
---|
369 | @ |
---|
370 | cmn ip, #1 |
---|
371 | beq Lgot_result |
---|
372 | |
---|
373 | Ldivloop: |
---|
374 | cmp r3, #0 @ set up for initial iteration: the first blt below tests the sign of r3 |
---|
375 | mov r2, r2, lsl #4 |
---|
376 | @ depth 1, accumulated bits 0. Unrolled 4-level non-restoring step tree: each level halves lr, then subtracts it from r3 (digit +1) when r3 is not negative or adds it (digit -1) otherwise; each leaf adds the weighted sum of the four digits (an odd value from -15 to +15) to r2 and jumps to 9. |
---|
377 | mov lr, lr, lsr #1 |
---|
378 | blt L.1.1015 |
---|
379 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
380 | subs r3, r3, lr |
---|
381 | @ depth 2, accumulated bits 1 |
---|
382 | mov lr, lr, lsr #1 |
---|
383 | blt L.2.1016 |
---|
384 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
385 | subs r3, r3, lr |
---|
386 | @ depth 3, accumulated bits 3 |
---|
387 | mov lr, lr, lsr #1 |
---|
388 | blt L.3.1018 |
---|
389 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
390 | subs r3, r3, lr |
---|
391 | @ depth 4, accumulated bits 7 |
---|
392 | mov lr, lr, lsr #1 |
---|
393 | blt L.4.1022 |
---|
394 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
395 | subs r3, r3, lr |
---|
396 | add r2, r2, #15 |
---|
397 | |
---|
398 | b 9f |
---|
399 | |
---|
400 | L.4.1022: |
---|
401 | @ remainder is negative: r3 += lr, digit -1 |
---|
402 | adds r3, r3, lr |
---|
403 | add r2, r2, #13 |
---|
404 | b 9f |
---|
405 | |
---|
406 | |
---|
407 | |
---|
408 | L.3.1018: |
---|
409 | @ remainder is negative: r3 += lr, digit -1 |
---|
410 | adds r3, r3, lr |
---|
411 | @ depth 4, accumulated bits 5 |
---|
412 | mov lr, lr, lsr #1 |
---|
413 | blt L.4.1020 |
---|
414 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
415 | subs r3, r3, lr |
---|
416 | add r2, r2, #11 |
---|
417 | |
---|
418 | b 9f |
---|
419 | |
---|
420 | L.4.1020: |
---|
421 | @ remainder is negative: r3 += lr, digit -1 |
---|
422 | adds r3, r3, lr |
---|
423 | add r2, r2, #9 |
---|
424 | b 9f |
---|
425 | |
---|
426 | |
---|
427 | |
---|
428 | |
---|
429 | L.2.1016: |
---|
430 | @ remainder is negative: r3 += lr, digit -1 |
---|
431 | adds r3, r3, lr |
---|
432 | @ depth 3, accumulated bits 1 |
---|
433 | mov lr, lr, lsr #1 |
---|
434 | blt L.3.1016 |
---|
435 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
436 | subs r3, r3, lr |
---|
437 | @ depth 4, accumulated bits 3 |
---|
438 | mov lr, lr, lsr #1 |
---|
439 | blt L.4.1018 |
---|
440 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
441 | subs r3, r3, lr |
---|
442 | add r2, r2, #7 |
---|
443 | |
---|
444 | b 9f |
---|
445 | |
---|
446 | L.4.1018: |
---|
447 | @ remainder is negative: r3 += lr, digit -1 |
---|
448 | adds r3, r3, lr |
---|
449 | add r2, r2, #5 |
---|
450 | b 9f |
---|
451 | |
---|
452 | |
---|
453 | |
---|
454 | L.3.1016: |
---|
455 | @ remainder is negative: r3 += lr, digit -1 |
---|
456 | adds r3, r3, lr |
---|
457 | @ depth 4, accumulated bits 1 |
---|
458 | mov lr, lr, lsr #1 |
---|
459 | blt L.4.1016 |
---|
460 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
461 | subs r3, r3, lr |
---|
462 | add r2, r2, #3 |
---|
463 | |
---|
464 | b 9f |
---|
465 | |
---|
466 | L.4.1016: |
---|
467 | @ remainder is negative: r3 += lr, digit -1 |
---|
468 | adds r3, r3, lr |
---|
469 | add r2, r2, #1 |
---|
470 | b 9f |
---|
471 | |
---|
472 | |
---|
473 | |
---|
474 | |
---|
475 | |
---|
476 | L.1.1015: |
---|
477 | @ remainder is negative: r3 += lr, digit -1 |
---|
478 | adds r3, r3, lr |
---|
479 | @ depth 2, accumulated bits -1 |
---|
480 | mov lr, lr, lsr #1 |
---|
481 | blt L.2.1014 |
---|
482 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
483 | subs r3, r3, lr |
---|
484 | @ depth 3, accumulated bits -1 |
---|
485 | mov lr, lr, lsr #1 |
---|
486 | blt L.3.1014 |
---|
487 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
488 | subs r3, r3, lr |
---|
489 | @ depth 4, accumulated bits -1 |
---|
490 | mov lr, lr, lsr #1 |
---|
491 | blt L.4.1014 |
---|
492 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
493 | subs r3, r3, lr |
---|
494 | sub r2, r2, #1 |
---|
495 | |
---|
496 | b 9f |
---|
497 | |
---|
498 | L.4.1014: |
---|
499 | @ remainder is negative: r3 += lr, digit -1 |
---|
500 | adds r3, r3, lr |
---|
501 | sub r2, r2, #3 |
---|
502 | b 9f |
---|
503 | |
---|
504 | |
---|
505 | |
---|
506 | L.3.1014: |
---|
507 | @ remainder is negative: r3 += lr, digit -1 |
---|
508 | adds r3, r3, lr |
---|
509 | @ depth 4, accumulated bits -3 |
---|
510 | mov lr, lr, lsr #1 |
---|
511 | blt L.4.1012 |
---|
512 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
513 | subs r3, r3, lr |
---|
514 | sub r2, r2, #5 |
---|
515 | |
---|
516 | b 9f |
---|
517 | |
---|
518 | L.4.1012: |
---|
519 | @ remainder is negative: r3 += lr, digit -1 |
---|
520 | adds r3, r3, lr |
---|
521 | sub r2, r2, #7 |
---|
522 | b 9f |
---|
523 | |
---|
524 | |
---|
525 | |
---|
526 | |
---|
527 | L.2.1014: |
---|
528 | @ remainder is negative: r3 += lr, digit -1 |
---|
529 | adds r3, r3, lr |
---|
530 | @ depth 3, accumulated bits -3 |
---|
531 | mov lr, lr, lsr #1 |
---|
532 | blt L.3.1012 |
---|
533 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
534 | subs r3, r3, lr |
---|
535 | @ depth 4, accumulated bits -5 |
---|
536 | mov lr, lr, lsr #1 |
---|
537 | blt L.4.1010 |
---|
538 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
539 | subs r3, r3, lr |
---|
540 | sub r2, r2, #9 |
---|
541 | |
---|
542 | b 9f |
---|
543 | |
---|
544 | L.4.1010: |
---|
545 | @ remainder is negative: r3 += lr, digit -1 |
---|
546 | adds r3, r3, lr |
---|
547 | sub r2, r2, #11 |
---|
548 | b 9f |
---|
549 | |
---|
550 | |
---|
551 | |
---|
552 | L.3.1012: |
---|
553 | @ remainder is negative: r3 += lr, digit -1 |
---|
554 | adds r3, r3, lr |
---|
555 | @ depth 4, accumulated bits -7 |
---|
556 | mov lr, lr, lsr #1 |
---|
557 | blt L.4.1008 |
---|
558 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
559 | subs r3, r3, lr |
---|
560 | sub r2, r2, #13 |
---|
561 | |
---|
562 | b 9f |
---|
563 | |
---|
564 | L.4.1008: |
---|
565 | @ remainder is negative: r3 += lr, digit -1 |
---|
566 | adds r3, r3, lr |
---|
567 | sub r2, r2, #15 |
---|
568 | b 9f |
---|
569 | |
---|
570 | |
---|
571 | |
---|
572 | |
---|
573 | |
---|
574 | 9: |
---|
575 | Lend_regular_divide: |
---|
576 | subs ip, ip, #1 |
---|
577 | bge Ldivloop |
---|
578 | cmp r3, #0 |
---|
579 | @ non-restoring fixup here (one instruction only!): if the final remainder r3 is negative, step the quotient r2 down by one |
---|
580 | sublt r2, r2, #1 |
---|
581 | |
---|
582 | |
---|
583 | Lgot_result: |
---|
584 | |
---|
585 | mov r0, r2 |
---|
586 | ldmia sp!, {r4, r5, pc} |
---|
587 | |
---|
588 | Ldiv_zero: |
---|
589 | @ Divide by zero trap: call ___div0. If it returns, return 0 (about as |
---|
590 | @ wrong as possible, but that is what SunOS does...). The ldmia restores r4 and r5 and loads the saved lr into pc. |
---|
591 | bl ___div0 |
---|
592 | mov r0, #0 |
---|
593 | ldmia sp!, {r4, r5, pc} |
---|
594 | |
---|
595 | #endif /* L_udivsi3 */ |
---|
596 | |
---|
597 | #ifdef L_divsi3 |
---|
598 | |
---|
599 | ip .req r12 |
---|
600 | sp .req r13 |
---|
601 | lr .req r14 |
---|
602 | pc .req r15 |
---|
603 | .text |
---|
604 | .globl ___divsi3 |
---|
605 | .align 0 |
---|
606 | ___divsi3: |
---|
607 | stmdb sp!, {r4, r5, r6, lr} |
---|
608 | @ Signed 32-bit divide: r0 = dividend, r1 = divisor; the quotient is built in r2 and returned in r0. compute sign of result; if neither is negative, no problem. The division itself runs on the negated-if-negative operands (lr = divisor, r3 = dividend), and r6 keeps the sign for the final fixup. |
---|
609 | eor r6, r1, r0 @ compute sign: bit 31 of r6 is set when the operand signs differ |
---|
610 | cmp r1, #0 |
---|
611 | rsbmi r1, r1, #0 |
---|
612 | beq Ldiv_zero |
---|
613 | mov lr, r1 |
---|
614 | movs r3, r0 |
---|
615 | rsbmi r3, r3, #0 @ make dividend nonnegative |
---|
616 | |
---|
617 | |
---|
618 | cmp r3, lr @ if the divisor magnitude (lr) exceeds the dividend magnitude (r3), the quotient is 0: done |
---|
619 | mov r2, #0 |
---|
620 | bcc Lgot_result @ (and algorithm fails otherwise) |
---|
621 | mov r4, #(1 << (32 - 4 - 1)) |
---|
622 | cmp r3, r4 |
---|
623 | mov ip, #0 |
---|
624 | bcc Lnot_really_big |
---|
625 | |
---|
626 | @ Here the dividend (r3) is >= 2^(31-N) = 2^27 (N = 4). We must be careful here, |
---|
627 | @ as our usual N-at-a-shot divide step will cause overflow and havoc. |
---|
628 | @ The number of bits in the result here is N*ITER+SC, where SC <= N (ITER is ip, SC is r5). |
---|
629 | @ Compute ITER in an unorthodox manner: know we need to shift V (lr) into |
---|
630 | @ the top decade: so shift lr left N bits at a time until it reaches r4 (2^27), without comparing to R (r3). |
---|
631 | mov r5, #1 |
---|
632 | 1: |
---|
633 | cmp lr, r4 |
---|
634 | bcs 3f |
---|
635 | mov lr, lr, lsl #4 |
---|
636 | add ip, ip, #1 |
---|
637 | b 1b |
---|
638 | |
---|
639 | @ Now compute r5 (SC): double lr and count in r5 until lr is no longer below r3, or the doubling carries out of bit 31. |
---|
640 | 2: adds lr, lr, lr |
---|
641 | add r5, r5, #1 |
---|
642 | bcc Lnot_too_big |
---|
643 | |
---|
644 | @ We get here if lr (the scaled divisor) overflowed while shifting. |
---|
645 | @ This means that r3 has the high-order bit set. |
---|
646 | @ Restore lr (halve it, then add back the lost top bit via r4 = 2^31), undo the r5 count, and go subtract from r3. |
---|
647 | mov r4, r4, lsl #4 |
---|
648 | mov lr, lr, lsr #1 |
---|
649 | add lr, r4, lr |
---|
650 | sub r5, r5, #1 |
---|
651 | b Ldo_single_div |
---|
652 | |
---|
653 | Lnot_too_big: |
---|
654 | 3: cmp lr, r3 |
---|
655 | bcc 2b |
---|
656 | @ beq Ldo_single_div |
---|
657 | |
---|
658 | /* NB: these are commented out in the V8-Sparc manual as well */ |
---|
659 | /* (I do not understand this) */ |
---|
660 | @ lr > r3: went too far: back up 1 step (the two disabled SPARC-style instructions follow) |
---|
661 | @ srl lr, 1, lr |
---|
662 | @ dec r5 |
---|
663 | @ do single-bit divide steps: r5 (SC) counts the steps still to do |
---|
664 | @ |
---|
665 | @ We have to be careful here. We know that r3 >= lr, so we can do the |
---|
666 | @ first divide step without thinking. BUT, the others are conditional, |
---|
667 | @ and are only done if r3 >= 0. Because both r3 and lr may have the high- |
---|
668 | @ order bit set in the first step, just falling into the regular |
---|
669 | @ division loop will mess up the first time around. |
---|
670 | @ So we unroll slightly... NOTE(review): the scaling loop exits with lr >= r3, so r3 >= lr is not guaranteed when lr > r3; the first subtraction then leaves r3 negative and the add-back steps below presumably correct it -- confirm. |
---|
671 | Ldo_single_div: |
---|
672 | subs r5, r5, #1 |
---|
673 | blt Lend_regular_divide |
---|
674 | sub r3, r3, lr |
---|
675 | mov r2, #1 |
---|
676 | b Lend_single_divloop |
---|
677 | Lsingle_divloop: |
---|
678 | cmp r3, #0 |
---|
679 | mov r2, r2, lsl #1 |
---|
680 | mov lr, lr, lsr #1 |
---|
681 | @ r3 >= 0: subtract the halved comparand and add 1 to the quotient (the flags still come from the cmp above) |
---|
682 | subpl r3, r3, lr |
---|
683 | addpl r2, r2, #1 |
---|
684 | @ r3 < 0: add the halved comparand back and subtract 1 from the quotient |
---|
685 | addmi r3, r3, lr |
---|
686 | submi r2, r2, #1 |
---|
687 | Lend_single_divloop: |
---|
688 | subs r5, r5, #1 |
---|
689 | bge Lsingle_divloop |
---|
690 | b Lend_regular_divide |
---|
691 | |
---|
692 | 1: |
---|
693 | add ip, ip, #1 |
---|
694 | Lnot_really_big: |
---|
695 | mov lr, lr, lsl #4 |
---|
696 | cmp lr, r3 |
---|
697 | bls 1b |
---|
698 | @ Small-dividend path: lr has been shifted left 4 bits at a time until lr > r3; ip counts the shifts after the first. |
---|
699 | @ HOW CAN ip EVER BE -1 HERE ????? (cmn ip, #1 tests ip == -1; ip is 0 on entry to Lnot_really_big and is only incremented in this loop) |
---|
700 | @ |
---|
701 | cmn ip, #1 |
---|
702 | beq Lgot_result |
---|
703 | |
---|
704 | Ldivloop: |
---|
705 | cmp r3, #0 @ set up for initial iteration: the first blt below tests the sign of r3 |
---|
706 | mov r2, r2, lsl #4 |
---|
707 | @ depth 1, accumulated bits 0. Unrolled 4-level non-restoring step tree: each level halves lr, then subtracts it from r3 (digit +1) when r3 is not negative or adds it (digit -1) otherwise; each leaf adds the weighted sum of the four digits (an odd value from -15 to +15) to r2 and jumps to 9. |
---|
708 | mov lr, lr, lsr #1 |
---|
709 | blt L.1.1015 |
---|
710 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
711 | subs r3, r3, lr |
---|
712 | @ depth 2, accumulated bits 1 |
---|
713 | mov lr, lr, lsr #1 |
---|
714 | blt L.2.1016 |
---|
715 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
716 | subs r3, r3, lr |
---|
717 | @ depth 3, accumulated bits 3 |
---|
718 | mov lr, lr, lsr #1 |
---|
719 | blt L.3.1018 |
---|
720 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
721 | subs r3, r3, lr |
---|
722 | @ depth 4, accumulated bits 7 |
---|
723 | mov lr, lr, lsr #1 |
---|
724 | blt L.4.1022 |
---|
725 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
726 | subs r3, r3, lr |
---|
727 | add r2, r2, #15 |
---|
728 | |
---|
729 | b 9f |
---|
730 | |
---|
731 | L.4.1022: |
---|
732 | @ remainder is negative: r3 += lr, digit -1 |
---|
733 | adds r3, r3, lr |
---|
734 | add r2, r2, #13 |
---|
735 | b 9f |
---|
736 | |
---|
737 | |
---|
738 | |
---|
739 | L.3.1018: |
---|
740 | @ remainder is negative: r3 += lr, digit -1 |
---|
741 | adds r3, r3, lr |
---|
742 | @ depth 4, accumulated bits 5 |
---|
743 | mov lr, lr, lsr #1 |
---|
744 | blt L.4.1020 |
---|
745 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
746 | subs r3, r3, lr |
---|
747 | add r2, r2, #11 |
---|
748 | |
---|
749 | b 9f |
---|
750 | |
---|
751 | L.4.1020: |
---|
752 | @ remainder is negative: r3 += lr, digit -1 |
---|
753 | adds r3, r3, lr |
---|
754 | add r2, r2, #9 |
---|
755 | b 9f |
---|
756 | |
---|
757 | |
---|
758 | |
---|
759 | |
---|
760 | L.2.1016: |
---|
761 | @ remainder is negative: r3 += lr, digit -1 |
---|
762 | adds r3, r3, lr |
---|
763 | @ depth 3, accumulated bits 1 |
---|
764 | mov lr, lr, lsr #1 |
---|
765 | blt L.3.1016 |
---|
766 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
767 | subs r3, r3, lr |
---|
768 | @ depth 4, accumulated bits 3 |
---|
769 | mov lr, lr, lsr #1 |
---|
770 | blt L.4.1018 |
---|
771 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
772 | subs r3, r3, lr |
---|
773 | add r2, r2, #7 |
---|
774 | |
---|
775 | b 9f |
---|
776 | |
---|
777 | L.4.1018: |
---|
778 | @ remainder is negative: r3 += lr, digit -1 |
---|
779 | adds r3, r3, lr |
---|
780 | add r2, r2, #5 |
---|
781 | b 9f |
---|
782 | |
---|
783 | |
---|
784 | |
---|
785 | L.3.1016: |
---|
786 | @ remainder is negative: r3 += lr, digit -1 |
---|
787 | adds r3, r3, lr |
---|
788 | @ depth 4, accumulated bits 1 |
---|
789 | mov lr, lr, lsr #1 |
---|
790 | blt L.4.1016 |
---|
791 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
792 | subs r3, r3, lr |
---|
793 | add r2, r2, #3 |
---|
794 | |
---|
795 | b 9f |
---|
796 | |
---|
797 | L.4.1016: |
---|
798 | @ remainder is negative: r3 += lr, digit -1 |
---|
799 | adds r3, r3, lr |
---|
800 | add r2, r2, #1 |
---|
801 | b 9f |
---|
802 | |
---|
803 | |
---|
804 | |
---|
805 | |
---|
806 | |
---|
807 | L.1.1015: |
---|
808 | @ remainder is negative: r3 += lr, digit -1 |
---|
809 | adds r3, r3, lr |
---|
810 | @ depth 2, accumulated bits -1 |
---|
811 | mov lr, lr, lsr #1 |
---|
812 | blt L.2.1014 |
---|
813 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
814 | subs r3, r3, lr |
---|
815 | @ depth 3, accumulated bits -1 |
---|
816 | mov lr, lr, lsr #1 |
---|
817 | blt L.3.1014 |
---|
818 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
819 | subs r3, r3, lr |
---|
820 | @ depth 4, accumulated bits -1 |
---|
821 | mov lr, lr, lsr #1 |
---|
822 | blt L.4.1014 |
---|
823 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
824 | subs r3, r3, lr |
---|
825 | sub r2, r2, #1 |
---|
826 | |
---|
827 | b 9f |
---|
828 | |
---|
829 | L.4.1014: |
---|
830 | @ remainder is negative: r3 += lr, digit -1 |
---|
831 | adds r3, r3, lr |
---|
832 | sub r2, r2, #3 |
---|
833 | b 9f |
---|
834 | |
---|
835 | |
---|
836 | |
---|
837 | L.3.1014: |
---|
838 | @ remainder is negative: r3 += lr, digit -1 |
---|
839 | adds r3, r3, lr |
---|
840 | @ depth 4, accumulated bits -3 |
---|
841 | mov lr, lr, lsr #1 |
---|
842 | blt L.4.1012 |
---|
843 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
844 | subs r3, r3, lr |
---|
845 | sub r2, r2, #5 |
---|
846 | |
---|
847 | b 9f |
---|
848 | |
---|
849 | L.4.1012: |
---|
850 | @ remainder is negative: r3 += lr, digit -1 |
---|
851 | adds r3, r3, lr |
---|
852 | sub r2, r2, #7 |
---|
853 | b 9f |
---|
854 | |
---|
855 | |
---|
856 | |
---|
857 | |
---|
858 | L.2.1014: |
---|
859 | @ remainder is negative: r3 += lr, digit -1 |
---|
860 | adds r3, r3, lr |
---|
861 | @ depth 3, accumulated bits -3 |
---|
862 | mov lr, lr, lsr #1 |
---|
863 | blt L.3.1012 |
---|
864 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
865 | subs r3, r3, lr |
---|
866 | @ depth 4, accumulated bits -5 |
---|
867 | mov lr, lr, lsr #1 |
---|
868 | blt L.4.1010 |
---|
869 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
870 | subs r3, r3, lr |
---|
871 | sub r2, r2, #9 |
---|
872 | |
---|
873 | b 9f |
---|
874 | |
---|
875 | L.4.1010: |
---|
876 | @ remainder is negative: r3 += lr, digit -1 |
---|
877 | adds r3, r3, lr |
---|
878 | sub r2, r2, #11 |
---|
879 | b 9f |
---|
880 | |
---|
881 | |
---|
882 | |
---|
883 | L.3.1012: |
---|
884 | @ remainder is negative: r3 += lr, digit -1 |
---|
885 | adds r3, r3, lr |
---|
886 | @ depth 4, accumulated bits -7 |
---|
887 | mov lr, lr, lsr #1 |
---|
888 | blt L.4.1008 |
---|
889 | @ remainder is not negative: r3 -= lr, digit +1 |
---|
890 | subs r3, r3, lr |
---|
891 | sub r2, r2, #13 |
---|
892 | |
---|
893 | b 9f |
---|
894 | |
---|
895 | L.4.1008: |
---|
896 | @ remainder is negative: r3 += lr, digit -1 |
---|
897 | adds r3, r3, lr |
---|
898 | sub r2, r2, #15 |
---|
899 | b 9f |
---|
900 | |
---|
901 | |
---|
902 | |
---|
903 | |
---|
904 | |
---|
905 | 9: |
---|
906 | Lend_regular_divide: |
---|
907 | subs ip, ip, #1 |
---|
908 | bge Ldivloop |
---|
909 | cmp r3, #0 |
---|
910 | @ non-restoring fixup here (one instruction only!): if the final remainder r3 is negative, step the quotient r2 down by one |
---|
911 | sublt r2, r2, #1 |
---|
912 | |
---|
913 | |
---|
914 | Lgot_result: |
---|
915 | @ check to see if answer should be < 0: r6 holds r1 eor r0 from entry, so a negative r6 means the operand signs differed and the quotient is negated |
---|
916 | cmp r6, #0 |
---|
917 | rsbmi r2, r2, #0 |
---|
918 | |
---|
919 | mov r0, r2 |
---|
920 | ldmia sp!, {r4, r5, r6, pc} |
---|
921 | |
---|
922 | Ldiv_zero: |
---|
923 | @ Divide by zero trap: call ___div0. If it returns, return 0 (about as |
---|
924 | @ wrong as possible, but that is what SunOS does...). The ldmia restores r4, r5 and r6 and loads the saved lr into pc. |
---|
925 | bl ___div0 |
---|
926 | mov r0, #0 |
---|
927 | ldmia sp!, {r4, r5, r6, pc} |
---|
928 | |
---|
929 | #endif /* L_divsi3 */ |
---|
930 | |
---|
931 | #ifdef L_umodsi3 |
---|
932 | |
---|
933 | ip .req r12 |
---|
934 | sp .req r13 |
---|
935 | lr .req r14 |
---|
936 | pc .req r15 |
---|
937 | .text |
---|
938 | .globl ___umodsi3 |
---|
939 | .align 0 |
---|
/* ___umodsi3: unsigned 32-bit remainder by non-restoring division. */
/* In:  r0 = dividend, r1 = divisor. */
/* Out: r0 = remainder (copied from r3 at Lgot_result). */
/*      If r1 is zero, ___div0 is called and, if it returns, r0 = 0. */
/* Register roles inside the routine: */
/*   r3 = running remainder (starts as the dividend) */
/*   lr = working copy of the divisor, shifted up and then back down */
/*   r2 = quotient accumulator (maintained but never returned) */
/*   ip = number of extra 4-bit passes through Ldivloop */
/*   r5 = number of single-bit steps for large dividends */
/*   r4 = the constant 1 << 27 (later 1 << 31 on the overflow path) */
/* r4, r5 and lr are pushed on entry; the exit pops the saved lr into pc. */
940 | ___umodsi3: |
---|
941 | stmdb sp!, {r4, r5, lr} |
---|
942 | @ Ready to divide. Compute size of quotient; scale comparand. |
---|
/* movs sets Z when the divisor is zero. The mov that follows does not */
/* touch the flags, so the beq below still tests r1. */
943 | movs lr, r1 |
---|
944 | mov r3, r0 |
---|
945 | beq Ldiv_zero |
---|
946 | |
---|
947 | |
---|
/* Unsigned compare: when r3 is below lr the dividend is already the */
/* remainder, so go straight to the exit. */
948 | cmp r3, lr @ if r1 exceeds r0, done |
---|
949 | mov r2, #0 |
---|
950 | bcc Lgot_result @ (and algorithm fails otherwise) |
---|
/* r4 = 1 << 27. Dividends below it use only the 4-bits-per-pass loop */
/* entered at Lnot_really_big; larger ones get single-bit steps first. */
951 | mov r4, #(1 << (32 - 4 - 1)) |
---|
952 | cmp r3, r4 |
---|
953 | mov ip, #0 |
---|
954 | bcc Lnot_really_big |
---|
955 | |
---|
956 | @ Here the dividend is >= 2^(31-N) or so. We must be careful here, |
---|
957 | @ as our usual N-at-a-shot divide step will cause overflow and havoc. |
---|
958 | @ The number of bits in the result here is N*ITER+SC, where SC <= N. |
---|
959 | @ Compute ITER in an unorthodox manner: know we need to shift V into |
---|
960 | @ the top decade: so do not even bother to compare to R. |
---|
961 | mov r5, #1 |
---|
/* Shift lr left four bits at a time, counting the shifts in ip, until */
/* lr is at least r4 (unsigned). */
962 | 1: |
---|
963 | cmp lr, r4 |
---|
964 | bcs 3f |
---|
965 | mov lr, lr, lsl #4 |
---|
966 | add ip, ip, #1 |
---|
967 | b 1b |
---|
968 | |
---|
969 | @ Now compute r5. |
---|
/* Reached from 3: while lr is below r3. Doubles lr and counts the */
/* doubling in r5; adds sets carry when the top bit is shifted out. */
970 | 2: adds lr, lr, lr |
---|
971 | add r5, r5, #1 |
---|
972 | bcc Lnot_too_big |
---|
973 | |
---|
974 | @ We get here if the r1 overflowed while shifting. |
---|
975 | @ This means that r3 has the high-order bit set. |
---|
976 | @ Restore lr and subtract from r3. |
---|
/* r4 << 4 is 1 << 31: together with the shift right by one this puts */
/* back the bit lost by the doubling. r5 is decremented to undo the */
/* increment made just above. */
977 | mov r4, r4, lsl #4 |
---|
978 | mov lr, lr, lsr #1 |
---|
979 | add lr, r4, lr |
---|
980 | sub r5, r5, #1 |
---|
981 | b Ldo_single_div |
---|
982 | |
---|
983 | Lnot_too_big: |
---|
984 | 3: cmp lr, r3 |
---|
985 | bcc 2b |
---|
986 | @ beq Ldo_single_div |
---|
987 | |
---|
988 | /* NB: these are commented out in the V8-Sparc manual as well */ |
---|
989 | /* (I do not understand this) */ |
---|
990 | @ lr > r3: went too far: back up 1 step |
---|
991 | @ srl lr, 1, lr |
---|
992 | @ dec r5 |
---|
993 | @ do single-bit divide steps |
---|
994 | @ |
---|
995 | @ We have to be careful here. We know that r3 >= lr, so we can do the |
---|
996 | @ first divide step without thinking. BUT, the others are conditional, |
---|
997 | @ and are only done if r3 >= 0. Because both r3 and lr may have the high- |
---|
998 | @ order bit set in the first step, just falling into the regular |
---|
999 | @ division loop will mess up the first time around. |
---|
1000 | @ So we unroll slightly... |
---|
/* r5 is the number of single-bit steps to perform. The first one */
/* subtracts lr unconditionally and starts the quotient at 1. */
1001 | Ldo_single_div: |
---|
1002 | subs r5, r5, #1 |
---|
1003 | blt Lend_regular_divide |
---|
1004 | sub r3, r3, lr |
---|
1005 | mov r2, #1 |
---|
1006 | b Lend_single_divloop |
---|
/* The cmp fixes the flags for all four conditional instructions below: */
/* neither the two mov shifts nor the conditional add/sub set flags. */
/* Non-negative remainder: subtract lr, quotient digit +1. */
/* Negative remainder: add lr, quotient digit -1. */
1007 | Lsingle_divloop: |
---|
1008 | cmp r3, #0 |
---|
1009 | mov r2, r2, lsl #1 |
---|
1010 | mov lr, lr, lsr #1 |
---|
1011 | @ r3 >= 0 |
---|
1012 | subpl r3, r3, lr |
---|
1013 | addpl r2, r2, #1 |
---|
1014 | @ r3 < 0 |
---|
1015 | addmi r3, r3, lr |
---|
1016 | submi r2, r2, #1 |
---|
1017 | Lend_single_divloop: |
---|
1018 | subs r5, r5, #1 |
---|
1019 | bge Lsingle_divloop |
---|
1020 | b Lend_regular_divide |
---|
1021 | |
---|
/* Small-dividend set-up: shift lr left four bits at a time until it */
/* exceeds r3 (unsigned); ip counts the shifts after the first one. */
1022 | 1: |
---|
1023 | add ip, ip, #1 |
---|
1024 | Lnot_really_big: |
---|
1025 | mov lr, lr, lsl #4 |
---|
1026 | cmp lr, r3 |
---|
1027 | bls 1b |
---|
1028 | @ |
---|
1029 | @ HOW CAN ip EVER BE -1 HERE ????? |
---|
1030 | @ |
---|
/* NOTE(review): on the paths visible in this routine ip starts at 0 and */
/* is only incremented before this point, so the branch below looks */
/* unreachable - confirm before removing it. */
1031 | cmn ip, #1 |
---|
1032 | beq Lgot_result |
---|
1033 | |
---|
/* One pass of Ldivloop performs four single-bit non-restoring steps, */
/* unrolled as a branch tree. At each depth lr is halved and the sign of */
/* the running remainder selects subtract (non-negative) or add */
/* (negative). r2 is shifted left by four at the top of the pass and */
/* each of the 16 leaves adds that pass's signed digit, an odd value */
/* from -15 to +15, before branching to 9:. */
/* The cmp here supplies the flags for the first blt; the mov */
/* instructions in between do not alter them. */
1034 | Ldivloop: |
---|
1035 | cmp r3, #0 @ set up for initial iteration |
---|
1036 | mov r2, r2, lsl #4 |
---|
1037 | @ depth 1, accumulated bits 0 |
---|
1038 | mov lr, lr, lsr #1 |
---|
1039 | blt L.1.1015 |
---|
1040 | @ remainder is positive |
---|
1041 | subs r3, r3, lr |
---|
1042 | @ depth 2, accumulated bits 1 |
---|
1043 | mov lr, lr, lsr #1 |
---|
1044 | blt L.2.1016 |
---|
1045 | @ remainder is positive |
---|
1046 | subs r3, r3, lr |
---|
1047 | @ depth 3, accumulated bits 3 |
---|
1048 | mov lr, lr, lsr #1 |
---|
1049 | blt L.3.1018 |
---|
1050 | @ remainder is positive |
---|
1051 | subs r3, r3, lr |
---|
1052 | @ depth 4, accumulated bits 7 |
---|
1053 | mov lr, lr, lsr #1 |
---|
1054 | blt L.4.1022 |
---|
1055 | @ remainder is positive |
---|
1056 | subs r3, r3, lr |
---|
1057 | add r2, r2, #15 |
---|
1058 | |
---|
1059 | b 9f |
---|
1060 | |
---|
1061 | L.4.1022: |
---|
1062 | @ remainder is negative |
---|
1063 | adds r3, r3, lr |
---|
1064 | add r2, r2, #13 |
---|
1065 | b 9f |
---|
1066 | |
---|
1067 | |
---|
1068 | |
---|
1069 | L.3.1018: |
---|
1070 | @ remainder is negative |
---|
1071 | adds r3, r3, lr |
---|
1072 | @ depth 4, accumulated bits 5 |
---|
1073 | mov lr, lr, lsr #1 |
---|
1074 | blt L.4.1020 |
---|
1075 | @ remainder is positive |
---|
1076 | subs r3, r3, lr |
---|
1077 | add r2, r2, #11 |
---|
1078 | |
---|
1079 | b 9f |
---|
1080 | |
---|
1081 | L.4.1020: |
---|
1082 | @ remainder is negative |
---|
1083 | adds r3, r3, lr |
---|
1084 | add r2, r2, #9 |
---|
1085 | b 9f |
---|
1086 | |
---|
1087 | |
---|
1088 | |
---|
1089 | |
---|
1090 | L.2.1016: |
---|
1091 | @ remainder is negative |
---|
1092 | adds r3, r3, lr |
---|
1093 | @ depth 3, accumulated bits 1 |
---|
1094 | mov lr, lr, lsr #1 |
---|
1095 | blt L.3.1016 |
---|
1096 | @ remainder is positive |
---|
1097 | subs r3, r3, lr |
---|
1098 | @ depth 4, accumulated bits 3 |
---|
1099 | mov lr, lr, lsr #1 |
---|
1100 | blt L.4.1018 |
---|
1101 | @ remainder is positive |
---|
1102 | subs r3, r3, lr |
---|
1103 | add r2, r2, #7 |
---|
1104 | |
---|
1105 | b 9f |
---|
1106 | |
---|
1107 | L.4.1018: |
---|
1108 | @ remainder is negative |
---|
1109 | adds r3, r3, lr |
---|
1110 | add r2, r2, #5 |
---|
1111 | b 9f |
---|
1112 | |
---|
1113 | |
---|
1114 | |
---|
1115 | L.3.1016: |
---|
1116 | @ remainder is negative |
---|
1117 | adds r3, r3, lr |
---|
1118 | @ depth 4, accumulated bits 1 |
---|
1119 | mov lr, lr, lsr #1 |
---|
1120 | blt L.4.1016 |
---|
1121 | @ remainder is positive |
---|
1122 | subs r3, r3, lr |
---|
1123 | add r2, r2, #3 |
---|
1124 | |
---|
1125 | b 9f |
---|
1126 | |
---|
1127 | L.4.1016: |
---|
1128 | @ remainder is negative |
---|
1129 | adds r3, r3, lr |
---|
1130 | add r2, r2, #1 |
---|
1131 | b 9f |
---|
1132 | |
---|
1133 | |
---|
1134 | |
---|
1135 | |
---|
1136 | |
---|
1137 | L.1.1015: |
---|
1138 | @ remainder is negative |
---|
1139 | adds r3, r3, lr |
---|
1140 | @ depth 2, accumulated bits -1 |
---|
1141 | mov lr, lr, lsr #1 |
---|
1142 | blt L.2.1014 |
---|
1143 | @ remainder is positive |
---|
1144 | subs r3, r3, lr |
---|
1145 | @ depth 3, accumulated bits -1 |
---|
1146 | mov lr, lr, lsr #1 |
---|
1147 | blt L.3.1014 |
---|
1148 | @ remainder is positive |
---|
1149 | subs r3, r3, lr |
---|
1150 | @ depth 4, accumulated bits -1 |
---|
1151 | mov lr, lr, lsr #1 |
---|
1152 | blt L.4.1014 |
---|
1153 | @ remainder is positive |
---|
1154 | subs r3, r3, lr |
---|
1155 | sub r2, r2, #1 |
---|
1156 | |
---|
1157 | b 9f |
---|
1158 | |
---|
1159 | L.4.1014: |
---|
1160 | @ remainder is negative |
---|
1161 | adds r3, r3, lr |
---|
1162 | sub r2, r2, #3 |
---|
1163 | b 9f |
---|
1164 | |
---|
1165 | |
---|
1166 | |
---|
1167 | L.3.1014: |
---|
1168 | @ remainder is negative |
---|
1169 | adds r3, r3, lr |
---|
1170 | @ depth 4, accumulated bits -3 |
---|
1171 | mov lr, lr, lsr #1 |
---|
1172 | blt L.4.1012 |
---|
1173 | @ remainder is positive |
---|
1174 | subs r3, r3, lr |
---|
1175 | sub r2, r2, #5 |
---|
1176 | |
---|
1177 | b 9f |
---|
1178 | |
---|
1179 | L.4.1012: |
---|
1180 | @ remainder is negative |
---|
1181 | adds r3, r3, lr |
---|
1182 | sub r2, r2, #7 |
---|
1183 | b 9f |
---|
1184 | |
---|
1185 | |
---|
1186 | |
---|
1187 | |
---|
1188 | L.2.1014: |
---|
1189 | @ remainder is negative |
---|
1190 | adds r3, r3, lr |
---|
1191 | @ depth 3, accumulated bits -3 |
---|
1192 | mov lr, lr, lsr #1 |
---|
1193 | blt L.3.1012 |
---|
1194 | @ remainder is positive |
---|
1195 | subs r3, r3, lr |
---|
1196 | @ depth 4, accumulated bits -5 |
---|
1197 | mov lr, lr, lsr #1 |
---|
1198 | blt L.4.1010 |
---|
1199 | @ remainder is positive |
---|
1200 | subs r3, r3, lr |
---|
1201 | sub r2, r2, #9 |
---|
1202 | |
---|
1203 | b 9f |
---|
1204 | |
---|
1205 | L.4.1010: |
---|
1206 | @ remainder is negative |
---|
1207 | adds r3, r3, lr |
---|
1208 | sub r2, r2, #11 |
---|
1209 | b 9f |
---|
1210 | |
---|
1211 | |
---|
1212 | |
---|
1213 | L.3.1012: |
---|
1214 | @ remainder is negative |
---|
1215 | adds r3, r3, lr |
---|
1216 | @ depth 4, accumulated bits -7 |
---|
1217 | mov lr, lr, lsr #1 |
---|
1218 | blt L.4.1008 |
---|
1219 | @ remainder is positive |
---|
1220 | subs r3, r3, lr |
---|
1221 | sub r2, r2, #13 |
---|
1222 | |
---|
1223 | b 9f |
---|
1224 | |
---|
1225 | L.4.1008: |
---|
1226 | @ remainder is negative |
---|
1227 | adds r3, r3, lr |
---|
1228 | sub r2, r2, #15 |
---|
1229 | b 9f |
---|
1230 | |
---|
1231 | |
---|
1232 | |
---|
1233 | |
---|
1234 | |
---|
/* Common join point of all 16 leaves and of the single-bit path. */
/* Another 4-bit pass runs while ip, after the decrement, is still >= 0. */
1235 | 9: |
---|
1236 | Lend_regular_divide: |
---|
1237 | subs ip, ip, #1 |
---|
1238 | bge Ldivloop |
---|
/* A negative final remainder is corrected by adding the divisor once. */
/* r1 is never written in this routine, so it is still the caller's value. */
1239 | cmp r3, #0 |
---|
1240 | @ non-restoring fixup here (one instruction only!) |
---|
1241 | addlt r3, r1, r3 |
---|
1242 | |
---|
1243 | |
---|
/* Exit: return r3 in r0, restore r4 and r5, and load the saved lr into pc. */
1244 | Lgot_result: |
---|
1245 | |
---|
1246 | mov r0, r3 |
---|
1247 | ldmia sp!, {r4, r5, pc} |
---|
1248 | |
---|
/* Zero divisor. The bl overwrites lr, which is why the return goes */
/* through the copy of lr pushed on entry. */
1249 | Ldiv_zero: |
---|
1250 | @ Divide by zero trap. If it returns, return 0 (about as |
---|
1251 | @ wrong as possible, but that is what SunOS does...). |
---|
1252 | bl ___div0 |
---|
1253 | mov r0, #0 |
---|
1254 | ldmia sp!, {r4, r5, pc} |
---|
1255 | |
---|
1256 | #endif /* L_umodsi3 */ |
---|
1257 | |
---|
1258 | #ifdef L_modsi3 |
---|
1259 | |
---|
1260 | ip .req r12 |
---|
1261 | sp .req r13 |
---|
1262 | lr .req r14 |
---|
1263 | pc .req r15 |
---|
1264 | .text |
---|
1265 | .globl ___modsi3 |
---|
1266 | .align 0 |
---|
/* ___modsi3: signed 32-bit remainder by non-restoring division. */
/* In:  r0 = dividend, r1 = divisor. */
/* Out: r0 = remainder computed on the magnitudes of the operands, */
/*      negated at Lgot_result when the original dividend was negative. */
/*      If r1 is zero, ___div0 is called and, if it returns, r0 = 0. */
/* Register roles inside the routine: */
/*   r6 = original dividend, kept only for the final sign test */
/*   r1 = divisor, replaced by its negation when negative */
/*   r3 = running remainder (starts as the dividend made non-negative) */
/*   lr = working copy of the divisor, shifted up and then back down */
/*   r2 = quotient accumulator (maintained but never returned) */
/*   ip = number of extra 4-bit passes through Ldivloop */
/*   r5 = number of single-bit steps for large dividends */
/*   r4 = the constant 1 << 27 (later 1 << 31 on the overflow path) */
/* r4, r5, r6 and lr are pushed on entry; the exit pops the saved lr into pc. */
1267 | ___modsi3: |
---|
1268 | stmdb sp!, {r4, r5, r6, lr} |
---|
1269 | @ compute sign of result; if neither is negative, no problem |
---|
1270 | mov r6, r0 |
---|
/* rsbmi negates a negative divisor without setting flags, so the beq */
/* below still acts on the cmp against zero. */
1271 | cmp r1, #0 |
---|
1272 | rsbmi r1, r1, #0 |
---|
1273 | beq Ldiv_zero |
---|
1274 | mov lr, r1 |
---|
1275 | movs r3, r0 |
---|
1276 | rsbmi r3, r3, #0 @ make dividend nonnegative |
---|
1277 | |
---|
1278 | |
---|
/* Unsigned compare: when r3 is below lr, r3 already holds the magnitude */
/* of the remainder, so go straight to the sign fix-up and exit. */
1279 | cmp r3, lr @ if r1 exceeds r0, done |
---|
1280 | mov r2, #0 |
---|
1281 | bcc Lgot_result @ (and algorithm fails otherwise) |
---|
/* r4 = 1 << 27. Dividends below it use only the 4-bits-per-pass loop */
/* entered at Lnot_really_big; larger ones get single-bit steps first. */
1282 | mov r4, #(1 << (32 - 4 - 1)) |
---|
1283 | cmp r3, r4 |
---|
1284 | mov ip, #0 |
---|
1285 | bcc Lnot_really_big |
---|
1286 | |
---|
1287 | @ Here the dividend is >= 2^(31-N) or so. We must be careful here, |
---|
1288 | @ as our usual N-at-a-shot divide step will cause overflow and havoc. |
---|
1289 | @ The number of bits in the result here is N*ITER+SC, where SC <= N. |
---|
1290 | @ Compute ITER in an unorthodox manner: know we need to shift V into |
---|
1291 | @ the top decade: so do not even bother to compare to R. |
---|
1292 | mov r5, #1 |
---|
/* Shift lr left four bits at a time, counting the shifts in ip, until */
/* lr is at least r4 (unsigned). */
1293 | 1: |
---|
1294 | cmp lr, r4 |
---|
1295 | bcs 3f |
---|
1296 | mov lr, lr, lsl #4 |
---|
1297 | add ip, ip, #1 |
---|
1298 | b 1b |
---|
1299 | |
---|
1300 | @ Now compute r5. |
---|
/* Reached from 3: while lr is below r3. Doubles lr and counts the */
/* doubling in r5; adds sets carry when the top bit is shifted out. */
1301 | 2: adds lr, lr, lr |
---|
1302 | add r5, r5, #1 |
---|
1303 | bcc Lnot_too_big |
---|
1304 | |
---|
1305 | @ We get here if the r1 overflowed while shifting. |
---|
1306 | @ This means that r3 has the high-order bit set. |
---|
1307 | @ Restore lr and subtract from r3. |
---|
/* r4 << 4 is 1 << 31: together with the shift right by one this puts */
/* back the bit lost by the doubling. r5 is decremented to undo the */
/* increment made just above. */
1308 | mov r4, r4, lsl #4 |
---|
1309 | mov lr, lr, lsr #1 |
---|
1310 | add lr, r4, lr |
---|
1311 | sub r5, r5, #1 |
---|
1312 | b Ldo_single_div |
---|
1313 | |
---|
1314 | Lnot_too_big: |
---|
1315 | 3: cmp lr, r3 |
---|
1316 | bcc 2b |
---|
1317 | @ beq Ldo_single_div |
---|
1318 | |
---|
1319 | /* NB: these are commented out in the V8-Sparc manual as well */ |
---|
1320 | /* (I do not understand this) */ |
---|
1321 | @ lr > r3: went too far: back up 1 step |
---|
1322 | @ srl lr, 1, lr |
---|
1323 | @ dec r5 |
---|
1324 | @ do single-bit divide steps |
---|
1325 | @ |
---|
1326 | @ We have to be careful here. We know that r3 >= lr, so we can do the |
---|
1327 | @ first divide step without thinking. BUT, the others are conditional, |
---|
1328 | @ and are only done if r3 >= 0. Because both r3 and lr may have the high- |
---|
1329 | @ order bit set in the first step, just falling into the regular |
---|
1330 | @ division loop will mess up the first time around. |
---|
1331 | @ So we unroll slightly... |
---|
/* r5 is the number of single-bit steps to perform. The first one */
/* subtracts lr unconditionally and starts the quotient at 1. */
1332 | Ldo_single_div: |
---|
1333 | subs r5, r5, #1 |
---|
1334 | blt Lend_regular_divide |
---|
1335 | sub r3, r3, lr |
---|
1336 | mov r2, #1 |
---|
1337 | b Lend_single_divloop |
---|
/* The cmp fixes the flags for all four conditional instructions below: */
/* neither the two mov shifts nor the conditional add/sub set flags. */
/* Non-negative remainder: subtract lr, quotient digit +1. */
/* Negative remainder: add lr, quotient digit -1. */
1338 | Lsingle_divloop: |
---|
1339 | cmp r3, #0 |
---|
1340 | mov r2, r2, lsl #1 |
---|
1341 | mov lr, lr, lsr #1 |
---|
1342 | @ r3 >= 0 |
---|
1343 | subpl r3, r3, lr |
---|
1344 | addpl r2, r2, #1 |
---|
1345 | @ r3 < 0 |
---|
1346 | addmi r3, r3, lr |
---|
1347 | submi r2, r2, #1 |
---|
1348 | Lend_single_divloop: |
---|
1349 | subs r5, r5, #1 |
---|
1350 | bge Lsingle_divloop |
---|
1351 | b Lend_regular_divide |
---|
1352 | |
---|
/* Small-dividend set-up: shift lr left four bits at a time until it */
/* exceeds r3 (unsigned); ip counts the shifts after the first one. */
1353 | 1: |
---|
1354 | add ip, ip, #1 |
---|
1355 | Lnot_really_big: |
---|
1356 | mov lr, lr, lsl #4 |
---|
1357 | cmp lr, r3 |
---|
1358 | bls 1b |
---|
1359 | @ |
---|
1360 | @ HOW CAN ip EVER BE -1 HERE ????? |
---|
1361 | @ |
---|
/* NOTE(review): on the paths visible in this routine ip starts at 0 and */
/* is only incremented before this point, so the branch below looks */
/* unreachable - confirm before removing it. */
1362 | cmn ip, #1 |
---|
1363 | beq Lgot_result |
---|
1364 | |
---|
/* One pass of Ldivloop performs four single-bit non-restoring steps, */
/* unrolled as a branch tree. At each depth lr is halved and the sign of */
/* the running remainder selects subtract (non-negative) or add */
/* (negative). r2 is shifted left by four at the top of the pass and */
/* each of the 16 leaves adds that pass's signed digit, an odd value */
/* from -15 to +15, before branching to 9:. */
/* The cmp here supplies the flags for the first blt; the mov */
/* instructions in between do not alter them. */
1365 | Ldivloop: |
---|
1366 | cmp r3, #0 @ set up for initial iteration |
---|
1367 | mov r2, r2, lsl #4 |
---|
1368 | @ depth 1, accumulated bits 0 |
---|
1369 | mov lr, lr, lsr #1 |
---|
1370 | blt L.1.1015 |
---|
1371 | @ remainder is positive |
---|
1372 | subs r3, r3, lr |
---|
1373 | @ depth 2, accumulated bits 1 |
---|
1374 | mov lr, lr, lsr #1 |
---|
1375 | blt L.2.1016 |
---|
1376 | @ remainder is positive |
---|
1377 | subs r3, r3, lr |
---|
1378 | @ depth 3, accumulated bits 3 |
---|
1379 | mov lr, lr, lsr #1 |
---|
1380 | blt L.3.1018 |
---|
1381 | @ remainder is positive |
---|
1382 | subs r3, r3, lr |
---|
1383 | @ depth 4, accumulated bits 7 |
---|
1384 | mov lr, lr, lsr #1 |
---|
1385 | blt L.4.1022 |
---|
1386 | @ remainder is positive |
---|
1387 | subs r3, r3, lr |
---|
1388 | add r2, r2, #15 |
---|
1389 | |
---|
1390 | b 9f |
---|
1391 | |
---|
1392 | L.4.1022: |
---|
1393 | @ remainder is negative |
---|
1394 | adds r3, r3, lr |
---|
1395 | add r2, r2, #13 |
---|
1396 | b 9f |
---|
1397 | |
---|
1398 | |
---|
1399 | |
---|
1400 | L.3.1018: |
---|
1401 | @ remainder is negative |
---|
1402 | adds r3, r3, lr |
---|
1403 | @ depth 4, accumulated bits 5 |
---|
1404 | mov lr, lr, lsr #1 |
---|
1405 | blt L.4.1020 |
---|
1406 | @ remainder is positive |
---|
1407 | subs r3, r3, lr |
---|
1408 | add r2, r2, #11 |
---|
1409 | |
---|
1410 | b 9f |
---|
1411 | |
---|
1412 | L.4.1020: |
---|
1413 | @ remainder is negative |
---|
1414 | adds r3, r3, lr |
---|
1415 | add r2, r2, #9 |
---|
1416 | b 9f |
---|
1417 | |
---|
1418 | |
---|
1419 | |
---|
1420 | |
---|
1421 | L.2.1016: |
---|
1422 | @ remainder is negative |
---|
1423 | adds r3, r3, lr |
---|
1424 | @ depth 3, accumulated bits 1 |
---|
1425 | mov lr, lr, lsr #1 |
---|
1426 | blt L.3.1016 |
---|
1427 | @ remainder is positive |
---|
1428 | subs r3, r3, lr |
---|
1429 | @ depth 4, accumulated bits 3 |
---|
1430 | mov lr, lr, lsr #1 |
---|
1431 | blt L.4.1018 |
---|
1432 | @ remainder is positive |
---|
1433 | subs r3, r3, lr |
---|
1434 | add r2, r2, #7 |
---|
1435 | |
---|
1436 | b 9f |
---|
1437 | |
---|
1438 | L.4.1018: |
---|
1439 | @ remainder is negative |
---|
1440 | adds r3, r3, lr |
---|
1441 | add r2, r2, #5 |
---|
1442 | b 9f |
---|
1443 | |
---|
1444 | |
---|
1445 | |
---|
1446 | L.3.1016: |
---|
1447 | @ remainder is negative |
---|
1448 | adds r3, r3, lr |
---|
1449 | @ depth 4, accumulated bits 1 |
---|
1450 | mov lr, lr, lsr #1 |
---|
1451 | blt L.4.1016 |
---|
1452 | @ remainder is positive |
---|
1453 | subs r3, r3, lr |
---|
1454 | add r2, r2, #3 |
---|
1455 | |
---|
1456 | b 9f |
---|
1457 | |
---|
1458 | L.4.1016: |
---|
1459 | @ remainder is negative |
---|
1460 | adds r3, r3, lr |
---|
1461 | add r2, r2, #1 |
---|
1462 | b 9f |
---|
1463 | |
---|
1464 | |
---|
1465 | |
---|
1466 | |
---|
1467 | |
---|
1468 | L.1.1015: |
---|
1469 | @ remainder is negative |
---|
1470 | adds r3, r3, lr |
---|
1471 | @ depth 2, accumulated bits -1 |
---|
1472 | mov lr, lr, lsr #1 |
---|
1473 | blt L.2.1014 |
---|
1474 | @ remainder is positive |
---|
1475 | subs r3, r3, lr |
---|
1476 | @ depth 3, accumulated bits -1 |
---|
1477 | mov lr, lr, lsr #1 |
---|
1478 | blt L.3.1014 |
---|
1479 | @ remainder is positive |
---|
1480 | subs r3, r3, lr |
---|
1481 | @ depth 4, accumulated bits -1 |
---|
1482 | mov lr, lr, lsr #1 |
---|
1483 | blt L.4.1014 |
---|
1484 | @ remainder is positive |
---|
1485 | subs r3, r3, lr |
---|
1486 | sub r2, r2, #1 |
---|
1487 | |
---|
1488 | b 9f |
---|
1489 | |
---|
1490 | L.4.1014: |
---|
1491 | @ remainder is negative |
---|
1492 | adds r3, r3, lr |
---|
1493 | sub r2, r2, #3 |
---|
1494 | b 9f |
---|
1495 | |
---|
1496 | |
---|
1497 | |
---|
1498 | L.3.1014: |
---|
1499 | @ remainder is negative |
---|
1500 | adds r3, r3, lr |
---|
1501 | @ depth 4, accumulated bits -3 |
---|
1502 | mov lr, lr, lsr #1 |
---|
1503 | blt L.4.1012 |
---|
1504 | @ remainder is positive |
---|
1505 | subs r3, r3, lr |
---|
1506 | sub r2, r2, #5 |
---|
1507 | |
---|
1508 | b 9f |
---|
1509 | |
---|
1510 | L.4.1012: |
---|
1511 | @ remainder is negative |
---|
1512 | adds r3, r3, lr |
---|
1513 | sub r2, r2, #7 |
---|
1514 | b 9f |
---|
1515 | |
---|
1516 | |
---|
1517 | |
---|
1518 | |
---|
1519 | L.2.1014: |
---|
1520 | @ remainder is negative |
---|
1521 | adds r3, r3, lr |
---|
1522 | @ depth 3, accumulated bits -3 |
---|
1523 | mov lr, lr, lsr #1 |
---|
1524 | blt L.3.1012 |
---|
1525 | @ remainder is positive |
---|
1526 | subs r3, r3, lr |
---|
1527 | @ depth 4, accumulated bits -5 |
---|
1528 | mov lr, lr, lsr #1 |
---|
1529 | blt L.4.1010 |
---|
1530 | @ remainder is positive |
---|
1531 | subs r3, r3, lr |
---|
1532 | sub r2, r2, #9 |
---|
1533 | |
---|
1534 | b 9f |
---|
1535 | |
---|
1536 | L.4.1010: |
---|
1537 | @ remainder is negative |
---|
1538 | adds r3, r3, lr |
---|
1539 | sub r2, r2, #11 |
---|
1540 | b 9f |
---|
1541 | |
---|
1542 | |
---|
1543 | |
---|
1544 | L.3.1012: |
---|
1545 | @ remainder is negative |
---|
1546 | adds r3, r3, lr |
---|
1547 | @ depth 4, accumulated bits -7 |
---|
1548 | mov lr, lr, lsr #1 |
---|
1549 | blt L.4.1008 |
---|
1550 | @ remainder is positive |
---|
1551 | subs r3, r3, lr |
---|
1552 | sub r2, r2, #13 |
---|
1553 | |
---|
1554 | b 9f |
---|
1555 | |
---|
1556 | L.4.1008: |
---|
1557 | @ remainder is negative |
---|
1558 | adds r3, r3, lr |
---|
1559 | sub r2, r2, #15 |
---|
1560 | b 9f |
---|
1561 | |
---|
1562 | |
---|
1563 | |
---|
1564 | |
---|
1565 | |
---|
/* Common join point of all 16 leaves and of the single-bit path. */
/* Another 4-bit pass runs while ip, after the decrement, is still >= 0. */
1566 | 9: |
---|
1567 | Lend_regular_divide: |
---|
1568 | subs ip, ip, #1 |
---|
1569 | bge Ldivloop |
---|
/* A negative final remainder is corrected by adding the divisor once. */
/* r1 here is the divisor after the sign adjustment made at entry. */
1570 | cmp r3, #0 |
---|
1571 | @ non-restoring fixup here (one instruction only!) |
---|
1572 | addlt r3, r1, r3 |
---|
1573 | |
---|
1574 | |
---|
/* Exit: r6 is the dividend as passed in, and the result is negated when */
/* it was negative. The sign of the divisor is not consulted here. */
/* The early bcc exit near the top of the routine also lands here. */
1575 | Lgot_result: |
---|
1576 | @ check to see if answer should be < 0 |
---|
1577 | cmp r6, #0 |
---|
1578 | rsbmi r3, r3, #0 |
---|
1579 | |
---|
1580 | mov r0, r3 |
---|
1581 | ldmia sp!, {r4, r5, r6, pc} |
---|
1582 | |
---|
/* Zero divisor. The bl overwrites lr, which is why the return goes */
/* through the copy of lr pushed on entry. */
1583 | Ldiv_zero: |
---|
1584 | @ Divide by zero trap. If it returns, return 0 (about as |
---|
1585 | @ wrong as possible, but that is what SunOS does...). |
---|
1586 | bl ___div0 |
---|
1587 | mov r0, #0 |
---|
1588 | ldmia sp!, {r4, r5, r6, pc} |
---|
1589 | |
---|
1590 | #endif /* L_modsi3 */ |
---|
1591 | |
---|
1592 | #ifdef L_dvmd_tls |
---|
1593 | |
---|
/* ___div0: the routine the division and remainder code calls (with bl) */
/* when the divisor is zero. This version does nothing: it returns */
/* straight to its caller by copying lr into pc and changes no other */
/* register. */
1594 | .globl ___div0 |
---|
1595 | .align 0 |
---|
1596 | ___div0: |
---|
1597 | mov pc, lr |
---|
1598 | |
---|
1599 | #endif /* L_dvmd_tls */ |
---|